diff --git a/config.cfg b/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..78e00ba3dcaaeab11e65a7a1921e7e424b3f91ef
--- /dev/null
+++ b/config.cfg
@@ -0,0 +1,55 @@
+[model]
+device = cpu
+gpu_num = 0
+path = /mnt/sda/pdn2scripts/nkjp_base
+pretrained_path = /mnt/sda/pdn2scripts/roberta_base
+
+[predict]
+device = cpu
+save_to_file = true
+path = /mnt/sda/pdn2scripts/roberta_base
+max_seq_len = 100
+path_to_save = predict_res.txt
+
+[evaluate]
+device = cpu
+gpu_num = 0
+path = E:/ClarinProjects/nkjp_base
+pretrained_path = ./roberta_base
+squeeze = false
+max_seq_len = 100
+hidden_size = 32
+dropout = 0.05
+
+[data]
+tag_column_index = 3
+eval_path = data/coNLL-2003/test.txt
+pred_path = tests/resources/text_krakow.txt
+
+[train]
+adam_epsilon = 0.1
+data_test = data/coNLL-2003/test.txt
+data_train = data/coNLL-2003/train.txt
+data_tune = data/coNLL-2003/valid.txt
+device = cuda
+dropout = 0.05
+epoch_save_model = True
+eval_batch_size = 16
+fp16 = false
+fp16_opt_level = ''
+freeze_model = True
+gradient_accumulation_steps = 5
+hidden_size = 32
+learning_rate = 0.001
+max_grad_norm = 5
+max_seq_length = 32
+num_train_epochs = 100
+output_dir = test_res
+pretrained_path = /mnt/sda/pdn2scripts/roberta_base
+seed = 42
+squeeze = true
+train_batch_size = 16
+training_mix = False
+transfer = None
+warmup_proportion = 0.3
+weight_decay = 0.1
diff --git a/evaluator.py b/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..006204ddfe844d3787b3c91fa179895e862ad37c
--- /dev/null
+++ b/evaluator.py
@@ -0,0 +1,91 @@
+"""Script for evaluating models on a pre-defined set of data."""
+
+import configparser
+import os
+import time
+
+from poldeepner2.utils.data_utils import NerProcessor, create_dataset, \
+    convert_examples_to_features
+from poldeepner2.utils.train_utils import evaluate_model
+
+
+def main():
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    pretrained_model = config['evaluate']['pretrained_path']
+    device = config['evaluate']['device']
+    squeeze = config.getboolean('evaluate', 'squeeze')
+    tag_column_index = config.getint('data', 'tag_column_index')
+    processor = NerProcessor()
+
+    data_path = config['data']['eval_path']
+    datasets = [data_path]
+
+    labels_list = processor.get_labels(datasets, tag_column_index)
+
+    num_labels = len(labels_list) + 1
+    hidden_size = config.getint('evaluate', 'hidden_size')
+    dropout = config.getfloat('evaluate', 'dropout')
+
+    # 'large' and 'base' encoder variants have fixed hidden sizes;
+    # fall back to the configured value otherwise
+    hidden_size = 1024 if 'large' in pretrained_model \
+        else (768 if 'base' in pretrained_model else hidden_size)
+
+    pretrained_path = config['model']['pretrained_path']
+
+    if pretrained_path.startswith("hf:"):
+        from poldeepner2.model.hf_for_token_calssification \
+            import HfModelForTokenClassification
+        pretrained_dir = pretrained_path.split(':')[1]
+        model = HfModelForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+    elif pretrained_path.startswith("mt5:"):
+        from poldeepner2.model.mt5_for_token_calssification \
+            import Mt5ModelForTokenClassification
+        variant = pretrained_path.split(':')[1]
+        model = Mt5ModelForTokenClassification(
+            variant=variant, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout, device=device)
+    else:
+        from poldeepner2.model.xlmr_for_token_classification \
+            import XLMRForTokenClassification
+        pretrained_dir = pretrained_path
+        if ":" in pretrained_dir:
+            pretrained_dir = pretrained_dir.split(':')[1]
+        if not os.path.exists(pretrained_dir):
+            raise ValueError("RoBERTa language model not found on path '%s'"
+                             % pretrained_dir)
+
+        model = XLMRForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+
+    max_seq_len = config.getint('evaluate', 'max_seq_len')
+
+    eval_examples = processor.get_examples(datasets[0], tag_column_index,
+                                           'eval')
+
+    eval_features = convert_examples_to_features(
+        eval_examples, labels_list, max_seq_len, model.encode_word,
+        squeeze=squeeze)
+
+    eval_data = create_dataset(eval_features)
+
+    time_start = time.time()
+    f1, report = evaluate_model(model, eval_data, labels_list, 16, device)
+    time_end = time.time()
+    print(f'f1: {f1}')
+    print(f'report: {report}')
+    print(f'time: {time_end - time_start}')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/poldeepner2/models.py b/poldeepner2/models.py
index 573de7175ba6534ecaba73145b63b8e2bdb27101..faa6998dc717a320fad378f16ea8a8dbf6b9e159 100644
--- a/poldeepner2/models.py
+++ b/poldeepner2/models.py
@@ -100,7 +100,7 @@ class PolDeepNer2:
     """A message of shame -- documentation must be completed."""
 
     def __init__(self, model_path: str,
-                 pretrained_path: str = None,
+                 pretrained_path: str,
                  device="cpu",
                  squeeze=False,
                  max_seq_length=256,
diff --git a/poldeepner2/utils/data_utils.py b/poldeepner2/utils/data_utils.py
index 96c3b6eec39fc275594e0941ea2e4e03f0a2c515..d2de73b216650f755c8ede668e53a340b35d56df 100644
--- a/poldeepner2/utils/data_utils.py
+++ b/poldeepner2/utils/data_utils.py
@@ -171,7 +171,7 @@ class NerProcessor:
         """
         label_set = set([])
         for path in paths:
-            examples = self._create_examples(self._read_file(path), "data")
+            examples = self._create_examples(self._read_iob(path, tag_column_index), "data")
             label_set.update(NerProcessor._get_labels(examples))
         return sorted(list(label_set))
 
@@ -208,6 +208,34 @@ class NerProcessor:
             data.append((sentence, label))
         return data
 
+    def _read_iob(self, filename, column_index):
+        """Read a whitespace-separated IOB file, one token per line."""
+        data = []
+        sentence = []
+        label = []
+        with open(filename, encoding='utf-8') as f:
+            for i, line in enumerate(f, 1):
+                line = line.strip('\n')
+
+                # sentence boundary: document marker or empty line
+                if line.startswith('-DOCSTART') or len(line) == 0:
+                    if len(sentence) > 0:
+                        data.append((sentence, label))
+                        sentence = []
+                        label = []
+                    continue
+
+                splits = line.split()
+                assert len(splits) > column_index, \
+                    "error on line {}. Found {} splits".format(i, len(splits))
+
+                word, tag = splits[0], splits[column_index]
+                sentence.append(word)
+                label.append(tag)
+        if len(sentence) > 0:
+            data.append((sentence, label))
+        return data
+
     def _create_examples(self, lines, set_type):
         """A message of shame -- documentation must be completed.
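For reference, _read_iob reads whitespace-separated columns and takes the tag from column_index; with the sample config's tag_column_index = 3 this matches the standard four-column CoNLL-2003 layout. A minimal illustration (hypothetical tokens and tags) of a file it would accept:

    -DOCSTART- -X- -X- O

    John NNP B-NP B-PER
    Smith NNP I-NP I-PER

Blank lines and -DOCSTART markers close the current sentence, so this parses to a single pair (['John', 'Smith'], ['B-PER', 'I-PER']).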
diff --git a/poldeepner2/utils/train_utils.py b/poldeepner2/utils/train_utils.py
index a24d3540fbba15c48ed2d7871483b88d4c9f879a..de5253a1522a5f62a4affb4fac204a5a2c4318d3 100644
--- a/poldeepner2/utils/train_utils.py
+++ b/poldeepner2/utils/train_utils.py
@@ -96,8 +96,77 @@ def add_xlmr_args(parser):
 
 def evaluate_model(model, eval_dataset, label_list, batch_size, device,
                    model_name='Roberta'):
-    """Evaluates an NER model on the eval_dataset provided.
+    """
+    Evaluates an NER model on the eval_dataset provided.
+
+    Returns:
+        F1_score: macro-average f1_score on the evaluation dataset.
+        Report: detailed classification report.
+    """
+    # Run prediction for full data
+    eval_sampler = SequentialSampler(eval_dataset)
+    eval_dataloader = DataLoader(
+        eval_dataset, sampler=eval_sampler, batch_size=batch_size)
+
+    model.eval()  # turn off dropout
+
+    y_true = []
+    y_pred = []
+    # label ids start at 1; id 0 is reserved for padding/ignored sub-tokens
+    label_map = {i: label for i, label in enumerate(label_list, 1)}
+
+    for input_ids, label_ids, l_mask, valid_ids in eval_dataloader:
+        input_ids = input_ids.to(device)
+        label_ids = label_ids.to(device)
+        valid_ids = valid_ids.to(device)
+        l_mask = l_mask.to(device)
+
+        with torch.no_grad():
+            if model_name == 'Roberta':
+                logits = model(input_ids, labels=None, labels_mask=None,
+                               valid_mask=valid_ids)
+            else:
+                logits = model(input_ids, return_dict=True).logits
+        logits = torch.argmax(logits, dim=2)
+        logits = logits.detach().cpu().numpy()
+        label_ids = label_ids.cpu().numpy()
+
+        for i, cur_label in enumerate(label_ids):
+            temp_1 = []
+            temp_2 = []
+
+            for j, m in enumerate(cur_label):
+                if valid_ids[i][j]:  # only score positions with a valid label
+                    temp_1.append(label_map[m])
+                    # predictions mapped to the ignore index (0) are skipped
+                    if logits[i][j]:
+                        temp_2.append(label_map[logits[i][j]])
+            y_true.append(temp_1)
+            y_pred.append(temp_2)
+
+    report = classification_report(y_true, y_pred, digits=4)
+    f1 = f1_score(y_true, y_pred, average='macro')
+
+    return f1, report
+
+
+def predict_model(model, eval_dataset, label_list, batch_size, device,
+                  report=True):
+    """
+    Evaluates an NER model on the eval_dataset provided.
 
     Returns:
         F1_score: Macro-average f1_score on the evaluation dataset.
         Report: detailed classification report
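The label_map convention above is worth spelling out: ids are assigned from 1 so that 0 can act as the ignore index for padding and non-initial sub-tokens, which is also why the scripts set num_labels to the label count plus one. A minimal sketch (hypothetical tag set):

    label_list = ["B-PER", "I-PER", "O"]
    label_map = {i: label for i, label in enumerate(label_list, 1)}
    # label_map == {1: "B-PER", 2: "I-PER", 3: "O"}; id 0 stays unmapped
    num_labels = len(label_list) + 1  # one extra slot for the ignore index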
diff --git a/predictor.py b/predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..3d7151ad64d70cab1ba9beb6d59fc5f2a476ea4e
--- /dev/null
+++ b/predictor.py
@@ -0,0 +1,44 @@
+"""Script for tagging raw data."""
+
+import configparser
+
+from poldeepner2.models import PolDeepNer2
+
+
+def main():
+    # note: the config should be serialized together with the model (json)
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    model = config['model']['path']
+    pretrained_model = config['model']['pretrained_path']
+
+    ner = PolDeepNer2.load(model=model, pretrained_path=pretrained_model)
+
+    data_path = config['data']['pred_path']
+    with open(data_path) as f:
+        data = f.readlines()
+
+    if not config.getboolean('predict', 'save_to_file'):
+        for sentence in data:
+            if sentence != '\n':
+                print(sentence)
+                text_prediction = ner.process_text(sentence)
+                for pred in text_prediction:
+                    print(f'{pred.text}, {pred.label}')
+    else:
+        with open(config['predict']['path_to_save'], 'w+') as f_res:
+            for sentence in data:
+                if sentence != '\n':
+                    text_prediction = ner.process_text(sentence)
+                    for pred in text_prediction:
+                        f_res.write(f'{pred.text}, {pred.label}\n')
+                else:
+                    f_res.write('\n')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/config.cfg b/scripts/config.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..9ac6aabd6e4ac56bb40aa8c4f59f4e170ced81cf
--- /dev/null
+++ b/scripts/config.cfg
@@ -0,0 +1,36 @@
+[model]
+path =
+cpu_or_gpu = cpu
+gpu_num = 0
+
+[predict]
+data_path =
+save_to_file = yes
+
+[train]
+adam_epsilon =
+data_test =
+data_train =
+data_tune =
+device = gpu
+dropout = 0.05
+epoch_save_model = 5
+eval_batch_size = 16
+fp16 = false
+fp16_opt_level =
+freeze_model =
+gradient_accumulation_steps =
+hidden_size = 32
+learning_rate = 0.001
+max_grad_norm =
+max_seq_length = 32
+num_train_epochs = 100
+output_dir =
+pretrained_path =
+seed = 42
+squeeze =
+train_batch_size = 16
+training_mix = 0.5
+transfer =
+warmup_proportion =
+weight_decay = 0.1
diff --git a/scripts/evaluator.py b/scripts/evaluator.py
new file mode 100644
index 0000000000000000000000000000000000000000..983f1127af2685a7e73f81ca2ba2b5168b02948a
--- /dev/null
+++ b/scripts/evaluator.py
@@ -0,0 +1,48 @@
+"""Script for evaluating models on a pre-defined set of data."""
+
+import configparser
+
+from sklearn.metrics import accuracy_score
+
+from poldeepner2.poldeepner2.models import PolDeepNer2
+from poldeepner2.poldeepner2.utils.data_utils import NerProcessor
+
+
+def main():
+    # note: the config should be serialized together with the model (json)
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    model = config['model']['path']
+
+    ner = PolDeepNer2.load(model=model)
+
+    data_path = config['data']['path']
+    processor = NerProcessor()
+
+    # prediction
+    data = processor.get_examples(data_path)
+    prediction_labels = []
+    for sentence in data:
+        print(sentence)
+        prediction = ner.process_text(sentence)
+        print(prediction)
+
+        # predicted label
+        predict_label = prediction[2][2]
+        prediction_labels.append(predict_label)
+
+    # compare against the gold labels
+    true_labels = processor.get_labels(data_path)
+
+    eval_res = accuracy_score(true_labels, prediction_labels)
+    print(eval_res)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except ValueError as er:
+        print("[ERROR] %s" % er)
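The predictor's output format is simply one "token, label" pair per line, with blank lines preserved between input sentences. A minimal interactive sketch of the same flow (the paths are the placeholders from config.cfg, not guaranteed to exist on any given machine):

    from poldeepner2.models import PolDeepNer2

    ner = PolDeepNer2.load(model="/mnt/sda/pdn2scripts/nkjp_base",
                           pretrained_path="/mnt/sda/pdn2scripts/roberta_base")
    for pred in ner.process_text("Jan Kowalski mieszka w Krakowie."):
        print(f"{pred.text}, {pred.label}")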
diff --git a/trainer.py b/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..12cbeb43b8b91be55f80b201629bd36f0766e528
--- /dev/null
+++ b/trainer.py
@@ -0,0 +1,365 @@
+"""Script to train new models compatible with the library."""
+import configparser
+import logging
+import os
+import random
+import sys
+import time
+from pathlib import Path
+
+import numpy as np
+import torch
+from pytorch_transformers import AdamW, WarmupLinearSchedule
+from torch.utils.data import DataLoader, RandomSampler
+from tqdm import tqdm
+
+from poldeepner2.utils.data_utils import NerProcessor
+from poldeepner2.utils.data_utils import create_dataset, \
+    convert_examples_to_features, save_params
+from poldeepner2.utils.train_utils import evaluate_model
+
+
+def main():
+    config_file = "config.cfg"
+    config = configparser.ConfigParser()
+    config.read(config_file)
+
+    # HYPERPARAMETERS
+    adam_epsilon = config.getfloat('train', 'adam_epsilon')
+    data_test = config['train']['data_test']
+    data_train = config['train']['data_train']
+    data_tune = config['train']['data_tune']
+    device = config['train']['device']
+    dropout = config.getfloat('train', 'dropout')
+    epoch_save_model = config.getboolean('train', 'epoch_save_model')
+    eval_batch_size = config.getint('train', 'eval_batch_size')
+    fp16 = config.getboolean('train', 'fp16')
+    fp16_opt_level = config['train']['fp16_opt_level']
+    freeze_model = config.getboolean('train', 'freeze_model')
+    gradient_accumulation_steps = \
+        config.getint('train', 'gradient_accumulation_steps')
+    hidden_size = config.getint('train', 'hidden_size')
+    learning_rate = config.getfloat('train', 'learning_rate')
+    max_grad_norm = config.getfloat('train', 'max_grad_norm')
+    max_seq_length = config.getint('train', 'max_seq_length')
+    num_train_epochs = config.getint('train', 'num_train_epochs')
+    output_dir = config['train']['output_dir']
+    pretrained_path = config['train']['pretrained_path']
+    seed = config.getint('train', 'seed')
+    squeeze = config.getboolean('train', 'squeeze')
+    train_batch_size = config.getint('train', 'train_batch_size')
+    training_mix = config.getboolean('train', 'training_mix')
+    use_transfer = 'transfer' in config['train'] and \
+        config['train']['transfer'] != 'None'
+    if use_transfer:
+        transfer = config['train']['transfer']
+    else:
+        transfer = None
+    warmup_proportion = config.getfloat('train', 'warmup_proportion')
+    weight_decay = config.getfloat('train', 'weight_decay')
+
+    # if wandb:
+    #     import wandb
+    #     wandb.init(project=wandb, config=config)
+
+    if os.path.exists(output_dir) and os.listdir(output_dir):
+        raise ValueError(
+            "Output directory (%s) already exists and is not empty."
+            % output_dir)
+
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    logging.basicConfig(
+        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
+        datefmt='%m/%d/%Y %H:%M:%S', level=logging.INFO,
+        filename=Path(output_dir) / "log.txt")
+    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))
+    logger = logging.getLogger(__name__)
+    for item in sorted(config.items()):
+        logger.info(item)
+
+    if gradient_accumulation_steps < 1:
+        raise ValueError(
+            "Invalid gradient_accumulation_steps parameter: %d, "
+            "should be >= 1" % gradient_accumulation_steps)
+
+    train_batch_size = train_batch_size // gradient_accumulation_steps
+
+    random.seed(seed)
+    np.random.seed(seed)
+    torch.manual_seed(seed)
+
+    # Determine set of labels
+    processor = NerProcessor()
+    datasets = [data_train]
+    if data_tune:
+        datasets.append(data_tune)
+    if data_test:
+        datasets.append(data_test)
+    label_list = \
+        processor.get_labels(datasets, config.getint('data',
+                                                     'tag_column_index'))
+    logger.info(f"Labels: {label_list}")
+    num_labels = len(label_list) + 1  # add one for IGNORE label
+    logger.info(f"Number of labels: {num_labels}")
+
+    # Load training data
+    logger.info("Loading training data...")
+    t0 = time.time()
+    train_examples = \
+        processor.get_examples(data_train,
+                               config.getint('data', 'tag_column_index'),
+                               "train")
+    logger.info(f"Training data was loaded in {time.time() - t0} second(s)")
+
+    # preparing model configs
+    hidden_size = 1024 if 'large' in pretrained_path \
+        else (768 if 'base' in pretrained_path else hidden_size)
+
+    logger.info("Loading pretrained model...")
+    t0 = time.time()
+    if pretrained_path.startswith("hf:"):
+        from poldeepner2.model.hf_for_token_calssification import \
+            HfModelForTokenClassification
+        pretrained_dir = pretrained_path.split(':')[1]
+        model = HfModelForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+    elif pretrained_path.startswith("mt5:"):
+        from poldeepner2.model.mt5_for_token_calssification import \
+            Mt5ModelForTokenClassification
+        variant = pretrained_path.split(':')[1]
+        model = Mt5ModelForTokenClassification(
+            variant=variant, n_labels=num_labels, hidden_size=hidden_size,
+            dropout_p=dropout, device=device)
+    else:
+        from poldeepner2.model.xlmr_for_token_classification \
+            import XLMRForTokenClassification
+        pretrained_dir = pretrained_path
+        if ":" in pretrained_dir:
+            pretrained_dir = pretrained_dir.split(':')[1]
+        if not os.path.exists(pretrained_dir):
+            raise ValueError(
+                "RoBERTa language model not found on path '%s'"
+                % pretrained_dir)
+        model = XLMRForTokenClassification(
+            pretrained_path=pretrained_dir, n_labels=num_labels,
+            hidden_size=hidden_size, dropout_p=dropout,
+            device=device)
+    logger.info(f"Pretrained model was loaded in {time.time() - t0} second(s)")
+
+    if use_transfer:
+        if device == "cpu":
+            state_dict = torch.load(
+                open(os.path.join(transfer, 'model.pt'), 'rb'),
+                map_location='cpu')
+        else:
+            state_dict = torch.load(
+                open(os.path.join(transfer, 'model.pt'), 'rb'))
+        model.load_state_dict(state_dict)
+
+    model.to(device)
+    # if wandb:
+    #     wandb.watch(model)
+
+    train_features = convert_examples_to_features(
+        train_examples, label_list, max_seq_length, model.encode_word,
+        squeeze)
+
+    if training_mix:
+        train_features.extend(convert_examples_to_features(
+            train_examples, label_list, max_seq_length, model.encode_word,
+            not squeeze))
+
+    num_train_optimization_steps = int(
+        len(train_features) / train_batch_size /
+        gradient_accumulation_steps) * num_train_epochs
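+    # Worked example with the sample config.cfg values: train_batch_size
+    # was already reduced to 16 // 5 = 3 above, so each optimizer step
+    # accumulates 5 micro-batches of 3 examples (an effective batch of 15),
+    # and warmup_steps below comes out as int(0.3 * total optimizer steps).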
+
+    no_decay = ['bias', 'final_layer_norm.weight']
+
+    params = list(model.named_parameters())
+
+    optimizer_grouped_parameters = [
+        {'params': [p for n, p in params if not any(
+            nd in n for nd in no_decay)], 'weight_decay': weight_decay},
+        {'params': [p for n, p in params if any(
+            nd in n for nd in no_decay)], 'weight_decay': 0.0}
+    ]
+
+    warmup_steps = int(warmup_proportion * num_train_optimization_steps)
+    optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate,
+                      eps=adam_epsilon)
+    scheduler = WarmupLinearSchedule(optimizer, warmup_steps=warmup_steps,
+                                     t_total=num_train_optimization_steps)
+
+    # freeze model if necessary
+    if freeze_model:
+        logger.info("Freezing XLM-R model...")
+        for n, p in model.named_parameters():
+            if 'xlmr' in n and p.requires_grad:
+                logging.info("Parameter %s - frozen" % n)
+                p.requires_grad = False
+            else:
+                logging.info("Parameter %s - unchanged" % n)
+
+    if fp16:
+        try:
+            from apex import amp
+        except ImportError:
+            raise ImportError(
+                "Please install apex from https://www.github.com/nvidia/apex "
+                "to use fp16 training.")
+        model, optimizer = amp.initialize(
+            model, optimizer, opt_level=fp16_opt_level)
+
+    # Train the model
+    logger.info("***** Running training *****")
+    logger.info("  Num examples = %d", len(train_examples))
+    logger.info("  Batch size = %d", train_batch_size)
+    logger.info("  Num steps = %d", num_train_optimization_steps)
+
+    train_data = create_dataset(train_features)
+    train_sampler = RandomSampler(train_data)
+    train_dataloader = DataLoader(train_data, sampler=train_sampler,
+                                  batch_size=train_batch_size)
+
+    # getting validation samples
+    best_val_f1 = 0.0
+    if data_tune:
+        val_examples = \
+            processor.get_examples(data_tune,
+                                   config.getint('data', 'tag_column_index'),
+                                   "tune")
+        val_features = convert_examples_to_features(
+            val_examples, label_list, max_seq_length, model.encode_word,
+            squeeze)
+        val_data = create_dataset(val_features)
+
+    if data_test:
+        eval_examples = \
+            processor.get_examples(data_test,
+                                   config.getint('data', 'tag_column_index'),
+                                   "test")
+        eval_features = convert_examples_to_features(
+            eval_examples, label_list, max_seq_length, model.encode_word,
+            squeeze)
+        eval_data = create_dataset(eval_features)
+
+    for epoch_no in range(1, num_train_epochs + 1):
+        epoch_stats = {"epoch": epoch_no}
+        logger.info("Epoch %d" % epoch_no)
+        tr_loss = 0
+        nb_tr_examples, nb_tr_steps = 0, 0
+
+        model.train()
+        steps = len(train_dataloader)
+
+        time_start = time.time()
+        # ToDo: add parameter for this feature
+        # for g in optimizer.param_groups:
+        #     g['lr'] = learning_rate - (learning_rate/100 * epoch_no)
+        # epoch_stats['lr'] = learning_rate - (learning_rate/100 * epoch_no)
+
+        for step, batch in tqdm(enumerate(train_dataloader), total=steps):
+            batch = tuple(t.to(device) for t in batch)
+            input_ids, label_ids, l_mask, valid_ids = batch
+            loss = model(input_ids, label_ids, l_mask, valid_ids)
+            if gradient_accumulation_steps > 1:
+                loss = loss / gradient_accumulation_steps
+
+            if fp16:
+                with amp.scale_loss(loss, optimizer) as scaled_loss:
+                    scaled_loss.backward()
+                torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer),
+                                               max_grad_norm)
+            else:
+                loss.backward()
+                torch.nn.utils.clip_grad_norm_(model.parameters(),
+                                               max_grad_norm)
+
+            tr_loss += loss.item()
+            nb_tr_examples += input_ids.size(0)
+            nb_tr_steps += 1
+
+            epoch_stats["loss"] = loss.item()
+            epoch_stats["learning_rate"] = scheduler.get_last_lr()[0]
+
+            if (step + 1) % gradient_accumulation_steps == 0:
+                optimizer.step()
+                scheduler.step()
+                model.zero_grad()
+
+            epoch_stats["step"] = step
+            # if wandb:
+            #     wandb.log(epoch_stats)
+
+        # if wandb:
+        #     epoch_stats["epoch_training_time"] = time.time() - time_start
+
+        if data_tune:
+            logger.info("\nTesting on validation set...")
+            time_start = time.time()
+            f1, report = evaluate_model(model, val_data, label_list,
+                                        eval_batch_size, device)
+            time_end = time.time()
+            epoch_stats["validation_F1"] = f1
+            epoch_stats["epoch_validation_time"] = time_end - time_start
+
+            if f1 > best_val_f1:
+                best_val_f1 = f1
+                logger.info(
+                    "\nFound better f1=%.4f on validation set. Saving model\n"
+                    % f1)
+                logger.info("%s\n" % report)
+                torch.save(model.state_dict(),
+                           open(os.path.join(output_dir, 'model.pt'), 'wb'))
+                save_params(output_dir, dropout, num_labels, label_list)
+
+        if data_test:
+            logger.info("\nTesting on test set...")
+            time_start = time.time()
+            f1_score, report = evaluate_model(model, eval_data, label_list,
+                                              eval_batch_size, device)
+            time_end = time.time()
+            epoch_stats["test_F1"] = f1_score
+            epoch_stats["epoch_testing_time"] = time_end - time_start
+            logger.info("%s\n" % report)
+
+        if epoch_save_model:
+            epoch_output_dir = os.path.join(output_dir, "e%03d" % epoch_no)
+            os.makedirs(epoch_output_dir)
+            torch.save(model.state_dict(),
+                       open(os.path.join(epoch_output_dir, 'model.pt'), 'wb'))
+            save_params(epoch_output_dir, dropout, num_labels, label_list)
+
+        # if wandb:
+        #     wandb.log(epoch_stats)
+
+    model.to(device)
+
+    if data_test:
+        eval_data = create_dataset(eval_features)
+        f1_score, report = evaluate_model(model, eval_data, label_list,
+                                          eval_batch_size, device)
+        logger.info("\n%s", report)
+        output_eval_file = os.path.join(output_dir, "test_results.txt")
+        with open(output_eval_file, "w") as writer:
+            logger.info("***** Writing results to file *****")
+            writer.write(report)
+    logger.info("Done.")
+
+
+if __name__ == "__main__":
+    main()
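As a usage note, all three entry points (trainer.py, evaluator.py, predictor.py) read config.cfg from the working directory, and the [train] transfer option warm-starts training from a previous run: point it at any directory containing a model.pt written by trainer.py, for example (path taken from the sample config's output_dir, adjust as needed):

    [train]
    transfer = test_res

Leaving transfer = None, as in the sample config.cfg, starts from the pretrained encoder alone.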