diff --git a/src/lambo/evaluation/evaluate.py b/src/lambo/evaluation/evaluate.py index f4bb70c1b2ab071342d8667e99b0ce0b14b47294..b86a62f5449ed1399029a25fae4d6583e3427aee 100644 --- a/src/lambo/evaluation/evaluate.py +++ b/src/lambo/evaluation/evaluate.py @@ -1,4 +1,4 @@ -from lambo.evaluation.conll18_ud_eval import load_conllu, evaluate +from lambo.evaluation.conll18_ud_eval import load_conllu, evaluate, UDError from lambo.utils.printer import print_document_to_conll @@ -19,8 +19,13 @@ def evaluate_segmenter(segmenter, test_text, gold_path, tmp_path): with open(gold_path) as fGold: pred = load_conllu(fPred) gold = load_conllu(fGold) - conll_result = evaluate(gold, pred) - for category in ['Tokens', 'Words', 'Sentences']: - result[category] = {'F1': conll_result[category].f1, 'precision': conll_result[category].precision, - 'recall': conll_result[category].recall} + try: + conll_result = evaluate(gold, pred) + for category in ['Tokens', 'Words', 'Sentences']: + result[category] = {'F1': conll_result[category].f1, 'precision': conll_result[category].precision, + 'recall': conll_result[category].recall} + except UDError as e: + for category in ['Tokens', 'Words', 'Sentences']: + result[category] = {'F1': 0.0, 'precision': 0.0, + 'recall': 0.0} return result diff --git a/src/lambo/learning/train.py b/src/lambo/learning/train.py index d3f3a06ccfce1d047d5f8d86a3169d5a2d707377..a0efdfcda1515a67c67d6e19b5922fd816694c43 100644 --- a/src/lambo/learning/train.py +++ b/src/lambo/learning/train.py @@ -121,7 +121,7 @@ def train_new_and_save(model_name, treebank_path, save_path, epochs=10, device=' BATCH_SIZE = 32 print("Initiating the model.") - MAX_LEN = 1024 + MAX_LEN = 256 dict, train_dataloader, test_dataloader = prepare_dataloaders_withdict([train_doc, dev_doc], [test_doc], MAX_LEN, BATCH_SIZE) @@ -168,7 +168,7 @@ def train_pretrained_and_save(language, treebank_path, save_path, pretrained_pat train_doc, dev_doc, test_doc = read_treebank(treebank_path, True) print("Initiating the model.") - MAX_LEN = 1024 + MAX_LEN = 256 model = LamboNetwork(MAX_LEN, dict, len(utf_category_dictionary), pretrained=pretrained_model) print("Preparing data")