From 54e373140a781f98e9285b79f464373bd3d4e36d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martyna=20Wi=C4=85cek?= <martyna.wiacek@ipipan.waw.pl> Date: Sun, 4 Feb 2024 01:11:51 +0100 Subject: [PATCH] fix default_split_level --- combo/main.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/combo/main.py b/combo/main.py index 8457161..261e797 100755 --- a/combo/main.py +++ b/combo/main.py @@ -156,8 +156,8 @@ def get_defaults(dataset_reader: Optional[DatasetReader], # Dataset reader is required to read training data and/or for training (and validation) data loader dataset_reader = default_ud_dataset_reader(FLAGS.pretrained_transformer_name, tokenizer=LamboTokenizer(FLAGS.tokenizer_language, - default_turns=FLAGS.turns, - default_split_subwords=FLAGS.split_subwords) + default_split_level="TURNS" if FLAGS.turns else "SENTENCES", + default_split_subwords=FLAGS.split_subwords) ) if not training_data_loader: @@ -403,9 +403,9 @@ def run(_): logger.info("No dataset reader in the configuration or archive file - using a default UD dataset reader", prefix=prefix) dataset_reader = default_ud_dataset_reader(FLAGS.pretrained_transformer_name, - tokenizer=LamboTokenizer(tokenizer_language, - default_turns=FLAGS.turns, - default_split_subwords=FLAGS.split_subwords) + tokenizer=LamboTokenizer(tokenizer_language, + default_split_level="TURNS" if FLAGS.turns else "SENTENCES", + default_split_subwords=FLAGS.split_subwords) ) predictor = COMBO(model, dataset_reader) -- GitLab