From 54e373140a781f98e9285b79f464373bd3d4e36d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martyna=20Wi=C4=85cek?= <martyna.wiacek@ipipan.waw.pl>
Date: Sun, 4 Feb 2024 01:11:51 +0100
Subject: [PATCH] Replace LamboTokenizer default_turns with default_split_level

---
 combo/main.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/combo/main.py b/combo/main.py
index 8457161..261e797 100755
--- a/combo/main.py
+++ b/combo/main.py
@@ -156,8 +156,8 @@ def get_defaults(dataset_reader: Optional[DatasetReader],
         # Dataset reader is required to read training data and/or for training (and validation) data loader
         dataset_reader = default_ud_dataset_reader(FLAGS.pretrained_transformer_name,
                                                    tokenizer=LamboTokenizer(FLAGS.tokenizer_language,
-                                                                            default_turns=FLAGS.turns,
-                                                                            default_split_subwords=FLAGS.split_subwords)
+                                                    default_split_level="TURNS" if FLAGS.turns else "SENTENCES",
+                                                    default_split_subwords=FLAGS.split_subwords)
                                                    )
 
     if not training_data_loader:
@@ -403,9 +403,9 @@ def run(_):
             logger.info("No dataset reader in the configuration or archive file - using a default UD dataset reader",
                         prefix=prefix)
             dataset_reader = default_ud_dataset_reader(FLAGS.pretrained_transformer_name,
-                                                       tokenizer=LamboTokenizer(tokenizer_language,
-                                                                                default_turns=FLAGS.turns,
-                                                                                default_split_subwords=FLAGS.split_subwords)
+                                                        tokenizer=LamboTokenizer(tokenizer_language,
+                                                        default_split_level="TURNS" if FLAGS.turns else "SENTENCES",
+                                                        default_split_subwords=FLAGS.split_subwords)
                                                        )
 
         predictor = COMBO(model, dataset_reader)
-- 
GitLab