diff --git a/combo/main.py b/combo/main.py index 84571612e87109e007274c8d264976210e57daae..261e79759b97b4b692380f7a094c61f63ae0bc9a 100755 --- a/combo/main.py +++ b/combo/main.py @@ -156,8 +156,8 @@ def get_defaults(dataset_reader: Optional[DatasetReader], # Dataset reader is required to read training data and/or for training (and validation) data loader dataset_reader = default_ud_dataset_reader(FLAGS.pretrained_transformer_name, tokenizer=LamboTokenizer(FLAGS.tokenizer_language, - default_turns=FLAGS.turns, - default_split_subwords=FLAGS.split_subwords) + default_split_level="TURNS" if FLAGS.turns else "SENTENCES", + default_split_subwords=FLAGS.split_subwords) ) if not training_data_loader: @@ -403,9 +403,9 @@ def run(_): logger.info("No dataset reader in the configuration or archive file - using a default UD dataset reader", prefix=prefix) dataset_reader = default_ud_dataset_reader(FLAGS.pretrained_transformer_name, - tokenizer=LamboTokenizer(tokenizer_language, - default_turns=FLAGS.turns, - default_split_subwords=FLAGS.split_subwords) + tokenizer=LamboTokenizer(tokenizer_language, + default_split_level="TURNS" if FLAGS.turns else "SENTENCES", + default_split_subwords=FLAGS.split_subwords) ) predictor = COMBO(model, dataset_reader)