Skip to content
Snippets Groups Projects
Commit bbe33702 authored by piotrmp, committed by Lukasz Pszenny
Browse files

Command-line options for LAMBO segmentation.

parent e14a6660
No related branches found
No related tags found
No related merge requests found
......@@ -88,7 +88,8 @@ flags.DEFINE_boolean(name="silent", default=True,
flags.DEFINE_enum(name="predictor_name", default="combo-spacy",
enum_values=["combo", "combo-spacy", "combo-lambo"],
help="Use predictor with whitespace, spacy or LAMBO tokenizer.")
flags.DEFINE_string(name="lambo_model_name", default="en",
help="LAMBO model name (if LAMBO used for segmentation).")
def run(_):
"""Run model."""
......@@ -175,7 +176,7 @@ def _get_predictor() -> predictors.Predictor:
)
return predictors.Predictor.from_archive(
archive, FLAGS.predictor_name
archive, FLAGS.predictor_name, extra_args= {"lambo_model_name" : FLAGS.lambo_model_name}
)
......
......@@ -238,8 +238,8 @@ class COMBO(predictor.Predictor):
@classmethod
def with_lambo_tokenizer(cls, model: models.Model,
dataset_reader: allen_data.DatasetReader):
return cls(model, dataset_reader, lambo.LamboTokenizer())
dataset_reader: allen_data.DatasetReader, lambo_model_name : str):
return cls(model, dataset_reader, lambo.LamboTokenizer(lambo_model_name))
@classmethod
def from_pretrained(cls, path: str, tokenizer=tokenizers.SpacyTokenizer(),
......
......@@ -6,7 +6,7 @@ from lambo.segmenter.lambo import Lambo
class LamboTokenizer(Tokenizer):
def __init__(self, model: str = "LAMBO_no_pretraining-UD_Polish-PDB",) -> None:
def __init__(self, model: str) -> None:
self.lambo=Lambo.get(model)
# Simple tokenisation: ignoring sentence split
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment