diff --git a/combo/predict.py b/combo/predict.py index 042343781be1367282e832c10b424676c30767fe..9d0b4a62af383f825f746d9fec8494d1c02882bb 100644 --- a/combo/predict.py +++ b/combo/predict.py @@ -12,7 +12,7 @@ from overrides import overrides from combo import data from combo.data import sentence2conllu, tokens2conllu, conllu2sentence -from combo.utils import download, graph, lambo +from combo.utils import download, graph, lambo_tokenizer logger = logging.getLogger(__name__) @@ -59,7 +59,7 @@ class COMBO(predictor.Predictor): def predict(self, sentence: Union[str, List[str], List[List[str]], List[data.Sentence]]): if isinstance(sentence, str): - if isinstance(self._tokenizer,lambo.LamboTokenizer): + if isinstance(self._tokenizer,lambo_tokenizer.LamboTokenizer): segmented = self._tokenizer.segment(sentence) return self.predict(segmented) else: @@ -239,7 +239,7 @@ class COMBO(predictor.Predictor): @classmethod def with_lambo_tokenizer(cls, model: models.Model, dataset_reader: allen_data.DatasetReader, lambo_model_name : str = 'en'): - return cls(model, dataset_reader, lambo.LamboTokenizer(lambo_model_name)) + return cls(model, dataset_reader, lambo_tokenizer.LamboTokenizer(lambo_model_name)) @classmethod def from_pretrained(cls, path: str, tokenizer=tokenizers.SpacyTokenizer(), diff --git a/combo/utils/lambo.py b/combo/utils/lambo_tokenizer.py similarity index 100% rename from combo/utils/lambo.py rename to combo/utils/lambo_tokenizer.py diff --git a/docs/prediction.md b/docs/prediction.md index 25b7df175d10b3e2d5a48ef8aad693e2c2ed5a5c..f6fe5efdd488a9da3f97af76bedc7ab599c7aa0e 100644 --- a/docs/prediction.md +++ b/docs/prediction.md @@ -34,10 +34,10 @@ You can use COMBO with the [LAMBO](https://gitlab.clarin-pl.eu/syntactic-tools/l ```python # Import COMBO and lambo from combo.predict import COMBO -from combo.utils import lambo +from combo.utils import lambo_tokenizer # Download models -nlp = COMBO.from_pretrained("english-bert-base-ud29",tokenizer=lambo.LamboTokenizer("en")) +nlp = COMBO.from_pretrained("english-bert-base-ud29",tokenizer=lambo_tokenizer.LamboTokenizer("en")) sentences = nlp("This is the first sentence. This is the second sentence to parse.") ```