From c989d8c1b9a7a16646613bd22097a723ffe8e088 Mon Sep 17 00:00:00 2001 From: Lukasz Pszenny <lpszenny@hotmail.com> Date: Mon, 8 May 2023 15:44:50 +0200 Subject: [PATCH] Release 1.0.7 Adding Lambo tokenizer --- README.md | 2 +- combo/data/api.py | 2 +- combo/predict.py | 6 +++--- combo/utils/{lambo.py => lambo_tokenizer.py} | 0 docs/installation.md | 6 +++--- docs/prediction.md | 4 ++-- setup.py | 10 +++++++++- 7 files changed, 19 insertions(+), 11 deletions(-) rename combo/utils/{lambo.py => lambo_tokenizer.py} (100%) diff --git a/README.md b/README.md index 5fb02a1..6c267bd 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Clone this repository and install COMBO (we suggest creating a virtualenv/conda environment with Python 3.6+, as a bundle of required packages will be installed): ```bash pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7 ``` For Python 3.9 you may also need to install cython: ```bash diff --git a/combo/data/api.py b/combo/data/api.py index 308e9e4..39f449a 100644 --- a/combo/data/api.py +++ b/combo/data/api.py @@ -48,7 +48,7 @@ class _TokenList(conllu.TokenList): return 'TokenList<' + ', '.join(token['token'] for token in self) + '>' -def sentence2conllu(sentence: Sentence, keep_semrel: bool = True) -> conllu.TokenList: +def sentence2conllu(sentence: Sentence, keep_semrel: bool = False) -> conllu.TokenList: tokens = [] for token in sentence.tokens: token_dict = collections.OrderedDict(dataclasses.asdict(token)) diff --git a/combo/predict.py b/combo/predict.py index 0423437..9d0b4a6 100644 --- a/combo/predict.py +++ b/combo/predict.py @@ -12,7 +12,7 @@ from overrides import overrides from combo import data from combo.data import sentence2conllu, tokens2conllu, conllu2sentence -from combo.utils import download, graph, lambo +from combo.utils import download, graph, lambo_tokenizer logger = logging.getLogger(__name__) @@ -59,7 +59,7 @@ class COMBO(predictor.Predictor): def predict(self, sentence: Union[str, List[str], List[List[str]], List[data.Sentence]]): if isinstance(sentence, str): - if isinstance(self._tokenizer,lambo.LamboTokenizer): + if isinstance(self._tokenizer,lambo_tokenizer.LamboTokenizer): segmented = self._tokenizer.segment(sentence) return self.predict(segmented) else: @@ -239,7 +239,7 @@ class COMBO(predictor.Predictor): @classmethod def with_lambo_tokenizer(cls, model: models.Model, dataset_reader: allen_data.DatasetReader, lambo_model_name : str = 'en'): - return cls(model, dataset_reader, lambo.LamboTokenizer(lambo_model_name)) + return cls(model, dataset_reader, lambo_tokenizer.LamboTokenizer(lambo_model_name)) @classmethod def from_pretrained(cls, path: str, tokenizer=tokenizers.SpacyTokenizer(), diff --git a/combo/utils/lambo.py b/combo/utils/lambo_tokenizer.py similarity index 100% rename from combo/utils/lambo.py rename to combo/utils/lambo_tokenizer.py diff --git a/docs/installation.md b/docs/installation.md index 6142605..695704f 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -2,7 +2,7 @@ Clone this repository and install COMBO (we suggest using virtualenv/conda with Python 3.6+): ```bash pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7 combo --helpfull ``` @@ -11,7 +11,7 @@ combo --helpfull python -m venv venv source venv/bin/activate pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7 ``` ### Conda example: @@ -19,7 +19,7 @@ pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6 conda create -n combo python=3.8 conda activate combo pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7 ``` ## Problems & solutions diff --git a/docs/prediction.md b/docs/prediction.md index 25b7df1..f6fe5ef 100644 --- a/docs/prediction.md +++ b/docs/prediction.md @@ -34,10 +34,10 @@ You can use COMBO with the [LAMBO](https://gitlab.clarin-pl.eu/syntactic-tools/l ```python # Import COMBO and lambo from combo.predict import COMBO -from combo.utils import lambo +from combo.utils import lambo_tokenizer # Download models -nlp = COMBO.from_pretrained("english-bert-base-ud29",tokenizer=lambo.LamboTokenizer("en")) +nlp = COMBO.from_pretrained("english-bert-base-ud29",tokenizer=lambo_tokenizer.LamboTokenizer("en")) sentences = nlp("This is the first sentence. This is the second sentence to parse.") ``` diff --git a/setup.py b/setup.py index a5680e7..cabbf85 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,13 @@ """Setup.""" +import subprocess from setuptools import find_packages, setup +# Clone the lambo repository +subprocess.call(['git', 'clone', 'https://gitlab.clarin-pl.eu/syntactic-tools/lambo.git', '--depth', '1']) + +# Install lambo using pip +subprocess.call(['pip', 'install', './lambo']) + REQUIREMENTS = [ 'absl-py==0.9.0', 'allennlp==1.3.0', @@ -23,11 +30,12 @@ REQUIREMENTS = [ 'tqdm==4.43.0', 'transformers==4.0.1', 'urllib3==1.25.11', + "lambo" ] setup( name='combo', - version='1.0.6', + version='1.0.7', author='Mateusz Klimaszewski', author_email='M.Klimaszewski@ii.pw.edu.pl', install_requires=REQUIREMENTS, -- GitLab