From c989d8c1b9a7a16646613bd22097a723ffe8e088 Mon Sep 17 00:00:00 2001
From: Lukasz Pszenny <lpszenny@hotmail.com>
Date: Mon, 8 May 2023 15:44:50 +0200
Subject: [PATCH] Release 1.0.7

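Add the LAMBO tokenizer as a dependency.

- Rename combo/utils/lambo.py to combo/utils/lambo_tokenizer.py and update
  the imports in combo/predict.py and the example in docs/prediction.md.
- Clone and pip-install lambo from its GitLab repository in setup.py and
  add "lambo" to the install requirements.
- Change the default of sentence2conllu(keep_semrel=...) from True to False.
- Bump the version to 1.0.7 in setup.py, README.md and docs/installation.md.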
---
 README.md                                    |  2 +-
 combo/data/api.py                            |  2 +-
 combo/predict.py                             |  6 +++---
 combo/utils/{lambo.py => lambo_tokenizer.py} |  0
 docs/installation.md                         |  6 +++---
 docs/prediction.md                           |  4 ++--
 setup.py                                     | 10 +++++++++-
 7 files changed, 19 insertions(+), 11 deletions(-)
 rename combo/utils/{lambo.py => lambo_tokenizer.py} (100%)

diff --git a/README.md b/README.md
index 5fb02a1..6c267bd 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
 Clone this repository and install COMBO (we suggest creating a virtualenv/conda environment with Python 3.6+, as a bundle of required packages will be installed):
 ```bash
 pip install -U pip setuptools wheel
-pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6
+pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7
 ```
 For Python 3.9 you may also need to install cython:
 ```bash
diff --git a/combo/data/api.py b/combo/data/api.py
index 308e9e4..39f449a 100644
--- a/combo/data/api.py
+++ b/combo/data/api.py
@@ -48,7 +48,7 @@ class _TokenList(conllu.TokenList):
         return 'TokenList<' + ', '.join(token['token'] for token in self) + '>'
 
 
-def sentence2conllu(sentence: Sentence, keep_semrel: bool = True) -> conllu.TokenList:
+def sentence2conllu(sentence: Sentence, keep_semrel: bool = False) -> conllu.TokenList:
     tokens = []
     for token in sentence.tokens:
         token_dict = collections.OrderedDict(dataclasses.asdict(token))
diff --git a/combo/predict.py b/combo/predict.py
index 0423437..9d0b4a6 100644
--- a/combo/predict.py
+++ b/combo/predict.py
@@ -12,7 +12,7 @@ from overrides import overrides
 
 from combo import data
 from combo.data import sentence2conllu, tokens2conllu, conllu2sentence
-from combo.utils import download, graph, lambo
+from combo.utils import download, graph, lambo_tokenizer
 
 logger = logging.getLogger(__name__)
 
@@ -59,7 +59,7 @@ class COMBO(predictor.Predictor):
 
     def predict(self, sentence: Union[str, List[str], List[List[str]], List[data.Sentence]]):
         if isinstance(sentence, str):
-            if isinstance(self._tokenizer,lambo.LamboTokenizer):
+            if isinstance(self._tokenizer,lambo_tokenizer.LamboTokenizer):
                 segmented = self._tokenizer.segment(sentence)
                 return self.predict(segmented)
             else:
@@ -239,7 +239,7 @@ class COMBO(predictor.Predictor):
     @classmethod
     def with_lambo_tokenizer(cls, model: models.Model,
                              dataset_reader: allen_data.DatasetReader, lambo_model_name : str = 'en'):
-        return cls(model, dataset_reader, lambo.LamboTokenizer(lambo_model_name))
+        return cls(model, dataset_reader, lambo_tokenizer.LamboTokenizer(lambo_model_name))
 
     @classmethod
     def from_pretrained(cls, path: str, tokenizer=tokenizers.SpacyTokenizer(),
diff --git a/combo/utils/lambo.py b/combo/utils/lambo_tokenizer.py
similarity index 100%
rename from combo/utils/lambo.py
rename to combo/utils/lambo_tokenizer.py
diff --git a/docs/installation.md b/docs/installation.md
index 6142605..695704f 100644
--- a/docs/installation.md
+++ b/docs/installation.md
@@ -2,7 +2,7 @@
 Clone this repository and install COMBO (we suggest using virtualenv/conda with Python 3.6+):
 ```bash
 pip install -U pip setuptools wheel
-pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6
+pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7
 combo --helpfull
 ```
 
@@ -11,7 +11,7 @@ combo --helpfull
 python -m venv venv
 source venv/bin/activate
 pip install -U pip setuptools wheel
-pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6
+pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7
 ```
 
 ### Conda example:
@@ -19,7 +19,7 @@ pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6
 conda create -n combo python=3.8
 conda activate combo
 pip install -U pip setuptools wheel
-pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.6
+pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.7
 ```
 
 ## Problems & solutions
diff --git a/docs/prediction.md b/docs/prediction.md
index 25b7df1..f6fe5ef 100644
--- a/docs/prediction.md
+++ b/docs/prediction.md
@@ -34,10 +34,10 @@ You can use COMBO with the [LAMBO](https://gitlab.clarin-pl.eu/syntactic-tools/l
 ```python
 # Import COMBO and lambo
 from combo.predict import COMBO
-from combo.utils import lambo
+from combo.utils import lambo_tokenizer
 
 # Download models
-nlp = COMBO.from_pretrained("english-bert-base-ud29",tokenizer=lambo.LamboTokenizer("en"))
+nlp = COMBO.from_pretrained("english-bert-base-ud29",tokenizer=lambo_tokenizer.LamboTokenizer("en"))
 sentences = nlp("This is the first sentence. This is the second sentence to parse.")
 ```
 
diff --git a/setup.py b/setup.py
index a5680e7..cabbf85 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,13 @@
 """Setup."""
+import subprocess
 from setuptools import find_packages, setup
 
+# Shallow-clone the lambo repository. NOTE(review): runs every time setup.py is executed and the exit status is ignored.
+subprocess.call(['git', 'clone', 'https://gitlab.clarin-pl.eu/syntactic-tools/lambo.git', '--depth', '1'])
+
+# Install lambo from the local clone. NOTE(review): uses the 'pip' found on PATH and the exit status is ignored.
+subprocess.call(['pip', 'install', './lambo'])
+
 REQUIREMENTS = [
     'absl-py==0.9.0',
     'allennlp==1.3.0',
@@ -23,11 +30,12 @@ REQUIREMENTS = [
     'tqdm==4.43.0',
     'transformers==4.0.1',
     'urllib3==1.25.11',
+    "lambo"
 ]
 
 setup(
     name='combo',
-    version='1.0.6',
+    version='1.0.7',
     author='Mateusz Klimaszewski',
     author_email='M.Klimaszewski@ii.pw.edu.pl',
     install_requires=REQUIREMENTS,
-- 
GitLab