From 1b958e29ec5387665d8834dcacb736b8cb75cf74 Mon Sep 17 00:00:00 2001 From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com> Date: Thu, 4 Mar 2021 11:59:15 +0100 Subject: [PATCH 1/5] Update allennlp to 1.3.0 and transformers to 4.0.1. --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 30ad42c..002d990 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from setuptools import find_packages, setup REQUIREMENTS = [ 'absl-py==0.9.0', - 'allennlp==1.2.1', + 'allennlp==1.3.0', 'conllu==2.3.2', 'dataclasses;python_version<"3.7"', 'jsonnet==0.15.0', @@ -17,7 +17,7 @@ REQUIREMENTS = [ 'scikit-learn<=0.23.2', 'torch==1.6.0', 'tqdm==4.43.0', - 'transformers>=3.4.0,<3.5', + 'transformers==4.0.1', 'urllib3==1.25.11', ] -- GitLab From 9218e33cbcabe69e7f0fd1a7b1fed0ea53da29be Mon Sep 17 00:00:00 2001 From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com> Date: Thu, 4 Mar 2021 12:00:06 +0100 Subject: [PATCH 2/5] Extend training configuration. --- scripts/train.py | 4 ++++ scripts/train_eud.py | 11 +++++++++-- scripts/utils.py | 7 +++++++ 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index accca4a..950ee82 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -229,6 +229,10 @@ def run(_): "UD_Marathi-UFAL", "UD_Norwegian-Bokmaal"}: command = command + " --targets deprel,head,upostag,lemma,feats" + # Datasets without FEATS + if treebank in {"UD_Japanese-GSD", "UD_Korean-Kaist"}: + command = command + " --targets deprel,head,upostag,xpostag,lemma" + # Datasets without LEMMA and FEATS if treebank in {"UD_Maltese-MUDT"}: command = command + " --targets deprel,head,upostag,xpostag" diff --git a/scripts/train_eud.py b/scripts/train_eud.py index 4904e0b..ba13a27 100644 --- a/scripts/train_eud.py +++ b/scripts/train_eud.py @@ -105,7 +105,8 @@ def run(_): serialization_dir = pathlib.Path(FLAGS.serialization_dir) / lang serialization_dir.mkdir(exist_ok=True, parents=True) - 
utils.execute_command("".join(f"""combo --mode train + + command = f"""combo --mode train --training_data {train_path} --validation_data {dev_path} --targets feats,upostag,xpostag,head,deprel,lemma,deps @@ -115,7 +116,13 @@ def run(_): --word_batch_size 2500 --config_path {pathlib.Path.cwd() / 'config.graph.template.jsonnet'} --notensorboard - """.splitlines())) + """ + + # Datasets without XPOS + if lang in {"fr"}: + command = command + " --targets deprel,head,upostag,lemma,feats" + + utils.execute_command("".join(command.splitlines())) def main(): diff --git a/scripts/utils.py b/scripts/utils.py index 5dda2b8..ebfec3e 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -4,6 +4,13 @@ import subprocess LANG2TRANSFORMER = { "en": "bert-base-cased", "pl": "allegro/herbert-base-cased", + "zh": "bert-base-chinese", + "fi": "TurkuNLP/bert-base-finnish-cased-v1", + "ja": "cl-tohoku/bert-base-japanese", + "ko": "kykim/bert-kor-base", + "de": "dbmdz/bert-base-german-cased", + "ar": "aubmindlab/bert-base-arabertv2", + "eu": "ixa-ehu/berteus-base-cased" } -- GitLab From d4f857d3efa87a665e98e95c43f640d387154911 Mon Sep 17 00:00:00 2001 From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com> Date: Thu, 4 Mar 2021 13:19:51 +0100 Subject: [PATCH 3/5] Remove Japanese BERT. --- scripts/utils.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/utils.py b/scripts/utils.py index ebfec3e..522136f 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -6,7 +6,6 @@ LANG2TRANSFORMER = { "pl": "allegro/herbert-base-cased", "zh": "bert-base-chinese", "fi": "TurkuNLP/bert-base-finnish-cased-v1", - "ja": "cl-tohoku/bert-base-japanese", "ko": "kykim/bert-kor-base", "de": "dbmdz/bert-base-german-cased", "ar": "aubmindlab/bert-base-arabertv2", -- GitLab From 30ff58184a1d6f5b9f49c9971b74340c62b75099 Mon Sep 17 00:00:00 2001 From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com> Date: Thu, 18 Mar 2021 16:18:32 +0100 Subject: [PATCH 4/5] Add additional transformers models mapping. 
--- scripts/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/utils.py b/scripts/utils.py index 522136f..6ce5a8a 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -9,7 +9,8 @@ LANG2TRANSFORMER = { "ko": "kykim/bert-kor-base", "de": "dbmdz/bert-base-german-cased", "ar": "aubmindlab/bert-base-arabertv2", - "eu": "ixa-ehu/berteus-base-cased" + "eu": "ixa-ehu/berteus-base-cased", + "tr": "dbmdz/bert-base-turkish-cased" } -- GitLab From 78e3406394cd9f5e2f407b5b8d91afb52a45db52 Mon Sep 17 00:00:00 2001 From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com> Date: Thu, 18 Mar 2021 16:19:52 +0100 Subject: [PATCH 5/5] Release 1.0.2. --- README.md | 2 +- docs/installation.md | 4 ++-- setup.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 7c41fe8..1dfaba4 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Clone this repository and install COMBO (we suggest creating a virtualenv/conda environment with Python 3.6+, as a bundle of required packages will be installed): ```bash pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.1 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.2 ``` Run the following commands in your Python console to make predictions with a pre-trained model: ```python diff --git a/docs/installation.md b/docs/installation.md index 7cf539b..36ade51 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -2,7 +2,7 @@ Clone this repository and install COMBO (we suggest using virtualenv/conda with Python 3.6+): ```bash pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.1 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.2 combo --helpfull ``` @@ -11,7 +11,7 @@ combo --helpfull python -m venv venv source venv/bin/activate pip install -U pip setuptools wheel -pip install --index-url https://pypi.clarin-pl.eu/simple 
combo==1.0.1 +pip install --index-url https://pypi.clarin-pl.eu/simple combo==1.0.2 ``` ### Conda example: diff --git a/setup.py b/setup.py index 002d990..5012ea9 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ REQUIREMENTS = [ setup( name='combo', - version='1.0.1', + version='1.0.2', author='Mateusz Klimaszewski', author_email='M.Klimaszewski@ii.pw.edu.pl', install_requires=REQUIREMENTS, -- GitLab