Skip to content
Snippets Groups Projects
Commit 9218e33c authored by Mateusz Klimaszewski, committed by Mateusz Klimaszewski
Browse files

Extend training configuration.

parent 1b958e29
Branches
Tags
2 merge requests: !31 Release 1.0.2., !30 Release 1.0.2.
...@@ -229,6 +229,10 @@ def run(_): ...@@ -229,6 +229,10 @@ def run(_):
"UD_Marathi-UFAL", "UD_Norwegian-Bokmaal"}: "UD_Marathi-UFAL", "UD_Norwegian-Bokmaal"}:
command = command + " --targets deprel,head,upostag,lemma,feats" command = command + " --targets deprel,head,upostag,lemma,feats"
# Datasets without FEATS
if treebank in {"UD_Japanese-GSD", "UD_Korean-Kaist"}:
command = command + " --targets deprel,head,upostag,xpostag,lemma"
# Datasets without LEMMA and FEATS # Datasets without LEMMA and FEATS
if treebank in {"UD_Maltese-MUDT"}: if treebank in {"UD_Maltese-MUDT"}:
command = command + " --targets deprel,head,upostag,xpostag" command = command + " --targets deprel,head,upostag,xpostag"
......
...@@ -105,7 +105,8 @@ def run(_): ...@@ -105,7 +105,8 @@ def run(_):
serialization_dir = pathlib.Path(FLAGS.serialization_dir) / lang serialization_dir = pathlib.Path(FLAGS.serialization_dir) / lang
serialization_dir.mkdir(exist_ok=True, parents=True) serialization_dir.mkdir(exist_ok=True, parents=True)
utils.execute_command("".join(f"""combo --mode train
command = f"""combo --mode train
--training_data {train_path} --training_data {train_path}
--validation_data {dev_path} --validation_data {dev_path}
--targets feats,upostag,xpostag,head,deprel,lemma,deps --targets feats,upostag,xpostag,head,deprel,lemma,deps
...@@ -115,7 +116,13 @@ def run(_): ...@@ -115,7 +116,13 @@ def run(_):
--word_batch_size 2500 --word_batch_size 2500
--config_path {pathlib.Path.cwd() / 'config.graph.template.jsonnet'} --config_path {pathlib.Path.cwd() / 'config.graph.template.jsonnet'}
--notensorboard --notensorboard
""".splitlines())) """
# Datasets without XPOS
if lang in {"fr"}:
command = command + " --targets deprel,head,upostag,lemma,feats"
utils.execute_command("".join(command.splitlines()))
def main(): def main():
......
...@@ -4,6 +4,13 @@ import subprocess ...@@ -4,6 +4,13 @@ import subprocess
LANG2TRANSFORMER = { LANG2TRANSFORMER = {
"en": "bert-base-cased", "en": "bert-base-cased",
"pl": "allegro/herbert-base-cased", "pl": "allegro/herbert-base-cased",
"zh": "bert-base-chinese",
"fi": "TurkuNLP/bert-base-finnish-cased-v1",
"ja": "cl-tohoku/bert-base-japanese",
"ko": "kykim/bert-kor-base",
"de": "dbmdz/bert-base-german-cased",
"ar": "aubmindlab/bert-base-arabertv2",
"eu": "ixa-ehu/berteus-base-cased"
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment