Skip to content
Snippets Groups Projects
Commit 9218e33c authored by Mateusz Klimaszewski, committed by Mateusz Klimaszewski
Browse files

Extend training configuration.

parent 1b958e29
No related branches found
No related tags found
2 merge requests: !31 Release 1.0.2., !30 Release 1.0.2.
......@@ -229,6 +229,10 @@ def run(_):
"UD_Marathi-UFAL", "UD_Norwegian-Bokmaal"}:
command = command + " --targets deprel,head,upostag,lemma,feats"
# Datasets without FEATS
if treebank in {"UD_Japanese-GSD", "UD_Korean-Kaist"}:
command = command + " --targets deprel,head,upostag,xpostag,lemma"
# Datasets without LEMMA and FEATS
if treebank in {"UD_Maltese-MUDT"}:
command = command + " --targets deprel,head,upostag,xpostag"
......
......@@ -105,7 +105,8 @@ def run(_):
serialization_dir = pathlib.Path(FLAGS.serialization_dir) / lang
serialization_dir.mkdir(exist_ok=True, parents=True)
utils.execute_command("".join(f"""combo --mode train
command = f"""combo --mode train
--training_data {train_path}
--validation_data {dev_path}
--targets feats,upostag,xpostag,head,deprel,lemma,deps
......@@ -115,7 +116,13 @@ def run(_):
--word_batch_size 2500
--config_path {pathlib.Path.cwd() / 'config.graph.template.jsonnet'}
--notensorboard
""".splitlines()))
"""
# Datasets without XPOS
if lang in {"fr"}:
command = command + " --targets deprel,head,upostag,lemma,feats"
utils.execute_command("".join(command.splitlines()))
def main():
......
......@@ -4,6 +4,13 @@ import subprocess
# Lookup table from a two-letter language code to the name of the pretrained
# transformer model associated with that language.
# NOTE(review): the values look like Hugging Face hub identifiers -- confirm
# against the code that consumes this table.
LANG2TRANSFORMER = {
    "en": "bert-base-cased",
    "pl": "allegro/herbert-base-cased",
    "zh": "bert-base-chinese",
    "fi": "TurkuNLP/bert-base-finnish-cased-v1",
    "ja": "cl-tohoku/bert-base-japanese",
    "ko": "kykim/bert-kor-base",
    "de": "dbmdz/bert-base-german-cased",
    "ar": "aubmindlab/bert-base-arabertv2",
    "eu": "ixa-ehu/berteus-base-cased",
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment