Skip to content
Snippets Groups Projects
Commit 9218e33c authored by Mateusz Klimaszewski, committed by Mateusz Klimaszewski
Browse files

Extend training configuration.

parent 1b958e29
2 merge requests: !31 Release 1.0.2., !30 Release 1.0.2.
......@@ -229,6 +229,10 @@ def run(_):
"UD_Marathi-UFAL", "UD_Norwegian-Bokmaal"}:
command = command + " --targets deprel,head,upostag,lemma,feats"
# Datasets without FEATS
if treebank in {"UD_Japanese-GSD", "UD_Korean-Kaist"}:
command = command + " --targets deprel,head,upostag,xpostag,lemma"
# Datasets without LEMMA and FEATS
if treebank in {"UD_Maltese-MUDT"}:
command = command + " --targets deprel,head,upostag,xpostag"
......
......@@ -105,7 +105,8 @@ def run(_):
serialization_dir = pathlib.Path(FLAGS.serialization_dir) / lang
serialization_dir.mkdir(exist_ok=True, parents=True)
utils.execute_command("".join(f"""combo --mode train
command = f"""combo --mode train
--training_data {train_path}
--validation_data {dev_path}
--targets feats,upostag,xpostag,head,deprel,lemma,deps
......@@ -115,7 +116,13 @@ def run(_):
--word_batch_size 2500
--config_path {pathlib.Path.cwd() / 'config.graph.template.jsonnet'}
--notensorboard
""".splitlines()))
"""
# Datasets without XPOS
if lang in {"fr"}:
command = command + " --targets deprel,head,upostag,lemma,feats"
utils.execute_command("".join(command.splitlines()))
def main():
......
......@@ -4,6 +4,13 @@ import subprocess
# Pretrained transformer checkpoint to use for each language.
# Keys look like ISO 639-1 language codes; values look like Hugging Face Hub
# model identifiers ("name" or "org/name") -- confirm against the code that
# consumes this mapping.
LANG2TRANSFORMER = {
    "en": "bert-base-cased",  # English
    "pl": "allegro/herbert-base-cased",  # Polish
    "zh": "bert-base-chinese",  # Chinese
    "fi": "TurkuNLP/bert-base-finnish-cased-v1",  # Finnish
    "ja": "cl-tohoku/bert-base-japanese",  # Japanese
    "ko": "kykim/bert-kor-base",  # Korean
    "de": "dbmdz/bert-base-german-cased",  # German
    "ar": "aubmindlab/bert-base-arabertv2",  # Arabic
    "eu": "ixa-ehu/berteus-base-cased",  # Basque
}
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment