Skip to content
Snippets Groups Projects
Commit ce2d7be6 authored by Mateusz Klimaszewski
Browse files

Add conllu-quick-fix call. Change model for Tamil.

parent 00c388ed
Branches
Tags
2 merge requests: !37 "Release 1.0.4." and !36 "Release 1.0.4"
......@@ -70,7 +70,7 @@ def run(_):
utils.execute_command(command)
output_collapsed = utils.path_to_str(output_pred).replace('.conllu', '.collapsed.conllu')
utils.collapse_nodes(pathlib.Path(FLAGS.data_dir), output_pred, output_collapsed)
utils.collapse_nodes(pathlib.Path(FLAGS.data_dir) / 'tools', output_pred, output_collapsed)
command = f"""python {FLAGS.evaluate_script_path} -v
{test_file}
......
......@@ -27,9 +27,11 @@ CODE2LANG = {
FLAGS = flags.FLAGS
flags.DEFINE_string(name="data_dir", default="",
help="Path to IWPT'21 data directory.")
help="Path to data directory.")
flags.DEFINE_string(name="models_dir", default="/tmp/",
help="Model serialization dir.")
flags.DEFINE_string(name="tools", default="",
help="UD tools path.")
flags.DEFINE_integer(name="cuda_device", default=-1,
help="Cuda device id (-1 for cpu).")
flags.DEFINE_boolean(name="expect_prefix", default=True,
......@@ -51,6 +53,12 @@ def run(_):
data_dir = pathlib.Path(FLAGS.data_dir)
files = list(data_dir.iterdir())
test_file = [f for f in files if f"{lang}.mwt.conllu" == f.name]
# Try to use mwt file if it exists
if test_file:
assert len(test_file) == 1, f"Should be exactly one {lang}.mwt.conllu file."
test_file = test_file[0]
else:
test_file = [f for f in files if f"{lang}.conllu" == f.name]
assert len(test_file) == 1, f"Couldn't find test file."
test_file = test_file[0]
......@@ -64,6 +72,12 @@ def run(_):
"""
utils.execute_command(command)
output_fixed = utils.path_to_str(output_pred).replace('.conllu', '.fixed.conllu')
utils.quick_fix(pathlib.Path(FLAGS.tools), output_pred, output_fixed)
output_collapsed = output_fixed.replace('.fixed.conllu', '.collapsed.conllu')
utils.collapse_nodes(pathlib.Path(FLAGS.tools), pathlib.Path(output_fixed), output_collapsed)
def main():
    """Entry point: pass the ``run`` callback to ``app.run``."""
    app.run(run)
......
......@@ -73,10 +73,10 @@ def run(_):
if "conllu" in name and "fixed" not in name:
output = utils.path_to_str(treebank_file).replace('.conllu', '.fixed.conllu')
if "train" in name:
utils.collapse_nodes(data_dir, treebank_file, output)
utils.collapse_nodes(data_dir / 'tools', treebank_file, output)
train_paths.append(output)
elif "dev" in name:
utils.collapse_nodes(data_dir, treebank_file, output)
utils.collapse_nodes(data_dir / 'tools', treebank_file, output)
dev_paths.append(output)
# elif "test" in name:
# collapse_nodes(data_dir, treebank_file, output)
......
......@@ -19,7 +19,7 @@ LANG2TRANSFORMER = {
"ru": "blinoff/roberta-base-russian-v0",
"sv": "KB/bert-base-swedish-cased",
"uk": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-uk-cased/",
"ta": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-ta-cased/",
"ta": "xlm-roberta-large",
"sk": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-sk-cased/",
"lt": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-lt-cased/",
"lv": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-lv-cased/",
......@@ -51,5 +51,12 @@ def path_to_str(path: pathlib.Path) -> str:
# Run the `enhanced_collapse_empty_nodes.pl` Perl script on `treebank_file`,
# unless a file already exists at `output` (in which case nothing is executed).
# `output` is forwarded as the second argument of `execute_command`
# (presumably the file the script's output is written to -- confirm against
# `execute_command`).
# NOTE(review): this is a diff hunk. The two consecutive `execute_command(`
# lines below are the pre-change version (appends 'tools' to `data_dir` here)
# and the post-change version (expects `data_dir` to already be the directory
# that contains the script).
def collapse_nodes(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
output_path = pathlib.Path(output)
if not output_path.exists():
execute_command(f"perl {path_to_str(data_dir / 'tools' / 'enhanced_collapse_empty_nodes.pl')} "
execute_command(f"perl {path_to_str(data_dir / 'enhanced_collapse_empty_nodes.pl')} "
f"{path_to_str(treebank_file)}", output)
def quick_fix(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
    """Run the ``conllu-quick-fix.pl`` Perl script on ``treebank_file``.

    The script is looked up directly inside ``data_dir``. Nothing is executed
    when a file already exists at ``output``.

    :param data_dir: directory that contains ``conllu-quick-fix.pl``.
    :param treebank_file: file passed to the script as its only argument.
    :param output: target path; forwarded as the second argument of
        ``execute_command``.
    """
    if pathlib.Path(output).exists():
        # A result from an earlier run is already in place.
        return
    script = path_to_str(data_dir / 'conllu-quick-fix.pl')
    treebank = path_to_str(treebank_file)
    execute_command(f"perl {script} {treebank}", output)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment