# NOTE: page-extraction residue, not part of the module:
#   "An error occurred while loading the file. Please try again."
# Mateusz Klimaszewski authored 7b545ee5
# utils.py 1.77 KiB
"""Utils for scripts."""
import pathlib
import subprocess
# Maps a two-letter language code to the name of the pretrained transformer
# model used for that language. The values look like Hugging Face hub model
# identifiers -- TODO confirm against the code that consumes this table.
LANG2TRANSFORMER = {
    # Languages with a dedicated model.
    "en": "bert-base-cased",
    "pl": "allegro/herbert-large-cased",
    "zh": "bert-base-chinese",
    "fi": "TurkuNLP/bert-base-finnish-cased-v1",
    "ko": "kykim/bert-kor-base",
    "de": "dbmdz/bert-base-german-cased",
    "ar": "aubmindlab/bert-base-arabertv2",
    "eu": "ixa-ehu/berteus-base-cased",
    "tr": "dbmdz/bert-base-turkish-cased",
    # The remaining languages share "xlm-roberta-large", except French.
    "bg": "xlm-roberta-large",
    "nl": "xlm-roberta-large",
    "fr": "camembert-base",
    "it": "xlm-roberta-large",
    "ru": "xlm-roberta-large",
    "sv": "xlm-roberta-large",
    "uk": "xlm-roberta-large",
    "ta": "xlm-roberta-large",
    "sk": "xlm-roberta-large",
    "lt": "xlm-roberta-large",
    "lv": "xlm-roberta-large",
    "cs": "xlm-roberta-large",
    "et": "xlm-roberta-large",
}
def execute_command(command, output_file=None):
    """Run an external command, optionally redirecting its stdout to a file.

    Args:
        command: Either a command-line string, which is split on whitespace
            (no shell quoting is interpreted, so an argument containing
            spaces cannot be expressed this way), or a sequence of
            already-separated arguments, which is used as given.
        output_file: Optional path of a file that receives the command's
            standard output. When falsy, stdout is inherited from the
            calling process.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status. When ``output_file`` was given, the partially written
            file is removed before the error propagates.
    """
    # str.split() with no separator never yields empty items, so no extra
    # filtering of blank tokens is needed.
    if isinstance(command, str):
        args = command.split()
    else:
        args = list(command)

    if not output_file:
        subprocess.run(args, check=True)
        return

    with open(output_file, "w") as f:
        try:
            subprocess.run(args, check=True, stdout=f)
        except BaseException:
            # A failed or interrupted run must not leave an empty/partial
            # file behind: callers that skip work when the output already
            # exists would otherwise treat it as a finished result.
            f.close()  # close before unlinking (required on Windows)
            try:
                pathlib.Path(output_file).unlink()
            except OSError:
                # Best-effort cleanup; never mask the original failure.
                pass
            raise
def path_to_str(path: pathlib.Path) -> str:
    """Return ``path`` after ``Path.resolve()``, converted to a plain string."""
    resolved = path.resolve()
    return str(resolved)
def collapse_nodes(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
    """Run ``enhanced_collapse_empty_nodes.pl`` on a treebank file.

    The Perl script is looked up inside ``data_dir`` and invoked with the
    treebank path as its only argument; its stdout is written to ``output``.
    Nothing is executed when ``output`` already exists.
    """
    if pathlib.Path(output).exists():
        # An existing output is treated as already produced.
        return

    script = path_to_str(data_dir / 'enhanced_collapse_empty_nodes.pl')
    treebank = path_to_str(treebank_file)
    execute_command(f"perl {script} {treebank}", output)
def quick_fix(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
    """Run ``conllu-quick-fix.pl`` on a treebank file.

    The Perl script is looked up inside ``data_dir`` and invoked with the
    treebank path as its only argument; its stdout is written to ``output``.
    Nothing is executed when ``output`` already exists.
    """
    if pathlib.Path(output).exists():
        # An existing output is treated as already produced.
        return

    script = path_to_str(data_dir / 'conllu-quick-fix.pl')
    treebank = path_to_str(treebank_file)
    execute_command(f"perl {script} {treebank}", output)