"""Utils for scripts."""
import pathlib
import subprocess

# Maps a two-letter language code to the transformer model used for that
# language. Values are either model identifiers (presumably Hugging Face Hub
# names -- confirm against the code that loads them) or absolute paths to
# model directories stored on a shared filesystem.
LANG2TRANSFORMER = {
    "en": "bert-base-cased",
    "pl": "allegro/herbert-base-cased",
    "zh": "bert-base-chinese",
    "fi": "TurkuNLP/bert-base-finnish-cased-v1",
    "ko": "kykim/bert-kor-base",
    "de": "dbmdz/bert-base-german-cased",
    "ar": "aubmindlab/bert-base-arabertv2",
    "eu": "ixa-ehu/berteus-base-cased",
    "tr": "dbmdz/bert-base-turkish-cased",
    "bg": "iarfmoose/roberta-base-bulgarian",
    "nl": "GroNLP/bert-base-dutch-cased",
    "fr": "camembert-base",
    "it": "dbmdz/bert-base-italian-cased",
    "ru": "blinoff/roberta-base-russian-v0",
    "sv": "KB/bert-base-swedish-cased",
    # The entries below point at local directories rather than model names;
    # the models must already be present at these paths.
    "uk": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-uk-cased/",
    "ta": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-ta-cased/",
    "sk": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-sk-cased/",
    "lt": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-lt-cased/",
    "lv": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-lv-cased/",
    "cs": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-cs-cased/",
    "et": "/tmp/lustre_shared/mklimasz/transformers/etwiki-bert/",
    # Upstream sources for the locally stored models above:
    # "uk": http://dl.turkunlp.org/wikibert/wikibert-base-uk-cased/
    # "ta": http://dl.turkunlp.org/wikibert/wikibert-base-ta-cased/
    # "sk": http://dl.turkunlp.org/wikibert/wikibert-base-sk-cased/
    # "lt": http://dl.turkunlp.org/wikibert/wikibert-base-lt-cased/
    # "lv": http://dl.turkunlp.org/wikibert/wikibert-base-lv-cased/
    # "et": http://dl.turkunlp.org/estonian-bert/etwiki-bert/pytorch/
    # "cs": https://github.com/kiv-air/Czert https://arxiv.org/pdf/2103.13031.pdf
}


def execute_command(command, output_file=None):
    """Run ``command`` as a subprocess, optionally saving its stdout to a file.

    The command string is split on whitespace into an argument list and run
    without a shell, so a single argument cannot contain spaces and shell
    quoting is not interpreted.

    Args:
        command: Program and arguments as one whitespace-separated string.
        output_file: Optional path. When truthy, the file is opened for
            writing (truncating any existing content) and receives the
            command's standard output. If the command fails, the partially
            written file is removed so that a later existence check does not
            mistake it for a finished result.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status.
        OSError: If the executable cannot be started or the output file
            cannot be opened.
    """
    # str.split() with no separator never yields empty tokens, so no extra
    # filtering of the pieces is needed.
    args = command.split()

    if not output_file:
        subprocess.run(args, check=True)
        return

    with open(output_file, "w") as sink:
        try:
            subprocess.run(args, check=True, stdout=sink)
        except BaseException:
            # The file was already created/truncated by open(); do not leave
            # an empty or partial result behind. Close first so the removal
            # also works on platforms that refuse to delete open files.
            sink.close()
            try:
                pathlib.Path(output_file).unlink()
            except OSError:
                # Best-effort cleanup: the original error is the one that
                # matters to the caller.
                pass
            raise


def path_to_str(path: pathlib.Path) -> str:
    """Return the resolved (absolute) form of ``path`` as a plain string."""
    resolved = path.resolve()
    return str(resolved)


def collapse_nodes(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
    """Run the ``enhanced_collapse_empty_nodes.pl`` script on a treebank file.

    The Perl script is looked up under ``data_dir / "tools"`` and invoked with
    the resolved path of ``treebank_file``; its standard output is written to
    ``output``. Nothing is run when ``output`` already exists.

    Args:
        data_dir: Directory containing the ``tools`` folder with the script.
        treebank_file: File passed to the script as its only argument.
        output: Path of the file that receives the script's output.
    """
    # An existing output is treated as an already computed result.
    if pathlib.Path(output).exists():
        return

    script = path_to_str(data_dir / "tools" / "enhanced_collapse_empty_nodes.pl")
    treebank = path_to_str(treebank_file)
    execute_command(f"perl {script} {treebank}", output)