Skip to content
Snippets Groups Projects
Select Git revision
  • d711c60f4e9aef246f7a08d459d45b525b1a2a0b
  • main default protected
  • ud_training_script
  • fix_seed
  • merged-with-ner
  • multiword_fix_transformer
  • transformer_encoder
  • combo3
  • save_deprel_matrix_to_npz
  • master protected
  • combo-lambo
  • lambo-sent-attributes
  • adding_lambo
  • develop
  • update_allenlp2
  • develop_tmp
  • tokens_truncation
  • LR_test
  • eud_iwpt
  • iob
  • eud_iwpt_shared_task_bert_finetuning
  • 3.3.1
  • list
  • 3.2.1
  • 3.0.3
  • 3.0.1
  • 3.0.0
  • v1.0.6
  • v1.0.5
  • v1.0.4
  • v1.0.3
  • v1.0.2
  • v1.0.1
  • v1.0.0
34 results

utils.py

Blame
  • utils.py 1.77 KiB
    """Utils for scripts."""
    import pathlib
    import subprocess
    
    # Lookup table from a language code to the name of the transformer model
    # associated with that language. Several languages share the multilingual
    # "xlm-roberta-large" entry; the rest have a language-specific model.
    # NOTE(review): keys look like ISO 639-1 codes and values like Hugging Face
    # hub model identifiers -- confirm against the scripts that read this table.
    LANG2TRANSFORMER = {
        "en": "bert-base-cased",
        "pl": "allegro/herbert-large-cased",
        "zh": "bert-base-chinese",
        "fi": "TurkuNLP/bert-base-finnish-cased-v1",
        "ko": "kykim/bert-kor-base",
        "de": "dbmdz/bert-base-german-cased",
        "ar": "aubmindlab/bert-base-arabertv2",
        "eu": "ixa-ehu/berteus-base-cased",
        "tr": "dbmdz/bert-base-turkish-cased",
        "bg": "xlm-roberta-large",
        "nl": "xlm-roberta-large",
        "fr": "camembert-base",
        "it": "xlm-roberta-large",
        "ru": "xlm-roberta-large",
        "sv": "xlm-roberta-large",
        "uk": "xlm-roberta-large",
        "ta": "xlm-roberta-large",
        "sk": "xlm-roberta-large",
        "lt": "xlm-roberta-large",
        "lv": "xlm-roberta-large",
        "cs": "xlm-roberta-large",
        "et": "xlm-roberta-large",
    }
    
    
    def execute_command(command, output_file=None):
        """Run an external command, optionally redirecting its stdout to a file.

        Args:
            command: Either a command-line string, which is split on whitespace
                (no shell is involved and quoting is not interpreted), or a
                sequence of arguments that is passed through unchanged. Use the
                sequence form when an argument contains whitespace.
            output_file: Optional path of a file that receives the command's
                standard output. The file is created or truncated. If the
                command fails, the file is removed again.

        Raises:
            subprocess.CalledProcessError: If the command exits with a non-zero
                status.
            OSError: If the executable cannot be started or ``output_file``
                cannot be opened.
        """
        # str.split() without a separator never yields empty items, so no extra
        # filtering of the tokens is needed.
        argv = command.split() if isinstance(command, str) else list(command)

        if not output_file:
            subprocess.run(argv, check=True)
            return

        # Opened outside the try block on purpose: if opening fails nothing was
        # written, and a pre-existing file must not be deleted by the cleanup.
        stream = open(output_file, "w")
        try:
            with stream:
                subprocess.run(argv, check=True, stdout=stream)
        except BaseException:
            # A failed run leaves an empty or truncated file behind. Code that
            # skips work when the output already exists would mistake it for a
            # finished result, so discard it before propagating the error.
            try:
                pathlib.Path(output_file).unlink()
            except OSError:
                pass  # Best-effort cleanup; the original error matters more.
            raise
    
    
    def path_to_str(path: pathlib.Path) -> str:
        """Return ``path`` in resolved form, converted to a plain string."""
        resolved = path.resolve()
        return str(resolved)
    
    
    def collapse_nodes(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
        """Run ``enhanced_collapse_empty_nodes.pl`` on a treebank file.

        The Perl script is looked up inside ``data_dir`` and invoked with the
        resolved path of ``treebank_file``; its standard output is written to
        ``output``. Nothing is executed when ``output`` already exists.

        Args:
            data_dir: Directory containing ``enhanced_collapse_empty_nodes.pl``.
            treebank_file: File passed to the script as its only argument.
            output: Path of the file that receives the script's output.
        """
        # An existing output is treated as already done and left untouched.
        if pathlib.Path(output).exists():
            return

        script = path_to_str(data_dir / 'enhanced_collapse_empty_nodes.pl')
        treebank = path_to_str(treebank_file)
        execute_command(f"perl {script} {treebank}", output)
    
    
    def quick_fix(data_dir: pathlib.Path, treebank_file: pathlib.Path, output: str):
        """Run ``conllu-quick-fix.pl`` on a treebank file.

        The Perl script is looked up inside ``data_dir`` and invoked with the
        resolved path of ``treebank_file``; its standard output is written to
        ``output``. Nothing is executed when ``output`` already exists.

        Args:
            data_dir: Directory containing ``conllu-quick-fix.pl``.
            treebank_file: File passed to the script as its only argument.
            output: Path of the file that receives the script's output.
        """
        # An existing output is treated as already done and left untouched.
        if pathlib.Path(output).exists():
            return

        script = path_to_str(data_dir / 'conllu-quick-fix.pl')
        treebank = path_to_str(treebank_file)
        execute_command(f"perl {script} {treebank}", output)