diff --git a/scripts/utils.py b/scripts/utils.py index 6ce5a8a5bbd2bff04219e6aa40b2c0c915d4a1c7..0ec872500e936fd8bb4abdd32654bd8e7f933625 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -10,7 +10,20 @@ LANG2TRANSFORMER = { "de": "dbmdz/bert-base-german-cased", "ar": "aubmindlab/bert-base-arabertv2", "eu": "ixa-ehu/berteus-base-cased", - "tr": "dbmdz/bert-base-turkish-cased" + "tr": "dbmdz/bert-base-turkish-cased", + "bg": "iarfmoose/roberta-base-bulgarian", + "nl": "GroNLP/bert-base-dutch-cased", + "fr": "camembert-base", + "it": "dbmdz/bert-base-italian-cased", + "ru": "blinoff/roberta-base-russian-v0", + "sv": "KB/bert-base-swedish-cased", + # "uk": http://dl.turkunlp.org/wikibert/wikibert-base-uk-cased/ + # "ta": http://dl.turkunlp.org/wikibert/wikibert-base-ta-cased/ + # "sk": http://dl.turkunlp.org/wikibert/wikibert-base-sk-cased/ + # "lt": http://dl.turkunlp.org/wikibert/wikibert-base-lt-cased/ + # "lv": http://dl.turkunlp.org/wikibert/wikibert-base-lv-cased/ + # "et": http://dl.turkunlp.org/estonian-bert/etwiki-bert/pytorch/ + # "cs": https://github.com/kiv-air/Czert https://arxiv.org/pdf/2103.13031.pdf }