From 1982161fa9b48ab9f081aa2e3c2bfb8e63d7257e Mon Sep 17 00:00:00 2001
From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com>
Date: Thu, 20 May 2021 12:38:13 +0200
Subject: [PATCH] Use XLM-R for low-resource languages.

---
 scripts/utils.py | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/scripts/utils.py b/scripts/utils.py
index 2c66205..925f13b 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -18,20 +18,13 @@ LANG2TRANSFORMER = {
     "it": "dbmdz/bert-base-italian-cased",
     "ru": "blinoff/roberta-base-russian-v0",
     "sv": "KB/bert-base-swedish-cased",
-    "uk": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-uk-cased/",
+    "uk": "xlm-roberta-large",
     "ta": "xlm-roberta-large",
-    "sk": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-sk-cased/",
-    "lt": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-lt-cased/",
-    "lv": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-lv-cased/",
-    "cs": "/tmp/lustre_shared/mklimasz/transformers/wikibert-base-cs-cased/",
-    "et": "/tmp/lustre_shared/mklimasz/transformers/etwiki-bert/",
-    # "uk": http://dl.turkunlp.org/wikibert/wikibert-base-uk-cased/
-    # "ta": http://dl.turkunlp.org/wikibert/wikibert-base-ta-cased/
-    # "sk": http://dl.turkunlp.org/wikibert/wikibert-base-sk-cased/
-    # "lt": http://dl.turkunlp.org/wikibert/wikibert-base-lt-cased/
-    # "lv": http://dl.turkunlp.org/wikibert/wikibert-base-lv-cased/
-    # "et": http://dl.turkunlp.org/estonian-bert/etwiki-bert/pytorch/
-    # "cs": https://github.com/kiv-air/Czert https://arxiv.org/pdf/2103.13031.pdf
+    "sk": "xlm-roberta-large",
+    "lt": "xlm-roberta-large",
+    "lv": "xlm-roberta-large",
+    "cs": "xlm-roberta-large",
+    "et": "xlm-roberta-large",
 }
 
 
-- 
GitLab