diff --git a/src/lambo/examples/run_pretraining.py b/src/lambo/examples/run_pretraining.py
index 6ff74bcdf950584987de13b5a29dd8944ae9d4d6..2c724f408b35a8d8f964f4087fdf00fd91ead4ea 100644
--- a/src/lambo/examples/run_pretraining.py
+++ b/src/lambo/examples/run_pretraining.py
@@ -2,6 +2,8 @@
 Script from pretraining models using OSCAR corpora
 """
 import gzip
+from urllib.error import HTTPError
+
 import importlib_resources as resources
 from pathlib import Path
 
@@ -39,7 +41,14 @@ if __name__ == '__main__':
             continue
         print("Language: " + language)
         print("Downloading corpus...")
-        download_archive1_from_oscar(language, tmppath, OSCAR_LOGIN, OSCAR_PASSWORD)
+        try:
+            download_archive1_from_oscar(language, tmppath, OSCAR_LOGIN, OSCAR_PASSWORD)
+        except HTTPError as err:
+            # Only a 404 (language missing from OSCAR) is skippable; re-raise the rest.
+            if err.code != 404:
+                raise
+            print("Language unavailable in OSCAR. moving on...")
+            continue
         with gzip.open(tmppath) as jsonfile:
             train_documents, test_documents = read_jsonl_to_documents(jsonfile)
         print("Generated " + str(len(train_documents)) + " documents.")