From ab07051dd54bd9155a0ee8d7a74df26cf81eb8c0 Mon Sep 17 00:00:00 2001 From: piotrmp <piotr.m.przybyla@gmail.com> Date: Wed, 23 Nov 2022 10:07:01 +0100 Subject: [PATCH] Bug fix. --- src/lambo/utils/oscar.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/lambo/utils/oscar.py b/src/lambo/utils/oscar.py index bda4c66..0b588a1 100644 --- a/src/lambo/utils/oscar.py +++ b/src/lambo/utils/oscar.py @@ -4,6 +4,7 @@ Functions used to obtain multilingual corpora from `OSCAR <https://oscar-corpus. import json import random import urllib +import time from urllib.error import HTTPError from lambo.data.document import Document @@ -46,10 +47,11 @@ def download_archive1_from_oscar(language, path, OSCAR_LOGIN, OSCAR_PASSWORD, re return except HTTPError as err: error = err - if i == retry - 1: + if i == retry - 1 or err.code<500: raise error - time = ((i + 1) * (i + 1) * (i + 1) * 15) - print("[Got " + str(error.code) + ", retrying after " + str(time) + " seconds...]") + secs = ((i + 1) * (i + 1) * (i + 1) * 15) + print("[Got " + str(error.code) + ", retrying after " + str(secs) + " seconds...]") + time.sleep(secs) def read_jsonl_to_documents(fileobj, MAX_LEN=3000000): -- GitLab