Skip to content
Snippets Groups Projects
Commit ac107081 authored by piotrmp
Browse files

Added partial execution to pretraining.

parent 98383a84
No related branches found
No related tags found
1 merge request: !1 "Migration to UD 2.11"
......@@ -32,7 +32,9 @@ if __name__=='__main__':
MAX_DOCUMENTS = 100
CONTEXT_LEN = 1024
for language in languages:
for l, language in enumerate(languages):
if l % 5 != int(sys.argv[5]):
continue
if (outpath / ('oscar_' + language + '.pth')).exists():
continue
print("Language: " + language)
......@@ -52,7 +54,8 @@ if __name__=='__main__':
print(str(i + 1) + '/' + str(min(len(train_documents), MAX_DOCUMENTS)))
Xchars, Xutfs, Xmasks, Yvecs = encode_pretraining([document_train], dict, CONTEXT_LEN)
_, train_dataloader, test_dataloader = prepare_dataloaders_pretraining([document_train],
[document_test], CONTEXT_LEN, 32, dict)
[document_test], CONTEXT_LEN, 32,
dict)
pretrain(model, train_dataloader, test_dataloader, 1, device)
torch.save(model, outpath / ('oscar_' + language + '.pth'))
with open(outpath / ('oscar_' + language + '.dict'), "w") as file1:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment