Skip to content
Snippets Groups Projects
Commit 3329cb6d authored by Marcin Wątroba
Browse files

Add new experiment processing

parent 46860cf8
No related branches found
No related tags found
No related merge requests found
...@@ -16,7 +16,7 @@ def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str): ...@@ -16,7 +16,7 @@ def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str):
get_experiment_repository(experiment_dataset_name), get_experiment_repository(experiment_dataset_name),
get_minio_audio_record_repository(), get_minio_audio_record_repository(),
experiment_dataset_name, experiment_dataset_name,
), load_dataset('google/fleurs', dataset_lang)) ), load_dataset('google/fleurs', dataset_lang)['test'])
def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str): def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str):
......
import datetime
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from hashlib import sha1 from hashlib import sha1
from pathlib import Path from pathlib import Path
...@@ -41,8 +42,11 @@ class HfDatasetImporter(ABC): ...@@ -41,8 +42,11 @@ class HfDatasetImporter(ABC):
pass pass
def process_dataset(self, dataset: Dataset): def process_dataset(self, dataset: Dataset):
counter = 1
for it in dataset: for it in dataset:
print(datetime.datetime.now().isoformat(), f'process_dataset item {counter} {it}')
self.process_record(it) self.process_record(it)
counter += 1
def process_record(self, record: Dict[str, Any]): def process_record(self, record: Dict[str, Any]):
record_id = self.get_record_id(record) record_id = self.get_record_id(record)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment