Skip to content
Snippets Groups Projects
Commit 3329cb6d authored by Marcin Wątroba
Browse files

Add new experiment processing

parent 46860cf8
No related merge requests found
......@@ -16,7 +16,7 @@ def import_fleurs_dataset(dataset_lang: str, experiment_dataset_name: str):
get_experiment_repository(experiment_dataset_name),
get_minio_audio_record_repository(),
experiment_dataset_name,
), load_dataset('google/fleurs', dataset_lang))
), load_dataset('google/fleurs', dataset_lang)['test'])
def import_minds14_dataset(dataset_lang: str, experiment_dataset_name: str):
......
import datetime
from abc import ABC, abstractmethod
from hashlib import sha1
from pathlib import Path
......@@ -41,8 +42,11 @@ class HfDatasetImporter(ABC):
pass
def process_dataset(self, dataset: Dataset):
counter = 1
for it in dataset:
print(datetime.datetime.now().isoformat(), f'process_dataset item {counter} {it}')
self.process_record(it)
counter += 1
def process_record(self, record: Dict[str, Any]):
record_id = self.get_record_id(record)
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment