from pprint import pprint

from experiment.common_voice.dependency_helper import get_record_provider, get_repository
from experiment.stats_tools.record_stat import RecordStat

if __name__ == '__main__':
    # Script entry point: build one RecordStat per dataset record and
    # pretty-print the resulting list.
    provider = get_record_provider()
    records = provider.get_dataset()
    repo = get_repository()

    # Print the first record as a quick look at the record layout.
    print(records[0])

    stats = []
    for record in records:
        audio = record['audio']
        # Length of the array's first axis divided by the sampling rate
        # (presumably the clip duration in seconds -- confirm units).
        duration = audio['array'].shape[0] / audio['sampling_rate']

        # Length of the 'gold_transcript' property stored for this record's id
        # (presumably a character count, if the property is a string).
        record_id = provider.get_id_from_record(record)
        transcript = repo.get_property_for_key(record_id, 'gold_transcript')

        stats.append(RecordStat(duration, len(transcript)))

    pprint(stats)
