Skip to content
Snippets Groups Projects
Commit 06fa7509 authored by Marcin Wątroba's avatar Marcin Wątroba
Browse files

Add stats

parent 8639bbdb
Branches
No related tags found
No related merge requests found
from new_experiment.new_dependency_provider import get_experiment_repository
from new_experiment.utils.get_spacy_model_name import get_spacy_model_name
from new_experiment.utils.property_helper import PropertyHelper
def get_stats_for(dataset_name: str, property_name: str) -> float:
repo = get_experiment_repository(dataset_name)
vals = [repo.get_property_for_key(it, property_name) for it in repo.get_all_record_ids_for_property(property_name)]
print(vals)
vals = [it for it in vals if isinstance(it, float)]
ret = 0.0
if len(vals) == 0:
......@@ -12,6 +13,7 @@ def get_stats_for(dataset_name: str, property_name: str) -> float:
else:
ret = sum(vals) / len(vals)
print(dataset_name, property_name, ret)
return ret
def get_stats_for_classic_wer(dataset_name: str, property_name: str) -> float:
    """Print and return the mean ``classic_wer`` stored under ``property_name``.

    Every record id the repository lists for ``property_name`` is loaded.
    Entries that have no ``'classic_wer'`` key, or whose value is not a
    ``float``, are left out of the average.

    Returns:
        The arithmetic mean of the remaining values, or ``-1`` when none
        remain.
    """
    # NOTE(review): the repository lookup sits in a collapsed part of the
    # diff; it is reconstructed from the identical line in get_stats_for.
    # Confirm against the real file.
    repo = get_experiment_repository(dataset_name)
    records = [
        repo.get_property_for_key(record_id, property_name)
        for record_id in repo.get_all_record_ids_for_property(property_name)
    ]
    scores = [record['classic_wer'] for record in records if 'classic_wer' in record]
    scores = [score for score in scores if isinstance(score, float)]

    # -1 is the "no usable samples" sentinel; real scores are averaged.
    ret = sum(scores) / len(scores) if scores else -1
    print(dataset_name, property_name, ret)
    # Return the guarded value. Re-dividing here used to raise
    # ZeroDivisionError whenever the sample list was empty.
    return ret
def get_stats_for_soft_wer(dataset_name: str, property_name: str) -> float:
    """Print and return the mean ``soft_wer`` stored under ``property_name``.

    Every record id the repository lists for ``property_name`` is loaded.
    Entries that have no ``'soft_wer'`` key, or whose value is not a
    ``float``, are left out of the average.

    Returns:
        The arithmetic mean of the remaining values, or ``-1`` when none
        remain.
    """
    # NOTE(review): the repository lookup sits in a collapsed part of the
    # diff; it is reconstructed from the identical line in get_stats_for.
    # Confirm against the real file.
    repo = get_experiment_repository(dataset_name)
    records = [
        repo.get_property_for_key(record_id, property_name)
        for record_id in repo.get_all_record_ids_for_property(property_name)
    ]
    scores = [record['soft_wer'] for record in records if 'soft_wer' in record]
    scores = [score for score in scores if isinstance(score, float)]

    # -1 is the "no usable samples" sentinel; real scores are averaged.
    ret = sum(scores) / len(scores) if scores else -1
    print(dataset_name, property_name, ret)
    # Return the guarded value. Re-dividing here used to raise
    # ZeroDivisionError whenever the sample list was empty.
    return ret
def get_stats_for_embedding_wer(dataset_name: str, property_name: str) -> float:
    """Print and return the mean ``embedding_wer`` stored under ``property_name``.

    Entries that have no ``'embedding_wer'`` key, or whose value is not a
    ``float``, are left out of the average.

    Returns:
        The arithmetic mean of the remaining values, or ``-1`` when none
        remain, matching the classic/soft WER helpers. Without this guard
        an empty sample raised ``ZeroDivisionError``.
    """
    # NOTE(review): the next two statements sit in a collapsed part of the
    # diff; they are reconstructed from the matching lines of the sibling
    # helpers. Confirm against the real file.
    repo = get_experiment_repository(dataset_name)
    records = [
        repo.get_property_for_key(record_id, property_name)
        for record_id in repo.get_all_record_ids_for_property(property_name)
    ]
    scores = [record['embedding_wer'] for record in records if 'embedding_wer' in record]
    scores = [score for score in scores if isinstance(score, float)]

    # -1 is the "no usable samples" sentinel; real scores are averaged.
    ret = sum(scores) / len(scores) if scores else -1
    # Report the value like the sibling helpers do; the __main__ driver
    # discards the return value, so otherwise the statistic is never shown.
    print(dataset_name, property_name, ret)
    return ret
if __name__ == '__main__':
    # Pipeline names; nothing in this block reads them.
    COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline',
                'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline']
    LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']
    WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2']
    DATASETS = ['google_fleurs', 'minds14', 'voxpopuli']

    # Language-major order: every corpus for 'nl', then every corpus for 'fr', ...
    FULL_DATASET_NAMES = [
        f'{language}_{corpus}'
        for language in LANGUAGES
        for corpus in DATASETS
    ]
    FULL_LANGUAGE_MODELS = [f'whisper_{size}' for size in WHISPER_ASR_MODEL]

    # One (reporter, property-name builder) pair per statistic, in the order
    # the statistics are reported. The first two characters of a dataset name
    # are its language code, which selects the spaCy model.
    REPORTS = (
        (get_stats_for,
         lambda ds, asr: PropertyHelper.ner_metrics(asr, get_spacy_model_name(ds[:2]))),
        (get_stats_for,
         lambda ds, asr: PropertyHelper.pos_metrics(asr, get_spacy_model_name(ds[:2]))),
        (get_stats_for,
         lambda ds, asr: PropertyHelper.dep_tag_metrics(asr, get_spacy_model_name(ds[:2]))),
        (get_stats_for_classic_wer,
         lambda ds, asr: PropertyHelper.word_wer_classic_metrics(asr)),
        (get_stats_for_soft_wer,
         lambda ds, asr: PropertyHelper.word_wer_embeddings_metrics(asr)),
        (get_stats_for_embedding_wer,
         lambda ds, asr: PropertyHelper.word_wer_embeddings_metrics(asr)),
    )

    # Each statistic is computed for every dataset/model pair before moving
    # on to the next statistic.
    for report, build_property_name in REPORTS:
        for dataset in FULL_DATASET_NAMES:
            for model in FULL_LANGUAGE_MODELS:
                report(dataset, build_property_name(dataset, model))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment