{ "cells": [ { "cell_type": "code", "execution_count": 11, "id": "955a0385-29fb-47dc-b012-729e49570594", "metadata": {}, "outputs": [], "source": [ "from new_experiment.utils.get_spacy_model_name import *\n", "\n", "from call_experiment_stats import *\n", "\n", "from new_experiment.utils.property_helper import PropertyHelper\n", "from new_experiment.utils.get_spacy_model_name import get_spacy_model_name\n", "from new_experiment.new_dependency_provider import get_experiment_repository\n", "from new_experiment.add_to_queue_pipeline import get_hf_facebook_wav2vec2_model_by_language_code\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": null, "id": "3f1221d3-5f70-4441-af07-58fa176e31e9", "metadata": {}, "outputs": [], "source": [ "METRICS_FILE = 'metrics.txt'" ] }, { "cell_type": "code", "execution_count": 9, "id": "eda46e65-8079-40b9-9c4e-37fe74caec45", "metadata": {}, "outputs": [], "source": [ "metric_repository = get_experiment_repository('metric_stats')\n", "with open(METRICS_FILE, 'w') as writer:\n", " for dataset_property in metric_repository.get_all_properties():\n", " values_dict = metric_repository.get_all_values_from_property(dataset_property)\n", " for value_key in values_dict.keys():\n", " line = f'{dataset_property} {value_key} {values_dict[value_key]}'\n", " writer.write(f'{line}\\n')" ] }, { "cell_type": "code", "execution_count": 15, "id": "9f5e44a6-f211-4b61-8cb4-5636c7672c6a", "metadata": {}, "outputs": [], "source": [ "COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline',\n", " 'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline']\n", "LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']\n", "WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2']\n", "DATASETS = ['google_fleurs', 'minds14', 'voxpopuli']\n", "FULL_DATASET_NAMES = []\n", "for itt in LANGUAGES:\n", " for it in DATASETS:\n", " FULL_DATASET_NAMES.append(f'{itt}_{it}')\n", "\n", "FULL_LANGUAGE_MODELS = [f'whisper_{it}' for it in WHISPER_ASR_MODEL] + ['facebook_wav2vec2', 'nvidia_stt']" ] }, { "cell_type": "code", "execution_count": 10, "id": "d2465ceb-7439-4fa5-adf8-e95d7e6106b9", "metadata": {}, "outputs": [], "source": [ "vals = dict()\n", "with open(METRICS_FILE, 'r') as reader:\n", " lines = reader.read().splitlines(keepends=False)\n", " for line in lines:\n", " # print(line)\n", " words = line.split()\n", " key = f'{words[0]}_{words[1]}'\n", " vals[key] = float(words[2])" ] }, { "cell_type": "code", "execution_count": 12, "id": "e41b19d0-37cb-4810-896a-fa0f73dd86e0", "metadata": {}, "outputs": [], "source": [ "def get_model_for_dataset_name(dataset: str, model: str):\n", " language_code = dataset[:2]\n", " if model.startswith('whisper'):\n", " return model\n", " elif model.startswith('facebook_wav2vec2'):\n", " return get_hf_facebook_wav2vec2_model_by_language_code(language_code)\n", " elif model.startswith('nvidia_stt'):\n", " return f'nvidia_stt_{language_code}_conformer_transducer_large'\n", " else:\n", " raise Exception('asr name not found')" ] }, { "cell_type": "code", "execution_count": 24, "id": "22d84451-b7e3-4dba-9758-068dae23ace4", "metadata": {}, "outputs": [], "source": [ "spacy_ner = [\n", " [vals.get(f'{dataset}_{PropertyHelper.ner_metrics(get_model_for_dataset_name(dataset, model), get_spacy_model_name(dataset[:2]))}', -1.0) \n", " for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]\n", "spacy_pos = [\n", " [vals.get(f'{dataset}_{PropertyHelper.pos_metrics(get_model_for_dataset_name(dataset, model), get_spacy_model_name(dataset[:2]))}', -1.0) \n", " for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]\n", "spacy_dep = [\n", " [vals.get(f'{dataset}_{PropertyHelper.pos_metrics(get_model_for_dataset_name(dataset, model), get_spacy_model_name(dataset[:2]))}', -1.0) \n", " for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]\n", "word_wer_classic_metrics = [\n", " [vals.get(f'{dataset}_{PropertyHelper.word_wer_classic_metrics(get_model_for_dataset_name(dataset, model))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]\n", "word_wer_soft_metrics = [\n", " [vals.get(f'{dataset}_{PropertyHelper.word_wer_soft_metrics(get_model_for_dataset_name(dataset, model))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]\n", "word_wer_embedding_metrics = [\n", " [vals.get(f'{dataset}_{PropertyHelper.word_wer_embeddings_metrics(get_model_for_dataset_name(dataset, model))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]\n", "flair_pos = [\n", " [vals.get(f'{dataset}_{PropertyHelper.word_wer_embeddings_metrics(get_model_for_dataset_name(dataset, model))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n", " for dataset in FULL_DATASET_NAMES\n", "]" ] }, { "cell_type": "code", "execution_count": 25, "id": "45fd851c-644f-48e6-b711-5bd312404b8b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.316124</td>\n", " <td>0.230845</td>\n", " <td>0.186936</td>\n", " <td>0.170150</td>\n", " <td>0.165057</td>\n", " <td>0.082781</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.463084</td>\n", " <td>0.409993</td>\n", " <td>0.360934</td>\n", " <td>0.331613</td>\n", " <td>0.324172</td>\n", " <td>0.142155</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.215158</td>\n", " <td>0.178716</td>\n", " <td>0.132960</td>\n", " <td>0.118042</td>\n", " <td>0.139958</td>\n", " <td>0.200403</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.264291</td>\n", " <td>0.193436</td>\n", " <td>0.177302</td>\n", " <td>0.147464</td>\n", " <td>0.141276</td>\n", " <td>0.083170</td>\n", " <td>0.053155</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.466860</td>\n", " <td>0.468822</td>\n", " <td>0.471754</td>\n", " <td>0.444854</td>\n", " <td>0.485090</td>\n", " <td>0.220358</td>\n", " <td>0.189111</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.161386</td>\n", " <td>0.131144</td>\n", " <td>0.113097</td>\n", " <td>0.099114</td>\n", " <td>0.111776</td>\n", " <td>0.169564</td>\n", " <td>0.127958</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.316175</td>\n", " <td>0.257454</td>\n", " <td>0.234163</td>\n", " <td>0.239750</td>\n", " <td>0.236715</td>\n", " <td>0.083423</td>\n", " <td>0.051673</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.435681</td>\n", " <td>0.425712</td>\n", " <td>0.412896</td>\n", " <td>0.398617</td>\n", " <td>0.398762</td>\n", " <td>0.183933</td>\n", " <td>0.146988</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.200245</td>\n", " <td>0.155502</td>\n", " <td>0.133251</td>\n", " <td>0.116949</td>\n", " <td>0.156371</td>\n", " <td>0.242498</td>\n", " <td>0.168854</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.206301</td>\n", " <td>0.172527</td>\n", " <td>0.161195</td>\n", " <td>0.156655</td>\n", " <td>0.160677</td>\n", " <td>0.067181</td>\n", " <td>0.039040</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.487493</td>\n", " <td>0.448874</td>\n", " <td>0.432679</td>\n", " <td>0.416035</td>\n", " <td>0.392705</td>\n", " <td>0.198809</td>\n", " <td>0.146235</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.160365</td>\n", " <td>0.139461</td>\n", " <td>0.138966</td>\n", " <td>0.123130</td>\n", " <td>0.130691</td>\n", " <td>-1.000000</td>\n", " <td>0.153960</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.334936</td>\n", " <td>0.273025</td>\n", " <td>0.227662</td>\n", " <td>0.210962</td>\n", " <td>0.209027</td>\n", " <td>0.088157</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>0.657194</td>\n", " <td>0.591588</td>\n", " <td>0.487344</td>\n", " <td>0.474013</td>\n", " <td>0.487891</td>\n", " <td>0.237692</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.203548</td>\n", " <td>0.158526</td>\n", " <td>0.126280</td>\n", " <td>0.110784</td>\n", " <td>0.117780</td>\n", " <td>0.184368</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.187607</td>\n", " <td>0.159873</td>\n", " <td>0.147104</td>\n", " <td>0.155210</td>\n", " <td>0.154657</td>\n", " <td>0.057830</td>\n", " <td>0.038903</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.721295</td>\n", " <td>0.670363</td>\n", " <td>0.666278</td>\n", " <td>0.673058</td>\n", " <td>0.680341</td>\n", " <td>0.411927</td>\n", " <td>0.342895</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.133805</td>\n", " <td>0.116222</td>\n", " <td>0.119882</td>\n", " <td>0.106610</td>\n", " <td>0.122036</td>\n", " <td>0.148225</td>\n", " <td>0.128456</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.217843</td>\n", " <td>0.188810</td>\n", " <td>0.186407</td>\n", " <td>0.183656</td>\n", " <td>0.184568</td>\n", " <td>0.180523</td>\n", " <td>0.071421</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.562068</td>\n", " <td>0.566999</td>\n", " <td>0.580369</td>\n", " <td>0.583945</td>\n", " <td>0.578079</td>\n", " <td>0.325304</td>\n", " <td>0.293083</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.224980</td>\n", " <td>0.203959</td>\n", " <td>0.210278</td>\n", " <td>0.322688</td>\n", " <td>0.280877</td>\n", " <td>0.182708</td>\n", " <td>0.124416</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.316124 0.230845 0.186936 0.170150 \n", "nl_minds14 0.463084 0.409993 0.360934 0.331613 \n", "nl_voxpopuli 0.215158 0.178716 0.132960 0.118042 \n", "fr_google_fleurs 0.264291 0.193436 0.177302 0.147464 \n", "fr_minds14 0.466860 0.468822 0.471754 0.444854 \n", "fr_voxpopuli 0.161386 0.131144 0.113097 0.099114 \n", "de_google_fleurs 0.316175 0.257454 0.234163 0.239750 \n", "de_minds14 0.435681 0.425712 0.412896 0.398617 \n", "de_voxpopuli 0.200245 0.155502 0.133251 0.116949 \n", "it_google_fleurs 0.206301 0.172527 0.161195 0.156655 \n", "it_minds14 0.487493 0.448874 0.432679 0.416035 \n", "it_voxpopuli 0.160365 0.139461 0.138966 0.123130 \n", "pl_google_fleurs 0.334936 0.273025 0.227662 0.210962 \n", "pl_minds14 0.657194 0.591588 0.487344 0.474013 \n", "pl_voxpopuli 0.203548 0.158526 0.126280 0.110784 \n", "es_google_fleurs 0.187607 0.159873 0.147104 0.155210 \n", "es_minds14 0.721295 0.670363 0.666278 0.673058 \n", "es_voxpopuli 0.133805 0.116222 0.119882 0.106610 \n", "en_google_fleurs 0.217843 0.188810 0.186407 0.183656 \n", "en_minds14 0.562068 0.566999 0.580369 0.583945 \n", "en_voxpopuli 0.224980 0.203959 0.210278 0.322688 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.165057 0.082781 -1.000000 \n", "nl_minds14 0.324172 0.142155 -1.000000 \n", "nl_voxpopuli 0.139958 0.200403 -1.000000 \n", "fr_google_fleurs 0.141276 0.083170 0.053155 \n", "fr_minds14 0.485090 0.220358 0.189111 \n", "fr_voxpopuli 0.111776 0.169564 0.127958 \n", "de_google_fleurs 0.236715 0.083423 0.051673 \n", "de_minds14 0.398762 0.183933 0.146988 \n", "de_voxpopuli 0.156371 0.242498 0.168854 \n", "it_google_fleurs 0.160677 0.067181 0.039040 \n", "it_minds14 0.392705 0.198809 0.146235 \n", "it_voxpopuli 0.130691 -1.000000 0.153960 \n", "pl_google_fleurs 0.209027 0.088157 -1.000000 \n", "pl_minds14 0.487891 0.237692 -1.000000 \n", "pl_voxpopuli 0.117780 0.184368 -1.000000 \n", "es_google_fleurs 0.154657 0.057830 0.038903 \n", "es_minds14 0.680341 0.411927 0.342895 \n", "es_voxpopuli 0.122036 0.148225 0.128456 \n", "en_google_fleurs 0.184568 0.180523 0.071421 \n", "en_minds14 0.578079 0.325304 0.293083 \n", "en_voxpopuli 0.280877 0.182708 0.124416 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spacy_ner_df = pd.DataFrame(spacy_ner, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "spacy_ner_df.to_csv('results/spacy_ner.csv')\n", "spacy_ner_df" ] }, { "cell_type": "code", "execution_count": 26, "id": "6466877e-e744-4cb1-8d4f-f818e1d3ee7d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.582916</td>\n", " <td>0.427364</td>\n", " <td>0.279190</td>\n", " <td>0.229402</td>\n", " <td>0.212373</td>\n", " <td>0.160957</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.888989</td>\n", " <td>0.702107</td>\n", " <td>0.511865</td>\n", " <td>0.440081</td>\n", " <td>0.415821</td>\n", " <td>0.298583</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.451950</td>\n", " <td>0.350228</td>\n", " <td>0.233061</td>\n", " <td>0.188461</td>\n", " <td>0.208664</td>\n", " <td>0.340656</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.468415</td>\n", " <td>0.338927</td>\n", " <td>0.260157</td>\n", " <td>0.207241</td>\n", " <td>0.194587</td>\n", " <td>0.141560</td>\n", " <td>0.073667</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.700735</td>\n", " <td>0.619382</td>\n", " <td>0.567487</td>\n", " <td>0.513574</td>\n", " <td>0.552826</td>\n", " <td>0.336656</td>\n", " <td>0.236770</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.310661</td>\n", " <td>0.235596</td>\n", " <td>0.180943</td>\n", " <td>0.153288</td>\n", " <td>0.159867</td>\n", " <td>0.245229</td>\n", " <td>0.164607</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.449640</td>\n", " <td>0.344001</td>\n", " <td>0.282088</td>\n", " <td>0.275634</td>\n", " <td>0.264093</td>\n", " <td>0.094206</td>\n", " <td>0.053148</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.608813</td>\n", " <td>0.529599</td>\n", " <td>0.472205</td>\n", " <td>0.443094</td>\n", " <td>0.441656</td>\n", " <td>0.228980</td>\n", " <td>0.157855</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.347653</td>\n", " <td>0.248060</td>\n", " <td>0.198001</td>\n", " <td>0.168237</td>\n", " <td>0.205059</td>\n", " <td>0.313704</td>\n", " <td>0.203633</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.364700</td>\n", " <td>0.269092</td>\n", " <td>0.218361</td>\n", " <td>0.189632</td>\n", " <td>0.189108</td>\n", " <td>0.115212</td>\n", " <td>0.057875</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.735663</td>\n", " <td>0.597724</td>\n", " <td>0.500377</td>\n", " <td>0.438344</td>\n", " <td>0.417785</td>\n", " <td>0.285531</td>\n", " <td>0.153250</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.401738</td>\n", " <td>0.332257</td>\n", " <td>0.278988</td>\n", " <td>0.245468</td>\n", " <td>0.247638</td>\n", " <td>-1.000000</td>\n", " <td>0.236106</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.594285</td>\n", " <td>0.452570</td>\n", " <td>0.318702</td>\n", " <td>0.276475</td>\n", " <td>0.261194</td>\n", " <td>0.184994</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>0.988993</td>\n", " <td>0.853431</td>\n", " <td>0.653693</td>\n", " <td>0.585884</td>\n", " <td>0.597468</td>\n", " <td>0.454939</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.374544</td>\n", " <td>0.277290</td>\n", " <td>0.198685</td>\n", " <td>0.164524</td>\n", " <td>0.161887</td>\n", " <td>0.309752</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.284499</td>\n", " <td>0.224748</td>\n", " <td>0.187365</td>\n", " <td>0.189561</td>\n", " <td>0.184028</td>\n", " <td>0.096476</td>\n", " <td>0.051401</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.880992</td>\n", " <td>0.747677</td>\n", " <td>0.695294</td>\n", " <td>0.690749</td>\n", " <td>0.697884</td>\n", " <td>0.508818</td>\n", " <td>0.384215</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.252463</td>\n", " <td>0.206225</td>\n", " <td>0.229706</td>\n", " <td>0.195846</td>\n", " <td>0.231587</td>\n", " <td>0.230351</td>\n", " <td>0.173987</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.295853</td>\n", " <td>0.250928</td>\n", " <td>0.224483</td>\n", " <td>0.218855</td>\n", " <td>0.218479</td>\n", " <td>0.367414</td>\n", " <td>0.078904</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.634351</td>\n", " <td>0.623962</td>\n", " <td>0.626942</td>\n", " <td>0.626588</td>\n", " <td>0.620953</td>\n", " <td>0.584547</td>\n", " <td>0.329282</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.345836</td>\n", " <td>0.319493</td>\n", " <td>0.319060</td>\n", " <td>0.466410</td>\n", " <td>0.408949</td>\n", " <td>0.377100</td>\n", " <td>0.160883</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.582916 0.427364 0.279190 0.229402 \n", "nl_minds14 0.888989 0.702107 0.511865 0.440081 \n", "nl_voxpopuli 0.451950 0.350228 0.233061 0.188461 \n", "fr_google_fleurs 0.468415 0.338927 0.260157 0.207241 \n", "fr_minds14 0.700735 0.619382 0.567487 0.513574 \n", "fr_voxpopuli 0.310661 0.235596 0.180943 0.153288 \n", "de_google_fleurs 0.449640 0.344001 0.282088 0.275634 \n", "de_minds14 0.608813 0.529599 0.472205 0.443094 \n", "de_voxpopuli 0.347653 0.248060 0.198001 0.168237 \n", "it_google_fleurs 0.364700 0.269092 0.218361 0.189632 \n", "it_minds14 0.735663 0.597724 0.500377 0.438344 \n", "it_voxpopuli 0.401738 0.332257 0.278988 0.245468 \n", "pl_google_fleurs 0.594285 0.452570 0.318702 0.276475 \n", "pl_minds14 0.988993 0.853431 0.653693 0.585884 \n", "pl_voxpopuli 0.374544 0.277290 0.198685 0.164524 \n", "es_google_fleurs 0.284499 0.224748 0.187365 0.189561 \n", "es_minds14 0.880992 0.747677 0.695294 0.690749 \n", "es_voxpopuli 0.252463 0.206225 0.229706 0.195846 \n", "en_google_fleurs 0.295853 0.250928 0.224483 0.218855 \n", "en_minds14 0.634351 0.623962 0.626942 0.626588 \n", "en_voxpopuli 0.345836 0.319493 0.319060 0.466410 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.212373 0.160957 -1.000000 \n", "nl_minds14 0.415821 0.298583 -1.000000 \n", "nl_voxpopuli 0.208664 0.340656 -1.000000 \n", "fr_google_fleurs 0.194587 0.141560 0.073667 \n", "fr_minds14 0.552826 0.336656 0.236770 \n", "fr_voxpopuli 0.159867 0.245229 0.164607 \n", "de_google_fleurs 0.264093 0.094206 0.053148 \n", "de_minds14 0.441656 0.228980 0.157855 \n", "de_voxpopuli 0.205059 0.313704 0.203633 \n", "it_google_fleurs 0.189108 0.115212 0.057875 \n", "it_minds14 0.417785 0.285531 0.153250 \n", "it_voxpopuli 0.247638 -1.000000 0.236106 \n", "pl_google_fleurs 0.261194 0.184994 -1.000000 \n", "pl_minds14 0.597468 0.454939 -1.000000 \n", "pl_voxpopuli 0.161887 0.309752 -1.000000 \n", "es_google_fleurs 0.184028 0.096476 0.051401 \n", "es_minds14 0.697884 0.508818 0.384215 \n", "es_voxpopuli 0.231587 0.230351 0.173987 \n", "en_google_fleurs 0.218479 0.367414 0.078904 \n", "en_minds14 0.620953 0.584547 0.329282 \n", "en_voxpopuli 0.408949 0.377100 0.160883 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spacy_pos_df = pd.DataFrame(spacy_pos, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "spacy_pos_df.to_csv('results/spacy_pos.csv')\n", "spacy_pos_df" ] }, { "cell_type": "code", "execution_count": 27, "id": "77567361-b730-49f0-ab68-19ad335df1b1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.582916</td>\n", " <td>0.427364</td>\n", " <td>0.279190</td>\n", " <td>0.229402</td>\n", " <td>0.212373</td>\n", " <td>0.160957</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.888989</td>\n", " <td>0.702107</td>\n", " <td>0.511865</td>\n", " <td>0.440081</td>\n", " <td>0.415821</td>\n", " <td>0.298583</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.451950</td>\n", " <td>0.350228</td>\n", " <td>0.233061</td>\n", " <td>0.188461</td>\n", " <td>0.208664</td>\n", " <td>0.340656</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.468415</td>\n", " <td>0.338927</td>\n", " <td>0.260157</td>\n", " <td>0.207241</td>\n", " <td>0.194587</td>\n", " <td>0.141560</td>\n", " <td>0.073667</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.700735</td>\n", " <td>0.619382</td>\n", " <td>0.567487</td>\n", " <td>0.513574</td>\n", " <td>0.552826</td>\n", " <td>0.336656</td>\n", " <td>0.236770</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.310661</td>\n", " <td>0.235596</td>\n", " <td>0.180943</td>\n", " <td>0.153288</td>\n", " <td>0.159867</td>\n", " <td>0.245229</td>\n", " <td>0.164607</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.449640</td>\n", " <td>0.344001</td>\n", " <td>0.282088</td>\n", " <td>0.275634</td>\n", " <td>0.264093</td>\n", " <td>0.094206</td>\n", " <td>0.053148</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.608813</td>\n", " <td>0.529599</td>\n", " <td>0.472205</td>\n", " <td>0.443094</td>\n", " <td>0.441656</td>\n", " <td>0.228980</td>\n", " <td>0.157855</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.347653</td>\n", " <td>0.248060</td>\n", " <td>0.198001</td>\n", " <td>0.168237</td>\n", " <td>0.205059</td>\n", " <td>0.313704</td>\n", " <td>0.203633</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.364700</td>\n", " <td>0.269092</td>\n", " <td>0.218361</td>\n", " <td>0.189632</td>\n", " <td>0.189108</td>\n", " <td>0.115212</td>\n", " <td>0.057875</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.735663</td>\n", " <td>0.597724</td>\n", " <td>0.500377</td>\n", " <td>0.438344</td>\n", " <td>0.417785</td>\n", " <td>0.285531</td>\n", " <td>0.153250</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.401738</td>\n", " <td>0.332257</td>\n", " <td>0.278988</td>\n", " <td>0.245468</td>\n", " <td>0.247638</td>\n", " <td>-1.000000</td>\n", " <td>0.236106</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.594285</td>\n", " <td>0.452570</td>\n", " <td>0.318702</td>\n", " <td>0.276475</td>\n", " <td>0.261194</td>\n", " <td>0.184994</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>0.988993</td>\n", " <td>0.853431</td>\n", " <td>0.653693</td>\n", " <td>0.585884</td>\n", " <td>0.597468</td>\n", " <td>0.454939</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.374544</td>\n", " <td>0.277290</td>\n", " <td>0.198685</td>\n", " <td>0.164524</td>\n", " <td>0.161887</td>\n", " <td>0.309752</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.284499</td>\n", " <td>0.224748</td>\n", " <td>0.187365</td>\n", " <td>0.189561</td>\n", " <td>0.184028</td>\n", " <td>0.096476</td>\n", " <td>0.051401</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.880992</td>\n", " <td>0.747677</td>\n", " <td>0.695294</td>\n", " <td>0.690749</td>\n", " <td>0.697884</td>\n", " <td>0.508818</td>\n", " <td>0.384215</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.252463</td>\n", " <td>0.206225</td>\n", " <td>0.229706</td>\n", " <td>0.195846</td>\n", " <td>0.231587</td>\n", " <td>0.230351</td>\n", " <td>0.173987</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.295853</td>\n", " <td>0.250928</td>\n", " <td>0.224483</td>\n", " <td>0.218855</td>\n", " <td>0.218479</td>\n", " <td>0.367414</td>\n", " <td>0.078904</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.634351</td>\n", " <td>0.623962</td>\n", " <td>0.626942</td>\n", " <td>0.626588</td>\n", " <td>0.620953</td>\n", " <td>0.584547</td>\n", " <td>0.329282</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.345836</td>\n", " <td>0.319493</td>\n", " <td>0.319060</td>\n", " <td>0.466410</td>\n", " <td>0.408949</td>\n", " <td>0.377100</td>\n", " <td>0.160883</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.582916 0.427364 0.279190 0.229402 \n", "nl_minds14 0.888989 0.702107 0.511865 0.440081 \n", "nl_voxpopuli 0.451950 0.350228 0.233061 0.188461 \n", "fr_google_fleurs 0.468415 0.338927 0.260157 0.207241 \n", "fr_minds14 0.700735 0.619382 0.567487 0.513574 \n", "fr_voxpopuli 0.310661 0.235596 0.180943 0.153288 \n", "de_google_fleurs 0.449640 0.344001 0.282088 0.275634 \n", "de_minds14 0.608813 0.529599 0.472205 0.443094 \n", "de_voxpopuli 0.347653 0.248060 0.198001 0.168237 \n", "it_google_fleurs 0.364700 0.269092 0.218361 0.189632 \n", "it_minds14 0.735663 0.597724 0.500377 0.438344 \n", "it_voxpopuli 0.401738 0.332257 0.278988 0.245468 \n", "pl_google_fleurs 0.594285 0.452570 0.318702 0.276475 \n", "pl_minds14 0.988993 0.853431 0.653693 0.585884 \n", "pl_voxpopuli 0.374544 0.277290 0.198685 0.164524 \n", "es_google_fleurs 0.284499 0.224748 0.187365 0.189561 \n", "es_minds14 0.880992 0.747677 0.695294 0.690749 \n", "es_voxpopuli 0.252463 0.206225 0.229706 0.195846 \n", "en_google_fleurs 0.295853 0.250928 0.224483 0.218855 \n", "en_minds14 0.634351 0.623962 0.626942 0.626588 \n", "en_voxpopuli 0.345836 0.319493 0.319060 0.466410 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.212373 0.160957 -1.000000 \n", "nl_minds14 0.415821 0.298583 -1.000000 \n", "nl_voxpopuli 0.208664 0.340656 -1.000000 \n", "fr_google_fleurs 0.194587 0.141560 0.073667 \n", "fr_minds14 0.552826 0.336656 0.236770 \n", "fr_voxpopuli 0.159867 0.245229 0.164607 \n", "de_google_fleurs 0.264093 0.094206 0.053148 \n", "de_minds14 0.441656 0.228980 0.157855 \n", "de_voxpopuli 0.205059 0.313704 0.203633 \n", "it_google_fleurs 0.189108 0.115212 0.057875 \n", "it_minds14 0.417785 0.285531 0.153250 \n", "it_voxpopuli 0.247638 -1.000000 0.236106 \n", "pl_google_fleurs 0.261194 0.184994 -1.000000 \n", "pl_minds14 0.597468 0.454939 -1.000000 \n", "pl_voxpopuli 0.161887 0.309752 -1.000000 \n", "es_google_fleurs 0.184028 0.096476 0.051401 \n", "es_minds14 0.697884 0.508818 0.384215 \n", "es_voxpopuli 0.231587 0.230351 0.173987 \n", "en_google_fleurs 0.218479 0.367414 0.078904 \n", "en_minds14 0.620953 0.584547 0.329282 \n", "en_voxpopuli 0.408949 0.377100 0.160883 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "spacy_dep_df = pd.DataFrame(spacy_dep, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "spacy_dep_df.to_csv('results/spacy_dep.csv')\n", "spacy_dep_df" ] }, { "cell_type": "code", "execution_count": 28, "id": "3dbfbb6e-c369-47fd-801c-6df211943dc1", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.708020</td>\n", " <td>0.535692</td>\n", " <td>0.365346</td>\n", " <td>0.296100</td>\n", " <td>0.261951</td>\n", " <td>0.273752</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.897447</td>\n", " <td>0.714498</td>\n", " <td>0.503436</td>\n", " <td>0.419083</td>\n", " <td>0.389125</td>\n", " <td>0.465494</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.645715</td>\n", " <td>0.526939</td>\n", " <td>0.396940</td>\n", " <td>0.345034</td>\n", " <td>0.358023</td>\n", " <td>0.380835</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.600185</td>\n", " <td>0.470808</td>\n", " <td>0.378478</td>\n", " <td>0.324236</td>\n", " <td>0.309570</td>\n", " <td>0.305183</td>\n", " <td>0.206433</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.805977</td>\n", " <td>0.700773</td>\n", " <td>0.642619</td>\n", " <td>0.583323</td>\n", " <td>0.616411</td>\n", " <td>0.564885</td>\n", " <td>0.441154</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.510623</td>\n", " <td>0.440340</td>\n", " <td>0.382961</td>\n", " <td>0.359633</td>\n", " <td>0.365811</td>\n", " <td>0.323351</td>\n", " <td>0.187074</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.651989</td>\n", " <td>0.551766</td>\n", " <td>0.506944</td>\n", " <td>0.478476</td>\n", " <td>0.469045</td>\n", " <td>0.182395</td>\n", " <td>0.072162</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.659890</td>\n", " <td>0.554437</td>\n", " <td>0.474513</td>\n", " <td>0.429274</td>\n", " <td>0.425134</td>\n", " <td>0.437369</td>\n", " <td>0.357848</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.645898</td>\n", " <td>0.558876</td>\n", " <td>0.518976</td>\n", " <td>0.488194</td>\n", " <td>0.525581</td>\n", " <td>0.292203</td>\n", " <td>0.088256</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.465298</td>\n", " <td>0.355877</td>\n", " <td>0.287491</td>\n", " <td>0.254384</td>\n", " <td>0.251697</td>\n", " <td>0.218689</td>\n", " <td>0.140564</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.779429</td>\n", " <td>0.621546</td>\n", " <td>0.502670</td>\n", " <td>0.437805</td>\n", " <td>0.422781</td>\n", " <td>0.429940</td>\n", " <td>0.276002</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.562729</td>\n", " <td>0.477854</td>\n", " <td>0.420387</td>\n", " <td>0.388904</td>\n", " <td>0.393964</td>\n", " <td>-1.000000</td>\n", " <td>0.233076</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.700853</td>\n", " <td>0.553073</td>\n", " <td>0.384142</td>\n", " <td>0.318203</td>\n", " <td>0.298247</td>\n", " <td>0.335870</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>1.023324</td>\n", " <td>0.860626</td>\n", " <td>0.633766</td>\n", " <td>0.572826</td>\n", " <td>0.563293</td>\n", " <td>0.697584</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.588464</td>\n", " <td>0.489265</td>\n", " <td>0.380883</td>\n", " <td>0.345623</td>\n", " <td>0.349896</td>\n", " <td>0.324229</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.333658</td>\n", " <td>0.261352</td>\n", " <td>0.213950</td>\n", " <td>0.206351</td>\n", " <td>0.202078</td>\n", " <td>0.145522</td>\n", " <td>0.067686</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.884689</td>\n", " <td>0.740604</td>\n", " <td>0.664831</td>\n", " <td>0.656090</td>\n", " <td>0.650328</td>\n", " <td>0.602494</td>\n", " <td>0.436570</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.347112</td>\n", " <td>0.294192</td>\n", " <td>0.333500</td>\n", " <td>0.295472</td>\n", " <td>0.353273</td>\n", " <td>0.191242</td>\n", " <td>0.067363</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.348152</td>\n", " <td>0.307207</td>\n", " <td>0.278857</td>\n", " <td>0.268917</td>\n", " <td>0.270208</td>\n", " <td>1.031485</td>\n", " <td>0.114966</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.588375</td>\n", " <td>0.571845</td>\n", " <td>0.566381</td>\n", " <td>0.567538</td>\n", " <td>0.562651</td>\n", " <td>1.203252</td>\n", " <td>0.467297</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.475612</td>\n", " <td>0.451586</td>\n", " <td>0.453132</td>\n", " <td>0.594546</td>\n", " <td>0.549755</td>\n", " <td>1.020514</td>\n", " <td>0.067919</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.708020 0.535692 0.365346 0.296100 \n", "nl_minds14 0.897447 0.714498 0.503436 0.419083 \n", "nl_voxpopuli 0.645715 0.526939 0.396940 0.345034 \n", "fr_google_fleurs 0.600185 0.470808 0.378478 0.324236 \n", "fr_minds14 0.805977 0.700773 0.642619 0.583323 \n", "fr_voxpopuli 0.510623 0.440340 0.382961 0.359633 \n", "de_google_fleurs 0.651989 0.551766 0.506944 0.478476 \n", "de_minds14 0.659890 0.554437 0.474513 0.429274 \n", "de_voxpopuli 0.645898 0.558876 0.518976 0.488194 \n", "it_google_fleurs 0.465298 0.355877 0.287491 0.254384 \n", "it_minds14 0.779429 0.621546 0.502670 0.437805 \n", "it_voxpopuli 0.562729 0.477854 0.420387 0.388904 \n", "pl_google_fleurs 0.700853 0.553073 0.384142 0.318203 \n", "pl_minds14 1.023324 0.860626 0.633766 0.572826 \n", "pl_voxpopuli 0.588464 0.489265 0.380883 0.345623 \n", "es_google_fleurs 0.333658 0.261352 0.213950 0.206351 \n", "es_minds14 0.884689 0.740604 0.664831 0.656090 \n", "es_voxpopuli 0.347112 0.294192 0.333500 0.295472 \n", "en_google_fleurs 0.348152 0.307207 0.278857 0.268917 \n", "en_minds14 0.588375 0.571845 0.566381 0.567538 \n", "en_voxpopuli 0.475612 0.451586 0.453132 0.594546 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.261951 0.273752 -1.000000 \n", "nl_minds14 0.389125 0.465494 -1.000000 \n", "nl_voxpopuli 0.358023 0.380835 -1.000000 \n", "fr_google_fleurs 0.309570 0.305183 0.206433 \n", "fr_minds14 0.616411 0.564885 0.441154 \n", "fr_voxpopuli 0.365811 0.323351 0.187074 \n", "de_google_fleurs 0.469045 0.182395 0.072162 \n", "de_minds14 0.425134 0.437369 0.357848 \n", "de_voxpopuli 0.525581 0.292203 0.088256 \n", "it_google_fleurs 0.251697 0.218689 0.140564 \n", "it_minds14 0.422781 0.429940 0.276002 \n", "it_voxpopuli 0.393964 -1.000000 0.233076 \n", "pl_google_fleurs 0.298247 0.335870 -1.000000 \n", "pl_minds14 0.563293 0.697584 -1.000000 \n", "pl_voxpopuli 0.349896 0.324229 -1.000000 \n", "es_google_fleurs 0.202078 0.145522 0.067686 \n", "es_minds14 0.650328 0.602494 0.436570 \n", "es_voxpopuli 0.353273 0.191242 0.067363 \n", "en_google_fleurs 0.270208 1.031485 0.114966 \n", "en_minds14 0.562651 1.203252 0.467297 \n", "en_voxpopuli 0.549755 1.020514 0.067919 " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "word_wer_classic_metrics_df = pd.DataFrame(word_wer_classic_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "word_wer_classic_metrics_df.to_csv('results/word_wer_classic_metrics.csv')\n", "word_wer_classic_metrics_df" ] }, { "cell_type": "code", "execution_count": 29, "id": "77a6e273-1f5e-4a2b-9568-66e53ba99c7b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.487020</td>\n", " <td>0.332826</td>\n", " <td>0.173815</td>\n", " <td>0.118312</td>\n", " <td>0.092164</td>\n", " <td>0.186138</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.696387</td>\n", " <td>0.528807</td>\n", " <td>0.323153</td>\n", " <td>0.251855</td>\n", " <td>0.234766</td>\n", " <td>0.306648</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.440765</td>\n", " <td>0.349226</td>\n", " <td>0.233398</td>\n", " <td>0.187694</td>\n", " <td>0.203840</td>\n", " <td>0.295450</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.422005</td>\n", " <td>0.308031</td>\n", " <td>0.230959</td>\n", " <td>0.181520</td>\n", " <td>0.167575</td>\n", " <td>0.225745</td>\n", " <td>0.154588</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.598664</td>\n", " <td>0.499632</td>\n", " <td>0.447757</td>\n", " <td>0.395654</td>\n", " <td>0.429327</td>\n", " <td>0.441224</td>\n", " <td>0.342637</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.349906</td>\n", " <td>0.291653</td>\n", " <td>0.242314</td>\n", " <td>0.218193</td>\n", " <td>0.226681</td>\n", " <td>0.251004</td>\n", " <td>0.147786</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.328928</td>\n", " <td>0.213515</td>\n", " <td>0.151060</td>\n", " <td>0.116871</td>\n", " <td>0.104827</td>\n", " <td>0.118999</td>\n", " <td>0.048663</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.425754</td>\n", " <td>0.331317</td>\n", " <td>0.255620</td>\n", " <td>0.222602</td>\n", " <td>0.220104</td>\n", " <td>0.232533</td>\n", " <td>0.143306</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.349224</td>\n", " <td>0.259910</td>\n", " <td>0.208328</td>\n", " <td>0.176478</td>\n", " <td>0.215692</td>\n", " <td>0.228572</td>\n", " <td>0.065661</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.297877</td>\n", " <td>0.201276</td>\n", " <td>0.139435</td>\n", " <td>0.114579</td>\n", " <td>0.103925</td>\n", " <td>0.161414</td>\n", " <td>0.101285</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.603743</td>\n", " <td>0.455306</td>\n", " <td>0.323527</td>\n", " <td>0.264797</td>\n", " <td>0.255383</td>\n", " <td>0.299216</td>\n", " <td>0.162753</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.418096</td>\n", " <td>0.345687</td>\n", " <td>0.298079</td>\n", " <td>0.266888</td>\n", " <td>0.270669</td>\n", " <td>-1.000000</td>\n", " <td>0.193692</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.493295</td>\n", " <td>0.336319</td>\n", " <td>0.183046</td>\n", " <td>0.119453</td>\n", " <td>0.096625</td>\n", " <td>0.232851</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>0.822964</td>\n", " <td>0.633399</td>\n", " <td>0.420067</td>\n", " <td>0.353710</td>\n", " <td>0.342892</td>\n", " <td>0.519684</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.385923</td>\n", " <td>0.288336</td>\n", " <td>0.188413</td>\n", " <td>0.152321</td>\n", " <td>0.147463</td>\n", " <td>0.232410</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.196055</td>\n", " <td>0.130109</td>\n", " <td>0.084114</td>\n", " <td>0.077302</td>\n", " <td>0.067295</td>\n", " <td>0.102324</td>\n", " <td>0.048997</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.723086</td>\n", " <td>0.581624</td>\n", " <td>0.497037</td>\n", " <td>0.493568</td>\n", " <td>0.488170</td>\n", " <td>0.522209</td>\n", " <td>0.397315</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.222505</td>\n", " <td>0.172764</td>\n", " <td>0.195746</td>\n", " <td>0.162495</td>\n", " <td>0.201468</td>\n", " <td>0.143578</td>\n", " <td>0.053721</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.191329</td>\n", " <td>0.151693</td>\n", " <td>0.121134</td>\n", " <td>0.107578</td>\n", " <td>0.108609</td>\n", " <td>0.111466</td>\n", " <td>0.088609</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.449783</td>\n", " <td>0.433839</td>\n", " <td>0.427788</td>\n", " <td>0.431043</td>\n", " <td>0.424969</td>\n", " <td>0.424984</td>\n", " <td>0.363642</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.314581</td>\n", " <td>0.286802</td>\n", " <td>0.297819</td>\n", " <td>0.439680</td>\n", " <td>0.402555</td>\n", " <td>0.118296</td>\n", " <td>0.054176</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.487020 0.332826 0.173815 0.118312 \n", "nl_minds14 0.696387 0.528807 0.323153 0.251855 \n", "nl_voxpopuli 0.440765 0.349226 0.233398 0.187694 \n", "fr_google_fleurs 0.422005 0.308031 0.230959 0.181520 \n", "fr_minds14 0.598664 0.499632 0.447757 0.395654 \n", "fr_voxpopuli 0.349906 0.291653 0.242314 0.218193 \n", "de_google_fleurs 0.328928 0.213515 0.151060 0.116871 \n", "de_minds14 0.425754 0.331317 0.255620 0.222602 \n", "de_voxpopuli 0.349224 0.259910 0.208328 0.176478 \n", "it_google_fleurs 0.297877 0.201276 0.139435 0.114579 \n", "it_minds14 0.603743 0.455306 0.323527 0.264797 \n", "it_voxpopuli 0.418096 0.345687 0.298079 0.266888 \n", "pl_google_fleurs 0.493295 0.336319 0.183046 0.119453 \n", "pl_minds14 0.822964 0.633399 0.420067 0.353710 \n", "pl_voxpopuli 0.385923 0.288336 0.188413 0.152321 \n", "es_google_fleurs 0.196055 0.130109 0.084114 0.077302 \n", "es_minds14 0.723086 0.581624 0.497037 0.493568 \n", "es_voxpopuli 0.222505 0.172764 0.195746 0.162495 \n", "en_google_fleurs 0.191329 0.151693 0.121134 0.107578 \n", "en_minds14 0.449783 0.433839 0.427788 0.431043 \n", "en_voxpopuli 0.314581 0.286802 0.297819 0.439680 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.092164 0.186138 -1.000000 \n", "nl_minds14 0.234766 0.306648 -1.000000 \n", "nl_voxpopuli 0.203840 0.295450 -1.000000 \n", "fr_google_fleurs 0.167575 0.225745 0.154588 \n", "fr_minds14 0.429327 0.441224 0.342637 \n", "fr_voxpopuli 0.226681 0.251004 0.147786 \n", "de_google_fleurs 0.104827 0.118999 0.048663 \n", "de_minds14 0.220104 0.232533 0.143306 \n", "de_voxpopuli 0.215692 0.228572 0.065661 \n", "it_google_fleurs 0.103925 0.161414 0.101285 \n", "it_minds14 0.255383 0.299216 0.162753 \n", "it_voxpopuli 0.270669 -1.000000 0.193692 \n", "pl_google_fleurs 0.096625 0.232851 -1.000000 \n", "pl_minds14 0.342892 0.519684 -1.000000 \n", "pl_voxpopuli 0.147463 0.232410 -1.000000 \n", "es_google_fleurs 0.067295 0.102324 0.048997 \n", "es_minds14 0.488170 0.522209 0.397315 \n", "es_voxpopuli 0.201468 0.143578 0.053721 \n", "en_google_fleurs 0.108609 0.111466 0.088609 \n", "en_minds14 0.424969 0.424984 0.363642 \n", "en_voxpopuli 0.402555 0.118296 0.054176 " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "word_wer_soft_metrics_df = pd.DataFrame(word_wer_soft_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "word_wer_soft_metrics_df.to_csv('results/word_wer_soft_metrics.csv')\n", "word_wer_soft_metrics_df" ] }, { "cell_type": "code", "execution_count": 30, "id": "629318e6-8c00-413c-99d4-2b7ff559ac3f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.512857</td>\n", " <td>0.351476</td>\n", " <td>0.183268</td>\n", " <td>0.123803</td>\n", " <td>0.095700</td>\n", " <td>0.192525</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.732501</td>\n", " <td>0.554846</td>\n", " <td>0.346042</td>\n", " <td>0.267858</td>\n", " <td>0.244768</td>\n", " <td>0.319302</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.472829</td>\n", " <td>0.364308</td>\n", " <td>0.241434</td>\n", " <td>0.193047</td>\n", " <td>0.210556</td>\n", " <td>0.304289</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.442361</td>\n", " <td>0.321953</td>\n", " <td>0.240016</td>\n", " <td>0.188132</td>\n", " <td>0.174075</td>\n", " <td>0.233362</td>\n", " <td>0.159139</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.628774</td>\n", " <td>0.527781</td>\n", " <td>0.472124</td>\n", " <td>0.417764</td>\n", " <td>0.451830</td>\n", " <td>0.456835</td>\n", " <td>0.353934</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.365471</td>\n", " <td>0.304097</td>\n", " <td>0.251867</td>\n", " <td>0.226099</td>\n", " <td>0.235006</td>\n", " <td>0.259228</td>\n", " <td>0.150950</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.346586</td>\n", " <td>0.227203</td>\n", " <td>0.158453</td>\n", " <td>0.121399</td>\n", " <td>0.107550</td>\n", " <td>0.123204</td>\n", " <td>0.050265</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.446445</td>\n", " <td>0.346742</td>\n", " <td>0.265021</td>\n", " <td>0.229449</td>\n", " <td>0.226477</td>\n", " <td>0.238560</td>\n", " <td>0.147524</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.366639</td>\n", " <td>0.270086</td>\n", " <td>0.215487</td>\n", " <td>0.181204</td>\n", " <td>0.221848</td>\n", " <td>0.234268</td>\n", " <td>0.067181</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.313010</td>\n", " <td>0.210131</td>\n", " <td>0.144045</td>\n", " <td>0.117567</td>\n", " <td>0.106640</td>\n", " <td>0.165954</td>\n", " <td>0.104103</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.633334</td>\n", " <td>0.476970</td>\n", " <td>0.337584</td>\n", " <td>0.275103</td>\n", " <td>0.265102</td>\n", " <td>0.310508</td>\n", " <td>0.168097</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.439105</td>\n", " <td>0.363577</td>\n", " <td>0.310733</td>\n", " <td>0.278968</td>\n", " <td>0.283103</td>\n", " <td>-1.000000</td>\n", " <td>0.198565</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.520524</td>\n", " <td>0.358929</td>\n", " <td>0.190407</td>\n", " <td>0.123706</td>\n", " <td>0.098981</td>\n", " <td>0.242890</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>0.861366</td>\n", " <td>0.666738</td>\n", " <td>0.439214</td>\n", " <td>0.370198</td>\n", " <td>0.361172</td>\n", " <td>0.542831</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.404981</td>\n", " <td>0.301113</td>\n", " <td>0.194702</td>\n", " <td>0.156644</td>\n", " <td>0.151601</td>\n", " <td>0.240070</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.204884</td>\n", " <td>0.135018</td>\n", " <td>0.086281</td>\n", " <td>0.078608</td>\n", " <td>0.067940</td>\n", " <td>0.105327</td>\n", " <td>0.050019</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.752425</td>\n", " <td>0.601240</td>\n", " <td>0.511320</td>\n", " <td>0.505483</td>\n", " <td>0.497249</td>\n", " <td>0.535758</td>\n", " <td>0.401730</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.233013</td>\n", " <td>0.179737</td>\n", " <td>0.202485</td>\n", " <td>0.167919</td>\n", " <td>0.208381</td>\n", " <td>0.148001</td>\n", " <td>0.054963</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.198209</td>\n", " <td>0.157780</td>\n", " <td>0.125360</td>\n", " <td>0.111138</td>\n", " <td>0.112012</td>\n", " <td>0.116211</td>\n", " <td>0.092322</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.463499</td>\n", " <td>0.446222</td>\n", " <td>0.442346</td>\n", " <td>0.444175</td>\n", " <td>0.438048</td>\n", " <td>0.434445</td>\n", " <td>0.371188</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.325976</td>\n", " <td>0.294154</td>\n", " <td>0.306453</td>\n", " <td>0.451091</td>\n", " <td>0.414535</td>\n", " <td>0.120754</td>\n", " <td>0.055428</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.512857 0.351476 0.183268 0.123803 \n", "nl_minds14 0.732501 0.554846 0.346042 0.267858 \n", "nl_voxpopuli 0.472829 0.364308 0.241434 0.193047 \n", "fr_google_fleurs 0.442361 0.321953 0.240016 0.188132 \n", "fr_minds14 0.628774 0.527781 0.472124 0.417764 \n", "fr_voxpopuli 0.365471 0.304097 0.251867 0.226099 \n", "de_google_fleurs 0.346586 0.227203 0.158453 0.121399 \n", "de_minds14 0.446445 0.346742 0.265021 0.229449 \n", "de_voxpopuli 0.366639 0.270086 0.215487 0.181204 \n", "it_google_fleurs 0.313010 0.210131 0.144045 0.117567 \n", "it_minds14 0.633334 0.476970 0.337584 0.275103 \n", "it_voxpopuli 0.439105 0.363577 0.310733 0.278968 \n", "pl_google_fleurs 0.520524 0.358929 0.190407 0.123706 \n", "pl_minds14 0.861366 0.666738 0.439214 0.370198 \n", "pl_voxpopuli 0.404981 0.301113 0.194702 0.156644 \n", "es_google_fleurs 0.204884 0.135018 0.086281 0.078608 \n", "es_minds14 0.752425 0.601240 0.511320 0.505483 \n", "es_voxpopuli 0.233013 0.179737 0.202485 0.167919 \n", "en_google_fleurs 0.198209 0.157780 0.125360 0.111138 \n", "en_minds14 0.463499 0.446222 0.442346 0.444175 \n", "en_voxpopuli 0.325976 0.294154 0.306453 0.451091 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.095700 0.192525 -1.000000 \n", "nl_minds14 0.244768 0.319302 -1.000000 \n", "nl_voxpopuli 0.210556 0.304289 -1.000000 \n", "fr_google_fleurs 0.174075 0.233362 0.159139 \n", "fr_minds14 0.451830 0.456835 0.353934 \n", "fr_voxpopuli 0.235006 0.259228 0.150950 \n", "de_google_fleurs 0.107550 0.123204 0.050265 \n", "de_minds14 0.226477 0.238560 0.147524 \n", "de_voxpopuli 0.221848 0.234268 0.067181 \n", "it_google_fleurs 0.106640 0.165954 0.104103 \n", "it_minds14 0.265102 0.310508 0.168097 \n", "it_voxpopuli 0.283103 -1.000000 0.198565 \n", "pl_google_fleurs 0.098981 0.242890 -1.000000 \n", "pl_minds14 0.361172 0.542831 -1.000000 \n", "pl_voxpopuli 0.151601 0.240070 -1.000000 \n", "es_google_fleurs 0.067940 0.105327 0.050019 \n", "es_minds14 0.497249 0.535758 0.401730 \n", "es_voxpopuli 0.208381 0.148001 0.054963 \n", "en_google_fleurs 0.112012 0.116211 0.092322 \n", "en_minds14 0.438048 0.434445 0.371188 \n", "en_voxpopuli 0.414535 0.120754 0.055428 " ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "word_wer_embedding_metrics_df = pd.DataFrame(word_wer_embedding_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "word_wer_embedding_metrics_df.to_csv('results/word_wer_embedding_metrics.csv')\n", "word_wer_embedding_metrics_df" ] }, { "cell_type": "code", "execution_count": 31, "id": "99bfad3e-3c9f-42d6-9a36-ce1914b16bb5", "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>whisper_tiny</th>\n", " <th>whisper_base</th>\n", " <th>whisper_small</th>\n", " <th>whisper_medium</th>\n", " <th>whisper_large-v2</th>\n", " <th>facebook_wav2vec2</th>\n", " <th>nvidia_stt</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>nl_google_fleurs</th>\n", " <td>0.512857</td>\n", " <td>0.351476</td>\n", " <td>0.183268</td>\n", " <td>0.123803</td>\n", " <td>0.095700</td>\n", " <td>0.192525</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_minds14</th>\n", " <td>0.732501</td>\n", " <td>0.554846</td>\n", " <td>0.346042</td>\n", " <td>0.267858</td>\n", " <td>0.244768</td>\n", " <td>0.319302</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>nl_voxpopuli</th>\n", " <td>0.472829</td>\n", " <td>0.364308</td>\n", " <td>0.241434</td>\n", " <td>0.193047</td>\n", " <td>0.210556</td>\n", " <td>0.304289</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>fr_google_fleurs</th>\n", " <td>0.442361</td>\n", " <td>0.321953</td>\n", " <td>0.240016</td>\n", " <td>0.188132</td>\n", " <td>0.174075</td>\n", " <td>0.233362</td>\n", " <td>0.159139</td>\n", " </tr>\n", " <tr>\n", " <th>fr_minds14</th>\n", " <td>0.628774</td>\n", " <td>0.527781</td>\n", " <td>0.472124</td>\n", " <td>0.417764</td>\n", " <td>0.451830</td>\n", " <td>0.456835</td>\n", " <td>0.353934</td>\n", " </tr>\n", " <tr>\n", " <th>fr_voxpopuli</th>\n", " <td>0.365471</td>\n", " <td>0.304097</td>\n", " <td>0.251867</td>\n", " <td>0.226099</td>\n", " <td>0.235006</td>\n", " <td>0.259228</td>\n", " <td>0.150950</td>\n", " </tr>\n", " <tr>\n", " <th>de_google_fleurs</th>\n", " <td>0.346586</td>\n", " <td>0.227203</td>\n", " <td>0.158453</td>\n", " <td>0.121399</td>\n", " <td>0.107550</td>\n", " <td>0.123204</td>\n", " <td>0.050265</td>\n", " </tr>\n", " <tr>\n", " <th>de_minds14</th>\n", " <td>0.446445</td>\n", " <td>0.346742</td>\n", " <td>0.265021</td>\n", " <td>0.229449</td>\n", " <td>0.226477</td>\n", " <td>0.238560</td>\n", " <td>0.147524</td>\n", " </tr>\n", " <tr>\n", " <th>de_voxpopuli</th>\n", " <td>0.366639</td>\n", " <td>0.270086</td>\n", " <td>0.215487</td>\n", " <td>0.181204</td>\n", " <td>0.221848</td>\n", " <td>0.234268</td>\n", " <td>0.067181</td>\n", " </tr>\n", " <tr>\n", " <th>it_google_fleurs</th>\n", " <td>0.313010</td>\n", " <td>0.210131</td>\n", " <td>0.144045</td>\n", " <td>0.117567</td>\n", " <td>0.106640</td>\n", " <td>0.165954</td>\n", " <td>0.104103</td>\n", " </tr>\n", " <tr>\n", " <th>it_minds14</th>\n", " <td>0.633334</td>\n", " <td>0.476970</td>\n", " <td>0.337584</td>\n", " <td>0.275103</td>\n", " <td>0.265102</td>\n", " <td>0.310508</td>\n", " <td>0.168097</td>\n", " </tr>\n", " <tr>\n", " <th>it_voxpopuli</th>\n", " <td>0.439105</td>\n", " <td>0.363577</td>\n", " <td>0.310733</td>\n", " <td>0.278968</td>\n", " <td>0.283103</td>\n", " <td>-1.000000</td>\n", " <td>0.198565</td>\n", " </tr>\n", " <tr>\n", " <th>pl_google_fleurs</th>\n", " <td>0.520524</td>\n", " <td>0.358929</td>\n", " <td>0.190407</td>\n", " <td>0.123706</td>\n", " <td>0.098981</td>\n", " <td>0.242890</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_minds14</th>\n", " <td>0.861366</td>\n", " <td>0.666738</td>\n", " <td>0.439214</td>\n", " <td>0.370198</td>\n", " <td>0.361172</td>\n", " <td>0.542831</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>pl_voxpopuli</th>\n", " <td>0.404981</td>\n", " <td>0.301113</td>\n", " <td>0.194702</td>\n", " <td>0.156644</td>\n", " <td>0.151601</td>\n", " <td>0.240070</td>\n", " <td>-1.000000</td>\n", " </tr>\n", " <tr>\n", " <th>es_google_fleurs</th>\n", " <td>0.204884</td>\n", " <td>0.135018</td>\n", " <td>0.086281</td>\n", " <td>0.078608</td>\n", " <td>0.067940</td>\n", " <td>0.105327</td>\n", " <td>0.050019</td>\n", " </tr>\n", " <tr>\n", " <th>es_minds14</th>\n", " <td>0.752425</td>\n", " <td>0.601240</td>\n", " <td>0.511320</td>\n", " <td>0.505483</td>\n", " <td>0.497249</td>\n", " <td>0.535758</td>\n", " <td>0.401730</td>\n", " </tr>\n", " <tr>\n", " <th>es_voxpopuli</th>\n", " <td>0.233013</td>\n", " <td>0.179737</td>\n", " <td>0.202485</td>\n", " <td>0.167919</td>\n", " <td>0.208381</td>\n", " <td>0.148001</td>\n", " <td>0.054963</td>\n", " </tr>\n", " <tr>\n", " <th>en_google_fleurs</th>\n", " <td>0.198209</td>\n", " <td>0.157780</td>\n", " <td>0.125360</td>\n", " <td>0.111138</td>\n", " <td>0.112012</td>\n", " <td>0.116211</td>\n", " <td>0.092322</td>\n", " </tr>\n", " <tr>\n", " <th>en_minds14</th>\n", " <td>0.463499</td>\n", " <td>0.446222</td>\n", " <td>0.442346</td>\n", " <td>0.444175</td>\n", " <td>0.438048</td>\n", " <td>0.434445</td>\n", " <td>0.371188</td>\n", " </tr>\n", " <tr>\n", " <th>en_voxpopuli</th>\n", " <td>0.325976</td>\n", " <td>0.294154</td>\n", " <td>0.306453</td>\n", " <td>0.451091</td>\n", " <td>0.414535</td>\n", " <td>0.120754</td>\n", " <td>0.055428</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " whisper_tiny whisper_base whisper_small whisper_medium \\\n", "nl_google_fleurs 0.512857 0.351476 0.183268 0.123803 \n", "nl_minds14 0.732501 0.554846 0.346042 0.267858 \n", "nl_voxpopuli 0.472829 0.364308 0.241434 0.193047 \n", "fr_google_fleurs 0.442361 0.321953 0.240016 0.188132 \n", "fr_minds14 0.628774 0.527781 0.472124 0.417764 \n", "fr_voxpopuli 0.365471 0.304097 0.251867 0.226099 \n", "de_google_fleurs 0.346586 0.227203 0.158453 0.121399 \n", "de_minds14 0.446445 0.346742 0.265021 0.229449 \n", "de_voxpopuli 0.366639 0.270086 0.215487 0.181204 \n", "it_google_fleurs 0.313010 0.210131 0.144045 0.117567 \n", "it_minds14 0.633334 0.476970 0.337584 0.275103 \n", "it_voxpopuli 0.439105 0.363577 0.310733 0.278968 \n", "pl_google_fleurs 0.520524 0.358929 0.190407 0.123706 \n", "pl_minds14 0.861366 0.666738 0.439214 0.370198 \n", "pl_voxpopuli 0.404981 0.301113 0.194702 0.156644 \n", "es_google_fleurs 0.204884 0.135018 0.086281 0.078608 \n", "es_minds14 0.752425 0.601240 0.511320 0.505483 \n", "es_voxpopuli 0.233013 0.179737 0.202485 0.167919 \n", "en_google_fleurs 0.198209 0.157780 0.125360 0.111138 \n", "en_minds14 0.463499 0.446222 0.442346 0.444175 \n", "en_voxpopuli 0.325976 0.294154 0.306453 0.451091 \n", "\n", " whisper_large-v2 facebook_wav2vec2 nvidia_stt \n", "nl_google_fleurs 0.095700 0.192525 -1.000000 \n", "nl_minds14 0.244768 0.319302 -1.000000 \n", "nl_voxpopuli 0.210556 0.304289 -1.000000 \n", "fr_google_fleurs 0.174075 0.233362 0.159139 \n", "fr_minds14 0.451830 0.456835 0.353934 \n", "fr_voxpopuli 0.235006 0.259228 0.150950 \n", "de_google_fleurs 0.107550 0.123204 0.050265 \n", "de_minds14 0.226477 0.238560 0.147524 \n", "de_voxpopuli 0.221848 0.234268 0.067181 \n", "it_google_fleurs 0.106640 0.165954 0.104103 \n", "it_minds14 0.265102 0.310508 0.168097 \n", "it_voxpopuli 0.283103 -1.000000 0.198565 \n", "pl_google_fleurs 0.098981 0.242890 -1.000000 \n", "pl_minds14 0.361172 0.542831 -1.000000 \n", "pl_voxpopuli 0.151601 0.240070 -1.000000 \n", "es_google_fleurs 0.067940 0.105327 0.050019 \n", "es_minds14 0.497249 0.535758 0.401730 \n", "es_voxpopuli 0.208381 0.148001 0.054963 \n", "en_google_fleurs 0.112012 0.116211 0.092322 \n", "en_minds14 0.438048 0.434445 0.371188 \n", "en_voxpopuli 0.414535 0.120754 0.055428 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "flair_pos_df = pd.DataFrame(flair_pos, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n", "flair_pos_df.to_csv('results/flair_pos.csv')\n", "flair_pos_df" ] }, { "cell_type": "code", "execution_count": null, "id": "7275b2b0-957b-4618-9f66-7b88302f896a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" } }, "nbformat": 4, "nbformat_minor": 5 }