{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "955a0385-29fb-47dc-b012-729e49570594",
   "metadata": {},
   "outputs": [],
   "source": [
    "from new_experiment.utils.get_spacy_model_name import *\n",
    "\n",
    "from call_experiment_stats import *\n",
    "\n",
    "from new_experiment.utils.property_helper import PropertyHelper\n",
    "from new_experiment.utils.get_spacy_model_name import get_spacy_model_name\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "9f5e44a6-f211-4b61-8cb4-5636c7672c6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "COMMANDS = ['run_word_wer_classic_pipeline', 'run_word_wer_embedding_pipeline', 'run_spacy_dep_tag_wer_pipeline',\n",
    "            'run_spacy_ner_wer_pipeline', 'run_spacy_pos_wer_pipeline']\n",
    "LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']\n",
    "WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2']\n",
    "DATASETS = ['google_fleurs', 'minds14', 'voxpopuli']\n",
    "FULL_DATASET_NAMES = []\n",
    "for itt in LANGUAGES:\n",
    "    for it in DATASETS:\n",
    "        FULL_DATASET_NAMES.append(f'{itt}_{it}')\n",
    "\n",
    "FULL_LANGUAGE_MODELS = [f'whisper_{it}' for it in WHISPER_ASR_MODEL]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "d2465ceb-7439-4fa5-adf8-e95d7e6106b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "0vals = dict()\n",
    "with open('metrics.log', 'r') as reader:\n",
    "    lines = reader.read().splitlines(keepends=False)\n",
    "    for line in lines:\n",
    "        # print(line)\n",
    "        words = line.split()\n",
    "        key = f'{words[0]}_{words[1]}'\n",
    "        # print(key)\n",
    "        vals[key] = float(words[2])\n",
    "# vals"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "22d84451-b7e3-4dba-9758-068dae23ace4",
   "metadata": {},
   "outputs": [],
   "source": [
    "spacy_ner = [\n",
    "    [vals.get(f'{dataset}_{PropertyHelper.ner_metrics(model, get_spacy_model_name(dataset[:2]))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
    "    for dataset in FULL_DATASET_NAMES\n",
    "]\n",
    "spacy_pos = [\n",
    "    [vals.get(f'{dataset}_{PropertyHelper.pos_metrics(model, get_spacy_model_name(dataset[:2]))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
    "    for dataset in FULL_DATASET_NAMES\n",
    "]\n",
    "spacy_dep = [\n",
    "    [vals.get(f'{dataset}_{PropertyHelper.pos_metrics(model, get_spacy_model_name(dataset[:2]))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
    "    for dataset in FULL_DATASET_NAMES\n",
    "]\n",
    "word_wer_classic_metrics = [\n",
    "    [vals.get(f'{dataset}_{PropertyHelper.word_wer_classic_metrics(model)}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
    "    for dataset in FULL_DATASET_NAMES\n",
    "]\n",
    "\n",
    "#     for dataset in FULL_DATASET_NAMES:\n",
    "#         for model in FULL_LANGUAGE_MODELS:\n",
    "#             get_stats_for_classic_wer(dataset, PropertyHelper.word_wer_classic_metrics(model))\n",
    "\n",
    "#     for dataset in FULL_DATASET_NAMES:\n",
    "#         for model in FULL_LANGUAGE_MODELS:\n",
    "#             get_stats_for_soft_wer(dataset, PropertyHelper.word_wer_embeddings_metrics(model))\n",
    "\n",
    "#     for dataset in FULL_DATASET_NAMES:\n",
    "#         for model in FULL_LANGUAGE_MODELS:\n",
    "#             get_stats_for_embedding_wer(dataset, PropertyHelper.word_wer_embeddings_metrics(model))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "45fd851c-644f-48e6-b711-5bd312404b8b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tiny</th>\n",
       "      <th>base</th>\n",
       "      <th>small</th>\n",
       "      <th>medium</th>\n",
       "      <th>large-v2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.316124</td>\n",
       "      <td>0.230845</td>\n",
       "      <td>0.186936</td>\n",
       "      <td>0.170150</td>\n",
       "      <td>0.165057</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.463084</td>\n",
       "      <td>0.409993</td>\n",
       "      <td>0.360934</td>\n",
       "      <td>0.331613</td>\n",
       "      <td>0.324172</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.215158</td>\n",
       "      <td>0.178716</td>\n",
       "      <td>0.132960</td>\n",
       "      <td>0.118042</td>\n",
       "      <td>0.139958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.264291</td>\n",
       "      <td>0.193436</td>\n",
       "      <td>0.177302</td>\n",
       "      <td>0.147464</td>\n",
       "      <td>0.141276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.466860</td>\n",
       "      <td>0.468822</td>\n",
       "      <td>0.471754</td>\n",
       "      <td>0.444854</td>\n",
       "      <td>0.485090</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.161386</td>\n",
       "      <td>0.131144</td>\n",
       "      <td>0.113097</td>\n",
       "      <td>0.099114</td>\n",
       "      <td>0.111776</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.316175</td>\n",
       "      <td>0.257454</td>\n",
       "      <td>0.234163</td>\n",
       "      <td>0.239750</td>\n",
       "      <td>0.236715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.435681</td>\n",
       "      <td>0.425712</td>\n",
       "      <td>0.412896</td>\n",
       "      <td>0.398617</td>\n",
       "      <td>0.398762</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.200245</td>\n",
       "      <td>0.155502</td>\n",
       "      <td>0.133251</td>\n",
       "      <td>0.116949</td>\n",
       "      <td>0.156371</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.206301</td>\n",
       "      <td>0.172527</td>\n",
       "      <td>0.161195</td>\n",
       "      <td>0.156655</td>\n",
       "      <td>0.160677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.487493</td>\n",
       "      <td>0.448874</td>\n",
       "      <td>0.432679</td>\n",
       "      <td>0.416035</td>\n",
       "      <td>0.392705</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.334936</td>\n",
       "      <td>0.273025</td>\n",
       "      <td>0.227662</td>\n",
       "      <td>0.210962</td>\n",
       "      <td>0.209027</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.657194</td>\n",
       "      <td>0.591588</td>\n",
       "      <td>0.487344</td>\n",
       "      <td>0.474013</td>\n",
       "      <td>0.487891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.203548</td>\n",
       "      <td>0.158526</td>\n",
       "      <td>0.126280</td>\n",
       "      <td>0.110784</td>\n",
       "      <td>0.117780</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.187607</td>\n",
       "      <td>0.159873</td>\n",
       "      <td>0.147104</td>\n",
       "      <td>0.155210</td>\n",
       "      <td>0.154657</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.721295</td>\n",
       "      <td>0.670363</td>\n",
       "      <td>0.666278</td>\n",
       "      <td>0.673058</td>\n",
       "      <td>0.680341</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.133805</td>\n",
       "      <td>0.116222</td>\n",
       "      <td>0.119882</td>\n",
       "      <td>0.106610</td>\n",
       "      <td>0.122036</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.217843</td>\n",
       "      <td>0.188810</td>\n",
       "      <td>0.186407</td>\n",
       "      <td>0.183656</td>\n",
       "      <td>0.184568</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.562068</td>\n",
       "      <td>0.566999</td>\n",
       "      <td>0.580369</td>\n",
       "      <td>0.583945</td>\n",
       "      <td>0.578079</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.224980</td>\n",
       "      <td>0.203959</td>\n",
       "      <td>0.210278</td>\n",
       "      <td>0.322688</td>\n",
       "      <td>0.280877</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      tiny      base     small    medium  large-v2\n",
       "nl_google_fleurs  0.316124  0.230845  0.186936  0.170150  0.165057\n",
       "nl_minds14        0.463084  0.409993  0.360934  0.331613  0.324172\n",
       "nl_voxpopuli      0.215158  0.178716  0.132960  0.118042  0.139958\n",
       "fr_google_fleurs  0.264291  0.193436  0.177302  0.147464  0.141276\n",
       "fr_minds14        0.466860  0.468822  0.471754  0.444854  0.485090\n",
       "fr_voxpopuli      0.161386  0.131144  0.113097  0.099114  0.111776\n",
       "de_google_fleurs  0.316175  0.257454  0.234163  0.239750  0.236715\n",
       "de_minds14        0.435681  0.425712  0.412896  0.398617  0.398762\n",
       "de_voxpopuli      0.200245  0.155502  0.133251  0.116949  0.156371\n",
       "it_google_fleurs  0.206301  0.172527  0.161195  0.156655  0.160677\n",
       "it_minds14        0.487493  0.448874  0.432679  0.416035  0.392705\n",
       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
       "pl_google_fleurs  0.334936  0.273025  0.227662  0.210962  0.209027\n",
       "pl_minds14        0.657194  0.591588  0.487344  0.474013  0.487891\n",
       "pl_voxpopuli      0.203548  0.158526  0.126280  0.110784  0.117780\n",
       "es_google_fleurs  0.187607  0.159873  0.147104  0.155210  0.154657\n",
       "es_minds14        0.721295  0.670363  0.666278  0.673058  0.680341\n",
       "es_voxpopuli      0.133805  0.116222  0.119882  0.106610  0.122036\n",
       "en_google_fleurs  0.217843  0.188810  0.186407  0.183656  0.184568\n",
       "en_minds14        0.562068  0.566999  0.580369  0.583945  0.578079\n",
       "en_voxpopuli      0.224980  0.203959  0.210278  0.322688  0.280877"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(spacy_ner, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
    "# NER"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6466877e-e744-4cb1-8d4f-f818e1d3ee7d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tiny</th>\n",
       "      <th>base</th>\n",
       "      <th>small</th>\n",
       "      <th>medium</th>\n",
       "      <th>large-v2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.582916</td>\n",
       "      <td>0.427364</td>\n",
       "      <td>0.279190</td>\n",
       "      <td>0.229402</td>\n",
       "      <td>0.212373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.888989</td>\n",
       "      <td>0.702107</td>\n",
       "      <td>0.511865</td>\n",
       "      <td>0.440081</td>\n",
       "      <td>0.415821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.451950</td>\n",
       "      <td>0.350228</td>\n",
       "      <td>0.233061</td>\n",
       "      <td>0.188461</td>\n",
       "      <td>0.208664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.468415</td>\n",
       "      <td>0.338927</td>\n",
       "      <td>0.260157</td>\n",
       "      <td>0.207241</td>\n",
       "      <td>0.194587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.700735</td>\n",
       "      <td>0.619382</td>\n",
       "      <td>0.567487</td>\n",
       "      <td>0.513574</td>\n",
       "      <td>0.552826</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.310661</td>\n",
       "      <td>0.235596</td>\n",
       "      <td>0.180943</td>\n",
       "      <td>0.153288</td>\n",
       "      <td>0.159867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.449640</td>\n",
       "      <td>0.344001</td>\n",
       "      <td>0.282088</td>\n",
       "      <td>0.275634</td>\n",
       "      <td>0.264093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.608813</td>\n",
       "      <td>0.529599</td>\n",
       "      <td>0.472205</td>\n",
       "      <td>0.443094</td>\n",
       "      <td>0.441656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.347653</td>\n",
       "      <td>0.248060</td>\n",
       "      <td>0.198001</td>\n",
       "      <td>0.168237</td>\n",
       "      <td>0.205059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.364700</td>\n",
       "      <td>0.269092</td>\n",
       "      <td>0.218361</td>\n",
       "      <td>0.189632</td>\n",
       "      <td>0.189108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.735663</td>\n",
       "      <td>0.597724</td>\n",
       "      <td>0.500377</td>\n",
       "      <td>0.438344</td>\n",
       "      <td>0.417785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.594285</td>\n",
       "      <td>0.452570</td>\n",
       "      <td>0.318702</td>\n",
       "      <td>0.276475</td>\n",
       "      <td>0.261194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.988993</td>\n",
       "      <td>0.853431</td>\n",
       "      <td>0.653693</td>\n",
       "      <td>0.585884</td>\n",
       "      <td>0.597468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.374544</td>\n",
       "      <td>0.277290</td>\n",
       "      <td>0.198685</td>\n",
       "      <td>0.164524</td>\n",
       "      <td>0.161887</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.284499</td>\n",
       "      <td>0.224748</td>\n",
       "      <td>0.187365</td>\n",
       "      <td>0.189561</td>\n",
       "      <td>0.184028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.880992</td>\n",
       "      <td>0.747677</td>\n",
       "      <td>0.695294</td>\n",
       "      <td>0.690749</td>\n",
       "      <td>0.697884</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.252463</td>\n",
       "      <td>0.206225</td>\n",
       "      <td>0.229706</td>\n",
       "      <td>0.195846</td>\n",
       "      <td>0.231587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.295853</td>\n",
       "      <td>0.250928</td>\n",
       "      <td>0.224483</td>\n",
       "      <td>0.218855</td>\n",
       "      <td>0.218479</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.634351</td>\n",
       "      <td>0.623962</td>\n",
       "      <td>0.626942</td>\n",
       "      <td>0.626588</td>\n",
       "      <td>0.620953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.345836</td>\n",
       "      <td>0.319493</td>\n",
       "      <td>0.319060</td>\n",
       "      <td>0.466410</td>\n",
       "      <td>0.408949</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      tiny      base     small    medium  large-v2\n",
       "nl_google_fleurs  0.582916  0.427364  0.279190  0.229402  0.212373\n",
       "nl_minds14        0.888989  0.702107  0.511865  0.440081  0.415821\n",
       "nl_voxpopuli      0.451950  0.350228  0.233061  0.188461  0.208664\n",
       "fr_google_fleurs  0.468415  0.338927  0.260157  0.207241  0.194587\n",
       "fr_minds14        0.700735  0.619382  0.567487  0.513574  0.552826\n",
       "fr_voxpopuli      0.310661  0.235596  0.180943  0.153288  0.159867\n",
       "de_google_fleurs  0.449640  0.344001  0.282088  0.275634  0.264093\n",
       "de_minds14        0.608813  0.529599  0.472205  0.443094  0.441656\n",
       "de_voxpopuli      0.347653  0.248060  0.198001  0.168237  0.205059\n",
       "it_google_fleurs  0.364700  0.269092  0.218361  0.189632  0.189108\n",
       "it_minds14        0.735663  0.597724  0.500377  0.438344  0.417785\n",
       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
       "pl_google_fleurs  0.594285  0.452570  0.318702  0.276475  0.261194\n",
       "pl_minds14        0.988993  0.853431  0.653693  0.585884  0.597468\n",
       "pl_voxpopuli      0.374544  0.277290  0.198685  0.164524  0.161887\n",
       "es_google_fleurs  0.284499  0.224748  0.187365  0.189561  0.184028\n",
       "es_minds14        0.880992  0.747677  0.695294  0.690749  0.697884\n",
       "es_voxpopuli      0.252463  0.206225  0.229706  0.195846  0.231587\n",
       "en_google_fleurs  0.295853  0.250928  0.224483  0.218855  0.218479\n",
       "en_minds14        0.634351  0.623962  0.626942  0.626588  0.620953\n",
       "en_voxpopuli      0.345836  0.319493  0.319060  0.466410  0.408949"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(spacy_pos, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
    "# POS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "77567361-b730-49f0-ab68-19ad335df1b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tiny</th>\n",
       "      <th>base</th>\n",
       "      <th>small</th>\n",
       "      <th>medium</th>\n",
       "      <th>large-v2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.582916</td>\n",
       "      <td>0.427364</td>\n",
       "      <td>0.279190</td>\n",
       "      <td>0.229402</td>\n",
       "      <td>0.212373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.888989</td>\n",
       "      <td>0.702107</td>\n",
       "      <td>0.511865</td>\n",
       "      <td>0.440081</td>\n",
       "      <td>0.415821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.451950</td>\n",
       "      <td>0.350228</td>\n",
       "      <td>0.233061</td>\n",
       "      <td>0.188461</td>\n",
       "      <td>0.208664</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.468415</td>\n",
       "      <td>0.338927</td>\n",
       "      <td>0.260157</td>\n",
       "      <td>0.207241</td>\n",
       "      <td>0.194587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.700735</td>\n",
       "      <td>0.619382</td>\n",
       "      <td>0.567487</td>\n",
       "      <td>0.513574</td>\n",
       "      <td>0.552826</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.310661</td>\n",
       "      <td>0.235596</td>\n",
       "      <td>0.180943</td>\n",
       "      <td>0.153288</td>\n",
       "      <td>0.159867</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.449640</td>\n",
       "      <td>0.344001</td>\n",
       "      <td>0.282088</td>\n",
       "      <td>0.275634</td>\n",
       "      <td>0.264093</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.608813</td>\n",
       "      <td>0.529599</td>\n",
       "      <td>0.472205</td>\n",
       "      <td>0.443094</td>\n",
       "      <td>0.441656</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.347653</td>\n",
       "      <td>0.248060</td>\n",
       "      <td>0.198001</td>\n",
       "      <td>0.168237</td>\n",
       "      <td>0.205059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.364700</td>\n",
       "      <td>0.269092</td>\n",
       "      <td>0.218361</td>\n",
       "      <td>0.189632</td>\n",
       "      <td>0.189108</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.735663</td>\n",
       "      <td>0.597724</td>\n",
       "      <td>0.500377</td>\n",
       "      <td>0.438344</td>\n",
       "      <td>0.417785</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.594285</td>\n",
       "      <td>0.452570</td>\n",
       "      <td>0.318702</td>\n",
       "      <td>0.276475</td>\n",
       "      <td>0.261194</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.988993</td>\n",
       "      <td>0.853431</td>\n",
       "      <td>0.653693</td>\n",
       "      <td>0.585884</td>\n",
       "      <td>0.597468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.374544</td>\n",
       "      <td>0.277290</td>\n",
       "      <td>0.198685</td>\n",
       "      <td>0.164524</td>\n",
       "      <td>0.161887</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.284499</td>\n",
       "      <td>0.224748</td>\n",
       "      <td>0.187365</td>\n",
       "      <td>0.189561</td>\n",
       "      <td>0.184028</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.880992</td>\n",
       "      <td>0.747677</td>\n",
       "      <td>0.695294</td>\n",
       "      <td>0.690749</td>\n",
       "      <td>0.697884</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.252463</td>\n",
       "      <td>0.206225</td>\n",
       "      <td>0.229706</td>\n",
       "      <td>0.195846</td>\n",
       "      <td>0.231587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.295853</td>\n",
       "      <td>0.250928</td>\n",
       "      <td>0.224483</td>\n",
       "      <td>0.218855</td>\n",
       "      <td>0.218479</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.634351</td>\n",
       "      <td>0.623962</td>\n",
       "      <td>0.626942</td>\n",
       "      <td>0.626588</td>\n",
       "      <td>0.620953</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.345836</td>\n",
       "      <td>0.319493</td>\n",
       "      <td>0.319060</td>\n",
       "      <td>0.466410</td>\n",
       "      <td>0.408949</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      tiny      base     small    medium  large-v2\n",
       "nl_google_fleurs  0.582916  0.427364  0.279190  0.229402  0.212373\n",
       "nl_minds14        0.888989  0.702107  0.511865  0.440081  0.415821\n",
       "nl_voxpopuli      0.451950  0.350228  0.233061  0.188461  0.208664\n",
       "fr_google_fleurs  0.468415  0.338927  0.260157  0.207241  0.194587\n",
       "fr_minds14        0.700735  0.619382  0.567487  0.513574  0.552826\n",
       "fr_voxpopuli      0.310661  0.235596  0.180943  0.153288  0.159867\n",
       "de_google_fleurs  0.449640  0.344001  0.282088  0.275634  0.264093\n",
       "de_minds14        0.608813  0.529599  0.472205  0.443094  0.441656\n",
       "de_voxpopuli      0.347653  0.248060  0.198001  0.168237  0.205059\n",
       "it_google_fleurs  0.364700  0.269092  0.218361  0.189632  0.189108\n",
       "it_minds14        0.735663  0.597724  0.500377  0.438344  0.417785\n",
       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
       "pl_google_fleurs  0.594285  0.452570  0.318702  0.276475  0.261194\n",
       "pl_minds14        0.988993  0.853431  0.653693  0.585884  0.597468\n",
       "pl_voxpopuli      0.374544  0.277290  0.198685  0.164524  0.161887\n",
       "es_google_fleurs  0.284499  0.224748  0.187365  0.189561  0.184028\n",
       "es_minds14        0.880992  0.747677  0.695294  0.690749  0.697884\n",
       "es_voxpopuli      0.252463  0.206225  0.229706  0.195846  0.231587\n",
       "en_google_fleurs  0.295853  0.250928  0.224483  0.218855  0.218479\n",
       "en_minds14        0.634351  0.623962  0.626942  0.626588  0.620953\n",
       "en_voxpopuli      0.345836  0.319493  0.319060  0.466410  0.408949"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.DataFrame(spacy_dep, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
    "# DEP"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "3dbfbb6e-c369-47fd-801c-6df211943dc1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>tiny</th>\n",
       "      <th>base</th>\n",
       "      <th>small</th>\n",
       "      <th>medium</th>\n",
       "      <th>large-v2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.708020</td>\n",
       "      <td>0.535692</td>\n",
       "      <td>0.365346</td>\n",
       "      <td>0.296100</td>\n",
       "      <td>0.261951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.897447</td>\n",
       "      <td>0.714498</td>\n",
       "      <td>0.503436</td>\n",
       "      <td>0.419083</td>\n",
       "      <td>0.389125</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.645715</td>\n",
       "      <td>0.526939</td>\n",
       "      <td>0.396940</td>\n",
       "      <td>0.345034</td>\n",
       "      <td>0.358023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.600185</td>\n",
       "      <td>0.470808</td>\n",
       "      <td>0.378478</td>\n",
       "      <td>0.324236</td>\n",
       "      <td>0.309570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.805977</td>\n",
       "      <td>0.700773</td>\n",
       "      <td>0.642619</td>\n",
       "      <td>0.583323</td>\n",
       "      <td>0.616411</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.510623</td>\n",
       "      <td>0.440340</td>\n",
       "      <td>0.382961</td>\n",
       "      <td>0.359633</td>\n",
       "      <td>0.365811</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.651989</td>\n",
       "      <td>0.551766</td>\n",
       "      <td>0.506944</td>\n",
       "      <td>0.478476</td>\n",
       "      <td>0.469045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.659890</td>\n",
       "      <td>0.554437</td>\n",
       "      <td>0.474513</td>\n",
       "      <td>0.429274</td>\n",
       "      <td>0.425134</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.645898</td>\n",
       "      <td>0.558876</td>\n",
       "      <td>0.518976</td>\n",
       "      <td>0.488194</td>\n",
       "      <td>0.525581</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.465298</td>\n",
       "      <td>0.355877</td>\n",
       "      <td>0.287491</td>\n",
       "      <td>0.254384</td>\n",
       "      <td>0.251697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.779429</td>\n",
       "      <td>0.621546</td>\n",
       "      <td>0.502670</td>\n",
       "      <td>0.437805</td>\n",
       "      <td>0.422781</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.705909</td>\n",
       "      <td>0.553073</td>\n",
       "      <td>0.384142</td>\n",
       "      <td>0.318203</td>\n",
       "      <td>0.298247</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>1.009390</td>\n",
       "      <td>0.860626</td>\n",
       "      <td>0.633766</td>\n",
       "      <td>0.572826</td>\n",
       "      <td>0.563293</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.588464</td>\n",
       "      <td>0.489265</td>\n",
       "      <td>0.380883</td>\n",
       "      <td>0.345623</td>\n",
       "      <td>0.349896</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.333658</td>\n",
       "      <td>0.261352</td>\n",
       "      <td>0.213950</td>\n",
       "      <td>0.206351</td>\n",
       "      <td>0.202078</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.884689</td>\n",
       "      <td>0.740604</td>\n",
       "      <td>0.664831</td>\n",
       "      <td>0.656090</td>\n",
       "      <td>0.650328</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.347112</td>\n",
       "      <td>0.294192</td>\n",
       "      <td>0.333500</td>\n",
       "      <td>0.295472</td>\n",
       "      <td>0.353273</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.348152</td>\n",
       "      <td>0.307207</td>\n",
       "      <td>0.278857</td>\n",
       "      <td>0.268917</td>\n",
       "      <td>0.270208</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.588375</td>\n",
       "      <td>0.571845</td>\n",
       "      <td>0.566381</td>\n",
       "      <td>0.567538</td>\n",
       "      <td>0.562651</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.475612</td>\n",
       "      <td>0.451586</td>\n",
       "      <td>0.453132</td>\n",
       "      <td>0.594546</td>\n",
       "      <td>0.549755</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      tiny      base     small    medium  large-v2\n",
       "nl_google_fleurs  0.708020  0.535692  0.365346  0.296100  0.261951\n",
       "nl_minds14        0.897447  0.714498  0.503436  0.419083  0.389125\n",
       "nl_voxpopuli      0.645715  0.526939  0.396940  0.345034  0.358023\n",
       "fr_google_fleurs  0.600185  0.470808  0.378478  0.324236  0.309570\n",
       "fr_minds14        0.805977  0.700773  0.642619  0.583323  0.616411\n",
       "fr_voxpopuli      0.510623  0.440340  0.382961  0.359633  0.365811\n",
       "de_google_fleurs  0.651989  0.551766  0.506944  0.478476  0.469045\n",
       "de_minds14        0.659890  0.554437  0.474513  0.429274  0.425134\n",
       "de_voxpopuli      0.645898  0.558876  0.518976  0.488194  0.525581\n",
       "it_google_fleurs  0.465298  0.355877  0.287491  0.254384  0.251697\n",
       "it_minds14        0.779429  0.621546  0.502670  0.437805  0.422781\n",
       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
       "pl_google_fleurs  0.705909  0.553073  0.384142  0.318203  0.298247\n",
       "pl_minds14        1.009390  0.860626  0.633766  0.572826  0.563293\n",
       "pl_voxpopuli      0.588464  0.489265  0.380883  0.345623  0.349896\n",
       "es_google_fleurs  0.333658  0.261352  0.213950  0.206351  0.202078\n",
       "es_minds14        0.884689  0.740604  0.664831  0.656090  0.650328\n",
       "es_voxpopuli      0.347112  0.294192  0.333500  0.295472  0.353273\n",
       "en_google_fleurs  0.348152  0.307207  0.278857  0.268917  0.270208\n",
       "en_minds14        0.588375  0.571845  0.566381  0.567538  0.562651\n",
       "en_voxpopuli      0.475612  0.451586  0.453132  0.594546  0.549755"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Show the classic word-level WER values as a table: one row per\n",
    "# `<language>_<dataset>` name and one column per Whisper model size.\n",
    "# NOTE(review): rows that are entirely -1.0 look like placeholders for missing\n",
    "# results rather than real error rates; confirm where the matrix is built.\n",
    "pd.DataFrame(\n",
    "    data=word_wer_classic_metrics,\n",
    "    index=FULL_DATASET_NAMES,\n",
    "    columns=WHISPER_ASR_MODEL,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "77a6e273-1f5e-4a2b-9568-66e53ba99c7b",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "629318e6-8c00-413c-99d4-2b7ff559ac3f",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}