From a87187dbaba8b841128972edd9a2cb7b175ce88f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com>
Date: Thu, 12 Jan 2023 14:46:58 +0100
Subject: [PATCH] Add results processing

---
 call_experiment_stats.py                      |   37 +-
 metrics.ipynb                                 | 1050 ++++++++++++++++-
 .../repository/experiment_repository.py       |    6 +-
 .../repository/mongo_experiment_repository.py |    5 +-
 4 files changed, 1050 insertions(+), 48 deletions(-)

diff --git a/call_experiment_stats.py b/call_experiment_stats.py
index 60bc573..c93ad1c 100644
--- a/call_experiment_stats.py
+++ b/call_experiment_stats.py
@@ -5,9 +5,9 @@ from new_experiment.utils.property_helper import PropertyHelper
 
 def get_stats_for(dataset_name: str, property_name: str) -> float:
     repo = get_experiment_repository(dataset_name)
-    vals = [repo.get_property_for_key(it, property_name) for it in repo.get_all_record_ids_for_property(property_name)]
-    vals = [it for it in vals if isinstance(it, float)]
-    ret = 0.0
+    all_vals = repo.get_all_values_from_property(property_name)
+    vals = [all_vals[record_id] for record_id in all_vals.keys()]
+    vals = [ittt for ittt in vals if isinstance(ittt, float) and 10 > ittt > -2]
     if len(vals) == 0:
         ret = -1
     else:
@@ -18,38 +18,43 @@ def get_stats_for(dataset_name: str, property_name: str) -> float:
 
 def get_stats_for_classic_wer(dataset_name: str, property_name: str) -> float:
     repo = get_experiment_repository(dataset_name)
-    vals = [repo.get_property_for_key(it, property_name) for it in repo.get_all_record_ids_for_property(property_name)]
-    vals = [it['classic_wer'] for it in vals if 'classic_wer' in it]
-    vals = [it for it in vals if isinstance(it, float)]
-    ret = 0.0
+    all_vals = repo.get_all_values_from_property(property_name)
+    vals = [all_vals[record_id] for record_id in all_vals.keys()]
+    vals = [ittt['classic_wer'] for ittt in vals if 'classic_wer' in ittt]
+    vals = [ittt for ittt in vals if isinstance(ittt, float) and 10 > ittt > -2]
     if len(vals) == 0:
         ret = -1
     else:
         ret = sum(vals) / len(vals)
     print(dataset_name, property_name, ret)
-    return sum(vals) / len(vals)
+    return ret
 
 
 def get_stats_for_soft_wer(dataset_name: str, property_name: str) -> float:
     repo = get_experiment_repository(dataset_name)
-    vals = [repo.get_property_for_key(it, property_name) for it in repo.get_all_record_ids_for_property(property_name)]
-    vals = [it['soft_wer'] for it in vals if 'soft_wer' in it]
-    vals = [it for it in vals if isinstance(it, float)]
-    ret = 0.0
+    all_vals = repo.get_all_values_from_property(property_name)
+    vals = [all_vals[record_id] for record_id in all_vals.keys()]
+    vals = [ittt['soft_wer'] for ittt in vals if 'soft_wer' in ittt]
+    vals = [ittt for ittt in vals if isinstance(ittt, float) and 10 > ittt > -2]
     if len(vals) == 0:
         ret = -1
     else:
         ret = sum(vals) / len(vals)
-    print(dataset_name, property_name, ret)
-    return sum(vals) / len(vals)
+    print(dataset_name, property_name + '_soft', ret)
+    return ret
 
 
 def get_stats_for_embedding_wer(dataset_name: str, property_name: str) -> float:
     repo = get_experiment_repository(dataset_name)
     vals = [repo.get_property_for_key(it, property_name) for it in repo.get_all_record_ids_for_property(property_name)]
     vals = [it['embedding_wer'] for it in vals if 'embedding_wer' in it]
-    vals = [it for it in vals if isinstance(it, float)]
-    return sum(vals) / len(vals)
+    vals = [ittt for ittt in vals if isinstance(ittt, float)]
+    if len(vals) == 0:
+        ret = -1
+    else:
+        ret = sum(vals) / len(vals)
+    print(dataset_name, property_name + '_emb', ret)
+    return ret
 
 
 if __name__ == '__main__':
diff --git a/metrics.ipynb b/metrics.ipynb
index 7a79fe7..0341b3c 100644
--- a/metrics.ipynb
+++ b/metrics.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 9,
    "id": "955a0385-29fb-47dc-b012-729e49570594",
    "metadata": {},
    "outputs": [],
@@ -12,12 +12,14 @@
     "from call_experiment_stats import *\n",
     "\n",
     "from new_experiment.utils.property_helper import PropertyHelper\n",
-    "from new_experiment.utils.get_spacy_model_name import get_spacy_model_name"
+    "from new_experiment.utils.get_spacy_model_name import get_spacy_model_name\n",
+    "import pandas as pd\n",
+    "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 10,
    "id": "9f5e44a6-f211-4b61-8cb4-5636c7672c6a",
    "metadata": {},
    "outputs": [],
@@ -31,50 +33,1038 @@
     "for itt in LANGUAGES:\n",
     "    for it in DATASETS:\n",
     "        FULL_DATASET_NAMES.append(f'{itt}_{it}')\n",
-    "        \n",
+    "\n",
     "FULL_LANGUAGE_MODELS = [f'whisper_{it}' for it in WHISPER_ASR_MODEL]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 18,
+   "id": "d2465ceb-7439-4fa5-adf8-e95d7e6106b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "vals = dict()\n",
+    "with open('metrics.log', 'r') as reader:\n",
+    "    lines = reader.read().splitlines(keepends=False)\n",
+    "    for line in lines:\n",
+    "        # print(line)\n",
+    "        words = line.split()\n",
+    "        key = f'{words[0]}_{words[1]}'\n",
+    "        # print(key)\n",
+    "        vals[key] = float(words[2])\n",
+    "# vals"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
    "id": "22d84451-b7e3-4dba-9758-068dae23ace4",
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "spacy_ner = [\n",
+    "    [vals.get(f'{dataset}_{PropertyHelper.ner_metrics(model, get_spacy_model_name(dataset[:2]))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
+    "    for dataset in FULL_DATASET_NAMES\n",
+    "]\n",
+    "spacy_pos = [\n",
+    "    [vals.get(f'{dataset}_{PropertyHelper.pos_metrics(model, get_spacy_model_name(dataset[:2]))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
+    "    for dataset in FULL_DATASET_NAMES\n",
+    "]\n",
+    "spacy_dep = [\n",
+    "    [vals.get(f'{dataset}_{PropertyHelper.pos_metrics(model, get_spacy_model_name(dataset[:2]))}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
+    "    for dataset in FULL_DATASET_NAMES\n",
+    "]\n",
+    "word_wer_classic_metrics = [\n",
+    "    [vals.get(f'{dataset}_{PropertyHelper.word_wer_classic_metrics(model)}', -1.0) for model in FULL_LANGUAGE_MODELS]\n",
+    "    for dataset in FULL_DATASET_NAMES\n",
+    "]\n",
+    "\n",
+    "#     for dataset in FULL_DATASET_NAMES:\n",
+    "#         for model in FULL_LANGUAGE_MODELS:\n",
+    "#             get_stats_for_classic_wer(dataset, PropertyHelper.word_wer_classic_metrics(model))\n",
+    "\n",
+    "#     for dataset in FULL_DATASET_NAMES:\n",
+    "#         for model in FULL_LANGUAGE_MODELS:\n",
+    "#             get_stats_for_soft_wer(dataset, PropertyHelper.word_wer_embeddings_metrics(model))\n",
+    "\n",
+    "#     for dataset in FULL_DATASET_NAMES:\n",
+    "#         for model in FULL_LANGUAGE_MODELS:\n",
+    "#             get_stats_for_embedding_wer(dataset, PropertyHelper.word_wer_embeddings_metrics(model))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "45fd851c-644f-48e6-b711-5bd312404b8b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tiny</th>\n",
+       "      <th>base</th>\n",
+       "      <th>small</th>\n",
+       "      <th>medium</th>\n",
+       "      <th>large-v2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>nl_google_fleurs</th>\n",
+       "      <td>0.316124</td>\n",
+       "      <td>0.230845</td>\n",
+       "      <td>0.186936</td>\n",
+       "      <td>0.170150</td>\n",
+       "      <td>0.165057</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_minds14</th>\n",
+       "      <td>0.463084</td>\n",
+       "      <td>0.409993</td>\n",
+       "      <td>0.360934</td>\n",
+       "      <td>0.331613</td>\n",
+       "      <td>0.324172</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_voxpopuli</th>\n",
+       "      <td>0.215158</td>\n",
+       "      <td>0.178716</td>\n",
+       "      <td>0.132960</td>\n",
+       "      <td>0.118042</td>\n",
+       "      <td>0.139958</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_google_fleurs</th>\n",
+       "      <td>0.264291</td>\n",
+       "      <td>0.193436</td>\n",
+       "      <td>0.177302</td>\n",
+       "      <td>0.147464</td>\n",
+       "      <td>0.141276</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_minds14</th>\n",
+       "      <td>0.466860</td>\n",
+       "      <td>0.468822</td>\n",
+       "      <td>0.471754</td>\n",
+       "      <td>0.444854</td>\n",
+       "      <td>0.485090</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_voxpopuli</th>\n",
+       "      <td>0.161386</td>\n",
+       "      <td>0.131144</td>\n",
+       "      <td>0.113097</td>\n",
+       "      <td>0.099114</td>\n",
+       "      <td>0.111776</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_google_fleurs</th>\n",
+       "      <td>0.316175</td>\n",
+       "      <td>0.257454</td>\n",
+       "      <td>0.234163</td>\n",
+       "      <td>0.239750</td>\n",
+       "      <td>0.236715</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_minds14</th>\n",
+       "      <td>0.435681</td>\n",
+       "      <td>0.425712</td>\n",
+       "      <td>0.412896</td>\n",
+       "      <td>0.398617</td>\n",
+       "      <td>0.398762</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_voxpopuli</th>\n",
+       "      <td>0.200245</td>\n",
+       "      <td>0.155502</td>\n",
+       "      <td>0.133251</td>\n",
+       "      <td>0.116949</td>\n",
+       "      <td>0.156371</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_google_fleurs</th>\n",
+       "      <td>0.206301</td>\n",
+       "      <td>0.172527</td>\n",
+       "      <td>0.161195</td>\n",
+       "      <td>0.156655</td>\n",
+       "      <td>0.160677</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_minds14</th>\n",
+       "      <td>0.487493</td>\n",
+       "      <td>0.448874</td>\n",
+       "      <td>0.432679</td>\n",
+       "      <td>0.416035</td>\n",
+       "      <td>0.392705</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_voxpopuli</th>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_google_fleurs</th>\n",
+       "      <td>0.334936</td>\n",
+       "      <td>0.273025</td>\n",
+       "      <td>0.227662</td>\n",
+       "      <td>0.210962</td>\n",
+       "      <td>0.209027</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_minds14</th>\n",
+       "      <td>0.657194</td>\n",
+       "      <td>0.591588</td>\n",
+       "      <td>0.487344</td>\n",
+       "      <td>0.474013</td>\n",
+       "      <td>0.487891</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_voxpopuli</th>\n",
+       "      <td>0.203548</td>\n",
+       "      <td>0.158526</td>\n",
+       "      <td>0.126280</td>\n",
+       "      <td>0.110784</td>\n",
+       "      <td>0.117780</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_google_fleurs</th>\n",
+       "      <td>0.187607</td>\n",
+       "      <td>0.159873</td>\n",
+       "      <td>0.147104</td>\n",
+       "      <td>0.155210</td>\n",
+       "      <td>0.154657</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_minds14</th>\n",
+       "      <td>0.721295</td>\n",
+       "      <td>0.670363</td>\n",
+       "      <td>0.666278</td>\n",
+       "      <td>0.673058</td>\n",
+       "      <td>0.680341</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_voxpopuli</th>\n",
+       "      <td>0.133805</td>\n",
+       "      <td>0.116222</td>\n",
+       "      <td>0.119882</td>\n",
+       "      <td>0.106610</td>\n",
+       "      <td>0.122036</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_google_fleurs</th>\n",
+       "      <td>0.217843</td>\n",
+       "      <td>0.188810</td>\n",
+       "      <td>0.186407</td>\n",
+       "      <td>0.183656</td>\n",
+       "      <td>0.184568</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_minds14</th>\n",
+       "      <td>0.562068</td>\n",
+       "      <td>0.566999</td>\n",
+       "      <td>0.580369</td>\n",
+       "      <td>0.583945</td>\n",
+       "      <td>0.578079</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_voxpopuli</th>\n",
+       "      <td>0.224980</td>\n",
+       "      <td>0.203959</td>\n",
+       "      <td>0.210278</td>\n",
+       "      <td>0.322688</td>\n",
+       "      <td>0.280877</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      tiny      base     small    medium  large-v2\n",
+       "nl_google_fleurs  0.316124  0.230845  0.186936  0.170150  0.165057\n",
+       "nl_minds14        0.463084  0.409993  0.360934  0.331613  0.324172\n",
+       "nl_voxpopuli      0.215158  0.178716  0.132960  0.118042  0.139958\n",
+       "fr_google_fleurs  0.264291  0.193436  0.177302  0.147464  0.141276\n",
+       "fr_minds14        0.466860  0.468822  0.471754  0.444854  0.485090\n",
+       "fr_voxpopuli      0.161386  0.131144  0.113097  0.099114  0.111776\n",
+       "de_google_fleurs  0.316175  0.257454  0.234163  0.239750  0.236715\n",
+       "de_minds14        0.435681  0.425712  0.412896  0.398617  0.398762\n",
+       "de_voxpopuli      0.200245  0.155502  0.133251  0.116949  0.156371\n",
+       "it_google_fleurs  0.206301  0.172527  0.161195  0.156655  0.160677\n",
+       "it_minds14        0.487493  0.448874  0.432679  0.416035  0.392705\n",
+       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
+       "pl_google_fleurs  0.334936  0.273025  0.227662  0.210962  0.209027\n",
+       "pl_minds14        0.657194  0.591588  0.487344  0.474013  0.487891\n",
+       "pl_voxpopuli      0.203548  0.158526  0.126280  0.110784  0.117780\n",
+       "es_google_fleurs  0.187607  0.159873  0.147104  0.155210  0.154657\n",
+       "es_minds14        0.721295  0.670363  0.666278  0.673058  0.680341\n",
+       "es_voxpopuli      0.133805  0.116222  0.119882  0.106610  0.122036\n",
+       "en_google_fleurs  0.217843  0.188810  0.186407  0.183656  0.184568\n",
+       "en_minds14        0.562068  0.566999  0.580369  0.583945  0.578079\n",
+       "en_voxpopuli      0.224980  0.203959  0.210278  0.322688  0.280877"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(spacy_ner, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
+    "# NER"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "6466877e-e744-4cb1-8d4f-f818e1d3ee7d",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tiny</th>\n",
+       "      <th>base</th>\n",
+       "      <th>small</th>\n",
+       "      <th>medium</th>\n",
+       "      <th>large-v2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>nl_google_fleurs</th>\n",
+       "      <td>0.582916</td>\n",
+       "      <td>0.427364</td>\n",
+       "      <td>0.279190</td>\n",
+       "      <td>0.229402</td>\n",
+       "      <td>0.212373</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_minds14</th>\n",
+       "      <td>0.888989</td>\n",
+       "      <td>0.702107</td>\n",
+       "      <td>0.511865</td>\n",
+       "      <td>0.440081</td>\n",
+       "      <td>0.415821</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_voxpopuli</th>\n",
+       "      <td>0.451950</td>\n",
+       "      <td>0.350228</td>\n",
+       "      <td>0.233061</td>\n",
+       "      <td>0.188461</td>\n",
+       "      <td>0.208664</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_google_fleurs</th>\n",
+       "      <td>0.468415</td>\n",
+       "      <td>0.338927</td>\n",
+       "      <td>0.260157</td>\n",
+       "      <td>0.207241</td>\n",
+       "      <td>0.194587</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_minds14</th>\n",
+       "      <td>0.700735</td>\n",
+       "      <td>0.619382</td>\n",
+       "      <td>0.567487</td>\n",
+       "      <td>0.513574</td>\n",
+       "      <td>0.552826</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_voxpopuli</th>\n",
+       "      <td>0.310661</td>\n",
+       "      <td>0.235596</td>\n",
+       "      <td>0.180943</td>\n",
+       "      <td>0.153288</td>\n",
+       "      <td>0.159867</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_google_fleurs</th>\n",
+       "      <td>0.449640</td>\n",
+       "      <td>0.344001</td>\n",
+       "      <td>0.282088</td>\n",
+       "      <td>0.275634</td>\n",
+       "      <td>0.264093</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_minds14</th>\n",
+       "      <td>0.608813</td>\n",
+       "      <td>0.529599</td>\n",
+       "      <td>0.472205</td>\n",
+       "      <td>0.443094</td>\n",
+       "      <td>0.441656</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_voxpopuli</th>\n",
+       "      <td>0.347653</td>\n",
+       "      <td>0.248060</td>\n",
+       "      <td>0.198001</td>\n",
+       "      <td>0.168237</td>\n",
+       "      <td>0.205059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_google_fleurs</th>\n",
+       "      <td>0.364700</td>\n",
+       "      <td>0.269092</td>\n",
+       "      <td>0.218361</td>\n",
+       "      <td>0.189632</td>\n",
+       "      <td>0.189108</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_minds14</th>\n",
+       "      <td>0.735663</td>\n",
+       "      <td>0.597724</td>\n",
+       "      <td>0.500377</td>\n",
+       "      <td>0.438344</td>\n",
+       "      <td>0.417785</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_voxpopuli</th>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_google_fleurs</th>\n",
+       "      <td>0.594285</td>\n",
+       "      <td>0.452570</td>\n",
+       "      <td>0.318702</td>\n",
+       "      <td>0.276475</td>\n",
+       "      <td>0.261194</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_minds14</th>\n",
+       "      <td>0.988993</td>\n",
+       "      <td>0.853431</td>\n",
+       "      <td>0.653693</td>\n",
+       "      <td>0.585884</td>\n",
+       "      <td>0.597468</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_voxpopuli</th>\n",
+       "      <td>0.374544</td>\n",
+       "      <td>0.277290</td>\n",
+       "      <td>0.198685</td>\n",
+       "      <td>0.164524</td>\n",
+       "      <td>0.161887</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_google_fleurs</th>\n",
+       "      <td>0.284499</td>\n",
+       "      <td>0.224748</td>\n",
+       "      <td>0.187365</td>\n",
+       "      <td>0.189561</td>\n",
+       "      <td>0.184028</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_minds14</th>\n",
+       "      <td>0.880992</td>\n",
+       "      <td>0.747677</td>\n",
+       "      <td>0.695294</td>\n",
+       "      <td>0.690749</td>\n",
+       "      <td>0.697884</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_voxpopuli</th>\n",
+       "      <td>0.252463</td>\n",
+       "      <td>0.206225</td>\n",
+       "      <td>0.229706</td>\n",
+       "      <td>0.195846</td>\n",
+       "      <td>0.231587</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_google_fleurs</th>\n",
+       "      <td>0.295853</td>\n",
+       "      <td>0.250928</td>\n",
+       "      <td>0.224483</td>\n",
+       "      <td>0.218855</td>\n",
+       "      <td>0.218479</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_minds14</th>\n",
+       "      <td>0.634351</td>\n",
+       "      <td>0.623962</td>\n",
+       "      <td>0.626942</td>\n",
+       "      <td>0.626588</td>\n",
+       "      <td>0.620953</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_voxpopuli</th>\n",
+       "      <td>0.345836</td>\n",
+       "      <td>0.319493</td>\n",
+       "      <td>0.319060</td>\n",
+       "      <td>0.466410</td>\n",
+       "      <td>0.408949</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      tiny      base     small    medium  large-v2\n",
+       "nl_google_fleurs  0.582916  0.427364  0.279190  0.229402  0.212373\n",
+       "nl_minds14        0.888989  0.702107  0.511865  0.440081  0.415821\n",
+       "nl_voxpopuli      0.451950  0.350228  0.233061  0.188461  0.208664\n",
+       "fr_google_fleurs  0.468415  0.338927  0.260157  0.207241  0.194587\n",
+       "fr_minds14        0.700735  0.619382  0.567487  0.513574  0.552826\n",
+       "fr_voxpopuli      0.310661  0.235596  0.180943  0.153288  0.159867\n",
+       "de_google_fleurs  0.449640  0.344001  0.282088  0.275634  0.264093\n",
+       "de_minds14        0.608813  0.529599  0.472205  0.443094  0.441656\n",
+       "de_voxpopuli      0.347653  0.248060  0.198001  0.168237  0.205059\n",
+       "it_google_fleurs  0.364700  0.269092  0.218361  0.189632  0.189108\n",
+       "it_minds14        0.735663  0.597724  0.500377  0.438344  0.417785\n",
+       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
+       "pl_google_fleurs  0.594285  0.452570  0.318702  0.276475  0.261194\n",
+       "pl_minds14        0.988993  0.853431  0.653693  0.585884  0.597468\n",
+       "pl_voxpopuli      0.374544  0.277290  0.198685  0.164524  0.161887\n",
+       "es_google_fleurs  0.284499  0.224748  0.187365  0.189561  0.184028\n",
+       "es_minds14        0.880992  0.747677  0.695294  0.690749  0.697884\n",
+       "es_voxpopuli      0.252463  0.206225  0.229706  0.195846  0.231587\n",
+       "en_google_fleurs  0.295853  0.250928  0.224483  0.218855  0.218479\n",
+       "en_minds14        0.634351  0.623962  0.626942  0.626588  0.620953\n",
+       "en_voxpopuli      0.345836  0.319493  0.319060  0.466410  0.408949"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(spacy_pos, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
+    "# POS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "77567361-b730-49f0-ab68-19ad335df1b1",
+   "metadata": {},
    "outputs": [
     {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[0.1875, 0.0, 0.3125, 0.2962962962962963, 0.2857142857142857, 0.2608695652173913, 0.29411764705882354, 0.43137254901960786, 0.45454545454545453, 0.2608695652173913, 0.25, 0.17647058823529413, 0.21666666666666667, 0.3076923076923077, 0.38461538461538464, 0.625, 0.1111111111111111, 0.3684210526315789, 0.15384615384615385, 0.16666666666666666, 0.2777777777777778, 0.17142857142857143, 0.12121212121212122, 0.14285714285714285, 0.35, 0.05, 0.125, 0.2857142857142857, 0.0, 0.22727272727272727, 0.47058823529411764, 0.7142857142857143, 0.3333333333333333, 4.461538461538462, 0.13043478260869565, 0.09090909090909091, 0.24, 0.21739130434782608, 0.3333333333333333, 0.20689655172413793, 0.25925925925925924, 0.35294117647058826, 0.27586206896551724, 0.4166666666666667, 0.29411764705882354, 0.3333333333333333, 0.17647058823529413, 0.25, 0.05263157894736842, 0.3333333333333333, 0.6, 0.1875, 0.36363636363636365, 0.10810810810810811, 0.17142857142857143, 0.1, 0.3793103448275862, 0.2413793103448276, 0.34782608695652173, 0.34782608695652173, 0.2608695652173913, 0.21875, 0.125, 0.18181818181818182, 0.1875, 0.15789473684210525, 0.19230769230769232, 0.35294117647058826, 0.15789473684210525, 0.4230769230769231, 0.058823529411764705, 0.12121212121212122, 0.23076923076923078, 0.375, 0.23076923076923078, 0.20689655172413793, 0.18181818181818182, 0.22580645161290322, 0.8, 0.2857142857142857, 0.5454545454545454, 0.35714285714285715, 0.09090909090909091, 0.2857142857142857, 0.15384615384615385, 0.2692307692307692, 0.46153846153846156, 0.2777777777777778, 0.5384615384615384, 0.4375, 0.4, 0.09090909090909091, 1.0, 0.5238095238095238, 0.23809523809523808, 0.2608695652173913, 0.15, 0.5555555555555556, 0.14285714285714285, 0.38095238095238093, 1.6666666666666667, 0.3333333333333333, 0.7083333333333334, 0.48, 0.1935483870967742, 0.2222222222222222, 0.4, 0.08333333333333333, 0.2857142857142857, 0.15, 0.35294117647058826, 0.14814814814814814, 0.4444444444444444, 0.1111111111111111, 
0.2857142857142857, 0.14285714285714285, 0.47058823529411764, 0.38095238095238093, 0.38095238095238093, 0.13043478260869565, 0.17857142857142858, 0.17391304347826086, 0.3333333333333333, 0.4117647058823529, 0.7857142857142857, 0.2727272727272727, 0.37037037037037035, 0.15789473684210525, 0.1875, 0.2777777777777778, 0.3076923076923077, 0.2903225806451613, 0.16666666666666666, 0.38461538461538464, 0.45, 0.35, 0.25806451612903225, 0.21428571428571427, 0.11764705882352941, 0.6666666666666666, 0.1, 0.13636363636363635, 0.20833333333333334, 0.3888888888888889, 1.0555555555555556, 0.1875, 0.7083333333333334, 0.5555555555555556, 0.3023255813953488, 0.1111111111111111, 0.5555555555555556, 0.21428571428571427, 0.6, 0.3235294117647059, 0.5789473684210527, 0.3333333333333333, 0.18181818181818182, 0.32, 0.2777777777777778, 0.4444444444444444, 0.2631578947368421, 0.5238095238095238, 0.23529411764705882, 0.05263157894736842, 0.92, 0.47058823529411764, 0.23076923076923078, 0.2727272727272727, 0.5263157894736842, 0.22727272727272727, 0.34615384615384615, 0.4, 0.6666666666666666, 0.2, 0.09090909090909091, 0.2, 0.21739130434782608, 0.21212121212121213, 0.047619047619047616, 0.24, 0.29411764705882354, 0.34615384615384615, 0.17857142857142858, 0.0, 0.3076923076923077, 0.14285714285714285, 0.038461538461538464, 0.2857142857142857, 0.2857142857142857, 0.22727272727272727, 0.25, 0.13333333333333333, 0.4444444444444444, 0.21951219512195122, 0.17391304347826086, 0.6296296296296297, 0.3333333333333333, 0.14814814814814814, 0.20833333333333334, 0.2222222222222222, 0.32, 0.06451612903225806, 0.07692307692307693, 0.29310344827586204, 0.11764705882352941, 0.10526315789473684, 0.4375, 0.3125, 0.14814814814814814, 0.2727272727272727, 0.46153846153846156, 0.20833333333333334, 0.125, 0.14285714285714285, 0.4666666666666667, 1.3, 0.4583333333333333, 0.13043478260869565, 0.17391304347826086, 0.3157894736842105, 0.17857142857142858, 0.4, 0.3157894736842105, 0.6363636363636364, 0.12195121951219512, 
0.05, 0.2916666666666667, 0.24324324324324326, 0.3333333333333333, 0.21739130434782608, 0.38461538461538464, 0.15789473684210525, 0.15, 0.09523809523809523, 0.2777777777777778, 0.21212121212121213, 0.07692307692307693, 0.1111111111111111, 0.0625, 0.3793103448275862, 0.29411764705882354, 0.4090909090909091, 0.4444444444444444, 0.7777777777777778, 0.3076923076923077, 0.2777777777777778, 0.1724137931034483, 0.26666666666666666, 0.13333333333333333, 0.45454545454545453, 0.4375]\n",
-      "nl_google_fleurs whisper_tiny__nl_core_news_lg__ner_metrics 0.3161237339690157\n",
-      "[0.25, 0.07692307692307693, 0.125, 0.2962962962962963, 0.23809523809523808, 0.13043478260869565, 0.23529411764705882, 0.2549019607843137, 0.2727272727272727, 0.4583333333333333, 0.17391304347826086, 0.3, 0.35294117647058826, 0.19230769230769232, 0.38461538461538464, 0.25, 0.2222222222222222, 0.3157894736842105, 0.15384615384615385, 0.08333333333333333, 0.2777777777777778, 0.08571428571428572, 0.15151515151515152, 0.14285714285714285, 0.2, 0.15, 0.041666666666666664, 0.21428571428571427, 0.125, 0.2727272727272727, 0.058823529411764705, 0.35714285714285715, 0.06666666666666667, 0.23076923076923078, 0.13043478260869565, 0.09090909090909091, 0.12, 0.21739130434782608, 0.5238095238095238, 0.13793103448275862, 0.07407407407407407, 0.29411764705882354, 0.27586206896551724, 0.4166666666666667, 0.16666666666666666, 0.11764705882352941, 0.11764705882352941, 0.16666666666666666, 0.05263157894736842, 0.3333333333333333, 0.6, 0.46875, 0.2727272727272727, 0.13513513513513514, 0.17142857142857143, 0.27586206896551724, 0.15, 0.20689655172413793, 0.34782608695652173, 0.15625, 0.21739130434782608, 0.17391304347826086, 0.125, 0.13636363636363635, 0.125, 0.29411764705882354, 0.3684210526315789, 0.15384615384615385, 0.10526315789473684, 0.058823529411764705, 0.34615384615384615, 0.030303030303030304, 0.1935483870967742, 0.2916666666666667, 0.3076923076923077, 0.13793103448275862, 0.06060606060606061, 0.20512820512820512, 0.4666666666666667, 0.14285714285714285, 0.18181818181818182, 0.42857142857142855, 0.15384615384615385, 0.17857142857142858, 0.18181818181818182, 0.23076923076923078, 0.38461538461538464, 0.3333333333333333, 0.23076923076923078, 1.625, 0.4, 0.09090909090909091, 0.23809523809523808, 0.3333333333333333, 0.3333333333333333, 0.2, 0.08695652173913043, 0.3333333333333333, 0.10714285714285714, 0.2857142857142857, 0.0, 0.13333333333333333, 0.4166666666666667, 0.24, 0.06451612903225806, 0.3888888888888889, 0.1, 0.16666666666666666, 0.1111111111111111, 0.2, 
0.29411764705882354, 0.14285714285714285, 0.3333333333333333, 0.1111111111111111, 0.23809523809523808, 0.09523809523809523, 0.11764705882352941, 0.14285714285714285, 0.2857142857142857, 0.08695652173913043, 0.17857142857142858, 0.2857142857142857, 0.08695652173913043, 0.35294117647058826, 0.14285714285714285, 0.36363636363636365, 0.05263157894736842, 0.0625, 0.25, 0.25925925925925924, 0.23076923076923078, 0.2777777777777778, 0.12903225806451613, 0.23076923076923078, 0.1, 0.2, 0.0967741935483871, 2.5, 0.17647058823529413, 0.5833333333333334, 0.0, 0.0, 0.08333333333333333, 0.1111111111111111, 0.2222222222222222, 0.25, 0.625, 0.4444444444444444, 0.13953488372093023, 0.2222222222222222, 0.3333333333333333, 0.14285714285714285, 0.4666666666666667, 0.23529411764705882, 0.7368421052631579, 0.0, 0.2777777777777778, 0.13333333333333333, 0.1111111111111111, 0.12, 0.19047619047619047, 0.05263157894736842, 0.23529411764705882, 0.05263157894736842, 0.24, 0.38235294117647056, 0.19230769230769232, 0.22727272727272727, 0.42105263157894735, 0.36363636363636365, 0.07692307692307693, 0.2, 0.25, 0.1, 0.18181818181818182, 0.2, 0.043478260869565216, 0.23809523809523808, 0.06060606060606061, 0.32, 0.17647058823529413, 0.38461538461538464, 0.17857142857142858, 0.19230769230769232, 0.3076923076923077, 0.047619047619047616, 0.038461538461538464, 0.38095238095238093, 0.14285714285714285, 0.13636363636363635, 0.06666666666666667, 0.10714285714285714, 0.3333333333333333, 0.12195121951219512, 0.043478260869565216, 0.6296296296296297, 0.2777777777777778, 0.14814814814814814, 0.08333333333333333, 0.1111111111111111, 0.2, 0.16129032258064516, 0.07692307692307693, 0.3103448275862069, 0.14705882352941177, 0.15789473684210525, 0.3125, 0.25, 0.2222222222222222, 0.18181818181818182, 0.125, 0.10256410256410256, 0.3333333333333333, 0.16666666666666666, 0.4666666666666667, 0.1, 0.17391304347826086, 0.4583333333333333, 0.08695652173913043, 0.2631578947368421, 0.17857142857142858, 0.26666666666666666, 
0.42105263157894735, 0.8181818181818182, 0.4, 0.07317073170731707, 0.2916666666666667, 0.16216216216216217, 0.20833333333333334, 0.043478260869565216, 0.23076923076923078, 0.10526315789473684, 0.2, 0.2222222222222222, 0.14285714285714285, 0.12121212121212122, 0.11538461538461539, 0.1388888888888889, 0.25, 0.5172413793103449, 0.29411764705882354, 0.13636363636363635, 0.2222222222222222, 0.15384615384615385, 0.4074074074074074, 0.2777777777777778, 0.13793103448275862, 0.13333333333333333, 0.13333333333333333, 0.45454545454545453, 0.125]\n",
-      "nl_google_fleurs whisper_base__nl_core_news_lg__ner_metrics 0.23084502550941563\n"
-     ]
-    },
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tiny</th>\n",
+       "      <th>base</th>\n",
+       "      <th>small</th>\n",
+       "      <th>medium</th>\n",
+       "      <th>large-v2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>nl_google_fleurs</th>\n",
+       "      <td>0.582916</td>\n",
+       "      <td>0.427364</td>\n",
+       "      <td>0.279190</td>\n",
+       "      <td>0.229402</td>\n",
+       "      <td>0.212373</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_minds14</th>\n",
+       "      <td>0.888989</td>\n",
+       "      <td>0.702107</td>\n",
+       "      <td>0.511865</td>\n",
+       "      <td>0.440081</td>\n",
+       "      <td>0.415821</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_voxpopuli</th>\n",
+       "      <td>0.451950</td>\n",
+       "      <td>0.350228</td>\n",
+       "      <td>0.233061</td>\n",
+       "      <td>0.188461</td>\n",
+       "      <td>0.208664</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_google_fleurs</th>\n",
+       "      <td>0.468415</td>\n",
+       "      <td>0.338927</td>\n",
+       "      <td>0.260157</td>\n",
+       "      <td>0.207241</td>\n",
+       "      <td>0.194587</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_minds14</th>\n",
+       "      <td>0.700735</td>\n",
+       "      <td>0.619382</td>\n",
+       "      <td>0.567487</td>\n",
+       "      <td>0.513574</td>\n",
+       "      <td>0.552826</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_voxpopuli</th>\n",
+       "      <td>0.310661</td>\n",
+       "      <td>0.235596</td>\n",
+       "      <td>0.180943</td>\n",
+       "      <td>0.153288</td>\n",
+       "      <td>0.159867</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_google_fleurs</th>\n",
+       "      <td>0.449640</td>\n",
+       "      <td>0.344001</td>\n",
+       "      <td>0.282088</td>\n",
+       "      <td>0.275634</td>\n",
+       "      <td>0.264093</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_minds14</th>\n",
+       "      <td>0.608813</td>\n",
+       "      <td>0.529599</td>\n",
+       "      <td>0.472205</td>\n",
+       "      <td>0.443094</td>\n",
+       "      <td>0.441656</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_voxpopuli</th>\n",
+       "      <td>0.347653</td>\n",
+       "      <td>0.248060</td>\n",
+       "      <td>0.198001</td>\n",
+       "      <td>0.168237</td>\n",
+       "      <td>0.205059</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_google_fleurs</th>\n",
+       "      <td>0.364700</td>\n",
+       "      <td>0.269092</td>\n",
+       "      <td>0.218361</td>\n",
+       "      <td>0.189632</td>\n",
+       "      <td>0.189108</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_minds14</th>\n",
+       "      <td>0.735663</td>\n",
+       "      <td>0.597724</td>\n",
+       "      <td>0.500377</td>\n",
+       "      <td>0.438344</td>\n",
+       "      <td>0.417785</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_voxpopuli</th>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_google_fleurs</th>\n",
+       "      <td>0.594285</td>\n",
+       "      <td>0.452570</td>\n",
+       "      <td>0.318702</td>\n",
+       "      <td>0.276475</td>\n",
+       "      <td>0.261194</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_minds14</th>\n",
+       "      <td>0.988993</td>\n",
+       "      <td>0.853431</td>\n",
+       "      <td>0.653693</td>\n",
+       "      <td>0.585884</td>\n",
+       "      <td>0.597468</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_voxpopuli</th>\n",
+       "      <td>0.374544</td>\n",
+       "      <td>0.277290</td>\n",
+       "      <td>0.198685</td>\n",
+       "      <td>0.164524</td>\n",
+       "      <td>0.161887</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_google_fleurs</th>\n",
+       "      <td>0.284499</td>\n",
+       "      <td>0.224748</td>\n",
+       "      <td>0.187365</td>\n",
+       "      <td>0.189561</td>\n",
+       "      <td>0.184028</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_minds14</th>\n",
+       "      <td>0.880992</td>\n",
+       "      <td>0.747677</td>\n",
+       "      <td>0.695294</td>\n",
+       "      <td>0.690749</td>\n",
+       "      <td>0.697884</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_voxpopuli</th>\n",
+       "      <td>0.252463</td>\n",
+       "      <td>0.206225</td>\n",
+       "      <td>0.229706</td>\n",
+       "      <td>0.195846</td>\n",
+       "      <td>0.231587</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_google_fleurs</th>\n",
+       "      <td>0.295853</td>\n",
+       "      <td>0.250928</td>\n",
+       "      <td>0.224483</td>\n",
+       "      <td>0.218855</td>\n",
+       "      <td>0.218479</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_minds14</th>\n",
+       "      <td>0.634351</td>\n",
+       "      <td>0.623962</td>\n",
+       "      <td>0.626942</td>\n",
+       "      <td>0.626588</td>\n",
+       "      <td>0.620953</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_voxpopuli</th>\n",
+       "      <td>0.345836</td>\n",
+       "      <td>0.319493</td>\n",
+       "      <td>0.319060</td>\n",
+       "      <td>0.466410</td>\n",
+       "      <td>0.408949</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      tiny      base     small    medium  large-v2\n",
+       "nl_google_fleurs  0.582916  0.427364  0.279190  0.229402  0.212373\n",
+       "nl_minds14        0.888989  0.702107  0.511865  0.440081  0.415821\n",
+       "nl_voxpopuli      0.451950  0.350228  0.233061  0.188461  0.208664\n",
+       "fr_google_fleurs  0.468415  0.338927  0.260157  0.207241  0.194587\n",
+       "fr_minds14        0.700735  0.619382  0.567487  0.513574  0.552826\n",
+       "fr_voxpopuli      0.310661  0.235596  0.180943  0.153288  0.159867\n",
+       "de_google_fleurs  0.449640  0.344001  0.282088  0.275634  0.264093\n",
+       "de_minds14        0.608813  0.529599  0.472205  0.443094  0.441656\n",
+       "de_voxpopuli      0.347653  0.248060  0.198001  0.168237  0.205059\n",
+       "it_google_fleurs  0.364700  0.269092  0.218361  0.189632  0.189108\n",
+       "it_minds14        0.735663  0.597724  0.500377  0.438344  0.417785\n",
+       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
+       "pl_google_fleurs  0.594285  0.452570  0.318702  0.276475  0.261194\n",
+       "pl_minds14        0.988993  0.853431  0.653693  0.585884  0.597468\n",
+       "pl_voxpopuli      0.374544  0.277290  0.198685  0.164524  0.161887\n",
+       "es_google_fleurs  0.284499  0.224748  0.187365  0.189561  0.184028\n",
+       "es_minds14        0.880992  0.747677  0.695294  0.690749  0.697884\n",
+       "es_voxpopuli      0.252463  0.206225  0.229706  0.195846  0.231587\n",
+       "en_google_fleurs  0.295853  0.250928  0.224483  0.218855  0.218479\n",
+       "en_minds14        0.634351  0.623962  0.626942  0.626588  0.620953\n",
+       "en_voxpopuli      0.345836  0.319493  0.319060  0.466410  0.408949"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pd.DataFrame(spacy_dep, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
+    "# DEP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "3dbfbb6e-c369-47fd-801c-6df211943dc1",
+   "metadata": {},
+   "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "KeyboardInterrupt\n",
-      "\n"
-     ]
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>tiny</th>\n",
+       "      <th>base</th>\n",
+       "      <th>small</th>\n",
+       "      <th>medium</th>\n",
+       "      <th>large-v2</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>nl_google_fleurs</th>\n",
+       "      <td>0.708020</td>\n",
+       "      <td>0.535692</td>\n",
+       "      <td>0.365346</td>\n",
+       "      <td>0.296100</td>\n",
+       "      <td>0.261951</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_minds14</th>\n",
+       "      <td>0.897447</td>\n",
+       "      <td>0.714498</td>\n",
+       "      <td>0.503436</td>\n",
+       "      <td>0.419083</td>\n",
+       "      <td>0.389125</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>nl_voxpopuli</th>\n",
+       "      <td>0.645715</td>\n",
+       "      <td>0.526939</td>\n",
+       "      <td>0.396940</td>\n",
+       "      <td>0.345034</td>\n",
+       "      <td>0.358023</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_google_fleurs</th>\n",
+       "      <td>0.600185</td>\n",
+       "      <td>0.470808</td>\n",
+       "      <td>0.378478</td>\n",
+       "      <td>0.324236</td>\n",
+       "      <td>0.309570</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_minds14</th>\n",
+       "      <td>0.805977</td>\n",
+       "      <td>0.700773</td>\n",
+       "      <td>0.642619</td>\n",
+       "      <td>0.583323</td>\n",
+       "      <td>0.616411</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fr_voxpopuli</th>\n",
+       "      <td>0.510623</td>\n",
+       "      <td>0.440340</td>\n",
+       "      <td>0.382961</td>\n",
+       "      <td>0.359633</td>\n",
+       "      <td>0.365811</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_google_fleurs</th>\n",
+       "      <td>0.651989</td>\n",
+       "      <td>0.551766</td>\n",
+       "      <td>0.506944</td>\n",
+       "      <td>0.478476</td>\n",
+       "      <td>0.469045</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_minds14</th>\n",
+       "      <td>0.659890</td>\n",
+       "      <td>0.554437</td>\n",
+       "      <td>0.474513</td>\n",
+       "      <td>0.429274</td>\n",
+       "      <td>0.425134</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>de_voxpopuli</th>\n",
+       "      <td>0.645898</td>\n",
+       "      <td>0.558876</td>\n",
+       "      <td>0.518976</td>\n",
+       "      <td>0.488194</td>\n",
+       "      <td>0.525581</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_google_fleurs</th>\n",
+       "      <td>0.465298</td>\n",
+       "      <td>0.355877</td>\n",
+       "      <td>0.287491</td>\n",
+       "      <td>0.254384</td>\n",
+       "      <td>0.251697</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_minds14</th>\n",
+       "      <td>0.779429</td>\n",
+       "      <td>0.621546</td>\n",
+       "      <td>0.502670</td>\n",
+       "      <td>0.437805</td>\n",
+       "      <td>0.422781</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>it_voxpopuli</th>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "      <td>-1.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_google_fleurs</th>\n",
+       "      <td>0.705909</td>\n",
+       "      <td>0.553073</td>\n",
+       "      <td>0.384142</td>\n",
+       "      <td>0.318203</td>\n",
+       "      <td>0.298247</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_minds14</th>\n",
+       "      <td>1.009390</td>\n",
+       "      <td>0.860626</td>\n",
+       "      <td>0.633766</td>\n",
+       "      <td>0.572826</td>\n",
+       "      <td>0.563293</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>pl_voxpopuli</th>\n",
+       "      <td>0.588464</td>\n",
+       "      <td>0.489265</td>\n",
+       "      <td>0.380883</td>\n",
+       "      <td>0.345623</td>\n",
+       "      <td>0.349896</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_google_fleurs</th>\n",
+       "      <td>0.333658</td>\n",
+       "      <td>0.261352</td>\n",
+       "      <td>0.213950</td>\n",
+       "      <td>0.206351</td>\n",
+       "      <td>0.202078</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_minds14</th>\n",
+       "      <td>0.884689</td>\n",
+       "      <td>0.740604</td>\n",
+       "      <td>0.664831</td>\n",
+       "      <td>0.656090</td>\n",
+       "      <td>0.650328</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>es_voxpopuli</th>\n",
+       "      <td>0.347112</td>\n",
+       "      <td>0.294192</td>\n",
+       "      <td>0.333500</td>\n",
+       "      <td>0.295472</td>\n",
+       "      <td>0.353273</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_google_fleurs</th>\n",
+       "      <td>0.348152</td>\n",
+       "      <td>0.307207</td>\n",
+       "      <td>0.278857</td>\n",
+       "      <td>0.268917</td>\n",
+       "      <td>0.270208</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_minds14</th>\n",
+       "      <td>0.588375</td>\n",
+       "      <td>0.571845</td>\n",
+       "      <td>0.566381</td>\n",
+       "      <td>0.567538</td>\n",
+       "      <td>0.562651</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>en_voxpopuli</th>\n",
+       "      <td>0.475612</td>\n",
+       "      <td>0.451586</td>\n",
+       "      <td>0.453132</td>\n",
+       "      <td>0.594546</td>\n",
+       "      <td>0.549755</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                      tiny      base     small    medium  large-v2\n",
+       "nl_google_fleurs  0.708020  0.535692  0.365346  0.296100  0.261951\n",
+       "nl_minds14        0.897447  0.714498  0.503436  0.419083  0.389125\n",
+       "nl_voxpopuli      0.645715  0.526939  0.396940  0.345034  0.358023\n",
+       "fr_google_fleurs  0.600185  0.470808  0.378478  0.324236  0.309570\n",
+       "fr_minds14        0.805977  0.700773  0.642619  0.583323  0.616411\n",
+       "fr_voxpopuli      0.510623  0.440340  0.382961  0.359633  0.365811\n",
+       "de_google_fleurs  0.651989  0.551766  0.506944  0.478476  0.469045\n",
+       "de_minds14        0.659890  0.554437  0.474513  0.429274  0.425134\n",
+       "de_voxpopuli      0.645898  0.558876  0.518976  0.488194  0.525581\n",
+       "it_google_fleurs  0.465298  0.355877  0.287491  0.254384  0.251697\n",
+       "it_minds14        0.779429  0.621546  0.502670  0.437805  0.422781\n",
+       "it_voxpopuli     -1.000000 -1.000000 -1.000000 -1.000000 -1.000000\n",
+       "pl_google_fleurs  0.705909  0.553073  0.384142  0.318203  0.298247\n",
+       "pl_minds14        1.009390  0.860626  0.633766  0.572826  0.563293\n",
+       "pl_voxpopuli      0.588464  0.489265  0.380883  0.345623  0.349896\n",
+       "es_google_fleurs  0.333658  0.261352  0.213950  0.206351  0.202078\n",
+       "es_minds14        0.884689  0.740604  0.664831  0.656090  0.650328\n",
+       "es_voxpopuli      0.347112  0.294192  0.333500  0.295472  0.353273\n",
+       "en_google_fleurs  0.348152  0.307207  0.278857  0.268917  0.270208\n",
+       "en_minds14        0.588375  0.571845  0.566381  0.567538  0.562651\n",
+       "en_voxpopuli      0.475612  0.451586  0.453132  0.594546  0.549755"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "[\n",
-    "    [\n",
-    "        get_stats_for(dataset, PropertyHelper.ner_metrics(model, get_spacy_model_name(dataset[:2])))\n",
-    "        for model in FULL_LANGUAGE_MODELS\n",
-    "    ]\n",
-    "    for dataset in FULL_DATASET_NAMES\n",
-    "]"
+    "pd.DataFrame(word_wer_classic_metrics, columns=WHISPER_ASR_MODEL, index=FULL_DATASET_NAMES)\n",
+    "# word_wer_classic_metrics"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "45fd851c-644f-48e6-b711-5bd312404b8b",
+   "id": "77a6e273-1f5e-4a2b-9568-66e53ba99c7b",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -82,7 +1072,7 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6466877e-e744-4cb1-8d4f-f818e1d3ee7d",
+   "id": "629318e6-8c00-413c-99d4-2b7ff559ac3f",
    "metadata": {},
    "outputs": [],
    "source": []
diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py
index e50666d..61ddbb3 100644
--- a/sziszapangma/integration/repository/experiment_repository.py
+++ b/sziszapangma/integration/repository/experiment_repository.py
@@ -1,6 +1,6 @@
 """Repository to manage results of asr experiment processing."""
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Set
+from typing import Any, Optional, Set, Dict
 
 
 class ExperimentRepository(ABC):
@@ -37,3 +37,7 @@ class ExperimentRepository(ABC):
     @abstractmethod
     def get_all_properties(self) -> Set[str]:
         """Methods returns all possible properties."""
+
+    @abstractmethod
+    def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]:
+        """Return values of all records stored for the property, keyed by record id."""
diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py
index 6c87a1d..98c2ef3 100644
--- a/sziszapangma/integration/repository/mongo_experiment_repository.py
+++ b/sziszapangma/integration/repository/mongo_experiment_repository.py
@@ -1,4 +1,4 @@
-from typing import Any, Optional, Set
+from typing import Any, Optional, Set, Dict
 
 from pymongo import MongoClient
 from pymongo.database import Database
@@ -58,3 +58,6 @@ class MongoExperimentRepository(ExperimentRepository):
 
     def get_all_properties(self) -> Set[str]:
         return set(self._get_database().list_collection_names())
+
+    def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]:
+        return {record[ID]: record[VALUE] for record in self._get_database()[property_name].find()}  # unfiltered find(): every document in the property's collection, as record[ID] -> record[VALUE]
-- 
GitLab