{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "955a0385-29fb-47dc-b012-729e49570594",
   "metadata": {},
   "outputs": [],
   "source": [
    "from new_experiment.utils.get_spacy_model_name import *\n",
    "\n",
    "from call_experiment_stats import *\n",
    "\n",
    "from new_experiment.utils.property_helper import PropertyHelper\n",
    "from new_experiment.utils.get_spacy_model_name import get_spacy_model_name\n",
    "from new_experiment.new_dependency_provider import get_experiment_repository\n",
    "from new_experiment.add_to_queue_pipeline import get_hf_facebook_wav2vec2_model_by_language_code\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from typing import List"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "3f1221d3-5f70-4441-af07-58fa176e31e9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Text file holding the exported metrics, one '<property> <key> <value>' record per line;\n",
    "# it is written by the export cell and parsed back into `vals` further down.\n",
    "METRICS_FILE = 'metrics.txt'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "eda46e65-8079-40b9-9c4e-37fe74caec45",
   "metadata": {},
   "outputs": [
    {
     "ename": "ServerSelectionTimeoutError",
     "evalue": "192.168.0.124:27017: timed out, Timeout: 30s, Topology Description: <TopologyDescription id: 63caac355a13a212d6a8209f, topology_type: Unknown, servers: [<ServerDescription ('192.168.0.124', 27017) server_type: Unknown, rtt: None, error=NetworkTimeout('192.168.0.124:27017: timed out')>]>",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mServerSelectionTimeoutError\u001b[0m               Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[3], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m metric_repository \u001b[38;5;241m=\u001b[39m get_experiment_repository(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mmetric_stats\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m      2\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mopen\u001b[39m(METRICS_FILE, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mw\u001b[39m\u001b[38;5;124m'\u001b[39m) \u001b[38;5;28;01mas\u001b[39;00m writer:\n\u001b[0;32m----> 3\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m dataset_property \u001b[38;5;129;01min\u001b[39;00m \u001b[43mmetric_repository\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_all_properties\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m:\n\u001b[1;32m      4\u001b[0m         values_dict \u001b[38;5;241m=\u001b[39m metric_repository\u001b[38;5;241m.\u001b[39mget_all_values_from_property(dataset_property)\n\u001b[1;32m      5\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m value_key \u001b[38;5;129;01min\u001b[39;00m values_dict\u001b[38;5;241m.\u001b[39mkeys():\n",
      "File \u001b[0;32m~/Desktop/WUST/asr-benchmarks/sziszapangma/integration/repository/mongo_experiment_repository.py:60\u001b[0m, in \u001b[0;36mMongoExperimentRepository.get_all_properties\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m     59\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mget_all_properties\u001b[39m(\u001b[38;5;28mself\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Set[\u001b[38;5;28mstr\u001b[39m]:\n\u001b[0;32m---> 60\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mset\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_database\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlist_collection_names\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m)\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/database.py:959\u001b[0m, in \u001b[0;36mDatabase.list_collection_names\u001b[0;34m(self, session, filter, comment, **kwargs)\u001b[0m\n\u001b[1;32m    956\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mfilter\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m (\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mfilter\u001b[39m) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mfilter\u001b[39m):\n\u001b[1;32m    957\u001b[0m         kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnameOnly\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[0;32m--> 959\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [result[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mname\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;28;01mfor\u001b[39;00m result \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mlist_collections\u001b[49m\u001b[43m(\u001b[49m\u001b[43msession\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m]\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/database.py:911\u001b[0m, in \u001b[0;36mDatabase.list_collections\u001b[0;34m(self, session, filter, comment, **kwargs)\u001b[0m\n\u001b[1;32m    906\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_cmd\u001b[39m(session, server, sock_info, read_preference):\n\u001b[1;32m    907\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_list_collections(\n\u001b[1;32m    908\u001b[0m         sock_info, session, read_preference\u001b[38;5;241m=\u001b[39mread_preference, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[1;32m    909\u001b[0m     )\n\u001b[0;32m--> 911\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m__client\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retryable_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_cmd\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mread_pref\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/_csot.py:105\u001b[0m, in \u001b[0;36mapply.<locals>.csot_wrapper\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    103\u001b[0m         \u001b[38;5;28;01mwith\u001b[39;00m _TimeoutContext(timeout):\n\u001b[1;32m    104\u001b[0m             \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m--> 105\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/mongo_client.py:1441\u001b[0m, in \u001b[0;36mMongoClient._retryable_read\u001b[0;34m(self, func, read_pref, session, address, retryable)\u001b[0m\n\u001b[1;32m   1439\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m last_error\n\u001b[1;32m   1440\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1441\u001b[0m     server \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_select_server\u001b[49m\u001b[43m(\u001b[49m\u001b[43mread_pref\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msession\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maddress\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1442\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_socket_from_server(read_pref, server, session) \u001b[38;5;28;01mas\u001b[39;00m (sock_info, read_pref):\n\u001b[1;32m   1443\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m retrying \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m retryable:\n\u001b[1;32m   1444\u001b[0m             \u001b[38;5;66;03m# A retry is not possible because this server does\u001b[39;00m\n\u001b[1;32m   1445\u001b[0m             \u001b[38;5;66;03m# not support retryable reads, raise the last error.\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/mongo_client.py:1257\u001b[0m, in \u001b[0;36mMongoClient._select_server\u001b[0;34m(self, server_selector, session, address)\u001b[0m\n\u001b[1;32m   1255\u001b[0m             \u001b[38;5;28;01mraise\u001b[39;00m AutoReconnect(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mserver \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m:\u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m no longer available\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m address)\n\u001b[1;32m   1256\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1257\u001b[0m         server \u001b[38;5;241m=\u001b[39m \u001b[43mtopology\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mselect_server\u001b[49m\u001b[43m(\u001b[49m\u001b[43mserver_selector\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1258\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m server\n\u001b[1;32m   1259\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m PyMongoError \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m   1260\u001b[0m     \u001b[38;5;66;03m# Server selection errors in a transaction are transient.\u001b[39;00m\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/topology.py:272\u001b[0m, in \u001b[0;36mTopology.select_server\u001b[0;34m(self, selector, server_selection_timeout, address)\u001b[0m\n\u001b[1;32m    270\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mselect_server\u001b[39m(\u001b[38;5;28mself\u001b[39m, selector, server_selection_timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, address\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[1;32m    271\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Like select_servers, but choose a random server if several match.\"\"\"\u001b[39;00m\n\u001b[0;32m--> 272\u001b[0m     server \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_select_server\u001b[49m\u001b[43m(\u001b[49m\u001b[43mselector\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mserver_selection_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    273\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m _csot\u001b[38;5;241m.\u001b[39mget_timeout():\n\u001b[1;32m    274\u001b[0m         _csot\u001b[38;5;241m.\u001b[39mset_rtt(server\u001b[38;5;241m.\u001b[39mdescription\u001b[38;5;241m.\u001b[39mround_trip_time)\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/topology.py:261\u001b[0m, in \u001b[0;36mTopology._select_server\u001b[0;34m(self, selector, server_selection_timeout, address)\u001b[0m\n\u001b[1;32m    260\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_select_server\u001b[39m(\u001b[38;5;28mself\u001b[39m, selector, server_selection_timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, address\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[0;32m--> 261\u001b[0m     servers \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mselect_servers\u001b[49m\u001b[43m(\u001b[49m\u001b[43mselector\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mserver_selection_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    262\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(servers) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[1;32m    263\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m servers[\u001b[38;5;241m0\u001b[39m]\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/topology.py:223\u001b[0m, in \u001b[0;36mTopology.select_servers\u001b[0;34m(self, selector, server_selection_timeout, address)\u001b[0m\n\u001b[1;32m    220\u001b[0m     server_timeout \u001b[38;5;241m=\u001b[39m server_selection_timeout\n\u001b[1;32m    222\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_lock:\n\u001b[0;32m--> 223\u001b[0m     server_descriptions \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_select_servers_loop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mselector\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mserver_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maddress\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    225\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m [\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_server_by_address(sd\u001b[38;5;241m.\u001b[39maddress) \u001b[38;5;28;01mfor\u001b[39;00m sd \u001b[38;5;129;01min\u001b[39;00m server_descriptions]\n",
      "File \u001b[0;32m~/opt/miniconda3/envs/asr_benchmarks_39/lib/python3.9/site-packages/pymongo/topology.py:238\u001b[0m, in \u001b[0;36mTopology._select_servers_loop\u001b[0;34m(self, selector, timeout, address)\u001b[0m\n\u001b[1;32m    235\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m server_descriptions:\n\u001b[1;32m    236\u001b[0m     \u001b[38;5;66;03m# No suitable servers.\u001b[39;00m\n\u001b[1;32m    237\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m now \u001b[38;5;241m>\u001b[39m end_time:\n\u001b[0;32m--> 238\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m ServerSelectionTimeoutError(\n\u001b[1;32m    239\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m, Timeout: \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124ms, Topology Description: \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    240\u001b[0m             \u001b[38;5;241m%\u001b[39m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_error_message(selector), timeout, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdescription)\n\u001b[1;32m    241\u001b[0m         )\n\u001b[1;32m    243\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_ensure_opened()\n\u001b[1;32m    244\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_request_check_all()\n",
      "\u001b[0;31mServerSelectionTimeoutError\u001b[0m: 192.168.0.124:27017: timed out, Timeout: 30s, Topology Description: <TopologyDescription id: 63caac355a13a212d6a8209f, topology_type: Unknown, servers: [<ServerDescription ('192.168.0.124', 27017) server_type: Unknown, rtt: None, error=NetworkTimeout('192.168.0.124:27017: timed out')>]>"
     ]
    }
   ],
   "source": [
    "metric_repository = get_experiment_repository('metric_stats')\n",
    "\n",
    "# Read every metric from the repository BEFORE opening METRICS_FILE for writing.\n",
    "# Opening with 'w' truncates the file immediately, so querying inside the `with`\n",
    "# block would wipe the previous export whenever a repository call fails\n",
    "# (e.g. the database cannot be reached).\n",
    "metric_lines = []\n",
    "for dataset_property in metric_repository.get_all_properties():\n",
    "    values_dict = metric_repository.get_all_values_from_property(dataset_property)\n",
    "    for value_key in values_dict.keys():\n",
    "        # One record per line: '<property> <key> <value>', separated by single spaces.\n",
    "        metric_lines.append(f'{dataset_property} {value_key} {values_dict[value_key]}')\n",
    "\n",
    "with open(METRICS_FILE, 'w') as writer:\n",
    "    writer.writelines(f'{line}\\n' for line in metric_lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "9f5e44a6-f211-4b61-8cb4-5636c7672c6a",
   "metadata": {},
   "outputs": [],
   "source": [
    "COMMANDS = [\n",
    "    'run_word_wer_classic_pipeline',\n",
    "    'run_word_wer_embedding_pipeline',\n",
    "    'run_spacy_dep_tag_wer_pipeline',\n",
    "    'run_spacy_ner_wer_pipeline',\n",
    "    'run_spacy_pos_wer_pipeline',\n",
    "]\n",
    "LANGUAGES = ['nl', 'fr', 'de', 'it', 'pl', 'es', 'en']\n",
    "WHISPER_ASR_MODEL = ['tiny', 'base', 'small', 'medium', 'large-v2']\n",
    "DATASETS = ['google_fleurs', 'minds14', 'voxpopuli']\n",
    "\n",
    "# '<language>_<dataset>' for every combination, in language-major order\n",
    "# (all datasets of the first language, then all datasets of the next one, ...).\n",
    "FULL_DATASET_NAMES = [\n",
    "    f'{language}_{dataset_name}'\n",
    "    for language in LANGUAGES\n",
    "    for dataset_name in DATASETS\n",
    "]\n",
    "\n",
    "# Every Whisper size as 'whisper_<size>', followed by the two non-Whisper model families.\n",
    "FULL_LANGUAGE_MODELS = [f'whisper_{size}' for size in WHISPER_ASR_MODEL] + ['facebook_wav2vec2', 'nvidia_stt']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "d2465ceb-7439-4fa5-adf8-e95d7e6106b9",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Rebuild the metric lookup from the exported file: '<property>_<key>' -> float value.\n",
    "vals = dict()\n",
    "with open(METRICS_FILE, 'r') as reader:\n",
    "    lines = reader.read().splitlines(keepends=False)\n",
    "\n",
    "for line_number, line in enumerate(lines, start=1):\n",
    "    words = line.split()\n",
    "    if not words:\n",
    "        # Blank line: nothing to parse.\n",
    "        continue\n",
    "    if len(words) < 3:\n",
    "        # Fail with the location of the bad record instead of a bare IndexError.\n",
    "        raise ValueError(f'{METRICS_FILE}:{line_number}: expected <property> <key> <value>, got {line!r}')\n",
    "    # Only the first three whitespace-separated fields are used; further fields are ignored.\n",
    "    key = f'{words[0]}_{words[1]}'\n",
    "    vals[key] = float(words[2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "e41b19d0-37cb-4810-896a-fa0f73dd86e0",
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_model_for_dataset_name(dataset: str, model: str):\n",
    "    \"\"\"Map a model family name to the concrete ASR model name used for ``dataset``.\n",
    "\n",
    "    The language code is taken from the first two characters of ``dataset``.\n",
    "    Names starting with 'whisper' are returned unchanged, 'facebook_wav2vec2' is\n",
    "    resolved through get_hf_facebook_wav2vec2_model_by_language_code, and\n",
    "    'nvidia_stt' becomes 'nvidia_stt_<language>_conformer_transducer_large'.\n",
    "\n",
    "    Raises:\n",
    "        Exception: if ``model`` starts with none of the known prefixes.\n",
    "    \"\"\"\n",
    "    lang = dataset[:2]\n",
    "    if model.startswith('whisper'):\n",
    "        return model\n",
    "    if model.startswith('facebook_wav2vec2'):\n",
    "        return get_hf_facebook_wav2vec2_model_by_language_code(lang)\n",
    "    if model.startswith('nvidia_stt'):\n",
    "        return f'nvidia_stt_{lang}_conformer_transducer_large'\n",
    "    raise Exception('asr name not found')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "22d84451-b7e3-4dba-9758-068dae23ace4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Value stored in a table cell when `vals` has no entry for that dataset/model pair.\n",
    "_MISSING_METRIC = -1.0\n",
    "\n",
    "\n",
    "def _build_metric_table(property_name_for) -> List[List[float]]:\n",
    "    \"\"\"Build one metric table: a row per dataset, a column per ASR model family.\n",
    "\n",
    "    ``property_name_for(dataset, asr_model)`` must return the metric property name\n",
    "    for the concrete ASR model that get_model_for_dataset_name picked for ``dataset``.\n",
    "    Each cell is ``vals['<dataset>_<property name>']`` or _MISSING_METRIC when that\n",
    "    key is absent. Rows follow FULL_DATASET_NAMES, columns follow FULL_LANGUAGE_MODELS.\n",
    "    \"\"\"\n",
    "    def lookup(dataset: str, model: str) -> float:\n",
    "        metric_name = property_name_for(dataset, get_model_for_dataset_name(dataset, model))\n",
    "        return vals.get(f'{dataset}_{metric_name}', _MISSING_METRIC)\n",
    "\n",
    "    return [[lookup(dataset, model) for model in FULL_LANGUAGE_MODELS] for dataset in FULL_DATASET_NAMES]\n",
    "\n",
    "\n",
    "# spaCy-based metrics: the spaCy model is chosen from the dataset's two-letter language prefix.\n",
    "spacy_ner = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.ner_metrics(asr_model, get_spacy_model_name(dataset[:2])))\n",
    "spacy_pos = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.pos_metrics(asr_model, get_spacy_model_name(dataset[:2])))\n",
    "spacy_dep = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.dep_tag_metrics(asr_model, get_spacy_model_name(dataset[:2])))\n",
    "\n",
    "# Word-level WER variants depend on the ASR model only.\n",
    "word_wer_classic_metrics = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.word_wer_classic_metrics(asr_model))\n",
    "word_wer_soft_metrics = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.word_wer_soft_metrics(asr_model))\n",
    "word_wer_embedding_metrics = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.word_wer_embeddings_metrics(asr_model))\n",
    "\n",
    "# Taggers identified by a fixed name, independent of the dataset language.\n",
    "flair_pos = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.pos_metrics(asr_model, 'flair_upos_multi'))\n",
    "wikineural_ner = _build_metric_table(\n",
    "    lambda dataset, asr_model: PropertyHelper.ner_metrics(asr_model, 'wikineural'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "0d9a4977-edce-4c8e-aebe-b76781901512",
   "metadata": {},
   "outputs": [],
   "source": [
    "def df_to_latex(df: pd.DataFrame, name: str) -> None:\n",
    "    \"\"\"Render ``df`` as a LaTeX table and write it to the file at path ``name``.\n",
    "\n",
    "    The table is rendered with the ``display.max_colwidth`` option raised to 1000\n",
    "    for the duration of the call; an existing file at ``name`` is overwritten.\n",
    "    \"\"\"\n",
    "    with pd.option_context('display.max_colwidth', 1000):\n",
    "        # Render the frame that was passed in, not a notebook-level global.\n",
    "        latex_table = df.to_latex()\n",
    "    with open(name, 'w') as writer:\n",
    "        writer.write(latex_table)\n",
    "\n",
    "\n",
    "def summarize_df(arr: List[List[float]], name: str) -> pd.DataFrame:\n",
    "    \"\"\"Wrap a metric table in a DataFrame, export it as CSV and LaTeX, and return it.\n",
    "\n",
    "    ``arr`` needs one row per entry of FULL_DATASET_NAMES and one column per entry\n",
    "    of FULL_LANGUAGE_MODELS. The frame is written to ``results/<name>.csv`` and\n",
    "    ``results/<name>.tex``; the ``results`` directory is not created here.\n",
    "    \"\"\"\n",
    "    summary_df = pd.DataFrame(arr, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "    summary_df.to_csv(f'results/{name}.csv')\n",
    "    # Export the frame built from `arr`, so every metric gets its own table.\n",
    "    df_to_latex(summary_df, f'results/{name}.tex')\n",
    "    return summary_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "45fd851c-644f-48e6-b711-5bd312404b8b",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/var/folders/t8/4j9s5lbj1cbbn0xj92r0g31c0000gn/T/ipykernel_59977/2461695209.py:4: FutureWarning: In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.\n",
      "  writer.write(spacy_ner_df.to_latex())\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.316124</td>\n",
       "      <td>0.230845</td>\n",
       "      <td>0.186936</td>\n",
       "      <td>0.170150</td>\n",
       "      <td>0.165057</td>\n",
       "      <td>0.082781</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.463084</td>\n",
       "      <td>0.409993</td>\n",
       "      <td>0.360934</td>\n",
       "      <td>0.331613</td>\n",
       "      <td>0.324172</td>\n",
       "      <td>0.142155</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.215158</td>\n",
       "      <td>0.178716</td>\n",
       "      <td>0.132960</td>\n",
       "      <td>0.118042</td>\n",
       "      <td>0.139958</td>\n",
       "      <td>0.200403</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.264291</td>\n",
       "      <td>0.193436</td>\n",
       "      <td>0.177302</td>\n",
       "      <td>0.147464</td>\n",
       "      <td>0.141276</td>\n",
       "      <td>0.083170</td>\n",
       "      <td>0.053155</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.466860</td>\n",
       "      <td>0.468822</td>\n",
       "      <td>0.471754</td>\n",
       "      <td>0.444854</td>\n",
       "      <td>0.485090</td>\n",
       "      <td>0.220358</td>\n",
       "      <td>0.189111</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.161386</td>\n",
       "      <td>0.131144</td>\n",
       "      <td>0.113097</td>\n",
       "      <td>0.099114</td>\n",
       "      <td>0.111776</td>\n",
       "      <td>0.169564</td>\n",
       "      <td>0.127958</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.316175</td>\n",
       "      <td>0.257454</td>\n",
       "      <td>0.234163</td>\n",
       "      <td>0.239750</td>\n",
       "      <td>0.236715</td>\n",
       "      <td>0.083423</td>\n",
       "      <td>0.051673</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.435681</td>\n",
       "      <td>0.425712</td>\n",
       "      <td>0.412896</td>\n",
       "      <td>0.398617</td>\n",
       "      <td>0.398762</td>\n",
       "      <td>0.183933</td>\n",
       "      <td>0.146988</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.200245</td>\n",
       "      <td>0.155502</td>\n",
       "      <td>0.133251</td>\n",
       "      <td>0.116949</td>\n",
       "      <td>0.156371</td>\n",
       "      <td>0.242498</td>\n",
       "      <td>0.168854</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.206301</td>\n",
       "      <td>0.172527</td>\n",
       "      <td>0.161195</td>\n",
       "      <td>0.156655</td>\n",
       "      <td>0.160677</td>\n",
       "      <td>0.067181</td>\n",
       "      <td>0.039040</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.487493</td>\n",
       "      <td>0.448874</td>\n",
       "      <td>0.432679</td>\n",
       "      <td>0.416035</td>\n",
       "      <td>0.392705</td>\n",
       "      <td>0.198809</td>\n",
       "      <td>0.146235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.160365</td>\n",
       "      <td>0.139461</td>\n",
       "      <td>0.138966</td>\n",
       "      <td>0.123130</td>\n",
       "      <td>0.130691</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.153960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.334936</td>\n",
       "      <td>0.273025</td>\n",
       "      <td>0.227662</td>\n",
       "      <td>0.210962</td>\n",
       "      <td>0.209027</td>\n",
       "      <td>0.088157</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.657194</td>\n",
       "      <td>0.591588</td>\n",
       "      <td>0.487344</td>\n",
       "      <td>0.474013</td>\n",
       "      <td>0.487891</td>\n",
       "      <td>0.237692</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.203548</td>\n",
       "      <td>0.158526</td>\n",
       "      <td>0.126280</td>\n",
       "      <td>0.110784</td>\n",
       "      <td>0.117780</td>\n",
       "      <td>0.184368</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.187607</td>\n",
       "      <td>0.159873</td>\n",
       "      <td>0.147104</td>\n",
       "      <td>0.155210</td>\n",
       "      <td>0.154657</td>\n",
       "      <td>0.057830</td>\n",
       "      <td>0.038903</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.721295</td>\n",
       "      <td>0.670363</td>\n",
       "      <td>0.666278</td>\n",
       "      <td>0.673058</td>\n",
       "      <td>0.680341</td>\n",
       "      <td>0.411927</td>\n",
       "      <td>0.342895</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.133805</td>\n",
       "      <td>0.116222</td>\n",
       "      <td>0.119882</td>\n",
       "      <td>0.106610</td>\n",
       "      <td>0.122036</td>\n",
       "      <td>0.148225</td>\n",
       "      <td>0.128456</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.217843</td>\n",
       "      <td>0.188810</td>\n",
       "      <td>0.186407</td>\n",
       "      <td>0.183656</td>\n",
       "      <td>0.184568</td>\n",
       "      <td>0.180523</td>\n",
       "      <td>0.071421</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.562068</td>\n",
       "      <td>0.566999</td>\n",
       "      <td>0.580369</td>\n",
       "      <td>0.583945</td>\n",
       "      <td>0.578079</td>\n",
       "      <td>0.325304</td>\n",
       "      <td>0.293083</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.224980</td>\n",
       "      <td>0.203959</td>\n",
       "      <td>0.210278</td>\n",
       "      <td>0.322688</td>\n",
       "      <td>0.280877</td>\n",
       "      <td>0.182708</td>\n",
       "      <td>0.124416</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.316124      0.230845       0.186936        0.170150   \n",
       "nl_minds14            0.463084      0.409993       0.360934        0.331613   \n",
       "nl_voxpopuli          0.215158      0.178716       0.132960        0.118042   \n",
       "fr_google_fleurs      0.264291      0.193436       0.177302        0.147464   \n",
       "fr_minds14            0.466860      0.468822       0.471754        0.444854   \n",
       "fr_voxpopuli          0.161386      0.131144       0.113097        0.099114   \n",
       "de_google_fleurs      0.316175      0.257454       0.234163        0.239750   \n",
       "de_minds14            0.435681      0.425712       0.412896        0.398617   \n",
       "de_voxpopuli          0.200245      0.155502       0.133251        0.116949   \n",
       "it_google_fleurs      0.206301      0.172527       0.161195        0.156655   \n",
       "it_minds14            0.487493      0.448874       0.432679        0.416035   \n",
       "it_voxpopuli          0.160365      0.139461       0.138966        0.123130   \n",
       "pl_google_fleurs      0.334936      0.273025       0.227662        0.210962   \n",
       "pl_minds14            0.657194      0.591588       0.487344        0.474013   \n",
       "pl_voxpopuli          0.203548      0.158526       0.126280        0.110784   \n",
       "es_google_fleurs      0.187607      0.159873       0.147104        0.155210   \n",
       "es_minds14            0.721295      0.670363       0.666278        0.673058   \n",
       "es_voxpopuli          0.133805      0.116222       0.119882        0.106610   \n",
       "en_google_fleurs      0.217843      0.188810       0.186407        0.183656   \n",
       "en_minds14            0.562068      0.566999       0.580369        0.583945   \n",
       "en_voxpopuli          0.224980      0.203959       0.210278        0.322688   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.165057           0.082781   -1.000000  \n",
       "nl_minds14                0.324172           0.142155   -1.000000  \n",
       "nl_voxpopuli              0.139958           0.200403   -1.000000  \n",
       "fr_google_fleurs          0.141276           0.083170    0.053155  \n",
       "fr_minds14                0.485090           0.220358    0.189111  \n",
       "fr_voxpopuli              0.111776           0.169564    0.127958  \n",
       "de_google_fleurs          0.236715           0.083423    0.051673  \n",
       "de_minds14                0.398762           0.183933    0.146988  \n",
       "de_voxpopuli              0.156371           0.242498    0.168854  \n",
       "it_google_fleurs          0.160677           0.067181    0.039040  \n",
       "it_minds14                0.392705           0.198809    0.146235  \n",
       "it_voxpopuli              0.130691          -1.000000    0.153960  \n",
       "pl_google_fleurs          0.209027           0.088157   -1.000000  \n",
       "pl_minds14                0.487891           0.237692   -1.000000  \n",
       "pl_voxpopuli              0.117780           0.184368   -1.000000  \n",
       "es_google_fleurs          0.154657           0.057830    0.038903  \n",
       "es_minds14                0.680341           0.411927    0.342895  \n",
       "es_voxpopuli              0.122036           0.148225    0.128456  \n",
       "en_google_fleurs          0.184568           0.180523    0.071421  \n",
       "en_minds14                0.578079           0.325304    0.293083  \n",
       "en_voxpopuli              0.280877           0.182708    0.124416  "
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "6466877e-e744-4cb1-8d4f-f818e1d3ee7d",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.582916</td>\n",
       "      <td>0.427364</td>\n",
       "      <td>0.279190</td>\n",
       "      <td>0.229402</td>\n",
       "      <td>0.212373</td>\n",
       "      <td>0.160957</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.888989</td>\n",
       "      <td>0.702107</td>\n",
       "      <td>0.511865</td>\n",
       "      <td>0.440081</td>\n",
       "      <td>0.415821</td>\n",
       "      <td>0.298583</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.451950</td>\n",
       "      <td>0.350228</td>\n",
       "      <td>0.233061</td>\n",
       "      <td>0.188461</td>\n",
       "      <td>0.208664</td>\n",
       "      <td>0.340656</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.468415</td>\n",
       "      <td>0.338927</td>\n",
       "      <td>0.260157</td>\n",
       "      <td>0.207241</td>\n",
       "      <td>0.194587</td>\n",
       "      <td>0.141560</td>\n",
       "      <td>0.073667</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.700735</td>\n",
       "      <td>0.619382</td>\n",
       "      <td>0.567487</td>\n",
       "      <td>0.513574</td>\n",
       "      <td>0.552826</td>\n",
       "      <td>0.336656</td>\n",
       "      <td>0.236770</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.310661</td>\n",
       "      <td>0.235596</td>\n",
       "      <td>0.180943</td>\n",
       "      <td>0.153288</td>\n",
       "      <td>0.159867</td>\n",
       "      <td>0.245229</td>\n",
       "      <td>0.164607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.449640</td>\n",
       "      <td>0.344001</td>\n",
       "      <td>0.282088</td>\n",
       "      <td>0.275634</td>\n",
       "      <td>0.264093</td>\n",
       "      <td>0.094206</td>\n",
       "      <td>0.053148</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.608813</td>\n",
       "      <td>0.529599</td>\n",
       "      <td>0.472205</td>\n",
       "      <td>0.443094</td>\n",
       "      <td>0.441656</td>\n",
       "      <td>0.228980</td>\n",
       "      <td>0.157855</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.347653</td>\n",
       "      <td>0.248060</td>\n",
       "      <td>0.198001</td>\n",
       "      <td>0.168237</td>\n",
       "      <td>0.205059</td>\n",
       "      <td>0.313704</td>\n",
       "      <td>0.203633</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.364700</td>\n",
       "      <td>0.269092</td>\n",
       "      <td>0.218361</td>\n",
       "      <td>0.189632</td>\n",
       "      <td>0.189108</td>\n",
       "      <td>0.115212</td>\n",
       "      <td>0.057875</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.735663</td>\n",
       "      <td>0.597724</td>\n",
       "      <td>0.500377</td>\n",
       "      <td>0.438344</td>\n",
       "      <td>0.417785</td>\n",
       "      <td>0.285531</td>\n",
       "      <td>0.153250</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.401738</td>\n",
       "      <td>0.332257</td>\n",
       "      <td>0.278988</td>\n",
       "      <td>0.245468</td>\n",
       "      <td>0.247638</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.236106</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.594285</td>\n",
       "      <td>0.452570</td>\n",
       "      <td>0.318702</td>\n",
       "      <td>0.276475</td>\n",
       "      <td>0.261194</td>\n",
       "      <td>0.184994</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.988993</td>\n",
       "      <td>0.853431</td>\n",
       "      <td>0.653693</td>\n",
       "      <td>0.585884</td>\n",
       "      <td>0.597468</td>\n",
       "      <td>0.454939</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.374544</td>\n",
       "      <td>0.277290</td>\n",
       "      <td>0.198685</td>\n",
       "      <td>0.164524</td>\n",
       "      <td>0.161887</td>\n",
       "      <td>0.309752</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.284499</td>\n",
       "      <td>0.224748</td>\n",
       "      <td>0.187365</td>\n",
       "      <td>0.189561</td>\n",
       "      <td>0.184028</td>\n",
       "      <td>0.096476</td>\n",
       "      <td>0.051401</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.880992</td>\n",
       "      <td>0.747677</td>\n",
       "      <td>0.695294</td>\n",
       "      <td>0.690749</td>\n",
       "      <td>0.697884</td>\n",
       "      <td>0.508818</td>\n",
       "      <td>0.384215</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.252463</td>\n",
       "      <td>0.206225</td>\n",
       "      <td>0.229706</td>\n",
       "      <td>0.195846</td>\n",
       "      <td>0.231587</td>\n",
       "      <td>0.230351</td>\n",
       "      <td>0.173987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.295853</td>\n",
       "      <td>0.250928</td>\n",
       "      <td>0.224483</td>\n",
       "      <td>0.218855</td>\n",
       "      <td>0.218479</td>\n",
       "      <td>0.367414</td>\n",
       "      <td>0.078904</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.634351</td>\n",
       "      <td>0.623962</td>\n",
       "      <td>0.626942</td>\n",
       "      <td>0.626588</td>\n",
       "      <td>0.620953</td>\n",
       "      <td>0.584547</td>\n",
       "      <td>0.329282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.345836</td>\n",
       "      <td>0.319493</td>\n",
       "      <td>0.319060</td>\n",
       "      <td>0.466410</td>\n",
       "      <td>0.408949</td>\n",
       "      <td>0.377100</td>\n",
       "      <td>0.160883</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.582916      0.427364       0.279190        0.229402   \n",
       "nl_minds14            0.888989      0.702107       0.511865        0.440081   \n",
       "nl_voxpopuli          0.451950      0.350228       0.233061        0.188461   \n",
       "fr_google_fleurs      0.468415      0.338927       0.260157        0.207241   \n",
       "fr_minds14            0.700735      0.619382       0.567487        0.513574   \n",
       "fr_voxpopuli          0.310661      0.235596       0.180943        0.153288   \n",
       "de_google_fleurs      0.449640      0.344001       0.282088        0.275634   \n",
       "de_minds14            0.608813      0.529599       0.472205        0.443094   \n",
       "de_voxpopuli          0.347653      0.248060       0.198001        0.168237   \n",
       "it_google_fleurs      0.364700      0.269092       0.218361        0.189632   \n",
       "it_minds14            0.735663      0.597724       0.500377        0.438344   \n",
       "it_voxpopuli          0.401738      0.332257       0.278988        0.245468   \n",
       "pl_google_fleurs      0.594285      0.452570       0.318702        0.276475   \n",
       "pl_minds14            0.988993      0.853431       0.653693        0.585884   \n",
       "pl_voxpopuli          0.374544      0.277290       0.198685        0.164524   \n",
       "es_google_fleurs      0.284499      0.224748       0.187365        0.189561   \n",
       "es_minds14            0.880992      0.747677       0.695294        0.690749   \n",
       "es_voxpopuli          0.252463      0.206225       0.229706        0.195846   \n",
       "en_google_fleurs      0.295853      0.250928       0.224483        0.218855   \n",
       "en_minds14            0.634351      0.623962       0.626942        0.626588   \n",
       "en_voxpopuli          0.345836      0.319493       0.319060        0.466410   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.212373           0.160957   -1.000000  \n",
       "nl_minds14                0.415821           0.298583   -1.000000  \n",
       "nl_voxpopuli              0.208664           0.340656   -1.000000  \n",
       "fr_google_fleurs          0.194587           0.141560    0.073667  \n",
       "fr_minds14                0.552826           0.336656    0.236770  \n",
       "fr_voxpopuli              0.159867           0.245229    0.164607  \n",
       "de_google_fleurs          0.264093           0.094206    0.053148  \n",
       "de_minds14                0.441656           0.228980    0.157855  \n",
       "de_voxpopuli              0.205059           0.313704    0.203633  \n",
       "it_google_fleurs          0.189108           0.115212    0.057875  \n",
       "it_minds14                0.417785           0.285531    0.153250  \n",
       "it_voxpopuli              0.247638          -1.000000    0.236106  \n",
       "pl_google_fleurs          0.261194           0.184994   -1.000000  \n",
       "pl_minds14                0.597468           0.454939   -1.000000  \n",
       "pl_voxpopuli              0.161887           0.309752   -1.000000  \n",
       "es_google_fleurs          0.184028           0.096476    0.051401  \n",
       "es_minds14                0.697884           0.508818    0.384215  \n",
       "es_voxpopuli              0.231587           0.230351    0.173987  \n",
       "en_google_fleurs          0.218479           0.367414    0.078904  \n",
       "en_minds14                0.620953           0.584547    0.329282  \n",
       "en_voxpopuli              0.408949           0.377100    0.160883  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "summarize_df(spacy_ner, 'spacy_ner')\n",
    "\n",
    "spacy_pos_df = pd.DataFrame(spacy_pos, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "spacy_pos_df.to_csv('results/spacy_pos.csv')\n",
    "spacy_pos_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "77567361-b730-49f0-ab68-19ad335df1b1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.699699</td>\n",
       "      <td>0.533595</td>\n",
       "      <td>0.366764</td>\n",
       "      <td>0.300730</td>\n",
       "      <td>0.282070</td>\n",
       "      <td>0.246416</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.941359</td>\n",
       "      <td>0.778265</td>\n",
       "      <td>0.584732</td>\n",
       "      <td>0.511929</td>\n",
       "      <td>0.490065</td>\n",
       "      <td>0.376911</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.553280</td>\n",
       "      <td>0.435277</td>\n",
       "      <td>0.304322</td>\n",
       "      <td>0.252270</td>\n",
       "      <td>0.268306</td>\n",
       "      <td>0.430234</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.580527</td>\n",
       "      <td>0.429523</td>\n",
       "      <td>0.337506</td>\n",
       "      <td>0.275466</td>\n",
       "      <td>0.259405</td>\n",
       "      <td>0.205104</td>\n",
       "      <td>0.114100</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.800999</td>\n",
       "      <td>0.714124</td>\n",
       "      <td>0.647957</td>\n",
       "      <td>0.592392</td>\n",
       "      <td>0.613262</td>\n",
       "      <td>0.421050</td>\n",
       "      <td>0.284212</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.387866</td>\n",
       "      <td>0.307476</td>\n",
       "      <td>0.240038</td>\n",
       "      <td>0.205174</td>\n",
       "      <td>0.210248</td>\n",
       "      <td>0.323655</td>\n",
       "      <td>0.232059</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.519535</td>\n",
       "      <td>0.424735</td>\n",
       "      <td>0.360695</td>\n",
       "      <td>0.353459</td>\n",
       "      <td>0.345089</td>\n",
       "      <td>0.139605</td>\n",
       "      <td>0.074235</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.693370</td>\n",
       "      <td>0.628170</td>\n",
       "      <td>0.570571</td>\n",
       "      <td>0.543742</td>\n",
       "      <td>0.546479</td>\n",
       "      <td>0.288109</td>\n",
       "      <td>0.216011</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.396771</td>\n",
       "      <td>0.298134</td>\n",
       "      <td>0.236937</td>\n",
       "      <td>0.204998</td>\n",
       "      <td>0.241773</td>\n",
       "      <td>0.385364</td>\n",
       "      <td>0.271072</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.453637</td>\n",
       "      <td>0.334587</td>\n",
       "      <td>0.269876</td>\n",
       "      <td>0.234494</td>\n",
       "      <td>0.232862</td>\n",
       "      <td>0.168723</td>\n",
       "      <td>0.089945</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.814580</td>\n",
       "      <td>0.681371</td>\n",
       "      <td>0.576940</td>\n",
       "      <td>0.511340</td>\n",
       "      <td>0.495661</td>\n",
       "      <td>0.376479</td>\n",
       "      <td>0.224318</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.483728</td>\n",
       "      <td>0.401518</td>\n",
       "      <td>0.332556</td>\n",
       "      <td>0.290310</td>\n",
       "      <td>0.291917</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.288211</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.741445</td>\n",
       "      <td>0.580439</td>\n",
       "      <td>0.420468</td>\n",
       "      <td>0.365168</td>\n",
       "      <td>0.348206</td>\n",
       "      <td>0.303350</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>1.138465</td>\n",
       "      <td>0.999350</td>\n",
       "      <td>0.817470</td>\n",
       "      <td>0.738430</td>\n",
       "      <td>0.754548</td>\n",
       "      <td>0.587577</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.479609</td>\n",
       "      <td>0.366738</td>\n",
       "      <td>0.257558</td>\n",
       "      <td>0.210752</td>\n",
       "      <td>0.201585</td>\n",
       "      <td>0.422140</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.341816</td>\n",
       "      <td>0.278543</td>\n",
       "      <td>0.226821</td>\n",
       "      <td>0.227239</td>\n",
       "      <td>0.220248</td>\n",
       "      <td>0.135718</td>\n",
       "      <td>0.069997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.992263</td>\n",
       "      <td>0.828084</td>\n",
       "      <td>0.799141</td>\n",
       "      <td>0.791115</td>\n",
       "      <td>0.799426</td>\n",
       "      <td>0.591663</td>\n",
       "      <td>0.435506</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.304887</td>\n",
       "      <td>0.249827</td>\n",
       "      <td>0.277536</td>\n",
       "      <td>0.240640</td>\n",
       "      <td>0.280930</td>\n",
       "      <td>0.276648</td>\n",
       "      <td>0.210668</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.341285</td>\n",
       "      <td>0.285416</td>\n",
       "      <td>0.262014</td>\n",
       "      <td>0.249445</td>\n",
       "      <td>0.251211</td>\n",
       "      <td>0.398297</td>\n",
       "      <td>0.099033</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.681148</td>\n",
       "      <td>0.666131</td>\n",
       "      <td>0.669723</td>\n",
       "      <td>0.669332</td>\n",
       "      <td>0.661842</td>\n",
       "      <td>0.627539</td>\n",
       "      <td>0.361619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.364437</td>\n",
       "      <td>0.335141</td>\n",
       "      <td>0.333144</td>\n",
       "      <td>0.481083</td>\n",
       "      <td>0.419667</td>\n",
       "      <td>0.402100</td>\n",
       "      <td>0.170951</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.699699      0.533595       0.366764        0.300730   \n",
       "nl_minds14            0.941359      0.778265       0.584732        0.511929   \n",
       "nl_voxpopuli          0.553280      0.435277       0.304322        0.252270   \n",
       "fr_google_fleurs      0.580527      0.429523       0.337506        0.275466   \n",
       "fr_minds14            0.800999      0.714124       0.647957        0.592392   \n",
       "fr_voxpopuli          0.387866      0.307476       0.240038        0.205174   \n",
       "de_google_fleurs      0.519535      0.424735       0.360695        0.353459   \n",
       "de_minds14            0.693370      0.628170       0.570571        0.543742   \n",
       "de_voxpopuli          0.396771      0.298134       0.236937        0.204998   \n",
       "it_google_fleurs      0.453637      0.334587       0.269876        0.234494   \n",
       "it_minds14            0.814580      0.681371       0.576940        0.511340   \n",
       "it_voxpopuli          0.483728      0.401518       0.332556        0.290310   \n",
       "pl_google_fleurs      0.741445      0.580439       0.420468        0.365168   \n",
       "pl_minds14            1.138465      0.999350       0.817470        0.738430   \n",
       "pl_voxpopuli          0.479609      0.366738       0.257558        0.210752   \n",
       "es_google_fleurs      0.341816      0.278543       0.226821        0.227239   \n",
       "es_minds14            0.992263      0.828084       0.799141        0.791115   \n",
       "es_voxpopuli          0.304887      0.249827       0.277536        0.240640   \n",
       "en_google_fleurs      0.341285      0.285416       0.262014        0.249445   \n",
       "en_minds14            0.681148      0.666131       0.669723        0.669332   \n",
       "en_voxpopuli          0.364437      0.335141       0.333144        0.481083   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.282070           0.246416   -1.000000  \n",
       "nl_minds14                0.490065           0.376911   -1.000000  \n",
       "nl_voxpopuli              0.268306           0.430234   -1.000000  \n",
       "fr_google_fleurs          0.259405           0.205104    0.114100  \n",
       "fr_minds14                0.613262           0.421050    0.284212  \n",
       "fr_voxpopuli              0.210248           0.323655    0.232059  \n",
       "de_google_fleurs          0.345089           0.139605    0.074235  \n",
       "de_minds14                0.546479           0.288109    0.216011  \n",
       "de_voxpopuli              0.241773           0.385364    0.271072  \n",
       "it_google_fleurs          0.232862           0.168723    0.089945  \n",
       "it_minds14                0.495661           0.376479    0.224318  \n",
       "it_voxpopuli              0.291917          -1.000000    0.288211  \n",
       "pl_google_fleurs          0.348206           0.303350   -1.000000  \n",
       "pl_minds14                0.754548           0.587577   -1.000000  \n",
       "pl_voxpopuli              0.201585           0.422140   -1.000000  \n",
       "es_google_fleurs          0.220248           0.135718    0.069997  \n",
       "es_minds14                0.799426           0.591663    0.435506  \n",
       "es_voxpopuli              0.280930           0.276648    0.210668  \n",
       "en_google_fleurs          0.251211           0.398297    0.099033  \n",
       "en_minds14                0.661842           0.627539    0.361619  \n",
       "en_voxpopuli              0.419667           0.402100    0.170951  "
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "spacy_dep_df = pd.DataFrame(spacy_dep, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "spacy_dep_df.to_csv('results/spacy_dep.csv')\n",
    "spacy_dep_df\n",
    "\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "3dbfbb6e-c369-47fd-801c-6df211943dc1",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.708020</td>\n",
       "      <td>0.535692</td>\n",
       "      <td>0.365346</td>\n",
       "      <td>0.296100</td>\n",
       "      <td>0.261951</td>\n",
       "      <td>0.273752</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.897447</td>\n",
       "      <td>0.714498</td>\n",
       "      <td>0.503436</td>\n",
       "      <td>0.419083</td>\n",
       "      <td>0.389125</td>\n",
       "      <td>0.465494</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.645715</td>\n",
       "      <td>0.526939</td>\n",
       "      <td>0.396940</td>\n",
       "      <td>0.345034</td>\n",
       "      <td>0.358023</td>\n",
       "      <td>0.380835</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.600185</td>\n",
       "      <td>0.470808</td>\n",
       "      <td>0.378478</td>\n",
       "      <td>0.324236</td>\n",
       "      <td>0.309570</td>\n",
       "      <td>0.305183</td>\n",
       "      <td>0.206433</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.805977</td>\n",
       "      <td>0.700773</td>\n",
       "      <td>0.642619</td>\n",
       "      <td>0.583323</td>\n",
       "      <td>0.616411</td>\n",
       "      <td>0.564885</td>\n",
       "      <td>0.441154</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.510623</td>\n",
       "      <td>0.440340</td>\n",
       "      <td>0.382961</td>\n",
       "      <td>0.359633</td>\n",
       "      <td>0.365811</td>\n",
       "      <td>0.323351</td>\n",
       "      <td>0.187074</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.651989</td>\n",
       "      <td>0.551766</td>\n",
       "      <td>0.506944</td>\n",
       "      <td>0.478476</td>\n",
       "      <td>0.469045</td>\n",
       "      <td>0.182395</td>\n",
       "      <td>0.072162</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.659890</td>\n",
       "      <td>0.554437</td>\n",
       "      <td>0.474513</td>\n",
       "      <td>0.429274</td>\n",
       "      <td>0.425134</td>\n",
       "      <td>0.437369</td>\n",
       "      <td>0.357848</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.645898</td>\n",
       "      <td>0.558876</td>\n",
       "      <td>0.518976</td>\n",
       "      <td>0.488194</td>\n",
       "      <td>0.525581</td>\n",
       "      <td>0.292203</td>\n",
       "      <td>0.088256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.465298</td>\n",
       "      <td>0.355877</td>\n",
       "      <td>0.287491</td>\n",
       "      <td>0.254384</td>\n",
       "      <td>0.251697</td>\n",
       "      <td>0.218689</td>\n",
       "      <td>0.140564</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.779429</td>\n",
       "      <td>0.621546</td>\n",
       "      <td>0.502670</td>\n",
       "      <td>0.437805</td>\n",
       "      <td>0.422781</td>\n",
       "      <td>0.429940</td>\n",
       "      <td>0.276002</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.562729</td>\n",
       "      <td>0.477854</td>\n",
       "      <td>0.420387</td>\n",
       "      <td>0.388904</td>\n",
       "      <td>0.393964</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.233076</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.700853</td>\n",
       "      <td>0.553073</td>\n",
       "      <td>0.384142</td>\n",
       "      <td>0.318203</td>\n",
       "      <td>0.298247</td>\n",
       "      <td>0.335870</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>1.023324</td>\n",
       "      <td>0.860626</td>\n",
       "      <td>0.633766</td>\n",
       "      <td>0.572826</td>\n",
       "      <td>0.563293</td>\n",
       "      <td>0.697584</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.588464</td>\n",
       "      <td>0.489265</td>\n",
       "      <td>0.380883</td>\n",
       "      <td>0.345623</td>\n",
       "      <td>0.349896</td>\n",
       "      <td>0.324229</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.333658</td>\n",
       "      <td>0.261352</td>\n",
       "      <td>0.213950</td>\n",
       "      <td>0.206351</td>\n",
       "      <td>0.202078</td>\n",
       "      <td>0.145522</td>\n",
       "      <td>0.067686</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.884689</td>\n",
       "      <td>0.740604</td>\n",
       "      <td>0.664831</td>\n",
       "      <td>0.656090</td>\n",
       "      <td>0.650328</td>\n",
       "      <td>0.602494</td>\n",
       "      <td>0.436570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.347112</td>\n",
       "      <td>0.294192</td>\n",
       "      <td>0.333500</td>\n",
       "      <td>0.295472</td>\n",
       "      <td>0.353273</td>\n",
       "      <td>0.191242</td>\n",
       "      <td>0.067363</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.348152</td>\n",
       "      <td>0.307207</td>\n",
       "      <td>0.278857</td>\n",
       "      <td>0.268917</td>\n",
       "      <td>0.270208</td>\n",
       "      <td>1.031485</td>\n",
       "      <td>0.114966</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.588375</td>\n",
       "      <td>0.571845</td>\n",
       "      <td>0.566381</td>\n",
       "      <td>0.567538</td>\n",
       "      <td>0.562651</td>\n",
       "      <td>1.203252</td>\n",
       "      <td>0.467297</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.475612</td>\n",
       "      <td>0.451586</td>\n",
       "      <td>0.453132</td>\n",
       "      <td>0.594546</td>\n",
       "      <td>0.549755</td>\n",
       "      <td>1.020514</td>\n",
       "      <td>0.067919</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.708020      0.535692       0.365346        0.296100   \n",
       "nl_minds14            0.897447      0.714498       0.503436        0.419083   \n",
       "nl_voxpopuli          0.645715      0.526939       0.396940        0.345034   \n",
       "fr_google_fleurs      0.600185      0.470808       0.378478        0.324236   \n",
       "fr_minds14            0.805977      0.700773       0.642619        0.583323   \n",
       "fr_voxpopuli          0.510623      0.440340       0.382961        0.359633   \n",
       "de_google_fleurs      0.651989      0.551766       0.506944        0.478476   \n",
       "de_minds14            0.659890      0.554437       0.474513        0.429274   \n",
       "de_voxpopuli          0.645898      0.558876       0.518976        0.488194   \n",
       "it_google_fleurs      0.465298      0.355877       0.287491        0.254384   \n",
       "it_minds14            0.779429      0.621546       0.502670        0.437805   \n",
       "it_voxpopuli          0.562729      0.477854       0.420387        0.388904   \n",
       "pl_google_fleurs      0.700853      0.553073       0.384142        0.318203   \n",
       "pl_minds14            1.023324      0.860626       0.633766        0.572826   \n",
       "pl_voxpopuli          0.588464      0.489265       0.380883        0.345623   \n",
       "es_google_fleurs      0.333658      0.261352       0.213950        0.206351   \n",
       "es_minds14            0.884689      0.740604       0.664831        0.656090   \n",
       "es_voxpopuli          0.347112      0.294192       0.333500        0.295472   \n",
       "en_google_fleurs      0.348152      0.307207       0.278857        0.268917   \n",
       "en_minds14            0.588375      0.571845       0.566381        0.567538   \n",
       "en_voxpopuli          0.475612      0.451586       0.453132        0.594546   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.261951           0.273752   -1.000000  \n",
       "nl_minds14                0.389125           0.465494   -1.000000  \n",
       "nl_voxpopuli              0.358023           0.380835   -1.000000  \n",
       "fr_google_fleurs          0.309570           0.305183    0.206433  \n",
       "fr_minds14                0.616411           0.564885    0.441154  \n",
       "fr_voxpopuli              0.365811           0.323351    0.187074  \n",
       "de_google_fleurs          0.469045           0.182395    0.072162  \n",
       "de_minds14                0.425134           0.437369    0.357848  \n",
       "de_voxpopuli              0.525581           0.292203    0.088256  \n",
       "it_google_fleurs          0.251697           0.218689    0.140564  \n",
       "it_minds14                0.422781           0.429940    0.276002  \n",
       "it_voxpopuli              0.393964          -1.000000    0.233076  \n",
       "pl_google_fleurs          0.298247           0.335870   -1.000000  \n",
       "pl_minds14                0.563293           0.697584   -1.000000  \n",
       "pl_voxpopuli              0.349896           0.324229   -1.000000  \n",
       "es_google_fleurs          0.202078           0.145522    0.067686  \n",
       "es_minds14                0.650328           0.602494    0.436570  \n",
       "es_voxpopuli              0.353273           0.191242    0.067363  \n",
       "en_google_fleurs          0.270208           1.031485    0.114966  \n",
       "en_minds14                0.562651           1.203252    0.467297  \n",
       "en_voxpopuli              0.549755           1.020514    0.067919  "
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the table from word_wer_classic_metrics, labelling rows with FULL_DATASET_NAMES\n",
    "# and columns with FULL_LANGUAGE_MODELS, then persist it as CSV under results/.\n",
    "word_wer_classic_metrics_df = pd.DataFrame(word_wer_classic_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "word_wer_classic_metrics_df.to_csv('results/word_wer_classic_metrics.csv')\n",
    "\n",
    "# Render the table explicitly: a bare expression that is not the cell's last statement\n",
    "# is not shown under IPython's default display settings.\n",
    "display(word_wer_classic_metrics_df)\n",
    "\n",
    "# NOTE(review): this summarizes spacy_ner rather than the frame built above; it looks like\n",
    "# a leftover from an earlier cell - confirm it is intended here.\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "77a6e273-1f5e-4a2b-9568-66e53ba99c7b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.487020</td>\n",
       "      <td>0.332826</td>\n",
       "      <td>0.173815</td>\n",
       "      <td>0.118312</td>\n",
       "      <td>0.092164</td>\n",
       "      <td>0.186138</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.696387</td>\n",
       "      <td>0.528807</td>\n",
       "      <td>0.323153</td>\n",
       "      <td>0.251855</td>\n",
       "      <td>0.234766</td>\n",
       "      <td>0.306648</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.440765</td>\n",
       "      <td>0.349226</td>\n",
       "      <td>0.233398</td>\n",
       "      <td>0.187694</td>\n",
       "      <td>0.203840</td>\n",
       "      <td>0.295450</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.422005</td>\n",
       "      <td>0.308031</td>\n",
       "      <td>0.230959</td>\n",
       "      <td>0.181520</td>\n",
       "      <td>0.167575</td>\n",
       "      <td>0.225745</td>\n",
       "      <td>0.154588</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.598664</td>\n",
       "      <td>0.499632</td>\n",
       "      <td>0.447757</td>\n",
       "      <td>0.395654</td>\n",
       "      <td>0.429327</td>\n",
       "      <td>0.441224</td>\n",
       "      <td>0.342637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.349906</td>\n",
       "      <td>0.291653</td>\n",
       "      <td>0.242314</td>\n",
       "      <td>0.218193</td>\n",
       "      <td>0.226681</td>\n",
       "      <td>0.251004</td>\n",
       "      <td>0.147786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.328928</td>\n",
       "      <td>0.213515</td>\n",
       "      <td>0.151060</td>\n",
       "      <td>0.116871</td>\n",
       "      <td>0.104827</td>\n",
       "      <td>0.118999</td>\n",
       "      <td>0.048663</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.425754</td>\n",
       "      <td>0.331317</td>\n",
       "      <td>0.255620</td>\n",
       "      <td>0.222602</td>\n",
       "      <td>0.220104</td>\n",
       "      <td>0.232533</td>\n",
       "      <td>0.143306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.349224</td>\n",
       "      <td>0.259910</td>\n",
       "      <td>0.208328</td>\n",
       "      <td>0.176478</td>\n",
       "      <td>0.215692</td>\n",
       "      <td>0.228572</td>\n",
       "      <td>0.065661</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.297877</td>\n",
       "      <td>0.201276</td>\n",
       "      <td>0.139435</td>\n",
       "      <td>0.114579</td>\n",
       "      <td>0.103925</td>\n",
       "      <td>0.161414</td>\n",
       "      <td>0.101285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.603743</td>\n",
       "      <td>0.455306</td>\n",
       "      <td>0.323527</td>\n",
       "      <td>0.264797</td>\n",
       "      <td>0.255383</td>\n",
       "      <td>0.299216</td>\n",
       "      <td>0.162753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.418096</td>\n",
       "      <td>0.345687</td>\n",
       "      <td>0.298079</td>\n",
       "      <td>0.266888</td>\n",
       "      <td>0.270669</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.193692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.493295</td>\n",
       "      <td>0.336319</td>\n",
       "      <td>0.183046</td>\n",
       "      <td>0.119453</td>\n",
       "      <td>0.096625</td>\n",
       "      <td>0.232851</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.822964</td>\n",
       "      <td>0.633399</td>\n",
       "      <td>0.420067</td>\n",
       "      <td>0.353710</td>\n",
       "      <td>0.342892</td>\n",
       "      <td>0.519684</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.385923</td>\n",
       "      <td>0.288336</td>\n",
       "      <td>0.188413</td>\n",
       "      <td>0.152321</td>\n",
       "      <td>0.147463</td>\n",
       "      <td>0.232410</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.196055</td>\n",
       "      <td>0.130109</td>\n",
       "      <td>0.084114</td>\n",
       "      <td>0.077302</td>\n",
       "      <td>0.067295</td>\n",
       "      <td>0.102324</td>\n",
       "      <td>0.048997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.723086</td>\n",
       "      <td>0.581624</td>\n",
       "      <td>0.497037</td>\n",
       "      <td>0.493568</td>\n",
       "      <td>0.488170</td>\n",
       "      <td>0.522209</td>\n",
       "      <td>0.397315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.222505</td>\n",
       "      <td>0.172764</td>\n",
       "      <td>0.195746</td>\n",
       "      <td>0.162495</td>\n",
       "      <td>0.201468</td>\n",
       "      <td>0.143578</td>\n",
       "      <td>0.053721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.191329</td>\n",
       "      <td>0.151693</td>\n",
       "      <td>0.121134</td>\n",
       "      <td>0.107578</td>\n",
       "      <td>0.108609</td>\n",
       "      <td>0.111466</td>\n",
       "      <td>0.088609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.449783</td>\n",
       "      <td>0.433839</td>\n",
       "      <td>0.427788</td>\n",
       "      <td>0.431043</td>\n",
       "      <td>0.424969</td>\n",
       "      <td>0.424984</td>\n",
       "      <td>0.363642</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.314581</td>\n",
       "      <td>0.286802</td>\n",
       "      <td>0.297819</td>\n",
       "      <td>0.439680</td>\n",
       "      <td>0.402555</td>\n",
       "      <td>0.118296</td>\n",
       "      <td>0.054176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.487020      0.332826       0.173815        0.118312   \n",
       "nl_minds14            0.696387      0.528807       0.323153        0.251855   \n",
       "nl_voxpopuli          0.440765      0.349226       0.233398        0.187694   \n",
       "fr_google_fleurs      0.422005      0.308031       0.230959        0.181520   \n",
       "fr_minds14            0.598664      0.499632       0.447757        0.395654   \n",
       "fr_voxpopuli          0.349906      0.291653       0.242314        0.218193   \n",
       "de_google_fleurs      0.328928      0.213515       0.151060        0.116871   \n",
       "de_minds14            0.425754      0.331317       0.255620        0.222602   \n",
       "de_voxpopuli          0.349224      0.259910       0.208328        0.176478   \n",
       "it_google_fleurs      0.297877      0.201276       0.139435        0.114579   \n",
       "it_minds14            0.603743      0.455306       0.323527        0.264797   \n",
       "it_voxpopuli          0.418096      0.345687       0.298079        0.266888   \n",
       "pl_google_fleurs      0.493295      0.336319       0.183046        0.119453   \n",
       "pl_minds14            0.822964      0.633399       0.420067        0.353710   \n",
       "pl_voxpopuli          0.385923      0.288336       0.188413        0.152321   \n",
       "es_google_fleurs      0.196055      0.130109       0.084114        0.077302   \n",
       "es_minds14            0.723086      0.581624       0.497037        0.493568   \n",
       "es_voxpopuli          0.222505      0.172764       0.195746        0.162495   \n",
       "en_google_fleurs      0.191329      0.151693       0.121134        0.107578   \n",
       "en_minds14            0.449783      0.433839       0.427788        0.431043   \n",
       "en_voxpopuli          0.314581      0.286802       0.297819        0.439680   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.092164           0.186138   -1.000000  \n",
       "nl_minds14                0.234766           0.306648   -1.000000  \n",
       "nl_voxpopuli              0.203840           0.295450   -1.000000  \n",
       "fr_google_fleurs          0.167575           0.225745    0.154588  \n",
       "fr_minds14                0.429327           0.441224    0.342637  \n",
       "fr_voxpopuli              0.226681           0.251004    0.147786  \n",
       "de_google_fleurs          0.104827           0.118999    0.048663  \n",
       "de_minds14                0.220104           0.232533    0.143306  \n",
       "de_voxpopuli              0.215692           0.228572    0.065661  \n",
       "it_google_fleurs          0.103925           0.161414    0.101285  \n",
       "it_minds14                0.255383           0.299216    0.162753  \n",
       "it_voxpopuli              0.270669          -1.000000    0.193692  \n",
       "pl_google_fleurs          0.096625           0.232851   -1.000000  \n",
       "pl_minds14                0.342892           0.519684   -1.000000  \n",
       "pl_voxpopuli              0.147463           0.232410   -1.000000  \n",
       "es_google_fleurs          0.067295           0.102324    0.048997  \n",
       "es_minds14                0.488170           0.522209    0.397315  \n",
       "es_voxpopuli              0.201468           0.143578    0.053721  \n",
       "en_google_fleurs          0.108609           0.111466    0.088609  \n",
       "en_minds14                0.424969           0.424984    0.363642  \n",
       "en_voxpopuli              0.402555           0.118296    0.054176  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the table from word_wer_soft_metrics, labelling rows with FULL_DATASET_NAMES\n",
    "# and columns with FULL_LANGUAGE_MODELS, then persist it as CSV under results/.\n",
    "word_wer_soft_metrics_df = pd.DataFrame(word_wer_soft_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "word_wer_soft_metrics_df.to_csv('results/word_wer_soft_metrics.csv')\n",
    "\n",
    "# Render the table explicitly: a bare expression that is not the cell's last statement\n",
    "# is not shown under IPython's default display settings.\n",
    "display(word_wer_soft_metrics_df)\n",
    "\n",
    "# NOTE(review): this summarizes spacy_ner rather than the frame built above; it looks like\n",
    "# a leftover from an earlier cell - confirm it is intended here.\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "629318e6-8c00-413c-99d4-2b7ff559ac3f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.512857</td>\n",
       "      <td>0.351476</td>\n",
       "      <td>0.183268</td>\n",
       "      <td>0.123803</td>\n",
       "      <td>0.095700</td>\n",
       "      <td>0.192525</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.732501</td>\n",
       "      <td>0.554846</td>\n",
       "      <td>0.346042</td>\n",
       "      <td>0.267858</td>\n",
       "      <td>0.244768</td>\n",
       "      <td>0.319302</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.472829</td>\n",
       "      <td>0.364308</td>\n",
       "      <td>0.241434</td>\n",
       "      <td>0.193047</td>\n",
       "      <td>0.210556</td>\n",
       "      <td>0.304289</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.442361</td>\n",
       "      <td>0.321953</td>\n",
       "      <td>0.240016</td>\n",
       "      <td>0.188132</td>\n",
       "      <td>0.174075</td>\n",
       "      <td>0.233362</td>\n",
       "      <td>0.159139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.628774</td>\n",
       "      <td>0.527781</td>\n",
       "      <td>0.472124</td>\n",
       "      <td>0.417764</td>\n",
       "      <td>0.451830</td>\n",
       "      <td>0.456835</td>\n",
       "      <td>0.353934</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.365471</td>\n",
       "      <td>0.304097</td>\n",
       "      <td>0.251867</td>\n",
       "      <td>0.226099</td>\n",
       "      <td>0.235006</td>\n",
       "      <td>0.259228</td>\n",
       "      <td>0.150950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.346586</td>\n",
       "      <td>0.227203</td>\n",
       "      <td>0.158453</td>\n",
       "      <td>0.121399</td>\n",
       "      <td>0.107550</td>\n",
       "      <td>0.123204</td>\n",
       "      <td>0.050265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.446445</td>\n",
       "      <td>0.346742</td>\n",
       "      <td>0.265021</td>\n",
       "      <td>0.229449</td>\n",
       "      <td>0.226477</td>\n",
       "      <td>0.238560</td>\n",
       "      <td>0.147524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.366639</td>\n",
       "      <td>0.270086</td>\n",
       "      <td>0.215487</td>\n",
       "      <td>0.181204</td>\n",
       "      <td>0.221848</td>\n",
       "      <td>0.234268</td>\n",
       "      <td>0.067181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.313010</td>\n",
       "      <td>0.210131</td>\n",
       "      <td>0.144045</td>\n",
       "      <td>0.117567</td>\n",
       "      <td>0.106640</td>\n",
       "      <td>0.165954</td>\n",
       "      <td>0.104103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.633334</td>\n",
       "      <td>0.476970</td>\n",
       "      <td>0.337584</td>\n",
       "      <td>0.275103</td>\n",
       "      <td>0.265102</td>\n",
       "      <td>0.310508</td>\n",
       "      <td>0.168097</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.439105</td>\n",
       "      <td>0.363577</td>\n",
       "      <td>0.310733</td>\n",
       "      <td>0.278968</td>\n",
       "      <td>0.283103</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.198565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.520524</td>\n",
       "      <td>0.358929</td>\n",
       "      <td>0.190407</td>\n",
       "      <td>0.123706</td>\n",
       "      <td>0.098981</td>\n",
       "      <td>0.242890</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.861366</td>\n",
       "      <td>0.666738</td>\n",
       "      <td>0.439214</td>\n",
       "      <td>0.370198</td>\n",
       "      <td>0.361172</td>\n",
       "      <td>0.542831</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.404981</td>\n",
       "      <td>0.301113</td>\n",
       "      <td>0.194702</td>\n",
       "      <td>0.156644</td>\n",
       "      <td>0.151601</td>\n",
       "      <td>0.240070</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.204884</td>\n",
       "      <td>0.135018</td>\n",
       "      <td>0.086281</td>\n",
       "      <td>0.078608</td>\n",
       "      <td>0.067940</td>\n",
       "      <td>0.105327</td>\n",
       "      <td>0.050019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.752425</td>\n",
       "      <td>0.601240</td>\n",
       "      <td>0.511320</td>\n",
       "      <td>0.505483</td>\n",
       "      <td>0.497249</td>\n",
       "      <td>0.535758</td>\n",
       "      <td>0.401730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.233013</td>\n",
       "      <td>0.179737</td>\n",
       "      <td>0.202485</td>\n",
       "      <td>0.167919</td>\n",
       "      <td>0.208381</td>\n",
       "      <td>0.148001</td>\n",
       "      <td>0.054963</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.198209</td>\n",
       "      <td>0.157780</td>\n",
       "      <td>0.125360</td>\n",
       "      <td>0.111138</td>\n",
       "      <td>0.112012</td>\n",
       "      <td>0.116211</td>\n",
       "      <td>0.092322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.463499</td>\n",
       "      <td>0.446222</td>\n",
       "      <td>0.442346</td>\n",
       "      <td>0.444175</td>\n",
       "      <td>0.438048</td>\n",
       "      <td>0.434445</td>\n",
       "      <td>0.371188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.325976</td>\n",
       "      <td>0.294154</td>\n",
       "      <td>0.306453</td>\n",
       "      <td>0.451091</td>\n",
       "      <td>0.414535</td>\n",
       "      <td>0.120754</td>\n",
       "      <td>0.055428</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.512857      0.351476       0.183268        0.123803   \n",
       "nl_minds14            0.732501      0.554846       0.346042        0.267858   \n",
       "nl_voxpopuli          0.472829      0.364308       0.241434        0.193047   \n",
       "fr_google_fleurs      0.442361      0.321953       0.240016        0.188132   \n",
       "fr_minds14            0.628774      0.527781       0.472124        0.417764   \n",
       "fr_voxpopuli          0.365471      0.304097       0.251867        0.226099   \n",
       "de_google_fleurs      0.346586      0.227203       0.158453        0.121399   \n",
       "de_minds14            0.446445      0.346742       0.265021        0.229449   \n",
       "de_voxpopuli          0.366639      0.270086       0.215487        0.181204   \n",
       "it_google_fleurs      0.313010      0.210131       0.144045        0.117567   \n",
       "it_minds14            0.633334      0.476970       0.337584        0.275103   \n",
       "it_voxpopuli          0.439105      0.363577       0.310733        0.278968   \n",
       "pl_google_fleurs      0.520524      0.358929       0.190407        0.123706   \n",
       "pl_minds14            0.861366      0.666738       0.439214        0.370198   \n",
       "pl_voxpopuli          0.404981      0.301113       0.194702        0.156644   \n",
       "es_google_fleurs      0.204884      0.135018       0.086281        0.078608   \n",
       "es_minds14            0.752425      0.601240       0.511320        0.505483   \n",
       "es_voxpopuli          0.233013      0.179737       0.202485        0.167919   \n",
       "en_google_fleurs      0.198209      0.157780       0.125360        0.111138   \n",
       "en_minds14            0.463499      0.446222       0.442346        0.444175   \n",
       "en_voxpopuli          0.325976      0.294154       0.306453        0.451091   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.095700           0.192525   -1.000000  \n",
       "nl_minds14                0.244768           0.319302   -1.000000  \n",
       "nl_voxpopuli              0.210556           0.304289   -1.000000  \n",
       "fr_google_fleurs          0.174075           0.233362    0.159139  \n",
       "fr_minds14                0.451830           0.456835    0.353934  \n",
       "fr_voxpopuli              0.235006           0.259228    0.150950  \n",
       "de_google_fleurs          0.107550           0.123204    0.050265  \n",
       "de_minds14                0.226477           0.238560    0.147524  \n",
       "de_voxpopuli              0.221848           0.234268    0.067181  \n",
       "it_google_fleurs          0.106640           0.165954    0.104103  \n",
       "it_minds14                0.265102           0.310508    0.168097  \n",
       "it_voxpopuli              0.283103          -1.000000    0.198565  \n",
       "pl_google_fleurs          0.098981           0.242890   -1.000000  \n",
       "pl_minds14                0.361172           0.542831   -1.000000  \n",
       "pl_voxpopuli              0.151601           0.240070   -1.000000  \n",
       "es_google_fleurs          0.067940           0.105327    0.050019  \n",
       "es_minds14                0.497249           0.535758    0.401730  \n",
       "es_voxpopuli              0.208381           0.148001    0.054963  \n",
       "en_google_fleurs          0.112012           0.116211    0.092322  \n",
       "en_minds14                0.438048           0.434445    0.371188  \n",
       "en_voxpopuli              0.414535           0.120754    0.055428  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the table from word_wer_embedding_metrics, labelling rows with FULL_DATASET_NAMES\n",
    "# and columns with FULL_LANGUAGE_MODELS, then persist it as CSV under results/.\n",
    "word_wer_embedding_metrics_df = pd.DataFrame(word_wer_embedding_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "word_wer_embedding_metrics_df.to_csv('results/word_wer_embedding_metrics.csv')\n",
    "\n",
    "# Render the table explicitly: a bare expression that is not the cell's last statement\n",
    "# is not shown under IPython's default display settings.\n",
    "display(word_wer_embedding_metrics_df)\n",
    "\n",
    "# NOTE(review): this summarizes spacy_ner rather than the frame built above; it looks like\n",
    "# a leftover from an earlier cell - confirm it is intended here.\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "99bfad3e-3c9f-42d6-9a36-ce1914b16bb5",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.510993</td>\n",
       "      <td>0.364093</td>\n",
       "      <td>0.233944</td>\n",
       "      <td>0.194375</td>\n",
       "      <td>0.176388</td>\n",
       "      <td>0.127387</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.749436</td>\n",
       "      <td>0.563341</td>\n",
       "      <td>0.400222</td>\n",
       "      <td>0.337951</td>\n",
       "      <td>0.321183</td>\n",
       "      <td>0.253165</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.393880</td>\n",
       "      <td>0.294984</td>\n",
       "      <td>0.187720</td>\n",
       "      <td>0.148644</td>\n",
       "      <td>0.167895</td>\n",
       "      <td>0.314945</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.433044</td>\n",
       "      <td>0.304306</td>\n",
       "      <td>0.222197</td>\n",
       "      <td>0.178437</td>\n",
       "      <td>0.165940</td>\n",
       "      <td>0.114709</td>\n",
       "      <td>0.062883</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.664310</td>\n",
       "      <td>0.565113</td>\n",
       "      <td>0.509531</td>\n",
       "      <td>0.449146</td>\n",
       "      <td>0.490874</td>\n",
       "      <td>0.329511</td>\n",
       "      <td>0.231802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.271206</td>\n",
       "      <td>0.200462</td>\n",
       "      <td>0.143015</td>\n",
       "      <td>0.116287</td>\n",
       "      <td>0.121793</td>\n",
       "      <td>0.250052</td>\n",
       "      <td>0.183570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.398803</td>\n",
       "      <td>0.298202</td>\n",
       "      <td>0.238771</td>\n",
       "      <td>0.233886</td>\n",
       "      <td>0.225833</td>\n",
       "      <td>0.090344</td>\n",
       "      <td>0.045677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.541808</td>\n",
       "      <td>0.458428</td>\n",
       "      <td>0.394453</td>\n",
       "      <td>0.366073</td>\n",
       "      <td>0.366372</td>\n",
       "      <td>0.216899</td>\n",
       "      <td>0.167290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.310177</td>\n",
       "      <td>0.212666</td>\n",
       "      <td>0.156219</td>\n",
       "      <td>0.127821</td>\n",
       "      <td>0.165711</td>\n",
       "      <td>0.318096</td>\n",
       "      <td>0.215976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.334621</td>\n",
       "      <td>0.248942</td>\n",
       "      <td>0.206167</td>\n",
       "      <td>0.171781</td>\n",
       "      <td>0.175235</td>\n",
       "      <td>0.110213</td>\n",
       "      <td>0.066707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.669161</td>\n",
       "      <td>0.531287</td>\n",
       "      <td>0.431609</td>\n",
       "      <td>0.369645</td>\n",
       "      <td>0.358544</td>\n",
       "      <td>0.267590</td>\n",
       "      <td>0.162753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.387963</td>\n",
       "      <td>0.318896</td>\n",
       "      <td>0.260937</td>\n",
       "      <td>0.225710</td>\n",
       "      <td>0.228727</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.253023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.500755</td>\n",
       "      <td>0.379842</td>\n",
       "      <td>0.260604</td>\n",
       "      <td>0.225961</td>\n",
       "      <td>0.216013</td>\n",
       "      <td>0.133794</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.914857</td>\n",
       "      <td>0.762013</td>\n",
       "      <td>0.564110</td>\n",
       "      <td>0.499567</td>\n",
       "      <td>0.505554</td>\n",
       "      <td>0.405276</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.303221</td>\n",
       "      <td>0.213051</td>\n",
       "      <td>0.135739</td>\n",
       "      <td>0.102749</td>\n",
       "      <td>0.098353</td>\n",
       "      <td>0.296488</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.254765</td>\n",
       "      <td>0.196087</td>\n",
       "      <td>0.162556</td>\n",
       "      <td>0.172763</td>\n",
       "      <td>0.169661</td>\n",
       "      <td>0.082617</td>\n",
       "      <td>0.051719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.830240</td>\n",
       "      <td>0.721083</td>\n",
       "      <td>0.667662</td>\n",
       "      <td>0.661177</td>\n",
       "      <td>0.669886</td>\n",
       "      <td>0.480701</td>\n",
       "      <td>0.396179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.210093</td>\n",
       "      <td>0.164819</td>\n",
       "      <td>0.174829</td>\n",
       "      <td>0.142208</td>\n",
       "      <td>0.168499</td>\n",
       "      <td>0.232005</td>\n",
       "      <td>0.188607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.263058</td>\n",
       "      <td>0.214739</td>\n",
       "      <td>0.192109</td>\n",
       "      <td>0.188423</td>\n",
       "      <td>0.189492</td>\n",
       "      <td>0.156390</td>\n",
       "      <td>0.075239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.532602</td>\n",
       "      <td>0.518411</td>\n",
       "      <td>0.523873</td>\n",
       "      <td>0.524760</td>\n",
       "      <td>0.517753</td>\n",
       "      <td>0.391206</td>\n",
       "      <td>0.332978</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.280769</td>\n",
       "      <td>0.252505</td>\n",
       "      <td>0.246012</td>\n",
       "      <td>0.364994</td>\n",
       "      <td>0.296381</td>\n",
       "      <td>0.210788</td>\n",
       "      <td>0.167197</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.510993      0.364093       0.233944        0.194375   \n",
       "nl_minds14            0.749436      0.563341       0.400222        0.337951   \n",
       "nl_voxpopuli          0.393880      0.294984       0.187720        0.148644   \n",
       "fr_google_fleurs      0.433044      0.304306       0.222197        0.178437   \n",
       "fr_minds14            0.664310      0.565113       0.509531        0.449146   \n",
       "fr_voxpopuli          0.271206      0.200462       0.143015        0.116287   \n",
       "de_google_fleurs      0.398803      0.298202       0.238771        0.233886   \n",
       "de_minds14            0.541808      0.458428       0.394453        0.366073   \n",
       "de_voxpopuli          0.310177      0.212666       0.156219        0.127821   \n",
       "it_google_fleurs      0.334621      0.248942       0.206167        0.171781   \n",
       "it_minds14            0.669161      0.531287       0.431609        0.369645   \n",
       "it_voxpopuli          0.387963      0.318896       0.260937        0.225710   \n",
       "pl_google_fleurs      0.500755      0.379842       0.260604        0.225961   \n",
       "pl_minds14            0.914857      0.762013       0.564110        0.499567   \n",
       "pl_voxpopuli          0.303221      0.213051       0.135739        0.102749   \n",
       "es_google_fleurs      0.254765      0.196087       0.162556        0.172763   \n",
       "es_minds14            0.830240      0.721083       0.667662        0.661177   \n",
       "es_voxpopuli          0.210093      0.164819       0.174829        0.142208   \n",
       "en_google_fleurs      0.263058      0.214739       0.192109        0.188423   \n",
       "en_minds14            0.532602      0.518411       0.523873        0.524760   \n",
       "en_voxpopuli          0.280769      0.252505       0.246012        0.364994   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.176388           0.127387   -1.000000  \n",
       "nl_minds14                0.321183           0.253165   -1.000000  \n",
       "nl_voxpopuli              0.167895           0.314945   -1.000000  \n",
       "fr_google_fleurs          0.165940           0.114709    0.062883  \n",
       "fr_minds14                0.490874           0.329511    0.231802  \n",
       "fr_voxpopuli              0.121793           0.250052    0.183570  \n",
       "de_google_fleurs          0.225833           0.090344    0.045677  \n",
       "de_minds14                0.366372           0.216899    0.167290  \n",
       "de_voxpopuli              0.165711           0.318096    0.215976  \n",
       "it_google_fleurs          0.175235           0.110213    0.066707  \n",
       "it_minds14                0.358544           0.267590    0.162753  \n",
       "it_voxpopuli              0.228727          -1.000000    0.253023  \n",
       "pl_google_fleurs          0.216013           0.133794   -1.000000  \n",
       "pl_minds14                0.505554           0.405276   -1.000000  \n",
       "pl_voxpopuli              0.098353           0.296488   -1.000000  \n",
       "es_google_fleurs          0.169661           0.082617    0.051719  \n",
       "es_minds14                0.669886           0.480701    0.396179  \n",
       "es_voxpopuli              0.168499           0.232005    0.188607  \n",
       "en_google_fleurs          0.189492           0.156390    0.075239  \n",
       "en_minds14                0.517753           0.391206    0.332978  \n",
       "en_voxpopuli              0.296381           0.210788    0.167197  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flair_pos_df = pd.DataFrame(flair_pos, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "flair_pos_df.to_csv('results/flair_pos.csv')\n",
    "flair_pos_df\n",
    "\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "7275b2b0-957b-4618-9f66-7b88302f896a",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.510993</td>\n",
       "      <td>0.364093</td>\n",
       "      <td>0.233944</td>\n",
       "      <td>0.194375</td>\n",
       "      <td>0.176388</td>\n",
       "      <td>0.127387</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.749436</td>\n",
       "      <td>0.563341</td>\n",
       "      <td>0.400222</td>\n",
       "      <td>0.337951</td>\n",
       "      <td>0.321183</td>\n",
       "      <td>0.253165</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.393880</td>\n",
       "      <td>0.294984</td>\n",
       "      <td>0.187720</td>\n",
       "      <td>0.148644</td>\n",
       "      <td>0.167895</td>\n",
       "      <td>0.314945</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.433044</td>\n",
       "      <td>0.304306</td>\n",
       "      <td>0.222197</td>\n",
       "      <td>0.178437</td>\n",
       "      <td>0.165940</td>\n",
       "      <td>0.114709</td>\n",
       "      <td>0.062883</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.664310</td>\n",
       "      <td>0.565113</td>\n",
       "      <td>0.509531</td>\n",
       "      <td>0.449146</td>\n",
       "      <td>0.490874</td>\n",
       "      <td>0.329511</td>\n",
       "      <td>0.231802</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.271206</td>\n",
       "      <td>0.200462</td>\n",
       "      <td>0.143015</td>\n",
       "      <td>0.116287</td>\n",
       "      <td>0.121793</td>\n",
       "      <td>0.250052</td>\n",
       "      <td>0.183570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.398803</td>\n",
       "      <td>0.298202</td>\n",
       "      <td>0.238771</td>\n",
       "      <td>0.233886</td>\n",
       "      <td>0.225833</td>\n",
       "      <td>0.090344</td>\n",
       "      <td>0.045677</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.541808</td>\n",
       "      <td>0.458428</td>\n",
       "      <td>0.394453</td>\n",
       "      <td>0.366073</td>\n",
       "      <td>0.366372</td>\n",
       "      <td>0.216899</td>\n",
       "      <td>0.167290</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.310177</td>\n",
       "      <td>0.212666</td>\n",
       "      <td>0.156219</td>\n",
       "      <td>0.127821</td>\n",
       "      <td>0.165711</td>\n",
       "      <td>0.318096</td>\n",
       "      <td>0.215976</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.334621</td>\n",
       "      <td>0.248942</td>\n",
       "      <td>0.206167</td>\n",
       "      <td>0.171781</td>\n",
       "      <td>0.175235</td>\n",
       "      <td>0.110213</td>\n",
       "      <td>0.066707</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.669161</td>\n",
       "      <td>0.531287</td>\n",
       "      <td>0.431609</td>\n",
       "      <td>0.369645</td>\n",
       "      <td>0.358544</td>\n",
       "      <td>0.267590</td>\n",
       "      <td>0.162753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.387963</td>\n",
       "      <td>0.318896</td>\n",
       "      <td>0.260937</td>\n",
       "      <td>0.225710</td>\n",
       "      <td>0.228727</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.253023</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.500755</td>\n",
       "      <td>0.379842</td>\n",
       "      <td>0.260604</td>\n",
       "      <td>0.225961</td>\n",
       "      <td>0.216013</td>\n",
       "      <td>0.133794</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.914857</td>\n",
       "      <td>0.762013</td>\n",
       "      <td>0.564110</td>\n",
       "      <td>0.499567</td>\n",
       "      <td>0.505554</td>\n",
       "      <td>0.405276</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.303221</td>\n",
       "      <td>0.213051</td>\n",
       "      <td>0.135739</td>\n",
       "      <td>0.102749</td>\n",
       "      <td>0.098353</td>\n",
       "      <td>0.296488</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.254765</td>\n",
       "      <td>0.196087</td>\n",
       "      <td>0.162556</td>\n",
       "      <td>0.172763</td>\n",
       "      <td>0.169661</td>\n",
       "      <td>0.082617</td>\n",
       "      <td>0.051719</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.830240</td>\n",
       "      <td>0.721083</td>\n",
       "      <td>0.667662</td>\n",
       "      <td>0.661177</td>\n",
       "      <td>0.669886</td>\n",
       "      <td>0.480701</td>\n",
       "      <td>0.396179</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.210093</td>\n",
       "      <td>0.164819</td>\n",
       "      <td>0.174829</td>\n",
       "      <td>0.142208</td>\n",
       "      <td>0.168499</td>\n",
       "      <td>0.232005</td>\n",
       "      <td>0.188607</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.263058</td>\n",
       "      <td>0.214739</td>\n",
       "      <td>0.192109</td>\n",
       "      <td>0.188423</td>\n",
       "      <td>0.189492</td>\n",
       "      <td>0.156390</td>\n",
       "      <td>0.075239</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.532602</td>\n",
       "      <td>0.518411</td>\n",
       "      <td>0.523873</td>\n",
       "      <td>0.524760</td>\n",
       "      <td>0.517753</td>\n",
       "      <td>0.391206</td>\n",
       "      <td>0.332978</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.280769</td>\n",
       "      <td>0.252505</td>\n",
       "      <td>0.246012</td>\n",
       "      <td>0.364994</td>\n",
       "      <td>0.296381</td>\n",
       "      <td>0.210788</td>\n",
       "      <td>0.167197</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.510993      0.364093       0.233944        0.194375   \n",
       "nl_minds14            0.749436      0.563341       0.400222        0.337951   \n",
       "nl_voxpopuli          0.393880      0.294984       0.187720        0.148644   \n",
       "fr_google_fleurs      0.433044      0.304306       0.222197        0.178437   \n",
       "fr_minds14            0.664310      0.565113       0.509531        0.449146   \n",
       "fr_voxpopuli          0.271206      0.200462       0.143015        0.116287   \n",
       "de_google_fleurs      0.398803      0.298202       0.238771        0.233886   \n",
       "de_minds14            0.541808      0.458428       0.394453        0.366073   \n",
       "de_voxpopuli          0.310177      0.212666       0.156219        0.127821   \n",
       "it_google_fleurs      0.334621      0.248942       0.206167        0.171781   \n",
       "it_minds14            0.669161      0.531287       0.431609        0.369645   \n",
       "it_voxpopuli          0.387963      0.318896       0.260937        0.225710   \n",
       "pl_google_fleurs      0.500755      0.379842       0.260604        0.225961   \n",
       "pl_minds14            0.914857      0.762013       0.564110        0.499567   \n",
       "pl_voxpopuli          0.303221      0.213051       0.135739        0.102749   \n",
       "es_google_fleurs      0.254765      0.196087       0.162556        0.172763   \n",
       "es_minds14            0.830240      0.721083       0.667662        0.661177   \n",
       "es_voxpopuli          0.210093      0.164819       0.174829        0.142208   \n",
       "en_google_fleurs      0.263058      0.214739       0.192109        0.188423   \n",
       "en_minds14            0.532602      0.518411       0.523873        0.524760   \n",
       "en_voxpopuli          0.280769      0.252505       0.246012        0.364994   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.176388           0.127387   -1.000000  \n",
       "nl_minds14                0.321183           0.253165   -1.000000  \n",
       "nl_voxpopuli              0.167895           0.314945   -1.000000  \n",
       "fr_google_fleurs          0.165940           0.114709    0.062883  \n",
       "fr_minds14                0.490874           0.329511    0.231802  \n",
       "fr_voxpopuli              0.121793           0.250052    0.183570  \n",
       "de_google_fleurs          0.225833           0.090344    0.045677  \n",
       "de_minds14                0.366372           0.216899    0.167290  \n",
       "de_voxpopuli              0.165711           0.318096    0.215976  \n",
       "it_google_fleurs          0.175235           0.110213    0.066707  \n",
       "it_minds14                0.358544           0.267590    0.162753  \n",
       "it_voxpopuli              0.228727          -1.000000    0.253023  \n",
       "pl_google_fleurs          0.216013           0.133794   -1.000000  \n",
       "pl_minds14                0.505554           0.405276   -1.000000  \n",
       "pl_voxpopuli              0.098353           0.296488   -1.000000  \n",
       "es_google_fleurs          0.169661           0.082617    0.051719  \n",
       "es_minds14                0.669886           0.480701    0.396179  \n",
       "es_voxpopuli              0.168499           0.232005    0.188607  \n",
       "en_google_fleurs          0.189492           0.156390    0.075239  \n",
       "en_minds14                0.517753           0.391206    0.332978  \n",
       "en_voxpopuli              0.296381           0.210788    0.167197  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "flair_pos_df = pd.DataFrame(flair_pos, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "flair_pos_df.to_csv('results/flair_pos.csv')\n",
    "flair_pos_df\n",
    "\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "5a4f9e8e-9c0e-44e5-9426-655c400ea054",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.215391</td>\n",
       "      <td>0.174029</td>\n",
       "      <td>0.125444</td>\n",
       "      <td>0.115182</td>\n",
       "      <td>0.113159</td>\n",
       "      <td>0.089213</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.343079</td>\n",
       "      <td>0.280673</td>\n",
       "      <td>0.216319</td>\n",
       "      <td>0.201182</td>\n",
       "      <td>0.198545</td>\n",
       "      <td>0.151310</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.216156</td>\n",
       "      <td>0.189828</td>\n",
       "      <td>0.141318</td>\n",
       "      <td>0.133931</td>\n",
       "      <td>0.147112</td>\n",
       "      <td>0.176515</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.208012</td>\n",
       "      <td>0.146742</td>\n",
       "      <td>0.128173</td>\n",
       "      <td>0.106214</td>\n",
       "      <td>0.097691</td>\n",
       "      <td>0.068703</td>\n",
       "      <td>0.045601</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.383273</td>\n",
       "      <td>0.356633</td>\n",
       "      <td>0.346255</td>\n",
       "      <td>0.330446</td>\n",
       "      <td>0.365426</td>\n",
       "      <td>0.248440</td>\n",
       "      <td>0.193615</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.157518</td>\n",
       "      <td>0.126534</td>\n",
       "      <td>0.104213</td>\n",
       "      <td>0.089124</td>\n",
       "      <td>0.095847</td>\n",
       "      <td>0.147897</td>\n",
       "      <td>0.118277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.178717</td>\n",
       "      <td>0.140455</td>\n",
       "      <td>0.153612</td>\n",
       "      <td>0.130936</td>\n",
       "      <td>0.135413</td>\n",
       "      <td>0.069640</td>\n",
       "      <td>0.049105</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.295491</td>\n",
       "      <td>0.264049</td>\n",
       "      <td>0.246428</td>\n",
       "      <td>0.232066</td>\n",
       "      <td>0.234698</td>\n",
       "      <td>0.172801</td>\n",
       "      <td>0.140307</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.178856</td>\n",
       "      <td>0.137537</td>\n",
       "      <td>0.105534</td>\n",
       "      <td>0.087482</td>\n",
       "      <td>0.124275</td>\n",
       "      <td>0.164667</td>\n",
       "      <td>0.126233</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.154465</td>\n",
       "      <td>0.123694</td>\n",
       "      <td>0.123264</td>\n",
       "      <td>0.107109</td>\n",
       "      <td>0.110015</td>\n",
       "      <td>0.060042</td>\n",
       "      <td>0.037594</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.343251</td>\n",
       "      <td>0.273268</td>\n",
       "      <td>0.240797</td>\n",
       "      <td>0.213506</td>\n",
       "      <td>0.211958</td>\n",
       "      <td>0.147876</td>\n",
       "      <td>0.110599</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.152579</td>\n",
       "      <td>0.146635</td>\n",
       "      <td>0.142331</td>\n",
       "      <td>0.125299</td>\n",
       "      <td>0.126924</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.145502</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.200009</td>\n",
       "      <td>0.163202</td>\n",
       "      <td>0.127170</td>\n",
       "      <td>0.116060</td>\n",
       "      <td>0.112860</td>\n",
       "      <td>0.091837</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.454800</td>\n",
       "      <td>0.415696</td>\n",
       "      <td>0.311585</td>\n",
       "      <td>0.310715</td>\n",
       "      <td>0.316154</td>\n",
       "      <td>0.279046</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.175804</td>\n",
       "      <td>0.137694</td>\n",
       "      <td>0.101624</td>\n",
       "      <td>0.084531</td>\n",
       "      <td>0.081097</td>\n",
       "      <td>0.152815</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.139403</td>\n",
       "      <td>0.105495</td>\n",
       "      <td>0.095208</td>\n",
       "      <td>0.106332</td>\n",
       "      <td>0.104021</td>\n",
       "      <td>0.063813</td>\n",
       "      <td>0.038414</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.533874</td>\n",
       "      <td>0.480372</td>\n",
       "      <td>0.472338</td>\n",
       "      <td>0.480882</td>\n",
       "      <td>0.483780</td>\n",
       "      <td>0.359815</td>\n",
       "      <td>0.290631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.110894</td>\n",
       "      <td>0.098927</td>\n",
       "      <td>0.100773</td>\n",
       "      <td>0.087911</td>\n",
       "      <td>0.096432</td>\n",
       "      <td>0.122212</td>\n",
       "      <td>0.116315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.151080</td>\n",
       "      <td>0.134344</td>\n",
       "      <td>0.130206</td>\n",
       "      <td>0.131738</td>\n",
       "      <td>0.132967</td>\n",
       "      <td>1.255453</td>\n",
       "      <td>0.049170</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.381197</td>\n",
       "      <td>0.386708</td>\n",
       "      <td>0.395517</td>\n",
       "      <td>0.399133</td>\n",
       "      <td>0.393609</td>\n",
       "      <td>1.444793</td>\n",
       "      <td>0.284332</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.258779</td>\n",
       "      <td>0.212418</td>\n",
       "      <td>0.217320</td>\n",
       "      <td>0.337455</td>\n",
       "      <td>0.292532</td>\n",
       "      <td>1.211453</td>\n",
       "      <td>0.120684</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.215391      0.174029       0.125444        0.115182   \n",
       "nl_minds14            0.343079      0.280673       0.216319        0.201182   \n",
       "nl_voxpopuli          0.216156      0.189828       0.141318        0.133931   \n",
       "fr_google_fleurs      0.208012      0.146742       0.128173        0.106214   \n",
       "fr_minds14            0.383273      0.356633       0.346255        0.330446   \n",
       "fr_voxpopuli          0.157518      0.126534       0.104213        0.089124   \n",
       "de_google_fleurs      0.178717      0.140455       0.153612        0.130936   \n",
       "de_minds14            0.295491      0.264049       0.246428        0.232066   \n",
       "de_voxpopuli          0.178856      0.137537       0.105534        0.087482   \n",
       "it_google_fleurs      0.154465      0.123694       0.123264        0.107109   \n",
       "it_minds14            0.343251      0.273268       0.240797        0.213506   \n",
       "it_voxpopuli          0.152579      0.146635       0.142331        0.125299   \n",
       "pl_google_fleurs      0.200009      0.163202       0.127170        0.116060   \n",
       "pl_minds14            0.454800      0.415696       0.311585        0.310715   \n",
       "pl_voxpopuli          0.175804      0.137694       0.101624        0.084531   \n",
       "es_google_fleurs      0.139403      0.105495       0.095208        0.106332   \n",
       "es_minds14            0.533874      0.480372       0.472338        0.480882   \n",
       "es_voxpopuli          0.110894      0.098927       0.100773        0.087911   \n",
       "en_google_fleurs      0.151080      0.134344       0.130206        0.131738   \n",
       "en_minds14            0.381197      0.386708       0.395517        0.399133   \n",
       "en_voxpopuli          0.258779      0.212418       0.217320        0.337455   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.113159           0.089213   -1.000000  \n",
       "nl_minds14                0.198545           0.151310   -1.000000  \n",
       "nl_voxpopuli              0.147112           0.176515   -1.000000  \n",
       "fr_google_fleurs          0.097691           0.068703    0.045601  \n",
       "fr_minds14                0.365426           0.248440    0.193615  \n",
       "fr_voxpopuli              0.095847           0.147897    0.118277  \n",
       "de_google_fleurs          0.135413           0.069640    0.049105  \n",
       "de_minds14                0.234698           0.172801    0.140307  \n",
       "de_voxpopuli              0.124275           0.164667    0.126233  \n",
       "it_google_fleurs          0.110015           0.060042    0.037594  \n",
       "it_minds14                0.211958           0.147876    0.110599  \n",
       "it_voxpopuli              0.126924          -1.000000    0.145502  \n",
       "pl_google_fleurs          0.112860           0.091837   -1.000000  \n",
       "pl_minds14                0.316154           0.279046   -1.000000  \n",
       "pl_voxpopuli              0.081097           0.152815   -1.000000  \n",
       "es_google_fleurs          0.104021           0.063813    0.038414  \n",
       "es_minds14                0.483780           0.359815    0.290631  \n",
       "es_voxpopuli              0.096432           0.122212    0.116315  \n",
       "en_google_fleurs          0.132967           1.255453    0.049170  \n",
       "en_minds14                0.393609           1.444793    0.284332  \n",
       "en_voxpopuli              0.292532           1.211453    0.120684  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build the wikineural_ner table (rows: FULL_DATASET_NAMES, columns: FULL_LANGUAGE_MODELS) and save it as CSV.\n",
    "wikineural_ner_df = pd.DataFrame(wikineural_ner, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "wikineural_ner_df.to_csv('results/wikineural_ner.csv')\n",
    "# A bare expression is rendered only when it is the last statement of a cell,\n",
    "# so this table has to be shown explicitly to appear in the output.\n",
    "display(wikineural_ner_df)\n",
    "\n",
    "# Last expression: whatever summarize_df returns becomes this cell's result.\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d4a212b-7437-4fa2-9e4e-06db21da1855",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.15"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}