Skip to content
Snippets Groups Projects
metrics.ipynb 131 KiB
Newer Older
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.210752</td>\n",
       "      <td>0.201585</td>\n",
       "      <td>0.422140</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.341816</td>\n",
       "      <td>0.278543</td>\n",
       "      <td>0.226821</td>\n",
       "      <td>0.227239</td>\n",
       "      <td>0.220248</td>\n",
       "      <td>0.135718</td>\n",
       "      <td>0.069997</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.992263</td>\n",
       "      <td>0.828084</td>\n",
       "      <td>0.799141</td>\n",
       "      <td>0.791115</td>\n",
       "      <td>0.799426</td>\n",
       "      <td>0.591663</td>\n",
       "      <td>0.435506</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.304887</td>\n",
       "      <td>0.249827</td>\n",
       "      <td>0.277536</td>\n",
       "      <td>0.240640</td>\n",
       "      <td>0.280930</td>\n",
       "      <td>0.276648</td>\n",
       "      <td>0.210668</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.341285</td>\n",
       "      <td>0.285416</td>\n",
       "      <td>0.262014</td>\n",
       "      <td>0.249445</td>\n",
       "      <td>0.251211</td>\n",
       "      <td>0.398297</td>\n",
       "      <td>0.099033</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.681148</td>\n",
       "      <td>0.666131</td>\n",
       "      <td>0.669723</td>\n",
       "      <td>0.669332</td>\n",
       "      <td>0.661842</td>\n",
       "      <td>0.627539</td>\n",
       "      <td>0.361619</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.364437</td>\n",
       "      <td>0.335141</td>\n",
       "      <td>0.333144</td>\n",
       "      <td>0.481083</td>\n",
       "      <td>0.419667</td>\n",
       "      <td>0.402100</td>\n",
       "      <td>0.170951</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
Marcin Wątroba's avatar
Marcin Wątroba committed
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "nl_google_fleurs      0.699699      0.533595       0.366764        0.300730   \n",
       "nl_minds14            0.941359      0.778265       0.584732        0.511929   \n",
       "nl_voxpopuli          0.553280      0.435277       0.304322        0.252270   \n",
       "fr_google_fleurs      0.580527      0.429523       0.337506        0.275466   \n",
       "fr_minds14            0.800999      0.714124       0.647957        0.592392   \n",
       "fr_voxpopuli          0.387866      0.307476       0.240038        0.205174   \n",
       "de_google_fleurs      0.519535      0.424735       0.360695        0.353459   \n",
       "de_minds14            0.693370      0.628170       0.570571        0.543742   \n",
       "de_voxpopuli          0.396771      0.298134       0.236937        0.204998   \n",
       "it_google_fleurs      0.453637      0.334587       0.269876        0.234494   \n",
       "it_minds14            0.814580      0.681371       0.576940        0.511340   \n",
       "it_voxpopuli          0.483728      0.401518       0.332556        0.290310   \n",
       "pl_google_fleurs      0.741445      0.580439       0.420468        0.365168   \n",
       "pl_minds14            1.138465      0.999350       0.817470        0.738430   \n",
       "pl_voxpopuli          0.479609      0.366738       0.257558        0.210752   \n",
       "es_google_fleurs      0.341816      0.278543       0.226821        0.227239   \n",
       "es_minds14            0.992263      0.828084       0.799141        0.791115   \n",
       "es_voxpopuli          0.304887      0.249827       0.277536        0.240640   \n",
       "en_google_fleurs      0.341285      0.285416       0.262014        0.249445   \n",
       "en_minds14            0.681148      0.666131       0.669723        0.669332   \n",
       "en_voxpopuli          0.364437      0.335141       0.333144        0.481083   \n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "nl_google_fleurs          0.282070           0.246416   -1.000000  \n",
       "nl_minds14                0.490065           0.376911   -1.000000  \n",
       "nl_voxpopuli              0.268306           0.430234   -1.000000  \n",
       "fr_google_fleurs          0.259405           0.205104    0.114100  \n",
       "fr_minds14                0.613262           0.421050    0.284212  \n",
       "fr_voxpopuli              0.210248           0.323655    0.232059  \n",
       "de_google_fleurs          0.345089           0.139605    0.074235  \n",
       "de_minds14                0.546479           0.288109    0.216011  \n",
       "de_voxpopuli              0.241773           0.385364    0.271072  \n",
       "it_google_fleurs          0.232862           0.168723    0.089945  \n",
       "it_minds14                0.495661           0.376479    0.224318  \n",
       "it_voxpopuli              0.291917          -1.000000    0.288211  \n",
       "pl_google_fleurs          0.348206           0.303350   -1.000000  \n",
       "pl_minds14                0.754548           0.587577   -1.000000  \n",
       "pl_voxpopuli              0.201585           0.422140   -1.000000  \n",
       "es_google_fleurs          0.220248           0.135718    0.069997  \n",
       "es_minds14                0.799426           0.591663    0.435506  \n",
       "es_voxpopuli              0.280930           0.276648    0.210668  \n",
       "en_google_fleurs          0.251211           0.398297    0.099033  \n",
       "en_minds14                0.661842           0.627539    0.361619  \n",
       "en_voxpopuli              0.419667           0.402100    0.170951  "
Marcin Wątroba's avatar
Marcin Wątroba committed
     "execution_count": 21,
Marcin Wątroba's avatar
Marcin Wątroba committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
Marcin Wątroba's avatar
Marcin Wątroba committed
    "spacy_dep_df = pd.DataFrame(spacy_dep, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "spacy_dep_df.to_csv('results/spacy_dep.csv')\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
    "spacy_dep_df\n",
    "\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
Marcin Wątroba's avatar
Marcin Wątroba committed
   ]
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 22,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "3dbfbb6e-c369-47fd-801c-6df211943dc1",
   "metadata": {},
   "outputs": [
Marcin Wątroba's avatar
Marcin Wątroba committed
    {
Marcin Wątroba's avatar
Marcin Wątroba committed
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.708020</td>\n",
       "      <td>0.535692</td>\n",
       "      <td>0.365346</td>\n",
       "      <td>0.296100</td>\n",
       "      <td>0.261951</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.273752</td>\n",
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.897447</td>\n",
       "      <td>0.714498</td>\n",
       "      <td>0.503436</td>\n",
       "      <td>0.419083</td>\n",
       "      <td>0.389125</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.465494</td>\n",
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.645715</td>\n",
       "      <td>0.526939</td>\n",
       "      <td>0.396940</td>\n",
       "      <td>0.345034</td>\n",
       "      <td>0.358023</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.380835</td>\n",
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.600185</td>\n",
       "      <td>0.470808</td>\n",
       "      <td>0.378478</td>\n",
       "      <td>0.324236</td>\n",
       "      <td>0.309570</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.305183</td>\n",
       "      <td>0.206433</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.805977</td>\n",
       "      <td>0.700773</td>\n",
       "      <td>0.642619</td>\n",
       "      <td>0.583323</td>\n",
       "      <td>0.616411</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.564885</td>\n",
       "      <td>0.441154</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.510623</td>\n",
       "      <td>0.440340</td>\n",
       "      <td>0.382961</td>\n",
       "      <td>0.359633</td>\n",
       "      <td>0.365811</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.323351</td>\n",
       "      <td>0.187074</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.651989</td>\n",
       "      <td>0.551766</td>\n",
       "      <td>0.506944</td>\n",
       "      <td>0.478476</td>\n",
       "      <td>0.469045</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.182395</td>\n",
       "      <td>0.072162</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.659890</td>\n",
       "      <td>0.554437</td>\n",
       "      <td>0.474513</td>\n",
       "      <td>0.429274</td>\n",
       "      <td>0.425134</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.437369</td>\n",
       "      <td>0.357848</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.645898</td>\n",
       "      <td>0.558876</td>\n",
       "      <td>0.518976</td>\n",
       "      <td>0.488194</td>\n",
       "      <td>0.525581</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.292203</td>\n",
       "      <td>0.088256</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.465298</td>\n",
       "      <td>0.355877</td>\n",
       "      <td>0.287491</td>\n",
       "      <td>0.254384</td>\n",
       "      <td>0.251697</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.218689</td>\n",
       "      <td>0.140564</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.779429</td>\n",
       "      <td>0.621546</td>\n",
       "      <td>0.502670</td>\n",
       "      <td>0.437805</td>\n",
       "      <td>0.422781</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.429940</td>\n",
       "      <td>0.276002</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.562729</td>\n",
       "      <td>0.477854</td>\n",
       "      <td>0.420387</td>\n",
       "      <td>0.388904</td>\n",
       "      <td>0.393964</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.233076</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.700853</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.553073</td>\n",
       "      <td>0.384142</td>\n",
       "      <td>0.318203</td>\n",
       "      <td>0.298247</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.335870</td>\n",
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>1.023324</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.860626</td>\n",
       "      <td>0.633766</td>\n",
       "      <td>0.572826</td>\n",
       "      <td>0.563293</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.697584</td>\n",
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.588464</td>\n",
       "      <td>0.489265</td>\n",
       "      <td>0.380883</td>\n",
       "      <td>0.345623</td>\n",
       "      <td>0.349896</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.324229</td>\n",
       "      <td>-1.000000</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.333658</td>\n",
       "      <td>0.261352</td>\n",
       "      <td>0.213950</td>\n",
       "      <td>0.206351</td>\n",
       "      <td>0.202078</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.145522</td>\n",
       "      <td>0.067686</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.884689</td>\n",
       "      <td>0.740604</td>\n",
       "      <td>0.664831</td>\n",
       "      <td>0.656090</td>\n",
       "      <td>0.650328</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.602494</td>\n",
       "      <td>0.436570</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.347112</td>\n",
       "      <td>0.294192</td>\n",
       "      <td>0.333500</td>\n",
       "      <td>0.295472</td>\n",
       "      <td>0.353273</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>0.191242</td>\n",
       "      <td>0.067363</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.348152</td>\n",
       "      <td>0.307207</td>\n",
       "      <td>0.278857</td>\n",
       "      <td>0.268917</td>\n",
       "      <td>0.270208</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>1.031485</td>\n",
       "      <td>0.114966</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.588375</td>\n",
       "      <td>0.571845</td>\n",
       "      <td>0.566381</td>\n",
       "      <td>0.567538</td>\n",
       "      <td>0.562651</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>1.203252</td>\n",
       "      <td>0.467297</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.475612</td>\n",
       "      <td>0.451586</td>\n",
       "      <td>0.453132</td>\n",
       "      <td>0.594546</td>\n",
       "      <td>0.549755</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <td>1.020514</td>\n",
       "      <td>0.067919</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
Marcin Wątroba's avatar
Marcin Wątroba committed
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.708020      0.535692       0.365346        0.296100   \n",
       "nl_minds14            0.897447      0.714498       0.503436        0.419083   \n",
       "nl_voxpopuli          0.645715      0.526939       0.396940        0.345034   \n",
       "fr_google_fleurs      0.600185      0.470808       0.378478        0.324236   \n",
       "fr_minds14            0.805977      0.700773       0.642619        0.583323   \n",
       "fr_voxpopuli          0.510623      0.440340       0.382961        0.359633   \n",
       "de_google_fleurs      0.651989      0.551766       0.506944        0.478476   \n",
       "de_minds14            0.659890      0.554437       0.474513        0.429274   \n",
       "de_voxpopuli          0.645898      0.558876       0.518976        0.488194   \n",
       "it_google_fleurs      0.465298      0.355877       0.287491        0.254384   \n",
       "it_minds14            0.779429      0.621546       0.502670        0.437805   \n",
       "it_voxpopuli          0.562729      0.477854       0.420387        0.388904   \n",
       "pl_google_fleurs      0.700853      0.553073       0.384142        0.318203   \n",
       "pl_minds14            1.023324      0.860626       0.633766        0.572826   \n",
       "pl_voxpopuli          0.588464      0.489265       0.380883        0.345623   \n",
       "es_google_fleurs      0.333658      0.261352       0.213950        0.206351   \n",
       "es_minds14            0.884689      0.740604       0.664831        0.656090   \n",
       "es_voxpopuli          0.347112      0.294192       0.333500        0.295472   \n",
       "en_google_fleurs      0.348152      0.307207       0.278857        0.268917   \n",
       "en_minds14            0.588375      0.571845       0.566381        0.567538   \n",
       "en_voxpopuli          0.475612      0.451586       0.453132        0.594546   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.261951           0.273752   -1.000000  \n",
       "nl_minds14                0.389125           0.465494   -1.000000  \n",
       "nl_voxpopuli              0.358023           0.380835   -1.000000  \n",
       "fr_google_fleurs          0.309570           0.305183    0.206433  \n",
       "fr_minds14                0.616411           0.564885    0.441154  \n",
       "fr_voxpopuli              0.365811           0.323351    0.187074  \n",
       "de_google_fleurs          0.469045           0.182395    0.072162  \n",
       "de_minds14                0.425134           0.437369    0.357848  \n",
       "de_voxpopuli              0.525581           0.292203    0.088256  \n",
       "it_google_fleurs          0.251697           0.218689    0.140564  \n",
       "it_minds14                0.422781           0.429940    0.276002  \n",
       "it_voxpopuli              0.393964          -1.000000    0.233076  \n",
       "pl_google_fleurs          0.298247           0.335870   -1.000000  \n",
       "pl_minds14                0.563293           0.697584   -1.000000  \n",
       "pl_voxpopuli              0.349896           0.324229   -1.000000  \n",
       "es_google_fleurs          0.202078           0.145522    0.067686  \n",
       "es_minds14                0.650328           0.602494    0.436570  \n",
       "es_voxpopuli              0.353273           0.191242    0.067363  \n",
       "en_google_fleurs          0.270208           1.031485    0.114966  \n",
       "en_minds14                0.562651           1.203252    0.467297  \n",
       "en_voxpopuli              0.549755           1.020514    0.067919  "
Marcin Wątroba's avatar
Marcin Wątroba committed
     "execution_count": 22,
Marcin Wątroba's avatar
Marcin Wątroba committed
     "metadata": {},
     "output_type": "execute_result"
Marcin Wątroba's avatar
Marcin Wątroba committed
    }
   ],
   "source": [
Marcin Wątroba's avatar
Marcin Wątroba committed
    "word_wer_classic_metrics_df = pd.DataFrame(word_wer_classic_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "word_wer_classic_metrics_df.to_csv('results/word_wer_classic_metrics.csv')\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
    "word_wer_classic_metrics_df\n",
    "\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
Marcin Wątroba's avatar
Marcin Wątroba committed
   ]
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 23,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "77a6e273-1f5e-4a2b-9568-66e53ba99c7b",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "metadata": {},
Marcin Wątroba's avatar
Marcin Wątroba committed
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.487020</td>\n",
       "      <td>0.332826</td>\n",
       "      <td>0.173815</td>\n",
       "      <td>0.118312</td>\n",
       "      <td>0.092164</td>\n",
       "      <td>0.186138</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.696387</td>\n",
       "      <td>0.528807</td>\n",
       "      <td>0.323153</td>\n",
       "      <td>0.251855</td>\n",
       "      <td>0.234766</td>\n",
       "      <td>0.306648</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.440765</td>\n",
       "      <td>0.349226</td>\n",
       "      <td>0.233398</td>\n",
       "      <td>0.187694</td>\n",
       "      <td>0.203840</td>\n",
       "      <td>0.295450</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.422005</td>\n",
       "      <td>0.308031</td>\n",
       "      <td>0.230959</td>\n",
       "      <td>0.181520</td>\n",
       "      <td>0.167575</td>\n",
       "      <td>0.225745</td>\n",
       "      <td>0.154588</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.598664</td>\n",
       "      <td>0.499632</td>\n",
       "      <td>0.447757</td>\n",
       "      <td>0.395654</td>\n",
       "      <td>0.429327</td>\n",
       "      <td>0.441224</td>\n",
       "      <td>0.342637</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.349906</td>\n",
       "      <td>0.291653</td>\n",
       "      <td>0.242314</td>\n",
       "      <td>0.218193</td>\n",
       "      <td>0.226681</td>\n",
       "      <td>0.251004</td>\n",
       "      <td>0.147786</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.328928</td>\n",
       "      <td>0.213515</td>\n",
       "      <td>0.151060</td>\n",
       "      <td>0.116871</td>\n",
       "      <td>0.104827</td>\n",
       "      <td>0.118999</td>\n",
       "      <td>0.048663</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.425754</td>\n",
       "      <td>0.331317</td>\n",
       "      <td>0.255620</td>\n",
       "      <td>0.222602</td>\n",
       "      <td>0.220104</td>\n",
       "      <td>0.232533</td>\n",
       "      <td>0.143306</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.349224</td>\n",
       "      <td>0.259910</td>\n",
       "      <td>0.208328</td>\n",
       "      <td>0.176478</td>\n",
       "      <td>0.215692</td>\n",
       "      <td>0.228572</td>\n",
       "      <td>0.065661</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.297877</td>\n",
       "      <td>0.201276</td>\n",
       "      <td>0.139435</td>\n",
       "      <td>0.114579</td>\n",
       "      <td>0.103925</td>\n",
       "      <td>0.161414</td>\n",
       "      <td>0.101285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.603743</td>\n",
       "      <td>0.455306</td>\n",
       "      <td>0.323527</td>\n",
       "      <td>0.264797</td>\n",
       "      <td>0.255383</td>\n",
       "      <td>0.299216</td>\n",
       "      <td>0.162753</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.418096</td>\n",
       "      <td>0.345687</td>\n",
       "      <td>0.298079</td>\n",
       "      <td>0.266888</td>\n",
       "      <td>0.270669</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.193692</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.493295</td>\n",
       "      <td>0.336319</td>\n",
       "      <td>0.183046</td>\n",
       "      <td>0.119453</td>\n",
       "      <td>0.096625</td>\n",
       "      <td>0.232851</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.822964</td>\n",
       "      <td>0.633399</td>\n",
       "      <td>0.420067</td>\n",
       "      <td>0.353710</td>\n",
       "      <td>0.342892</td>\n",
       "      <td>0.519684</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.385923</td>\n",
       "      <td>0.288336</td>\n",
       "      <td>0.188413</td>\n",
       "      <td>0.152321</td>\n",
       "      <td>0.147463</td>\n",
       "      <td>0.232410</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.196055</td>\n",
       "      <td>0.130109</td>\n",
       "      <td>0.084114</td>\n",
       "      <td>0.077302</td>\n",
       "      <td>0.067295</td>\n",
       "      <td>0.102324</td>\n",
       "      <td>0.048997</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.723086</td>\n",
       "      <td>0.581624</td>\n",
       "      <td>0.497037</td>\n",
       "      <td>0.493568</td>\n",
       "      <td>0.488170</td>\n",
       "      <td>0.522209</td>\n",
       "      <td>0.397315</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.222505</td>\n",
       "      <td>0.172764</td>\n",
       "      <td>0.195746</td>\n",
       "      <td>0.162495</td>\n",
       "      <td>0.201468</td>\n",
       "      <td>0.143578</td>\n",
       "      <td>0.053721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.191329</td>\n",
       "      <td>0.151693</td>\n",
       "      <td>0.121134</td>\n",
       "      <td>0.107578</td>\n",
       "      <td>0.108609</td>\n",
       "      <td>0.111466</td>\n",
       "      <td>0.088609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.449783</td>\n",
       "      <td>0.433839</td>\n",
       "      <td>0.427788</td>\n",
       "      <td>0.431043</td>\n",
       "      <td>0.424969</td>\n",
       "      <td>0.424984</td>\n",
       "      <td>0.363642</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.314581</td>\n",
       "      <td>0.286802</td>\n",
       "      <td>0.297819</td>\n",
       "      <td>0.439680</td>\n",
       "      <td>0.402555</td>\n",
       "      <td>0.118296</td>\n",
       "      <td>0.054176</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  whisper_tiny  whisper_base  whisper_small  whisper_medium  \\\n",
       "nl_google_fleurs      0.487020      0.332826       0.173815        0.118312   \n",
       "nl_minds14            0.696387      0.528807       0.323153        0.251855   \n",
       "nl_voxpopuli          0.440765      0.349226       0.233398        0.187694   \n",
       "fr_google_fleurs      0.422005      0.308031       0.230959        0.181520   \n",
       "fr_minds14            0.598664      0.499632       0.447757        0.395654   \n",
       "fr_voxpopuli          0.349906      0.291653       0.242314        0.218193   \n",
       "de_google_fleurs      0.328928      0.213515       0.151060        0.116871   \n",
       "de_minds14            0.425754      0.331317       0.255620        0.222602   \n",
       "de_voxpopuli          0.349224      0.259910       0.208328        0.176478   \n",
       "it_google_fleurs      0.297877      0.201276       0.139435        0.114579   \n",
       "it_minds14            0.603743      0.455306       0.323527        0.264797   \n",
       "it_voxpopuli          0.418096      0.345687       0.298079        0.266888   \n",
       "pl_google_fleurs      0.493295      0.336319       0.183046        0.119453   \n",
       "pl_minds14            0.822964      0.633399       0.420067        0.353710   \n",
       "pl_voxpopuli          0.385923      0.288336       0.188413        0.152321   \n",
       "es_google_fleurs      0.196055      0.130109       0.084114        0.077302   \n",
       "es_minds14            0.723086      0.581624       0.497037        0.493568   \n",
       "es_voxpopuli          0.222505      0.172764       0.195746        0.162495   \n",
       "en_google_fleurs      0.191329      0.151693       0.121134        0.107578   \n",
       "en_minds14            0.449783      0.433839       0.427788        0.431043   \n",
       "en_voxpopuli          0.314581      0.286802       0.297819        0.439680   \n",
       "\n",
       "                  whisper_large-v2  facebook_wav2vec2  nvidia_stt  \n",
       "nl_google_fleurs          0.092164           0.186138   -1.000000  \n",
       "nl_minds14                0.234766           0.306648   -1.000000  \n",
       "nl_voxpopuli              0.203840           0.295450   -1.000000  \n",
       "fr_google_fleurs          0.167575           0.225745    0.154588  \n",
       "fr_minds14                0.429327           0.441224    0.342637  \n",
       "fr_voxpopuli              0.226681           0.251004    0.147786  \n",
       "de_google_fleurs          0.104827           0.118999    0.048663  \n",
       "de_minds14                0.220104           0.232533    0.143306  \n",
       "de_voxpopuli              0.215692           0.228572    0.065661  \n",
       "it_google_fleurs          0.103925           0.161414    0.101285  \n",
       "it_minds14                0.255383           0.299216    0.162753  \n",
       "it_voxpopuli              0.270669          -1.000000    0.193692  \n",
       "pl_google_fleurs          0.096625           0.232851   -1.000000  \n",
       "pl_minds14                0.342892           0.519684   -1.000000  \n",
       "pl_voxpopuli              0.147463           0.232410   -1.000000  \n",
       "es_google_fleurs          0.067295           0.102324    0.048997  \n",
       "es_minds14                0.488170           0.522209    0.397315  \n",
       "es_voxpopuli              0.201468           0.143578    0.053721  \n",
       "en_google_fleurs          0.108609           0.111466    0.088609  \n",
       "en_minds14                0.424969           0.424984    0.363642  \n",
       "en_voxpopuli              0.402555           0.118296    0.054176  "
      ]
     },
Marcin Wątroba's avatar
Marcin Wątroba committed
     "execution_count": 23,
Marcin Wątroba's avatar
Marcin Wątroba committed
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "word_wer_soft_metrics_df = pd.DataFrame(word_wer_soft_metrics, columns=FULL_LANGUAGE_MODELS, index=FULL_DATASET_NAMES)\n",
    "word_wer_soft_metrics_df.to_csv('results/word_wer_soft_metrics.csv')\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
    "word_wer_soft_metrics_df\n",
    "\n",
    "summarize_df(spacy_ner, 'spacy_ner')"
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 24,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "629318e6-8c00-413c-99d4-2b7ff559ac3f",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "metadata": {},
Marcin Wątroba's avatar
Marcin Wątroba committed
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>whisper_tiny</th>\n",
       "      <th>whisper_base</th>\n",
       "      <th>whisper_small</th>\n",
       "      <th>whisper_medium</th>\n",
       "      <th>whisper_large-v2</th>\n",
       "      <th>facebook_wav2vec2</th>\n",
       "      <th>nvidia_stt</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>nl_google_fleurs</th>\n",
       "      <td>0.512857</td>\n",
       "      <td>0.351476</td>\n",
       "      <td>0.183268</td>\n",
       "      <td>0.123803</td>\n",
       "      <td>0.095700</td>\n",
       "      <td>0.192525</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_minds14</th>\n",
       "      <td>0.732501</td>\n",
       "      <td>0.554846</td>\n",
       "      <td>0.346042</td>\n",
       "      <td>0.267858</td>\n",
       "      <td>0.244768</td>\n",
       "      <td>0.319302</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>nl_voxpopuli</th>\n",
       "      <td>0.472829</td>\n",
       "      <td>0.364308</td>\n",
       "      <td>0.241434</td>\n",
       "      <td>0.193047</td>\n",
       "      <td>0.210556</td>\n",
       "      <td>0.304289</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_google_fleurs</th>\n",
       "      <td>0.442361</td>\n",
       "      <td>0.321953</td>\n",
       "      <td>0.240016</td>\n",
       "      <td>0.188132</td>\n",
       "      <td>0.174075</td>\n",
       "      <td>0.233362</td>\n",
       "      <td>0.159139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_minds14</th>\n",
       "      <td>0.628774</td>\n",
       "      <td>0.527781</td>\n",
       "      <td>0.472124</td>\n",
       "      <td>0.417764</td>\n",
       "      <td>0.451830</td>\n",
       "      <td>0.456835</td>\n",
       "      <td>0.353934</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>fr_voxpopuli</th>\n",
       "      <td>0.365471</td>\n",
       "      <td>0.304097</td>\n",
       "      <td>0.251867</td>\n",
       "      <td>0.226099</td>\n",
       "      <td>0.235006</td>\n",
       "      <td>0.259228</td>\n",
       "      <td>0.150950</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_google_fleurs</th>\n",
       "      <td>0.346586</td>\n",
       "      <td>0.227203</td>\n",
       "      <td>0.158453</td>\n",
       "      <td>0.121399</td>\n",
       "      <td>0.107550</td>\n",
       "      <td>0.123204</td>\n",
       "      <td>0.050265</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_minds14</th>\n",
       "      <td>0.446445</td>\n",
       "      <td>0.346742</td>\n",
       "      <td>0.265021</td>\n",
       "      <td>0.229449</td>\n",
       "      <td>0.226477</td>\n",
       "      <td>0.238560</td>\n",
       "      <td>0.147524</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>de_voxpopuli</th>\n",
       "      <td>0.366639</td>\n",
       "      <td>0.270086</td>\n",
       "      <td>0.215487</td>\n",
       "      <td>0.181204</td>\n",
       "      <td>0.221848</td>\n",
       "      <td>0.234268</td>\n",
       "      <td>0.067181</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_google_fleurs</th>\n",
       "      <td>0.313010</td>\n",
       "      <td>0.210131</td>\n",
       "      <td>0.144045</td>\n",
       "      <td>0.117567</td>\n",
       "      <td>0.106640</td>\n",
       "      <td>0.165954</td>\n",
       "      <td>0.104103</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_minds14</th>\n",
       "      <td>0.633334</td>\n",
       "      <td>0.476970</td>\n",
       "      <td>0.337584</td>\n",
       "      <td>0.275103</td>\n",
       "      <td>0.265102</td>\n",
       "      <td>0.310508</td>\n",
       "      <td>0.168097</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>it_voxpopuli</th>\n",
       "      <td>0.439105</td>\n",
       "      <td>0.363577</td>\n",
       "      <td>0.310733</td>\n",
       "      <td>0.278968</td>\n",
       "      <td>0.283103</td>\n",
       "      <td>-1.000000</td>\n",
       "      <td>0.198565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_google_fleurs</th>\n",
       "      <td>0.520524</td>\n",
       "      <td>0.358929</td>\n",
       "      <td>0.190407</td>\n",
       "      <td>0.123706</td>\n",
       "      <td>0.098981</td>\n",
       "      <td>0.242890</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_minds14</th>\n",
       "      <td>0.861366</td>\n",
       "      <td>0.666738</td>\n",
       "      <td>0.439214</td>\n",
       "      <td>0.370198</td>\n",
       "      <td>0.361172</td>\n",
       "      <td>0.542831</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>pl_voxpopuli</th>\n",
       "      <td>0.404981</td>\n",
       "      <td>0.301113</td>\n",
       "      <td>0.194702</td>\n",
       "      <td>0.156644</td>\n",
       "      <td>0.151601</td>\n",
       "      <td>0.240070</td>\n",
       "      <td>-1.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_google_fleurs</th>\n",
       "      <td>0.204884</td>\n",
       "      <td>0.135018</td>\n",
       "      <td>0.086281</td>\n",
       "      <td>0.078608</td>\n",
       "      <td>0.067940</td>\n",
       "      <td>0.105327</td>\n",
       "      <td>0.050019</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_minds14</th>\n",
       "      <td>0.752425</td>\n",
       "      <td>0.601240</td>\n",
       "      <td>0.511320</td>\n",
       "      <td>0.505483</td>\n",
       "      <td>0.497249</td>\n",
       "      <td>0.535758</td>\n",
       "      <td>0.401730</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>es_voxpopuli</th>\n",
       "      <td>0.233013</td>\n",
       "      <td>0.179737</td>\n",
       "      <td>0.202485</td>\n",
       "      <td>0.167919</td>\n",
       "      <td>0.208381</td>\n",
       "      <td>0.148001</td>\n",
       "      <td>0.054963</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_google_fleurs</th>\n",
       "      <td>0.198209</td>\n",
       "      <td>0.157780</td>\n",
       "      <td>0.125360</td>\n",
       "      <td>0.111138</td>\n",
       "      <td>0.112012</td>\n",
       "      <td>0.116211</td>\n",
       "      <td>0.092322</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_minds14</th>\n",
       "      <td>0.463499</td>\n",
       "      <td>0.446222</td>\n",
       "      <td>0.442346</td>\n",
       "      <td>0.444175</td>\n",
       "      <td>0.438048</td>\n",
       "      <td>0.434445</td>\n",
       "      <td>0.371188</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>en_voxpopuli</th>\n",
       "      <td>0.325976</td>\n",
       "      <td>0.294154</td>\n",
       "      <td>0.306453</td>\n",
       "      <td>0.451091</td>\n",
       "      <td>0.414535</td>\n",
       "      <td>0.120754</td>\n",
       "      <td>0.055428</td>\n",