Skip to content
Snippets Groups Projects
duckling_preview.ipynb 1.65 MiB
Newer Older
Marcin Wątroba's avatar
Marcin Wątroba committed
45001 45002 45003 45004 45005 45006 45007 45008 45009 45010 45011 45012 45013 45014 45015 45016 45017 45018 45019 45020 45021 45022 45023 45024 45025 45026 45027 45028 45029 45030 45031 45032 45033 45034 45035 45036 45037 45038 45039 45040 45041 45042 45043 45044 45045 45046 45047 45048 45049 45050 45051 45052 45053 45054 45055 45056 45057 45058 45059 45060 45061 45062 45063 45064 45065 45066 45067 45068 45069 45070 45071 45072 45073 45074 45075 45076 45077 45078 45079 45080 45081 45082 45083 45084 45085 45086 45087 45088 45089 45090 45091 45092 45093 45094 45095 45096 45097 45098 45099 45100 45101 45102 45103 45104 45105 45106 45107 45108 45109 45110 45111 45112 45113 45114 45115 45116 45117 45118 45119 45120 45121 45122 45123 45124 45125 45126 45127 45128 45129 45130 45131 45132 45133 45134 45135 45136 45137 45138 45139 45140 45141 45142 45143 45144 45145 45146 45147 45148 45149 45150 45151 45152 45153 45154 45155 45156 45157 45158 45159 45160 45161 45162 45163 45164 45165 45166 45167 45168 45169 45170 45171 45172 45173 45174 45175 45176 45177 45178 45179 45180 45181 45182 45183 45184 45185 45186 45187 45188 45189 45190 45191 45192 45193 45194 45195 45196 45197 45198 45199 45200 45201 45202 45203 45204 45205 45206 45207 45208 45209 45210 45211 45212 45213 45214 45215 45216 45217 45218 45219 45220 45221 45222 45223 45224 45225 45226 45227 45228 45229 45230 45231 45232 45233 45234 45235 45236 45237 45238 45239 45240 45241 45242 45243 45244 45245 45246 45247 45248 45249 45250 45251 45252 45253 45254 45255 45256 45257 45258 45259 45260 45261 45262 45263 45264 45265 45266 45267 45268 45269 45270 45271 45272 45273 45274 45275 45276 45277 45278 45279 45280 45281 45282 45283 45284 45285 45286 45287 45288 45289 45290 45291 45292 45293 45294 45295 45296 45297 45298 45299 45300 45301 45302 45303 45304 45305 45306 45307 45308 45309 45310 45311 45312 45313 45314 45315 45316 45317 45318 45319 45320 45321 45322 45323 45324 45325 45326 45327 45328 45329 45330 45331 45332 45333 45334 45335 45336 45337 45338 45339 45340 45341 45342 45343 45344 45345 45346 45347 45348 45349 45350 45351 45352 45353 45354 45355 45356 45357 45358 45359 45360 45361 45362 45363 45364 45365 45366 45367 45368 45369 45370 45371 45372 45373 45374 45375 45376 45377 45378 45379 45380 45381 45382 45383 45384 45385 45386 45387 45388 45389 45390 45391 45392 45393 45394 45395 45396 45397 45398 45399 45400 45401 45402 45403 45404 45405 45406 45407 45408 45409 45410 45411 45412 45413 45414 45415 45416 45417 45418 45419 45420 45421 45422 45423 45424 45425 45426 45427 45428 45429 45430 45431 45432 45433 45434 45435 45436 45437 45438 45439 45440 45441 45442 45443 45444 45445 45446 45447 45448 45449 45450 45451 45452 45453 45454 45455 45456 45457 45458 45459 45460 45461 45462 45463 45464 45465 45466 45467 45468 45469 45470 45471 45472 45473 45474 45475 45476 45477 45478 45479 45480 45481 45482 45483 45484 45485 45486 45487 45488 45489 45490 45491 45492 45493 45494 45495 45496 45497 45498 45499 45500 45501 45502 45503 45504 45505 45506 45507 45508 45509 45510 45511 45512 45513 45514 45515 45516 45517 45518 45519 45520 45521 45522 45523 45524 45525 45526 45527 45528 45529 45530 45531 45532 45533 45534 45535 45536 45537 45538 45539 45540 45541 45542 45543 45544 45545 45546 45547 45548 45549 45550 45551
       "      <td>PART -&gt; ___</td>\n",
       "      <td>3585</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167</th>\n",
       "      <td>PRON -&gt; ___</td>\n",
       "      <td>3424</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221</th>\n",
       "      <td>VERB -&gt; ___</td>\n",
       "      <td>2935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>ADV -&gt; ___</td>\n",
       "      <td>2727</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>CCONJ -&gt; ___</td>\n",
       "      <td>2360</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>NUM -&gt; X</td>\n",
       "      <td>1842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>NUM -&gt; ___</td>\n",
       "      <td>1726</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>DET -&gt; ___</td>\n",
       "      <td>1715</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>AUX -&gt; ___</td>\n",
       "      <td>1634</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>SCONJ -&gt; ___</td>\n",
       "      <td>1587</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>ADJ -&gt; ___</td>\n",
       "      <td>1461</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>___ -&gt; NOUN</td>\n",
       "      <td>1251</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>250</th>\n",
       "      <td>___ -&gt; VERB</td>\n",
       "      <td>1178</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119</th>\n",
       "      <td>NOUN -&gt; VERB</td>\n",
       "      <td>677</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           values  counts\n",
       "30     ADP -> ___    4527\n",
       "121   NOUN -> ___    4129\n",
       "151   PART -> ___    3585\n",
       "167   PRON -> ___    3424\n",
       "221   VERB -> ___    2935\n",
       "46     ADV -> ___    2727\n",
       "77   CCONJ -> ___    2360\n",
       "135      NUM -> X    1842\n",
       "136    NUM -> ___    1726\n",
       "92     DET -> ___    1715\n",
       "61     AUX -> ___    1634\n",
       "204  SCONJ -> ___    1587\n",
       "15     ADJ -> ___    1461\n",
       "244   ___ -> NOUN    1251\n",
       "250   ___ -> VERB    1178\n",
       "119  NOUN -> VERB     677"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "show_stats(voicelab_experiment_repository, techmo_connections_config)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "41e5bd80-87e0-4791-a87a-4c247ddb27cb",
   "metadata": {},
   "source": [
    "## VoiceLab AJN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "96b5b6d1-d7cd-4afb-adca-f6ed4bc1fedf",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>values</th>\n",
       "      <th>counts</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>VERB -&gt; ___</td>\n",
       "      <td>11609</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>NOUN -&gt; ___</td>\n",
       "      <td>10416</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>ADV -&gt; ___</td>\n",
       "      <td>10127</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175</th>\n",
       "      <td>PART -&gt; ___</td>\n",
       "      <td>9282</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>ADP -&gt; ___</td>\n",
       "      <td>8663</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>PRON -&gt; ___</td>\n",
       "      <td>8066</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>287</th>\n",
       "      <td>___ -&gt; PUNCT</td>\n",
       "      <td>6354</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105</th>\n",
       "      <td>DET -&gt; ___</td>\n",
       "      <td>6147</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>ADJ -&gt; ___</td>\n",
       "      <td>5935</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231</th>\n",
       "      <td>SCONJ -&gt; ___</td>\n",
       "      <td>5385</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>254</th>\n",
       "      <td>VERB -&gt; PUNCT</td>\n",
       "      <td>4842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>NOUN -&gt; PUNCT</td>\n",
       "      <td>4632</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>AUX -&gt; ___</td>\n",
       "      <td>4016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>249</th>\n",
       "      <td>VERB -&gt; NOUN</td>\n",
       "      <td>3772</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>ADV -&gt; PUNCT</td>\n",
       "      <td>3453</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>CCONJ -&gt; ___</td>\n",
       "      <td>3438</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            values  counts\n",
       "259    VERB -> ___   11609\n",
       "140    NOUN -> ___   10416\n",
       "53      ADV -> ___   10127\n",
       "175    PART -> ___    9282\n",
       "35      ADP -> ___    8663\n",
       "192    PRON -> ___    8066\n",
       "287   ___ -> PUNCT    6354\n",
       "105     DET -> ___    6147\n",
       "17      ADJ -> ___    5935\n",
       "231   SCONJ -> ___    5385\n",
       "254  VERB -> PUNCT    4842\n",
       "134  NOUN -> PUNCT    4632\n",
       "70      AUX -> ___    4016\n",
       "249   VERB -> NOUN    3772\n",
       "47    ADV -> PUNCT    3453\n",
       "88    CCONJ -> ___    3438"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "show_stats(voicelab_experiment_repository, ajn_connections_config)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "id": "17823c33-7065-43e6-9d2f-49a59fba26c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "import spacy\n",
    "nlp = spacy.load(\"en_core_web_lg\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "id": "e8fa6eb7-ec32-4284-9ff3-1de52e969cb5",
   "metadata": {},
   "outputs": [],
   "source": [
    "doc = nlp(\"Hello, I'm Marcin\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "id": "cb0d05d0-8eff-4ddd-900a-207e67c2afc0",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INTJ\n",
      "PUNCT\n",
      "PRON\n",
      "AUX\n",
      "PROPN\n"
     ]
    }
   ],
   "source": [
    "for it in doc:\n",
    "    print(it.pos_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "id": "5f008198-1ad3-4fe6-a904-7a0e1b4d0ade",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(doc)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "f2a7a1e7-c226-4fd9-873b-cada73a9d5fb",
   "metadata": {},
   "outputs": [],
   "source": [
    "arr = ['_' for it in range(len(doc))]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "6206ac46-c803-4c9f-a9f9-91a1f60177bf",
   "metadata": {},
   "outputs": [],
   "source": [
    "for ent in doc.ents:\n",
    "    for itt in range(ent.start, ent.end):\n",
    "        arr[itt] = ent.label_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "9756f1eb-7e95-4d8a-8d99-d2f664c4105f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# list(range(2, 3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "3b286ac6-e2fd-421f-89eb-fb66233856f7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['persName', '_', '_', '_', '_']"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "id": "3f5e9f50-1d0f-4660-87b7-563bd93582c4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Collecting en-core-web-lg==3.2.0\n",
      "  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.2.0/en_core_web_lg-3.2.0-py3-none-any.whl (777.4 MB)\n",
      "\u001b[K     |████████████████████████████████| 777.4 MB 25 kB/s s eta 0:00:01     |███████████████▋                | 378.2 MB 1.1 MB/s eta 0:05:52   | 465.7 MB 2.0 MB/s eta 0:02:39     |█████████████████████           | 507.6 MB 2.0 MB/s eta 0:02:17     |██████████████████████▏         | 537.6 MB 1.4 MB/s eta 0:02:54\n",
      "\u001b[?25hRequirement already satisfied: spacy<3.3.0,>=3.2.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from en-core-web-lg==3.2.0) (3.2.4)\n",
      "Requirement already satisfied: blis<0.8.0,>=0.4.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (0.7.7)\n",
      "Requirement already satisfied: numpy>=1.15.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (1.21.1)\n",
      "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (1.0.7)\n",
      "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.3.0)\n",
      "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2.26.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (4.64.0)\n",
      "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (1.8.2)\n",
      "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2.0.6)\n",
      "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2.0.7)\n",
      "Requirement already satisfied: typer<0.5.0,>=0.3.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (0.4.1)\n",
      "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.8 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.0.9)\n",
      "Requirement already satisfied: jinja2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.0.3)\n",
      "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (1.0.2)\n",
      "Requirement already satisfied: setuptools in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (61.2.0)\n",
      "Requirement already satisfied: srsly<3.0.0,>=2.4.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2.4.3)\n",
      "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.0.6)\n",
      "Requirement already satisfied: thinc<8.1.0,>=8.0.12 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (8.0.15)\n",
      "Requirement already satisfied: pathy>=0.3.5 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (0.6.1)\n",
      "Requirement already satisfied: wasabi<1.1.0,>=0.8.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (0.9.1)\n",
      "Requirement already satisfied: packaging>=20.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (21.3)\n",
      "Requirement already satisfied: click<8.1.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (8.0.3)\n",
      "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from packaging>=20.0->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.0.6)\n",
      "Requirement already satisfied: smart-open<6.0.0,>=5.0.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from pathy>=0.3.5->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (5.2.1)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.10.0.2)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2021.10.8)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (1.26.7)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (3.3)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2.0.9)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from jinja2->spacy<3.3.0,>=3.2.0->en-core-web-lg==3.2.0) (2.0.1)\n",
      "Installing collected packages: en-core-web-lg\n",
      "Successfully installed en-core-web-lg-3.2.0\n",
      "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
      "You can now load the package via spacy.load('en_core_web_lg')\n"
     ]
    }
   ],
   "source": [
    "!python -m spacy download en_core_web_lg"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "id": "6d35cd94-a2ba-4e45-afaa-c93d9ab360b4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: spacy-transformers in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (1.1.5)\n",
      "Requirement already satisfied: spacy-alignments<1.0.0,>=0.7.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy-transformers) (0.8.5)\n",
      "Requirement already satisfied: torch>=1.6.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy-transformers) (1.11.0)\n",
      "Requirement already satisfied: srsly<3.0.0,>=2.4.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy-transformers) (2.4.3)\n",
      "Requirement already satisfied: spacy<4.0.0,>=3.1.3 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy-transformers) (3.2.4)\n",
      "Requirement already satisfied: transformers<4.18.0,>=3.4.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy-transformers) (4.17.0)\n",
      "Requirement already satisfied: jinja2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (3.0.3)\n",
      "Requirement already satisfied: typer<0.5.0,>=0.3.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (0.4.1)\n",
      "Requirement already satisfied: blis<0.8.0,>=0.4.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (0.7.7)\n",
      "Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (2.0.7)\n",
      "Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.8 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (3.0.9)\n",
      "Requirement already satisfied: pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (1.8.2)\n",
      "Requirement already satisfied: packaging>=20.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (21.3)\n",
      "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (1.0.7)\n",
      "Requirement already satisfied: click<8.1.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (8.0.3)\n",
      "Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (3.3.0)\n",
      "Requirement already satisfied: pathy>=0.3.5 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (0.6.1)\n",
      "Requirement already satisfied: wasabi<1.1.0,>=0.8.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (0.9.1)\n",
      "Requirement already satisfied: setuptools in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (61.2.0)\n",
      "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (4.64.0)\n",
      "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (3.0.6)\n",
      "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (2.0.6)\n",
      "Requirement already satisfied: thinc<8.1.0,>=8.0.12 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (8.0.15)\n",
      "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (2.26.0)\n",
      "Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (1.0.2)\n",
      "Requirement already satisfied: numpy>=1.15.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from spacy<4.0.0,>=3.1.3->spacy-transformers) (1.21.1)\n",
      "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from packaging>=20.0->spacy<4.0.0,>=3.1.3->spacy-transformers) (3.0.6)\n",
      "Requirement already satisfied: smart-open<6.0.0,>=5.0.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from pathy>=0.3.5->spacy<4.0.0,>=3.1.3->spacy-transformers) (5.2.1)\n",
      "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from pydantic!=1.8,!=1.8.1,<1.9.0,>=1.7.4->spacy<4.0.0,>=3.1.3->spacy-transformers) (3.10.0.2)\n",
      "Requirement already satisfied: certifi>=2017.4.17 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.1.3->spacy-transformers) (2021.10.8)\n",
      "Requirement already satisfied: idna<4,>=2.5 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.1.3->spacy-transformers) (3.3)\n",
      "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.1.3->spacy-transformers) (1.26.7)\n",
      "Requirement already satisfied: charset-normalizer~=2.0.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<4.0.0,>=3.1.3->spacy-transformers) (2.0.9)\n",
      "Requirement already satisfied: pyyaml>=5.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from transformers<4.18.0,>=3.4.0->spacy-transformers) (6.0)\n",
      "Requirement already satisfied: regex!=2019.12.17 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from transformers<4.18.0,>=3.4.0->spacy-transformers) (2021.11.10)\n",
      "Requirement already satisfied: sacremoses in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from transformers<4.18.0,>=3.4.0->spacy-transformers) (0.0.53)\n",
      "Requirement already satisfied: huggingface-hub<1.0,>=0.1.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from transformers<4.18.0,>=3.4.0->spacy-transformers) (0.7.0)\n",
      "Requirement already satisfied: tokenizers!=0.11.3,>=0.11.1 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from transformers<4.18.0,>=3.4.0->spacy-transformers) (0.12.1)\n",
      "Requirement already satisfied: filelock in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from transformers<4.18.0,>=3.4.0->spacy-transformers) (3.6.0)\n",
      "Requirement already satisfied: MarkupSafe>=2.0 in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from jinja2->spacy<4.0.0,>=3.1.3->spacy-transformers) (2.0.1)\n",
      "Requirement already satisfied: six in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from sacremoses->transformers<4.18.0,>=3.4.0->spacy-transformers) (1.16.0)\n",
      "Requirement already satisfied: joblib in /home/marcinwatroba/miniconda3/envs/asr-benchmarks/lib/python3.8/site-packages (from sacremoses->transformers<4.18.0,>=3.4.0->spacy-transformers) (1.1.0)\n"
     ]
    }
   ],
   "source": [
    "!pip install spacy-transformers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a88bbfcc-726f-4c9e-acea-9d38163296cf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6f62e3d-ac08-43ae-ba25-4b93702c33f9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4348aca-0344-49c0-9bf4-af7b8c84871d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db6207ed-1ee5-4a1e-8cc9-397b76b66997",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}