Skip to content
Snippets Groups Projects
pos.ipynb 1.63 MiB
Newer Older
       "      <td>PART -&gt; ___</td>\n",
       "      <td>3585</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>167</th>\n",
       "      <td>PRON -&gt; ___</td>\n",
       "      <td>3424</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>221</th>\n",
       "      <td>VERB -&gt; ___</td>\n",
       "      <td>2935</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>ADV -&gt; ___</td>\n",
       "      <td>2727</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>77</th>\n",
       "      <td>CCONJ -&gt; ___</td>\n",
       "      <td>2360</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>135</th>\n",
       "      <td>NUM -&gt; X</td>\n",
       "      <td>1842</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>136</th>\n",
       "      <td>NUM -&gt; ___</td>\n",
       "      <td>1726</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>92</th>\n",
       "      <td>DET -&gt; ___</td>\n",
       "      <td>1715</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>AUX -&gt; ___</td>\n",
       "      <td>1634</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>204</th>\n",
       "      <td>SCONJ -&gt; ___</td>\n",
       "      <td>1587</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "      <th>15</th>\n",
       "      <td>ADJ -&gt; ___</td>\n",
       "      <td>1461</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>244</th>\n",
       "      <td>___ -&gt; NOUN</td>\n",
       "      <td>1251</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>250</th>\n",
       "      <td>___ -&gt; VERB</td>\n",
       "      <td>1178</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>119</th>\n",
       "      <td>NOUN -&gt; VERB</td>\n",
       "      <td>677</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "           values  counts\n",
       "30     ADP -> ___    4527\n",
       "121   NOUN -> ___    4129\n",
       "151   PART -> ___    3585\n",
       "167   PRON -> ___    3424\n",
       "221   VERB -> ___    2935\n",
       "46     ADV -> ___    2727\n",
       "77   CCONJ -> ___    2360\n",
       "135      NUM -> X    1842\n",
       "136    NUM -> ___    1726\n",
       "92     DET -> ___    1715\n",
       "61     AUX -> ___    1634\n",
       "204  SCONJ -> ___    1587\n",
       "15     ADJ -> ___    1461\n",
       "244   ___ -> NOUN    1251\n",
       "250   ___ -> VERB    1178\n",
       "119  NOUN -> VERB     677"
Marcin Wątroba's avatar
Marcin Wątroba committed
      ]
     },
Marcin Wątroba's avatar
Marcin Wątroba committed
     "execution_count": 43,
Marcin Wątroba's avatar
Marcin Wątroba committed
     "metadata": {},
     "output_type": "execute_result"
Marcin Wątroba's avatar
Marcin Wątroba committed
    }
   ],
   "source": [
    "show_stats(voicelab_experiment_repository, techmo_connections_config)"
Marcin Wątroba's avatar
Marcin Wątroba committed
   ]
  },
  {
   "cell_type": "markdown",
   "id": "41e5bd80-87e0-4791-a87a-4c247ddb27cb",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "metadata": {},
   "source": [
    "## VoiceLab AJN"
Marcin Wątroba's avatar
Marcin Wątroba committed
   ]
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 44,
   "id": "96b5b6d1-d7cd-4afb-adca-f6ed4bc1fedf",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "metadata": {},
   "outputs": [
Marcin Wątroba's avatar
Marcin Wątroba committed
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>values</th>\n",
       "      <th>counts</th>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>259</th>\n",
       "      <td>VERB -&gt; ___</td>\n",
       "      <td>11609</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>140</th>\n",
       "      <td>NOUN -&gt; ___</td>\n",
       "      <td>10416</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>ADV -&gt; ___</td>\n",
       "      <td>10127</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>175</th>\n",
       "      <td>PART -&gt; ___</td>\n",
       "      <td>9282</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>35</th>\n",
       "      <td>ADP -&gt; ___</td>\n",
       "      <td>8663</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>192</th>\n",
       "      <td>PRON -&gt; ___</td>\n",
       "      <td>8066</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>287</th>\n",
       "      <td>___ -&gt; PUNCT</td>\n",
       "      <td>6354</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>105</th>\n",
       "      <td>DET -&gt; ___</td>\n",
       "      <td>6147</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>ADJ -&gt; ___</td>\n",
       "      <td>5935</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>231</th>\n",
       "      <td>SCONJ -&gt; ___</td>\n",
       "      <td>5385</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>254</th>\n",
       "      <td>VERB -&gt; PUNCT</td>\n",
       "      <td>4842</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>134</th>\n",
       "      <td>NOUN -&gt; PUNCT</td>\n",
       "      <td>4632</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>AUX -&gt; ___</td>\n",
       "      <td>4016</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>249</th>\n",
       "      <td>VERB -&gt; NOUN</td>\n",
       "      <td>3772</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>ADV -&gt; PUNCT</td>\n",
       "      <td>3453</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "    <tr>\n",
       "      <th>88</th>\n",
       "      <td>CCONJ -&gt; ___</td>\n",
       "      <td>3438</td>\n",
Marcin Wątroba's avatar
Marcin Wątroba committed
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "            values  counts\n",
       "259    VERB -> ___   11609\n",
       "140    NOUN -> ___   10416\n",
       "53      ADV -> ___   10127\n",
       "175    PART -> ___    9282\n",
       "35      ADP -> ___    8663\n",
       "192    PRON -> ___    8066\n",
       "287   ___ -> PUNCT    6354\n",
       "105     DET -> ___    6147\n",
       "17      ADJ -> ___    5935\n",
       "231   SCONJ -> ___    5385\n",
       "254  VERB -> PUNCT    4842\n",
       "134  NOUN -> PUNCT    4632\n",
       "70      AUX -> ___    4016\n",
       "249   VERB -> NOUN    3772\n",
       "47    ADV -> PUNCT    3453\n",
       "88    CCONJ -> ___    3438"
Marcin Wątroba's avatar
Marcin Wątroba committed
      ]
     },
Marcin Wątroba's avatar
Marcin Wątroba committed
     "execution_count": 44,
Marcin Wątroba's avatar
Marcin Wątroba committed
     "metadata": {},
     "output_type": "execute_result"
Marcin Wątroba's avatar
Marcin Wątroba committed
    }
   ],
   "source": [
    "show_stats(voicelab_experiment_repository, ajn_connections_config)"
Marcin Wątroba's avatar
Marcin Wątroba committed
   ]
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 2,
   "id": "17823c33-7065-43e6-9d2f-49a59fba26c1",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "metadata": {},
   "outputs": [],
Marcin Wątroba's avatar
Marcin Wątroba committed
   "source": [
    "import spacy\n",
    "nlp = spacy.load(\"pl_core_news_lg\")"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 3,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "e8fa6eb7-ec32-4284-9ff3-1de52e969cb5",
   "metadata": {},
   "outputs": [],
Marcin Wątroba's avatar
Marcin Wątroba committed
   "source": [
    "doc = nlp(\"Ala ma psa i kota\")"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 16,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "cb0d05d0-8eff-4ddd-900a-207e67c2afc0",
   "metadata": {},
Marcin Wątroba's avatar
Marcin Wątroba committed
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "persName 0 1\n"
     ]
    }
   ],
   "source": [
    "for it in doc.ents:\n",
    "    print(it.label_, it.start, it.end)"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 11,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "5f008198-1ad3-4fe6-a904-7a0e1b4d0ade",
   "metadata": {},
Marcin Wątroba's avatar
Marcin Wątroba committed
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(doc)"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 18,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "f2a7a1e7-c226-4fd9-873b-cada73a9d5fb",
   "metadata": {},
   "outputs": [],
Marcin Wątroba's avatar
Marcin Wątroba committed
   "source": [
    "arr = ['_' for it in range(len(doc))]"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 20,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "6206ac46-c803-4c9f-a9f9-91a1f60177bf",
   "metadata": {},
   "outputs": [],
Marcin Wątroba's avatar
Marcin Wątroba committed
   "source": [
    "for ent in doc.ents:\n",
    "    for itt in range(ent.start, ent.end):\n",
    "        arr[itt] = ent.label_"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 15,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "9756f1eb-7e95-4d8a-8d99-d2f664c4105f",
   "metadata": {},
Marcin Wątroba's avatar
Marcin Wątroba committed
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[2]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# list(range(2, 3))"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
Marcin Wątroba's avatar
Marcin Wątroba committed
   "execution_count": 22,
Marcin Wątroba's avatar
Marcin Wątroba committed
   "id": "3b286ac6-e2fd-421f-89eb-fb66233856f7",
   "metadata": {},
Marcin Wątroba's avatar
Marcin Wątroba committed
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['persName', '_', '_', '_', '_']"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "arr"
   ]
Marcin Wątroba's avatar
Marcin Wątroba committed
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f5e9f50-1d0f-4660-87b7-563bd93582c4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6d35cd94-a2ba-4e45-afaa-c93d9ab360b4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a88bbfcc-726f-4c9e-acea-9d38163296cf",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6f62e3d-ac08-43ae-ba25-4b93702c33f9",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f4348aca-0344-49c0-9bf4-af7b8c84871d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "db6207ed-1ee5-4a1e-8cc9-397b76b66997",
   "metadata": {},
   "outputs": [],
   "source": []
Marcin Wątroba's avatar
Marcin Wątroba committed
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}