Newer
Older
" <td>PART -> ___</td>\n",
" <td>3585</td>\n",
" <th>167</th>\n",
" <td>PRON -> ___</td>\n",
" <td>3424</td>\n",
" <th>221</th>\n",
" <td>VERB -> ___</td>\n",
" <td>2935</td>\n",
" <th>46</th>\n",
" <td>ADV -> ___</td>\n",
" <td>2727</td>\n",
" <th>77</th>\n",
" <td>CCONJ -> ___</td>\n",
" <td>2360</td>\n",
" <th>135</th>\n",
" <td>NUM -> X</td>\n",
" <td>1842</td>\n",
" <th>136</th>\n",
" <td>NUM -> ___</td>\n",
" <td>1726</td>\n",
" <th>92</th>\n",
" <td>DET -> ___</td>\n",
" <td>1715</td>\n",
" <th>61</th>\n",
" <td>AUX -> ___</td>\n",
" <td>1634</td>\n",
" <th>204</th>\n",
" <td>SCONJ -> ___</td>\n",
" <td>1587</td>\n",
" <td>ADJ -> ___</td>\n",
" <td>1461</td>\n",
" <th>244</th>\n",
" <td>___ -> NOUN</td>\n",
" <td>1251</td>\n",
" <th>250</th>\n",
" <td>___ -> VERB</td>\n",
" <td>1178</td>\n",
" <th>119</th>\n",
" <td>NOUN -> VERB</td>\n",
" <td>677</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
45074
45075
45076
45077
45078
45079
45080
45081
45082
45083
45084
45085
45086
45087
45088
45089
45090
" values counts\n",
"30 ADP -> ___ 4527\n",
"121 NOUN -> ___ 4129\n",
"151 PART -> ___ 3585\n",
"167 PRON -> ___ 3424\n",
"221 VERB -> ___ 2935\n",
"46 ADV -> ___ 2727\n",
"77 CCONJ -> ___ 2360\n",
"135 NUM -> X 1842\n",
"136 NUM -> ___ 1726\n",
"92 DET -> ___ 1715\n",
"61 AUX -> ___ 1634\n",
"204 SCONJ -> ___ 1587\n",
"15 ADJ -> ___ 1461\n",
"244 ___ -> NOUN 1251\n",
"250 ___ -> VERB 1178\n",
"119 NOUN -> VERB 677"
"show_stats(voicelab_experiment_repository, techmo_connections_config)"
"id": "41e5bd80-87e0-4791-a87a-4c247ddb27cb",
"id": "96b5b6d1-d7cd-4afb-adca-f6ed4bc1fedf",
45116
45117
45118
45119
45120
45121
45122
45123
45124
45125
45126
45127
45128
45129
45130
45131
45132
45133
45134
45135
45136
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>values</th>\n",
" <th>counts</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>259</th>\n",
" <td>VERB -> ___</td>\n",
" <td>11609</td>\n",
" <th>140</th>\n",
" <td>NOUN -> ___</td>\n",
" <td>10416</td>\n",
" <th>53</th>\n",
" <td>ADV -> ___</td>\n",
" <td>10127</td>\n",
" <th>175</th>\n",
" <td>PART -> ___</td>\n",
" <td>9282</td>\n",
" <th>35</th>\n",
" <td>ADP -> ___</td>\n",
" <td>8663</td>\n",
" <th>192</th>\n",
" <td>PRON -> ___</td>\n",
" <td>8066</td>\n",
" <th>287</th>\n",
" <td>___ -> PUNCT</td>\n",
" <td>6354</td>\n",
" <th>105</th>\n",
" <td>DET -> ___</td>\n",
" <td>6147</td>\n",
" <td>ADJ -> ___</td>\n",
" <td>5935</td>\n",
" <th>231</th>\n",
" <td>SCONJ -> ___</td>\n",
" <td>5385</td>\n",
" <th>254</th>\n",
" <td>VERB -> PUNCT</td>\n",
" <td>4842</td>\n",
" <th>134</th>\n",
" <td>NOUN -> PUNCT</td>\n",
" <td>4632</td>\n",
" <th>70</th>\n",
" <td>AUX -> ___</td>\n",
" <td>4016</td>\n",
" <th>249</th>\n",
" <td>VERB -> NOUN</td>\n",
" <td>3772</td>\n",
" <th>47</th>\n",
" <td>ADV -> PUNCT</td>\n",
" <td>3453</td>\n",
" <th>88</th>\n",
" <td>CCONJ -> ___</td>\n",
" <td>3438</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
45227
45228
45229
45230
45231
45232
45233
45234
45235
45236
45237
45238
45239
45240
45241
45242
45243
" values counts\n",
"259 VERB -> ___ 11609\n",
"140 NOUN -> ___ 10416\n",
"53 ADV -> ___ 10127\n",
"175 PART -> ___ 9282\n",
"35 ADP -> ___ 8663\n",
"192 PRON -> ___ 8066\n",
"287 ___ -> PUNCT 6354\n",
"105 DET -> ___ 6147\n",
"17 ADJ -> ___ 5935\n",
"231 SCONJ -> ___ 5385\n",
"254 VERB -> PUNCT 4842\n",
"134 NOUN -> PUNCT 4632\n",
"70 AUX -> ___ 4016\n",
"249 VERB -> NOUN 3772\n",
"47 ADV -> PUNCT 3453\n",
"88 CCONJ -> ___ 3438"
"show_stats(voicelab_experiment_repository, ajn_connections_config)"
"id": "17823c33-7065-43e6-9d2f-49a59fba26c1",
"source": [
"import spacy\n",
"nlp = spacy.load(\"pl_core_news_lg\")"
]
"id": "e8fa6eb7-ec32-4284-9ff3-1de52e969cb5",
"metadata": {},
"outputs": [],
"source": [
"doc = nlp(\"Ala ma psa i kota\")"
]
"id": "cb0d05d0-8eff-4ddd-900a-207e67c2afc0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"persName 0 1\n"
]
}
],
"source": [
"for it in doc.ents:\n",
" print(it.label_, it.start, it.end)"
]
"id": "5f008198-1ad3-4fe6-a904-7a0e1b4d0ade",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"5"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(doc)"
]
"id": "f2a7a1e7-c226-4fd9-873b-cada73a9d5fb",
"metadata": {},
"outputs": [],
"source": [
"arr = ['_' for it in range(len(doc))]"
]
"id": "6206ac46-c803-4c9f-a9f9-91a1f60177bf",
"metadata": {},
"outputs": [],
"source": [
"for ent in doc.ents:\n",
" for itt in range(ent.start, ent.end):\n",
" arr[itt] = ent.label_"
]
"id": "9756f1eb-7e95-4d8a-8d99-d2f664c4105f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[2]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# list(range(2, 3))"
]
"id": "3b286ac6-e2fd-421f-89eb-fb66233856f7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['persName', '_', '_', '_', '_']"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"arr"
]
45379
45380
45381
45382
45383
45384
45385
45386
45387
45388
45389
45390
45391
45392
45393
45394
45395
45396
45397
45398
45399
45400
45401
45402
45403
45404
45405
45406
45407
45408
45409
45410
45411
45412
45413
45414
45415
45416
45417
45418
45419
45420
45421
45422
45423
45424
45425
45426
},
{
"cell_type": "code",
"execution_count": null,
"id": "3f5e9f50-1d0f-4660-87b7-563bd93582c4",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d35cd94-a2ba-4e45-afaa-c93d9ab360b4",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a88bbfcc-726f-4c9e-acea-9d38163296cf",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e6f62e3d-ac08-43ae-ba25-4b93702c33f9",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "f4348aca-0344-49c0-9bf4-af7b8c84871d",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "db6207ed-1ee5-4a1e-8cc9-397b76b66997",
"metadata": {},
"outputs": [],
"source": []
45427
45428
45429
45430
45431
45432
45433
45434
45435
45436
45437
45438
45439
45440
45441
45442
45443
45444
45445
45446
45447
45448
45449
45450
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}