Newer
Older
{
"cells": [
{
"cell_type": "markdown",
"id": "9febd313-2446-49bb-8508-997c0b2bc0ca",
"metadata": {},
"source": [
"# Imports and configs"
]
},
{
"cell_type": "code",
"id": "1929f9bb-5060-4530-811b-823d69a5b00f",
"metadata": {},
"outputs": [],
"source": [
"from experiment.luna.luna_record_provider import LunaRecordProvider\n",
"from sziszapangma.integration.experiment_manager import ExperimentManager\n",
"from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository\n",
"from sziszapangma.integration.path_filter import ExtensionPathFilter\n",
"from pymongo import MongoClient\n",
"from spacy.tokens.doc import Doc\n",
"import pandas as pd\n",
"from experiment.voicelab.voicelab_telco_record_provider import VoicelabTelcoRecordProvider\n",
"from sziszapangma.integration.repository.experiment_repository import ExperimentRepository\n",
"from dataclasses import dataclass\n",
"import itertools\n",
"from typing import Optional, Any, List\n",
"import numpy as np"
"id": "321a93d9-0c5d-4d42-ba8f-4b704a05d78c",
"metadata": {},
"outputs": [],
"source": [
"pd.set_option('display.max_rows', None)"
]
},
{
"cell_type": "markdown",
"id": "1b0a963f-ef3b-4f0f-bebc-68a0ea3e4f6c",
"metadata": {},
"source": [
"# Load datasets and other pipeline objects"
]
},
{
"cell_type": "code",
"id": "bbe2a7bf-bb6d-42ee-b5ce-48e6ec7fcd94",
"metadata": {},
"outputs": [],
"source": [
"VOICELAB_DATASET_DIRECTORY = 'experiment_data/dataset/voicelab_cbiz_testset_20220322'\n",
"LUNA_DATASET_DIRECTORY = 'experiment_data/dataset/LUNA.PL'"
]
},
{
"cell_type": "code",
"id": "4bb1a1c0-8784-4e0d-9426-13495718e087",
"metadata": {},
"outputs": [],
"source": [
"## repository collections\n",
"GOLD_TRANSCRIPT = 'gold_transcript'\n",
"GOLD_TRANSCRIPT_SPACY = 'gold_transcript_spacy'\n",
"\n",
"TECHMO_POLISH_ASR = 'techmo_polish_asr'\n",
"WORD_TECHMO_MERTICS_WER = 'word_techmo_metrics_wer'\n",
"WORD_TECHMO_ALIGNMENT_WER = 'word_techmo_alignment_wer'\n",
"TECHMO_SPACY = 'techmo_spacy'\n",
"POS_TECHMO_ALIGNMENT_WER = 'pos_techmo_alignment_wer'\n",
"POS_TECHMO_METRICS_WER = 'pos_techmo_metrics_wer'\n",
"TAG_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'tag_spacy_techmo_metrics_wer_embeddings'\n",
"TAG_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_techmo_alignment_wer_embeddings'\n",
"NER_SPACY_TECHMO_METRICS_WER_EMBEDDINGS = 'ner_spacy_techmo_metrics_wer_embeddings'\n",
"NER_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_techmo_alignment_wer_embeddings'\n",
"\n",
"AJN_POLISH_ASR = 'ajn_polish_asr'\n",
"WORD_AJN_MERTICS_WER = 'word_ajn_metrics_wer'\n",
"WORD_AJN_ALIGNMENT_WER = 'word_ajn_alignment_wer'\n",
"AJN_SPACY = 'ajn_spacy'\n",
"POS_AJN_ALIGNMENT_WER = 'pos_ajn_metrics_wer'\n",
"POS_AJN_METRICS_WER = 'pos_ajn_alignment_wer'\n",
"TAG_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'tag_spacy_ajn_metrics_wer_embeddings'\n",
"TAG_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'tag_spacy_ajn_alignment_wer_embeddings'\n",
"NER_SPACY_AJN_METRICS_WER_EMBEDDINGS = 'ner_spacy_ajn_metrics_wer_embeddings'\n",
"NER_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS = 'ner_spacy_ajn_alignment_wer_embeddings'"
"id": "d4265253-755a-4160-97f7-72604fdf41d1",
"metadata": {},
"outputs": [],
"source": [
"@dataclass\n",
"class CollectionsConfig:\n",
" config_name: str\n",
" gold_transcript: str\n",
" gold_transcript_spacy: str\n",
" asr: str\n",
" word_asr_metric_wer: str\n",
" word_asr_alignment_wer: str\n",
" asr_spacy: str\n",
" pos_asr_metric_wer: str\n",
" pos_asr_alignment_wer: str\n",
" tag_metric_wer: str\n",
" tag_alignment_wer: str\n",
" ner_metric_wer: str\n",
" ner_alignment_wer: str"
"id": "950b0bb8-e5ae-46e0-97a2-a832b7c8a70f",
"metadata": {},
"outputs": [],
"source": [
"techmo_connections_config = CollectionsConfig(\n",
" config_name='TECHMO ASR',\n",
" gold_transcript=GOLD_TRANSCRIPT,\n",
" gold_transcript_spacy=GOLD_TRANSCRIPT_SPACY,\n",
" asr=TECHMO_POLISH_ASR,\n",
" word_asr_metric_wer=WORD_TECHMO_MERTICS_WER,\n",
" word_asr_alignment_wer=WORD_TECHMO_ALIGNMENT_WER,\n",
" asr_spacy=TECHMO_SPACY,\n",
" pos_asr_metric_wer=POS_TECHMO_METRICS_WER,\n",
" pos_asr_alignment_wer=POS_TECHMO_ALIGNMENT_WER,\n",
" tag_metric_wer=TAG_SPACY_TECHMO_METRICS_WER_EMBEDDINGS,\n",
" tag_alignment_wer=TAG_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS,\n",
" ner_metric_wer=NER_SPACY_TECHMO_METRICS_WER_EMBEDDINGS,\n",
" ner_alignment_wer=NER_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS\n",
")\n",
"ajn_connections_config = CollectionsConfig(\n",
" config_name='AJN ASR',\n",
" gold_transcript=GOLD_TRANSCRIPT,\n",
" gold_transcript_spacy=GOLD_TRANSCRIPT_SPACY,\n",
" asr=AJN_POLISH_ASR,\n",
" word_asr_metric_wer=WORD_AJN_MERTICS_WER,\n",
" word_asr_alignment_wer=WORD_AJN_ALIGNMENT_WER,\n",
" asr_spacy=AJN_SPACY,\n",
" pos_asr_metric_wer=POS_AJN_ALIGNMENT_WER,\n",
" pos_asr_alignment_wer=POS_AJN_METRICS_WER,\n",
" tag_metric_wer=TAG_SPACY_AJN_METRICS_WER_EMBEDDINGS,\n",
" tag_alignment_wer=TAG_SPACY_AJN_ALIGNMENT_WER_EMBEDDINGS,\n",
" ner_metric_wer=NER_SPACY_TECHMO_METRICS_WER_EMBEDDINGS,\n",
" ner_alignment_wer=NER_SPACY_TECHMO_ALIGNMENT_WER_EMBEDDINGS\n",
"id": "4dec626b-02e4-4c78-a238-04ef2f090ea5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"voicelab examples count 800\n",
"luna examples count 500\n"
]
}
],
"source": [
"# Both repositories are created with the same first argument and differ only in the second one.\n",
"voicelab_experiment_repository = MultiFilesExperimentRepository(\n",
"    'experiment_data/pipeline', 'asr_benchmark_voicelab_cbiz_testset_20220322')\n",
"luna_experiment_repository = MultiFilesExperimentRepository(\n",
"    'experiment_data/pipeline', 'asr_benchmark_luna')\n",
"\n",
"# Sanity check: how many record ids each repository reports.\n",
"print(f'voicelab examples count {len(voicelab_experiment_repository.get_all_record_ids())}')\n",
"print(f'luna examples count {len(luna_experiment_repository.get_all_record_ids())}')"
]
},
{
"cell_type": "code",
"id": "98c6ff1d-4fbd-4b68-9e23-ecea33852b12",
"metadata": {},
"outputs": [],
"source": [
"# Record providers: each one is given a filter selecting the 'wav' files of its dataset directory.\n",
"voicelab_record_provider = VoicelabTelcoRecordProvider(\n",
"    ExtensionPathFilter(root_directory=VOICELAB_DATASET_DIRECTORY, extension='wav'),\n",
"    relation_manager_root_path='experiment_data/dataset_relation_manager_data/voicelab_cbiz_testset_20220322',\n",
")\n",
"\n",
"luna_record_provider = LunaRecordProvider(\n",
"    ExtensionPathFilter(root_directory=f'{LUNA_DATASET_DIRECTORY}/LUNA.PL', extension='wav'),\n",
"    relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna',\n",
")"
]
},
{
"cell_type": "code",
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
"id": "5bd3dcd6-6b32-480c-8937-07c770354ed1",
"metadata": {},
"outputs": [],
"source": [
"# Explore POS: gather the 'pos' values found in the LUNA gold transcripts.\n",
"# `a` receives the distinct tags of every record, so a tag shared by several records repeats;\n",
"# evaluate set(a) to see each tag once.\n",
"a = []\n",
"for record_id in list(luna_record_provider.get_all_records()):\n",
"    g = luna_experiment_repository.get_property_for_key(record_id, GOLD_TRANSCRIPT)\n",
"    a.extend({it['pos'] for it in g})\n",
"\n",
"# Tagset reference: https://www.sketchengine.eu/polish-nkjp-part-of-speech-tagset/\n",
"# Tags seen so far; only some of them have a target tag assigned yet (draft notes):\n",
"# {'-': 'X',\n",
"# 'ADJc': 'ADJ',\n",
"# 'ADJp': 'ADJ',\n",
"# 'ADV': 'ADV',\n",
"# 'ADVn': 'ADV',\n",
"# 'CC',\n",
"# 'ITJ',\n",
"# 'NUM': 'NUM',\n",
"# 'NUMc': 'NUM',\n",
"# 'NUMp': 'NUM',\n",
"# 'NUMq': 'NUM',\n",
"# 'Nc',\n",
"# 'Np',\n",
"# 'PADJ',\n",
"# 'PART',\n",
"# 'PINT',\n",
"# 'PN',\n",
"# 'PPER',\n",
"# 'PQ',\n",
"# 'PREF',\n",
"# 'PREL',\n",
"# 'PreP',\n",
"# 'PrePF',\n",
"# 'PrePp',\n",
"# 'PropName',\n",
"# 'VA',\n",
"# 'VS',\n",
"# 'VV',\n",
"# 'acron'}"
]
},
{
"cell_type": "code",
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
"id": "74bf17ba-8572-4758-8116-794708b6ea08",
"metadata": {},
"outputs": [],
"source": [
"def get_gold_transcript_for(record_id: str, experiment_repository: ExperimentRepository,\n",
"                            collections_config: CollectionsConfig) -> str:\n",
"    \"\"\"Return the gold transcript of `record_id` as a single space-separated string.\n",
"\n",
"    The words are the 'word' entries of the property stored under\n",
"    `collections_config.gold_transcript`, joined in their stored order.\n",
"    \"\"\"\n",
"    gold_tokens = experiment_repository.get_property_for_key(record_id, collections_config.gold_transcript)\n",
"    words = [token['word'] for token in gold_tokens]\n",
"    return ' '.join(words)\n",
"\n",
"def get_asr_transcript_for(record_id: str, experiment_repository: ExperimentRepository,\n",
"                           collections_config: CollectionsConfig) -> str:\n",
"    \"\"\"Return the ASR transcript of `record_id`.\n",
"\n",
"    This is the 'full_text' entry of the property stored under `collections_config.asr`.\n",
"    \"\"\"\n",
"    asr_result = experiment_repository.get_property_for_key(record_id, collections_config.asr)\n",
"    return asr_result['full_text']\n",
"\n",
"def get_word_alignment_df(record_id: str, experiment_repository: ExperimentRepository,\n",
"                          collections_config: CollectionsConfig) -> pd.DataFrame:\n",
"    \"\"\"Build a table of the word-level alignment steps of `record_id`.\n",
"\n",
"    Each step of the property stored under `collections_config.word_asr_alignment_wer`\n",
"    becomes one row with 'step_type', 'reference_word_text' and 'hypothesis_word_text';\n",
"    a side that is absent from the step is shown as an empty string.\n",
"    \"\"\"\n",
"    def text_of(step_words, side):\n",
"        # A step may lack one of its two sides; that side is rendered as ''.\n",
"        return step_words[side]['text'] if side in step_words else ''\n",
"\n",
"    alignment_steps = experiment_repository.get_property_for_key(\n",
"        record_id, collections_config.word_asr_alignment_wer)\n",
"    rows = []\n",
"    for step in alignment_steps:\n",
"        rows.append({\n",
"            'step_type': step['step_type'],\n",
"            'reference_word_text': text_of(step['step_words'], 'reference_word'),\n",
"            'hypothesis_word_text': text_of(step['step_words'], 'hypothesis_word'),\n",
"        })\n",
"    return pd.DataFrame(rows)\n",
"\n",
"\n",
"def get_pos_alignment_df(record_id: str, experiment_repository: ExperimentRepository,\n",
"                         collections_config: CollectionsConfig) -> pd.DataFrame:\n",
"    \"\"\"Build a table of the POS-level alignment steps of `record_id`.\n",
"\n",
"    Each step of the property stored under `collections_config.pos_asr_alignment_wer`\n",
"    becomes one row. The step's 'text' is reported as the POS value, and the word itself\n",
"    is looked up by the step word's 'id' in the spaCy-processed gold / ASR transcripts.\n",
"    A side that is absent from the step yields empty strings.\n",
"    \"\"\"\n",
"    pos_steps = experiment_repository.get_property_for_key(\n",
"        record_id, collections_config.pos_asr_alignment_wer)\n",
"\n",
"    gold_tokens = experiment_repository.get_property_for_key(\n",
"        record_id, collections_config.gold_transcript_spacy)\n",
"    gold_word_by_id = {token['id']: token['word'] for token in gold_tokens}\n",
"\n",
"    asr_tokens = experiment_repository.get_property_for_key(record_id, collections_config.asr_spacy)\n",
"    asr_word_by_id = {token['id']: token['word'] for token in asr_tokens}\n",
"\n",
"    rows = []\n",
"    for step in pos_steps:\n",
"        row = {'step_type': step['step_type']}\n",
"        step_words = step['step_words']\n",
"\n",
"        if 'reference_word' in step_words:\n",
"            reference = step_words['reference_word']\n",
"            row['reference_word_pos'] = reference['text']\n",
"            row['reference_word_text'] = gold_word_by_id[reference['id']]\n",
"        else:\n",
"            row['reference_word_pos'] = ''\n",
"            row['reference_word_text'] = ''\n",
"\n",
"        if 'hypothesis_word' in step_words:\n",
"            hypothesis = step_words['hypothesis_word']\n",
"            row['hypothesis_word_pos'] = hypothesis['text']\n",
"            row['hypothesis_word_text'] = asr_word_by_id[hypothesis['id']]\n",
"        else:\n",
"            row['hypothesis_word_pos'] = ''\n",
"            row['hypothesis_word_text'] = ''\n",
"\n",
"        rows.append(row)\n",
"    return pd.DataFrame(rows)\n",
"\n",
"\n",
"\n",
"def get_tag_alignment_df(record_id: str, experiment_repository: ExperimentRepository,\n",
"                         collections_config: CollectionsConfig) -> pd.DataFrame:\n",
"    \"\"\"Build a table of the tag-level alignment steps of `record_id`.\n",
"\n",
"    Each step of the property stored under `collections_config.tag_alignment_wer` becomes one\n",
"    row with 'step_type', 'reference_word_text' and 'hypothesis_word_text'; a side that is\n",
"    absent from the step is shown as an empty string.\n",
"    \"\"\"\n",
"    def to_row(step):\n",
"        step_words = step['step_words']\n",
"        return {\n",
"            'step_type': step['step_type'],\n",
"            'reference_word_text': (step_words['reference_word']['text']\n",
"                                    if 'reference_word' in step_words else ''),\n",
"            'hypothesis_word_text': (step_words['hypothesis_word']['text']\n",
"                                     if 'hypothesis_word' in step_words else ''),\n",
"        }\n",
"\n",
"    tag_steps = experiment_repository.get_property_for_key(record_id, collections_config.tag_alignment_wer)\n",
"    return pd.DataFrame([to_row(step) for step in tag_steps])\n",
"\n",
"\n",
"def get_ner_alignment_df(record_id: str, experiment_repository: ExperimentRepository,\n",
"                         collections_config: CollectionsConfig) -> pd.DataFrame:\n",
"    \"\"\"Build a table of the NER-level alignment steps of `record_id`.\n",
"\n",
"    Reads the property stored under `collections_config.ner_alignment_wer` (the alignment,\n",
"    not the metric property) and turns each step into one row with 'step_type',\n",
"    'reference_word_text' and 'hypothesis_word_text'; a side that is absent from the step\n",
"    is shown as an empty string.\n",
"\n",
"    :param record_id: identifier of the record whose alignment is shown\n",
"    :param experiment_repository: repository the property is read from\n",
"    :param collections_config: property names of the ASR system being inspected\n",
"    :return: one row per alignment step\n",
"    \"\"\"\n",
"    ner_alignment_steps = experiment_repository.get_property_for_key(\n",
"        record_id, collections_config.ner_alignment_wer)\n",
"    rows = []\n",
"    for step in ner_alignment_steps:\n",
"        step_words = step['step_words']\n",
"        rows.append({\n",
"            'step_type': step['step_type'],\n",
"            'reference_word_text': step_words['reference_word']['text']\n",
"            if 'reference_word' in step_words else '',\n",
"            'hypothesis_word_text': step_words['hypothesis_word']['text']\n",
"            if 'hypothesis_word' in step_words else '',\n",
"        })\n",
"    return pd.DataFrame(rows)\n",
" \n",
" \n",
"def show_report_for(record_id: str, experiment_repository: ExperimentRepository,\n",
"                    collections_config: CollectionsConfig):\n",
"    \"\"\"Print and display the evaluation report of one record for one ASR configuration.\n",
"\n",
"    Prints the record id, the configuration name, the gold and ASR transcripts and the\n",
"    stored word / POS / tag / NER WER metric properties, then displays the word, POS, tag\n",
"    and NER alignment tables, followed by three separator lines.\n",
"\n",
"    :param record_id: identifier of the record to report on\n",
"    :param experiment_repository: repository the properties are read from\n",
"    :param collections_config: property names of the ASR system being reported\n",
"    \"\"\"\n",
"    print('record_id:', record_id)\n",
"    print('properties_config:', collections_config.config_name)\n",
"    print()\n",
"    print(f'gold transcript: {get_gold_transcript_for(record_id, experiment_repository, collections_config)}')\n",
"    print()\n",
"    print(f'asr transcript: {get_asr_transcript_for(record_id, experiment_repository, collections_config)}')\n",
"\n",
"    # Every metric line reads the metric property of its level (the alignment\n",
"    # properties are rendered as tables further down).\n",
"    metric_properties = (\n",
"        ('word', collections_config.word_asr_metric_wer),\n",
"        ('pos', collections_config.pos_asr_metric_wer),\n",
"        ('tag', collections_config.tag_metric_wer),\n",
"        ('ner', collections_config.ner_metric_wer),\n",
"    )\n",
"    for label, property_name in metric_properties:\n",
"        print()\n",
"        print(f'{label} wer {experiment_repository.get_property_for_key(record_id, property_name)}')\n",
"\n",
"    # Alignment tables, separated from each other by an empty line.\n",
"    alignment_table_builders = (get_word_alignment_df, get_pos_alignment_df,\n",
"                                get_tag_alignment_df, get_ner_alignment_df)\n",
"    for position, build_table in enumerate(alignment_table_builders):\n",
"        if position > 0:\n",
"            print()\n",
"        display(build_table(record_id, experiment_repository, collections_config))\n",
"\n",
"    separator = '--------------------------------------------------------------'\n",
"    for _ in range(3):\n",
"        print(separator)"
]
},
{
"cell_type": "markdown",
"id": "327bbc4b-7bcb-4a28-8d4a-22660ecaf6c2",
"metadata": {},
"source": [
"# Report for LUNA and TECHMO ASR"
]
},
{
"cell_type": "code",
"id": "3395e091-3488-4b49-aede-af15a0055a8b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"record_id: LUNA.PL__PRZYSTANKI__DOBRAJAKOSC__M__1_2007-03-30_14_37_38__1_2007-03-30_14_37_38\n",
"gold transcript: xxx dzień dobry dzień dobry proszę pana ja mam o której będzie sto czterdzieści trzy będzie najbliższy z Alei Alei Witosa w stronę Gocławia z tego przystanku vis-a-vis ZUSu tak ? no nie nie tam vis-a-vis Panoramy przez Siekierkowski tam w stronę Gocławia ale to nie z tego przystanku Aleja Witosa ? no to jest czy Bartoszka na żądanie wie pan gdzie jest się zaczyna Most Siekierkowski tam w stronę Gocławia gdzie ma on przystanek pierwszy sto czterdzieści trzy znaczy tak naprawdę Most Siekierkowski to się zaczyna za Siekierki Sanktuarium no właśnie przed tym przed tym przystanek o której jest sto czterdzieści trzy najbliższy aha czyli to wtedy przystanek Małe Siekierki ale to nie jest tak tak tak tak na wysokości Panoramy aha dobrze najbliższy już chwileczkę czternasta czterdzieści sześć piętnasta zero jeden aha no dobrze dziękuję do widzenia do widzenia\n",
"\n",
"asr transcript: dzień dobry dzień dobry proszę pana ja mam pytanie o której będzie 143 będzie najbliższy z alei witosa alei witosa w stronę gocławia przystanku widzowi zusu tak no nie nie tam widzowi panoramy przez siekierkowski tam w stronę gocławia ale to nie z tego przystanku aleja witosa no to jest czy bartoszka na żądanie wie pan gdzie jest się zaczyna most siekierkowski tam w stronę wrocław on przystanek pierwszy 143 znaczy tak naprawdę most siekierkowski to się zaczyna za siekierki sanktuarium no właśnie przed tym przed tym przystanek o której jest 143 najbliższy wtedy przystanek małe siekierki ale to nie jest dokonać panoramy dobrze najbliższy już chwileczkę czternasta 46 piętnasta 0 1 aha dobrze dziękuję do widzenia\n",
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
"\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>step_type</th>\n",
" <th>reference_word_text</th>\n",
" <th>hypothesis_word_text</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>DELETION</td>\n",
" <td>xxx</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CORRECT</td>\n",
" <td>dzień</td>\n",
" <td>dzień</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CORRECT</td>\n",
" <td>dobry</td>\n",
" <td>dobry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CORRECT</td>\n",
" <td>dzień</td>\n",
" <td>dzień</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CORRECT</td>\n",
" <td>dobry</td>\n",
" <td>dobry</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>CORRECT</td>\n",
" <td>proszę</td>\n",
" <td>proszę</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>CORRECT</td>\n",
" <td>pana</td>\n",
" <td>pana</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>CORRECT</td>\n",
" <td>ja</td>\n",
" <td>ja</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>CORRECT</td>\n",
" <td>INSERTION</td>\n",
" <td></td>\n",
" <td>pytanie</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>CORRECT</td>\n",
" <td>DELETION</td>\n",
" <td>sto</td>\n",
" <td></td>\n",
" <td>DELETION</td>\n",
" <td>czterdzieści</td>\n",
" <td></td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>trzy</td>\n",
" <td>143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>CORRECT</td>\n",
" <td>najbliższy</td>\n",
" <td>najbliższy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>CORRECT</td>\n",
" <td>INSERTION</td>\n",
" <td></td>\n",
" <td>alei</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Alei</td>\n",
" <td>witosa</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Alei</td>\n",
" <td>alei</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Witosa</td>\n",
" <td>witosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>stronę</td>\n",
" <td>stronę</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25</th>\n",
" <td>DELETION</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>26</th>\n",
" <td>DELETION</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>27</th>\n",
" <td>SUBSTITUTION</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28</th>\n",
" <td>CORRECT</td>\n",
" <td>przystanku</td>\n",
" <td>przystanku</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29</th>\n",
" <td>SUBSTITUTION</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>ZUSu</td>\n",
" <td>zusu</td>\n",
" <td>CORRECT</td>\n",
" <td>tak</td>\n",
" <td>tak</td>\n",
" <td>DELETION</td>\n",
" <td>?</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>33</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>nie</td>\n",
" <td>nie</td>\n",
" </tr>\n",
" <tr>\n",
" <th>35</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>36</th>\n",
" <td>CORRECT</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>vis-a-vis</td>\n",
" <td>widzowi</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Panoramy</td>\n",
" <td>panoramy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>39</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40</th>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Siekierkowski</td>\n",
" <td>siekierkowski</td>\n",
" </tr>\n",
" <tr>\n",
" <th>41</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>42</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44</th>\n",
" <td>SUBSTITUTION</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>nie</td>\n",
" <td>nie</td>\n",
" </tr>\n",
" <tr>\n",
" <th>48</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>49</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50</th>\n",
" <td>CORRECT</td>\n",
" <td>przystanku</td>\n",
" <td>przystanku</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51</th>\n",
" <td>DELETION</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>52</th>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Witosa</td>\n",
" <td>aleja</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>?</td>\n",
" <td>witosa</td>\n",
" </tr>\n",
" <tr>\n",
" <th>54</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>55</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>56</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>czy</td>\n",
" <td>czy</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Bartoszka</td>\n",
" <td>bartoszka</td>\n",
" </tr>\n",
" <tr>\n",
" <th>59</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>pan</td>\n",
" <td>pan</td>\n",
" <td>CORRECT</td>\n",
" <td>gdzie</td>\n",
" <td>gdzie</td>\n",
" </tr>\n",
" <tr>\n",
" <th>64</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>65</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>66</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>67</th>\n",
" <td>SUBSTITUTION</td>\n",
" </tr>\n",
" <tr>\n",
" <th>68</th>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Siekierkowski</td>\n",
" <td>siekierkowski</td>\n",
" </tr>\n",
" <tr>\n",
" <th>69</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>70</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>stronę</td>\n",
" <td>stronę</td>\n",
" <td>DELETION</td>\n",
" <td>Gocławia</td>\n",
" <td></td>\n",
" <td>DELETION</td>\n",
" <td>gdzie</td>\n",
" <td></td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>ma</td>\n",
" <td>wrocław</td>\n",
" <td>CORRECT</td>\n",
" <td>on</td>\n",
" <td>on</td>\n",
" <td>CORRECT</td>\n",
" <td>przystanek</td>\n",
" <td>przystanek</td>\n",
" </tr>\n",
" <tr>\n",
" <th>77</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>78</th>\n",
" <td>DELETION</td>\n",
" <td>sto</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>79</th>\n",
" <td>DELETION</td>\n",
" <td></td>\n",
" </tr>\n",
" <tr>\n",
" <th>80</th>\n",
" <td>SUBSTITUTION</td>\n",
" </tr>\n",
" <tr>\n",
" <th>81</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>82</th>\n",
" <td>CORRECT</td>\n",
" <td>CORRECT</td>\n",
" <td>naprawdę</td>\n",
" <td>naprawdę</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Most</td>\n",
" <td>most</td>\n",
" </tr>\n",
" <tr>\n",
" <th>85</th>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Siekierkowski</td>\n",
" <td>siekierkowski</td>\n",
" </tr>\n",
" <tr>\n",
" <th>86</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>87</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>88</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>89</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>90</th>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Siekierki</td>\n",
" <td>siekierki</td>\n",
" <td>SUBSTITUTION</td>\n",
" <td>Sanktuarium</td>\n",
" <td>sanktuarium</td>\n",
" </tr>\n",
" <tr>\n",
" <th>92</th>\n",
" <td>CORRECT</td>\n",
" </tr>\n",
" <tr>\n",
" <th>93</th>\n",
" <td>CORRECT</td>\n",