From 02e76f7fbaec2ac72877c807e2ff5a93a62dd8e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Mon, 28 Jun 2021 17:41:16 +0200 Subject: [PATCH] Divide WerCalculator for Alignment and Wer calculator --- setup.cfg | 4 +- sziszapangma/core/alignment/__init__.py | 0 .../core/alignment/alignment_calculator.py | 179 ++++++++++++++ .../alignment/alignment_classic_calculator.py | 10 + .../alignment_embedding_calculator.py | 50 ++++ .../alignment_processing_step.py} | 22 +- .../alignment_soft_calculator.py} | 7 +- sziszapangma/core/alignment/alignment_step.py | 18 ++ sziszapangma/core/alignment/alignment_util.py | 71 ++++++ .../distance_matrix_calculator.py | 2 +- .../core/{wer => alignment}/step_type.py | 0 .../core/{wer => alignment}/step_words.py | 2 +- sziszapangma/core/{wer => alignment}/word.py | 0 .../classic_wer_calculator.cpython-38.pyc | Bin 713 -> 0 bytes .../distance_matrix_calculator.cpython-38.pyc | Bin 4055 -> 0 bytes .../wer/__pycache__/step_type.cpython-38.pyc | Bin 948 -> 0 bytes .../wer/__pycache__/step_words.cpython-38.pyc | Bin 524 -> 0 bytes .../wer_embedding_calculator.cpython-38.pyc | Bin 2778 -> 0 bytes .../wer_processing_step.cpython-38.pyc | Bin 1860 -> 0 bytes .../wer_soft_calculator.cpython-38.pyc | Bin 843 -> 0 bytes .../wer_span_question.cpython-38.pyc | Bin 1053 -> 0 bytes .../wer/__pycache__/wer_step.cpython-38.pyc | Bin 621 -> 0 bytes .../wer/__pycache__/wer_util.cpython-38.pyc | Bin 1526 -> 0 bytes .../core/wer/classic_wer_calculator.py | 9 - .../wer/{wer_span_question.py => span.py} | 6 +- sziszapangma/core/wer/wer_calculator.py | 225 +++--------------- .../core/wer/wer_embedding_calculator.py | 63 ----- sziszapangma/core/wer/wer_step.py | 11 - sziszapangma/core/wer/wer_util.py | 33 --- .../task/classic_wer_metric_task.py | 20 +- .../task/embedding_wer_metrics_task.py | 30 ++- tests/test_classic_wer.py | 27 ++- tests/test_embedding_wer.py | 20 +- tests/test_soft_wer.py | 20 +- tox.ini | 2 - 35 files changed, 449 insertions(+), 382 deletions(-) create mode 100644 sziszapangma/core/alignment/__init__.py create mode 100644 sziszapangma/core/alignment/alignment_calculator.py create mode 100644 sziszapangma/core/alignment/alignment_classic_calculator.py create mode 100644 sziszapangma/core/alignment/alignment_embedding_calculator.py rename sziszapangma/core/{wer/wer_processing_step.py => alignment/alignment_processing_step.py} (58%) rename sziszapangma/core/{wer/wer_soft_calculator.py => alignment/alignment_soft_calculator.py} (56%) create mode 100644 sziszapangma/core/alignment/alignment_step.py create mode 100644 sziszapangma/core/alignment/alignment_util.py rename sziszapangma/core/{wer => alignment}/distance_matrix_calculator.py (98%) rename sziszapangma/core/{wer => alignment}/step_type.py (100%) rename sziszapangma/core/{wer => alignment}/step_words.py (77%) rename sziszapangma/core/{wer => alignment}/word.py (100%) delete mode 100644 sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc 
delete mode 100644 sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc delete mode 100644 sziszapangma/core/wer/classic_wer_calculator.py rename sziszapangma/core/wer/{wer_span_question.py => span.py} (59%) delete mode 100644 sziszapangma/core/wer/wer_embedding_calculator.py delete mode 100644 sziszapangma/core/wer/wer_step.py delete mode 100644 sziszapangma/core/wer/wer_util.py diff --git a/setup.cfg b/setup.cfg index a65cf7a..2642f6a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -21,6 +21,6 @@ exclude = docs # Define setup.py command aliases here test = pytest -[tool:pytest] -collect_ignore = ['setup.py'] +;[tool:pytest] +;collect_ignore = ['setup.py'] diff --git a/sziszapangma/core/alignment/__init__.py b/sziszapangma/core/alignment/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/sziszapangma/core/alignment/alignment_calculator.py b/sziszapangma/core/alignment/alignment_calculator.py new file mode 100644 index 0000000..f69ec95 --- /dev/null +++ b/sziszapangma/core/alignment/alignment_calculator.py @@ -0,0 +1,179 @@ +from abc import ABC +from typing import List, Tuple, Optional + +import numpy as np + +from sziszapangma.core.alignment.step_type import StepType +from sziszapangma.core.alignment.alignment_step import AlignmentStep +from sziszapangma.core.alignment.distance_matrix_calculator import \ + DistanceCalculator +from sziszapangma.core.alignment.step_words import StepWords +from sziszapangma.core.alignment.alignment_processing_step import \ + AlignmentProcessingStep +from sziszapangma.core.alignment.word import Word + + +class AlignmentCalculator(ABC): + _distance_matrix_calculator: DistanceCalculator + + def __init__(self, distance_matrix_calculator: DistanceCalculator): + self._distance_matrix_calculator = distance_matrix_calculator + + def convert_processing_steps_to_result( + self, + processing_steps: List[AlignmentProcessingStep], + ) -> List[AlignmentStep]: + return [ + AlignmentStep(step.step_type, step.step_words, step.step_cost) + for step in processing_steps + ] + + def _get_reference_indexes_per_steps( + self, + steps: List[AlignmentProcessingStep] + ) -> List[int]: + counter = 0 + indexes = [] + for step in steps: + indexes.append(counter) + if step.step_type.contain_reference_word(): + counter = counter + 1 + return indexes + + def get_distance_matrix_between_words( + self, + reference: List[Word], + hypothesis: List[Word] + ) -> np.ndarray: + return self._distance_matrix_calculator.calculate_distance_matrix( + reference, hypothesis) + + @staticmethod + def _get_initialized_levenshtein_matrix( + reference: List[Word], + hypothesis: List[Word] + ) -> Tuple[np.ndarray, List[List[Optional[AlignmentProcessingStep]]]]: + + # TODO: consider about remove distance_arr replaced by steps_arr + reference_len = len(reference) + hypothesis_len = len(hypothesis) + distance_arr = np.zeros((reference_len + 1) * (hypothesis_len + 1)) \ + .reshape((reference_len + 1, hypothesis_len + 1)) + steps_arr = [ + [None for _ in range(hypothesis_len + 1)] + for _ in range(reference_len + 1) + ] + + # levenshtein initial + for ref_index in range(reference_len + 1): + distance_arr[ref_index][0] = ref_index + step_words = StepWords( + reference[ref_index - 1] if ref_index > 0 else None, + None + ) + steps_arr[ref_index][0] = AlignmentProcessingStep\ + .levenshtein_deletion(ref_index - 1, step_words) + for hyp_index 
in range(hypothesis_len + 1): + distance_arr[0][hyp_index] = hyp_index + step_words = StepWords( + None, + hypothesis[hyp_index - 1] if hyp_index > 0 else None + ) + steps_arr[0][hyp_index] = AlignmentProcessingStep\ + .levenshtein_insertion(hyp_index - 1, step_words) + + return distance_arr, steps_arr + + @staticmethod + def _get_levenshtein_processing_step_cross( + prev_cross_distance: float, + step_words: StepWords, + current_distance: float + ) -> AlignmentProcessingStep: + return AlignmentProcessingStep.levenshtein_correct( + prev_cross_distance, step_words, 0) \ + if current_distance == 0 \ + else AlignmentProcessingStep.levenshtein_substitution( + prev_cross_distance, step_words, current_distance) + + def get_levenshtein_embedding_based( + self, + reference: List[Word], + hypothesis: List[Word], + distance_matrix: np.ndarray + ) -> Tuple[np.ndarray, List[List[AlignmentProcessingStep]]]: + + reference_len = len(reference) + hypothesis_len = len(hypothesis) + distance_arr, steps_arr = self._get_initialized_levenshtein_matrix( + reference, hypothesis) + + for ref_index in range(reference_len): + for hyp_index in range(hypothesis_len): + step_words = StepWords(reference[ref_index], + hypothesis[hyp_index]) + current_distance = distance_matrix[ref_index][hyp_index] + prev_cross_distance = distance_arr[ref_index][hyp_index] + + cross_go_step = self._get_levenshtein_processing_step_cross( + prev_cross_distance, step_words, current_distance) + insertion_step = AlignmentProcessingStep.levenshtein_insertion( + distance_arr[ref_index + 1][hyp_index], step_words) + deletion_step = AlignmentProcessingStep.levenshtein_deletion( + distance_arr[ref_index][hyp_index + 1], step_words) + + best_step = min([cross_go_step, insertion_step, deletion_step], + key=lambda it: it.total_distance()) + + distance_arr[ref_index + 1][hyp_index + 1] = \ + best_step.total_distance() + steps_arr[ref_index + 1][hyp_index + 1] = best_step + + return distance_arr, steps_arr + + def extract_steps_path( + self, + steps_matrix: List[List[AlignmentProcessingStep]] + ) -> List[AlignmentProcessingStep]: + x = len(steps_matrix) - 1 + y = len(steps_matrix[0]) - 1 + to_return = [] + while not (x == 0 and y == 0): + current_step = steps_matrix[x][y] + to_return.append(current_step) + if current_step.step_type == StepType.DELETION: + x = x - 1 + elif current_step.step_type == StepType.INSERTION: + y = y - 1 + else: # creation and substitution + y = y - 1 + x = x - 1 + return to_return[::-1] + + def _calculate_steps_path( + self, + reference: List[Word], + hypothesis: List[Word] + ) -> List[AlignmentProcessingStep]: + distance_between_words = self.get_distance_matrix_between_words( + reference, hypothesis) + _, steps_matrix = self.get_levenshtein_embedding_based( + reference, hypothesis, distance_between_words) + return self.extract_steps_path(steps_matrix) + + def calculate_alignment( + self, + reference: List[Word], + hypothesis: List[Word] + ) -> List[AlignmentStep]: + steps_path = self._calculate_steps_path(reference, hypothesis) + return self.convert_processing_steps_to_result(steps_path) + + def calculate_alignment_weighted( + self, + reference: List[Word], + hypothesis: List[Word], + weights: List[float] + ) -> List[AlignmentStep]: + steps_path = self._calculate_steps_path(reference, hypothesis) + return self.convert_processing_steps_to_result(steps_path) diff --git a/sziszapangma/core/alignment/alignment_classic_calculator.py b/sziszapangma/core/alignment/alignment_classic_calculator.py new file mode 100644 index 
0000000..fbf60eb --- /dev/null +++ b/sziszapangma/core/alignment/alignment_classic_calculator.py @@ -0,0 +1,10 @@ +from sziszapangma.core.alignment.alignment_calculator import \ + AlignmentCalculator +from sziszapangma.core.alignment.distance_matrix_calculator import \ + BinaryDistanceCalculator + + +class AlignmentClassicCalculator(AlignmentCalculator): + + def __init__(self): + super().__init__(BinaryDistanceCalculator()) diff --git a/sziszapangma/core/alignment/alignment_embedding_calculator.py b/sziszapangma/core/alignment/alignment_embedding_calculator.py new file mode 100644 index 0000000..a20802d --- /dev/null +++ b/sziszapangma/core/alignment/alignment_embedding_calculator.py @@ -0,0 +1,50 @@ +from typing import List + +from sziszapangma.core.alignment.alignment_calculator import \ + AlignmentCalculator +from sziszapangma.core.alignment.alignment_processing_step import \ + AlignmentProcessingStep +from sziszapangma.core.alignment.alignment_step import AlignmentStep +from sziszapangma.core.alignment.distance_matrix_calculator import \ + BinaryDistanceCalculator, DistanceCalculator, CosineDistanceCalculator +from sziszapangma.core.alignment.step_words import StepWords +from sziszapangma.core.transformer.embedding_transformer import \ + EmbeddingTransformer + + +class AlignmentEmbeddingCalculator(AlignmentCalculator): + _distance_calculator: DistanceCalculator + + def __init__(self, embedding_transformer: EmbeddingTransformer): + super().__init__(BinaryDistanceCalculator()) + self._embedding_transformer = embedding_transformer + self._distance_calculator = CosineDistanceCalculator( + embedding_transformer) + + def _calculate_distance_for_word_step( + self, + step_words: StepWords + ) -> float: + return self._distance_calculator.calculate_distance_for_words( + step_words.reference_word, + step_words.hypothesis_word + ) + + def _calculate_result_cost_for_step( + self, + processing_step: AlignmentProcessingStep + ) -> float: + step_words = processing_step.step_words + return self._calculate_distance_for_word_step(step_words) \ + if processing_step.step_type.is_cross_step() \ + else processing_step.step_cost + + def convert_processing_steps_to_result( + self, + processing_steps: List[AlignmentProcessingStep] + ) -> List[AlignmentStep]: + return [ + AlignmentStep(step.step_type, step.step_words, + self._calculate_result_cost_for_step(step)) + for step in processing_steps + ] diff --git a/sziszapangma/core/wer/wer_processing_step.py b/sziszapangma/core/alignment/alignment_processing_step.py similarity index 58% rename from sziszapangma/core/wer/wer_processing_step.py rename to sziszapangma/core/alignment/alignment_processing_step.py index 15d92dd..e4ab96d 100644 --- a/sziszapangma/core/wer/wer_processing_step.py +++ b/sziszapangma/core/alignment/alignment_processing_step.py @@ -1,11 +1,11 @@ from dataclasses import dataclass -from sziszapangma.core.wer.step_type import StepType -from sziszapangma.core.wer.step_words import StepWords +from sziszapangma.core.alignment.step_type import StepType +from sziszapangma.core.alignment.step_words import StepWords @dataclass(frozen=True) -class WerProcessingStep: +class AlignmentProcessingStep: step_type: StepType step_words: StepWords previous_distance: float @@ -15,27 +15,27 @@ class WerProcessingStep: def levenshtein_insertion(cls, previous_distance: float, step_words: StepWords, step_cost: float = 1): words = StepWords(None, step_words.hypothesis_word) - return WerProcessingStep(StepType.INSERTION, words, - previous_distance, step_cost) + return 
AlignmentProcessingStep(StepType.INSERTION, words, + previous_distance, step_cost) @classmethod def levenshtein_deletion(cls, previous_distance: float, step_words: StepWords, step_cost: float = 1): words = StepWords(step_words.reference_word, None) - return WerProcessingStep(StepType.DELETION, words, - previous_distance, step_cost) + return AlignmentProcessingStep(StepType.DELETION, words, + previous_distance, step_cost) @classmethod def levenshtein_substitution(cls, previous_distance: float, step_words: StepWords, step_cost: float): - return WerProcessingStep(StepType.SUBSTITUTION, step_words, - previous_distance, step_cost) + return AlignmentProcessingStep(StepType.SUBSTITUTION, step_words, + previous_distance, step_cost) @classmethod def levenshtein_correct(cls, previous_distance: float, step_words: StepWords, step_cost: float): - return WerProcessingStep(StepType.CORRECT, step_words, - previous_distance, step_cost) + return AlignmentProcessingStep(StepType.CORRECT, step_words, + previous_distance, step_cost) def total_distance(self) -> float: return self.step_cost + self.previous_distance diff --git a/sziszapangma/core/wer/wer_soft_calculator.py b/sziszapangma/core/alignment/alignment_soft_calculator.py similarity index 56% rename from sziszapangma/core/wer/wer_soft_calculator.py rename to sziszapangma/core/alignment/alignment_soft_calculator.py index e177282..c7de34c 100644 --- a/sziszapangma/core/wer/wer_soft_calculator.py +++ b/sziszapangma/core/alignment/alignment_soft_calculator.py @@ -1,11 +1,12 @@ -from sziszapangma.core.wer.distance_matrix_calculator import \ +from sziszapangma.core.alignment.alignment_calculator import \ + AlignmentCalculator +from sziszapangma.core.alignment.distance_matrix_calculator import \ CosineDistanceCalculator -from sziszapangma.core.wer.wer_calculator import WerCalculator from sziszapangma.core.transformer.embedding_transformer import \ EmbeddingTransformer -class WerSoftCalculator(WerCalculator): +class AlignmentSoftCalculator(AlignmentCalculator): def __init__(self, embedding_transformer: EmbeddingTransformer): super().__init__(CosineDistanceCalculator(embedding_transformer)) diff --git a/sziszapangma/core/alignment/alignment_step.py b/sziszapangma/core/alignment/alignment_step.py new file mode 100644 index 0000000..cefd0d1 --- /dev/null +++ b/sziszapangma/core/alignment/alignment_step.py @@ -0,0 +1,18 @@ +from dataclasses import dataclass + +from sziszapangma.core.alignment.step_type import StepType +from sziszapangma.core.alignment.step_words import StepWords + + +@dataclass(frozen=True) +class AlignmentStep: + step_type: StepType + step_words: StepWords + step_cost: float + + def with_weight_multiplication(self, weight: float): + return AlignmentStep( + step_type=self.step_type, + step_words=self.step_words, + step_cost=self.step_cost * weight + ) diff --git a/sziszapangma/core/alignment/alignment_util.py b/sziszapangma/core/alignment/alignment_util.py new file mode 100644 index 0000000..c188731 --- /dev/null +++ b/sziszapangma/core/alignment/alignment_util.py @@ -0,0 +1,71 @@ +from typing import List, Optional + +import numpy as np +import pandas as pd + +from sziszapangma.core.alignment.alignment_step import AlignmentStep + + +class AlignmentUtil: + + @staticmethod + def _optional_str_to_str(value: Optional[str]) -> str: + return value if value is not None else '' + + @staticmethod + def _wer_step_to_pandas_row_lit(step: AlignmentStep) -> List[any]: + return [ + step.step_type.get_short_name(), + 
AlignmentUtil._optional_str_to_str(step.step_words.reference_word), + AlignmentUtil._optional_str_to_str( + step.step_words.hypothesis_word), + round(step.step_cost, 3) + ] + + @staticmethod + def steps_to_dataframe(steps: List[AlignmentStep]) -> pd.DataFrame: + arr = np.array([ + AlignmentUtil._wer_step_to_pandas_row_lit(step) + for step in steps + ]) + return pd.DataFrame( + arr, + columns=['step_type', 'reference', 'hypothesis', 'cost'] + ) + + @staticmethod + def get_reference_indexes_per_steps( + steps: List[AlignmentStep] + ) -> List[int]: + counter = 0 + indexes = [] + for step in steps: + indexes.append(counter) + if step.step_type.contain_reference_word(): + counter = counter + 1 + return indexes + + @staticmethod + def get_reference_length(steps: List[AlignmentStep]) -> int: + return sum([ + 1 if step.step_type.contain_reference_word() else 0 + for step in steps + ]) + + @staticmethod + def apply_weights_to_alignment( + steps: List[AlignmentStep], + weights: List[float] + ) -> List[AlignmentStep]: + if AlignmentUtil.get_reference_length(steps) != len(weights): + raise Exception( + f"Incorrect length of weights, current={len(weights)}, " + f"required={AlignmentUtil.get_reference_length(steps)}" + ) + reference_indexes_per_steps = \ + AlignmentUtil.get_reference_indexes_per_steps(steps) + return [ + steps[index].with_weight_multiplication( + weights[reference_indexes_per_steps[index]]) + for index in range(len(steps)) + ]
diff --git a/sziszapangma/core/wer/distance_matrix_calculator.py b/sziszapangma/core/alignment/distance_matrix_calculator.py similarity index 98% rename from sziszapangma/core/wer/distance_matrix_calculator.py rename to sziszapangma/core/alignment/distance_matrix_calculator.py index 47e736a..5f17ea7 100644 --- a/sziszapangma/core/wer/distance_matrix_calculator.py +++ b/sziszapangma/core/alignment/distance_matrix_calculator.py @@ -5,7 +5,7 @@ import numpy as np from sziszapangma.core.transformer.embedding_transformer import \ EmbeddingTransformer -from sziszapangma.core.wer.word import Word +from sziszapangma.core.alignment.word import Word class DistanceCalculator(ABC):
diff --git a/sziszapangma/core/wer/step_type.py b/sziszapangma/core/alignment/step_type.py similarity index 100% rename from sziszapangma/core/wer/step_type.py rename to sziszapangma/core/alignment/step_type.py
diff --git a/sziszapangma/core/wer/step_words.py b/sziszapangma/core/alignment/step_words.py similarity index 77% rename from sziszapangma/core/wer/step_words.py rename to sziszapangma/core/alignment/step_words.py index ced47b1..067466f 100644 --- a/sziszapangma/core/wer/step_words.py +++ b/sziszapangma/core/alignment/step_words.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Optional -from sziszapangma.core.wer.word import Word +from sziszapangma.core.alignment.word import Word @dataclass(frozen=True)
diff --git a/sziszapangma/core/wer/word.py b/sziszapangma/core/alignment/word.py similarity index 100% rename from sziszapangma/core/wer/word.py rename to sziszapangma/core/alignment/word.py
diff --git a/sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc deleted file mode 100644 index 5114407c01a0c978a0d91a8e6465be84cc749485..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/classic_wer_calculator.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc deleted file mode 100644 index 2bf0c9c59168075c6523a4c441087072e2f1547f..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/distance_matrix_calculator.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc deleted file mode 100644 index d3ee780df43be6c35dd46f10f72bad0a42c35bb8..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/step_type.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc deleted file mode 100644 index 4142240350aabea762e0c740887510d3e13d56cf..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/step_words.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc deleted file mode 100644 index 924ae83ebc3c912ad3acb439a5766bc626c0ac74..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/wer_embedding_calculator.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc deleted file mode 100644 index e74e7f411d80ab89142f285eaaaf84f831cf9522..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/wer_processing_step.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc deleted file mode 100644 index e66f737228a10bcea2f3c886a1ccb85c904c6d0c..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/wer_soft_calculator.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc deleted file mode 100644 index 61e5fac5c3866d97cb474694ebc1fa60aba75bd9..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/wer_span_question.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc deleted file mode 100644 index 15b375efa80ab3c0292468ee00bb3e178407600e..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/wer_step.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc b/sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc deleted file mode 100644 index 5caa38f0edc34f2f56e7ed8b79fd36da809fc616..0000000000000000000000000000000000000000
Binary files a/sziszapangma/core/wer/__pycache__/wer_util.cpython-38.pyc and /dev/null differ
diff --git a/sziszapangma/core/wer/classic_wer_calculator.py b/sziszapangma/core/wer/classic_wer_calculator.py deleted file mode 100644 index db54d33..0000000 --- a/sziszapangma/core/wer/classic_wer_calculator.py +++ /dev/null @@ -1,9 +0,0 @@ -from sziszapangma.core.wer.distance_matrix_calculator import \ - BinaryDistanceCalculator -from sziszapangma.core.wer.wer_calculator import WerCalculator - - -class ClassicWerCalculator(WerCalculator): - - def __init__(self): - super().__init__(BinaryDistanceCalculator())
diff --git a/sziszapangma/core/wer/wer_span_question.py b/sziszapangma/core/wer/span.py similarity index 59% rename from sziszapangma/core/wer/wer_span_question.py rename to sziszapangma/core/wer/span.py index bfdf434..44cfe84 100644 --- a/sziszapangma/core/wer/wer_span_question.py +++ b/sziszapangma/core/wer/span.py @@ -6,11 +6,11 @@ class Span: index_start: int index_end: int - def _is_index_belong(self, index: int) -> bool: + def is_index_belong(self, index: int) -> bool: return self.index_start <= index < self.index_end - def get_reference_weights_table(self, total_size: int): + def get_reference_mask_table(self, total_size: int): return [ - 1 if self._is_index_belong(it) else 0 +
self.is_index_belong(it) for it in range(total_size) ] diff --git a/sziszapangma/core/wer/wer_calculator.py b/sziszapangma/core/wer/wer_calculator.py index edb06e4..3fa65db 100644 --- a/sziszapangma/core/wer/wer_calculator.py +++ b/sziszapangma/core/wer/wer_calculator.py @@ -1,221 +1,52 @@ from abc import ABC -from typing import List, Tuple, Optional +from typing import List -import numpy as np - -from sziszapangma.core.wer.distance_matrix_calculator import \ - DistanceCalculator -from sziszapangma.core.wer.step_type import StepType -from sziszapangma.core.wer.wer_processing_step import WerProcessingStep -from sziszapangma.core.wer.wer_span_question import Span -from sziszapangma.core.wer.wer_step import WerStep, StepWords -from sziszapangma.core.wer.word import Word +from sziszapangma.core.alignment.alignment_step import AlignmentStep +from sziszapangma.core.alignment.alignment_util import AlignmentUtil +from sziszapangma.core.wer.span import Span class WerCalculator(ABC): - _distance_matrix_calculator: DistanceCalculator - - def __init__(self, distance_matrix_calculator: DistanceCalculator): - self._distance_matrix_calculator = distance_matrix_calculator - - def convert_processing_steps_to_result( - self, - processing_steps: List[WerProcessingStep], - reference_weights: Optional[List[float]] = None - ) -> List[WerStep]: - if reference_weights is None: - return [ - WerStep(step.step_type, step.step_words, step.step_cost) - for step in processing_steps - ] - else: - indexes_per_steps = self._get_reference_indexes_per_steps( - processing_steps) - return [ - WerStep( - processing_steps[step_index].step_type, - processing_steps[step_index].step_words, - reference_weights[indexes_per_steps[step_index]] * - processing_steps[step_index].step_cost - ) - for step_index in range(len(processing_steps)) - ] - - def get_distance_matrix_between_words( - self, - reference: List[Word], - hypothesis: List[Word] - ) -> np.ndarray: - return self._distance_matrix_calculator.calculate_distance_matrix( - reference, hypothesis) - - def extract_steps_path( - self, - steps_matrix: List[List[WerProcessingStep]] - ) -> List[WerProcessingStep]: - x = len(steps_matrix) - 1 - y = len(steps_matrix[0]) - 1 - to_return = [] - while not (x == 0 and y == 0): - current_step = steps_matrix[x][y] - to_return.append(current_step) - if current_step.step_type == StepType.DELETION: - x = x - 1 - elif current_step.step_type == StepType.INSERTION: - y = y - 1 - else: # creation and substitution - y = y - 1 - x = x - 1 - return to_return[::-1] - - @staticmethod - def _get_levenshtein_processing_step_cross( - prev_cross_distance: float, - step_words: StepWords, - current_distance: float - ) -> WerProcessingStep: - return WerProcessingStep.levenshtein_correct( - prev_cross_distance, step_words, 0) \ - if current_distance == 0 \ - else WerProcessingStep.levenshtein_substitution( - prev_cross_distance, step_words, current_distance) - - def get_levenshtein_embedding_based( - self, - reference: List[Word], - hypothesis: List[Word], - distance_matrix: np.ndarray - ) -> Tuple[np.ndarray, List[List[WerProcessingStep]]]: - - reference_len = len(reference) - hypothesis_len = len(hypothesis) - distance_arr, steps_arr = self._get_initialized_levenshtein_matrix( - reference, hypothesis) - - for ref_index in range(reference_len): - for hyp_index in range(hypothesis_len): - step_words = StepWords(reference[ref_index], - hypothesis[hyp_index]) - current_distance = distance_matrix[ref_index][hyp_index] - prev_cross_distance = 
distance_arr[ref_index][hyp_index] - - cross_go_step = self._get_levenshtein_processing_step_cross( - prev_cross_distance, step_words, current_distance) - insertion_step = WerProcessingStep.levenshtein_insertion( - distance_arr[ref_index + 1][hyp_index], step_words) - deletion_step = WerProcessingStep.levenshtein_deletion( - distance_arr[ref_index][hyp_index + 1], step_words) - - best_step = min([cross_go_step, insertion_step, deletion_step], - key=lambda it: it.total_distance()) - - distance_arr[ref_index + 1][hyp_index + 1] = \ - best_step.total_distance() - steps_arr[ref_index + 1][hyp_index + 1] = best_step - - return distance_arr, steps_arr @staticmethod - def _get_initialized_levenshtein_matrix( - reference: List[Word], - hypothesis: List[Word] - ) -> Tuple[np.ndarray, List[List[Optional[WerProcessingStep]]]]: - - # TODO: consider about remove distance_arr replaced by steps_arr - reference_len = len(reference) - hypothesis_len = len(hypothesis) - distance_arr = np.zeros((reference_len + 1) * (hypothesis_len + 1)) \ - .reshape((reference_len + 1, hypothesis_len + 1)) - steps_arr = [ - [None for _ in range(hypothesis_len + 1)] - for _ in range(reference_len + 1) - ] - - # levenshtein initial - for ref_index in range(reference_len + 1): - distance_arr[ref_index][0] = ref_index - step_words = StepWords( - reference[ref_index - 1] if ref_index > 0 else None, - None - ) - steps_arr[ref_index][0] = WerProcessingStep.levenshtein_deletion( - ref_index - 1, step_words) - for hyp_index in range(hypothesis_len + 1): - distance_arr[0][hyp_index] = hyp_index - step_words = StepWords( - None, - hypothesis[hyp_index - 1] if hyp_index > 0 else None + def _convert_processing_steps_to_result( + input_steps: List[AlignmentStep], + span: Span + ) -> List[AlignmentStep]: + indexes_per_steps = AlignmentUtil.get_reference_indexes_per_steps( + input_steps) + return [ + AlignmentStep( + input_steps[step_index].step_type, + input_steps[step_index].step_words, + input_steps[step_index].step_cost * span.is_index_belong( + indexes_per_steps[step_index]) ) - steps_arr[0][hyp_index] = WerProcessingStep.levenshtein_insertion( - hyp_index - 1, step_words) - - return distance_arr, steps_arr - - def _get_reference_indexes_per_steps( - self, - steps: List[WerProcessingStep] - ) -> List[int]: - counter = 0 - indexes = [] - for step in steps: - indexes.append(counter) - if step.step_type.contain_reference_word(): - counter = counter + 1 - return indexes + for step_index in range(len(input_steps)) + ] + @staticmethod def _calculate_wer( - self, - steps: List[WerStep], + steps: List[AlignmentStep], ) -> float: - reference_len = sum([ - 1 if step.step_type.contain_reference_word() else 0 - for step in steps - ]) + reference_len = AlignmentUtil.get_reference_length(steps) return sum([step.step_cost for step in steps]) / reference_len - def _calculate_steps_path( - self, - reference: List[Word], - hypothesis: List[Word] - ) -> List[WerProcessingStep]: - distance_between_words = self.get_distance_matrix_between_words( - reference, hypothesis) - _, steps_matrix = self.get_levenshtein_embedding_based( - reference, hypothesis, distance_between_words) - return self.extract_steps_path(steps_matrix) - def calculate_wer( self, - reference: List[Word], - hypothesis: List[Word] - ) -> Tuple[float, List[WerStep]]: - steps_path = self._calculate_steps_path(reference, hypothesis) - steps = self.convert_processing_steps_to_result(steps_path) - return self._calculate_wer(steps), steps + steps: List[AlignmentStep] + ) -> float: + return 
self._calculate_wer(steps) def calculate_wer_for_spans( self, - reference: List[Word], - hypothesis: List[Word], + steps: List[AlignmentStep], spans: List[Span] ) -> List[float]: - steps_path = self._calculate_steps_path(reference, hypothesis) - reference_len = len(reference) return [ - self._calculate_wer(self.convert_processing_steps_to_result( - processing_steps=steps_path, - reference_weights=span.get_reference_weights_table( - reference_len) + self._calculate_wer(self._convert_processing_steps_to_result( + input_steps=steps, + span=span )) for span in spans ] - - def calculate_wer_weighted( - self, - reference: List[Word], - hypothesis: List[Word], - weights: List[float] - ) -> Tuple[float, List[WerStep]]: - steps_path = self._calculate_steps_path(reference, hypothesis) - steps = self.convert_processing_steps_to_result(steps_path, weights) - return self._calculate_wer(steps), steps diff --git a/sziszapangma/core/wer/wer_embedding_calculator.py b/sziszapangma/core/wer/wer_embedding_calculator.py deleted file mode 100644 index 5f371b7..0000000 --- a/sziszapangma/core/wer/wer_embedding_calculator.py +++ /dev/null @@ -1,63 +0,0 @@ -from typing import List, Optional - -from sziszapangma.core.transformer.embedding_transformer import \ - EmbeddingTransformer -from sziszapangma.core.wer.distance_matrix_calculator import \ - BinaryDistanceCalculator, DistanceCalculator, CosineDistanceCalculator -from sziszapangma.core.wer.step_words import StepWords -from sziszapangma.core.wer.wer_calculator import WerCalculator -from sziszapangma.core.wer.wer_processing_step import WerProcessingStep -from sziszapangma.core.wer.wer_step import WerStep - - -class WerEmbeddingCalculator(WerCalculator): - _distance_calculator: DistanceCalculator - - def __init__(self, embedding_transformer: EmbeddingTransformer): - super().__init__(BinaryDistanceCalculator()) - self._embedding_transformer = embedding_transformer - self._distance_calculator = CosineDistanceCalculator( - embedding_transformer) - - def _calculate_distance_for_word_step( - self, - step_words: StepWords - ) -> float: - return self._distance_calculator.calculate_distance_for_words( - step_words.reference_word, - step_words.hypothesis_word - ) - - def _calculate_result_cost_for_step( - self, - processing_step: WerProcessingStep - ) -> float: - step_words = processing_step.step_words - return self._calculate_distance_for_word_step(step_words) \ - if processing_step.step_type.is_cross_step() \ - else processing_step.step_cost - - def convert_processing_steps_to_result( - self, - processing_steps: List[WerProcessingStep], - reference_weights: Optional[List[float]] = None - ) -> List[WerStep]: - if reference_weights is None: - return [ - WerStep(step.step_type, step.step_words, - self._calculate_result_cost_for_step(step)) - for step in processing_steps - ] - else: - indexes_per_steps = self._get_reference_indexes_per_steps( - processing_steps) - return [ - WerStep( - processing_steps[step_index].step_type, - processing_steps[step_index].step_words, - reference_weights[indexes_per_steps[step_index]] * - self._calculate_result_cost_for_step( - processing_steps[step_index]) - ) - for step_index in range(len(processing_steps)) - ] diff --git a/sziszapangma/core/wer/wer_step.py b/sziszapangma/core/wer/wer_step.py deleted file mode 100644 index 5e1efa0..0000000 --- a/sziszapangma/core/wer/wer_step.py +++ /dev/null @@ -1,11 +0,0 @@ -from dataclasses import dataclass - -from sziszapangma.core.wer.step_type import StepType -from sziszapangma.core.wer.step_words 
import StepWords - - -@dataclass(frozen=True) -class WerStep: - step_type: StepType - step_words: StepWords - step_cost: float diff --git a/sziszapangma/core/wer/wer_util.py b/sziszapangma/core/wer/wer_util.py deleted file mode 100644 index a6ef666..0000000 --- a/sziszapangma/core/wer/wer_util.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import List, Optional - -import numpy as np -import pandas as pd - -from sziszapangma.core.wer.wer_step import WerStep - - -class WerUtil: - - @staticmethod - def _optional_str_to_str(value: Optional[str]) -> str: - return value if value is not None else '' - - @staticmethod - def _wer_step_to_pandas_row_lit(step: WerStep) -> List[any]: - return [ - step.step_type.get_short_name(), - WerUtil._optional_str_to_str(step.step_words.reference_word), - WerUtil._optional_str_to_str(step.step_words.hypothesis_word), - round(step.step_cost, 3) - ] - - @staticmethod - def steps_to_dataframe(steps: List[WerStep]) -> pd.DataFrame: - arr = np.array([ - WerUtil._wer_step_to_pandas_row_lit(step) - for step in steps - ]) - return pd.DataFrame( - arr, - columns=['step_type', 'reference', 'hypothesis', 'cost'] - ) diff --git a/sziszapangma/integration/task/classic_wer_metric_task.py b/sziszapangma/integration/task/classic_wer_metric_task.py index dfd8d26..1f4ba70 100644 --- a/sziszapangma/integration/task/classic_wer_metric_task.py +++ b/sziszapangma/integration/task/classic_wer_metric_task.py @@ -1,6 +1,8 @@ from typing import List, Dict -from sziszapangma.core.wer.classic_wer_calculator import ClassicWerCalculator +from sziszapangma.core.alignment.alignment_classic_calculator import \ + AlignmentClassicCalculator +from sziszapangma.core.wer.wer_calculator import WerCalculator from sziszapangma.integration.repository.experiment_repository import \ ExperimentRepository from sziszapangma.integration.task.processing_task import ProcessingTask @@ -12,7 +14,8 @@ _WORD = 'word' class ClassicWerMetricTask(ProcessingTask): _metrics_property_name: str _gold_transcript_property_name: str - _classic_wer_calculator: ClassicWerCalculator + _alignment_classic_calculator: AlignmentClassicCalculator + _wer_calculator: WerCalculator def __init__( self, @@ -26,7 +29,8 @@ class ClassicWerMetricTask(ProcessingTask): self._gold_transcript_property_name = gold_transcript_property_name self._asr_property_name = asr_property_name self._metrics_property_name = metrics_property_name - self._classic_wer_calculator = ClassicWerCalculator() + self._alignment_classic_calculator = AlignmentClassicCalculator() + self._wer_calculator = WerCalculator() def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: @@ -55,10 +59,12 @@ class ClassicWerMetricTask(ProcessingTask): gold_transcript: List[Dict[str, any]], asr_result: List[str] ) -> float: - return self._classic_wer_calculator.calculate_wer( - reference=[it[_WORD] for it in gold_transcript], - hypothesis=[it for it in asr_result], - )[0] + return self._wer_calculator.calculate_wer( + self._alignment_classic_calculator.calculate_alignment( + reference=[it[_WORD] for it in gold_transcript], + hypothesis=[it for it in asr_result], + ) + ) def calculate_metrics( self, diff --git a/sziszapangma/integration/task/embedding_wer_metrics_task.py b/sziszapangma/integration/task/embedding_wer_metrics_task.py index 67f8b94..3145fbf 100644 --- a/sziszapangma/integration/task/embedding_wer_metrics_task.py +++ b/sziszapangma/integration/task/embedding_wer_metrics_task.py @@ -1,12 +1,14 @@ from typing import List, Dict +from 
sziszapangma.core.alignment.alignment_embedding_calculator import \ + AlignmentEmbeddingCalculator +from sziszapangma.core.alignment.alignment_soft_calculator import \ + AlignmentSoftCalculator from sziszapangma.core.transformer.cached_embedding_transformer import \ CachedEmbeddingTransformer from sziszapangma.core.transformer.embedding_transformer import \ EmbeddingTransformer -from sziszapangma.core.wer.wer_embedding_calculator import \ - WerEmbeddingCalculator -from sziszapangma.core.wer.wer_soft_calculator import WerSoftCalculator +from sziszapangma.core.wer.wer_calculator import WerCalculator from sziszapangma.integration.repository.experiment_repository import \ ExperimentRepository from sziszapangma.integration.task.processing_task import ProcessingTask @@ -20,8 +22,9 @@ class EmbeddingWerMetricsTask(ProcessingTask): _metrics_property_name: str _gold_transcript_property_name: str _embedding_transformer: CachedEmbeddingTransformer - _wer_embedding_calculator: WerEmbeddingCalculator - _wer_soft_calculator: WerSoftCalculator + _alignment_embedding_calculator: AlignmentEmbeddingCalculator + _alignment_soft_calculator: AlignmentSoftCalculator + _wer_calculator: WerCalculator def __init__( self, @@ -38,10 +41,11 @@ class EmbeddingWerMetricsTask(ProcessingTask): self._metrics_property_name = metrics_property_name self._embedding_transformer = \ CachedEmbeddingTransformer(embedding_transformer) - self._wer_embedding_calculator = \ - WerEmbeddingCalculator(self._embedding_transformer) - self._wer_soft_calculator = \ - WerSoftCalculator(self._embedding_transformer) + self._alignment_embedding_calculator = \ + AlignmentEmbeddingCalculator(self._embedding_transformer) + self._alignment_soft_calculator = \ + AlignmentSoftCalculator(self._embedding_transformer) + self._wer_calculator = WerCalculator() def skip_for_record(self, record_id: str, experiment_repository: ExperimentRepository) -> bool: @@ -74,8 +78,8 @@ class EmbeddingWerMetricsTask(ProcessingTask): """Calculate all metrics for data sample.""" metrics = dict() reference = [it[_WORD] for it in gold_transcript] - metrics[_SOFT_WER] = self._wer_soft_calculator.calculate_wer( - reference, asr_result)[0] - metrics[_EMBEDDING_WER] = self._wer_embedding_calculator.calculate_wer( - reference, asr_result)[0] + metrics[_SOFT_WER] = self._alignment_soft_calculator\ + .calculate_alignment(reference, asr_result)[0] + metrics[_EMBEDDING_WER] = self._alignment_embedding_calculator\ + .calculate_wer(reference, asr_result)[0] return metrics diff --git a/tests/test_classic_wer.py b/tests/test_classic_wer.py index c78bc41..ff67ebd 100644 --- a/tests/test_classic_wer.py +++ b/tests/test_classic_wer.py @@ -2,10 +2,12 @@ from typing import List, Tuple import pytest -from sziszapangma.core.wer.classic_wer_calculator import ClassicWerCalculator -from sziszapangma.core.wer.step_type import StepType -from sziszapangma.core.wer.step_words import StepWords -from sziszapangma.core.wer.word import Word +from sziszapangma.core.alignment.alignment_classic_calculator import \ + AlignmentClassicCalculator +from sziszapangma.core.alignment.step_type import StepType +from sziszapangma.core.alignment.step_words import StepWords +from sziszapangma.core.alignment.word import Word +from sziszapangma.core.wer.wer_calculator import WerCalculator def string_list_to_words(strings: List[str]) -> List[Word]: @@ -21,14 +23,17 @@ def get_sample_data() -> Tuple[List[Word], List[Word]]: def test_classic_calculate_wer_value(): """Sample test for core calculate.""" reference, hypothesis 
= get_sample_data() - wer_result = ClassicWerCalculator().calculate_wer(reference, hypothesis) - assert pytest.approx(wer_result[0]) == 0.8333333 + alignment = AlignmentClassicCalculator()\ + .calculate_alignment(reference, hypothesis) + wer_result = WerCalculator().calculate_wer(alignment) + assert pytest.approx(wer_result) == 0.8333333 def test_classic_calculate_wer_steps(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - wer_result = ClassicWerCalculator().calculate_wer(reference, hypothesis) + alignment = AlignmentClassicCalculator().calculate_alignment( + reference, hypothesis) reference_words = [ StepWords(reference[0], hypothesis[0]), @@ -44,7 +49,7 @@ def test_classic_calculate_wer_steps(): StepType.CORRECT, StepType.INSERTION, StepType.INSERTION, StepType.SUBSTITUTION, StepType.SUBSTITUTION] - assert len(wer_result[1]) == 8 - assert [it.step_type for it in wer_result[1]] == step_types - assert [it.step_cost for it in wer_result[1]] == [0, 1, 0, 0, 1, 1, 1, 1] - assert [it.step_words for it in wer_result[1]] == reference_words + assert len(alignment) == 8 + assert [it.step_type for it in alignment] == step_types + assert [it.step_cost for it in alignment] == [0, 1, 0, 0, 1, 1, 1, 1] + assert [it.step_words for it in alignment] == reference_words diff --git a/tests/test_embedding_wer.py b/tests/test_embedding_wer.py index 8507e77..4f7cd55 100644 --- a/tests/test_embedding_wer.py +++ b/tests/test_embedding_wer.py @@ -2,9 +2,12 @@ from typing import List, Tuple import pytest -from sziszapangma.core.wer.wer_embedding_calculator import \ - WerEmbeddingCalculator -from sziszapangma.core.wer.word import Word +from sziszapangma.core.alignment.alignment_calculator import \ + AlignmentCalculator +from sziszapangma.core.alignment.alignment_embedding_calculator import \ + AlignmentEmbeddingCalculator +from sziszapangma.core.alignment.word import Word +from sziszapangma.core.wer.wer_calculator import WerCalculator from tests.file_stored_embedding_transformer import \ FileStoredEmbeddingTransformer @@ -19,14 +22,15 @@ def get_sample_data() -> Tuple[List[Word], List[Word]]: return string_list_to_words(reference), string_list_to_words(hypothesis) -def get_calculator() -> WerEmbeddingCalculator: - return WerEmbeddingCalculator( +def get_alignment_calculator() -> AlignmentCalculator: + return AlignmentEmbeddingCalculator( FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) def test_classic_calculate_wer_value(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - wer_result = get_calculator().calculate_wer(reference, hypothesis) - print(wer_result[0]) - assert pytest.approx(wer_result[0]) == 0.55879563 + alignment = get_alignment_calculator().calculate_alignment(reference, + hypothesis) + wer_result = WerCalculator().calculate_wer(alignment) + assert pytest.approx(wer_result) == 0.55879563 diff --git a/tests/test_soft_wer.py b/tests/test_soft_wer.py index e47240b..85a3433 100644 --- a/tests/test_soft_wer.py +++ b/tests/test_soft_wer.py @@ -2,8 +2,12 @@ from typing import List, Tuple import pytest -from sziszapangma.core.wer.wer_soft_calculator import WerSoftCalculator -from sziszapangma.core.wer.word import Word +from sziszapangma.core.alignment.alignment_calculator import \ + AlignmentCalculator +from sziszapangma.core.alignment.alignment_soft_calculator import \ + AlignmentSoftCalculator +from sziszapangma.core.alignment.word import Word +from sziszapangma.core.wer.wer_calculator import WerCalculator from 
tests.file_stored_embedding_transformer import \ FileStoredEmbeddingTransformer @@ -18,14 +22,16 @@ def get_sample_data() -> Tuple[List[Word], List[Word]]: return string_list_to_words(reference), string_list_to_words(hypothesis) -def get_calculator() -> WerSoftCalculator: - return WerSoftCalculator( +def get_alignment_calculator() -> AlignmentCalculator: + return AlignmentSoftCalculator( FileStoredEmbeddingTransformer('tests/embeddings_pl.json')) def test_classic_calculate_wer_value(): """Sample test for core calculate.""" reference, hypothesis = get_sample_data() - wer_result = get_calculator().calculate_wer(reference, hypothesis) - print(wer_result[0]) - assert pytest.approx(wer_result[0]) == 0.50186761 + alignment = get_alignment_calculator().calculate_alignment( + reference, hypothesis) + wer_result = WerCalculator().calculate_wer(alignment) + print(wer_result) + assert pytest.approx(wer_result) == 0.50186761 diff --git a/tox.ini b/tox.ini index 58bfbf9..7062dd2 100644 --- a/tox.ini +++ b/tox.ini @@ -17,8 +17,6 @@ deps = ; requirements.txt with the pinned versions and uncomment the following line: ; -r{toxinidir}/requirements.txt commands = - ls -la - pip list pytest ; pytest --basetemp={envtmpdir} -- GitLab
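For reviewers, a minimal usage sketch of the split API introduced by this patch, assembled from the updated tests above: an alignment calculator (classic, soft, or embedding-based) now produces a list of AlignmentStep objects, and WerCalculator turns that alignment into a WER value (calculate_wer_for_spans does the same per Span). The sample words and the Word.from_string factory below are illustrative assumptions; the bodies of the test helpers are not shown in this diff.

    from typing import List

    from sziszapangma.core.alignment.alignment_classic_calculator import \
        AlignmentClassicCalculator
    from sziszapangma.core.alignment.word import Word
    from sziszapangma.core.wer.wer_calculator import WerCalculator


    def string_list_to_words(strings: List[str]) -> List[Word]:
        # Assumed factory, mirroring the helper referenced in the tests;
        # its body is not part of this diff.
        return [Word.from_string(it) for it in strings]


    # Illustrative data, not the fixtures used in the test suite.
    reference = string_list_to_words(['ala', 'ma', 'kota'])
    hypothesis = string_list_to_words(['ala', 'ma', 'psa'])

    # Step 1: compute the alignment (List[AlignmentStep]) between
    # reference and hypothesis.
    alignment = AlignmentClassicCalculator().calculate_alignment(
        reference, hypothesis)

    # Step 2: derive WER from the alignment costs.
    wer = WerCalculator().calculate_wer(alignment)
    print(wer)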