diff --git a/pyproject.toml b/pyproject.toml index 5d89515034a6638f079db21da092d430529c4a52..8b612fe437b6b9eba8ed0d87eb581ddcbcb8a61e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,6 +44,16 @@ isort = {extras = ["pyproject"], version = "^5.11.4"} pyflakes = "^3.0.1" poethepoet = "^0.16.5" +[tool.poe.tasks] +black = "black -v --check sziszapangma" +isort = "isort --profile black sziszapangma -c" +pyflakes = "pyflakes sziszapangma" +mypy = "mypy sziszapangma" +test = "pytest" +check = ["black", "isort", "pyflakes"] +all = ["check", "test"] + + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" diff --git a/sziszapangma/core/transformer/fasttext_embedding_transformer.py b/sziszapangma/core/transformer/fasttext_embedding_transformer.py index 3b8263477af627474538c94f06a6c1f89a2959b6..bf1e2d6eab8dc030a3d52bfb4fbb042a03581440 100644 --- a/sziszapangma/core/transformer/fasttext_embedding_transformer.py +++ b/sziszapangma/core/transformer/fasttext_embedding_transformer.py @@ -1,12 +1,10 @@ -import json -from typing import Dict, List, Optional +from typing import Dict, List +import fasttext.util import numpy as np -import requests from fasttext.FastText import _FastText from sziszapangma.core.transformer.embedding_transformer import EmbeddingTransformer -import fasttext.util class FasttextEmbeddingTransformer(EmbeddingTransformer): diff --git a/sziszapangma/core/wer/wer_calculator.py b/sziszapangma/core/wer/wer_calculator.py index 8d05465b19b50414df66f6b753ba0e563fe18048..a018d79a221d7a8a0ec1962040cdef5fdb56b771 100644 --- a/sziszapangma/core/wer/wer_calculator.py +++ b/sziszapangma/core/wer/wer_calculator.py @@ -1,11 +1,8 @@ from abc import ABC from typing import List -import numpy as np - from sziszapangma.core.alignment.alignment_step import AlignmentStep from sziszapangma.core.alignment.alignment_util import AlignmentUtil -from sziszapangma.core.alignment.step_type import StepType from sziszapangma.core.wer.span import Span diff --git a/sziszapangma/integration/asr_processor/asr_processor.py b/sziszapangma/integration/asr_processor/asr_processor.py index 389beec28d7392e41f6713e1a999c02bf4aecc84..752727b97f82f23d694bfc96f7dd22ae3c79707c 100644 --- a/sziszapangma/integration/asr_processor/asr_processor.py +++ b/sziszapangma/integration/asr_processor/asr_processor.py @@ -1,7 +1,7 @@ import json from abc import ABC, abstractmethod from pathlib import Path -from typing import Any, Dict, Optional, List +from typing import Any, Dict, List, Optional import requests from pydub import AudioSegment diff --git a/sziszapangma/integration/audio_repository/local_audio_record_repository.py b/sziszapangma/integration/audio_repository/local_audio_record_repository.py index 2bdb5ecfa1fdb5d494c1fd5c64d0c23a25dccf88..6ed197aec0b4707ab2a9bc530c68b3e1b298353c 100644 --- a/sziszapangma/integration/audio_repository/local_audio_record_repository.py +++ b/sziszapangma/integration/audio_repository/local_audio_record_repository.py @@ -1,7 +1,9 @@ import shutil from pathlib import Path -from sziszapangma.integration.audio_repository.audio_record_repository import AudioRecordRepository +from sziszapangma.integration.audio_repository.audio_record_repository import ( + AudioRecordRepository, +) class LocalAudioRecordRepository(AudioRecordRepository): diff --git a/sziszapangma/integration/audio_repository/minio_audio_record_repository.py b/sziszapangma/integration/audio_repository/minio_audio_record_repository.py index 39c5c86967eab9c916dd4020c0de707becefff13..6eb5f86f76139ac86367ac9d435ef5e33c30b92b 100644 --- a/sziszapangma/integration/audio_repository/minio_audio_record_repository.py +++ b/sziszapangma/integration/audio_repository/minio_audio_record_repository.py @@ -3,7 +3,9 @@ from pathlib import Path from minio import Minio from urllib3 import HTTPResponse -from sziszapangma.integration.audio_repository.audio_record_repository import AudioRecordRepository +from sziszapangma.integration.audio_repository.audio_record_repository import ( + AudioRecordRepository, +) class MinioAudioRecordRepository(AudioRecordRepository): @@ -29,7 +31,9 @@ class MinioAudioRecordRepository(AudioRecordRepository): ) def load_file(self, record_id: str) -> Path: - record_path = (Path.home() / f".cache/asr_benchmark/{self._dataset_name}/{record_id}.wav") + record_path = ( + Path.home() / f".cache/asr_benchmark/{self._dataset_name}/{record_id}.wav" + ) if not record_path.exists(): record_response: HTTPResponse = self._minio.get_object( self._bucket, self._get_record_path(self._dataset_name, record_id) diff --git a/sziszapangma/integration/experiment_manager.py b/sziszapangma/integration/experiment_manager.py index 0f8d98737b49603a2e5166acfd60f0b7cff8b36e..81605473e6900a03aa996503856784cf718f7c47 100644 --- a/sziszapangma/integration/experiment_manager.py +++ b/sziszapangma/integration/experiment_manager.py @@ -3,6 +3,7 @@ from typing import List from sziszapangma.integration.repository.experiment_repository import ( ExperimentRepository, ) + from .record_id_iterator import RecordIdIterator from .task.processing_task import ProcessingTask diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py index 73dd70229e654f41581476ee47a0c8b2800320e0..6c8f2687b992b6160f035498ac22e2fcfad2b5e6 100644 --- a/sziszapangma/integration/repository/experiment_repository.py +++ b/sziszapangma/integration/repository/experiment_repository.py @@ -1,6 +1,6 @@ """Repository to manage results of asr experiment processing.""" from abc import ABC, abstractmethod -from typing import Any, Optional, Set, Dict +from typing import Any, Dict, Optional, Set class ExperimentRepository(ABC): diff --git a/sziszapangma/integration/repository/fsspec_experiment_repository.py b/sziszapangma/integration/repository/fsspec_experiment_repository.py index 9966e4f3b60f98346186fa943bb0822f0f591f17..a15954bd54cda014c4e23b2d344e91103b991369 100644 --- a/sziszapangma/integration/repository/fsspec_experiment_repository.py +++ b/sziszapangma/integration/repository/fsspec_experiment_repository.py @@ -1,21 +1,12 @@ +import datetime import json from typing import Any, Optional, Set -import datetime -import fsspec -import s3fs + from fsspec import AbstractFileSystem -from fsspec.implementations.local import LocalFileSystem -from pymongo import MongoClient from sziszapangma.integration.repository.experiment_repository import ( ExperimentRepository, ) -from sziszapangma.integration.repository.mongo_experiment_repository import ( - MongoExperimentRepository, -) -from sziszapangma.integration.repository.multi_files_experiment_repository import ( - MultiFilesExperimentRepository, -) class FsspecExperimentRepository(ExperimentRepository): diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py index 8f1d39baff6b10239ee66b0ab3a9b50d45ad8660..ca3631bf11e53f49979f282072386172f1991c68 100644 --- a/sziszapangma/integration/repository/mongo_experiment_repository.py +++ b/sziszapangma/integration/repository/mongo_experiment_repository.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Set, Dict +from typing import Any, Dict, Optional, Set from pymongo import MongoClient from pymongo.database import Database diff --git a/sziszapangma/integration/repository/multi_files_experiment_repository.py b/sziszapangma/integration/repository/multi_files_experiment_repository.py index 3d08f19e038b626fc9e320ba499942a870a0fe9c..f1c7814e576da4da729672a776658f4ee5263362 100644 --- a/sziszapangma/integration/repository/multi_files_experiment_repository.py +++ b/sziszapangma/integration/repository/multi_files_experiment_repository.py @@ -1,7 +1,7 @@ import json import os.path from pathlib import Path -from typing import Any, List, Optional, Set, Dict +from typing import Any, Dict, List, Optional, Set from sziszapangma.integration.repository.experiment_repository import ( ExperimentRepository, @@ -48,10 +48,8 @@ class MultiFilesExperimentRepository(ExperimentRepository): def get_all_record_ids(self) -> Set[str]: ids: List[str] = [] for property_name in self.get_all_properties(): - path = ( - self._root_directory - .joinpath(self._experiment_name) - .joinpath(property_name) + path = self._root_directory.joinpath(self._experiment_name).joinpath( + property_name ) property_ids = set( [children.name.replace(".json", "") for children in path.iterdir()] @@ -65,19 +63,22 @@ class MultiFilesExperimentRepository(ExperimentRepository): def _get_file_path(self, property_name: str, record_id: str) -> Path: return ( - self._root_directory - .joinpath(self._experiment_name) + self._root_directory.joinpath(self._experiment_name) .joinpath(property_name) .joinpath(f"{record_id}.json") ) def get_all_record_ids_for_property(self, property_name: str) -> Set[str]: - property_directory_path = self._root_directory / self._experiment_name / property_name - return set([ - it.name.replace('.json', '') - for it in property_directory_path.iterdir() - if it.name.endswith('.json') - ]) + property_directory_path = ( + self._root_directory / self._experiment_name / property_name + ) + return set( + [ + it.name.replace(".json", "") + for it in property_directory_path.iterdir() + if it.name.endswith(".json") + ] + ) def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]: ids = self.get_all_record_ids_for_property(property_name) diff --git a/sziszapangma/integration/task/asr_task.py b/sziszapangma/integration/task/asr_task.py index c144aa1507339bcf447d48e3926a235fed693477..54b4253e78744e0eaf9bef8165eeebb83cdcc651 100644 --- a/sziszapangma/integration/task/asr_task.py +++ b/sziszapangma/integration/task/asr_task.py @@ -5,7 +5,6 @@ from sziszapangma.integration.repository.experiment_repository import ( ) from sziszapangma.integration.task.processing_task import ProcessingTask from sziszapangma.model.model_creators import create_new_word -from sziszapangma.model.relation_manager import RelationManager class AsrTask(ProcessingTask): diff --git a/sziszapangma/integration/task/embedding_wer_metrics_task.py b/sziszapangma/integration/task/embedding_wer_metrics_task.py index 22db6c1b479be4acf4943e13048690fbc20664ce..71a17b31b7b61c5a726b163efe6a6ad527f15c14 100644 --- a/sziszapangma/integration/task/embedding_wer_metrics_task.py +++ b/sziszapangma/integration/task/embedding_wer_metrics_task.py @@ -118,14 +118,6 @@ class EmbeddingWerMetricsTask(ProcessingTask): soft_wer = self._wer_calculator.calculate_wer(soft_alignment) embedding_wer = self._wer_calculator.calculate_wer(embedding_alignment) - alignment_results = { - "soft_alignment": [ - AlignmentStepMapper.to_json_dict(it) for it in soft_alignment - ], - "embedding_alignment": [ - AlignmentStepMapper.to_json_dict(it) for it in embedding_alignment - ], - } wer_results = {"soft_wer": soft_wer, "embedding_wer": embedding_wer} print(wer_results) diff --git a/sziszapangma/integration/task/flair_upos_multi_transformers_wer_processor_base.py b/sziszapangma/integration/task/flair_upos_multi_transformers_wer_processor_base.py index 0433440fc1b21e145385a613dced13603a1e92c4..891f510641c679dda1ca99c843c4f222f16b1366 100644 --- a/sziszapangma/integration/task/flair_upos_multi_transformers_wer_processor_base.py +++ b/sziszapangma/integration/task/flair_upos_multi_transformers_wer_processor_base.py @@ -1,10 +1,9 @@ from typing import List + from flair.data import Sentence from flair.models import SequenceTagger -from sziszapangma.integration.task.sentence_wer_processor import ( - SentenceWerProcessor, -) +from sziszapangma.integration.task.sentence_wer_processor import SentenceWerProcessor class FlairUposMultiTransformersWerProcessorBase(SentenceWerProcessor): diff --git a/sziszapangma/integration/task/sentence_wer_processor.py b/sziszapangma/integration/task/sentence_wer_processor.py index c05736ea2c67654f1879af26c882df769fbe3118..08a569fc74dcf79ea87f3dd27ef059f1add119d9 100644 --- a/sziszapangma/integration/task/sentence_wer_processor.py +++ b/sziszapangma/integration/task/sentence_wer_processor.py @@ -12,7 +12,6 @@ from sziszapangma.integration.repository.experiment_repository import ( ) from sziszapangma.integration.task.processing_task import ProcessingTask from sziszapangma.model.model import Word -from sziszapangma.model.relation_manager import RelationManager class SentenceWerProcessor(ProcessingTask): diff --git a/sziszapangma/integration/task/spacy_ner_sentence_wer_processor.py b/sziszapangma/integration/task/spacy_ner_sentence_wer_processor.py index ec3fc40dff49cfff68ba82daf8436797ad25e707..3b78057254840319264f5fd1b744102bc2b07155 100644 --- a/sziszapangma/integration/task/spacy_ner_sentence_wer_processor.py +++ b/sziszapangma/integration/task/spacy_ner_sentence_wer_processor.py @@ -1,10 +1,8 @@ -from typing import List, Any +from typing import Any, List import spacy -from sziszapangma.integration.task.sentence_wer_processor import ( - SentenceWerProcessor, -) +from sziszapangma.integration.task.sentence_wer_processor import SentenceWerProcessor class SpacyNerSentenceWerProcessor(SentenceWerProcessor): diff --git a/sziszapangma/integration/task/spacy_pos_sentence_dep_tag_processor.py b/sziszapangma/integration/task/spacy_pos_sentence_dep_tag_processor.py index 34d6d860947a09ea86ac1f552ea5360df2a0c961..57940d60b288554c91648f6e402383b26e14451d 100644 --- a/sziszapangma/integration/task/spacy_pos_sentence_dep_tag_processor.py +++ b/sziszapangma/integration/task/spacy_pos_sentence_dep_tag_processor.py @@ -1,10 +1,8 @@ -from typing import List, Any +from typing import Any, List import spacy -from sziszapangma.integration.task.sentence_wer_processor import ( - SentenceWerProcessor, -) +from sziszapangma.integration.task.sentence_wer_processor import SentenceWerProcessor class SpacyDepTagSentenceWerProcessor(SentenceWerProcessor): diff --git a/sziszapangma/integration/task/spacy_pos_sentence_wer_processor.py b/sziszapangma/integration/task/spacy_pos_sentence_wer_processor.py index 38d351c0a1d3fbef3469cdfd0de584fffe6d3190..17254b72504faf9e164e326ecf215d61f766dc23 100644 --- a/sziszapangma/integration/task/spacy_pos_sentence_wer_processor.py +++ b/sziszapangma/integration/task/spacy_pos_sentence_wer_processor.py @@ -1,10 +1,8 @@ -from typing import List, Any +from typing import Any, List import spacy -from sziszapangma.integration.task.sentence_wer_processor import ( - SentenceWerProcessor, -) +from sziszapangma.integration.task.sentence_wer_processor import SentenceWerProcessor class SpacyPosSentenceWerProcessor(SentenceWerProcessor): diff --git a/sziszapangma/integration/task/wikineural_multilingual_ner_transformers_wer_processor_base.py b/sziszapangma/integration/task/wikineural_multilingual_ner_transformers_wer_processor_base.py index d7d71b11c6bd51f31960a5e985e06ebb875863a7..6e009e0134f3499c6e25012c0cb68849105fb2a5 100644 --- a/sziszapangma/integration/task/wikineural_multilingual_ner_transformers_wer_processor_base.py +++ b/sziszapangma/integration/task/wikineural_multilingual_ner_transformers_wer_processor_base.py @@ -1,16 +1,14 @@ from typing import List from transformers import ( - AutoTokenizer, AutoModelForTokenClassification, - pipeline, + AutoTokenizer, Pipeline, PreTrainedTokenizer, + pipeline, ) -from sziszapangma.integration.task.sentence_wer_processor import ( - SentenceWerProcessor, -) +from sziszapangma.integration.task.sentence_wer_processor import SentenceWerProcessor class WikineuralMultilingualNerTransformersWerProcessorBase(SentenceWerProcessor):