From 942a7ef3e1371b2328f471fd07a434664b19aa87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20W=C4=85troba?= <markowanga@gmail.com> Date: Wed, 27 Apr 2022 16:55:54 +0200 Subject: [PATCH] Add processing --- .dvc/config | 1 + .gitignore | 1 - dvc.lock | 17 +++++++ dvc.yaml | 17 +++++++ .../luna/luna_record_provider.py | 35 -------------- .../luna/relation_manager_provider.py | 13 ------ examples/luna/load_datasource_luna.py | 5 -- {examples => experiment}/__init__.py | 0 {examples => experiment}/luna/__init__.py | 0 .../luna}/import_dataset/__init__.py | 0 .../luna/import_dataset}/import_luna.py | 44 ++++++++++-------- .../luna/luna_record_provider.py | 16 ++++--- experiment/luna/move_asr.py | 21 +++++++++ .../luna/pipeline}/__init__.py | 0 .../luna_gold_transcript_processor.py | 4 +- .../luna/pipeline}/luna_main.py | 27 ++++++----- .../luna/pipeline}/pos_processing/__init__.py | 0 .../asr_spacy_token_pos_processing_task.py | 2 +- ...nscript_spacy_token_pos_processing_task.py | 2 +- .../spacy_pos_wer_processing_task.py | 0 .../spacy_token_pos_processing_task.py | 0 {examples => experiment}/luna/pos.ipynb | 0 experiment_data.dvc | 5 -- experiment_data/cached_asr/.gitignore | 1 + experiment_data/cached_asr/luna_techmo.dvc | 5 ++ experiment_data/dataset/.gitignore | 1 + experiment_data/dataset/LUNA.PL.dvc | 5 ++ .../dataset_relation_manager_data/.gitignore | 1 + .../__pycache__/path_filter.cpython-38.pyc | Bin 1958 -> 1966 bytes sziszapangma/integration/path_filter.py | 2 +- sziszapangma/model/relation_manager.py | 9 ++++ 31 files changed, 134 insertions(+), 100 deletions(-) create mode 100644 dvc.lock create mode 100644 dvc.yaml delete mode 100644 examples/import_dataset/luna/luna_record_provider.py delete mode 100644 examples/import_dataset/luna/relation_manager_provider.py delete mode 100644 examples/luna/load_datasource_luna.py rename {examples => experiment}/__init__.py (100%) rename {examples => experiment}/luna/__init__.py (100%) rename {examples => experiment/luna}/import_dataset/__init__.py (100%) rename {examples/import_dataset/luna => experiment/luna/import_dataset}/import_luna.py (87%) rename {examples => experiment}/luna/luna_record_provider.py (68%) create mode 100644 experiment/luna/move_asr.py rename {examples/import_dataset/luna => experiment/luna/pipeline}/__init__.py (100%) rename {examples/luna => experiment/luna/pipeline}/luna_gold_transcript_processor.py (90%) rename {examples/luna => experiment/luna/pipeline}/luna_main.py (79%) rename {examples/luna => experiment/luna/pipeline}/pos_processing/__init__.py (100%) rename {examples/luna => experiment/luna/pipeline}/pos_processing/asr_spacy_token_pos_processing_task.py (84%) rename {examples/luna => experiment/luna/pipeline}/pos_processing/gold_transcript_spacy_token_pos_processing_task.py (85%) rename {examples/luna => experiment/luna/pipeline}/pos_processing/spacy_pos_wer_processing_task.py (100%) rename {examples/luna => experiment/luna/pipeline}/pos_processing/spacy_token_pos_processing_task.py (100%) rename {examples => experiment}/luna/pos.ipynb (100%) delete mode 100644 experiment_data.dvc create mode 100644 experiment_data/cached_asr/.gitignore create mode 100644 experiment_data/cached_asr/luna_techmo.dvc create mode 100644 experiment_data/dataset/.gitignore create mode 100644 experiment_data/dataset/LUNA.PL.dvc create mode 100644 experiment_data/dataset_relation_manager_data/.gitignore diff --git a/.dvc/config b/.dvc/config index 158cbc5..d852f27 100644 --- a/.dvc/config +++ b/.dvc/config @@ -1,5 +1,6 @@ [core] remote = clarin-biz-asr-benchmarks + autostage = true ['remote "clarin-biz-asr-benchmarks"'] url = s3://projects/clarin-biz-asr-benchmark/dvc endpointurl = https://s3.clarin-pl.eu diff --git a/.gitignore b/.gitignore index c915175..10f676d 100644 --- a/.gitignore +++ b/.gitignore @@ -111,4 +111,3 @@ debug_run/ publish_to_theliver.sh .idea -/experiment_data diff --git a/dvc.lock b/dvc.lock new file mode 100644 index 0000000..96eddc5 --- /dev/null +++ b/dvc.lock @@ -0,0 +1,17 @@ +schema: '2.0' +stages: + import_luna_to_common_format: + cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py + deps: + - path: experiment/luna/import_dataset/import_luna.py + md5: d938162187616f7e7390983ecb9e120b + size: 8269 + - path: experiment_data/dataset/LUNA.PL + md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + outs: + - path: experiment_data/dataset_relation_manager_data/luna + md5: c68722cc69375259a4d1a4b6a2bd4dc3.dir + size: 3016826 + nfiles: 10 diff --git a/dvc.yaml b/dvc.yaml new file mode 100644 index 0000000..ef4354e --- /dev/null +++ b/dvc.yaml @@ -0,0 +1,17 @@ +stages: + + import_luna_to_common_format: + cmd: PYTHONPATH=. python experiment/luna/import_dataset/import_luna.py + deps: + - experiment/luna/import_dataset/import_luna.py + - experiment_data/dataset/LUNA.PL + outs: + - experiment_data/dataset_relation_manager_data/luna + +# luna_main_pipeline: +# cmd: PYTHONPATH=. python experiment/luna/pipeline/luna_main.py +# deps: +# - experiment_data/dataset_relation_manager_data/luna +# - experiment_data/dataset/LUNA.PL +# outs: +# - experiment_data/pipeline/luna diff --git a/examples/import_dataset/luna/luna_record_provider.py b/examples/import_dataset/luna/luna_record_provider.py deleted file mode 100644 index 56d30d3..0000000 --- a/examples/import_dataset/luna/luna_record_provider.py +++ /dev/null @@ -1,35 +0,0 @@ -from typing import Dict, Set - -from sziszapangma.integration.path_filter import PathFilter -from sziszapangma.integration.record_id_iterator import RecordIdIterator -from sziszapangma.integration.record_path_provider import RecordPathProvider -from sziszapangma.integration.relation_manager_provider import RelationManagerProvider -from sziszapangma.model.relation_manager import RelationManager, FileRelationManager - - -class LunaRecordProvider(RecordIdIterator, RecordPathProvider, RelationManagerProvider): - _path_by_id: Dict[str, str] - - def __init__(self, path_filter: PathFilter): - self._path_by_id = dict({ - self._get_id(it): it - for it in path_filter.get_list_of_files() - }) - - def get_all_records(self) -> Set[str]: - return set(self._path_by_id.keys()) - - def get_path(self, record_id: str) -> str: - return self._path_by_id[record_id] - - def get_item_file_path(self, record_id: str, file) -> str: - return self._path_by_id[record_id] - - def get_relation_manager(self, record_id: str) -> RelationManager: - basic_path = self.get_path(record_id)[:-4] - return FileRelationManager(f'{basic_path}_ab_relations.csv', f'{basic_path}_ab_items.json') - - @staticmethod - def _get_id(record_file_path: str) -> str: - path = record_file_path.replace('.wav', '') - return '/'.join(path.split('/')[-6:]) diff --git a/examples/import_dataset/luna/relation_manager_provider.py b/examples/import_dataset/luna/relation_manager_provider.py deleted file mode 100644 index eb8241d..0000000 --- a/examples/import_dataset/luna/relation_manager_provider.py +++ /dev/null @@ -1,13 +0,0 @@ -from examples.import_dataset.luna.luna_record_provider import LunaRecordProvider -from sziszapangma.model.relation_manager import RelationManager, FileRelationManager - - -class RelationManagerProvider: - _luna_record_provider: LunaRecordProvider - - def __init__(self, luna_record_provider: LunaRecordProvider): - self._luna_record_provider = luna_record_provider - - def get_relation_manager(self, record_id: str) -> RelationManager: - basic_path = self._luna_record_provider.get_path(record_id)[:-4] - return FileRelationManager(f'{basic_path}_relations.csv', f'{basic_path}_items.json') diff --git a/examples/luna/load_datasource_luna.py b/examples/luna/load_datasource_luna.py deleted file mode 100644 index 062a6c8..0000000 --- a/examples/luna/load_datasource_luna.py +++ /dev/null @@ -1,5 +0,0 @@ -from lhotse import AudioSource, SupervisionSegment - - -def load_single_datasource(path: str) -> AudioSource: - return AudioSource('file', [1], path) diff --git a/examples/__init__.py b/experiment/__init__.py similarity index 100% rename from examples/__init__.py rename to experiment/__init__.py diff --git a/examples/luna/__init__.py b/experiment/luna/__init__.py similarity index 100% rename from examples/luna/__init__.py rename to experiment/luna/__init__.py diff --git a/examples/import_dataset/__init__.py b/experiment/luna/import_dataset/__init__.py similarity index 100% rename from examples/import_dataset/__init__.py rename to experiment/luna/import_dataset/__init__.py diff --git a/examples/import_dataset/luna/import_luna.py b/experiment/luna/import_dataset/import_luna.py similarity index 87% rename from examples/import_dataset/luna/import_luna.py rename to experiment/luna/import_dataset/import_luna.py index 3038632..673e186 100644 --- a/examples/import_dataset/luna/import_luna.py +++ b/experiment/luna/import_dataset/import_luna.py @@ -1,8 +1,7 @@ -from pprint import pprint from typing import Tuple, List from xml.etree import ElementTree -from examples.import_dataset.luna.luna_record_provider import LunaRecordProvider +from experiment.luna.luna_record_provider import LunaRecordProvider from sziszapangma.integration.path_filter import ExtensionPathFilter from sziszapangma.model.model import Word, SingleAnnotation from sziszapangma.model.model_creators import create_new_word, create_new_single_annotation, \ @@ -149,6 +148,7 @@ class LunaAdapter: def import_record(self, record_id: str): print(f'record {record_id}') relation_manager = self._record_provider.get_relation_manager(record_id) + relation_manager.clear_all() basic_path = self._record_provider.get_path(record_id)[:-4] words_path = f"{basic_path}_words.xml" @@ -157,27 +157,33 @@ class LunaAdapter: turn_path = f"{basic_path}_turns.xml" words, single_annotations = self.save_words(words_path, relation_manager) - print('save_words') + # print('save_words') self.read_concepts(words, concept_path, relation_manager) - print('read_concepts') + # print('read_concepts') self.read_chunks(words, chunks_path, relation_manager) - print('read_chunks') + # print('read_chunks') self.read_turns(words, turn_path, relation_manager) - print('read_turns') + # print('read_turns') relation_manager.commit() +def main(): + luna_directory = 'experiment_data/dataset/LUNA.PL' + luna_record_provider = LunaRecordProvider( + ExtensionPathFilter( + root_directory=luna_directory, + extension='wav' + ), + relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna' + ) + luna_adapter = LunaAdapter(luna_record_provider) + record_ids = list(luna_record_provider.get_all_records()) + index = 0 + for record_id in record_ids: + index += 1 + print(f'{index}/{len(record_ids)}') + luna_adapter.import_record(record_id) + + if __name__ == "__main__": - luna_directory = '/Users/marcinwatroba/Desktop/LUNA/LUNA.PL' - luna_record_provider = LunaRecordProvider(ExtensionPathFilter( - root_directory=luna_directory, - extension='wav' - )) - for it in list(luna_record_provider.get_all_records())[:1]: - relation_manager = luna_record_provider.get_relation_manager(it) - document = [itt for itt in relation_manager.get_all_items() if itt['type'] == 'Document'][0] - document_words = [relation_manager.get_item_by_id(item_id) - for item_id in document['word_ids']] - all_relations = relation_manager.get_all_relations_for_item(document_words[0]['id']) - ids = [it['second_id'] for it in all_relations if it['second_type'] in ['lemma', 'pos']] - pprint([relation_manager.get_item_by_id(it) for it in ids]) + main() diff --git a/examples/luna/luna_record_provider.py b/experiment/luna/luna_record_provider.py similarity index 68% rename from examples/luna/luna_record_provider.py rename to experiment/luna/luna_record_provider.py index aa0f30a..34ac13c 100644 --- a/examples/luna/luna_record_provider.py +++ b/experiment/luna/luna_record_provider.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import Dict, Set from sziszapangma.integration.path_filter import PathFilter @@ -9,8 +10,10 @@ from sziszapangma.model.relation_manager import RelationManager, FileRelationMan class LunaRecordProvider(RecordIdIterator, RecordPathProvider, RelationManagerProvider): _path_by_id: Dict[str, str] + _relation_manager_root_path: str - def __init__(self, path_filter: PathFilter): + def __init__(self, path_filter: PathFilter, relation_manager_root_path: str): + self._relation_manager_root_path = relation_manager_root_path self._path_by_id = dict({ self._get_id(it): it for it in path_filter.get_list_of_files() @@ -22,12 +25,13 @@ class LunaRecordProvider(RecordIdIterator, RecordPathProvider, RelationManagerPr def get_path(self, record_id: str) -> str: return self._path_by_id[record_id] - def get_item_file_path(self, record_id: str, file) -> str: - return self._path_by_id[record_id] - def get_relation_manager(self, record_id: str) -> RelationManager: - basic_path = self.get_path(record_id)[:-4] - return FileRelationManager(f'{basic_path}_ab_relations.csv', f'{basic_path}_ab_items.json') + record_path = Path(self._relation_manager_root_path).joinpath(record_id) + record_path.mkdir(parents=True, exist_ok=True) + return FileRelationManager( + str(record_path.joinpath('ab_relations.csv')), + str(record_path.joinpath('ab_items.json')) + ) @staticmethod def _get_id(record_file_path: str) -> str: diff --git a/experiment/luna/move_asr.py b/experiment/luna/move_asr.py new file mode 100644 index 0000000..87910a0 --- /dev/null +++ b/experiment/luna/move_asr.py @@ -0,0 +1,21 @@ +import json +from pprint import pprint + +from experiment.luna.luna_record_provider import LunaRecordProvider +from sziszapangma.integration.path_filter import ExtensionPathFilter + +if __name__ == '__main__': + luna_directory = 'experiment_data/dataset/LUNA.PL' + luna_record_provider = LunaRecordProvider( + ExtensionPathFilter( + root_directory=luna_directory, + extension='wav' + ), + relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna' + ) + pprint(luna_record_provider.get_all_records()) + for record_id in luna_record_provider.get_all_records(): + path = f'experiment_data/cached_asr/LUNA_techmo_asr_cache/{record_id.replace("__", "/")}.wav.techmo.json' + raw = json.load(open(path, 'r')) + output_path = f'experiment_data/cached_asr/luna_techmo/{record_id}.json' + json.dump(raw, open(output_path, 'w')) diff --git a/examples/import_dataset/luna/__init__.py b/experiment/luna/pipeline/__init__.py similarity index 100% rename from examples/import_dataset/luna/__init__.py rename to experiment/luna/pipeline/__init__.py diff --git a/examples/luna/luna_gold_transcript_processor.py b/experiment/luna/pipeline/luna_gold_transcript_processor.py similarity index 90% rename from examples/luna/luna_gold_transcript_processor.py rename to experiment/luna/pipeline/luna_gold_transcript_processor.py index b3d508a..4bfce0a 100644 --- a/examples/luna/luna_gold_transcript_processor.py +++ b/experiment/luna/pipeline/luna_gold_transcript_processor.py @@ -1,7 +1,7 @@ from typing import List, Dict -# from examples.luna.luna_record_provider import LunaRecordProvider -from examples.luna.luna_record_provider import LunaRecordProvider +# from experiment.luna.luna_record_provider import LunaRecordProvider +from experiment.luna.luna_record_provider import LunaRecordProvider from sziszapangma.integration.gold_transcript_processor import GoldTranscriptProcessor from sziszapangma.model.relation_manager import RelationManager diff --git a/examples/luna/luna_main.py b/experiment/luna/pipeline/luna_main.py similarity index 79% rename from examples/luna/luna_main.py rename to experiment/luna/pipeline/luna_main.py index 98684c5..7b6b719 100644 --- a/examples/luna/luna_main.py +++ b/experiment/luna/pipeline/luna_main.py @@ -1,10 +1,11 @@ -from examples.luna.luna_gold_transcript_processor import LunaGoldTranscriptProcessor -from examples.luna.luna_record_provider import LunaRecordProvider -from examples.luna.pos_processing.asr_spacy_token_pos_processing_task import \ +from experiment.luna.pipeline.luna_gold_transcript_processor import LunaGoldTranscriptProcessor +from experiment.luna.pipeline.pos_processing.asr_spacy_token_pos_processing_task import \ AsrSpacyTokenPosProcessingTask -from examples.luna.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ +from experiment.luna.pipeline.pos_processing.gold_transcript_spacy_token_pos_processing_task import \ GoldTranscriptSpacyTokenPosProcessingTask -from examples.luna.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask +from experiment.luna.pipeline.pos_processing.spacy_pos_wer_processing_task import \ + SpacyPosWerProcessingTask +from experiment.luna.luna_record_provider import LunaRecordProvider from sziszapangma.integration.asr_processor import AsrPathCacheClient from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.path_filter import ExtensionPathFilter @@ -15,7 +16,7 @@ from sziszapangma.integration.task.asr_task import AsrTask from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask from sziszapangma.integration.task.gold_transcript_task import GoldTranscriptTask -LUNA_DIRECTORY = '/Users/marcinwatroba/Desktop/LUNA/LUNA.PL' +LUNA_DIRECTORY = 'experiment_data/dataset/LUNA.PL' GOLD_TRANSCRIPT = 'gold_transcript' TECHMO_POLISH_ASR = 'techmo_polish_asr' TECHMO_POLISH_CLASSIC_WER_METRIC = 'techmo_polish_classic_wer_metric' @@ -27,10 +28,13 @@ POS_METRICS_WER = 'pos_metrics_wer' def run_luna_experiment(experiment_repository: ExperimentRepository): - record_provider = LunaRecordProvider(ExtensionPathFilter( - root_directory=LUNA_DIRECTORY, - extension='wav' - )) + record_provider = LunaRecordProvider( + ExtensionPathFilter( + root_directory=LUNA_DIRECTORY, + extension='wav' + ), + relation_manager_root_path='experiment_data/dataset_relation_manager_data/luna' + ) experiment_processor = ExperimentManager( record_id_iterator=record_provider, processing_tasks=[ @@ -84,7 +88,8 @@ def run_luna_experiment(experiment_repository: ExperimentRepository): def example_run(): - experiment_repository = MultiFilesExperimentRepository('experiment_data', 'asr_benchmark_luna') + experiment_repository = MultiFilesExperimentRepository( + 'experiment_data/pipeline', 'asr_benchmark_luna') run_luna_experiment(experiment_repository) diff --git a/examples/luna/pos_processing/__init__.py b/experiment/luna/pipeline/pos_processing/__init__.py similarity index 100% rename from examples/luna/pos_processing/__init__.py rename to experiment/luna/pipeline/pos_processing/__init__.py diff --git a/examples/luna/pos_processing/asr_spacy_token_pos_processing_task.py b/experiment/luna/pipeline/pos_processing/asr_spacy_token_pos_processing_task.py similarity index 84% rename from examples/luna/pos_processing/asr_spacy_token_pos_processing_task.py rename to experiment/luna/pipeline/pos_processing/asr_spacy_token_pos_processing_task.py index 1f6b11e..42a0e25 100644 --- a/examples/luna/pos_processing/asr_spacy_token_pos_processing_task.py +++ b/experiment/luna/pipeline/pos_processing/asr_spacy_token_pos_processing_task.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Any -from examples.luna.pos_processing.spacy_token_pos_processing_task import SpacyTokenPosProcessingTask +from experiment.luna.pipeline.pos_processing.spacy_token_pos_processing_task import SpacyTokenPosProcessingTask @dataclass diff --git a/examples/luna/pos_processing/gold_transcript_spacy_token_pos_processing_task.py b/experiment/luna/pipeline/pos_processing/gold_transcript_spacy_token_pos_processing_task.py similarity index 85% rename from examples/luna/pos_processing/gold_transcript_spacy_token_pos_processing_task.py rename to experiment/luna/pipeline/pos_processing/gold_transcript_spacy_token_pos_processing_task.py index 52861da..e142212 100644 --- a/examples/luna/pos_processing/gold_transcript_spacy_token_pos_processing_task.py +++ b/experiment/luna/pipeline/pos_processing/gold_transcript_spacy_token_pos_processing_task.py @@ -1,7 +1,7 @@ from dataclasses import dataclass from typing import Any -from examples.luna.pos_processing.spacy_token_pos_processing_task import SpacyTokenPosProcessingTask +from experiment.luna.pipeline.pos_processing.spacy_token_pos_processing_task import SpacyTokenPosProcessingTask @dataclass diff --git a/examples/luna/pos_processing/spacy_pos_wer_processing_task.py b/experiment/luna/pipeline/pos_processing/spacy_pos_wer_processing_task.py similarity index 100% rename from examples/luna/pos_processing/spacy_pos_wer_processing_task.py rename to experiment/luna/pipeline/pos_processing/spacy_pos_wer_processing_task.py diff --git a/examples/luna/pos_processing/spacy_token_pos_processing_task.py b/experiment/luna/pipeline/pos_processing/spacy_token_pos_processing_task.py similarity index 100% rename from examples/luna/pos_processing/spacy_token_pos_processing_task.py rename to experiment/luna/pipeline/pos_processing/spacy_token_pos_processing_task.py diff --git a/examples/luna/pos.ipynb b/experiment/luna/pos.ipynb similarity index 100% rename from examples/luna/pos.ipynb rename to experiment/luna/pos.ipynb diff --git a/experiment_data.dvc b/experiment_data.dvc deleted file mode 100644 index 1769484..0000000 --- a/experiment_data.dvc +++ /dev/null @@ -1,5 +0,0 @@ -outs: -- md5: 599ed5f6253699162bcae7ec02a8cb10.dir - size: 1654734492 - nfiles: 9101 - path: experiment_data diff --git a/experiment_data/cached_asr/.gitignore b/experiment_data/cached_asr/.gitignore new file mode 100644 index 0000000..eeada19 --- /dev/null +++ b/experiment_data/cached_asr/.gitignore @@ -0,0 +1 @@ +/luna_techmo diff --git a/experiment_data/cached_asr/luna_techmo.dvc b/experiment_data/cached_asr/luna_techmo.dvc new file mode 100644 index 0000000..120a4df --- /dev/null +++ b/experiment_data/cached_asr/luna_techmo.dvc @@ -0,0 +1,5 @@ +outs: +- md5: 033ea7b5434dded73bf869bfdd299462.dir + size: 4256479 + nfiles: 500 + path: luna_techmo diff --git a/experiment_data/dataset/.gitignore b/experiment_data/dataset/.gitignore new file mode 100644 index 0000000..beaf701 --- /dev/null +++ b/experiment_data/dataset/.gitignore @@ -0,0 +1 @@ +/LUNA.PL diff --git a/experiment_data/dataset/LUNA.PL.dvc b/experiment_data/dataset/LUNA.PL.dvc new file mode 100644 index 0000000..2fdb21a --- /dev/null +++ b/experiment_data/dataset/LUNA.PL.dvc @@ -0,0 +1,5 @@ +outs: +- md5: d342155b1871e881797cf7da09d5dc3c.dir + size: 1578358645 + nfiles: 4500 + path: LUNA.PL diff --git a/experiment_data/dataset_relation_manager_data/.gitignore b/experiment_data/dataset_relation_manager_data/.gitignore new file mode 100644 index 0000000..70bb483 --- /dev/null +++ b/experiment_data/dataset_relation_manager_data/.gitignore @@ -0,0 +1 @@ +/luna diff --git a/sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc b/sziszapangma/integration/__pycache__/path_filter.cpython-38.pyc index c41c7eb3674f5dc55a2723bb84bfaa528e31c5a4..b70f37888ee88dc389a9aca5fdb47356cc822f56 100644 GIT binary patch delta 43 xcmZ3+zmA_bl$V!_0SF3RGLzP9<gH}o<n;;lbJPp)(VyJTx|i|RW>>aEMgZz%3%39O delta 35 pcmZ3-zl@(Zl$V!_0SIPHx}C6OBX1=uBkSaT*1e2Tn|;|583C(V3F`m= diff --git a/sziszapangma/integration/path_filter.py b/sziszapangma/integration/path_filter.py index a5e55f9..3053335 100644 --- a/sziszapangma/integration/path_filter.py +++ b/sziszapangma/integration/path_filter.py @@ -34,6 +34,6 @@ class ExtensionPathFilter(PathFilter): """ Implementation of searching files with extension. """ - path_generator = Path(self._root_directory).glob(f"**/*.{self._extension}") + path_generator = Path(self._root_directory).glob(f"LUNA.PL/**/*.{self._extension}") all_files = [str(it) for it in path_generator] return all_files if self._files_limit is None else all_files[: self._files_limit] diff --git a/sziszapangma/model/relation_manager.py b/sziszapangma/model/relation_manager.py index 74ec581..64a742d 100644 --- a/sziszapangma/model/relation_manager.py +++ b/sziszapangma/model/relation_manager.py @@ -44,6 +44,10 @@ class RelationManager(ABC): def get_all_items(self) -> List[UUIDable]: pass + @abstractmethod + def clear_all(self) -> None: + pass + class FileRelationManager(RelationManager): relations_csv_path: str @@ -106,3 +110,8 @@ class FileRelationManager(RelationManager): items_json = json.dumps(self.items_dict) with open(self.items_json_path, "w") as f: f.write(items_json) + + def clear_all(self) -> None: + self.items_dict.clear() + self.relations_dataframe = self.relations_dataframe[0:0] + -- GitLab