diff --git a/new_experiment/add_to_queue_pipeline.py b/new_experiment/add_to_queue_pipeline.py
index 76a5069649ec20988ae48ad999e5ff410029938b..b87ca62988c0efe8d771708027a94a3a19af35a6 100644
--- a/new_experiment/add_to_queue_pipeline.py
+++ b/new_experiment/add_to_queue_pipeline.py
@@ -99,6 +99,7 @@ def main():
     # add_facebook_hf_wav2vec2_asr(channel)
     # add_facebook_hf_wav2vec2_pipeline(channel)
     connection.close()
+    # ['de', 'en', 'es', 'fr', 'it']
 
 
 if __name__ == '__main__':
diff --git a/new_experiment/hf_asr/import_nvidia_nemo_asr_result.py b/new_experiment/hf_asr/import_nvidia_nemo_asr_result.py
index 9c2345650a8cab4cbc2b0f49202b10903a353a05..32c4e2cf8d0c92dea3126baf8170e0d5a5a31bd0 100644
--- a/new_experiment/hf_asr/import_nvidia_nemo_asr_result.py
+++ b/new_experiment/hf_asr/import_nvidia_nemo_asr_result.py
@@ -2,6 +2,7 @@ import json
 from pathlib import Path
 
 from new_experiment.new_dependency_provider import get_experiment_repository
+from new_experiment.utils.property_helper import PropertyHelper
 from sziszapangma.model.model_creators import create_new_word
 
 
@@ -12,6 +13,9 @@ def load_nemo_asr_results(dataset_name: str):
     with open(jsonl_path, 'r') as reader:
         lines = reader.read().splitlines(keepends=False)
+    # Remove data stored under the raw (pre-PropertyHelper) name once, before the import loop.
+    old_property_name = f'nvidia_stt_{language_code}_conformer_transducer_large'
+    repository.delete_property(old_property_name)
     for json_line in lines:
         parsed_json = json.loads(json_line)
         print(parsed_json['audio_filepath'].split('/')[-1])
         record_id = parsed_json['audio_filepath'].split('/')[-1][:-4]
@@ -22,7 +26,7 @@ def load_nemo_asr_results(dataset_name: str):
             "full_text": transcript,
             "words_time_alignment": None
         }
-        property_name = f'nvidia_stt_{language_code}_conformer_transducer_large'
+        property_name = PropertyHelper.asr_result(f'nvidia_stt_{language_code}_conformer_transducer_large')
         repository.update_property_for_key(record_id, property_name, asr_result)
 
 
diff --git a/sziszapangma/integration/repository/experiment_repository.py b/sziszapangma/integration/repository/experiment_repository.py
index 61ddbb38852342a13063c6d9afe28cc9877243e5..779933fbb33dd536f9bb817a73d6aedd6075abad 100644
--- a/sziszapangma/integration/repository/experiment_repository.py
+++ b/sziszapangma/integration/repository/experiment_repository.py
@@ -41,3 +41,8 @@ class ExperimentRepository(ABC):
     @abstractmethod
     def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]:
         pass
+
+    @abstractmethod
+    def delete_property(self, property_name: str) -> None:
+        """Remove the property named *property_name* together with the values stored under it."""
+        pass
diff --git a/sziszapangma/integration/repository/mongo_experiment_repository.py b/sziszapangma/integration/repository/mongo_experiment_repository.py
index 98c2ef3833c48b9bcfb9614770a5fd8fd6912d5b..1ee99a3e0196dfb2bc15e79ac846803ae41ee9fb 100644
--- a/sziszapangma/integration/repository/mongo_experiment_repository.py
+++ b/sziszapangma/integration/repository/mongo_experiment_repository.py
@@ -61,3 +61,7 @@ class MongoExperimentRepository(ExperimentRepository):
 
     def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]:
         return {record[ID]: record[VALUE] for record in self._get_database()[property_name].find()}
+
+    def delete_property(self, property_name: str) -> None:
+        """Drop the collection named *property_name* from the database returned by ``_get_database()``."""
+        self._get_database().drop_collection(property_name)