Skip to content
Snippets Groups Projects
Commit aa7b7f1e authored by Marcin Wątroba
Browse files

Add NeMo processing

parent 7649144c
Branches
No related merge requests found
......@@ -99,6 +99,7 @@ def main():
# add_facebook_hf_wav2vec2_asr(channel)
# add_facebook_hf_wav2vec2_pipeline(channel)
connection.close()
# ['de', 'en', 'es', 'fr', 'it']
if __name__ == '__main__':
......
......@@ -2,6 +2,7 @@ import json
from pathlib import Path
from new_experiment.new_dependency_provider import get_experiment_repository
from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.model.model_creators import create_new_word
......@@ -12,6 +13,8 @@ def load_nemo_asr_results(dataset_name: str):
with open(jsonl_path, 'r') as reader:
lines = reader.read().splitlines(keepends=False)
for json_line in lines:
old_property_name = f'nvidia_stt_{language_code}_conformer_transducer_large'
repository.delete_property(old_property_name)
parsed_json = json.loads(json_line)
print(parsed_json['audio_filepath'].split('/')[-1])
record_id = parsed_json['audio_filepath'].split('/')[-1][:-4]
......@@ -22,7 +25,7 @@ def load_nemo_asr_results(dataset_name: str):
"full_text": transcript,
"words_time_alignment": None
}
property_name = f'nvidia_stt_{language_code}_conformer_transducer_large'
property_name = PropertyHelper.asr_result(f'nvidia_stt_{language_code}_conformer_transducer_large')
repository.update_property_for_key(record_id, property_name, asr_result)
......
......@@ -41,3 +41,7 @@ class ExperimentRepository(ABC):
@abstractmethod
def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]:
    """Return all values stored under ``property_name`` as a dictionary.

    Abstract hook: this base implementation does nothing and returns
    ``None``; concrete repositories must override it.

    NOTE(review): the dictionary keys are presumably record identifiers;
    confirm against the concrete implementations.
    """
@abstractmethod
def delete_property(self, property_name: str):
    """Remove the property identified by ``property_name``.

    Abstract hook: this base implementation does nothing; how the
    property is removed is left to the concrete repository.
    """
......@@ -61,3 +61,6 @@ class MongoExperimentRepository(ExperimentRepository):
def get_all_values_from_property(self, property_name: str) -> Dict[str, Any]:
    """Collect every record of the ``property_name`` collection into a dict.

    Each record returned by ``find()`` on that collection contributes one
    entry, mapping its ``ID`` field to its ``VALUE`` field. If two records
    share an ``ID``, the later one wins.
    """
    collection = self._get_database()[property_name]
    values_by_id: Dict[str, Any] = {}
    for record in collection.find():
        # Read the key before the value, matching dict-comprehension order.
        record_key = record[ID]
        values_by_id[record_key] = record[VALUE]
    return values_by_id
def delete_property(self, property_name: str):
    """Drop the collection named ``property_name`` from the database."""
    database = self._get_database()
    database.drop_collection(property_name)
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment