Skip to content
Snippets Groups Projects
Commit 38826e3e authored by Marcin Wątroba's avatar Marcin Wątroba
Browse files

Add worker for pipeline

parent 4eb6ea65
Branches
No related merge requests found
...@@ -6,15 +6,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata ...@@ -6,15 +6,6 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y tzdata
RUN add-apt-repository ppa:deadsnakes/ppa && apt-get update && apt-get install -y python3.8 python3-pip ffmpeg RUN add-apt-repository ppa:deadsnakes/ppa && apt-get update && apt-get install -y python3.8 python3-pip ffmpeg
RUN alias python='python3' && alias pip='pip3' && pip install poetry RUN alias python='python3' && alias pip='pip3' && pip install poetry
RUN pip install spacy
RUN python -m spacy download de_core_news_lg
RUN python -m spacy download pl_core_news_lg
RUN python -m spacy download en_core_news_lg
RUN python -m spacy download it_core_news_lg
RUN python -m spacy download nl_core_news_lg
RUN python -m spacy download sp_core_news_lg
RUN python -m spacy download pt_core_news_lg
ADD poetry.lock ./ ADD poetry.lock ./
ADD pyproject.toml ./ ADD pyproject.toml ./
ADD README.rst ./ ADD README.rst ./
...@@ -27,3 +18,11 @@ RUN ls -l ...@@ -27,3 +18,11 @@ RUN ls -l
RUN poetry config virtualenvs.create false --local RUN poetry config virtualenvs.create false --local
RUN poetry install RUN poetry install
# Fetch the large spaCy pipelines through the Poetry-managed interpreter.
# Package names must match spaCy's published models exactly: English is only
# distributed as a "web" pipeline (en_core_web_lg) and Spanish uses the ISO
# 639-1 code "es", not "sp". An unknown name makes `spacy download` fail and
# aborts the image build.
# NOTE(review): any code that builds a model name from a language code must
# map "en" to the "web" variant as well - confirm against the pipeline code.
RUN poetry run python -m spacy download de_core_news_lg
RUN poetry run python -m spacy download pl_core_news_lg
RUN poetry run python -m spacy download en_core_web_lg
RUN poetry run python -m spacy download it_core_news_lg
RUN poetry run python -m spacy download nl_core_news_lg
RUN poetry run python -m spacy download es_core_news_lg
RUN poetry run python -m spacy download pt_core_news_lg
...@@ -10,7 +10,7 @@ from new_experiment.utils.property_helper import PropertyHelper ...@@ -10,7 +10,7 @@ from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.experiment_manager import ExperimentManager
def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str): def run_spacy_dep_tag_wer_pipeline(dataset_name: str, asr_name: str):
repository = get_experiment_repository(dataset_name) repository = get_experiment_repository(dataset_name)
record_provider = LoadedRemoteDatasetHelper(repository, get_minio_audio_record_repository(), dataset_name) record_provider = LoadedRemoteDatasetHelper(repository, get_minio_audio_record_repository(), dataset_name)
language_code = dataset_name[:2] language_code = dataset_name[:2]
...@@ -34,4 +34,4 @@ def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str): ...@@ -34,4 +34,4 @@ def run_spacy_pos_wer_pipeline(dataset_name: str, asr_name: str):
if __name__ == '__main__': if __name__ == '__main__':
run_spacy_pos_wer_pipeline('de_minds14', 'whisper_tiny') run_spacy_dep_tag_wer_pipeline('de_minds14', 'whisper_tiny')
...@@ -12,7 +12,7 @@ from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetr ...@@ -12,7 +12,7 @@ from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetr
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask
def run_word_wer_classic_pipeline(dataset_name: str, asr_name: str): def run_word_wer_embedding_pipeline(dataset_name: str, asr_name: str):
repository = get_experiment_repository(dataset_name) repository = get_experiment_repository(dataset_name)
experiment_processor = ExperimentManager( experiment_processor = ExperimentManager(
record_id_iterator=LoadedRemoteDatasetHelper(repository, get_minio_audio_record_repository(), dataset_name), record_id_iterator=LoadedRemoteDatasetHelper(repository, get_minio_audio_record_repository(), dataset_name),
...@@ -33,4 +33,4 @@ def run_word_wer_classic_pipeline(dataset_name: str, asr_name: str): ...@@ -33,4 +33,4 @@ def run_word_wer_classic_pipeline(dataset_name: str, asr_name: str):
if __name__ == '__main__': if __name__ == '__main__':
run_word_wer_classic_pipeline('de_minds14', 'whisper_tiny') run_word_wer_embedding_pipeline('de_minds14', 'whisper_tiny')
...@@ -8,6 +8,11 @@ from pymongo import MongoClient ...@@ -8,6 +8,11 @@ from pymongo import MongoClient
from urllib3 import HTTPResponse from urllib3 import HTTPResponse
from new_datasets.whisper_processor import WhisperAsrProcessor from new_datasets.whisper_processor import WhisperAsrProcessor
from new_experiment.pipeline.pipeline_process_spacy_dep_tag_wer import run_spacy_dep_tag_wer_pipeline
from new_experiment.pipeline.pipeline_process_spacy_ner_wer import run_spacy_ner_wer_pipeline
from new_experiment.pipeline.pipeline_process_spacy_pos_wer import run_spacy_pos_wer_pipeline
from new_experiment.pipeline.pipeline_process_word_classic_wer import run_word_wer_classic_pipeline
from new_experiment.pipeline.pipeline_process_word_embedding_wer import run_word_wer_embedding_pipeline
from sziszapangma.integration.repository.mongo_experiment_repository import MongoExperimentRepository from sziszapangma.integration.repository.mongo_experiment_repository import MongoExperimentRepository
...@@ -17,6 +22,8 @@ def get_param(name: str, default: str) -> str: ...@@ -17,6 +22,8 @@ def get_param(name: str, default: str) -> str:
_RABBIT_URL = get_param('RABBIT_URL', _RABBIT_URL = get_param('RABBIT_URL',
'amqps://rabbit_user:kz6m4972OUHFmtUcPOHx4kF3Lj6yw7lo@rabbit-asr-benchmarks.theliver.pl:5671/') 'amqps://rabbit_user:kz6m4972OUHFmtUcPOHx4kF3Lj6yw7lo@rabbit-asr-benchmarks.theliver.pl:5671/')
def main(): def main():
parameters = pika.URLParameters(_RABBIT_URL) parameters = pika.URLParameters(_RABBIT_URL)
connection = pika.BlockingConnection(parameters=parameters) connection = pika.BlockingConnection(parameters=parameters)
...@@ -30,7 +37,21 @@ def main(): ...@@ -30,7 +37,21 @@ def main():
print(message_dict) print(message_dict)
task = message_dict['task'] task = message_dict['task']
dataset = message_dict['dataset']
asr_name = message_dict['asr_name']
if task == 'run_word_wer_classic_pipeline':
run_word_wer_classic_pipeline(dataset, asr_name)
elif task == 'run_word_wer_embedding_pipeline':
run_word_wer_embedding_pipeline(dataset, asr_name)
elif task == 'run_spacy_dep_tag_wer_pipeline':
run_spacy_dep_tag_wer_pipeline(dataset, asr_name)
elif task == 'run_spacy_ner_wer_pipeline':
run_spacy_ner_wer_pipeline(dataset, asr_name)
elif task == 'run_spacy_pos_wer_pipeline':
run_spacy_pos_wer_pipeline(dataset, asr_name)
else:
raise Exception(f"Bad message {message_dict}")
channel.basic_ack(method_frame.delivery_tag) channel.basic_ack(method_frame.delivery_tag)
print('\n########################################################\n') print('\n########################################################\n')
......
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment