Skip to content
Snippets Groups Projects
Commit 90e06ab8 authored by Marcin Wątroba
Browse files

download_dataset command

parent 95365ee2
No related merge requests found
import argparse
from experiment.const_pipeline_names import GOLD_TRANSCRIPT
from experiment.experiment_dependency_provider import get_record_provider, get_repository
from experiment.sentence_wer_processor.flair_upos_multi_transformers_wer_processor_base import \ from experiment.sentence_wer_processor.flair_upos_multi_transformers_wer_processor_base import \
FlairUposMultiTransformersWerProcessorBase FlairUposMultiTransformersWerProcessorBase
from experiment.sentence_wer_processor.wikineural_multilingual_ner_transformers_wer_processor_base import \ from new_experiment.new_dependency_provider import get_experiment_repository, get_minio_audio_record_repository
WikineuralMultilingualNerTransformersWerProcessorBase from new_experiment.utils.loaded_remote_dataset_helper import LoadedRemoteDatasetHelper
from new_experiment.utils.property_helper import PropertyHelper from new_experiment.utils.property_helper import PropertyHelper
from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer
from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask
def run_flair_upos_multi_pipeline(dataset_name: str, asr_name: str): def run_flair_upos_multi_pipeline(dataset_name: str, asr_name: str):
record_provider = get_record_provider(dataset_name) repository = get_experiment_repository(dataset_name)
record_provider = LoadedRemoteDatasetHelper(repository, get_minio_audio_record_repository(), dataset_name)
experiment_processor = ExperimentManager( experiment_processor = ExperimentManager(
record_id_iterator=record_provider, record_id_iterator=record_provider,
processing_tasks=[ processing_tasks=[
FlairUposMultiTransformersWerProcessorBase( FlairUposMultiTransformersWerProcessorBase(
gold_transcript_property_name=GOLD_TRANSCRIPT, gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
asr_property_name=PropertyHelper.asr_result(asr_name), asr_property_name=PropertyHelper.asr_result(asr_name),
alignment_property_name=PropertyHelper.pos_alignment(asr_name, 'flair_upos_multi'), alignment_property_name=PropertyHelper.pos_alignment(asr_name, 'flair_upos_multi'),
wer_property_name=PropertyHelper.pos_metrics(asr_name, 'flair_upos_multi'), wer_property_name=PropertyHelper.pos_metrics(asr_name, 'flair_upos_multi'),
...@@ -27,6 +21,6 @@ def run_flair_upos_multi_pipeline(dataset_name: str, asr_name: str): ...@@ -27,6 +21,6 @@ def run_flair_upos_multi_pipeline(dataset_name: str, asr_name: str):
require_update=False require_update=False
) )
], ],
experiment_repository=get_repository(dataset_name) experiment_repository=repository
) )
experiment_processor.process() experiment_processor.process()
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment