from experiment.luna.pipeline.dependency_provider import get_record_provider, GOLD_TRANSCRIPT, \
get_multiple_files_repository, \
GOLD_TRANSCRIPT_SPACY, AJN_POLISH_ASR, WORD_AJN_METRICS_WER, WORD_AJN_ALIGNMENT_WER, AJN_SPACY, \
POS_AJN_ALIGNMENT_WER, POS_AJN_METRICS_WER, WORD_AJN_METRICS_WER_EMBEDDINGS, WORD_AJN_ALIGNMENT_WER_EMBEDDINGS
from experiment.pos_processing.asr_spacy_token_pos_processing_task import AsrSpacyTokenPosProcessingTask
from experiment.pos_processing.spacy_pos_wer_processing_task import SpacyPosWerProcessingTask
from sziszapangma.core.transformer.web_embedding_transformer import WebEmbeddingTransformer
from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.task.asr_task import AsrTask
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from sziszapangma.integration.asr_processor import AsrWebClient, MultipleSourcesAsrProcessor, AsrPathCacheClient
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask
def run_luna_experiment():
    """Build and run the AJN ASR evaluation pipeline.

    Assembles an ``ExperimentManager`` with five processing tasks (ASR,
    classic word WER, spaCy token/POS processing of the ASR output,
    POS WER and embedding WER), then calls ``process()`` on it.  The record
    provider returned by ``get_record_provider()`` is reused as the record id
    iterator, the record path provider and the relation manager provider.
    """
    record_provider = get_record_provider()

    # NOTE(review): the service URLs and tokens below are hard-coded;
    # consider loading them from configuration instead of source code.
    asr_processor = MultipleSourcesAsrProcessor([
        AsrPathCacheClient(
            'experiment_data/cached_asr/luna_ajn_polish_asr',
            record_provider,
            record_provider,
        ),
        AsrWebClient('http://localhost:5431/process_asr', '__example_token__'),
    ])
    embedding_transformer = WebEmbeddingTransformer(
        'pl', 'http://localhost:5003', 'fjsd-mkwe-oius-m9h2'
    )

    # NOTE(review): several task names keep a 'techmo_' prefix although they
    # operate on AJN properties; presumably the names are identifiers used
    # elsewhere -- confirm before renaming.
    processing_tasks = [
        AsrTask(
            task_name='ajn_polish_asr_task',
            asr_processor=asr_processor,
            asr_property_name=AJN_POLISH_ASR,
            require_update=False,
            record_path_provider=record_provider,
        ),
        ClassicWerMetricTask(
            task_name='techmo_word_wer_processing',
            asr_property_name=AJN_POLISH_ASR,
            gold_transcript_property_name=GOLD_TRANSCRIPT,
            metrics_property_name=WORD_AJN_METRICS_WER,
            require_update=False,
            alignment_property_name=WORD_AJN_ALIGNMENT_WER,
        ),
        AsrSpacyTokenPosProcessingTask(
            task_name='techmo_spacy_task',
            input_property_name=AJN_POLISH_ASR,
            spacy_property_name=AJN_SPACY,
            require_update=True,
        ),
        SpacyPosWerProcessingTask(
            task_name='techmo_pos_wer_processing',
            require_update=False,
            gold_transcript_pos_property_name=GOLD_TRANSCRIPT_SPACY,
            asr_pos_property_name=AJN_SPACY,
            pos_alignment_wer=POS_AJN_ALIGNMENT_WER,
            pos_metrics_wer=POS_AJN_METRICS_WER,
        ),
        EmbeddingWerMetricsTask(
            task_name='EmbeddingWerMetricsTask',
            # Read the same ASR property the other tasks in this pipeline
            # use; the results are stored under the AJN embedding-WER
            # properties, so the input must be the AJN ASR output as well.
            asr_property_name=AJN_POLISH_ASR,
            gold_transcript_property_name=GOLD_TRANSCRIPT,
            metrics_property_name=WORD_AJN_METRICS_WER_EMBEDDINGS,
            require_update=False,
            embedding_transformer=embedding_transformer,
            alignment_property_name=WORD_AJN_ALIGNMENT_WER_EMBEDDINGS,
        ),
    ]

    experiment_processor = ExperimentManager(
        record_id_iterator=record_provider,
        processing_tasks=processing_tasks,
        experiment_repository=get_multiple_files_repository(),
        relation_manager_provider=record_provider,
    )
    experiment_processor.process()
# Script entry point: run the experiment only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    run_luna_experiment()