Skip to content
Snippets Groups Projects
Commit 9b9238ce authored by Marcin Wątroba's avatar Marcin Wątroba
Browse files

Update docs

parent aa8fb055
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
# Run experiment # Run experiment
%% Cell type:markdown id: tags: %% Cell type:markdown id: tags:
Before running any experiment, the steps in `import_any_dataset.ipynb` and `use_any_asr.ipynb` must be completed. Before running any experiment, the steps in `import_any_dataset.ipynb` and `use_any_asr.ipynb` must be completed.
The experiment is managed by `ExperimentRepository`. Below is an example of a simple experiment. The experiment is managed by `ExperimentRepository`. Below is an example of a simple experiment.
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# imports # imports
from experiment.sentence_wer_processor.flair_upos_multi_transformers_wer_processor_base import \ from sziszapangma.integration.task.flair_upos_multi_transformers_wer_processor_base import \
FlairUposMultiTransformersWerProcessorBase FlairUposMultiTransformersWerProcessorBase
from experiment.sentence_wer_processor.wikineural_multilingual_ner_transformers_wer_processor_base import \ from sziszapangma.integration.task.wikineural_multilingual_ner_transformers_wer_processor_base import \
WikineuralMultilingualNerTransformersWerProcessorBase WikineuralMultilingualNerTransformersWerProcessorBase
from experiment.sentence_wer_processor.spacy_pos_sentence_dep_tag_processor import SpacyDepTagSentenceWerProcessor from sziszapangma.integration.task.spacy_pos_sentence_dep_tag_processor import SpacyDepTagSentenceWerProcessor
from experiment.sentence_wer_processor.spacy_ner_sentence_wer_processor import SpacyNerSentenceWerProcessor from sziszapangma.integration.task.spacy_ner_sentence_wer_processor import SpacyNerSentenceWerProcessor
from experiment.sentence_wer_processor.spacy_pos_sentence_wer_processor import SpacyPosSentenceWerProcessor from sziszapangma.integration.task.spacy_pos_sentence_wer_processor import SpacyPosSentenceWerProcessor
from sziszapangma.core.transformer.fasttext_embedding_transformer import FasttextEmbeddingTransformer from sziszapangma.core.transformer.fasttext_embedding_transformer import FasttextEmbeddingTransformer
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from experiment.hf_asr.wav2vec2_hf import Wav2Vec2AsrProcessor from experiment.hf_asr.wav2vec2_hf import Wav2Vec2AsrProcessor
from experiment.utils.property_helper import PropertyHelper from experiment.utils.property_helper import PropertyHelper
from sziszapangma.integration.task.asr_task import AsrTask from sziszapangma.integration.task.asr_task import AsrTask
from sziszapangma.integration.experiment_manager import ExperimentManager from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.audio_repository.local_audio_record_repository import LocalAudioRecordRepository from sziszapangma.integration.audio_repository.local_audio_record_repository import LocalAudioRecordRepository
from experiment.utils.loaded_dataset_helper import LoadedDatasetHelper from experiment.utils.loaded_dataset_helper import LoadedDatasetHelper
from pathlib import Path from pathlib import Path
from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# globals # globals
DATASET_NAME = 'dataset_name' DATASET_NAME = 'dataset_name'
ASR_NAME = 'asr_name' ASR_NAME = 'asr_name'
REPOSITORY_ROOT_PATH = Path.home() / 'asr-benchmarks-repository' REPOSITORY_ROOT_PATH = Path.home() / 'asr-benchmarks-repository'
AUDIO_ROOT_PATH = Path.home() / '.cache/asr-benchmarks' AUDIO_ROOT_PATH = Path.home() / '.cache/asr-benchmarks'
FASTTEXT_LANGUAGE_CODE = 'pl' FASTTEXT_LANGUAGE_CODE = 'pl'
WIKINEURAL = "wikineural" WIKINEURAL = "wikineural"
SPACY_MODEL_NAME = 'pl_core_news_lg' SPACY_MODEL_NAME = 'pl_core_news_lg'
FLAIR_UPOS_MULTI = 'flair_upos_multi' FLAIR_UPOS_MULTI = 'flair_upos_multi'
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# data providers # data providers
repository = MultiFilesExperimentRepository(REPOSITORY_ROOT_PATH, DATASET_NAME) repository = MultiFilesExperimentRepository(REPOSITORY_ROOT_PATH, DATASET_NAME)
record_provider = LoadedDatasetHelper( record_provider = LoadedDatasetHelper(
repository, LocalAudioRecordRepository(AUDIO_ROOT_PATH, DATASET_NAME), DATASET_NAME repository, LocalAudioRecordRepository(AUDIO_ROOT_PATH, DATASET_NAME), DATASET_NAME
) )
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# experiment definition # experiment definition
experiment_processor = ExperimentManager( experiment_processor = ExperimentManager(
record_id_iterator=record_provider, record_id_iterator=record_provider,
processing_tasks=[ processing_tasks=[
AsrTask( AsrTask(
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
task_name=f"AsrTask___{DATASET_NAME}___{ASR_NAME}", task_name=f"AsrTask___{DATASET_NAME}___{ASR_NAME}",
require_update=False, require_update=False,
asr_processor=Wav2Vec2AsrProcessor("facebook/wav2vec2-large-xlsr-53-polish"), asr_processor=Wav2Vec2AsrProcessor("facebook/wav2vec2-large-xlsr-53-polish"),
record_path_provider=record_provider, record_path_provider=record_provider,
), ),
ClassicWerMetricTask( ClassicWerMetricTask(
task_name=f"ClassicWerMetricTask___{DATASET_NAME}___{ASR_NAME}", task_name=f"ClassicWerMetricTask___{DATASET_NAME}___{ASR_NAME}",
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(),
metrics_property_name=PropertyHelper.word_wer_classic_metrics(ASR_NAME), metrics_property_name=PropertyHelper.word_wer_classic_metrics(ASR_NAME),
require_update=False, require_update=False,
alignment_property_name=PropertyHelper.word_wer_classic_alignment(ASR_NAME), alignment_property_name=PropertyHelper.word_wer_classic_alignment(ASR_NAME),
), ),
EmbeddingWerMetricsTask( EmbeddingWerMetricsTask(
task_name=f"EmbeddingWerMetricsTask___{DATASET_NAME}___{ASR_NAME}", task_name=f"EmbeddingWerMetricsTask___{DATASET_NAME}___{ASR_NAME}",
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(),
require_update=False, require_update=False,
embedding_transformer=FasttextEmbeddingTransformer(FASTTEXT_LANGUAGE_CODE), embedding_transformer=FasttextEmbeddingTransformer(FASTTEXT_LANGUAGE_CODE),
embeddings_alignment_property_name=PropertyHelper.word_wer_embeddings_alignment(ASR_NAME), embeddings_alignment_property_name=PropertyHelper.word_wer_embeddings_alignment(ASR_NAME),
embeddings_metrics_property_name=PropertyHelper.word_wer_embeddings_metrics(ASR_NAME), embeddings_metrics_property_name=PropertyHelper.word_wer_embeddings_metrics(ASR_NAME),
soft_alignment_property_name=PropertyHelper.word_wer_soft_alignment(ASR_NAME), soft_alignment_property_name=PropertyHelper.word_wer_soft_alignment(ASR_NAME),
soft_metrics_property_name=PropertyHelper.word_wer_soft_metrics(ASR_NAME), soft_metrics_property_name=PropertyHelper.word_wer_soft_metrics(ASR_NAME),
), ),
SpacyPosSentenceWerProcessor( SpacyPosSentenceWerProcessor(
model_name=SPACY_MODEL_NAME, model_name=SPACY_MODEL_NAME,
gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
alignment_property_name=PropertyHelper.pos_alignment(ASR_NAME, SPACY_MODEL_NAME), alignment_property_name=PropertyHelper.pos_alignment(ASR_NAME, SPACY_MODEL_NAME),
wer_property_name=PropertyHelper.pos_metrics(ASR_NAME, SPACY_MODEL_NAME), wer_property_name=PropertyHelper.pos_metrics(ASR_NAME, SPACY_MODEL_NAME),
task_name=f"SpacyPosSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}", task_name=f"SpacyPosSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}",
require_update=False, require_update=False,
), ),
SpacyNerSentenceWerProcessor( SpacyNerSentenceWerProcessor(
model_name=SPACY_MODEL_NAME, model_name=SPACY_MODEL_NAME,
gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
alignment_property_name=PropertyHelper.ner_alignment( alignment_property_name=PropertyHelper.ner_alignment(
ASR_NAME, SPACY_MODEL_NAME ASR_NAME, SPACY_MODEL_NAME
), ),
wer_property_name=PropertyHelper.ner_metrics(ASR_NAME, SPACY_MODEL_NAME), wer_property_name=PropertyHelper.ner_metrics(ASR_NAME, SPACY_MODEL_NAME),
task_name=f"SpacyNerSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}", task_name=f"SpacyNerSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}",
require_update=False, require_update=False,
), ),
SpacyDepTagSentenceWerProcessor( SpacyDepTagSentenceWerProcessor(
model_name=SPACY_MODEL_NAME, model_name=SPACY_MODEL_NAME,
gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
alignment_property_name=PropertyHelper.dep_tag_alignment( alignment_property_name=PropertyHelper.dep_tag_alignment(
ASR_NAME, SPACY_MODEL_NAME ASR_NAME, SPACY_MODEL_NAME
), ),
wer_property_name=PropertyHelper.dep_tag_metrics(ASR_NAME, SPACY_MODEL_NAME), wer_property_name=PropertyHelper.dep_tag_metrics(ASR_NAME, SPACY_MODEL_NAME),
task_name=f"SpacyDepTagSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}", task_name=f"SpacyDepTagSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}",
require_update=False, require_update=False,
), ),
WikineuralMultilingualNerTransformersWerProcessorBase( WikineuralMultilingualNerTransformersWerProcessorBase(
gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
alignment_property_name=PropertyHelper.ner_alignment( alignment_property_name=PropertyHelper.ner_alignment(
ASR_NAME, WIKINEURAL ASR_NAME, WIKINEURAL
), ),
wer_property_name=PropertyHelper.ner_metrics(ASR_NAME, WIKINEURAL), wer_property_name=PropertyHelper.ner_metrics(ASR_NAME, WIKINEURAL),
task_name=f"WikineuralMultilingualNerTransformersWerProcessorBase___{DATASET_NAME}___{ASR_NAME}", task_name=f"WikineuralMultilingualNerTransformersWerProcessorBase___{DATASET_NAME}___{ASR_NAME}",
require_update=False, require_update=False,
), ),
FlairUposMultiTransformersWerProcessorBase( FlairUposMultiTransformersWerProcessorBase(
gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(), gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
asr_property_name=PropertyHelper.asr_result(ASR_NAME), asr_property_name=PropertyHelper.asr_result(ASR_NAME),
alignment_property_name=PropertyHelper.pos_alignment( alignment_property_name=PropertyHelper.pos_alignment(
ASR_NAME, FLAIR_UPOS_MULTI ASR_NAME, FLAIR_UPOS_MULTI
), ),
wer_property_name=PropertyHelper.pos_metrics( wer_property_name=PropertyHelper.pos_metrics(
ASR_NAME, FLAIR_UPOS_MULTI ASR_NAME, FLAIR_UPOS_MULTI
), ),
task_name=f"FlairUposMultiTransformersWerProcessorBase___{DATASET_NAME}___{ASR_NAME}", task_name=f"FlairUposMultiTransformersWerProcessorBase___{DATASET_NAME}___{ASR_NAME}",
require_update=False, require_update=False,
) )
], ],
experiment_repository=repository, experiment_repository=repository,
) )
``` ```
%% Cell type:code id: tags: %% Cell type:code id: tags:
``` python ``` python
# run experiment # run experiment
experiment_processor.process() experiment_processor.process()
``` ```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment