Skip to content
Snippets Groups Projects
Commit 9b9238ce authored by Marcin Wątroba's avatar Marcin Wątroba
Browse files

Update docs

parent aa8fb055
Branches
No related merge requests found
%% Cell type:markdown id: tags:
# Run experiment
%% Cell type:markdown id: tags:
Before running any experiment, the steps in `import_any_dataset.ipynb` and `use_any_asr.ipynb` must be completed.
The experiment is managed by `ExperimentRepository`. Below is an example of a simple experiment.
%% Cell type:code id: tags:
``` python
# imports
# The sentence-level WER processors come from `sziszapangma.integration.task`.
# Multi-line imports use parentheses rather than a trailing backslash, so a
# stray line between the `import` keyword and the imported name cannot turn
# the cell into a SyntaxError.
from sziszapangma.integration.task.flair_upos_multi_transformers_wer_processor_base import (
    FlairUposMultiTransformersWerProcessorBase,
)
from sziszapangma.integration.task.wikineural_multilingual_ner_transformers_wer_processor_base import (
    WikineuralMultilingualNerTransformersWerProcessorBase,
)
from sziszapangma.integration.task.spacy_pos_sentence_dep_tag_processor import SpacyDepTagSentenceWerProcessor
from sziszapangma.integration.task.spacy_ner_sentence_wer_processor import SpacyNerSentenceWerProcessor
from sziszapangma.integration.task.spacy_pos_sentence_wer_processor import SpacyPosSentenceWerProcessor
from sziszapangma.core.transformer.fasttext_embedding_transformer import FasttextEmbeddingTransformer
from sziszapangma.integration.task.embedding_wer_metrics_task import EmbeddingWerMetricsTask
from sziszapangma.integration.task.classic_wer_metric_task import ClassicWerMetricTask
from experiment.hf_asr.wav2vec2_hf import Wav2Vec2AsrProcessor
from experiment.utils.property_helper import PropertyHelper
from sziszapangma.integration.task.asr_task import AsrTask
from sziszapangma.integration.experiment_manager import ExperimentManager
from sziszapangma.integration.audio_repository.local_audio_record_repository import LocalAudioRecordRepository
from experiment.utils.loaded_dataset_helper import LoadedDatasetHelper
from pathlib import Path
from sziszapangma.integration.repository.multi_files_experiment_repository import MultiFilesExperimentRepository
```
%% Cell type:code id: tags:
``` python
# globals
# Identifiers of the dataset and the ASR system used throughout the notebook
# (the values here look like placeholders — replace with real names).
DATASET_NAME = "dataset_name"
ASR_NAME = "asr_name"

# Filesystem roots, both located under the current user's home directory.
REPOSITORY_ROOT_PATH = Path.home().joinpath("asr-benchmarks-repository")
AUDIO_ROOT_PATH = Path.home().joinpath(".cache/asr-benchmarks")

# Language code for the fastText embedding transformer and the spaCy model name.
FASTTEXT_LANGUAGE_CODE = "pl"
SPACY_MODEL_NAME = "pl_core_news_lg"

# Labels passed to PropertyHelper for the Wikineural NER and Flair UPOS tasks.
WIKINEURAL = "wikineural"
FLAIR_UPOS_MULTI = "flair_upos_multi"
```
%% Cell type:code id: tags:
``` python
# data providers
# Experiment repository rooted at REPOSITORY_ROOT_PATH for the chosen dataset.
repository = MultiFilesExperimentRepository(REPOSITORY_ROOT_PATH, DATASET_NAME)

# Helper built from the repository and a local audio record repository; it is
# used below both as the record id iterator and as the record path provider.
record_provider = LoadedDatasetHelper(
    repository,
    LocalAudioRecordRepository(AUDIO_ROOT_PATH, DATASET_NAME),
    DATASET_NAME,
)
```
%% Cell type:code id: tags:
``` python
# experiment definition
# All tasks are created with require_update=False and reference the same
# ASR result property, PropertyHelper.asr_result(ASR_NAME).
experiment_processor = ExperimentManager(
    record_id_iterator=record_provider,
    processing_tasks=[
        # Speech recognition with the Polish wav2vec2 XLSR-53 checkpoint.
        AsrTask(
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            task_name=f"AsrTask___{DATASET_NAME}___{ASR_NAME}",
            require_update=False,
            asr_processor=Wav2Vec2AsrProcessor("facebook/wav2vec2-large-xlsr-53-polish"),
            record_path_provider=record_provider,
        ),
        # Classic WER metrics and alignment against the gold transcript words.
        ClassicWerMetricTask(
            task_name=f"ClassicWerMetricTask___{DATASET_NAME}___{ASR_NAME}",
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(),
            metrics_property_name=PropertyHelper.word_wer_classic_metrics(ASR_NAME),
            require_update=False,
            alignment_property_name=PropertyHelper.word_wer_classic_alignment(ASR_NAME),
        ),
        # Embedding-based and soft WER variants using fastText embeddings.
        EmbeddingWerMetricsTask(
            task_name=f"EmbeddingWerMetricsTask___{DATASET_NAME}___{ASR_NAME}",
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_words(),
            require_update=False,
            embedding_transformer=FasttextEmbeddingTransformer(FASTTEXT_LANGUAGE_CODE),
            embeddings_alignment_property_name=PropertyHelper.word_wer_embeddings_alignment(ASR_NAME),
            embeddings_metrics_property_name=PropertyHelper.word_wer_embeddings_metrics(ASR_NAME),
            soft_alignment_property_name=PropertyHelper.word_wer_soft_alignment(ASR_NAME),
            soft_metrics_property_name=PropertyHelper.word_wer_soft_metrics(ASR_NAME),
        ),
        # spaCy-based processors (POS, NER, dependency tags) on the raw gold transcript.
        SpacyPosSentenceWerProcessor(
            model_name=SPACY_MODEL_NAME,
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            alignment_property_name=PropertyHelper.pos_alignment(ASR_NAME, SPACY_MODEL_NAME),
            wer_property_name=PropertyHelper.pos_metrics(ASR_NAME, SPACY_MODEL_NAME),
            task_name=f"SpacyPosSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}",
            require_update=False,
        ),
        SpacyNerSentenceWerProcessor(
            model_name=SPACY_MODEL_NAME,
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            alignment_property_name=PropertyHelper.ner_alignment(ASR_NAME, SPACY_MODEL_NAME),
            wer_property_name=PropertyHelper.ner_metrics(ASR_NAME, SPACY_MODEL_NAME),
            task_name=f"SpacyNerSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}",
            require_update=False,
        ),
        SpacyDepTagSentenceWerProcessor(
            model_name=SPACY_MODEL_NAME,
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            alignment_property_name=PropertyHelper.dep_tag_alignment(ASR_NAME, SPACY_MODEL_NAME),
            wer_property_name=PropertyHelper.dep_tag_metrics(ASR_NAME, SPACY_MODEL_NAME),
            task_name=f"SpacyDepTagSentenceWerProcessor___{DATASET_NAME}___{ASR_NAME}",
            require_update=False,
        ),
        # Transformer-based processors; results are keyed by the WIKINEURAL
        # and FLAIR_UPOS_MULTI labels instead of the spaCy model name.
        WikineuralMultilingualNerTransformersWerProcessorBase(
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            alignment_property_name=PropertyHelper.ner_alignment(ASR_NAME, WIKINEURAL),
            wer_property_name=PropertyHelper.ner_metrics(ASR_NAME, WIKINEURAL),
            task_name=f"WikineuralMultilingualNerTransformersWerProcessorBase___{DATASET_NAME}___{ASR_NAME}",
            require_update=False,
        ),
        FlairUposMultiTransformersWerProcessorBase(
            gold_transcript_property_name=PropertyHelper.get_gold_transcript_raw(),
            asr_property_name=PropertyHelper.asr_result(ASR_NAME),
            alignment_property_name=PropertyHelper.pos_alignment(ASR_NAME, FLAIR_UPOS_MULTI),
            wer_property_name=PropertyHelper.pos_metrics(ASR_NAME, FLAIR_UPOS_MULTI),
            task_name=f"FlairUposMultiTransformersWerProcessorBase___{DATASET_NAME}___{ASR_NAME}",
            require_update=False,
        ),
    ],
    experiment_repository=repository,
)
```
%% Cell type:code id: tags:
``` python
# run experiment
# Calls process() on the ExperimentManager built in the experiment-definition
# cell; presumably this executes the configured processing tasks — confirm
# against the ExperimentManager implementation.
experiment_processor.process()
```
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment