# dvc.yaml
# DVC pipeline definition: actions-based and translation-based stages.
stages:
  # Stage 1 of the actions-based pipeline: runs the stage1_extraction module.
  # Re-executed when the `data` directory, the stage script, or the tracked
  # parameter below changes.
  actions_extraction:
    cmd: python3 -m src.pipelines.actions_based.stage1_extraction
    deps:
    - data
    - src/pipelines/actions_based/stage1_extraction.py
    params:
    - actions.extraction.num_partitions
    outs:
    # Consumed by actions_tokenization.
    - generated/actions/stage1_extraction
  # Stage 2 of the actions-based pipeline: runs the stage2_tokenization module
  # on the output of actions_extraction.
  actions_tokenization:
    cmd: python3 -m src.pipelines.actions_based.stage2_tokenization
    deps:
    - generated/actions/stage1_extraction
    - src/pipelines/actions_based/stage2_tokenization.py
    params:
    - actions.tokenization.max_tokens
    - actions.tokenization.min_tokens
    # Shared parameter; also tracked by other stages in this file.
    - global.base_model
    outs:
    # Consumed by actions_exploding.
    - generated/actions/stage2_tokenization
  # Stage 3 of the actions-based pipeline: runs the stage3_exploding module on
  # the output of actions_tokenization. Tracks no parameters.
  actions_exploding:
    cmd: python3 -m src.pipelines.actions_based.stage3_exploding
    deps:
    - generated/actions/stage2_tokenization
    - src/pipelines/actions_based/stage3_exploding.py
    outs:
    # Consumed by actions_reindexing.
    - generated/actions/stage3_exploding
  # Stage 4 of the actions-based pipeline: runs the stage4_reindexing module on
  # the output of actions_exploding. Tracks no parameters.
  actions_reindexing:
    cmd: python3 -m src.pipelines.actions_based.stage4_reindexing
    deps:
    - generated/actions/stage3_exploding
    - src/pipelines/actions_based/stage4_reindexing.py
    outs:
    # Consumed by both actions_stats and actions_training.
    - generated/actions/stage4_reindexing
  # Stage 5 of the actions-based pipeline: runs the stage5_stats module on the
  # output of actions_reindexing. Tracks no parameters.
  actions_stats:
    cmd: python3 -m src.pipelines.actions_based.stage5_stats
    deps:
    - generated/actions/stage4_reindexing
    - src/pipelines/actions_based/stage5_stats.py
    outs:
    # Consumed by actions_training.
    - generated/actions/stage5_stats
  # Final stage of the actions-based pipeline: runs the train module.
  # Depends on the outputs of both actions_reindexing and actions_stats, so it
  # is re-executed when either of them (or the training script) changes.
  actions_training:
    cmd: python3 -m src.pipelines.actions_based.train
    deps:
    - generated/actions/stage4_reindexing
    - generated/actions/stage5_stats
    - src/pipelines/actions_based/train.py
    params:
    # Shared parameter; also tracked by other stages in this file.
    - global.base_model
    - actions.training.max_training_time
    - actions.training.learning_rate
    - actions.training.num_epochs
    - actions.training.batch_size
    - actions.training.save_step
    outs:
    - checkpoints/actions
  # Stage 1 of the translation-based pipeline: runs the stage1_extraction
  # module. Re-executed when the `data` directory, the stage script, or the
  # tracked parameter below changes.
  translations_extraction:
    cmd: python3 -m src.pipelines.translation_based.stage1_extraction
    deps:
    - data
    # The stage script is a dependency so that editing it invalidates the
    # cached output of this stage.
    - src/pipelines/translation_based/stage1_extraction.py
    params:
    - translations.extraction.num_partitions
    outs:
    # Consumed by translations_create_batches.
    - generated/translations/stage1_extraction
  # Stage 2 of the translation-based pipeline: runs the stage2_create_batches
  # module on the output of translations_extraction.
  translations_create_batches:
    cmd: python3 -m src.pipelines.translation_based.stage2_create_batches
    deps:
    - generated/translations/stage1_extraction
    # The stage script is a dependency so that editing it invalidates the
    # cached output of this stage.
    - src/pipelines/translation_based/stage2_create_batches.py
    params:
    # Shared parameter; also tracked by other stages in this file.
    - global.base_model
    outs:
    # Consumed by translations_exploding.
    - generated/translations/stage2_create_batches
  # Stage 3 of the translation-based pipeline: runs the stage3_exploding module
  # on the output of translations_create_batches. Tracks no parameters.
  translations_exploding:
    cmd: python3 -m src.pipelines.translation_based.stage3_exploding
    deps:
    - generated/translations/stage2_create_batches
    # The stage script is a dependency so that editing it invalidates the
    # cached output of this stage.
    - src/pipelines/translation_based/stage3_exploding.py
    outs:
    # Consumed by translations_reindexing.
    - generated/translations/stage3_exploding
  # Stage 4 of the translation-based pipeline: runs the stage4_reindexing
  # module on the output of translations_exploding. Tracks no parameters.
  translations_reindexing:
    cmd: python3 -m src.pipelines.translation_based.stage4_reindexing
    deps:
    - generated/translations/stage3_exploding
    # The stage script is a dependency so that editing it invalidates the
    # cached output of this stage.
    - src/pipelines/translation_based/stage4_reindexing.py
    outs:
    # Consumed by translations_training.
    - generated/translations/stage4_reindexing
  # Final stage of the translation-based pipeline: runs the train module on the
  # output of translations_reindexing. Re-executed when that output, the
  # training script, or any tracked parameter below changes.
  translations_training:
    cmd: python3 -m src.pipelines.translation_based.train
    deps:
    - generated/translations/stage4_reindexing
    - src/pipelines/translation_based/train.py
    params:
    # Shared parameter; also tracked by other stages in this file.
    - global.base_model
    - translations.training.max_training_time
    - translations.training.learning_rate
    - translations.training.num_epochs
    - translations.training.batch_size
    - translations.training.save_step
    outs:
    - checkpoints/translations