Skip to content
Snippets Groups Projects
params.yaml 1.39 KiB
Newer Older
# Settings under `global` are not tied to any single stage section below.
global:
    # NOTE(review): presumably the port of the cluster dashboard used by the
    # worker-based stages — confirm against the code that reads this file.
    dashboard_port: 8787
    # Identifier of the base model; looks like a model-hub "owner/name" id —
    # TODO confirm how the consumer resolves it.
    base_model: 'dkleczek/bert-base-polish-cased-v1'
    # Fixed seed value; presumably used to make runs reproducible — confirm.
    random_seed: 44

# Settings grouped under `actions`. Each child mapping presumably configures
# one processing stage — NOTE(review): stage semantics are inferred from the
# key names; confirm against the code that reads this file.
actions:
    extraction:
        # Written as a plain decimal rather than `2_000`: the underscore digit
        # separator is a YAML 1.1 feature, and YAML 1.2 core-schema parsers
        # load `2_000` as a string instead of an integer.
        num_partitions: 2000
        num_workers: 24
        worker_memory_limit: '2GB'

    tokenization:
        # Token-count bounds; presumably sequences outside [10, 500] are
        # dropped or truncated — TODO confirm which, and whether inclusive.
        min_tokens: 10
        max_tokens: 500
        num_workers: 24
        worker_memory_limit: '2GB'

    exploding:
        num_workers: 24
        worker_memory_limit: '2GB'

    reindexing:
        # Unlike the other stages (24 workers x 2GB), this one is configured
        # with a single worker and a 60GB limit.
        num_workers: 1
        worker_memory_limit: '60GB'

    stats:
        num_workers: 24
        worker_memory_limit: '2GB'

    training:
        learning_rate: 0.0001
        num_epochs: 5
        batch_size: 2
        # NOTE(review): unit not shown here (steps vs. batches) — confirm.
        save_step: 100
        # Explicit null: no value set; presumably means "no time limit".
        max_training_time: null
        loss_averaging_span: 1000
        # true here, false under `translations.training`.
        fresh_start: true
        # Quoted so the colon can never be misread as a mapping separator.
        device: 'cuda:0'

# Settings grouped under `translations`. Each child mapping presumably
# configures one processing stage — NOTE(review): stage semantics are inferred
# from the key names; confirm against the code that reads this file.
translations:
    extraction:
        # Written as a plain decimal rather than `2_000`: the underscore digit
        # separator is a YAML 1.1 feature, and YAML 1.2 core-schema parsers
        # load `2_000` as a string instead of an integer.
        num_partitions: 2000
        num_workers: 24
        worker_memory_limit: '2GB'

    create_batches:
        num_workers: 24
        worker_memory_limit: '2GB'
        # Token-count bounds; presumably sequences outside [5, 300] are
        # dropped or truncated — TODO confirm which, and whether inclusive.
        min_tokens: 5
        max_tokens: 300

    exploding:
        num_workers: 24
        worker_memory_limit: '2GB'

    reindexing:
        # Unlike the other stages (24 workers x 2GB), this one is configured
        # with a single worker and a 60GB limit.
        num_workers: 1
        worker_memory_limit: '60GB'

    training:
        learning_rate: 0.0001
        num_epochs: 5
        batch_size: 10
        # NOTE(review): unit not shown here (steps vs. batches) — confirm.
        save_step: 1000
        # Duration string; the accepted format is defined by the consumer —
        # presumably four hours, confirm.
        max_training_time: '4h'
        loss_averaging_span: 1000
        # false here, true under `actions.training`.
        fresh_start: false
        # Quoted so the colon can never be misread as a mapping separator.
        device: 'cuda:1'