From b670bba0d4b70c1a9435426e40985d2aa7b2e6a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martyna=20Wi=C4=85cek?= <martyna.wiacek@ipipan.waw.pl> Date: Sat, 10 Feb 2024 22:24:17 +0100 Subject: [PATCH] add template for transformer and for bilstm --- combo/config.template.json | 36 +-- combo/config.template.transformer.json | 304 +++++++++++++++++++++++++ 2 files changed, 323 insertions(+), 17 deletions(-) create mode 100644 combo/config.template.transformer.json diff --git a/combo/config.template.json b/combo/config.template.json index c6bf597..93332c5 100644 --- a/combo/config.template.json +++ b/combo/config.template.json @@ -10,7 +10,7 @@ "parameters": { "activation": { "type": "tanh", "parameters": {} }, "dropout_rate": 0.25, - "in_features": 164, + "in_features": 1024, "out_features": 128 } }, @@ -22,7 +22,7 @@ "type": "linear_layer", "parameters": { "activation": { "type": "tanh", "parameters": {} }, - "in_features": 164, + "in_features": 1024, "out_features": 512 } }, @@ -30,7 +30,7 @@ "type": "linear_layer", "parameters": { "activation": { "type": "tanh", "parameters": {} }, - "in_features": 164, + "in_features": 1024, "out_features": 512 } } @@ -41,7 +41,7 @@ "parameters": { "activation": { "type": "tanh", "parameters": {} }, "dropout_rate": 0.25, - "in_features": 164, + "in_features": 1024, "out_features": 128 } }, @@ -66,7 +66,7 @@ "parameters": { "activation": { "type": "tanh", "parameters": {} }, "dropout_rate": 0.25, - "in_features": 164, + "in_features": 1024, "out_features": 32 } }, @@ -94,7 +94,7 @@ ], "dropout": [0.25, 0.0], "hidden_dims": [128], - "input_dim": 164, + "input_dim": 1024, "num_layers": 2, "vocab_namespace": "feats_labels" } @@ -123,17 +123,19 @@ } }, "seq_encoder": { - "type": "combo_transformer_encoder", + "type": "combo_encoder", "parameters": { "layer_dropout_probability": 0.33, - "input_dim": 164, - "num_layers": 2, - "feedforward_hidden_dim": 2048, - "num_attention_heads": 4, - "positional_encoding": null, - "positional_embedding_size": 512, - "dropout_prob": 0.1, - "activation": "relu" + "stacked_bilstm": { + "type": "combo_stacked_bilstm", + "parameters": { + "hidden_size": 512, + "input_size": 164, + "layer_dropout_probability": 0.33, + "num_layers": 2, + "recurrent_dropout_probability": 0.33 + } + } } }, "text_field_embedder": { @@ -173,7 +175,7 @@ "type": "feedforward_predictor_from_vocab", "parameters": { "vocab_namespace": "upostag_labels", - "input_dim": 164, + "input_dim": 1024, "num_layers": 2, "hidden_dims": [64], "activations": [ @@ -187,7 +189,7 @@ "type": "feedforward_predictor_from_vocab", "parameters": { "vocab_namespace": "xpostag_labels", - "input_dim": 164, + "input_dim": 1024, "num_layers": 2, "hidden_dims": [64], "activations": [ diff --git a/combo/config.template.transformer.json b/combo/config.template.transformer.json new file mode 100644 index 0000000..c6bf597 --- /dev/null +++ b/combo/config.template.transformer.json @@ -0,0 +1,304 @@ +{ + "model": { + "type": "semantic_multitask", + "parameters": { + "dependency_relation": { + "type": "combo_dependency_parsing_from_vocab", + "parameters": { + "dependency_projection_layer": { + "type": "linear_layer", + "parameters": { + "activation": { "type": "tanh", "parameters": {} }, + "dropout_rate": 0.25, + "in_features": 164, + "out_features": 128 + } + }, + "head_predictor": { + "type": "head_prediction", + "parameters": { + "cycle_loss_n": 0, + "dependency_projection_layer": { + "type": "linear_layer", + "parameters": { + "activation": { "type": "tanh", "parameters": {} }, + "in_features": 164, + "out_features": 512 + } + }, + "head_projection_layer": { + "type": "linear_layer", + "parameters": { + "activation": { "type": "tanh", "parameters": {} }, + "in_features": 164, + "out_features": 512 + } + } + } + }, + "head_projection_layer": { + "type": "linear_layer", + "parameters": { + "activation": { "type": "tanh", "parameters": {} }, + "dropout_rate": 0.25, + "in_features": 164, + "out_features": 128 + } + }, + "vocab_namespace": "deprel_labels" + } + }, + "lemmatizer": { + "type": "combo_lemma_predictor_from_vocab", + "parameters": { + "activations": [ + { "type": "gelu", "parameters": {} }, + { "type": "gelu", "parameters": {} }, + { "type": "gelu", "parameters": {} }, + { "type": "linear", "parameters": {} } + ], + "char_vocab_namespace": "token_characters", + "dilation": [1, 2, 4, 1], + "embedding_dim": 300, + "filters": [256, 256, 256], + "input_projection_layer": { + "type": "linear_layer", + "parameters": { + "activation": { "type": "tanh", "parameters": {} }, + "dropout_rate": 0.25, + "in_features": 164, + "out_features": 32 + } + }, + "kernel_size": [3, 3, 3, 1], + "lemma_vocab_namespace": "lemma_characters", + "padding": [1, 2, 4, 0], + "stride": [1, 1, 1, 1] + } + }, + "loss_weights": { + "deprel": 0.8, + "feats": 0.2, + "head": 0.2, + "lemma": 0.05, + "semrel": 0.05, + "upostag": 0.05, + "xpostag": 0.05 + }, + "morphological_feat": { + "type": "combo_morpho_from_vocab", + "parameters": { + "activations": [ + { "type": "tanh", "parameters": {} }, + { "type": "linear", "parameters": {} } + ], + "dropout": [0.25, 0.0], + "hidden_dims": [128], + "input_dim": 164, + "num_layers": 2, + "vocab_namespace": "feats_labels" + } + }, + "regularizer": { + "type": "base_regularizer", + "parameters": { + "regexes": [ + [ + ".*conv1d.*", + { "type": "l2_regularizer", "parameters": { "alpha": 1e-6 } } + ], + [ + ".*forward.*", + { "type": "l2_regularizer", "parameters": { "alpha": 1e-6 } } + ], + [ + ".*backward.*", + { "type": "l2_regularizer", "parameters": { "alpha": 1e-6 } } + ], + [ + ".*char_embed.*", + { "type": "l2_regularizer", "parameters": { "alpha": 1e-5 } } + ] + ] + } + }, + "seq_encoder": { + "type": "combo_transformer_encoder", + "parameters": { + "layer_dropout_probability": 0.33, + "input_dim": 164, + "num_layers": 2, + "feedforward_hidden_dim": 2048, + "num_attention_heads": 4, + "positional_encoding": null, + "positional_embedding_size": 512, + "dropout_prob": 0.1, + "activation": "relu" + } + }, + "text_field_embedder": { + "type": "base_text_field_embedder", + "parameters": { + "token_embedders": { + "char": { + "type": "char_embeddings_token_embedder", + "parameters": { + "dilated_cnn_encoder": { + "type": "dilated_cnn", + "parameters": { + "activations": [ + { "type": "gelu", "parameters": {} }, + { "type": "gelu", "parameters": {} }, + { "type": "linear", "parameters": {} } + ], + "dilation": [1, 2, 4], + "filters": [512, 256, 64], + "input_dim": 64, + "kernel_size": [3, 3, 3], + "padding": [1, 2, 4], + "stride": [1, 1, 1] + } + }, + "embedding_dim": 64 + } + }, + "token": { + "type": "transformers_word_embedder", + "parameters": { "projection_dim": 100 } + } + } + } + }, + "upos_tagger": { + "type": "feedforward_predictor_from_vocab", + "parameters": { + "vocab_namespace": "upostag_labels", + "input_dim": 164, + "num_layers": 2, + "hidden_dims": [64], + "activations": [ + { "type": "tanh", "parameters": {} }, + { "type": "linear", "parameters": {} } + ], + "dropout": [0.25, 0.0] + } + }, + "xpos_tagger": { + "type": "feedforward_predictor_from_vocab", + "parameters": { + "vocab_namespace": "xpostag_labels", + "input_dim": 164, + "num_layers": 2, + "hidden_dims": [64], + "activations": [ + { "type": "tanh", "parameters": {} }, + { "type": "linear", "parameters": {} } + ], + "dropout": [0.25, 0.0] + } + } + } + }, + "data_loader": { + "type": "simple_data_loader_from_dataset_reader", + "parameters": { + "reader": { + "type": "conllu_dataset_reader", + "parameters": { + "features": ["token", "char"], + "tokenizer": { + "type": "lambo_tokenizer" + }, + "lemma_indexers": { + "char": { + "type": "characters_const_padding_token_indexer", + "parameters": { + "tokenizer": { + "type": "character_tokenizer", + "parameters": { + "end_tokens": ["__END__"], + "start_tokens": ["__START__"] + } + }, + "min_padding_length": 32, + "namespace": "lemma_characters" + } + } + }, + "targets": ["deprel", "feats", "head", "lemma", "upostag", "xpostag"], + "token_indexers": { + "char": { + "type": "characters_const_padding_token_indexer", + "parameters": { + "tokenizer": { + "type": "character_tokenizer", + "parameters": { + "end_tokens": ["__END__"], + "start_tokens": ["__START__"] + } + }, + "min_padding_length": 32 + } + }, + "token": { + "type": "pretrained_transformer_mismatched_fixed_token_indexer", + "parameters": { "model_name": "allegro/herbert-base-cased" } + } + }, + "use_sem": false + } + }, + "batch_size": 1, + "shuffle": true, + "quiet": false + } + }, + "dataset_reader": { + "type": "conllu_dataset_reader", + "parameters": { + "features": ["token", "char"], + "tokenizer": { + "type": "lambo_tokenizer" + }, + "lemma_indexers": { + "char": { + "type": "characters_const_padding_token_indexer", + "parameters": { + "tokenizer": { + "type": "character_tokenizer", + "parameters": { + "end_tokens": ["__END__"], + "start_tokens": ["__START__"] + } + }, + "min_padding_length": 32, + "namespace": "lemma_characters" + } + } + }, + "targets": ["deprel", "feats", "head", "lemma", "upostag", "xpostag"], + "token_indexers": { + "char": { + "type": "characters_const_padding_token_indexer", + "parameters": { + "tokenizer": { + "type": "character_tokenizer", + "parameters": { + "end_tokens": ["__END__"], + "start_tokens": ["__START__"] + } + }, + "min_padding_length": 32 + } + }, + "token": { + "type": "pretrained_transformer_mismatched_fixed_token_indexer", + "parameters": { "model_name": "allegro/herbert-base-cased" } + } + }, + "use_sem": false + } + }, + "training": {}, + "model_name": "allegro/herbert-base-cased" +} -- GitLab