From 27ff4b0a3bc1bfb51c1438eab9d20ab7d11c362a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martyna=20Wi=C4=85cek?= <martyna.wiacek@ipipan.waw.pl> Date: Sat, 10 Feb 2024 22:13:21 +0100 Subject: [PATCH] fixes in template and default_model --- combo/config.template.json | 36 +++++++++++++++++------------------- combo/default_model.py | 18 +++++++++--------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/combo/config.template.json b/combo/config.template.json index 93332c5..c6bf597 100644 --- a/combo/config.template.json +++ b/combo/config.template.json @@ -10,7 +10,7 @@ "parameters": { "activation": { "type": "tanh", "parameters": {} }, "dropout_rate": 0.25, - "in_features": 1024, + "in_features": 164, "out_features": 128 } }, @@ -22,7 +22,7 @@ "type": "linear_layer", "parameters": { "activation": { "type": "tanh", "parameters": {} }, - "in_features": 1024, + "in_features": 164, "out_features": 512 } }, @@ -30,7 +30,7 @@ "type": "linear_layer", "parameters": { "activation": { "type": "tanh", "parameters": {} }, - "in_features": 1024, + "in_features": 164, "out_features": 512 } } @@ -41,7 +41,7 @@ "parameters": { "activation": { "type": "tanh", "parameters": {} }, "dropout_rate": 0.25, - "in_features": 1024, + "in_features": 164, "out_features": 128 } }, @@ -66,7 +66,7 @@ "parameters": { "activation": { "type": "tanh", "parameters": {} }, "dropout_rate": 0.25, - "in_features": 1024, + "in_features": 164, "out_features": 32 } }, @@ -94,7 +94,7 @@ ], "dropout": [0.25, 0.0], "hidden_dims": [128], - "input_dim": 1024, + "input_dim": 164, "num_layers": 2, "vocab_namespace": "feats_labels" } @@ -123,19 +123,17 @@ } }, "seq_encoder": { - "type": "combo_encoder", + "type": "combo_transformer_encoder", "parameters": { "layer_dropout_probability": 0.33, - "stacked_bilstm": { - "type": "combo_stacked_bilstm", - "parameters": { - "hidden_size": 512, - "input_size": 164, - "layer_dropout_probability": 0.33, - "num_layers": 2, - "recurrent_dropout_probability": 0.33 - } - } + "input_dim": 164, + "num_layers": 2, + "feedforward_hidden_dim": 2048, + "num_attention_heads": 4, + "positional_encoding": null, + "positional_embedding_size": 512, + "dropout_prob": 0.1, + "activation": "relu" } }, "text_field_embedder": { @@ -175,7 +173,7 @@ "type": "feedforward_predictor_from_vocab", "parameters": { "vocab_namespace": "upostag_labels", - "input_dim": 1024, + "input_dim": 164, "num_layers": 2, "hidden_dims": [64], "activations": [ @@ -189,7 +187,7 @@ "type": "feedforward_predictor_from_vocab", "parameters": { "vocab_namespace": "xpostag_labels", - "input_dim": 1024, + "input_dim": 164, "num_layers": 2, "hidden_dims": [64], "activations": [ diff --git a/combo/default_model.py b/combo/default_model.py index 61be958..47d2b14 100644 --- a/combo/default_model.py +++ b/combo/default_model.py @@ -131,26 +131,26 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ dependency_projection_layer=Linear( activation=TanhActivation(), dropout_rate=0.25, - in_features=1024, + in_features=164, out_features=128 ), head_predictor=HeadPredictionModel( cycle_loss_n=0, dependency_projection_layer=Linear( activation=TanhActivation(), - in_features=1024, + in_features=164, out_features=512 ), head_projection_layer=Linear( activation=TanhActivation(), - in_features=1024, + in_features=164, out_features=512 ) ), head_projection_layer=Linear( activation=TanhActivation(), dropout_rate=0.25, - in_features=1024, + in_features=164, out_features=128 ), vocab_namespace="deprel_labels" @@ -160,12 +160,12 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ activations=[GELUActivation(), GELUActivation(), GELUActivation(), LinearActivation()], char_vocab_namespace="token_characters", dilation=[1, 2, 4, 1], - embedding_dim=300, + embedding_dim=256, filters=[256, 256, 256], input_projection_layer=Linear( activation=TanhActivation(), dropout_rate=0.25, - in_features=1024, + in_features=164, out_features=32 ), kernel_size=[3, 3, 3, 1], @@ -187,7 +187,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ activations=[TanhActivation(), LinearActivation()], dropout=[0.25, 0.], hidden_dims=[128], - input_dim=1024, + input_dim=164, num_layers=2, vocab_namespace="feats_labels" ), @@ -231,7 +231,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ activations=[TanhActivation(), LinearActivation()], dropout=[0.25, 0.], hidden_dims=[64], - input_dim=1024, + input_dim=164, num_layers=2, vocab_namespace="upostag_labels" ), @@ -240,7 +240,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ activations=[TanhActivation(), LinearActivation()], dropout=[0.25, 0.], hidden_dims=[64], - input_dim=1024, + input_dim=164, num_layers=2, vocab_namespace="xpostag_labels" ) -- GitLab