Skip to content
Snippets Groups Projects
Commit 27ff4b0a authored by Martyna Wiącek
Browse files

fixes in template and default_model

parent a42a032a
Branches
Tags
2 merge requests: !49 "Multiword fix transformer encoder", !47 "Fixed multiword prediction + bug that made the code write empty predictions"
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
"parameters": { "parameters": {
"activation": { "type": "tanh", "parameters": {} }, "activation": { "type": "tanh", "parameters": {} },
"dropout_rate": 0.25, "dropout_rate": 0.25,
"in_features": 1024, "in_features": 164,
"out_features": 128 "out_features": 128
} }
}, },
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
"type": "linear_layer", "type": "linear_layer",
"parameters": { "parameters": {
"activation": { "type": "tanh", "parameters": {} }, "activation": { "type": "tanh", "parameters": {} },
"in_features": 1024, "in_features": 164,
"out_features": 512 "out_features": 512
} }
}, },
...@@ -30,7 +30,7 @@ ...@@ -30,7 +30,7 @@
"type": "linear_layer", "type": "linear_layer",
"parameters": { "parameters": {
"activation": { "type": "tanh", "parameters": {} }, "activation": { "type": "tanh", "parameters": {} },
"in_features": 1024, "in_features": 164,
"out_features": 512 "out_features": 512
} }
} }
...@@ -41,7 +41,7 @@ ...@@ -41,7 +41,7 @@
"parameters": { "parameters": {
"activation": { "type": "tanh", "parameters": {} }, "activation": { "type": "tanh", "parameters": {} },
"dropout_rate": 0.25, "dropout_rate": 0.25,
"in_features": 1024, "in_features": 164,
"out_features": 128 "out_features": 128
} }
}, },
...@@ -66,7 +66,7 @@ ...@@ -66,7 +66,7 @@
"parameters": { "parameters": {
"activation": { "type": "tanh", "parameters": {} }, "activation": { "type": "tanh", "parameters": {} },
"dropout_rate": 0.25, "dropout_rate": 0.25,
"in_features": 1024, "in_features": 164,
"out_features": 32 "out_features": 32
} }
}, },
...@@ -94,7 +94,7 @@ ...@@ -94,7 +94,7 @@
], ],
"dropout": [0.25, 0.0], "dropout": [0.25, 0.0],
"hidden_dims": [128], "hidden_dims": [128],
"input_dim": 1024, "input_dim": 164,
"num_layers": 2, "num_layers": 2,
"vocab_namespace": "feats_labels" "vocab_namespace": "feats_labels"
} }
...@@ -123,19 +123,17 @@ ...@@ -123,19 +123,17 @@
} }
}, },
"seq_encoder": { "seq_encoder": {
"type": "combo_encoder", "type": "combo_transformer_encoder",
"parameters": { "parameters": {
"layer_dropout_probability": 0.33, "layer_dropout_probability": 0.33,
"stacked_bilstm": { "input_dim": 164,
"type": "combo_stacked_bilstm", "num_layers": 2,
"parameters": { "feedforward_hidden_dim": 2048,
"hidden_size": 512, "num_attention_heads": 4,
"input_size": 164, "positional_encoding": null,
"layer_dropout_probability": 0.33, "positional_embedding_size": 512,
"num_layers": 2, "dropout_prob": 0.1,
"recurrent_dropout_probability": 0.33 "activation": "relu"
}
}
} }
}, },
"text_field_embedder": { "text_field_embedder": {
...@@ -175,7 +173,7 @@ ...@@ -175,7 +173,7 @@
"type": "feedforward_predictor_from_vocab", "type": "feedforward_predictor_from_vocab",
"parameters": { "parameters": {
"vocab_namespace": "upostag_labels", "vocab_namespace": "upostag_labels",
"input_dim": 1024, "input_dim": 164,
"num_layers": 2, "num_layers": 2,
"hidden_dims": [64], "hidden_dims": [64],
"activations": [ "activations": [
...@@ -189,7 +187,7 @@ ...@@ -189,7 +187,7 @@
"type": "feedforward_predictor_from_vocab", "type": "feedforward_predictor_from_vocab",
"parameters": { "parameters": {
"vocab_namespace": "xpostag_labels", "vocab_namespace": "xpostag_labels",
"input_dim": 1024, "input_dim": 164,
"num_layers": 2, "num_layers": 2,
"hidden_dims": [64], "hidden_dims": [64],
"activations": [ "activations": [
......
...@@ -131,26 +131,26 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ ...@@ -131,26 +131,26 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_
dependency_projection_layer=Linear( dependency_projection_layer=Linear(
activation=TanhActivation(), activation=TanhActivation(),
dropout_rate=0.25, dropout_rate=0.25,
in_features=1024, in_features=164,
out_features=128 out_features=128
), ),
head_predictor=HeadPredictionModel( head_predictor=HeadPredictionModel(
cycle_loss_n=0, cycle_loss_n=0,
dependency_projection_layer=Linear( dependency_projection_layer=Linear(
activation=TanhActivation(), activation=TanhActivation(),
in_features=1024, in_features=164,
out_features=512 out_features=512
), ),
head_projection_layer=Linear( head_projection_layer=Linear(
activation=TanhActivation(), activation=TanhActivation(),
in_features=1024, in_features=164,
out_features=512 out_features=512
) )
), ),
head_projection_layer=Linear( head_projection_layer=Linear(
activation=TanhActivation(), activation=TanhActivation(),
dropout_rate=0.25, dropout_rate=0.25,
in_features=1024, in_features=164,
out_features=128 out_features=128
), ),
vocab_namespace="deprel_labels" vocab_namespace="deprel_labels"
...@@ -160,12 +160,12 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ ...@@ -160,12 +160,12 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_
activations=[GELUActivation(), GELUActivation(), GELUActivation(), LinearActivation()], activations=[GELUActivation(), GELUActivation(), GELUActivation(), LinearActivation()],
char_vocab_namespace="token_characters", char_vocab_namespace="token_characters",
dilation=[1, 2, 4, 1], dilation=[1, 2, 4, 1],
embedding_dim=300, embedding_dim=256,
filters=[256, 256, 256], filters=[256, 256, 256],
input_projection_layer=Linear( input_projection_layer=Linear(
activation=TanhActivation(), activation=TanhActivation(),
dropout_rate=0.25, dropout_rate=0.25,
in_features=1024, in_features=164,
out_features=32 out_features=32
), ),
kernel_size=[3, 3, 3, 1], kernel_size=[3, 3, 3, 1],
...@@ -187,7 +187,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ ...@@ -187,7 +187,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_
activations=[TanhActivation(), LinearActivation()], activations=[TanhActivation(), LinearActivation()],
dropout=[0.25, 0.], dropout=[0.25, 0.],
hidden_dims=[128], hidden_dims=[128],
input_dim=1024, input_dim=164,
num_layers=2, num_layers=2,
vocab_namespace="feats_labels" vocab_namespace="feats_labels"
), ),
...@@ -231,7 +231,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ ...@@ -231,7 +231,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_
activations=[TanhActivation(), LinearActivation()], activations=[TanhActivation(), LinearActivation()],
dropout=[0.25, 0.], dropout=[0.25, 0.],
hidden_dims=[64], hidden_dims=[64],
input_dim=1024, input_dim=164,
num_layers=2, num_layers=2,
vocab_namespace="upostag_labels" vocab_namespace="upostag_labels"
), ),
...@@ -240,7 +240,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_ ...@@ -240,7 +240,7 @@ def default_model(pretrained_transformer_name: str, vocabulary: Vocabulary, use_
activations=[TanhActivation(), LinearActivation()], activations=[TanhActivation(), LinearActivation()],
dropout=[0.25, 0.], dropout=[0.25, 0.],
hidden_dims=[64], hidden_dims=[64],
input_dim=1024, input_dim=164,
num_layers=2, num_layers=2,
vocab_namespace="xpostag_labels" vocab_namespace="xpostag_labels"
) )
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment