Project: Syntactic Tools / combo
Commit efe2fe48 authored 1 year ago by Maja Jablonska
Added config template file
parent 0319c75c
1 merge request: !46 Merge COMBO 3.0 into master
Showing 2 changed files with 310 additions and 3 deletions:
  combo/config.template.json   +307 −0
  combo/modules/archival.py    +3 −3
combo/config.template.json  (new file, 0 → 100644)  +307 −0
{
  "model": {
    "type": "semantic_multitask",
    "parameters": {
      "dependency_relation": {
        "type": "combo_dependency_parsing_from_vocab",
        "parameters": {
          "dependency_projection_layer": {
            "type": "linear_layer",
            "parameters": {
              "activation": {"type": "tanh", "parameters": {}},
              "dropout_rate": 0.25,
              "in_features": 1024,
              "out_features": 128
            }
          },
          "head_predictor": {
            "type": "head_prediction",
            "parameters": {
              "cycle_loss_n": 0,
              "dependency_projection_layer": {
                "type": "linear_layer",
                "parameters": {
                  "activation": {"type": "tanh", "parameters": {}},
                  "in_features": 1024,
                  "out_features": 512
                }
              },
              "head_projection_layer": {
                "type": "linear_layer",
                "parameters": {
                  "activation": {"type": "tanh", "parameters": {}},
                  "in_features": 1024,
                  "out_features": 512
                }
              }
            }
          },
          "head_projection_layer": {
            "type": "linear_layer",
            "parameters": {
              "activation": {"type": "tanh", "parameters": {}},
              "dropout_rate": 0.25,
              "in_features": 1024,
              "out_features": 128
            }
          },
          "vocab_namespace": "deprel_labels"
        }
      },
      "lemmatizer": {
        "type": "combo_lemma_predictor_from_vocab",
        "parameters": {
          "activations": [
            {"type": "gelu", "parameters": {}},
            {"type": "gelu", "parameters": {}},
            {"type": "gelu", "parameters": {}},
            {"type": "linear", "parameters": {}}
          ],
          "char_vocab_namespace": "token_characters",
          "dilation": [1, 2, 4, 1],
          "embedding_dim": 300,
          "filters": [256, 256, 256],
          "input_projection_layer": {
            "type": "linear_layer",
            "parameters": {
              "activation": {"type": "tanh", "parameters": {}},
              "dropout_rate": 0.25,
              "in_features": 1024,
              "out_features": 32
            }
          },
          "kernel_size": [3, 3, 3, 1],
          "lemma_vocab_namespace": "lemma_characters",
          "padding": [1, 2, 4, 0],
          "stride": [1, 1, 1, 1]
        }
      },
      "loss_weights": {
        "deprel": 0.8,
        "feats": 0.2,
        "head": 0.2,
        "lemma": 0.05,
        "semrel": 0.05,
        "upostag": 0.05,
        "xpostag": 0.05
      },
      "morphological_feat": {
        "type": "combo_morpho_from_vocab",
        "parameters": {
          "activations": [
            {"type": "tanh", "parameters": {}},
            {"type": "linear", "parameters": {}}
          ],
          "dropout": [0.25, 0.0],
          "hidden_dims": [128],
          "input_dim": 1024,
          "num_layers": 2,
          "vocab_namespace": "feats_labels"
        }
      },
      "regularizer": {
        "type": "base_regularizer",
        "parameters": {
          "regexes": [
            [".*conv1d.*", {"type": "l2_regularizer", "parameters": {"alpha": 1e-6}}],
            [".*forward.*", {"type": "l2_regularizer", "parameters": {"alpha": 1e-6}}],
            [".*backward.*", {"type": "l2_regularizer", "parameters": {"alpha": 1e-6}}],
            [".*char_embed.*", {"type": "l2_regularizer", "parameters": {"alpha": 1e-5}}]
          ]
        }
      },
      "seq_encoder": {
        "type": "combo_encoder",
        "parameters": {
          "layer_dropout_probability": 0.33,
          "stacked_bilstm": {
            "type": "combo_stacked_bilstm",
            "parameters": {
              "hidden_size": 512,
              "input_size": 164,
              "layer_dropout_probability": 0.33,
              "num_layers": 2,
              "recurrent_dropout_probability": 0.33
            }
          }
        }
      },
      "text_field_embedder": {
        "type": "base_text_field_embedder",
        "parameters": {
          "token_embedders": {
            "char": {
              "type": "char_embeddings_token_embedder",
              "parameters": {
                "dilated_cnn_encoder": {
                  "type": "dilated_cnn",
                  "parameters": {
                    "activations": [
                      {"type": "gelu", "parameters": {}},
                      {"type": "gelu", "parameters": {}},
                      {"type": "linear", "parameters": {}}
                    ],
                    "dilation": [1, 2, 4],
                    "filters": [512, 256, 64],
                    "input_dim": 64,
                    "kernel_size": [3, 3, 3],
                    "padding": [1, 2, 4],
                    "stride": [1, 1, 1]
                  }
                },
                "embedding_dim": 64
              }
            },
            "token": {
              "type": "transformers_word_embedder",
              "parameters": {
                "projection_dim": 100
              }
            }
          }
        }
      },
      "upos_tagger": {
        "type": "feedforward_predictor_from_vocab",
        "parameters": {
          "vocab_namespace": "upostag_labels",
          "input_dim": 1024,
          "num_layers": 2,
          "hidden_dims": [64],
          "activations": [
            {"type": "tanh", "parameters": {}},
            {"type": "linear", "parameters": {}}
          ],
          "dropout": [0.25, 0.0]
        }
      },
      "xpos_tagger": {
        "type": "feedforward_predictor_from_vocab",
        "parameters": {
          "vocab_namespace": "xpostag_labels",
          "input_dim": 1024,
          "num_layers": 2,
          "hidden_dims": [64],
          "activations": [
            {"type": "tanh", "parameters": {}},
            {"type": "linear", "parameters": {}}
          ],
          "dropout": [0.25, 0.0]
        }
      }
    }
  },
  "data_loader": {
    "type": "simple_data_loader_from_dataset_reader",
    "parameters": {
      "reader": {
        "type": "conllu_dataset_reader",
        "parameters": {
          "features": ["token", "char"],
          "tokenizer": {"type": "lambo_tokenizer"},
          "lemma_indexers": {
            "char": {
              "type": "characters_const_padding_token_indexer",
              "parameters": {
                "tokenizer": {
                  "type": "character_tokenizer",
                  "parameters": {
                    "end_tokens": ["__END__"],
                    "start_tokens": ["__START__"]
                  }
                },
                "min_padding_length": 32,
                "namespace": "lemma_characters"
              }
            }
          },
          "targets": ["deprel", "feats", "head", "lemma", "upostag", "xpostag"],
          "token_indexers": {
            "char": {
              "type": "characters_const_padding_token_indexer",
              "parameters": {
                "tokenizer": {
                  "type": "character_tokenizer",
                  "parameters": {
                    "end_tokens": ["__END__"],
                    "start_tokens": ["__START__"]
                  }
                },
                "min_padding_length": 32
              }
            },
            "token": {
              "type": "pretrained_transformer_mismatched_fixed_token_indexer",
              "parameters": {
                "model_name": "allegro/herbert-base-cased"
              }
            }
          },
          "use_sem": false
        }
      },
      "batch_size": 1,
      "shuffle": true,
      "batches_per_epoch": 64,
      "quiet": false
    }
  },
  "dataset_reader": {
    "type": "conllu_dataset_reader",
    "parameters": {
      "features": ["token", "char"],
      "tokenizer": {"type": "lambo_tokenizer"},
      "lemma_indexers": {
        "char": {
          "type": "characters_const_padding_token_indexer",
          "parameters": {
            "tokenizer": {
              "type": "character_tokenizer",
              "parameters": {
                "end_tokens": ["__END__"],
                "start_tokens": ["__START__"]
              }
            },
            "min_padding_length": 32,
            "namespace": "lemma_characters"
          }
        }
      },
      "targets": ["deprel", "feats", "head", "lemma", "upostag", "xpostag"],
      "token_indexers": {
        "char": {
          "type": "characters_const_padding_token_indexer",
          "parameters": {
            "tokenizer": {
              "type": "character_tokenizer",
              "parameters": {
                "end_tokens": ["__END__"],
                "start_tokens": ["__START__"]
              }
            },
            "min_padding_length": 32
          }
        },
        "token": {
          "type": "pretrained_transformer_mismatched_fixed_token_indexer",
          "parameters": {
            "model_name": "allegro/herbert-base-cased"
          }
        }
      },
      "use_sem": false
    }
  },
  "training": {},
  "model_name": "allegro/herbert-base-cased"
}
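The template is plain JSON, so it can be loaded and specialized without any COMBO-specific tooling. A minimal sketch, using only the Python standard library; the output file name and the overridden values below are illustrative assumptions, not taken from this commit:

```python
# Minimal sketch: load the template and write out a concrete config.
# Assumptions (not from this commit): the output name "config.json" and the
# overridden model_name / batch_size values are illustrative only.
import json

with open("combo/config.template.json", encoding="utf-8") as f:
    config = json.load(f)

# The template keeps the HerBERT checkpoint name at the top level; swapping it
# reuses the same architecture with a different transformer.
config["model_name"] = "bert-base-multilingual-cased"   # illustrative value
config["data_loader"]["parameters"]["batch_size"] = 8   # illustrative value

with open("config.json", "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)
```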
combo/modules/archival.py  +3 −3
@@ -81,7 +81,7 @@ def archive(model: Model,
     with (TemporaryDirectory(os.path.join('tmp')) as t,
           BytesIO() as out_stream,
           tarfile.open(os.path.join(serialization_dir, 'model.tar.gz'), 'w|gz') as tar_file):
-        add_to_tar(tar_file, out_stream, json.dumps(parameters).encode(), 'config.json')
+        add_to_tar(tar_file, out_stream, json.dumps(parameters).encode(), 'config.template.json')
         weights_path = os.path.join(t, 'weights.th')
         torch.save(model.state_dict(), weights_path)
         tar_file.add(weights_path, 'weights.th')
@@ -100,7 +100,7 @@ def extracted_archive(resolved_archive_file, cleanup=True):
     with tarfile.open(resolved_archive_file) as archive:
         subdir_and_files = [
             tarinfo for tarinfo in archive.getmembers()
-            if (any([tarinfo.name.endswith(f) for f in ['config.json', 'weights.th']])
+            if (any([tarinfo.name.endswith(f) for f in ['config.template.json', 'weights.th']])
                 or 'vocabulary' in tarinfo.name)
         ]
         for f in subdir_and_files:
@@ -127,7 +127,7 @@ def load_archive(url_or_filename: Union[PathLike, str],
     with extracted_archive(rarchive_file) as archive_file:
         model = Model.load(archive_file, cuda_device=cuda_device)
-        config_path = os.path.join(archive_file, 'config.json')
+        config_path = os.path.join(archive_file, 'config.template.json')
         with open(config_path, 'r') as f:
             config = json.load(f)
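Taken together, the three one-line changes keep archival consistent with the new template: archive() now stores the serialized parameters in the tarball as config.template.json, extracted_archive() whitelists that name when unpacking, and load_archive() reads the configuration back from it. A quick sketch for checking an archive produced by the updated code, assuming a local model.tar.gz (the path is illustrative):

```python
# Minimal sketch: list the members of an archive written by the updated
# archive() function. The path "model.tar.gz" is an illustrative assumption.
import tarfile

with tarfile.open("model.tar.gz") as tar:
    names = tar.getnames()

print(names)
# After this commit the config is expected under its template name.
assert "config.template.json" in names
assert "weights.th" in names
```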