Commit 2fa951df authored by Mateusz Klimaszewski

Change a few transformer models. Make position_ids unnecessary. Update to PyTorch 1.7.

parent 2e2525de
Pipeline #2989 passed
@@ -112,10 +112,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
     use_sem: if in_targets("semrel") then true else false,
     token_indexers: {
         token: if use_transformer then {
-            type: "pretrained_transformer_mismatched_fixed",
-            model_name: pretrained_transformer_name,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                              then {use_fast: false} else {},
+            type: "pretrained_transformer_mismatched",
+            model_name: pretrained_transformer_name
         } else {
             # SingleIdTokenIndexer, token as single int
             type: "single_id",
@@ -204,9 +202,7 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
             type: "transformers_word_embeddings",
             last_layer_only: false,
             model_name: pretrained_transformer_name,
-            projection_dim: projected_embedding_dim,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                              then {use_fast: false} else {},
+            projection_dim: projected_embedding_dim
         } else {
             type: "embeddings_projected",
             embedding_dim: embedding_dim,
...
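For context, a rough Python equivalent of what the updated indexer block resolves to under AllenNLP's registry (a sketch, assuming the stock "pretrained_transformer_mismatched" type maps to PretrainedTransformerMismatchedIndexer; the HerBERT model name is just the value this commit sets elsewhere):

    from allennlp.data.token_indexers import PretrainedTransformerMismatchedIndexer

    # The stock indexer no longer needs the HerBERT-specific
    # tokenizer_kwargs override ({use_fast: false}) that the removed
    # "pretrained_transformer_mismatched_fixed" block carried.
    indexer = PretrainedTransformerMismatchedIndexer(
        model_name="allegro/herbert-large-cased",  # pretrained_transformer_name
    )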
@@ -105,6 +105,8 @@ class TransformersWordEmbedder(token_embedders.PretrainedTransformerMismatchedEmbedder):
     Tested with Bert (but should work for other models as well).
     """
+    authorized_missing_keys = [r"position_ids$"]
+
     def __init__(self,
                  model_name: str,
                  projection_dim: int = 0,
...
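The new authorized_missing_keys attribute is what makes position_ids optional: checkpoints saved before transformers started registering the position_ids buffer would otherwise fail strict state-dict loading. Below is a minimal sketch of how such a regex allow-list is typically applied; load_filtered and its signature are illustrative, not COMBO's actual loading code:

    import re
    import torch

    def load_filtered(model: torch.nn.Module,
                      state_dict: dict,
                      authorized_missing_keys=(r"position_ids$",)) -> None:
        """Load a state dict, tolerating allow-listed missing keys.

        Keys absent from the checkpoint (e.g. a ``position_ids`` buffer
        the current model registers but an older checkpoint lacks) are
        suppressed when they match one of the regexes; any other
        missing or unexpected key still raises.
        """
        missing, unexpected = model.load_state_dict(state_dict, strict=False)
        patterns = [re.compile(p) for p in authorized_missing_keys]
        # Keep only the missing keys that are NOT explicitly authorized.
        missing = [k for k in missing if not any(p.search(k) for p in patterns)]
        if missing or unexpected:
            raise RuntimeError(f"missing: {missing}, unexpected: {unexpected}")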
@@ -32,6 +32,10 @@ def graph_and_tree_merge(tree_arc_scores,
             if not d:
                 continue
             label = idx2label[tree_rel_scores[d - 1]]
+            # graph_label = graph_idx2label[graph_rel_pred[d - 1][h - 1]]
+            # if ">" in graph_label and label in graph_label:
+            #     print("Using graph label instead of tree.")
+            #     label = graph_label
             if label != _ACL_REL_CL:
                 graph[h].append(d)
                 labeled_graph[h].append((d, label))
@@ -118,16 +122,17 @@ def restore_collapse_edges(tree_tokens):
         if ">" in current_node_relation:
             second_empty_node_relation, current_node_relation = current_node_relation.split(">")
             deps[i] = f"{ehead}:{current_node_relation}"
+            second_ehead = f"{len(tree_tokens)}.{len(empty_tokens) + 2}"
             empty_tokens.append(
                 {
                     "id": ehead,
-                    "deps": f"{head}:{empty_node_relation}"
+                    "deps": f"{second_ehead}:{empty_node_relation}"
                 }
             )
             empty_tokens.append(
                 {
-                    "id": f"{len(tree_tokens)}.{len(empty_tokens) + 1}",
-                    "deps": f"{ehead}:{second_empty_node_relation}"
+                    "id": second_ehead,
+                    "deps": f"{head}:{second_empty_node_relation}"
                 }
             )
...
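To see what the rewiring changes, here is a hypothetical walk-through with invented values (12 regular tokens, an empty empty_tokens list, and ehead = "12.1" assumed to have been created earlier): a collapsed relation like "conj>appos" now yields the chain real head -> second empty node -> first empty node -> current token.

    # All ids, counts, and relation labels below are invented for illustration.
    tree_tokens = [f"w{i}" for i in range(1, 13)]   # 12 regular tokens
    empty_tokens = []

    head = "5"                       # real head of the collapsed edge
    ehead = "12.1"                   # first empty node id (assumed created earlier)
    empty_node_relation = "nsubj"    # relation kept from an earlier split (assumed)
    current_node_relation = "conj>appos"

    second_empty_node_relation, current_node_relation = current_node_relation.split(">")
    second_ehead = f"{len(tree_tokens)}.{len(empty_tokens) + 2}"   # "12.2"

    empty_tokens.append({"id": ehead, "deps": f"{second_ehead}:{empty_node_relation}"})
    empty_tokens.append({"id": second_ehead, "deps": f"{head}:{second_empty_node_relation}"})

    # Resulting chain: 5 -conj-> 12.2 -nsubj-> 12.1 -appos-> current token.
    # The old code instead attached 12.1 directly to the real head and
    # left 12.2 dangling off 12.1, breaking the chain.
    print(empty_tokens)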
@@ -4,7 +4,7 @@ import subprocess
 LANG2TRANSFORMER = {
     "en": "bert-base-cased",
-    "pl": "allegro/herbert-base-cased",
+    "pl": "allegro/herbert-large-cased",
     "zh": "bert-base-chinese",
     "fi": "TurkuNLP/bert-base-finnish-cased-v1",
     "ko": "kykim/bert-kor-base",
@@ -12,12 +12,12 @@ LANG2TRANSFORMER = {
     "ar": "aubmindlab/bert-base-arabertv2",
     "eu": "ixa-ehu/berteus-base-cased",
     "tr": "dbmdz/bert-base-turkish-cased",
-    "bg": "iarfmoose/roberta-base-bulgarian",
-    "nl": "GroNLP/bert-base-dutch-cased",
+    "bg": "xlm-roberta-large",
+    "nl": "xlm-roberta-large",
     "fr": "camembert-base",
-    "it": "dbmdz/bert-base-italian-cased",
-    "ru": "blinoff/roberta-base-russian-v0",
-    "sv": "KB/bert-base-swedish-cased",
+    "it": "xlm-roberta-large",
+    "ru": "xlm-roberta-large",
+    "sv": "xlm-roberta-large",
     "uk": "xlm-roberta-large",
     "ta": "xlm-roberta-large",
     "sk": "xlm-roberta-large",
...
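A minimal sketch (not the repository's training script) of how a language-to-model mapping like LANG2TRANSFORMER is typically consumed: pick the checkpoint for a language code and load it with the HuggingFace Auto classes. The load_transformer helper and its fallback parameter are assumptions for illustration.

    from transformers import AutoModel, AutoTokenizer

    LANG2TRANSFORMER = {
        "en": "bert-base-cased",
        "pl": "allegro/herbert-large-cased",
        # ... remaining languages as in the diff above
    }

    def load_transformer(lang: str, fallback: str = "xlm-roberta-large"):
        # Fall back to a multilingual model for languages without an entry.
        model_name = LANG2TRANSFORMER.get(lang, fallback)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        return tokenizer, model

    tokenizer, model = load_transformer("pl")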
@@ -15,7 +15,7 @@ REQUIREMENTS = [
     'scipy<1.6.0;python_version<"3.7"',  # SciPy 1.6.0 works for 3.7+
     'spacy==2.3.2',
     'scikit-learn<=0.23.2',
-    'torch==1.6.0',
+    'torch==1.7.0',
     'tqdm==4.43.0',
     'transformers==4.0.1',
     'urllib3==1.25.11',
...