diff --git a/combo/config.graph.template.jsonnet b/combo/config.graph.template.jsonnet
index 708cfa335f42007de2a9b987db7ef0977c0e19c0..a4725606e3cbc1917c46966ee3bf38833de969bc 100644
--- a/combo/config.graph.template.jsonnet
+++ b/combo/config.graph.template.jsonnet
@@ -112,10 +112,9 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
         use_sem: if in_targets("semrel") then true else false,
         token_indexers: {
             token: if use_transformer then {
-                type: "pretrained_transformer_mismatched_fixed",
-                model_name: pretrained_transformer_name,
-                tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                                  then {use_fast: false} else {},
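+                # PretrainedTransformerMismatchedIndexer, maps each token to wordpieces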
+                type: "pretrained_transformer_mismatched",
+                model_name: pretrained_transformer_name,
             } else {
                 # SingleIdTokenIndexer, token as single int
                 type: "single_id",
@@ -204,9 +203,7 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
                     type: "transformers_word_embeddings",
                     last_layer_only: false,
                     model_name: pretrained_transformer_name,
-                    projection_dim: projected_embedding_dim,
-                    tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                                      then {use_fast: false} else {},
+                    projection_dim: projected_embedding_dim,
                 } else {
                     type: "embeddings_projected",
                     embedding_dim: embedding_dim,
diff --git a/combo/models/embeddings.py b/combo/models/embeddings.py
index d8c3d713631701064e28802ac684ab0bc3bad0b8..c8499bafd6da01d063210431967d2599c079da3f 100644
--- a/combo/models/embeddings.py
+++ b/combo/models/embeddings.py
@@ -105,6 +105,10 @@ class TransformersWordEmbedder(token_embedders.PretrainedTransformerMismatchedEm
     Tested with Bert (but should work for other models as well).
     """
 
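+    # "position_ids" is a registered buffer, not a learned parameter, so it
+    # may be missing from a checkpoint without invalidating the load.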
+    authorized_missing_keys = [r"position_ids$"]
+
     def __init__(self,
                  model_name: str,
                  projection_dim: int = 0,
diff --git a/combo/utils/graph.py b/combo/utils/graph.py
index b4c9632811d34037d1155d0c349480e2276cc738..f61a68e5b835da0c2ce3dac438425c602b084240 100644
--- a/combo/utils/graph.py
+++ b/combo/utils/graph.py
@@ -32,6 +32,12 @@ def graph_and_tree_merge(tree_arc_scores,
         if not d:
             continue
         label = idx2label[tree_rel_scores[d - 1]]
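+        # Disabled experiment: prefer the graph label over the tree label when
+        # the graph predicts a collapsed (">") relation containing it.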
+        # graph_label = graph_idx2label[graph_rel_pred[d - 1][h - 1]]
+        # if ">" in graph_label and label in graph_label:
+        #     print("Using graph label instead of tree.")
+        #     label = graph_label
         if label != _ACL_REL_CL:
             graph[h].append(d)
             labeled_graph[h].append((d, label))
@@ -118,16 +124,19 @@ def restore_collapse_edges(tree_tokens):
                 if ">" in current_node_relation:
                     second_empty_node_relation, current_node_relation = current_node_relation.split(">")
                     deps[i] = f"{ehead}:{current_node_relation}"
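+                    # Chain the two empty nodes so the path reads:
+                    # current token -> ehead -> second_ehead -> head.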
+                    second_ehead = f"{len(tree_tokens)}.{len(empty_tokens) + 2}"
                     empty_tokens.append(
                         {
                             "id": ehead,
-                            "deps": f"{head}:{empty_node_relation}"
+                            "deps": f"{second_ehead}:{empty_node_relation}"
                         }
                     )
                     empty_tokens.append(
                         {
-                            "id": f"{len(tree_tokens)}.{len(empty_tokens) + 1}",
-                            "deps": f"{ehead}:{second_empty_node_relation}"
+                            "id": second_ehead,
+                            "deps": f"{head}:{second_empty_node_relation}"
                         }
                     )
 
diff --git a/scripts/utils.py b/scripts/utils.py
index 925f13b4c9cf143d6ff3fc15e67ca96693374139..09f7591441327c25be5bf2c34931b4e59dbfeedb 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -4,7 +4,7 @@ import subprocess
 
 LANG2TRANSFORMER = {
     "en": "bert-base-cased",
-    "pl": "allegro/herbert-base-cased",
+    "pl": "allegro/herbert-large-cased",
     "zh": "bert-base-chinese",
     "fi": "TurkuNLP/bert-base-finnish-cased-v1",
     "ko": "kykim/bert-kor-base",
@@ -12,12 +12,13 @@ LANG2TRANSFORMER = {
     "ar": "aubmindlab/bert-base-arabertv2",
     "eu": "ixa-ehu/berteus-base-cased",
     "tr": "dbmdz/bert-base-turkish-cased",
-    "bg": "iarfmoose/roberta-base-bulgarian",
-    "nl": "GroNLP/bert-base-dutch-cased",
+    "bg": "xlm-roberta-large",
+    "nl": "xlm-roberta-large",
     "fr": "camembert-base",
-    "it": "dbmdz/bert-base-italian-cased",
-    "ru": "blinoff/roberta-base-russian-v0",
-    "sv": "KB/bert-base-swedish-cased",
+    "it": "xlm-roberta-large",
+    "ru": "xlm-roberta-large",
+    "sv": "xlm-roberta-large",
     "uk": "xlm-roberta-large",
     "ta": "xlm-roberta-large",
     "sk": "xlm-roberta-large",
diff --git a/setup.py b/setup.py
index e1354b76cf47145c09f9da140068b9a22f68a4d2..06717badfa4d6b50f21c6c7230584403dacb2e57 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@ REQUIREMENTS = [
     'scipy<1.6.0;python_version<"3.7"',  # SciPy 1.6.0 works for 3.7+
     'spacy==2.3.2',
     'scikit-learn<=0.23.2',
-    'torch==1.6.0',
+    'torch==1.7.0',
     'tqdm==4.43.0',
     'transformers==4.0.1',
     'urllib3==1.25.11',