From f3f26738dc307788a8bebfa1adada26e440bbcc9 Mon Sep 17 00:00:00 2001
From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com>
Date: Wed, 14 Apr 2021 12:09:42 +0200
Subject: [PATCH] Remove unnecessary dependency from custom transformer
 indexer.

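The config templates switch from the custom
"pretrained_transformer_mismatched_fixed" indexer to the upstream
"pretrained_transformer_mismatched" one and drop the allegro/herbert
tokenizer_kwargs workaround. The custom indexer stays registered only for
backward compatibility with already trained models.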
---
 combo/config.graph.template.jsonnet                         | 6 +-----
 combo/config.template.jsonnet                               | 6 +-----
 .../pretrained_transformer_mismatched_indexer.py            | 5 ++++-
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/combo/config.graph.template.jsonnet b/combo/config.graph.template.jsonnet
index b88c436..fc4c1ea 100644
--- a/combo/config.graph.template.jsonnet
+++ b/combo/config.graph.template.jsonnet
@@ -112,10 +112,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
         use_sem: if in_targets("semrel") then true else false,
         token_indexers: {
             token: if use_transformer then {
-                type: "pretrained_transformer_mismatched_fixed",
+                type: "pretrained_transformer_mismatched",
                 model_name: pretrained_transformer_name,
-                tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                                  then {use_fast: false} else {},
             } else {
                 # SingleIdTokenIndexer, token as single int
                 type: "single_id",
@@ -204,8 +202,6 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
                     type: "transformers_word_embeddings",
                     model_name: pretrained_transformer_name,
                     projection_dim: projected_embedding_dim,
-                    tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                                      then {use_fast: false} else {},
                 } else {
                     type: "embeddings_projected",
                     embedding_dim: embedding_dim,
diff --git a/combo/config.template.jsonnet b/combo/config.template.jsonnet
index 2019d7b..f4b0d08 100644
--- a/combo/config.template.jsonnet
+++ b/combo/config.template.jsonnet
@@ -111,10 +111,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
         use_sem: if in_targets("semrel") then true else false,
         token_indexers: {
             token: if use_transformer then {
-                type: "pretrained_transformer_mismatched_fixed",
+                type: "pretrained_transformer_mismatched",
                 model_name: pretrained_transformer_name,
-                tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                                  then {use_fast: false} else {},
             } else {
                 # SingleIdTokenIndexer, token as single int
                 type: "single_id",
@@ -203,8 +201,6 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
                     type: "transformers_word_embeddings",
                     model_name: pretrained_transformer_name,
                     projection_dim: projected_embedding_dim,
-                    tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                                      then {use_fast: false} else {},
                 } else {
                     type: "embeddings_projected",
                     embedding_dim: embedding_dim,
diff --git a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
index 3eee80e..5f2ea00 100644
--- a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
+++ b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
@@ -1,3 +1,7 @@
+"""Kept for backward compatibility of pre-trained models.
+
+   TODO(mklimasz) Migrate current models' config files before deleting this file.
+"""
 from typing import Optional, Dict, Any, List, Tuple
 
 from allennlp import data
@@ -6,7 +10,6 @@ from allennlp.data import token_indexers, tokenizers
 
 @data.TokenIndexer.register("pretrained_transformer_mismatched_fixed")
 class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransformerMismatchedIndexer):
-    """TODO(mklimasz) Remove during next allennlp update, fixed on allennlp master."""
 
     def __init__(self, model_name: str, namespace: str = "tags", max_length: int = None,
                  tokenizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs) -> None:
-- 
GitLab
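
Below is a minimal sketch (not the repository file verbatim, and assuming
allennlp is installed) of the registration pattern this patch relies on: the
custom class stays registered under the legacy type name
"pretrained_transformer_mismatched_fixed" so configs of already trained models
still deserialize, while new configs reference the upstream
"pretrained_transformer_mismatched" type directly. The signature mirrors the
one visible in the hunk; the body here simply delegates upward.

    from typing import Any, Dict, Optional

    from allennlp import data
    from allennlp.data import token_indexers


    # Keep the legacy type name resolvable; upstream allennlp has since
    # absorbed the original fix, so new training runs no longer need this.
    @data.TokenIndexer.register("pretrained_transformer_mismatched_fixed")
    class PretrainedTransformerMismatchedIndexer(
            token_indexers.PretrainedTransformerMismatchedIndexer):

        def __init__(self, model_name: str, namespace: str = "tags",
                     max_length: int = None,
                     tokenizer_kwargs: Optional[Dict[str, Any]] = None,
                     **kwargs) -> None:
            super().__init__(model_name=model_name, namespace=namespace,
                             max_length=max_length,
                             tokenizer_kwargs=tokenizer_kwargs, **kwargs)

Once that module is imported, both type names construct an indexer from a
config; the model name below is only an example of one matching the
allegro/herbert prefix checked in the old templates:

    from allennlp.common import Params
    from allennlp.data import TokenIndexer

    legacy = TokenIndexer.from_params(Params({
        "type": "pretrained_transformer_mismatched_fixed",
        "model_name": "allegro/herbert-base-cased",
    }))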