diff --git a/combo/config.graph.template.jsonnet b/combo/config.graph.template.jsonnet
index b88c436a54f04796204cc5c086b0657d7e2e313c..fc4c1ea17b7354201dc72489884fbc24e5fa2502 100644
--- a/combo/config.graph.template.jsonnet
+++ b/combo/config.graph.template.jsonnet
@@ -112,10 +112,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
     use_sem: if in_targets("semrel") then true else false,
     token_indexers: {
         token: if use_transformer then {
-            type: "pretrained_transformer_mismatched_fixed",
+            type: "pretrained_transformer_mismatched",
             model_name: pretrained_transformer_name,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-            then {use_fast: false} else {},
         } else {
             # SingleIdTokenIndexer, token as single int
             type: "single_id",
@@ -204,8 +202,6 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
                 type: "transformers_word_embeddings",
                 model_name: pretrained_transformer_name,
                 projection_dim: projected_embedding_dim,
-                tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                then {use_fast: false} else {},
             } else {
                 type: "embeddings_projected",
                 embedding_dim: embedding_dim,
diff --git a/combo/config.template.jsonnet b/combo/config.template.jsonnet
index 2019d7b05bcdcd7ef8180663206e3367b04119de..f4b0d082678297e3a3441c521f237da27be3f9b2 100644
--- a/combo/config.template.jsonnet
+++ b/combo/config.template.jsonnet
@@ -111,10 +111,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
     use_sem: if in_targets("semrel") then true else false,
     token_indexers: {
         token: if use_transformer then {
-            type: "pretrained_transformer_mismatched_fixed",
+            type: "pretrained_transformer_mismatched",
             model_name: pretrained_transformer_name,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-            then {use_fast: false} else {},
         } else {
             # SingleIdTokenIndexer, token as single int
             type: "single_id",
@@ -203,8 +201,6 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
                 type: "transformers_word_embeddings",
                 model_name: pretrained_transformer_name,
                 projection_dim: projected_embedding_dim,
-                tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                then {use_fast: false} else {},
             } else {
                 type: "embeddings_projected",
                 embedding_dim: embedding_dim,
diff --git a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
index 3eee80efd65ad905c447849d9e4cdd93673be522..5f2ea003c6a296937b903d7a0a4c33dab3922429 100644
--- a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
+++ b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
@@ -1,3 +1,7 @@
+"""Kept for backward compatibility of pre-trained models.
+
+TODO(mklimasz) Transform current models' config files before deleting this file.
+"""
 from typing import Optional, Dict, Any, List, Tuple
 
 from allennlp import data
@@ -6,7 +10,6 @@ from allennlp.data import token_indexers, tokenizers
 
 @data.TokenIndexer.register("pretrained_transformer_mismatched_fixed")
 class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransformerMismatchedIndexer):
-    """TODO(mklimasz) Remove during next allennlp update, fixed on allennlp master."""
 
     def __init__(self, model_name: str, namespace: str = "tags", max_length: int = None,
                  tokenizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs) -> None: