From f3f26738dc307788a8bebfa1adada26e440bbcc9 Mon Sep 17 00:00:00 2001
From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com>
Date: Wed, 14 Apr 2021 12:09:42 +0200
Subject: [PATCH] Remove unnecessary dependency from custom transformer
 indexer.

---
 combo/config.graph.template.jsonnet              | 6 +-----
 combo/config.template.jsonnet                    | 6 +-----
 .../pretrained_transformer_mismatched_indexer.py | 5 ++++-
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/combo/config.graph.template.jsonnet b/combo/config.graph.template.jsonnet
index b88c436..fc4c1ea 100644
--- a/combo/config.graph.template.jsonnet
+++ b/combo/config.graph.template.jsonnet
@@ -112,10 +112,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
     use_sem: if in_targets("semrel") then true else false,
     token_indexers: {
         token: if use_transformer then {
-            type: "pretrained_transformer_mismatched_fixed",
+            type: "pretrained_transformer_mismatched",
             model_name: pretrained_transformer_name,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                              then {use_fast: false} else {},
         } else {
             # SingleIdTokenIndexer, token as single int
             type: "single_id",
@@ -204,8 +202,6 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
             type: "transformers_word_embeddings",
             model_name: pretrained_transformer_name,
             projection_dim: projected_embedding_dim,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                              then {use_fast: false} else {},
         } else {
             type: "embeddings_projected",
             embedding_dim: embedding_dim,
diff --git a/combo/config.template.jsonnet b/combo/config.template.jsonnet
index 2019d7b..f4b0d08 100644
--- a/combo/config.template.jsonnet
+++ b/combo/config.template.jsonnet
@@ -111,10 +111,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
     use_sem: if in_targets("semrel") then true else false,
     token_indexers: {
         token: if use_transformer then {
-            type: "pretrained_transformer_mismatched_fixed",
+            type: "pretrained_transformer_mismatched",
             model_name: pretrained_transformer_name,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                              then {use_fast: false} else {},
         } else {
             # SingleIdTokenIndexer, token as single int
             type: "single_id",
@@ -203,8 +201,6 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
             type: "transformers_word_embeddings",
             model_name: pretrained_transformer_name,
             projection_dim: projected_embedding_dim,
-            tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
-                              then {use_fast: false} else {},
         } else {
             type: "embeddings_projected",
             embedding_dim: embedding_dim,
diff --git a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
index 3eee80e..5f2ea00 100644
--- a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
+++ b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py
@@ -1,3 +1,7 @@
+"""Kept for backward compatibility of pre-trained models.
+
+ TODO(mklimasz) Transform current models' config files before deleting this file.
+"""
 from typing import Optional, Dict, Any, List, Tuple
 
 from allennlp import data
@@ -6,7 +10,6 @@ from allennlp.data import token_indexers, tokenizers
 
 @data.TokenIndexer.register("pretrained_transformer_mismatched_fixed")
 class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransformerMismatchedIndexer):
-    """TODO(mklimasz) Remove during next allennlp update, fixed on allennlp master."""
 
     def __init__(self, model_name: str, namespace: str = "tags", max_length: int = None,
                  tokenizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs) -> None:
-- 
GitLab