Skip to content
Snippets Groups Projects

Add try/except clause for sentences with a large number of wordpieces.

1 file
+ 25
19
Compare changes
  • Side-by-side
  • Inline
import logging
import sys
from typing import Optional, Dict, Any, List, Tuple
from allennlp import data
from allennlp.data import token_indexers, tokenizers, IndexedTokenList, vocabulary
from overrides import overrides
from typing import List
logger = logging.getLogger(__name__)
@data.TokenIndexer.register("pretrained_transformer_mismatched_fixed")
class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransformerMismatchedIndexer):
@@ -34,6 +35,7 @@ class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransforme
Method is overridden in order to raise an error when the number of tokens needed to embed a sentence exceeds the
maximum input of a model.
"""
try:
self._matched_indexer._add_encoding_to_vocabulary_if_needed(vocabulary)
wordpieces, offsets = self._allennlp_tokenizer.intra_word_tokenize(
@@ -57,6 +59,10 @@ class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransforme
return self._matched_indexer._postprocess_output(output)
except ValueError as value_error:
logger.error(value_error)
sys.exit(1)
class PretrainedTransformerIndexer(token_indexers.PretrainedTransformerIndexer):
Loading