Commit 2a413006 authored by Mateusz Klimaszewski

Update allennlp to 1.2.0.

parent dfec6d56
Merge requests: !4 Documentation, !3 Herbert configuration and AllenNLP 1.2.0 update.
from .pretrained_transformer_mismatched_indexer import PretrainedTransformerMismatchedIndexer
from .token_characters_indexer import TokenCharactersIndexer
from .token_features_indexer import TokenFeatsIndexer
from typing import Optional, Dict, Any, List, Tuple
from allennlp import data
from allennlp.data import token_indexers, tokenizers
@data.TokenIndexer.register("pretrained_transformer_mismatched_fixed")
class PretrainedTransformerMismatchedIndexer(token_indexers.PretrainedTransformerMismatchedIndexer):
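# Patched copy of AllenNLP's mismatched indexer: the matched indexer, tokenizer and
# special-token counts are rebuilt from the patched classes defined later in this file,
# so word-to-wordpiece offsets can be computed without a fast tokenizer.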
def __init__(self, model_name: str, namespace: str = "tags", max_length: int = None,
tokenizer_kwargs: Optional[Dict[str, Any]] = None, **kwargs) -> None:
# The matched version v.s. mismatched
super().__init__(model_name, namespace, max_length, tokenizer_kwargs, **kwargs)
self._matched_indexer = PretrainedTransformerIndexer(
model_name,
namespace=namespace,
max_length=max_length,
tokenizer_kwargs=tokenizer_kwargs,
**kwargs,
)
self._allennlp_tokenizer = self._matched_indexer._allennlp_tokenizer
self._tokenizer = self._matched_indexer._tokenizer
self._num_added_start_tokens = self._matched_indexer._num_added_start_tokens
self._num_added_end_tokens = self._matched_indexer._num_added_end_tokens
class PretrainedTransformerIndexer(token_indexers.PretrainedTransformerIndexer):
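# Patched matched indexer: mirrors the AllenNLP constructor, but builds its tokenizer
# from the patched PretrainedTransformerTokenizer defined below.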
def __init__(
self,
model_name: str,
namespace: str = "tags",
max_length: int = None,
tokenizer_kwargs: Optional[Dict[str, Any]] = None,
**kwargs,
) -> None:
super().__init__(model_name, namespace, max_length, tokenizer_kwargs, **kwargs)
self._namespace = namespace
self._allennlp_tokenizer = PretrainedTransformerTokenizer(
model_name, tokenizer_kwargs=tokenizer_kwargs
)
self._tokenizer = self._allennlp_tokenizer.tokenizer
self._added_to_vocabulary = False
self._num_added_start_tokens = len(self._allennlp_tokenizer.single_sequence_start_tokens)
self._num_added_end_tokens = len(self._allennlp_tokenizer.single_sequence_end_tokens)
self._max_length = max_length
if self._max_length is not None:
num_added_tokens = len(self._allennlp_tokenizer.tokenize("a")) - 1
self._effective_max_length = ( # we need to take into account special tokens
self._max_length - num_added_tokens
)
if self._effective_max_length <= 0:
raise ValueError(
"max_length needs to be greater than the number of special tokens inserted."
)
class PretrainedTransformerTokenizer(tokenizers.PretrainedTransformerTokenizer):
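# Patched tokenizer: computes word-to-wordpiece offsets manually via encode_plus instead
# of return_offsets_mapping, which only the fast (Rust) tokenizers support.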
def _intra_word_tokenize(
self, string_tokens: List[str]
) -> Tuple[List[data.Token], List[Optional[Tuple[int, int]]]]:
tokens: List[data.Token] = []
offsets: List[Optional[Tuple[int, int]]] = []
for token_string in string_tokens:
wordpieces = self.tokenizer.encode_plus(
token_string,
add_special_tokens=False,
return_tensors=None,
return_offsets_mapping=False,
return_attention_mask=False,
)
wp_ids = wordpieces["input_ids"]
if len(wp_ids) > 0:
offsets.append((len(tokens), len(tokens) + len(wp_ids) - 1))
tokens.extend(
data.Token(text=wp_text, text_id=wp_id)
for wp_id, wp_text in zip(wp_ids, self.tokenizer.convert_ids_to_tokens(wp_ids))
)
else:
offsets.append(None)
return tokens, offsets
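A minimal usage sketch of the patched tokenizer (not part of the commit; the combo module path is assumed and allegro/herbert-base-cased is only a placeholder checkpoint):

# Assumed module path inside the combo package.
from combo.data.token_indexers import pretrained_transformer_mismatched_indexer as ptmi

# Slow (Python) tokenizer, as the training configuration below requests for HerBERT models.
tokenizer = ptmi.PretrainedTransformerTokenizer(
    "allegro/herbert-base-cased",
    tokenizer_kwargs={"use_fast": False},
)
tokens, offsets = tokenizer._intra_word_tokenize(["Ala", "ma", "kota"])
# `tokens` holds the word pieces with their vocabulary ids; `offsets[i]` is the inclusive
# (first, last) word-piece index for input word i, or None if the word produced no pieces.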
"""Embeddings."""
-from typing import Optional
+from typing import Optional, Dict, Any
import torch
import torch.nn as nn
@@ -110,8 +110,10 @@ class TransformersWordEmbedder(token_embedders.PretrainedTransformerMismatchedEmbedder):
projection_dim: int,
projection_activation: Optional[allen_nn.Activation] = lambda x: x,
projection_dropout_rate: Optional[float] = 0.0,
-freeze_transformer: bool = True):
-super().__init__(model_name)
+freeze_transformer: bool = True,
+tokenizer_kwargs: Optional[Dict[str, Any]] = None,
+transformer_kwargs: Optional[Dict[str, Any]] = None):
+super().__init__(model_name, tokenizer_kwargs=tokenizer_kwargs, transformer_kwargs=transformer_kwargs)
self.freeze_transformer = freeze_transformer
if self.freeze_transformer:
self._matched_embedder.eval()
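A corresponding construction sketch for the embedder (also not part of the commit; the import path and the leading model_name parameter are assumptions inferred from the super().__init__ call above):

from combo.models.embeddings import TransformersWordEmbedder  # assumed module path

embedder = TransformersWordEmbedder(
    model_name="allegro/herbert-base-cased",  # placeholder checkpoint
    projection_dim=100,
    tokenizer_kwargs={"use_fast": False},     # forwarded to the underlying HuggingFace tokenizer
    freeze_transformer=True,                  # puts the wrapped embedder into eval mode (see above)
)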
......
@@ -3,7 +3,7 @@ import logging
import os
import time
import traceback
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List, Optional, Union
import torch
import torch.distributed as dist
@@ -30,7 +30,8 @@ logger = logging.getLogger(__name__)
@training.EpochCallback.register("transfer_patience")
class TransferPatienceEpochCallback(training.EpochCallback):
-def __call__(self, trainer: "training.GradientDescentTrainer", metrics: Dict[str, Any], epoch: int) -> None:
+def __call__(self, trainer: "training.GradientDescentTrainer", metrics: Dict[str, Any], epoch: int,
+             is_master: bool) -> None:
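# Copy the scheduler's patience into the metric tracker and reset its no-improvement counter.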
if trainer._learning_rate_scheduler and trainer._learning_rate_scheduler.patience is not None:
trainer._metric_tracker._patience = trainer._learning_rate_scheduler.patience
trainer._metric_tracker._epochs_with_no_improvement = 0
@@ -45,20 +46,23 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
patience: Optional[int] = None, validation_metric: str = "-loss",
validation_data_loader: data.DataLoader = None, num_epochs: int = 20,
serialization_dir: Optional[str] = None, checkpointer: checkpointer.Checkpointer = None,
-cuda_device: int = -1,
+cuda_device: Optional[Union[int, torch.device]] = -1,
grad_norm: Optional[float] = None, grad_clipping: Optional[float] = None,
learning_rate_scheduler: Optional[learning_rate_schedulers.LearningRateScheduler] = None,
momentum_scheduler: Optional[momentum_schedulers.MomentumScheduler] = None,
tensorboard_writer: allen_tensorboard_writer.TensorboardWriter = None,
moving_average: Optional[moving_average.MovingAverage] = None,
batch_callbacks: List[training.BatchCallback] = None,
-epoch_callbacks: List[training.EpochCallback] = None, distributed: bool = False, local_rank: int = 0,
+epoch_callbacks: List[training.EpochCallback] = None,
+end_callbacks: List[training.EpochCallback] = None,
+trainer_callbacks: List[training.TrainerCallback] = None,
+distributed: bool = False, local_rank: int = 0,
world_size: int = 1, num_gradient_accumulation_steps: int = 1,
use_amp: bool = False) -> None:
super().__init__(model, optimizer, data_loader, patience, validation_metric, validation_data_loader, num_epochs,
serialization_dir, checkpointer, cuda_device, grad_norm, grad_clipping,
learning_rate_scheduler, momentum_scheduler, tensorboard_writer, moving_average,
-batch_callbacks, epoch_callbacks, distributed, local_rank, world_size,
+batch_callbacks, epoch_callbacks, end_callbacks, trainer_callbacks, distributed, local_rank, world_size,
num_gradient_accumulation_steps, use_amp)
# TODO extract param to constructor (+ constructor method?)
self.validate_every_n = 5
@@ -93,7 +97,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
metrics["best_validation_" + key] = value
for callback in self._epoch_callbacks:
-callback(self, metrics={}, epoch=-1)
+callback(self, metrics={}, epoch=-1, is_master=True)
for epoch in range(epoch_counter, self._num_epochs):
epoch_start_time = time.time()
@@ -190,7 +194,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
dist.barrier()
for callback in self._epoch_callbacks:
-callback(self, metrics=metrics, epoch=epoch)
+callback(self, metrics=metrics, epoch=epoch, is_master=self._master)
epoch_elapsed_time = time.time() - epoch_start_time
logger.info("Epoch duration: %s", datetime.timedelta(seconds=epoch_elapsed_time))
@@ -243,7 +247,7 @@ class GradientDescentTrainer(training.GradientDescentTrainer):
batch_callbacks: List[training.BatchCallback] = None,
epoch_callbacks: List[training.EpochCallback] = None,
) -> "training.Trainer":
-if tensorboard_writer.construct() is None:
+if tensorboard_writer is None:
tensorboard_writer = common.Lazy(combo_tensorboard_writer.NullTensorboardWriter)
return super().from_partial_objects(
model=model,
......
@@ -112,8 +112,10 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
use_sem: if in_targets("semrel") then true else false,
token_indexers: {
token: if use_transformer then {
type: "pretrained_transformer_mismatched",
type: "pretrained_transformer_mismatched_fixed",
model_name: pretrained_transformer_name,
+tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
+then {use_fast: false} else {},
} else {
# SingleIdTokenIndexer, token as single int
type: "single_id",
@@ -202,6 +204,8 @@ assert pretrained_tokens == null || pretrained_transformer_name == null: "Can't
type: "transformers_word_embeddings",
model_name: pretrained_transformer_name,
projection_dim: projected_embedding_dim,
+tokenizer_kwargs: if std.startsWith(pretrained_transformer_name, "allegro/herbert")
+then {use_fast: false} else {},
} else {
type: "embeddings_projected",
embedding_dim: embedding_dim,
......
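For reference, a hedged Python equivalent of the pretrained_transformer_mismatched_fixed indexer configured above, built through AllenNLP's Params machinery (the combo import that triggers registration is an assumed path):

from allennlp.common import Params
from allennlp.data import TokenIndexer

import combo.data.token_indexers  # assumed import; registers "pretrained_transformer_mismatched_fixed"

indexer = TokenIndexer.from_params(Params({
    "type": "pretrained_transformer_mismatched_fixed",
    "model_name": "allegro/herbert-base-cased",
    "tokenizer_kwargs": {"use_fast": False},
}))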
@@ -3,8 +3,9 @@ from setuptools import find_packages, setup
REQUIREMENTS = [
'absl-py==0.9.0',
-'allennlp==1.1.0',
+'allennlp==1.2.0',
'conllu==2.3.2',
'dataclasses==0.5',
+'dataclasses-json==0.5.2',
'joblib==0.14.1',
'jsonnet==0.15.0',
@@ -13,7 +14,7 @@ REQUIREMENTS = [
'tensorboard==2.1.0',
'torch==1.6.0',
'tqdm==4.43.0',
-'transformers>=3.0.0,<3.1.0',
+'transformers>=3.4.0,<3.5',
'urllib3>=1.25.11',
]
......