Commit 74ff65e0 authored by Maja Jablonska

Minor fixes

parent c24d252e
1 merge request: !46 Merge COMBO 3.0 into master
from typing import Dict, Optional, List, Union, Tuple
import torch
import torch.nn as nn
from overrides import overrides
from combo.nn import Activation
import combo.utils.checks as checks
from combo.data.vocabulary import Vocabulary
from combo.models.utils import masked_cross_entropy
from combo.predictors.predictor import Predictor
class Linear(nn.Linear):
def __init__(self,
in_features: int,
out_features: int,
activation: Optional[Activation] = None,
dropout_rate: Optional[float] = 0.0):
super().__init__(in_features, out_features)
self.activation = activation if activation else self.identity
self.dropout = nn.Dropout(p=dropout_rate) if dropout_rate else self.identity
def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
x = super().forward(x)
x = self.activation(x)
return self.dropout(x)
def get_output_dim(self) -> int:
return self.out_features
@staticmethod
def identity(x):
return x
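# Example (sketch, assuming any torch.nn.Module activation is acceptable here,
# as the FeedForward docstring below suggests):
#
#     layer = Linear(10, 5, activation=torch.nn.ReLU(), dropout_rate=0.1)
#     y = layer(torch.randn(8, 10))   # -> (8, 5): affine map, ReLU, then dropout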
class FeedForward(torch.nn.Module):
"""
Modified copy of allennlp.modules.feedforward.FeedForward
This `Module` is a feed-forward neural network, just a sequence of `Linear` layers with
activation functions in between.
# Parameters
input_dim : `int`, required
The dimensionality of the input. We assume the input has shape `(batch_size, input_dim)`.
num_layers : `int`, required
The number of `Linear` layers to apply to the input.
hidden_dims : `Union[int, List[int]]`, required
The output dimension of each of the `Linear` layers. If this is a single `int`, we use
it for all `Linear` layers. If it is a `List[int]`, `len(hidden_dims)` must be
`num_layers`.
activations : `Union[Activation, List[Activation]]`, required
The activation function to use after each `Linear` layer. If this is a single function,
we use it after all `Linear` layers. If it is a `List[Activation]`,
        `len(activations)` must be `num_layers`. Each activation must be a `torch.nn.Module`.
dropout : `Union[float, List[float]]`, optional (default = `0.0`)
If given, we will apply this amount of dropout after each layer. Semantics of `float`
versus `List[float]` is the same as with other parameters.
# Examples
```python
FeedForward(124, 2, [64, 32], torch.nn.ReLU(), 0.2)
#> FeedForward(
#> (_activations): ModuleList(
#> (0): ReLU()
#> (1): ReLU()
#> )
#> (_linear_layers): ModuleList(
#> (0): Linear(in_features=124, out_features=64, bias=True)
#> (1): Linear(in_features=64, out_features=32, bias=True)
#> )
#> (_dropout): ModuleList(
#> (0): Dropout(p=0.2, inplace=False)
#> (1): Dropout(p=0.2, inplace=False)
#> )
#> )
```
"""
def __init__(
self,
input_dim: int,
num_layers: int,
hidden_dims: Union[int, List[int]],
activations: Union[Activation, List[Activation]],
dropout: Union[float, List[float]] = 0.0,
) -> None:
super().__init__()
if not isinstance(hidden_dims, list):
hidden_dims = [hidden_dims] * num_layers # type: ignore
if not isinstance(activations, list):
activations = [activations] * num_layers # type: ignore
if not isinstance(dropout, list):
dropout = [dropout] * num_layers # type: ignore
if len(hidden_dims) != num_layers:
raise checks.ConfigurationError(
"len(hidden_dims) (%d) != num_layers (%d)" % (len(hidden_dims), num_layers)
)
if len(activations) != num_layers:
raise checks.ConfigurationError(
"len(activations) (%d) != num_layers (%d)" % (len(activations), num_layers)
)
if len(dropout) != num_layers:
raise checks.ConfigurationError(
"len(dropout) (%d) != num_layers (%d)" % (len(dropout), num_layers)
)
self._activations = torch.nn.ModuleList(activations)
input_dims = [input_dim] + hidden_dims[:-1]
linear_layers = []
for layer_input_dim, layer_output_dim in zip(input_dims, hidden_dims):
linear_layers.append(torch.nn.Linear(layer_input_dim, layer_output_dim))
self._linear_layers = torch.nn.ModuleList(linear_layers)
dropout_layers = [torch.nn.Dropout(p=value) for value in dropout]
self._dropout = torch.nn.ModuleList(dropout_layers)
self._output_dim = hidden_dims[-1]
self.input_dim = input_dim
def get_output_dim(self):
return self._output_dim
def get_input_dim(self):
return self.input_dim
def forward(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
output = inputs
feature_maps = []
for layer, activation, dropout in zip(
self._linear_layers, self._activations, self._dropout
):
feature_maps.append(output)
output = dropout(activation(layer(output)))
return output, feature_maps
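# Example (sketch): unlike allennlp's FeedForward, forward() returns both the
# final output and the inputs seen by each layer ("feature maps"):
#
#     ff = FeedForward(input_dim=124, num_layers=2, hidden_dims=[64, 32],
#                      activations=torch.nn.ReLU(), dropout=0.2)
#     out, feature_maps = ff(torch.randn(8, 124))
#     # out: (8, 32); feature_maps[0]: (8, 124), feature_maps[1]: (8, 64)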
class FeedForwardPredictor(Predictor):
"""Feedforward predictor. Should be used on top of Seq2Seq encoder."""
def __init__(self, feedforward_network: "FeedForward"):
super().__init__()
self.feedforward_network = feedforward_network
def forward(self,
x: Union[torch.Tensor, List[torch.Tensor]],
mask: Optional[torch.BoolTensor] = None,
labels: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None,
sample_weights: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None) -> Dict[str, torch.Tensor]:
if mask is None:
mask = x.new_ones(x.size()[:-1])
x, feature_maps = self.feedforward_network(x)
output = {
"prediction": x.argmax(-1),
"probability": x,
"embedding": feature_maps[-1],
}
if labels is not None:
if sample_weights is None:
sample_weights = labels.new_ones([mask.size(0)])
output["loss"] = self._loss(x, labels, mask, sample_weights)
return output
def _loss(self,
pred: torch.Tensor,
true: torch.Tensor,
mask: torch.BoolTensor,
sample_weights: torch.Tensor) -> torch.Tensor:
BATCH_SIZE, _, CLASSES = pred.size()
valid_positions = mask.sum()
pred = pred.reshape(-1, CLASSES)
true = true.reshape(-1)
mask = mask.reshape(-1)
loss = masked_cross_entropy(pred, true, mask)
loss = loss.reshape(BATCH_SIZE, -1) * sample_weights.unsqueeze(-1)
return loss.sum() / valid_positions
@classmethod
def from_vocab(cls,
vocab: Vocabulary,
vocab_namespace: str,
input_dim: int,
num_layers: int,
hidden_dims: List[int],
activations: Union[Activation, List[Activation]],
dropout: Union[float, List[float]] = 0.0,
):
if len(hidden_dims) + 1 != num_layers:
raise checks.ConfigurationError(
f"len(hidden_dims) ({len(hidden_dims):d}) + 1 != num_layers ({num_layers:d})"
)
        assert vocab_namespace in vocab.get_namespaces(), \
            f"There is no '{vocab_namespace}' namespace in the vocabulary; check whether this field has any values to predict!"
hidden_dims = hidden_dims + [vocab.get_vocab_size(vocab_namespace)]
return cls(FeedForward(
input_dim=input_dim,
num_layers=num_layers,
hidden_dims=hidden_dims,
activations=activations,
dropout=dropout))
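# Example (sketch, with a hypothetical vocabulary `vocab` containing an
# "upostag" namespace): from_vocab appends the namespace size as the final
# hidden dimension, so `hidden_dims` lists only the intermediate layers:
#
#     predictor = FeedForwardPredictor.from_vocab(
#         vocab, "upostag",
#         input_dim=768, num_layers=2, hidden_dims=[128],
#         activations=torch.nn.ReLU())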
"""
Adapted from AllenNLP
"""
class TimeDistributed(torch.nn.Module):
"""
Given an input shaped like `(batch_size, time_steps, [rest])` and a `Module` that takes
inputs like `(batch_size, [rest])`, `TimeDistributed` reshapes the input to be
`(batch_size * time_steps, [rest])`, applies the contained `Module`, then reshapes it back.
Note that while the above gives shapes with `batch_size` first, this `Module` also works if
`batch_size` is second - we always just combine the first two dimensions, then split them.
It also reshapes keyword arguments unless they are not tensors or their name is specified in
the optional `pass_through` iterable.
"""
def __init__(self, module):
super().__init__()
self._module = module
@overrides
    def forward(self, *inputs, pass_through: Optional[List[str]] = None, **kwargs):
pass_through = pass_through or []
reshaped_inputs = [self._reshape_tensor(input_tensor) for input_tensor in inputs]
# Need some input to then get the batch_size and time_steps.
some_input = None
if inputs:
some_input = inputs[-1]
reshaped_kwargs = {}
for key, value in kwargs.items():
if isinstance(value, torch.Tensor) and key not in pass_through:
if some_input is None:
some_input = value
value = self._reshape_tensor(value)
reshaped_kwargs[key] = value
reshaped_outputs = self._module(*reshaped_inputs, **reshaped_kwargs)
if some_input is None:
raise RuntimeError("No input tensor to time-distribute")
# Now get the output back into the right shape.
# (batch_size, time_steps, **output_size)
new_size = some_input.size()[:2] + reshaped_outputs.size()[1:]
outputs = reshaped_outputs.contiguous().view(new_size)
return outputs
@staticmethod
def _reshape_tensor(input_tensor):
input_size = input_tensor.size()
if len(input_size) <= 2:
raise RuntimeError(f"No dimension to distribute: {input_size}")
# Squash batch_size and time_steps into a single axis; result has shape
# (batch_size * time_steps, **input_size).
squashed_shape = [-1] + list(input_size[2:])
return input_tensor.contiguous().view(*squashed_shape)
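# Example (sketch): wrap a module that expects (batch_size, features) input so
# it can consume (batch_size, time_steps, features):
#
#     td = TimeDistributed(torch.nn.Linear(10, 5))
#     out = td(torch.randn(4, 7, 10))   # -> (4, 7, 5)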
from typing import Optional
import torch
from overrides import overrides
from torch import nn
from torchtext.vocab import Vectors, GloVe, FastText, CharNGram
from combo.data import Vocabulary
from combo.models.base import TimeDistributed
from combo.models.dilated_cnn import DilatedCnnEncoder
from combo.models.utils import tiny_value_of_dtype
from combo.utils import ConfigurationError
class TokenEmbedder(nn.Module):
def __init__(self):
super(TokenEmbedder, self).__init__()
@property
def output_dim(self) -> int:
raise NotImplementedError()
def forward(self,
x: torch.Tensor,
char_mask: Optional[torch.BoolTensor] = None) -> torch.Tensor:
raise NotImplementedError()
class _TorchEmbedder(TokenEmbedder):
def __init__(self,
num_embeddings: int,
embedding_dim: int,
padding_idx: Optional[int] = None,
max_norm: Optional[float] = None,
norm_type: float = 2.,
scale_grad_by_freq: bool = False,
sparse: bool = False,
vocab_namespace: str = "tokens",
vocab: Vocabulary = None,
weight: Optional[torch.Tensor] = None,
trainable: bool = True,
projection_dim: Optional[int] = None):
super(_TorchEmbedder, self).__init__()
self._embedding_dim = embedding_dim
self._embedding = nn.Embedding(num_embeddings=num_embeddings,
embedding_dim=embedding_dim,
padding_idx=padding_idx,
max_norm=max_norm,
norm_type=norm_type,
scale_grad_by_freq=scale_grad_by_freq,
sparse=sparse)
self.__vocab_namespace = vocab_namespace
self.__vocab = vocab
if weight is not None:
            if weight.shape != (num_embeddings, embedding_dim):
                raise ConfigurationError(
                    "Weight matrix must be of shape (num_embeddings, embedding_dim). " +
                    f"Got: {weight.shape}"
                )
self.__weight = torch.nn.Parameter(weight, requires_grad=trainable)
else:
self.__weight = torch.nn.Parameter(torch.FloatTensor(num_embeddings, embedding_dim),
requires_grad=trainable)
torch.nn.init.xavier_uniform_(self.__weight)
if padding_idx is not None:
self.__weight.data[padding_idx].fill_(0)
if projection_dim:
self._projection = torch.nn.Linear(embedding_dim, projection_dim)
self._output_dim = projection_dim
else:
self._projection = None
self._output_dim = embedding_dim
    @property
    @overrides
    def output_dim(self) -> int:
        return self._output_dim
@overrides
def forward(self,
x: torch.Tensor,
char_mask: Optional[torch.BoolTensor] = None) -> torch.Tensor:
embedded = self._embedding(x)
        if self._projection:
            projection = self._projection
            # Wrap the projection once per extra leading dimension so the linear
            # map is applied to the last axis only.
            for _ in range(embedded.dim() - 2):
                projection = TimeDistributed(projection)
            embedded = projection(embedded)
return embedded
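# Example (sketch, using this internal class directly for illustration): with a
# projection, lookups are mapped from embedding_dim down to projection_dim,
# time-distributed over the leading dimensions:
#
#     emb = _TorchEmbedder(num_embeddings=100, embedding_dim=16, projection_dim=8)
#     out = emb(torch.randint(0, 100, (4, 7)))   # -> (4, 7, 8)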
class _TorchtextVectorsEmbedder(TokenEmbedder):
"""
Torchtext Vectors object wrapper
"""
def __init__(self,
torchtext_embedder: Vectors,
lower_case_backup: bool = False):
"""
:param torchtext_embedder: Torchtext Vectors object
:param lower_case_backup: whether to look up the token in the
lower case. Default: False.
"""
super(_TorchtextVectorsEmbedder, self).__init__()
self.__torchtext_embedder = torchtext_embedder
self.__lower_case_backup = lower_case_backup
    @property
    @overrides
    def output_dim(self) -> int:
        # torchtext Vectors expose their dimensionality as `dim`; len() would
        # count cached tokens instead.
        return self.__torchtext_embedder.dim
@overrides
def forward(self,
x: torch.Tensor,
char_mask: Optional[torch.BoolTensor] = None) -> torch.Tensor:
return self.__torchtext_embedder.get_vecs_by_tokens(x, self.__lower_case_backup)
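# Note (assumption): torchtext's `get_vecs_by_tokens` does string lookups, so
# despite the `torch.Tensor` annotation above, `x` is expected to be a token
# string or a list of token strings at call time.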
class GloVe42BEmbedder(_TorchtextVectorsEmbedder):
def __init__(self, dim: int = 300):
super(GloVe42BEmbedder, self).__init__(GloVe("42B", dim))
class GloVe840BEmbedder(_TorchtextVectorsEmbedder):
def __init__(self, dim: int = 300):
super(GloVe840BEmbedder, self).__init__(GloVe("840B", dim))
class GloVeTwitter27BEmbedder(_TorchtextVectorsEmbedder):
    def __init__(self, dim: int = 200):
        # twitter.27B vectors ship in 25/50/100/200 dimensions (300 is unavailable)
        super(GloVeTwitter27BEmbedder, self).__init__(GloVe("twitter.27B", dim))
class GloVe6BEmbedder(_TorchtextVectorsEmbedder):
def __init__(self, dim: int = 300):
super(GloVe6BEmbedder, self).__init__(GloVe("6B", dim))
class FastTextEmbedder(_TorchtextVectorsEmbedder):
def __init__(self, language: str = "en"):
super(FastTextEmbedder, self).__init__(FastText(language))
class CharNGramEmbedder(_TorchtextVectorsEmbedder):
def __init__(self):
super(CharNGramEmbedder, self).__init__(CharNGram())
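# Example (sketch): the torchtext-backed wrappers download their pretrained
# vectors on first use, then serve per-token lookups:
#
#     embedder = GloVe6BEmbedder(dim=100)   # fetches glove.6B.100d if not cached
#     vectors = embedder(["the", "cat"])    # -> (2, 100) tensor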
class CharacterBasedWordEmbedder(TokenEmbedder):
def __init__(self,
num_embeddings: int,
embedding_dim: int,
dilated_cnn_encoder: DilatedCnnEncoder):
super(CharacterBasedWordEmbedder, self).__init__()
self.__embedding_dim = embedding_dim
self.__dilated_cnn_encoder = dilated_cnn_encoder
self.char_embed = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
    @property
    @overrides
    def output_dim(self) -> int:
        return self.__embedding_dim

    @overrides
    def forward(self,
                x: torch.Tensor,
                char_mask: Optional[torch.BoolTensor] = None) -> torch.Tensor:
        if char_mask is None:
            char_mask = x.new_ones(x.size())
        # (batch, words, chars) character ids -> (batch, words, chars, embedding_dim)
        x = self.char_embed(x)
        # Zero out the embeddings of padding characters.
        x = x * char_mask.unsqueeze(-1).float()
        # Convolve over the character axis and max-pool it away, yielding one
        # fixed-size vector per word.
        x = self.__dilated_cnn_encoder(x.transpose(2, 3))
        return torch.max(x, dim=-1)[0]
class PretrainedTransformerMismatchedEmbedder(TokenEmbedder):
pass
class TransformersWordEmbedder(PretrainedTransformerMismatchedEmbedder):
pass
class FeatsTokenEmbedder(_TorchEmbedder):
def __init__(self,
num_embeddings: int,
embedding_dim: int,
padding_idx: Optional[int] = None,
max_norm: Optional[float] = None,
norm_type: float = 2.,
scale_grad_by_freq: bool = False,
sparse: bool = False,
vocab_namespace: str = "feats",
vocab: Vocabulary = None,
weight: Optional[torch.Tensor] = None,
trainable: bool = True):
super(FeatsTokenEmbedder, self).__init__(num_embeddings,
embedding_dim,
padding_idx,
max_norm,
norm_type,
scale_grad_by_freq,
sparse,
vocab_namespace,
vocab,
weight,
trainable)
@overrides
    def forward(self,
                x: torch.Tensor,
                char_mask: Optional[torch.BoolTensor] = None) -> torch.Tensor:
        # Each token carries several feature ids; average their embeddings,
        # counting only the non-padding (id > 0) slots in the denominator.
        mask = x.gt(0)
        x = super().forward(x)
        return x.sum(dim=-2) / (
            (mask.sum(dim=-1) + tiny_value_of_dtype(torch.float)).unsqueeze(dim=-1)
        )
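# Example (sketch): each token row holds several morphological feature ids.
# Passing padding_idx=0 keeps the padding row at zero so it does not shift the sum:
#
#     embedder = FeatsTokenEmbedder(num_embeddings=50, embedding_dim=8, padding_idx=0)
#     feats = torch.tensor([[[3, 7, 0]]])   # (batch=1, tokens=1, feature slots=3)
#     out = embedder(feats)                 # -> (1, 1, 8), mean of the embeddings of ids 3 and 7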
@@ -6,19 +6,20 @@ Author: Mateusz Klimaszewski
 from typing import List, Optional, Union, Tuple, Dict
 from combo import data
-from combo.models import base
-from combo.models.base import Predictor
+from combo.predictors import Predictor
 import torch
 import torch.nn.functional as F
+from combo.nn.base import Linear

 class GraphHeadPredictionModel(Predictor):
     """Head prediction model."""

     def __init__(self,
-                 head_projection_layer: base.Linear,
-                 dependency_projection_layer: base.Linear,
+                 head_projection_layer: Linear,
+                 dependency_projection_layer: Linear,
                  cycle_loss_n: int = 0,
                  graph_weighting: float = 0.2):
         super().__init__()
@@ -107,9 +108,9 @@ class GraphDependencyRelationModel(Predictor):
     def __init__(self,
                  head_predictor: GraphHeadPredictionModel,
-                 head_projection_layer: base.Linear,
-                 dependency_projection_layer: base.Linear,
-                 relation_prediction_layer: base.Linear):
+                 head_projection_layer: Linear,
+                 dependency_projection_layer: Linear,
+                 relation_prediction_layer: Linear):
         super().__init__()
         self.head_predictor = head_predictor
         self.head_projection_layer = head_projection_layer
@@ -173,12 +174,12 @@ class GraphDependencyRelationModel(Predictor):
                    vocab: data.Vocabulary,
                    vocab_namespace: str,
                    head_predictor: GraphHeadPredictionModel,
-                   head_projection_layer: base.Linear,
-                   dependency_projection_layer: base.Linear
+                   head_projection_layer: Linear,
+                   dependency_projection_layer: Linear
                    ):
         """Creates parser combining model configuration and vocabulary data."""
         assert vocab_namespace in vocab.get_namespaces()
-        relation_prediction_layer = base.Linear(
+        relation_prediction_layer = Linear(
             in_features=head_projection_layer.get_output_dim() + dependency_projection_layer.get_output_dim(),
             out_features=vocab.get_vocab_size(vocab_namespace)
         )
...
from typing import Optional, Dict, List, Union
import torch
import torch.nn as nn
from combo import data
from combo.models import dilated_cnn, base, utils
from combo.models.base import Predictor, TimeDistributed
from combo.nn import Activation
from combo.utils import ConfigurationError
class LemmatizerModel(Predictor):
"""Lemmatizer model."""
def __init__(self,
num_embeddings: int,
embedding_dim: int,
dilated_cnn_encoder: dilated_cnn.DilatedCnnEncoder,
input_projection_layer: base.Linear):
super().__init__()
self.char_embed = nn.Embedding(
num_embeddings=num_embeddings,
embedding_dim=embedding_dim,
)
self.dilated_cnn_encoder = TimeDistributed(dilated_cnn_encoder)
self.input_projection_layer = input_projection_layer
def forward(self,
x: Union[torch.Tensor, List[torch.Tensor]],
mask: Optional[torch.BoolTensor] = None,
labels: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None,
sample_weights: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None) -> Dict[str, torch.Tensor]:
        encoder_emb, chars = x
        # encoder_emb: (batch_size, sentence_length, encoder_dim) word context
        # chars:       (batch_size, sentence_length, max_word_length) character ids
        encoder_emb = self.input_projection_layer(encoder_emb)
        char_embeddings = self.char_embed(chars)
        BATCH_SIZE, _, MAX_WORD_LENGTH, CHAR_EMB = char_embeddings.size()
        # Broadcast each word's context vector to all of its character positions
        # and concatenate it with the character embeddings.
        encoder_emb = encoder_emb.unsqueeze(2).repeat(1, 1, MAX_WORD_LENGTH, 1)
        x = torch.cat((char_embeddings, encoder_emb), dim=-1).transpose(2, 3)
        x = self.dilated_cnn_encoder(x).transpose(2, 3)
output = {
"prediction": x.argmax(-1),
"probability": x
}
if labels is not None:
if mask is None:
mask = encoder_emb.new_ones(encoder_emb.size()[:-2])
if sample_weights is None:
sample_weights = labels.new_ones(BATCH_SIZE)
mask = mask.unsqueeze(2).repeat(1, 1, MAX_WORD_LENGTH).bool()
output["loss"] = self._loss(x, labels, mask, sample_weights)
return output
@staticmethod
def _loss(pred: torch.Tensor, true: torch.Tensor, mask: torch.BoolTensor,
sample_weights: torch.Tensor) -> torch.Tensor:
BATCH_SIZE, SENTENCE_LENGTH, MAX_WORD_LENGTH, CHAR_CLASSES = pred.size()
pred = pred.reshape(-1, CHAR_CLASSES)
true = true.reshape(-1)
        # Recompute the mask at character level: padding characters (id 0)
        # are excluded from the loss.
        mask = true.gt(0)
loss = utils.masked_cross_entropy(pred, true, mask)
loss = loss.reshape(BATCH_SIZE, -1) * sample_weights.unsqueeze(-1)
valid_positions = mask.sum()
return loss.sum() / valid_positions
@classmethod
def from_vocab(cls,
vocab: data.Vocabulary,
char_vocab_namespace: str,
lemma_vocab_namespace: str,
embedding_dim: int,
input_projection_layer: base.Linear,
filters: List[int],
kernel_size: List[int],
stride: List[int],
padding: List[int],
dilation: List[int],
activations: List[Activation],
):
assert char_vocab_namespace in vocab.get_namespaces()
assert lemma_vocab_namespace in vocab.get_namespaces()
        if len(filters) + 1 != len(kernel_size):
            raise ConfigurationError(
                f"len(filters) ({len(filters):d}) + 1 != len(kernel_size) ({len(kernel_size):d})"
            )
filters = filters + [vocab.get_vocab_size(lemma_vocab_namespace)]
dilated_cnn_encoder = dilated_cnn.DilatedCnnEncoder(
input_dim=embedding_dim + input_projection_layer.get_output_dim(),
filters=filters,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
activations=activations,
)
return cls(num_embeddings=vocab.get_vocab_size(char_vocab_namespace),
embedding_dim=embedding_dim,
dilated_cnn_encoder=dilated_cnn_encoder,
input_projection_layer=input_projection_layer)
"""
Adapted from COMBO
Author: Mateusz Klimaszewski
"""
from typing import Dict, List, Optional, Union
import torch
from combo import data
from combo.data import dataset
from combo.models import base, utils
from combo.nn import Activation
from combo.utils import ConfigurationError
class MorphologicalFeatures(base.Predictor):
"""Morphological features predicting model."""
def __init__(self, feedforward_network: base.FeedForward, slices: Dict[str, List[int]]):
super().__init__()
self.feedforward_network = feedforward_network
self.slices = slices
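    # `slices` maps each morphological category (e.g. "Case", "Number") to the
    # column indices of its block in the network output; predictions and losses
    # are computed independently per category.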
def forward(self,
x: Union[torch.Tensor, List[torch.Tensor]],
mask: Optional[torch.BoolTensor] = None,
labels: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None,
sample_weights: Optional[Union[torch.Tensor, List[torch.Tensor]]] = None) -> Dict[str, torch.Tensor]:
if mask is None:
mask = x.new_ones(x.size()[:-1])
x, feature_maps = self.feedforward_network(x)
prediction = []
for _, cat_indices in self.slices.items():
prediction.append(x[:, :, cat_indices].argmax(dim=-1))
output = {
"prediction": torch.stack(prediction, dim=-1),
"probability": x,
"embedding": feature_maps[-1],
}
if labels is not None:
if sample_weights is None:
sample_weights = labels.new_ones([mask.size(0)])
output["loss"] = self._loss(x, labels, mask, sample_weights)
return output
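    # _loss sums per-category cross-entropies: for each category block S_c it
    # computes CE(pred[:, S_c], argmax(true[:, S_c])) over unmasked positions,
    # applies per-sentence sample weights, and normalizes by the number of
    # valid token positions.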
def _loss(self, pred: torch.Tensor, true: torch.Tensor, mask: torch.BoolTensor,
sample_weights: torch.Tensor) -> torch.Tensor:
assert pred.size() == true.size()
BATCH_SIZE, _, MORPHOLOGICAL_FEATURES = pred.size()
valid_positions = mask.sum()
pred = pred.reshape(-1, MORPHOLOGICAL_FEATURES)
true = true.reshape(-1, MORPHOLOGICAL_FEATURES)
mask = mask.reshape(-1)
loss = None
loss_func = utils.masked_cross_entropy
for cat, cat_indices in self.slices.items():
if cat not in ["__PAD__", "_"]:
if loss is None:
loss = loss_func(pred[:, cat_indices],
true[:, cat_indices].argmax(dim=1),
mask)
else:
loss += loss_func(pred[:, cat_indices],
true[:, cat_indices].argmax(dim=1),
mask)
loss = loss.reshape(BATCH_SIZE, -1) * sample_weights.unsqueeze(-1)
return loss.sum() / valid_positions
@classmethod
def from_vocab(cls,
vocab: data.Vocabulary,
vocab_namespace: str,
input_dim: int,
num_layers: int,
hidden_dims: List[int],
activations: Union[Activation, List[Activation]],
dropout: Union[float, List[float]] = 0.0,
):
if len(hidden_dims) + 1 != num_layers:
raise ConfigurationError(
f"len(hidden_dims) ({len(hidden_dims):d}) + 1 != num_layers ({num_layers:d})"
)
assert vocab_namespace in vocab.get_namespaces()
hidden_dims = hidden_dims + [vocab.get_vocab_size(vocab_namespace)]
slices = dataset.get_slices_if_not_provided(vocab)
return cls(
feedforward_network=base.FeedForward(
input_dim=input_dim,
num_layers=num_layers,
hidden_dims=hidden_dims,
activations=activations,
dropout=dropout),
slices=slices
)
"""
Adapted from AllenNLP
"""
from typing import List, Optional
import torch
from overrides import overrides
from combo.config.registry import Registry
from combo.config.from_parameters import FromParameters, register_arguments
@Registry.register('time_distributed')
class TimeDistributed(torch.nn.Module, FromParameters):
"""
Given an input shaped like `(batch_size, time_steps, [rest])` and a `Module` that takes
inputs like `(batch_size, [rest])`, `TimeDistributed` reshapes the input to be
`(batch_size * time_steps, [rest])`, applies the contained `Module`, then reshapes it back.
Note that while the above gives shapes with `batch_size` first, this `Module` also works if
`batch_size` is second - we always just combine the first two dimensions, then split them.
It also reshapes keyword arguments unless they are not tensors or their name is specified in
the optional `pass_through` iterable.
"""
@register_arguments
def __init__(self, module):
super().__init__()
self._module = module
@overrides
    def forward(self, *inputs, pass_through: Optional[List[str]] = None, **kwargs):
pass_through = pass_through or []
reshaped_inputs = [self._reshape_tensor(input_tensor) for input_tensor in inputs]
# Need some input to then get the batch_size and time_steps.
some_input = None
if inputs:
some_input = inputs[-1]
reshaped_kwargs = {}
for key, value in kwargs.items():
if isinstance(value, torch.Tensor) and key not in pass_through:
if some_input is None:
some_input = value
value = self._reshape_tensor(value)
reshaped_kwargs[key] = value
reshaped_outputs = self._module(*reshaped_inputs, **reshaped_kwargs)
if some_input is None:
raise RuntimeError("No input tensor to time-distribute")
# Now get the output back into the right shape.
# (batch_size, time_steps, **output_size)
new_size = some_input.size()[:2] + reshaped_outputs.size()[1:]
outputs = reshaped_outputs.contiguous().view(new_size)
return outputs
@staticmethod
def _reshape_tensor(input_tensor):
input_size = input_tensor.size()
if len(input_size) <= 2:
raise RuntimeError(f"No dimension to distribute: {input_size}")
# Squash batch_size and time_steps into a single axis; result has shape
# (batch_size * time_steps, **input_size).
squashed_shape = [-1] + list(input_size[2:])
return input_tensor.contiguous().view(*squashed_shape)
@@ -12,7 +12,7 @@ from combo.nn import base
 from combo.nn.activations import Activation
 from combo.nn.utils import masked_cross_entropy
 from combo.utils import ConfigurationError
-from combo.models.base import TimeDistributed
+from combo.models.time_distributed import TimeDistributed
 from combo.predictors import Predictor
...
@@ -15,7 +15,7 @@ from combo.modules.text_field_embedders.text_field_embedder import TextFieldEmbe
 from combo.modules.token_embedders import EmptyEmbedder
 from combo.modules.token_embedders.token_embedder import TokenEmbedder
 from combo.utils import ConfigurationError
-from combo.models.base import TimeDistributed
+from combo.models.time_distributed import TimeDistributed

 @Registry.register("base_text_field_embedder")
...
@@ -12,7 +12,7 @@ from combo.data import Vocabulary
 from combo.nn.utils import tiny_value_of_dtype, uncombine_initial_dims, combine_initial_dims
 from combo.modules.module import Module
 from combo.utils import ConfigurationError
-from combo.models.base import TimeDistributed
+from models.time_distributed import TimeDistributed

 class TokenEmbedder(Module, FromParameters):
...
@@ -17,9 +17,3 @@ class SpacyTokenizerTest(unittest.TestCase):
         tokens = self.spacy_tokenizer.tokenize('')
         self.assertEqual(len(tokens), 0)
-
-    # def test_batch_tokenize_sentence(self):
-    #     tokens = self.spacy_tokenizer.batch_tokenize(['First sentence!', 'This is the second sentence.'])
-    #     self.assertListEqual([t.text for t in tokens[0]],
-    #                          ['First', 'sentence', '!'])
-    #     self.assertListEqual([t.text for t in tokens[1]],
-    #                          ['This', 'is', 'the', 'second', 'sentence', '.'])