From 06efc0439c91efcbb463c7acbb5ba27a627bca6d Mon Sep 17 00:00:00 2001
From: Mateusz Klimaszewski <mk.klimaszewski@gmail.com>
Date: Tue, 6 Apr 2021 13:01:19 +0200
Subject: [PATCH] Add pos, deprel and feats embeddings as an additional output.

---
Review notes (text between the "---" line and the first "diff --git" is
ignored by `git am` and is not part of the commit):

 * combo/data/api.py: `Token.embeddings` is annotated as a Dict but uses
   `default_factory=list`, so its default value is `[]`.
 * combo/data/api.py: `conllu2sentence` falls back to `embeddings = {}` and
   then evaluates `embeddings[idx]` for every token, which raises KeyError
   for a non-empty sentence when the caller passes no embeddings.
 * combo/predict.py: `embeddings` holds one dict per integer-id token, while
   `conllu2sentence` indexes it by position in `tree.tokens` (which is
   extended with `empty_tokens`); the lengths may differ -- TODO confirm.
 * combo/models/base.py: `feature_maps[-1]` is the input fed to the last
   `Linear` layer, i.e. the representation just before the final projection.

 combo/data/api.py      |  11 ++--
 combo/models/base.py   | 115 ++++++++++++++++++++++++++++++++++++++---
 combo/models/model.py  |   9 +++-
 combo/models/morpho.py |  10 ++--
 combo/models/parser.py |   1 +
 combo/predict.py       |  13 +++--
 6 files changed, 137 insertions(+), 22 deletions(-)

diff --git a/combo/data/api.py b/combo/data/api.py
index 4ab7f1a..bfec5ee 100644
--- a/combo/data/api.py
+++ b/combo/data/api.py
@@ -21,12 +21,13 @@ class Token:
     deps: Optional[str] = None
     misc: Optional[str] = None
     semrel: Optional[str] = None
+    embeddings: Dict[str, List[float]] = field(default_factory=list, repr=False)
 
 
 @dataclass
 class Sentence:
     tokens: List[Token] = field(default_factory=list)
-    sentence_embedding: List[float] = field(default_factory=list)
+    sentence_embedding: List[float] = field(default_factory=list, repr=False)
     metadata: Dict[str, Any] = field(default_factory=collections.OrderedDict)
 
     def to_json(self):
@@ -77,14 +78,16 @@ def tokens2conllu(tokens: List[str]) -> conllu.TokenList:
 
 
 def conllu2sentence(conllu_sentence: conllu.TokenList,
-                    sentence_embedding=None) -> Sentence:
+                    sentence_embedding=None, embeddings=None) -> Sentence:
+    if embeddings is None:
+        embeddings = {}
     if sentence_embedding is None:
         sentence_embedding = []
     tokens = []
-    for token in conllu_sentence.tokens:
+    for idx, token in enumerate(conllu_sentence.tokens):
         tokens.append(
             Token(
-                **token
+                **token, embeddings=embeddings[idx]
             )
         )
     return Sentence(
diff --git a/combo/models/base.py b/combo/models/base.py
index a5cb5fe..234fbca 100644
--- a/combo/models/base.py
+++ b/combo/models/base.py
@@ -1,11 +1,10 @@
-from typing import Dict, Optional, List, Union
+from typing import Dict, Optional, List, Union, Tuple
 
 import torch
 import torch.nn as nn
 from allennlp import common, data
 from allennlp import nn as allen_nn
 from allennlp.common import checks
-from allennlp.modules import feedforward
 from allennlp.nn import Activation
 
 from combo.models import utils
@@ -51,7 +50,7 @@ class Linear(nn.Linear, common.FromParams):
 class FeedForwardPredictor(Predictor):
     """Feedforward predictor. Should be used on top of Seq2Seq encoder."""
 
-    def __init__(self, feedforward_network: feedforward.FeedForward):
+    def __init__(self, feedforward_network: "FeedForward"):
         super().__init__()
         self.feedforward_network = feedforward_network
 
@@ -63,10 +62,11 @@ class FeedForwardPredictor(Predictor):
         if mask is None:
             mask = x.new_ones(x.size()[:-1])
 
-        x = self.feedforward_network(x)
+        x, feature_maps = self.feedforward_network(x)
         output = {
             "prediction": x.argmax(-1),
-            "probability": x
+            "probability": x,
+            "embedding": feature_maps[-1],
         }
 
         if labels is not None:
@@ -109,9 +109,112 @@ class FeedForwardPredictor(Predictor):
                 f"There is not {vocab_namespace} in created vocabs, check if this field has any values to predict!"
             hidden_dims = hidden_dims + [vocab.get_vocab_size(vocab_namespace)]
 
-        return cls(feedforward.FeedForward(
+        return cls(FeedForward(
             input_dim=input_dim,
             num_layers=num_layers,
             hidden_dims=hidden_dims,
             activations=activations,
             dropout=dropout))
+
+
+class FeedForward(torch.nn.Module, common.FromParams):
+    """
+    Modified copy of allennlp.modules.feedforward.FeedForward
+
+    This `Module` is a feed-forward neural network, just a sequence of `Linear` layers with
+    activation functions in between.
+
+    # Parameters
+
+    input_dim : `int`, required
+        The dimensionality of the input. We assume the input has shape `(batch_size, input_dim)`.
+    num_layers : `int`, required
+        The number of `Linear` layers to apply to the input.
+    hidden_dims : `Union[int, List[int]]`, required
+        The output dimension of each of the `Linear` layers. If this is a single `int`, we use
+        it for all `Linear` layers. If it is a `List[int]`, `len(hidden_dims)` must be
+        `num_layers`.
+    activations : `Union[Activation, List[Activation]]`, required
+        The activation function to use after each `Linear` layer. If this is a single function,
+        we use it after all `Linear` layers. If it is a `List[Activation]`,
+        `len(activations)` must be `num_layers`. Activation must have torch.nn.Module type.
+    dropout : `Union[float, List[float]]`, optional (default = `0.0`)
+        If given, we will apply this amount of dropout after each layer. Semantics of `float`
+        versus `List[float]` is the same as with other parameters.
+
+    # Examples
+
+    ```python
+    FeedForward(124, 2, [64, 32], torch.nn.ReLU(), 0.2)
+    #> FeedForward(
+    #>   (_activations): ModuleList(
+    #>     (0): ReLU()
+    #>     (1): ReLU()
+    #>   )
+    #>   (_linear_layers): ModuleList(
+    #>     (0): Linear(in_features=124, out_features=64, bias=True)
+    #>     (1): Linear(in_features=64, out_features=32, bias=True)
+    #>   )
+    #>   (_dropout): ModuleList(
+    #>     (0): Dropout(p=0.2, inplace=False)
+    #>     (1): Dropout(p=0.2, inplace=False)
+    #>   )
+    #> )
+    ```
+    """
+
+    def __init__(
+        self,
+        input_dim: int,
+        num_layers: int,
+        hidden_dims: Union[int, List[int]],
+        activations: Union[Activation, List[Activation]],
+        dropout: Union[float, List[float]] = 0.0,
+    ) -> None:
+
+        super().__init__()
+        if not isinstance(hidden_dims, list):
+            hidden_dims = [hidden_dims] * num_layers  # type: ignore
+        if not isinstance(activations, list):
+            activations = [activations] * num_layers  # type: ignore
+        if not isinstance(dropout, list):
+            dropout = [dropout] * num_layers  # type: ignore
+        if len(hidden_dims) != num_layers:
+            raise checks.ConfigurationError(
+                "len(hidden_dims) (%d) != num_layers (%d)" % (len(hidden_dims), num_layers)
+            )
+        if len(activations) != num_layers:
+            raise checks.ConfigurationError(
+                "len(activations) (%d) != num_layers (%d)" % (len(activations), num_layers)
+            )
+        if len(dropout) != num_layers:
+            raise checks.ConfigurationError(
+                "len(dropout) (%d) != num_layers (%d)" % (len(dropout), num_layers)
+            )
+        self._activations = torch.nn.ModuleList(activations)
+        input_dims = [input_dim] + hidden_dims[:-1]
+        linear_layers = []
+        for layer_input_dim, layer_output_dim in zip(input_dims, hidden_dims):
+            linear_layers.append(torch.nn.Linear(layer_input_dim, layer_output_dim))
+        self._linear_layers = torch.nn.ModuleList(linear_layers)
+        dropout_layers = [torch.nn.Dropout(p=value) for value in dropout]
+        self._dropout = torch.nn.ModuleList(dropout_layers)
+        self._output_dim = hidden_dims[-1]
+        self.input_dim = input_dim
+
+    def get_output_dim(self):
+        return self._output_dim
+
+    def get_input_dim(self):
+        return self.input_dim
+
+    def forward(self, inputs: torch.Tensor) -> Tuple[torch.Tensor, List[torch.Tensor]]:
+
+        output = inputs
+        feature_maps = []
+        for layer, activation, dropout in zip(
+            self._linear_layers, self._activations, self._dropout
+        ):
+            feature_maps.append(output)
+            output = dropout(activation(layer(output)))
+        return output, feature_maps
diff --git a/combo/models/model.py b/combo/models/model.py
index 9866bcb..c648453 100644
--- a/combo/models/model.py
+++ b/combo/models/model.py
@@ -129,6 +129,11 @@ class ComboModel(allen_models.Model):
             "enhanced_head": enhanced_head_pred,
             "enhanced_deprel": enhanced_relations_pred,
             "sentence_embedding": torch.max(encoder_emb, dim=1)[0],
+            "upostag_token_embedding": upos_output["embedding"],
+            "xpostag_token_embedding": xpos_output["embedding"],
+            "semrel_token_embedding": semrel_output["embedding"],
+            "feats_token_embedding": morpho_output["embedding"],
+            "deprel_token_embedding": parser_output["embedding"],
         }
 
         if "rel_probability" in enhanced_parser_output:
@@ -196,8 +201,8 @@ class ComboModel(allen_models.Model):
         if callable_model:
             return callable_model(*args, **kwargs)
         if returns_tuple:
-            return {"prediction": (None, None), "loss": (None, None)}
-        return {"prediction": None, "loss": None}
+            return {"prediction": (None, None), "loss": (None, None), "embedding": (None, None)}
+        return {"prediction": None, "loss": None, "embedding": None}
 
     @staticmethod
     def _clean(output):
diff --git a/combo/models/morpho.py b/combo/models/morpho.py
index ea3451d..b0d3079 100644
--- a/combo/models/morpho.py
+++ b/combo/models/morpho.py
@@ -4,7 +4,6 @@ from typing import Dict, List, Optional, Union
 import torch
 from allennlp import data
 from allennlp.common import checks
-from allennlp.modules import feedforward
 from allennlp.nn import Activation
 
 from combo.data import dataset
@@ -15,7 +14,7 @@ from combo.models import base, utils
 class MorphologicalFeatures(base.Predictor):
     """Morphological features predicting model."""
 
-    def __init__(self, feedforward_network: feedforward.FeedForward, slices: Dict[str, List[int]]):
+    def __init__(self, feedforward_network: base.FeedForward, slices: Dict[str, List[int]]):
         super().__init__()
         self.feedforward_network = feedforward_network
         self.slices = slices
@@ -28,7 +27,7 @@ class MorphologicalFeatures(base.Predictor):
         if mask is None:
             mask = x.new_ones(x.size()[:-1])
 
-        x = self.feedforward_network(x)
+        x, feature_maps = self.feedforward_network(x)
 
         prediction = []
         for _, cat_indices in self.slices.items():
@@ -36,7 +35,8 @@ class MorphologicalFeatures(base.Predictor):
 
         output = {
             "prediction": torch.stack(prediction, dim=-1),
-            "probability": x
+            "probability": x,
+            "embedding": feature_maps[-1],
         }
 
         if labels is not None:
@@ -92,7 +92,7 @@ class MorphologicalFeatures(base.Predictor):
             slices = dataset.get_slices_if_not_provided(vocab)
 
         return cls(
-            feedforward_network=feedforward.FeedForward(
+            feedforward_network=base.FeedForward(
                 input_dim=input_dim,
                 num_layers=num_layers,
                 hidden_dims=hidden_dims,
diff --git a/combo/models/parser.py b/combo/models/parser.py
index 511edff..b16f0ad 100644
--- a/combo/models/parser.py
+++ b/combo/models/parser.py
@@ -153,6 +153,7 @@ class DependencyRelationModel(base.Predictor):
         dep_rel_pred = torch.cat((dep_rel_pred, dep_rel_emb), dim=-1)
         relation_prediction = self.relation_prediction_layer(dep_rel_pred)
         output = head_output
+        output["embedding"] = dep_rel_pred
 
         if self.training:
             output["prediction"] = (relation_prediction.argmax(-1)[:, 1:], head_output["prediction"])
diff --git a/combo/predict.py b/combo/predict.py
index e528a18..f580c01 100644
--- a/combo/predict.py
+++ b/combo/predict.py
@@ -82,8 +82,8 @@ class COMBO(predictor.Predictor):
         sentences = []
         predictions = super().predict_batch_instance(instances)
         for prediction, instance in zip(predictions, instances):
-            tree, sentence_embedding = self._predictions_as_tree(prediction, instance)
-            sentence = conllu2sentence(tree, sentence_embedding)
+            tree, sentence_embedding, embeddings = self._predictions_as_tree(prediction, instance)
+            sentence = conllu2sentence(tree, sentence_embedding, embeddings)
             sentences.append(sentence)
         return sentences
 
@@ -96,8 +96,8 @@ class COMBO(predictor.Predictor):
     @overrides
     def predict_instance(self, instance: allen_data.Instance, serialize: bool = True) -> data.Sentence:
         predictions = super().predict_instance(instance)
-        tree, sentence_embedding = self._predictions_as_tree(predictions, instance)
-        return conllu2sentence(tree, sentence_embedding)
+        tree, sentence_embedding, embeddings = self._predictions_as_tree(predictions, instance, )
+        return conllu2sentence(tree, sentence_embedding, embeddings)
 
     @overrides
     def predict_json(self, inputs: common.JsonDict) -> data.Sentence:
@@ -141,6 +141,7 @@ class COMBO(predictor.Predictor):
         tree = instance.fields["metadata"]["input"]
         field_names = instance.fields["metadata"]["field_names"]
         tree_tokens = [t for t in tree if isinstance(t["id"], int)]
+        embeddings = [{} for _ in range(len(tree_tokens))]
         for field_name in field_names:
             if field_name not in predictions:
                 continue
@@ -149,6 +150,7 @@ class COMBO(predictor.Predictor):
                 if field_name in {"xpostag", "upostag", "semrel", "deprel"}:
                     value = self.vocab.get_token_from_index(field_predictions[idx], field_name + "_labels")
                     token[field_name] = value
+                    embeddings[idx][field_name] = predictions[f"{field_name}_token_embedding"][idx]
                 elif field_name == "head":
                     token[field_name] = int(field_predictions[idx])
                 elif field_name == "deps":
@@ -174,6 +176,7 @@ class COMBO(predictor.Predictor):
                         field_value = "|".join(np.array(features)[arg_indices].tolist())
 
                     token[field_name] = field_value
+                    embeddings[idx][field_name] = predictions[f"{field_name}_token_embedding"][idx]
                 elif field_name == "lemma":
                     prediction = field_predictions[idx]
                     word_chars = []
@@ -206,7 +209,7 @@ class COMBO(predictor.Predictor):
             empty_tokens = graph.restore_collapse_edges(tree_tokens)
             tree.tokens.extend(empty_tokens)
 
-        return tree, predictions["sentence_embedding"]
+        return tree, predictions["sentence_embedding"], embeddings
 
     @classmethod
     def with_spacy_tokenizer(cls, model: models.Model,
-- 
GitLab