Commit bf863906 authored by Maja Jablonska

Remove unnecessary dilated_cnn copy

parent e1bfa0c7
1 merge request: !46 Merge COMBO 3.0 into master
@@ -12,7 +12,7 @@ from combo.data.tokenizers import CharacterTokenizer
 from combo.data.vocabulary import Vocabulary
 from combo.combo_model import ComboModel
 from combo.models.encoder import ComboEncoder, ComboStackedBidirectionalLSTM
-from combo.modules.dilated_cnn import DilatedCnnEncoder
+from combo.models.dilated_cnn import DilatedCnnEncoder
 from combo.modules.lemma import LemmatizerModel
 from combo.modules.morpho import MorphologicalFeatures
 from combo.modules.parser import DependencyRelationModel, HeadPredictionModel
@@ -18,6 +18,7 @@ from combo.default_model import default_ud_dataset_reader, default_data_loader
 from combo.modules.archival import load_archive, archive
 from combo.predict import COMBO
 from combo.data import api
+from combo.data import DatasetReader

 logging.setLoggerClass(ComboLogger)
 logger = logging.getLogger(__name__)
@@ -93,14 +94,6 @@ flags.DEFINE_enum(name="predictor_name", default="combo-lambo",
                   enum_values=["combo", "combo-spacy", "combo-lambo"],
                   help="Use predictor with whitespace, spacy or lambo (recommended) tokenizer.")

-def get_predictor() -> COMBO:
-    checks.file_exists(FLAGS.model_path)
-    arch = load_archive(FLAGS.model_path)
-    dataset_reader = default_ud_dataset_reader()
-    return COMBO(arch.model, dataset_reader)

 def run(_):
     if FLAGS.mode == 'train':
         if not FLAGS.finetuning:
@@ -211,13 +204,39 @@ def run(_):
                                                 keep_semrel=dataset_reader.use_sem).serialize())
     elif FLAGS.mode == 'predict':
-        predictor = get_predictor()
-        sentence = input("Sentence:")
-        prediction = predictor(sentence)
-        print("{:15} {:15} {:10} {:10} {:10}".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL'))
-        for token in prediction.tokens:
-            print("{:15} {:15} {:10} {:10} {:10}".format(token.text, token.lemma, token.upostag, token.head,
-                                                         token.deprel))
+        prefix = 'Predicting'
+        logger.info('Loading the model', prefix=prefix)
+        model, _, _, _, dataset_reader = load_archive(FLAGS.model_path)
+        if not dataset_reader:
+            logger.info("No dataset reader in the configuration or archive file - using a default UD dataset reader",
+                        prefix=prefix)
+            dataset_reader = default_ud_dataset_reader()
+        predictor = COMBO(model, dataset_reader)
+        if FLAGS.input_file == '-':
+            print("Interactive mode.")
+            sentence = input("Sentence: ")
+            prediction = predictor(sentence)
+            print("{:15} {:15} {:10} {:10} {:10}".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL'))
+            for token in prediction.tokens:
+                print("{:15} {:15} {:10} {:10} {:10}".format(token.text, token.lemma, token.upostag, token.head,
+                                                             token.deprel))
+        elif FLAGS.output_file:
+            checks.file_exists(FLAGS.input_file)
+            logger.info("Predicting examples from file", prefix=prefix)
+            test_trees = dataset_reader.read(FLAGS.input_file)
+            predictor = COMBO(model, dataset_reader)
+            with open(FLAGS.output_file, "w") as file:
+                for tree in tqdm(test_trees):
+                    file.writelines(api.sentence2conllu(predictor.predict_instance(tree),
+                                                        keep_semrel=dataset_reader.use_sem).serialize())
+        else:
+            msg = 'No output file for input file {input_file} specified.'.format(input_file=FLAGS.input_file)
+            logger.info(msg, prefix=prefix)
+            print(msg)

 def _get_ext_vars(finetuning: bool = False) -> Dict:
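The rewritten predict branch loads the model and, when present, the dataset reader straight from the archive, falling back to the default UD reader; the removed `get_predictor()` helper is no longer needed. A minimal sketch of the same flow used programmatically (the archive path is a placeholder, not a file from this repository):

``` python
# Sketch of the new predict-mode flow outside the CLI.
# "model.tar.gz" is a placeholder archive path.
from combo.default_model import default_ud_dataset_reader
from combo.modules.archival import load_archive
from combo.predict import COMBO

model, _, _, _, dataset_reader = load_archive("model.tar.gz")
if not dataset_reader:
    # Same fallback as in run(): the archive stored no dataset reader.
    dataset_reader = default_ud_dataset_reader()

predictor = COMBO(model, dataset_reader)
prediction = predictor("The quick brown fox jumps over the lazy dog.")
for token in prediction.tokens:
    print(token.text, token.lemma, token.upostag, token.head, token.deprel)
```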
"""
Adapted from COMBO 1.0
Author: Mateusz Klimaszewski
"""
from typing import List
import torch
from combo.config import FromParameters, Registry
from combo.config.from_parameters import register_arguments
from combo.nn.activations import Activation
@Registry.register('dilated_cnn')
class DilatedCnnEncoder(torch.nn.Module, FromParameters):
@register_arguments
def __init__(self,
input_dim: int,
filters: List[int],
kernel_size: List[int],
stride: List[int],
padding: List[int],
dilation: List[int],
activations: List[Activation]):
super().__init__()
conv1d_layers = []
input_dims = [input_dim] + filters[:-1]
output_dims = filters
for idx in range(len(activations)):
conv1d_layers.append(torch.nn.Conv1d(
in_channels=input_dims[idx],
out_channels=output_dims[idx],
kernel_size=(kernel_size[idx],),
stride=(stride[idx],),
padding=padding[idx],
dilation=(dilation[idx],)))
self.conv1d_layers = torch.nn.ModuleList(conv1d_layers)
self.activations = activations
assert len(self.activations) == len(self.conv1d_layers)
def forward(self, x: torch.FloatTensor) -> torch.FloatTensor:
for layer, activation in zip(self.conv1d_layers, self.activations):
x = activation(layer(x))
return x
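A quick shape check for the encoder above (a sketch; the hyperparameters mirror the notebook cell further down). With kernel size 3 and matching padding/dilation pairs (1, 1), (2, 2), (4, 4), each Conv1d preserves the sequence length, so only the channel count changes:

``` python
# Shape check for DilatedCnnEncoder; hyperparameters copied from the notebook below.
import torch
from combo.models.dilated_cnn import DilatedCnnEncoder
from combo.nn.activations import ReLUActivation, LinearActivation

encoder = DilatedCnnEncoder(
    input_dim=64,
    filters=[512, 256, 64],
    kernel_size=[3, 3, 3],
    stride=[1, 1, 1],
    padding=[1, 2, 4],
    dilation=[1, 2, 4],
    activations=[ReLUActivation(), ReLUActivation(), LinearActivation()],
)
x = torch.zeros(8, 64, 20)  # (batch, channels=input_dim, sequence length)
print(encoder(x).shape)     # torch.Size([8, 64, 20]): 64 output channels, length preserved
```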
@@ -7,7 +7,7 @@ from overrides import overrides
 from combo import data
 from combo.config import Registry
 from combo.config.from_parameters import register_arguments
-from combo.modules import dilated_cnn
+from combo.models import dilated_cnn
 from combo.nn import base
 from combo.nn.activations import Activation
 from combo.nn.utils import masked_cross_entropy
@@ -8,7 +8,7 @@ from overrides import overrides
 from combo.config import Registry
 from combo.config.from_parameters import register_arguments
 from combo.data import Vocabulary
-from combo.modules.dilated_cnn import DilatedCnnEncoder
+from combo.models.dilated_cnn import DilatedCnnEncoder
 from combo.modules.token_embedders import TokenEmbedder
 from typing import Optional
%% Cell type:code id:b28c7d8bacb08d02 tags:
``` python
# The path where the training and validation datasets are stored
TRAINING_DATA_PATH: str = '/Users/majajablonska/Documents/PDB/PDBUD_train.conllu'
VALIDATION_DATA_PATH: str = '/Users/majajablonska/Documents/PDB/PDBUD_val.conllu'
# The path where the model can be saved to
SERIALIZATION_DIR: str = "/Users/majajablonska/Documents/Workspace/combotest"
```
%% Cell type:code id:initial_id tags:
``` python
from combo.predict import COMBO
from combo.combo_model import ComboModel
from combo.data.vocabulary import Vocabulary
from combo.models.encoder import ComboEncoder, ComboStackedBidirectionalLSTM
from combo.modules.text_field_embedders import BasicTextFieldEmbedder
from combo.nn.base import Linear
from combo.modules.token_embedders import CharacterBasedWordEmbedder, TransformersWordEmbedder
from combo.modules import FeedForwardPredictor
from combo.nn.activations import ReLUActivation, TanhActivation, LinearActivation
from combo.models.dilated_cnn import DilatedCnnEncoder
from combo.data.tokenizers import LamboTokenizer, CharacterTokenizer
from combo.data.token_indexers import PretrainedTransformerIndexer, TokenConstPaddingCharactersIndexer, TokenFeatsIndexer, SingleIdTokenIndexer, PretrainedTransformerFixedMismatchedIndexer
from combo.data.dataset_readers import UniversalDependenciesDatasetReader
import torch
from combo.data.dataset_loaders import SimpleDataLoader
from combo.modules.parser import DependencyRelationModel, HeadPredictionModel
from combo.modules.lemma import LemmatizerModel
from combo.modules.morpho import MorphologicalFeatures
from combo.nn.regularizers.regularizers import L2Regularizer
import pytorch_lightning as pl
from combo.training.trainable_combo import TrainableCombo
from itertools import chain
```
%% Cell type:code id:d74957f422f0b05b tags:
``` python
def default_const_character_indexer(namespace=None):
    if namespace:
        return TokenConstPaddingCharactersIndexer(
            tokenizer=CharacterTokenizer(end_tokens=["__END__"],
                                         start_tokens=["__START__"]),
            min_padding_length=32,
            namespace=namespace
        )
    else:
        return TokenConstPaddingCharactersIndexer(
            tokenizer=CharacterTokenizer(end_tokens=["__END__"],
                                         start_tokens=["__START__"]),
            min_padding_length=32
        )

dataset_reader = UniversalDependenciesDatasetReader(
    features=["token", "char"],
    lemma_indexers={
        "char": default_const_character_indexer("lemma_characters")
    },
    targets=["deprel", "head", "upostag", "lemma", "feats", "xpostag"],
    token_indexers={
        "char": default_const_character_indexer(),
        "feats": TokenFeatsIndexer(),
        "lemma": default_const_character_indexer(),
        "token": PretrainedTransformerFixedMismatchedIndexer("bert-base-cased"),
        "upostag": SingleIdTokenIndexer(
            feature_name="pos_",
            namespace="upostag"
        ),
        "xpostag": SingleIdTokenIndexer(
            feature_name="tag_",
            namespace="xpostag"
        )
    },
    use_sem=False
)

data_loader = SimpleDataLoader.from_dataset_reader(dataset_reader,
                                                   data_path=TRAINING_DATA_PATH,
                                                   batch_size=16,
                                                   batches_per_epoch=4,
                                                   shuffle=True)
val_data_loader = SimpleDataLoader.from_dataset_reader(dataset_reader,
                                                       data_path=VALIDATION_DATA_PATH,
                                                       batch_size=16,
                                                       batches_per_epoch=4,
                                                       shuffle=True)

vocabulary = Vocabulary.from_instances_extended(
    chain(data_loader.iter_instances(), val_data_loader.iter_instances()),
    non_padded_namespaces=['head_labels'],
    only_include_pretrained_words=False,
    oov_token='_',
    padding_token='__PAD__'
)
```
%% Output
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
- Avoid using `tokenizers` before the fork if possible
- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
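The vocabulary is built from the training and validation instances together, so every namespace (characters, lemmas, tags, labels) is populated before the model is constructed. A small inspection sketch; `get_vocab_size` is assumed to follow the AllenNLP-style Vocabulary API that combo adapts:

``` python
# Inspection sketch (assumed AllenNLP-style API): report a few namespace sizes.
for ns in ["token_characters", "lemma_characters", "upostag", "deprel_labels"]:
    print(ns, vocabulary.get_vocab_size(ns))
```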
%% Cell type:code id:fa724d362fd6bd23 tags:
``` python
seq_encoder = ComboEncoder(layer_dropout_probability=0.33,
                           stacked_bilstm=ComboStackedBidirectionalLSTM(
                               hidden_size=512,
                               input_size=164,
                               layer_dropout_probability=0.33,
                               num_layers=2,
                               recurrent_dropout_probability=0.33
                           ))
```
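The BiLSTM dimensions are not arbitrary: input_size=164 is the concatenation of the two token embedders configured later (64-dim character CNN + 100-dim projected transformer), and the bidirectional hidden_size=512 yields 1024-dim token representations, which is why the downstream projection layers all use in_features=1024. The arithmetic, as a check:

``` python
# Dimension bookkeeping (plain arithmetic, no combo APIs involved).
char_dim, transformer_dim = 64, 100          # embedding_dim / projection_dim below
assert char_dim + transformer_dim == 164     # input_size of ComboStackedBidirectionalLSTM
hidden_size, num_directions = 512, 2
assert hidden_size * num_directions == 1024  # in_features of the projection layers
```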
%% Cell type:code id:f8a10f9892005fca tags:
``` python
char_words_embedder = CharacterBasedWordEmbedder(
    dilated_cnn_encoder=DilatedCnnEncoder(
        input_dim=64,
        kernel_size=[3, 3, 3],
        padding=[1, 2, 4],
        stride=[1, 1, 1],
        filters=[512, 256, 64],
        dilation=[1, 2, 4],
        activations=[ReLUActivation(), ReLUActivation(), LinearActivation()]
    ),
    embedding_dim=64,
    vocabulary=vocabulary
)
tokenizer = LamboTokenizer()
indexer = PretrainedTransformerIndexer('bert-base-cased')
data_loader.iter_instances()
```
%% Output
Using model LAMBO-UD_English-EWT
<generator object SimpleDataLoader.iter_instances at 0x7fdd1e0a0c80>
%% Cell type:code id:14413692656b68ac tags:
``` python
vocabulary.save_to_files('/Users/majajablonska/PycharmProjects/combo-lightning/tests/fixtures/train_vocabulary')
```
%% Output
Directory /Users/majajablonska/PycharmProjects/combo-lightning/tests/fixtures/train_vocabulary is not empty
%% Cell type:code id:437d12054baaffa1 tags:
``` python
from combo.nn import RegularizerApplicator
model = ComboModel(
    vocabulary=vocabulary,
    dependency_relation=DependencyRelationModel(
        vocabulary=vocabulary,
        dependency_projection_layer=Linear(
            activation=TanhActivation(),
            dropout_rate=0.25,
            in_features=1024,
            out_features=128
        ),
        head_predictor=HeadPredictionModel(
            cycle_loss_n=0,
            dependency_projection_layer=Linear(
                activation=TanhActivation(),
                in_features=1024,
                out_features=512
            ),
            head_projection_layer=Linear(
                activation=TanhActivation(),
                in_features=1024,
                out_features=512
            )
        ),
        head_projection_layer=Linear(
            activation=TanhActivation(),
            dropout_rate=0.25,
            in_features=1024,
            out_features=128
        ),
        vocab_namespace="deprel_labels"
    ),
    lemmatizer=LemmatizerModel(
        vocabulary=vocabulary,
        activations=[ReLUActivation(), ReLUActivation(), ReLUActivation(), LinearActivation()],
        char_vocab_namespace="token_characters",
        dilation=[1, 2, 4, 1],
        embedding_dim=256,
        filters=[256, 256, 256],
        input_projection_layer=Linear(
            activation=TanhActivation(),
            dropout_rate=0.25,
            in_features=1024,
            out_features=32
        ),
        kernel_size=[3, 3, 3, 1],
        lemma_vocab_namespace="lemma_characters",
        padding=[1, 2, 4, 0],
        stride=[1, 1, 1, 1]
    ),
    loss_weights={
        "deprel": 0.8,
        "feats": 0.2,
        "head": 0.2,
        "lemma": 0.05,
        "semrel": 0.05,
        "upostag": 0.05,
        "xpostag": 0.05
    },
    morphological_feat=MorphologicalFeatures(
        vocabulary=vocabulary,
        activations=[TanhActivation(), LinearActivation()],
        dropout=[0.25, 0.],
        hidden_dims=[128],
        input_dim=1024,
        num_layers=2,
        vocab_namespace="feats_labels"
    ),
    regularizer=RegularizerApplicator([
        (".*conv1d.*", L2Regularizer(1e-6)),
        (".*forward.*", L2Regularizer(1e-6)),
        (".*backward.*", L2Regularizer(1e-6)),
        (".*char_embed.*", L2Regularizer(1e-5))
    ]),
    seq_encoder=ComboEncoder(
        layer_dropout_probability=0.33,
        stacked_bilstm=ComboStackedBidirectionalLSTM(
            hidden_size=512,
            input_size=164,
            layer_dropout_probability=0.33,
            num_layers=2,
            recurrent_dropout_probability=0.33
        )
    ),
    text_field_embedder=BasicTextFieldEmbedder(
        token_embedders={
            "char": CharacterBasedWordEmbedder(
                vocabulary=vocabulary,
                dilated_cnn_encoder=DilatedCnnEncoder(
                    activations=[ReLUActivation(), ReLUActivation(), LinearActivation()],
                    dilation=[1, 2, 4],
                    filters=[512, 256, 64],
                    input_dim=64,
                    kernel_size=[3, 3, 3],
                    padding=[1, 2, 4],
                    stride=[1, 1, 1],
                ),
                embedding_dim=64
            ),
            "token": TransformersWordEmbedder("allegro/herbert-base-cased", projection_dim=100)
        }
    ),
    upos_tagger=FeedForwardPredictor.from_vocab(
        vocabulary=vocabulary,
        activations=[TanhActivation(), LinearActivation()],
        dropout=[0.25, 0.],
        hidden_dims=[64],
        input_dim=1024,
        num_layers=2,
        vocab_namespace="upostag_labels"
    ),
    xpos_tagger=FeedForwardPredictor.from_vocab(
        vocabulary=vocabulary,
        activations=[TanhActivation(), LinearActivation()],
        dropout=[0.25, 0.],
        hidden_dims=[64],
        input_dim=1024,
        num_layers=2,
        vocab_namespace="xpostag_labels"
    ),
    serialization_dir=SERIALIZATION_DIR
)
```
%% Output
Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.sso.sso_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.sso.sso_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
%% Cell type:code id:e131e0ec75dc6927 tags:
``` python
data_loader.index_with(vocabulary)
# Fetch a single batch to check that the instances index correctly.
batch = next(iter(data_loader))
```
%% Cell type:code id:195c71fcf8170ff tags:
``` python
val_data_loader.index_with(vocabulary)
```
%% Cell type:code id:cefc5173154d1605 tags:
``` python
nlp = TrainableCombo(model, torch.optim.Adam,
                     optimizer_kwargs={'betas': [0.9, 0.9], 'lr': 0.002},
                     validation_metrics=['EM'])
trainer = pl.Trainer(max_epochs=1,
                     default_root_dir=SERIALIZATION_DIR,
                     gradient_clip_val=5)
```
%% Output
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
/Users/majajablonska/miniconda/envs/combo/lib/python3.9/site-packages/pytorch_lightning/trainer/connectors/logger_connector/logger_connector.py:67: UserWarning: Starting from v1.9.0, `tensorboardX` has been removed as a dependency of the `pytorch_lightning` package, due to potential conflicts with other packages in the ML ecosystem. For this reason, `logger=True` will use `CSVLogger` as the default logger, unless the `tensorboard` or `tensorboardX` packages are found. Please `pip install lightning[extra]` or one of them to enable TensorBoard support by default
warning_cache.warn(
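Before fitting, the wiring can be smoke-tested with a validation-only pass; `Trainer.validate` is the standard pytorch_lightning API:

``` python
# Optional smoke test: one validation pass without any training.
trainer.validate(nlp, dataloaders=val_data_loader)
```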
%% Cell type:code id:e5af131bae4b1a33 tags:
``` python
trainer.fit(model=nlp, train_dataloaders=data_loader, val_dataloaders=val_data_loader)
```
%% Output
| Name | Type | Params
-------------------------------------
0 | model | ComboModel | 136 M
-------------------------------------
12.1 M Trainable params
124 M Non-trainable params
136 M Total params
546.115 Total estimated model params size (MB)
/Users/majajablonska/miniconda/envs/combo/lib/python3.9/site-packages/pytorch_lightning/utilities/data.py:76: UserWarning: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 16. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
warning_cache.warn(
/Users/majajablonska/miniconda/envs/combo/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py:280: PossibleUserWarning: The number of training batches (4) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.
rank_zero_warn(
`Trainer.fit` stopped: `max_epochs=1` reached.
%% Cell type:code id:3e23413c86063183 tags:
``` python
predictor = COMBO(model, dataset_reader)
```
%% Cell type:code id:d555d7f0223a624b tags:
``` python
a = predictor("Cześć, jestem psem.")
```
%% Cell type:code id:a68cd3861e1ceb67 tags:
``` python
print("{:15} {:15} {:10} {:10} {:10}".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL'))
for token in a.tokens:
print("{:15} {:15} {:10} {:10} {:10}".format(token.text, token.lemma, token.upostag, token.head, token.deprel))
```
%% Output
TOKEN           LEMMA           UPOS       HEAD       DEPREL
Cześć,          ??????          NOUN       0          root
jestem          ??????          NOUN       1          punct
psem.           ?????           NOUN       1          punct
%% Cell type:code id:d0f43f4493218b5 tags:
``` python
from combo.modules.archival import archive
```
%% Cell type:code id:ec92aa5bb5bb3605 tags:
``` python
archive(model, '/Users/majajablonska/Documents/combo', data_loader, val_data_loader, dataset_reader)
```
%% Output
'/Users/majajablonska/Documents/combo'
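To confirm the archive round-trips, it can be reloaded; this sketch assumes `load_archive` accepts the directory written above and returns the same five-tuple as in main.py:

``` python
# Round-trip sketch: reload the archived model and rebuild a predictor.
from combo.modules.archival import load_archive

model2, _, _, _, reader = load_archive('/Users/majajablonska/Documents/combo')
predictor2 = COMBO(model2, reader)
```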
%% Cell type:code id:5ad8a827586f65e3 tags:
``` python
```