diff --git a/combo/config/from_parameters.py b/combo/config/from_parameters.py index 5640ad416f315d9d80d41202661a24efdd848c61..62c8d7ee9f7e26d6371b9a63e29f5152812682cd 100644 --- a/combo/config/from_parameters.py +++ b/combo/config/from_parameters.py @@ -1,6 +1,7 @@ import inspect from typing import Any, Callable, Dict, List, Optional import typing +import functools import json @@ -24,9 +25,6 @@ def _resolve(values: typing.Union[Dict[str, Any], str], pass_down_parameters: Di if isinstance(values, Params): values = Params.as_dict() - tt = values.get("type", "?") if isinstance(values, dict) else values - print(f'Resolving {tt} with pass_down_parameters {pass_down_parameters}') - if isinstance(values, list): return [_resolve(v, pass_down_parameters) for v in values] @@ -68,6 +66,7 @@ def serialize_single_value(value: Any, pass_down_parameter_names: List[str] = No def register_arguments(func: callable): + @functools.wraps(func) def wrapper(*args, **kwargs): self_arg = args[0] if self_arg.constructed_args is None: diff --git a/combo/data/dataset_readers/universal_dependencies_dataset_reader.py b/combo/data/dataset_readers/universal_dependencies_dataset_reader.py index af53b155311a9504b3b3b937a9506dbf11f6e076..14866e7e8a0654f08b8b4328cc0681034fbad8ee 100644 --- a/combo/data/dataset_readers/universal_dependencies_dataset_reader.py +++ b/combo/data/dataset_readers/universal_dependencies_dataset_reader.py @@ -131,7 +131,7 @@ class UniversalDependenciesDatasetReader(DatasetReader, ABC): yield self.text_to_instance(annotation) - def text_to_instance(self, tree: conllu.TokenList) -> Instance: + def text_to_instance(self, tree: conllu.models.TokenList) -> Instance: fields_: Dict[str, Field] = {} tokens = [Token.from_conllu_token(t) for t in tree if isinstance(t["id"], int)] diff --git a/combo/data/vocabulary.py b/combo/data/vocabulary.py index da62cd9a0ff4318d58351b56448b81add7d375c4..0b3e0c6acb3549a53390aee1dc1b672f9c0b7ed7 100644 --- a/combo/data/vocabulary.py +++ 
b/combo/data/vocabulary.py @@ -8,7 +8,6 @@ from typing import Dict, Optional, Iterable, Set, List, Union, Any import logging from filelock import FileLock -from overrides import overrides from transformers import PreTrainedTokenizer from combo.common import Tqdm @@ -118,9 +117,9 @@ class _NamespaceDependentDefaultDict(defaultdict[str, NamespaceVocabulary]): @Registry.register("base_vocabulary") @Registry.register("from_files_vocabulary", "from_files") @Registry.register("from_pretrained_transformer_vocabulary", "from_pretrained_transformer") -@Registry.register("from_instances_vocabulary", "from_instances") +@Registry.register("from_data_loader_vocabulary", "from_data_loader") @Registry.register("from_pretrained_transformer_and_instances_vocabulary", "from_pretrained_transformer_and_instances") -@Registry.register("from_instances_extended_vocabulary", "from_instances_extended") +@Registry.register("from_data_loader_extended_vocabulary", "from_data_loader_extended") class Vocabulary(FromParameters): @register_arguments def __init__(self, @@ -306,6 +305,37 @@ class Vocabulary(FromParameters): @classmethod @register_arguments + def from_data_loader( + cls, + data_loader: "DataLoader", + min_count: Dict[str, int] = None, + max_vocab_size: Union[int, Dict[str, int]] = None, + non_padded_namespaces: Iterable[str] = DEFAULT_NON_PADDED_NAMESPACES, + pretrained_files: Optional[Dict[str, str]] = None, + only_include_pretrained_words: bool = False, + tokens_to_add: Dict[str, List[str]] = None, + min_pretrained_embeddings: Dict[str, int] = None, + padding_token: Optional[str] = DEFAULT_PADDING_TOKEN, + oov_token: Optional[str] = DEFAULT_OOV_TOKEN, + serialization_dir: Optional[str] = None + ) -> "Vocabulary": + vocab = cls.from_instances( + instances=data_loader.iter_instances(), + min_count=min_count, + max_vocab_size=max_vocab_size, + non_padded_namespaces=non_padded_namespaces, + pretrained_files=pretrained_files, + 
only_include_pretrained_words=only_include_pretrained_words, + tokens_to_add=tokens_to_add, + min_pretrained_embeddings=min_pretrained_embeddings, + padding_token=padding_token, + oov_token=oov_token, + serialization_dir=serialization_dir + ) + vocab.constructed_from = 'from_data_loader' + return vocab + + @classmethod def from_instances( cls, instances: Iterable["Instance"], @@ -349,7 +379,6 @@ class Vocabulary(FromParameters): oov_token=oov_token, serialization_dir=serialization_dir ) - vocab.constructed_from = 'from_instances' return vocab @classmethod @@ -389,8 +418,7 @@ class Vocabulary(FromParameters): pretrained_files=pretrained_files, only_include_pretrained_words=only_include_pretrained_words, tokens_to_add=tokens_to_add, - min_pretrained_embeddings=min_pretrained_embeddings, - serialization_dir=serialization_dir + min_pretrained_embeddings=min_pretrained_embeddings ) vocab.constructed_from = 'from_files_and_instances' return vocab @@ -649,6 +677,36 @@ class Vocabulary(FromParameters): @classmethod @register_arguments + def from_data_loader_extended( + cls, + data_loader: "DataLoader", + min_count: Dict[str, int] = None, + max_vocab_size: Union[int, Dict[str, int]] = None, + non_padded_namespaces: Iterable[str] = DEFAULT_NON_PADDED_NAMESPACES, + pretrained_files: Optional[Dict[str, str]] = None, + only_include_pretrained_words: bool = False, + min_pretrained_embeddings: Dict[str, int] = None, + padding_token: Optional[str] = DEFAULT_PADDING_TOKEN, + oov_token: Optional[str] = DEFAULT_OOV_TOKEN, + serialization_dir: Optional[str] = None + ) -> "Vocabulary": + + vocab = cls.from_instances_extended( + instances=data_loader.iter_instances(), + min_count=min_count, + max_vocab_size=max_vocab_size, + non_padded_namespaces=non_padded_namespaces, + pretrained_files=pretrained_files, + only_include_pretrained_words=only_include_pretrained_words, + min_pretrained_embeddings=min_pretrained_embeddings, + padding_token=padding_token, + oov_token=oov_token, +
serialization_dir=serialization_dir + ) + vocab.constructed_from = 'from_data_loader_extended' + return vocab + + @classmethod def from_instances_extended( cls, instances: Iterable["Instance"], @@ -695,17 +753,6 @@ class Vocabulary(FromParameters): vocab.constructed_from = 'from_instances_extended' return vocab - @overrides - def serialize(self, pass_down_parameter_names: List[str] = None) -> Dict[str, Any]: - if self._serialization_dir is None: - raise ConfigurationError("To serialize a vocabulary, serialization_dir needs to be provided") - self.save_to_files(self._serialization_dir) - return {'type': 'from_files_vocabulary', - 'parameters': { - 'directory': self._serialization_dir, - 'padding_token': self._padding_token, - 'oov_token': self._oov_token - }} def get_slices_if_not_provided(vocab: Vocabulary): if hasattr(vocab, "slices"): diff --git a/combo/default_model.py b/combo/default_model.py index 4f0159d55853f226d46f9f77765ee279eb32dda3..520a63b558ef86e6ddbebfcb43f1a16b1c9e7ea0 100644 --- a/combo/default_model.py +++ b/combo/default_model.py @@ -17,7 +17,7 @@ from combo.modules.token_embedders import CharacterBasedWordEmbedder, Transforme from combo.nn.activations import ReLUActivation, TanhActivation, LinearActivation from combo.modules import FeedForwardPredictor from combo.nn.base import Linear -from combo.nn.regularizers import RegularizerApplicator +from combo.nn.regularizers import Regularizer from combo.nn.regularizers.regularizers import L2Regularizer @@ -153,7 +153,7 @@ def default_model(vocabulary: Vocabulary) -> ComboModel: num_layers=2, vocab_namespace="feats_labels" ), - regularizer=RegularizerApplicator([ + regularizer=Regularizer([ (".*conv1d.*", L2Regularizer(1e-6)), (".*forward.*", L2Regularizer(1e-6)), (".*backward.*", L2Regularizer(1e-6)), diff --git a/combo/models/combo_model.py b/combo/models/combo_model.py index bd6088507d8b79023751b541e236d15fe4d49e4a..33ff8a54139f448cb5953bb38db7ff8b6375b16a 100644 --- a/combo/models/combo_model.py +++ 
b/combo/models/combo_model.py @@ -14,7 +14,7 @@ from combo.modules.parser import DependencyRelationModel from combo.modules import TextFieldEmbedder from combo.modules.model import Model from combo.modules.seq2seq_encoders.seq2seq_encoder import Seq2SeqEncoder -from combo.nn import RegularizerApplicator, base +from combo.nn import Regularizer, base from combo.nn.utils import get_text_field_mask from combo.utils import metrics @@ -40,7 +40,7 @@ class ComboModel(Model, FromParameters): morphological_feat: MorphologicalFeatures = None, dependency_relation: DependencyRelationModel = None, enhanced_dependency_relation: DependencyRelationModel = None, - regularizer: RegularizerApplicator = None, + regularizer: Regularizer = None, serialization_dir: Optional[str] = None) -> None: super().__init__(vocabulary, regularizer, serialization_dir) diff --git a/combo/modules/model.py b/combo/modules/model.py index 413e9ebb22f091a4141ce2a014995f0a021230f3..65c840810ace8b71f1ad7a2da020e4f9603ac534 100644 --- a/combo/modules/model.py +++ b/combo/modules/model.py @@ -21,7 +21,7 @@ from combo.data import Vocabulary, Instance from combo.data.batch import Batch from combo.data.dataset_loaders.dataset_loader import TensorDict from combo.modules.module import Module -from combo.nn import utils, RegularizerApplicator +from combo.nn import utils, Regularizer from combo.nn.utils import device_mapping from combo.utils import ConfigurationError @@ -69,7 +69,7 @@ class Model(Module, FromParameters): In a typical AllenNLP configuration file, this parameter does not get an entry under the "model", it gets specified as a top-level parameter, then is passed in to the model separately. - regularizer: `RegularizerApplicator`, optional + regularizer: `Regularizer`, optional If given, the `Trainer` will use this to regularize model parameters. serialization_dir: `str`, optional The directory in which the training output is saved to, or the directory the model is loaded from. 
@@ -82,7 +82,7 @@ class Model(Module, FromParameters): def __init__( self, vocabulary: Vocabulary, - regularizer: RegularizerApplicator = None, + regularizer: Regularizer = None, serialization_dir: Optional[str] = None, ) -> None: super(Model, self).__init__() diff --git a/combo/nn/regularizers/__init__.py b/combo/nn/regularizers/__init__.py index 60bc9d4a248589c34125b369eb96d15a5831fcc7..27f101d7bd31e0502eee7722c973b0d2629f62fe 100644 --- a/combo/nn/regularizers/__init__.py +++ b/combo/nn/regularizers/__init__.py @@ -1,2 +1,2 @@ from .regularizers import * -from .regularizer_applicator import * \ No newline at end of file +from .regularizer import * \ No newline at end of file diff --git a/combo/nn/regularizers/regularizer_applicator.py b/combo/nn/regularizers/regularizer.py similarity index 56% rename from combo/nn/regularizers/regularizer_applicator.py rename to combo/nn/regularizers/regularizer.py index f1603609f4cf00825e0b3c059010d890717a779a..76cb7e6ec71431e29749bfdabd5607b646095ce6 100644 --- a/combo/nn/regularizers/regularizer_applicator.py +++ b/combo/nn/regularizers/regularizer.py @@ -1,5 +1,5 @@ import re -from typing import List, Tuple +from typing import List, Tuple, Dict, Any import torch @@ -7,9 +7,13 @@ from combo.config import FromParameters, Registry from combo.config.from_parameters import register_arguments from combo.nn.regularizers import Regularizer +from overrides import overrides + +from combo.utils import ConfigurationError + @Registry.register('base_regularizer') -class RegularizerApplicator(FromParameters): +class Regularizer(FromParameters): """ Applies regularizers to the parameters of a Module based on regex matches. 
""" @@ -41,3 +45,22 @@ class RegularizerApplicator(FromParameters): accumulator = accumulator + penalty break return accumulator + + @classmethod + def from_parameters(cls, + parameters: Dict[str, Any] = None, + constructor_method_name: str = None, + pass_down_parameters: Dict[str, Any] = None): + regexes = parameters.get('regexes', []) + regexes_to_pass = [] + for regex, regularizer_dict in regexes: + if 'type' not in regularizer_dict: + raise ConfigurationError('Regularizer dict does not have the type field') + resolved_regularizer, resolved_regularizer_constr = Registry.resolve(regularizer_dict['type']) + regexes_to_pass.append((regex, + resolved_regularizer.from_parameters( + regularizer_dict.get('parameters', {}), + resolved_regularizer_constr, + pass_down_parameters + ))) + return cls(regexes_to_pass) diff --git a/combo/polish_model_training.ipynb b/combo/polish_model_training.ipynb index f0c654dbc99157d4b715810e21bf04676ca6bb6e..e6fd3c4174ae0c898d9fcdb987f143b2d5ae6741 100644 --- a/combo/polish_model_training.ipynb +++ b/combo/polish_model_training.ipynb @@ -7,8 +7,8 @@ "metadata": { "collapsed": true, "ExecuteTime": { - "end_time": "2023-10-04T14:17:39.094271Z", - "start_time": "2023-10-04T14:17:35.329633Z" + "end_time": "2023-10-08T13:28:38.808164Z", + "start_time": "2023-10-08T13:28:35.145356Z" } }, "outputs": [], @@ -30,7 +30,7 @@ "from combo.modules.parser import DependencyRelationModel, HeadPredictionModel\n", "from combo.modules.lemma import LemmatizerModel\n", "from combo.modules.morpho import MorphologicalFeatures\n", - "from combo.nn.regularizers import RegularizerApplicator\n", + "from combo.nn.regularizers import Regularizer\n", "from combo.nn.regularizers.regularizers import L2Regularizer\n", "\n", "from combo.training.trainable_combo import TrainableCombo" @@ -56,7 +56,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "102cf8c68b9e4cc39e904c68d91f368c" + "model_id": 
"37164bdb71764f9abaddea25d25cd2ba" } }, "metadata": {}, @@ -68,7 +68,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "b96808a366d74f9395c63bbb0d0d34c1" + "model_id": "9487ea68d13d4082a63b6063d031390a" } }, "metadata": {}, @@ -80,7 +80,7 @@ "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, - "model_id": "a071f8b7d82646d8bdfdc48f2113d814" + "model_id": "fe5b8e7ad7e3450aa870d572eaccaf3f" } }, "metadata": {}, @@ -134,17 +134,15 @@ " data_path=TRAIN_FILE_PATH,\n", " batch_size=16,\n", " batches_per_epoch=4,\n", - " shuffle=True,\n", - " collate_fn=lambda instances: Batch(instances).as_tensor_dict())\n", + " shuffle=True)\n", "val_data_loader = SimpleDataLoader.from_dataset_reader(dataset_reader,\n", " data_path=VAL_FILE_PATH,\n", " batch_size=16,\n", " batches_per_epoch=4,\n", - " shuffle=True,\n", - " collate_fn=lambda instances: Batch(instances).as_tensor_dict())\n", + " shuffle=True)\n", "\n", - "vocabulary = Vocabulary.from_instances_extended(\n", - " data_loader.iter_instances(),\n", + "vocabulary = Vocabulary.from_data_loader_extended(\n", + " data_loader,\n", " non_padded_namespaces=['head_labels'],\n", " only_include_pretrained_words=False,\n", " oov_token='_',\n", @@ -154,8 +152,8 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T14:18:10.003185Z", - "start_time": "2023-10-04T14:17:39.099526Z" + "end_time": "2023-10-08T13:29:08.749171Z", + "start_time": "2023-10-08T13:28:38.803643Z" } }, "id": "d74957f422f0b05b" @@ -163,6 +161,31 @@ { "cell_type": "code", "execution_count": 3, + "outputs": [ + { + "data": { + "text/plain": "{'type': 'from_data_loader_extended_vocabulary',\n 'parameters': {'data_loader': {'type': 'simple_data_loader_from_dataset_reader',\n 'parameters': {'reader': {'type': 'conllu_dataset_reader',\n 'parameters': {'features': ['token', 'char'],\n 'lemma_indexers': {'char': {'type': 'characters_const_padding_token_indexer',\n 
'parameters': {'tokenizer': {'type': 'character_tokenizer',\n 'parameters': {'end_tokens': ['__END__'],\n 'start_tokens': ['__START__']}},\n 'min_padding_length': 32,\n 'namespace': 'lemma_characters'}}},\n 'targets': ['deprel', 'head', 'upostag', 'lemma', 'feats', 'xpostag'],\n 'token_indexers': {'char': {'type': 'characters_const_padding_token_indexer',\n 'parameters': {'tokenizer': {'type': 'character_tokenizer',\n 'parameters': {'end_tokens': ['__END__'],\n 'start_tokens': ['__START__']}},\n 'min_padding_length': 32}},\n 'token': {'type': 'pretrained_transformer_mismatched_fixed_token_indexer',\n 'parameters': {'model_name': 'bert-base-cased'}}},\n 'use_sem': False}},\n 'data_path': '/Users/majajablonska/Downloads/PDBUD-master-85167180bcbe0565a09269257456961365cf6ff3/PDB-UD/PDB-UD/PDBUD_train.conllu',\n 'batch_size': 16,\n 'batches_per_epoch': 4,\n 'shuffle': True}},\n 'non_padded_namespaces': ['head_labels'],\n 'only_include_pretrained_words': False,\n 'oov_token': '_',\n 'padding_token': '__PAD__'}}" + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vocabulary.serialize()" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-08T13:29:08.766564Z", + "start_time": "2023-10-08T13:29:08.755088Z" + } + }, + "id": "b1688e302d5db9ff" + }, + { + "cell_type": "code", + "execution_count": 4, "outputs": [], "source": [ "seq_encoder = ComboEncoder(layer_dropout_probability=0.33,\n", @@ -177,15 +200,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T14:18:10.548453Z", - "start_time": "2023-10-04T14:18:10.004200Z" + "end_time": "2023-10-08T13:29:09.498266Z", + "start_time": "2023-10-08T13:29:08.767101Z" } }, "id": "fa724d362fd6bd23" }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "outputs": [ { "name": "stdout", @@ -196,9 +219,9 @@ }, { "data": { - "text/plain": "<generator object SimpleDataLoader.iter_instances at 
0x7fe9a2ebf510>" + "text/plain": "<generator object SimpleDataLoader.iter_instances at 0x7f9b300c4820>" }, - "execution_count": 4, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -224,21 +247,21 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T14:18:10.601841Z", - "start_time": "2023-10-04T14:18:10.553394Z" + "end_time": "2023-10-08T13:29:09.542951Z", + "start_time": "2023-10-08T13:29:09.502284Z" } }, "id": "f8a10f9892005fca" }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.sso.sso_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.sso.sso_relationship.weight']\n", + "Some weights of the model checkpoint at allegro/herbert-base-cased were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.sso.sso_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.sso.sso_relationship.weight', 'cls.predictions.transform.dense.bias']\n", "- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", "- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n" ] @@ -312,7 +335,7 @@ " num_layers=2,\n", " vocab_namespace=\"feats_labels\"\n", " ),\n", - " regularizer=RegularizerApplicator([\n", + " regularizer=Regularizer([\n", " (\".*conv1d.*\", L2Regularizer(1e-6)),\n", " (\".*forward.*\", L2Regularizer(1e-6)),\n", " (\".*backward.*\", L2Regularizer(1e-6)),\n", @@ -370,45 +393,37 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T14:18:15.680319Z", - "start_time": "2023-10-04T14:18:10.576516Z" + "end_time": "2023-10-08T13:29:13.170798Z", + "start_time": "2023-10-08T13:29:09.537173Z" } }, "id": "437d12054baaffa1" }, { "cell_type": "code", - "execution_count": 6, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Directory /Users/majajablonska/.combo/test_model/vocabulary is not empty\n" - ] - } - ], + "execution_count": 7, + "outputs": [], "source": [ "serialized = model.serialize()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T14:18:15.699389Z", - "start_time": "2023-10-04T14:18:15.666866Z" + "end_time": "2023-10-08T13:29:13.180014Z", + "start_time": "2023-10-08T13:29:13.109173Z" } }, "id": "d6d27128951828d8" }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "outputs": [ { "data": { - "text/plain": "{'type': 'semantic_multitask',\n 'parameters': {'vocabulary': {'type': 'from_files_vocabulary',\n 'parameters': {'directory': '/Users/majajablonska/.combo/test_model/vocabulary',\n 'padding_token': '__PAD__',\n 'oov_token': '_'}},\n 'dependency_relation': {'type': 'combo_dependency_parsing_from_vocab',\n 'parameters': {'dependency_projection_layer': {'type': 'linear_layer',\n 'parameters': 
{'activation': {'type': 'tanh', 'parameters': {}},\n 'dropout_rate': 0.25,\n 'in_features': 1024,\n 'out_features': 128}},\n 'head_predictor': {'type': 'head_prediction',\n 'parameters': {'cycle_loss_n': 0,\n 'dependency_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'in_features': 1024,\n 'out_features': 512}},\n 'head_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'in_features': 1024,\n 'out_features': 512}}}},\n 'head_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'dropout_rate': 0.25,\n 'in_features': 1024,\n 'out_features': 128}},\n 'vocab_namespace': 'deprel_labels'}},\n 'lemmatizer': {'type': 'combo_lemma_predictor_from_vocab',\n 'parameters': {'activations': [{'type': 'gelu', 'parameters': {}},\n {'type': 'gelu', 'parameters': {}},\n {'type': 'gelu', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'char_vocab_namespace': 'token_characters',\n 'dilation': [1, 2, 4, 1],\n 'embedding_dim': 256,\n 'filters': [256, 256, 256],\n 'input_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'dropout_rate': 0.25,\n 'in_features': 1024,\n 'out_features': 32}},\n 'kernel_size': [3, 3, 3, 1],\n 'lemma_vocab_namespace': 'lemma_characters',\n 'padding': [1, 2, 4, 0],\n 'stride': [1, 1, 1, 1]}},\n 'loss_weights': {'deprel': 0.8,\n 'feats': 0.2,\n 'head': 0.2,\n 'lemma': 0.05,\n 'semrel': 0.05,\n 'upostag': 0.05,\n 'xpostag': 0.05},\n 'morphological_feat': {'type': 'combo_morpho_from_vocab',\n 'parameters': {'activations': [{'type': 'tanh', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dropout': [0.25, 0.0],\n 'hidden_dims': [128],\n 'input_dim': 1024,\n 'num_layers': 2,\n 'vocab_namespace': 'feats_labels'}},\n 'regularizer': {'type': 'base_regularizer',\n 'parameters': {'regexes': 
[('.*conv1d.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-06}}),\n ('.*forward.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-06}}),\n ('.*backward.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-06}}),\n ('.*char_embed.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-05}})]}},\n 'seq_encoder': {'type': 'combo_encoder',\n 'parameters': {'layer_dropout_probability': 0.33,\n 'stacked_bilstm': {'type': 'combo_stacked_bilstm',\n 'parameters': {'hidden_size': 512,\n 'input_size': 164,\n 'layer_dropout_probability': 0.33,\n 'num_layers': 2,\n 'recurrent_dropout_probability': 0.33}}}},\n 'text_field_embedder': {'type': 'base_text_field_embedder',\n 'parameters': {'token_embedders': {'char': {'type': 'char_embeddings_token_embedder',\n 'parameters': {'dilated_cnn_encoder': {'type': 'dilated_cnn',\n 'parameters': {'activations': [{'type': 'gelu', 'parameters': {}},\n {'type': 'gelu', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dilation': [1, 2, 4],\n 'filters': [512, 256, 64],\n 'input_dim': 64,\n 'kernel_size': [3, 3, 3],\n 'padding': [1, 2, 4],\n 'stride': [1, 1, 1]}},\n 'embedding_dim': 64}},\n 'token': {'type': 'transformers_word_embedder',\n 'parameters': {'model_name': 'allegro/herbert-base-cased',\n 'projection_dim': 100}}}}},\n 'upos_tagger': {'type': 'feedforward_predictor',\n 'parameters': {'feedforward_network': {'type': 'feedforward_layer',\n 'parameters': {'input_dim': 1024,\n 'num_layers': 2,\n 'hidden_dims': [64, 19],\n 'activations': [{'type': 'tanh', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dropout': [0.25, 0.0]}}}},\n 'xpos_tagger': {'type': 'feedforward_predictor',\n 'parameters': {'feedforward_network': {'type': 'feedforward_layer',\n 'parameters': {'input_dim': 1024,\n 'num_layers': 2,\n 'hidden_dims': [64, 859],\n 'activations': [{'type': 'tanh', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dropout': [0.25, 0.0]}}}},\n 'serialization_dir': 
'/Users/majajablonska/.combo/test_model'},\n 'weights': '/Users/majajablonska/.combo/test_model/weights.pth'}" + "text/plain": "{'type': 'semantic_multitask',\n 'parameters': {'vocabulary': {'type': 'from_data_loader_extended_vocabulary',\n 'parameters': {'data_loader': {'type': 'simple_data_loader_from_dataset_reader',\n 'parameters': {'reader': {'type': 'conllu_dataset_reader',\n 'parameters': {'features': ['token', 'char'],\n 'lemma_indexers': {'char': {'type': 'characters_const_padding_token_indexer',\n 'parameters': {'tokenizer': {'type': 'character_tokenizer',\n 'parameters': {'end_tokens': ['__END__'],\n 'start_tokens': ['__START__']}},\n 'min_padding_length': 32,\n 'namespace': 'lemma_characters'}}},\n 'targets': ['deprel', 'head', 'upostag', 'lemma', 'feats', 'xpostag'],\n 'token_indexers': {'char': {'type': 'characters_const_padding_token_indexer',\n 'parameters': {'tokenizer': {'type': 'character_tokenizer',\n 'parameters': {'end_tokens': ['__END__'],\n 'start_tokens': ['__START__']}},\n 'min_padding_length': 32}},\n 'token': {'type': 'pretrained_transformer_mismatched_fixed_token_indexer',\n 'parameters': {'model_name': 'bert-base-cased'}}},\n 'use_sem': False}},\n 'data_path': '/Users/majajablonska/Downloads/PDBUD-master-85167180bcbe0565a09269257456961365cf6ff3/PDB-UD/PDB-UD/PDBUD_train.conllu',\n 'batch_size': 16,\n 'batches_per_epoch': 4,\n 'shuffle': True}},\n 'non_padded_namespaces': ['head_labels'],\n 'only_include_pretrained_words': False,\n 'oov_token': '_',\n 'padding_token': '__PAD__'}},\n 'dependency_relation': {'type': 'combo_dependency_parsing_from_vocab',\n 'parameters': {'dependency_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'dropout_rate': 0.25,\n 'in_features': 1024,\n 'out_features': 128}},\n 'head_predictor': {'type': 'head_prediction',\n 'parameters': {'cycle_loss_n': 0,\n 'dependency_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': 
{'type': 'tanh', 'parameters': {}},\n 'in_features': 1024,\n 'out_features': 512}},\n 'head_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'in_features': 1024,\n 'out_features': 512}}}},\n 'head_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'dropout_rate': 0.25,\n 'in_features': 1024,\n 'out_features': 128}},\n 'vocab_namespace': 'deprel_labels'}},\n 'lemmatizer': {'type': 'combo_lemma_predictor_from_vocab',\n 'parameters': {'activations': [{'type': 'gelu', 'parameters': {}},\n {'type': 'gelu', 'parameters': {}},\n {'type': 'gelu', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'char_vocab_namespace': 'token_characters',\n 'dilation': [1, 2, 4, 1],\n 'embedding_dim': 256,\n 'filters': [256, 256, 256],\n 'input_projection_layer': {'type': 'linear_layer',\n 'parameters': {'activation': {'type': 'tanh', 'parameters': {}},\n 'dropout_rate': 0.25,\n 'in_features': 1024,\n 'out_features': 32}},\n 'kernel_size': [3, 3, 3, 1],\n 'lemma_vocab_namespace': 'lemma_characters',\n 'padding': [1, 2, 4, 0],\n 'stride': [1, 1, 1, 1]}},\n 'loss_weights': {'deprel': 0.8,\n 'feats': 0.2,\n 'head': 0.2,\n 'lemma': 0.05,\n 'semrel': 0.05,\n 'upostag': 0.05,\n 'xpostag': 0.05},\n 'morphological_feat': {'type': 'combo_morpho_from_vocab',\n 'parameters': {'activations': [{'type': 'tanh', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dropout': [0.25, 0.0],\n 'hidden_dims': [128],\n 'input_dim': 1024,\n 'num_layers': 2,\n 'vocab_namespace': 'feats_labels'}},\n 'regularizer': {'type': 'base_regularizer',\n 'parameters': {'regexes': [('.*conv1d.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-06}}),\n ('.*forward.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-06}}),\n ('.*backward.*',\n {'type': 'l2_regularizer', 'parameters': {'alpha': 1e-06}}),\n ('.*char_embed.*',\n {'type': 'l2_regularizer', 
'parameters': {'alpha': 1e-05}})]}},\n 'seq_encoder': {'type': 'combo_encoder',\n 'parameters': {'layer_dropout_probability': 0.33,\n 'stacked_bilstm': {'type': 'combo_stacked_bilstm',\n 'parameters': {'hidden_size': 512,\n 'input_size': 164,\n 'layer_dropout_probability': 0.33,\n 'num_layers': 2,\n 'recurrent_dropout_probability': 0.33}}}},\n 'text_field_embedder': {'type': 'base_text_field_embedder',\n 'parameters': {'token_embedders': {'char': {'type': 'char_embeddings_token_embedder',\n 'parameters': {'dilated_cnn_encoder': {'type': 'dilated_cnn',\n 'parameters': {'activations': [{'type': 'gelu', 'parameters': {}},\n {'type': 'gelu', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dilation': [1, 2, 4],\n 'filters': [512, 256, 64],\n 'input_dim': 64,\n 'kernel_size': [3, 3, 3],\n 'padding': [1, 2, 4],\n 'stride': [1, 1, 1]}},\n 'embedding_dim': 64}},\n 'token': {'type': 'transformers_word_embedder',\n 'parameters': {'model_name': 'allegro/herbert-base-cased',\n 'projection_dim': 100}}}}},\n 'upos_tagger': {'type': 'feedforward_predictor',\n 'parameters': {'feedforward_network': {'type': 'feedforward_layer',\n 'parameters': {'input_dim': 1024,\n 'num_layers': 2,\n 'hidden_dims': [64, 19],\n 'activations': [{'type': 'tanh', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dropout': [0.25, 0.0]}}}},\n 'xpos_tagger': {'type': 'feedforward_predictor',\n 'parameters': {'feedforward_network': {'type': 'feedforward_layer',\n 'parameters': {'input_dim': 1024,\n 'num_layers': 2,\n 'hidden_dims': [64, 859],\n 'activations': [{'type': 'tanh', 'parameters': {}},\n {'type': 'linear', 'parameters': {}}],\n 'dropout': [0.25, 0.0]}}}},\n 'serialization_dir': '/Users/majajablonska/.combo/test_model'},\n 'weights': '/Users/majajablonska/.combo/test_model/weights.pth'}" }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -419,42 +434,64 @@ "metadata": { "collapsed": false, "ExecuteTime": { - 
"end_time": "2023-10-04T14:18:15.703790Z", - "start_time": "2023-10-04T14:18:15.697386Z" + "end_time": "2023-10-08T13:29:13.183826Z", + "start_time": "2023-10-08T13:29:13.124423Z" } }, "id": "5f375b17cdb4de9" }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Resolving from_files_vocabulary with pass_down_parameters {}\n", - "Resolving ? with pass_down_parameters {}\n", - "Resolving /Users/majajablonska/.combo/test_model/vocabulary with pass_down_parameters {}\n", - "Resolving __PAD__ with pass_down_parameters {}\n", - "Resolving _ with pass_down_parameters {}\n" - ] + "data": { + "text/plain": "{'type': 'from_data_loader_extended_vocabulary',\n 'parameters': {'data_loader': {'type': 'simple_data_loader_from_dataset_reader',\n 'parameters': {'reader': {'type': 'conllu_dataset_reader',\n 'parameters': {'features': ['token', 'char'],\n 'lemma_indexers': {'char': {'type': 'characters_const_padding_token_indexer',\n 'parameters': {'tokenizer': {'type': 'character_tokenizer',\n 'parameters': {'end_tokens': ['__END__'],\n 'start_tokens': ['__START__']}},\n 'min_padding_length': 32,\n 'namespace': 'lemma_characters'}}},\n 'targets': ['deprel', 'head', 'upostag', 'lemma', 'feats', 'xpostag'],\n 'token_indexers': {'char': {'type': 'characters_const_padding_token_indexer',\n 'parameters': {'tokenizer': {'type': 'character_tokenizer',\n 'parameters': {'end_tokens': ['__END__'],\n 'start_tokens': ['__START__']}},\n 'min_padding_length': 32}},\n 'token': {'type': 'pretrained_transformer_mismatched_fixed_token_indexer',\n 'parameters': {'model_name': 'bert-base-cased'}}},\n 'use_sem': False}},\n 'data_path': '/Users/majajablonska/Downloads/PDBUD-master-85167180bcbe0565a09269257456961365cf6ff3/PDB-UD/PDB-UD/PDBUD_train.conllu',\n 'batch_size': 16,\n 'batches_per_epoch': 4,\n 'shuffle': True}},\n 'non_padded_namespaces': ['head_labels'],\n 'only_include_pretrained_words': False,\n 
'oov_token': '_',\n 'padding_token': '__PAD__'}}" + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "serialized['parameters']['vocabulary']" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-08T13:29:13.190030Z", + "start_time": "2023-10-08T13:29:13.130388Z" + } + }, + "id": "568f82d49e0c9406" + }, + { + "cell_type": "code", + "execution_count": 10, + "outputs": [ + { + "data": { + "text/plain": "loading instances: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "0b44941761b642ac9cb186ebf57ca269" + } + }, + "metadata": {}, + "output_type": "display_data" }, { - "ename": "TypeError", - "evalue": "from_files() missing 1 required positional argument: 'directory'", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mTypeError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[0;32mIn[8], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m model \u001B[38;5;241m=\u001B[39m \u001B[43mComboModel\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfrom_parameters\u001B[49m\u001B[43m(\u001B[49m\u001B[43mserialized\u001B[49m\u001B[43m)\u001B[49m\n", - "File \u001B[0;32m~/PycharmProjects/combo-lightning/combo/modules/model.py:494\u001B[0m, in \u001B[0;36mModel.from_parameters\u001B[0;34m(cls, parameters, constructor_method_name, pass_down_parameters)\u001B[0m\n\u001B[1;32m 488\u001B[0m \u001B[38;5;129m@classmethod\u001B[39m\n\u001B[1;32m 489\u001B[0m \u001B[38;5;129m@overrides\u001B[39m\n\u001B[1;32m 490\u001B[0m \u001B[38;5;28;01mdef\u001B[39;00m \u001B[38;5;21mfrom_parameters\u001B[39m(\u001B[38;5;28mcls\u001B[39m,\n\u001B[1;32m 491\u001B[0m parameters: Dict[\u001B[38;5;28mstr\u001B[39m, Any] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 492\u001B[0m 
constructor_method_name: \u001B[38;5;28mstr\u001B[39m \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m,\n\u001B[1;32m 493\u001B[0m pass_down_parameters: Dict[\u001B[38;5;28mstr\u001B[39m, Any] \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;01mNone\u001B[39;00m):\n\u001B[0;32m--> 494\u001B[0m constructed_model \u001B[38;5;241m=\u001B[39m \u001B[38;5;28;43msuper\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43m)\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfrom_parameters\u001B[49m\u001B[43m(\u001B[49m\u001B[43mparameters\u001B[49m\u001B[43m[\u001B[49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[38;5;124;43mparameters\u001B[39;49m\u001B[38;5;124;43m'\u001B[39;49m\u001B[43m]\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 495\u001B[0m constructed_model\u001B[38;5;241m.\u001B[39mload_state_dict(torch\u001B[38;5;241m.\u001B[39mload(parameters[\u001B[38;5;124m'\u001B[39m\u001B[38;5;124mweights\u001B[39m\u001B[38;5;124m'\u001B[39m]))\n\u001B[1;32m 496\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m constructed_model\n", - "File \u001B[0;32m~/PycharmProjects/combo-lightning/combo/config/from_parameters.py:128\u001B[0m, in \u001B[0;36mFromParameters.from_parameters\u001B[0;34m(cls, parameters, constructor_method_name, pass_down_parameters)\u001B[0m\n\u001B[1;32m 126\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m param_name, param_value \u001B[38;5;129;01min\u001B[39;00m parameters\u001B[38;5;241m.\u001B[39mitems():\n\u001B[1;32m 127\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m param_name \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mcls\u001B[39m\u001B[38;5;241m.\u001B[39mpass_down_parameter_names():\n\u001B[0;32m--> 128\u001B[0m resolved_value \u001B[38;5;241m=\u001B[39m \u001B[43m_resolve\u001B[49m\u001B[43m(\u001B[49m\u001B[43mparam_value\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpass_down_parameters\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 129\u001B[0m pass_down_parameters[param_name] \u001B[38;5;241m=\u001B[39m resolved_value\n\u001B[1;32m 
131\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m param_name \u001B[38;5;129;01min\u001B[39;00m constructor_method_args:\n", - "File \u001B[0;32m~/PycharmProjects/combo-lightning/combo/config/from_parameters.py:47\u001B[0m, in \u001B[0;36m_resolve\u001B[0;34m(values, pass_down_parameters)\u001B[0m\n\u001B[1;32m 44\u001B[0m \u001B[38;5;28;01mif\u001B[39;00m constructor_method \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28;01mNone\u001B[39;00m:\n\u001B[1;32m 45\u001B[0m \u001B[38;5;28;01mraise\u001B[39;00m ConfigurationError(\u001B[38;5;28mstr\u001B[39m(clz) \u001B[38;5;241m+\u001B[39m \u001B[38;5;124m'\u001B[39m\u001B[38;5;124m has no constructor method with name \u001B[39m\u001B[38;5;124m'\u001B[39m \u001B[38;5;241m+\u001B[39m constructor)\n\u001B[0;32m---> 47\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mclz\u001B[49m\u001B[38;5;241;43m.\u001B[39;49m\u001B[43mfrom_parameters\u001B[49m\u001B[43m(\u001B[49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mconstructor\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[43mpass_down_parameters\u001B[49m\u001B[43m)\u001B[49m\n\u001B[1;32m 49\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m values\n", - "File \u001B[0;32m~/PycharmProjects/combo-lightning/combo/config/from_parameters.py:144\u001B[0m, in \u001B[0;36mFromParameters.from_parameters\u001B[0;34m(cls, parameters, constructor_method_name, pass_down_parameters)\u001B[0m\n\u001B[1;32m 142\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mcls\u001B[39m(\u001B[38;5;241m*\u001B[39m\u001B[38;5;241m*\u001B[39mparameters_to_call)\n\u001B[1;32m 143\u001B[0m \u001B[38;5;28;01melse\u001B[39;00m:\n\u001B[0;32m--> 144\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mconstructor_method\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mparameters_to_call\u001B[49m\u001B[43m)\u001B[49m\n", - "File 
\u001B[0;32m~/PycharmProjects/combo-lightning/combo/config/from_parameters.py:87\u001B[0m, in \u001B[0;36mregister_arguments.<locals>.wrapper\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 84\u001B[0m parameters_to_register[kwarg_name] \u001B[38;5;241m=\u001B[39m kwarg_val\n\u001B[1;32m 86\u001B[0m self_arg\u001B[38;5;241m.\u001B[39mconstructed_args \u001B[38;5;241m=\u001B[39m parameters_to_register\n\u001B[0;32m---> 87\u001B[0m \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[43mfunc\u001B[49m\u001B[43m(\u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43margs\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[38;5;241;43m*\u001B[39;49m\u001B[43mkwargs\u001B[49m\u001B[43m)\u001B[49m\n", - "\u001B[0;31mTypeError\u001B[0m: from_files() missing 1 required positional argument: 'directory'" - ] + "data": { + "text/plain": "building vocabulary: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "6e1ddd5a820843d18b7ceef4c19f10a2" + } + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -463,28 +500,41 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T14:18:16.605998Z", - "start_time": "2023-10-04T14:18:15.698978Z" + "end_time": "2023-10-08T13:29:34.785510Z", + "start_time": "2023-10-08T13:29:13.139692Z" } }, "id": "11527ec6300c8484" }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 21, "outputs": [ { - "ename": "NameError", - "evalue": "name 'data_loader' is not defined", - "output_type": "error", - "traceback": [ - "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", - "\u001B[0;31mNameError\u001B[0m Traceback (most recent call last)", - "Cell \u001B[0;32mIn[1], line 1\u001B[0m\n\u001B[0;32m----> 1\u001B[0m \u001B[43mdata_loader\u001B[49m\u001B[38;5;241m.\u001B[39mindex_with(vocabulary)\n\u001B[1;32m 2\u001B[0m a 
\u001B[38;5;241m=\u001B[39m \u001B[38;5;241m0\u001B[39m\n\u001B[1;32m 3\u001B[0m \u001B[38;5;28;01mfor\u001B[39;00m i \u001B[38;5;129;01min\u001B[39;00m data_loader:\n", - "\u001B[0;31mNameError\u001B[0m: name 'data_loader' is not defined" - ] + "data": { + "text/plain": "<combo.nn.regularizers.regularizer.Regularizer at 0x7f9b3033b790>" + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" } ], + "source": [ + "model._regularizer" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2023-10-08T13:32:27.554092Z", + "start_time": "2023-10-08T13:32:26.769315Z" + } + }, + "id": "8e8e9a73bbaa856c" + }, + { + "cell_type": "code", + "execution_count": 12, + "outputs": [], "source": [ "data_loader.index_with(vocabulary)\n", "a = 0\n", @@ -494,15 +544,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T13:37:20.610720Z", - "start_time": "2023-10-04T13:37:20.214954Z" + "end_time": "2023-10-08T13:30:14.216618Z", + "start_time": "2023-10-08T13:29:34.784464Z" } }, "id": "e131e0ec75dc6927" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "outputs": [], "source": [ "val_data_loader.index_with(vocabulary)" @@ -510,14 +560,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "start_time": "2023-10-04T13:37:20.606437Z" + "end_time": "2023-10-08T13:30:46.735978Z", + "start_time": "2023-10-08T13:30:14.215132Z" } }, "id": "195c71fcf8170ff" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "outputs": [], "source": [ "import pytorch_lightning as pl\n", @@ -526,16 +577,27 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T13:37:20.620836Z", - "start_time": "2023-10-04T13:37:20.616820Z" + "end_time": "2023-10-08T13:30:46.742699Z", + "start_time": "2023-10-08T13:30:46.731891Z" } }, "id": "dfb5ee72353e8c7f" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 26, + "outputs": [ + { 
+ "name": "stderr", + "output_type": "stream", + "text": [ + "GPU available: False, used: False\n", + "TPU available: False, using: 0 TPU cores\n", + "IPU available: False, using: 0 IPUs\n", + "HPU available: False, using: 0 HPUs\n" + ] + } + ], "source": [ "nlp = TrainableCombo(model, torch.optim.Adam,\n", " optimizer_kwargs={'betas': [0.9, 0.9], 'lr': 0.002},\n", @@ -544,34 +606,124 @@ "trainer = pl.Trainer(max_epochs=10,\n", " default_root_dir='/Users/majajablonska/Documents/Workspace/combo_training',\n", " gradient_clip_val=5,\n", - " callbacks=[EarlyStopping(monitor='EM', mode='max', patience=1)])" + " callbacks=[EarlyStopping(monitor='EM', mode='max', patience=3)])" ], "metadata": { "collapsed": false, "ExecuteTime": { - "start_time": "2023-10-04T13:37:20.619233Z" + "end_time": "2023-10-08T13:34:07.707089Z", + "start_time": "2023-10-08T13:34:07.615986Z" } }, "id": "cefc5173154d1605" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 27, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "\n", + " | Name | Type | Params\n", + "-------------------------------------\n", + "0 | model | ComboModel | 136 M \n", + "-------------------------------------\n", + "12.1 M Trainable params\n", + "124 M Non-trainable params\n", + "136 M Total params\n", + "546.107 Total estimated model params size (MB)\n" + ] + }, + { + "data": { + "text/plain": "Sanity Checking: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "541edfbc1c804075994a2f4f5fda52f3" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Training: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "ade19f1a7bea495484ff1fdf689d6b7b" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Validation: 0it [00:00, 
?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "a3d7afd6ac544f8f8d5e58098d802721" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Validation: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "7120391159054b1cad9b229d2d7b9cce" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Validation: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "c4cad70e5c504b09977a4b1720357030" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": "Validation: 0it [00:00, ?it/s]", + "application/vnd.jupyter.widget-view+json": { + "version_major": 2, + "version_minor": 0, + "model_id": "7f79aedcf26240f1843e94111c416dd5" + } + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "trainer.fit(model=nlp, train_dataloaders=data_loader, val_dataloaders=val_data_loader)" ], "metadata": { "collapsed": false, "ExecuteTime": { - "start_time": "2023-10-04T13:37:20.619817Z" + "end_time": "2023-10-08T13:37:29.975320Z", + "start_time": "2023-10-08T13:34:09.078149Z" } }, "id": "e5af131bae4b1a33" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "outputs": [], "source": [ "from combo.predict import COMBO\n", @@ -580,15 +732,15 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T13:37:20.624003Z", - "start_time": "2023-10-04T13:37:20.621622Z" + "end_time": "2023-10-08T13:37:35.279649Z", + "start_time": "2023-10-08T13:37:35.185270Z" } }, "id": "3e23413c86063183" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "outputs": [], "source": [ "a = predictor(\"Cześć, jestem psem.\")" @@ -596,15 +748,27 @@ "metadata": { "collapsed": false, "ExecuteTime": { 
- "start_time": "2023-10-04T13:37:20.621955Z" + "end_time": "2023-10-08T13:37:36.883507Z", + "start_time": "2023-10-08T13:37:36.572489Z" } }, "id": "d555d7f0223a624b" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 30, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "TOKEN LEMMA UPOS HEAD DEPREL \n", + "Cześć, czeska NOUN 2 case \n", + "jestem jesteć NOUN 0 root \n", + "psem. psen PUNCT 2 punct \n" + ] + } + ], "source": [ "print(\"{:15} {:15} {:10} {:10} {:10}\".format('TOKEN', 'LEMMA', 'UPOS', 'HEAD', 'DEPREL'))\n", "for token in a.tokens:\n", @@ -613,36 +777,47 @@ "metadata": { "collapsed": false, "ExecuteTime": { - "start_time": "2023-10-04T13:37:20.622667Z" + "end_time": "2023-10-08T13:37:37.453094Z", + "start_time": "2023-10-08T13:37:37.436676Z" } }, "id": "a68cd3861e1ceb67" }, { "cell_type": "code", - "execution_count": null, - "outputs": [], + "execution_count": 31, + "outputs": [ + { + "data": { + "text/plain": "{'UPOS_ACC': 0.27411435909056575,\n 'XPOS_ACC': 0.1552987385754211,\n 'SEMREL_ACC': 0.0,\n 'LEMMA_ACC': 0.10453961779590604,\n 'FEATS_ACC': 0.050910189591358866,\n 'EM': 0.0,\n 'UAS': 0.0882241861167762,\n 'LAS': 0.02016768638114661,\n 'UEM': 0.007211538461538462,\n 'LEM': 0.0,\n 'EUAS': 0.0,\n 'ELAS': 0.0,\n 'EUEM': 0.0,\n 'ELEM': 0.0,\n 'partial_loss/upostag_loss': 6.356462001800537,\n 'partial_loss/xpostag_loss': 12.649209022521973,\n 'partial_loss/feats_loss': 29.61688995361328,\n 'partial_loss/lemma_loss': 2.7162468433380127,\n 'partial_loss/head_loss': 14.484538078308105,\n 'partial_loss/deprel_loss': 16.029619216918945,\n 'partial_loss/cycle_loss': 0.0}" + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "model.get_metrics()" ], "metadata": { "collapsed": false, "ExecuteTime": { - "start_time": "2023-10-04T13:37:20.623373Z" + "end_time": "2023-10-08T13:37:43.163361Z", + "start_time": "2023-10-08T13:37:43.108099Z" } 
}, "id": "d6578197e2403037" }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "outputs": [], "source": [], "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2023-10-04T13:37:20.626802Z", - "start_time": "2023-10-04T13:37:20.624427Z" + "end_time": "2023-10-08T13:32:26.781715Z", + "start_time": "2023-10-08T13:32:26.748870Z" } }, "id": "6391caeb9e843c0b"