diff --git a/combo/config/from_parameters.py b/combo/config/from_parameters.py index 200c52f1d2909c036b6ae67b44903469fe36c81b..e8530426f12d94d5dca5244ffaeceb6a4f011df2 100644 --- a/combo/config/from_parameters.py +++ b/combo/config/from_parameters.py @@ -1,9 +1,7 @@ +import functools import inspect -from typing import Any, Callable, Dict, List, Optional import typing -import functools - -import json +from typing import Any, Callable, Dict, List, Optional from combo.common.params import Params from combo.config.exceptions import RegistryException diff --git a/combo/config/registry.py b/combo/config/registry.py index d4b1544ae660bb55048c5ca1ae2187b4371af43a..8b12813edd985b680268f5d3879e88c273f76022 100644 --- a/combo/config/registry.py +++ b/combo/config/registry.py @@ -1,5 +1,4 @@ -from collections import defaultdict -from typing import Any, Callable, Optional, Type, Union, Dict, List, Tuple +from typing import Optional, Type, Dict, Tuple from combo.config.exceptions import RegistryException diff --git a/combo/data/api.py b/combo/data/api.py index 649a3a806edc57fe8d257b455e47c3df3ec6e827..6233cc000a94342aad0fe97e38e1c6ec703d2a9d 100644 --- a/combo/data/api.py +++ b/combo/data/api.py @@ -3,7 +3,7 @@ import dataclasses import json from dataclasses import dataclass, field from typing import Optional, List, Dict, Any, Union, Tuple -from tokenizers import Token +from combo.data.tokenizers import Token import conllu from overrides import overrides diff --git a/combo/data/batch.py b/combo/data/batch.py index 818135462da8ae24c7b6dcf493a4e7db7065134b..d6d3785482c314e0c3196d021a9d82565424b548 100644 --- a/combo/data/batch.py +++ b/combo/data/batch.py @@ -13,9 +13,9 @@ from combo.common.util import ensure_list from combo.data import Instance, Vocabulary from combo.utils import ConfigurationError - logger = logging.getLogger(__name__) + class Batch(Iterable): """ A batch of Instances. In addition to containing the instances themselves, @@ -79,9 +79,9 @@ class Batch(Iterable): return {**padding_lengths} def as_tensor_dict( - self, - padding_lengths: Dict[str, Dict[str, int]] = None, - verbose: bool = False, + self, + padding_lengths: Dict[str, Dict[str, int]] = None, + verbose: bool = False, ) -> Dict[str, Union[torch.Tensor, Dict[str, torch.Tensor]]]: # This complex return type is actually predefined elsewhere as a DataArray, # but we can't use it because mypy doesn't like it. diff --git a/combo/data/dataset_readers/conllu.py b/combo/data/dataset_readers/conllu.py index 21e85b787f4459e23736d6d8f3af44df2809d020..ca9cbbf1351a34a72747ebe399d65ff147e8a702 100644 --- a/combo/data/dataset_readers/conllu.py +++ b/combo/data/dataset_readers/conllu.py @@ -7,17 +7,15 @@ import itertools import logging from typing import Dict, List, Optional, Sequence, Iterable -from overrides import overrides - -from combo.data.token_indexers.single_id_token_indexer import SingleIdTokenIndexer -from combo.data.token_indexers.token_indexer import TokenIndexer, Token -from combo.utils import ConfigurationError +from combo.config import Registry +from combo.config.from_parameters import FromParameters, register_arguments +from combo.data import Instance from combo.data.dataset_readers.dataset_reader import DatasetReader, DatasetReaderInput from combo.data.dataset_readers.dataset_utils.span_utils import to_bioul -from combo.data import Instance from combo.data.fields import MetadataField, TextField, Field, SequenceLabelField -from combo.config import Registry -from combo.config.from_parameters import FromParameters, register_arguments +from combo.data.token_indexers.single_id_token_indexer import SingleIdTokenIndexer +from combo.data.token_indexers.token_indexer import TokenIndexer, Token +from combo.utils import ConfigurationError logger = logging.getLogger(__name__) diff --git a/combo/data/dataset_readers/dataset_reader.py b/combo/data/dataset_readers/dataset_reader.py index 9834cd245675f000e0ef07b38e8a81ced37f9a35..b7bc935432c0767590ecb512dd0d964238546e53 100644 --- a/combo/data/dataset_readers/dataset_reader.py +++ b/combo/data/dataset_readers/dataset_reader.py @@ -4,7 +4,7 @@ https://github.com/allenai/allennlp/blob/main/allennlp/data/dataset_readers/data """ import logging from os import PathLike -from typing import Iterable, Iterator, Optional, Union, TypeVar, Dict, List +from typing import Iterable, Iterator, Optional, Union, Dict, List from overrides import overrides from torch.utils.data import IterableDataset @@ -13,8 +13,8 @@ from combo.config import FromParameters, Registry from combo.config.from_parameters import register_arguments from combo.data import SpacyTokenizer, SingleIdTokenIndexer from combo.data.instance import Instance -from combo.data.tokenizers import Tokenizer from combo.data.token_indexers import TokenIndexer +from combo.data.tokenizers import Tokenizer logger = logging.getLogger(__name__) diff --git a/combo/data/dataset_readers/dataset_utils/span_utils.py b/combo/data/dataset_readers/dataset_utils/span_utils.py index 4d25add8437a38b6669df821d7b2e3ecd0883e1c..cb39714bbaf360a77a9371fe4ccc8a34a2895d47 100644 --- a/combo/data/dataset_readers/dataset_utils/span_utils.py +++ b/combo/data/dataset_readers/dataset_utils/span_utils.py @@ -4,7 +4,6 @@ https://github.com/allenai/allennlp/blob/80fb6061e568cb9d6ab5d45b661e86eb61b92c8 """ from typing import Callable, List, Set, Tuple, TypeVar, Optional -import warnings from combo.data.tokenizers.tokenizer import Token from combo.utils import ConfigurationError, InvalidTagSequence diff --git a/combo/data/dataset_readers/universal_dependencies_dataset_reader.py b/combo/data/dataset_readers/universal_dependencies_dataset_reader.py index 8117c45bb509c991699a9b11e5f188e29e2e08fc..eb336ae9131a03a0403f10070f966149ffd80ddd 100644 --- a/combo/data/dataset_readers/universal_dependencies_dataset_reader.py +++ b/combo/data/dataset_readers/universal_dependencies_dataset_reader.py @@ -9,9 +9,9 @@ from typing import List, Any, Dict, Iterable, Tuple import conllu import torch +from conllu import parser from overrides import overrides -from combo import data from combo.config import Registry from combo.config.from_parameters import register_arguments from combo.data import Vocabulary, fields, Instance, Token @@ -22,8 +22,6 @@ from combo.data.fields.metadata_field import MetadataField from combo.data.fields.sequence_label_field import SequenceLabelField from combo.data.fields.text_field import TextField from combo.data.token_indexers import TokenIndexer -from conllu import parser - from combo.data.vocabulary import get_slices_if_not_provided from combo.utils import checks, pad_sequence_to_length diff --git a/combo/data/token_indexers/pretrained_transformer_fixed_mismatched_indexer.py b/combo/data/token_indexers/pretrained_transformer_fixed_mismatched_indexer.py index 89dc2c847fd0f60c3ee45ec608b75aa8e3e1963d..6fb6a2036a0a9f1b0ecd0c09adeb361deedafab3 100644 --- a/combo/data/token_indexers/pretrained_transformer_fixed_mismatched_indexer.py +++ b/combo/data/token_indexers/pretrained_transformer_fixed_mismatched_indexer.py @@ -10,7 +10,6 @@ from overrides import overrides from combo.config import Registry from combo.config.from_parameters import register_arguments from combo.data import Vocabulary -from combo.data.token_indexers.token_indexer import TokenIndexer from combo.data.token_indexers import IndexedTokenList from combo.data.token_indexers.pretrained_transformer_indexer import PretrainedTransformerIndexer from combo.data.token_indexers.pretrained_transformer_mismatched_indexer import PretrainedTransformerMismatchedIndexer diff --git a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py index 5f5ddc8511e2e3cf5878b60909f283b5b680b683..161c71d2b1cf81f63aeb8c8e20d4ac82b16f6631 100644 --- a/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py +++ b/combo/data/token_indexers/pretrained_transformer_mismatched_indexer.py @@ -3,18 +3,18 @@ Adapted from AllenNLP https://github.com/allenai/allennlp/blob/main/allennlp/data/token_indexers/pretrained_transformer_mismatched_indexer.py """ -from typing import Dict, List, Any, Optional import logging +from typing import Dict, List, Any, Optional import torch from overrides import overrides -from combo.config import Registry, FromParameters +from combo.config import Registry from combo.config.from_parameters import register_arguments from combo.data import Vocabulary -from combo.data.tokenizers import Token from combo.data.token_indexers import TokenIndexer, IndexedTokenList from combo.data.token_indexers.pretrained_transformer_indexer import PretrainedTransformerIndexer +from combo.data.tokenizers import Token from combo.utils import pad_sequence_to_length logger = logging.getLogger(__name__) diff --git a/combo/data/token_indexers/single_id_token_indexer.py b/combo/data/token_indexers/single_id_token_indexer.py index 20489e244f8a9c95e76d4b8ea54e09b58ae60a98..81b99b1c0ebb134ea1cfa3d32d9f79ca5c705026 100644 --- a/combo/data/token_indexers/single_id_token_indexer.py +++ b/combo/data/token_indexers/single_id_token_indexer.py @@ -3,16 +3,16 @@ Adapted from AllenNLP https://github.com/allenai/allennlp/blob/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/allennlp/data/token_indexers/single_id_token_indexer.py """ -from typing import Dict, List, Optional, Any import itertools +from typing import Dict, List, Optional from overrides import overrides -from combo.config import FromParameters, Registry +from combo.config import Registry from combo.config.from_parameters import register_arguments -from combo.data.vocabulary import Vocabulary -from combo.data.tokenizers import Token from combo.data.token_indexers import TokenIndexer, IndexedTokenList +from combo.data.tokenizers import Token +from combo.data.vocabulary import Vocabulary _DEFAULT_VALUE = "THIS IS A REALLY UNLIKELY VALUE THAT HAS TO BE A STRING" diff --git a/combo/data/token_indexers/token_const_padding_characters_indexer.py b/combo/data/token_indexers/token_const_padding_characters_indexer.py index 10f54c8a7a4b5530bedca9a89a2d02671ac56f9e..ea1d89e77d7971213eb01d86cb2b64103f3b9a07 100644 --- a/combo/data/token_indexers/token_const_padding_characters_indexer.py +++ b/combo/data/token_indexers/token_const_padding_characters_indexer.py @@ -7,13 +7,11 @@ import itertools from typing import List, Dict import torch +from overrides import overrides from combo.config import Registry from combo.config.from_parameters import register_arguments from combo.data.token_indexers import IndexedTokenList -from combo.data.token_indexers.token_indexer import TokenIndexer -from overrides import overrides - from combo.data.token_indexers.token_characters_indexer import TokenCharactersIndexer from combo.data.tokenizers import CharacterTokenizer from combo.utils import pad_sequence_to_length diff --git a/combo/data/token_indexers/token_indexer.py b/combo/data/token_indexers/token_indexer.py index b2c86d45519643bdb5d8e3b0dab0ebce9f21e84e..b4b2d4ce599a65a6ac8bfae7b74b06b52deba9d6 100644 --- a/combo/data/token_indexers/token_indexer.py +++ b/combo/data/token_indexers/token_indexer.py @@ -6,7 +6,6 @@ https://github.com/allenai/allennlp/blob/main/allennlp/data/token_indexers/token from typing import Any, Dict, List import torch -from overrides import overrides from combo.config import FromParameters from combo.data.tokenizers.tokenizer import Token diff --git a/combo/data/tokenizers/character_tokenizer.py b/combo/data/tokenizers/character_tokenizer.py index 6b66c13c0686a856d45769dabf405ef8e9b54ecc..b407e6fe6e9488d25b7e4e33e906ca9d58cb8092 100644 --- a/combo/data/tokenizers/character_tokenizer.py +++ b/combo/data/tokenizers/character_tokenizer.py @@ -3,7 +3,7 @@ Adapted from AllenNLP https://github.com/allenai/allennlp/blob/main/allennlp/data/tokenizers/character_tokenizer.py """ -from typing import List, Union, Dict, Any +from typing import List, Union from combo.config import Registry from combo.config.from_parameters import register_arguments diff --git a/combo/data/tokenizers/lambo_tokenizer.py b/combo/data/tokenizers/lambo_tokenizer.py index 2105a4034ce61997041ab89583756da5330e2609..f37b07640e8cf7f22add0cc56ba2a9d049c3d201 100644 --- a/combo/data/tokenizers/lambo_tokenizer.py +++ b/combo/data/tokenizers/lambo_tokenizer.py @@ -1,10 +1,11 @@ -from typing import List, Dict, Any +from typing import List + +from lambo.segmenter.lambo import Lambo from combo.config import Registry from combo.config.from_parameters import register_arguments from combo.data.tokenizers.token import Token from combo.data.tokenizers.tokenizer import Tokenizer -from lambo.segmenter.lambo import Lambo @Registry.register('lambo_tokenizer') diff --git a/combo/data/tokenizers/pretrained_transformer_tokenizer.py b/combo/data/tokenizers/pretrained_transformer_tokenizer.py index b884a18dee455eb04cabd54aa16cba65d2ee5f68..cb12d2a050b6e0748a8b10fd068d7492d33ae107 100644 --- a/combo/data/tokenizers/pretrained_transformer_tokenizer.py +++ b/combo/data/tokenizers/pretrained_transformer_tokenizer.py @@ -8,7 +8,7 @@ import dataclasses import logging from typing import Any, Dict, List, Optional, Tuple, Iterable -from transformers import PreTrainedTokenizer, AutoTokenizer +from transformers import PreTrainedTokenizer from combo.config import Registry from combo.config.from_parameters import register_arguments diff --git a/combo/data/tokenizers/sentence_splitter.py b/combo/data/tokenizers/sentence_splitter.py index c0383c994e992e678b905d0fb7aa034ba8cee9d3..250b01edde24593bf5409ce98cf5dd7fe05ee5f2 100644 --- a/combo/data/tokenizers/sentence_splitter.py +++ b/combo/data/tokenizers/sentence_splitter.py @@ -2,7 +2,7 @@ Adapted from AllenNLP https://github.com/allenai/allennlp/blob/80fb6061e568cb9d6ab5d45b661e86eb61b92c82/allennlp/data/tokenizers/sentence_splitter.py """ -from typing import List, Dict, Any +from typing import List import spacy diff --git a/combo/data/tokenizers/spacy_tokenizer.py b/combo/data/tokenizers/spacy_tokenizer.py index d53bc6f5f1170c0a50995a0c033e29589e89c8ae..b884aae278b3891d4bdce0dab6c07b0f2323edf6 100644 --- a/combo/data/tokenizers/spacy_tokenizer.py +++ b/combo/data/tokenizers/spacy_tokenizer.py @@ -3,10 +3,9 @@ Adapted from AllenNLP https://github.com/allenai/allennlp/blob/main/allennlp/data/tokenizers/spacy_tokenizer.py """ -from typing import List, Optional, Dict +from typing import List, Optional import spacy -from overrides import overrides from spacy.tokens import Doc from combo.config import Registry diff --git a/combo/data/tokenizers/tokenizer.py b/combo/data/tokenizers/tokenizer.py index 4a6ea3c4a0b636ecc070d22844b2755b5ad31bcd..7b9269c4021d4bb658c1e04dbc190c2755e63762 100644 --- a/combo/data/tokenizers/tokenizer.py +++ b/combo/data/tokenizers/tokenizer.py @@ -4,7 +4,7 @@ https://github.com/allenai/allennlp/blob/main/allennlp/data/tokenizers/tokenizer """ import logging -from .token import Token +from combo.data.tokenizers.token import Token from typing import List, Optional from combo.config import FromParameters diff --git a/combo/data/tokenizers/whitespace_tokenizer.py b/combo/data/tokenizers/whitespace_tokenizer.py index 84a54efcde3c67926e3550d245db0cc0911ae4cb..eaf74f74e20c04d1b9a93e4de4c7c311216554e7 100644 --- a/combo/data/tokenizers/whitespace_tokenizer.py +++ b/combo/data/tokenizers/whitespace_tokenizer.py @@ -1,4 +1,4 @@ -from typing import List, Dict, Any +from typing import List from combo.config import Registry from combo.data.tokenizers.token import Token diff --git a/combo/data/vocabulary.py b/combo/data/vocabulary.py index 9f711aacc4c2fd56d3267e2b8e7f242c6f9b909b..4f3565c4a936b9beca136c36c7971286856eb65a 100644 --- a/combo/data/vocabulary.py +++ b/combo/data/vocabulary.py @@ -1,12 +1,11 @@ import codecs +import glob import json +import logging import os import re -import glob from collections import defaultdict -from typing import Dict, Optional, Iterable, Set, List, Union, Any - -import logging +from typing import Dict, Optional, Iterable, Set, List, Union from filelock import FileLock from transformers import PreTrainedTokenizer diff --git a/combo/main.py b/combo/main.py index 65119f97f90bdcc17166d0c1b8d0d132c95b4ba1..2faaf1882501d642018cabfe0c9780af26eca8d6 100755 --- a/combo/main.py +++ b/combo/main.py @@ -5,8 +5,7 @@ import tempfile from typing import Dict import torch -from absl import app -from absl import flags +from absl import app, flags import pytorch_lightning as pl from combo.training.trainable_combo import TrainableCombo diff --git a/combo/modules/token_embedders/character_token_embedder.py b/combo/modules/token_embedders/character_token_embedder.py index 7dfce1131e88afba15bc8b2890e6e5279ae1191d..1967ecf9bf92214b3d3124ffa92e930cca3a2753 100644 --- a/combo/modules/token_embedders/character_token_embedder.py +++ b/combo/modules/token_embedders/character_token_embedder.py @@ -11,9 +11,7 @@ from combo.data import Vocabulary from combo.modules.dilated_cnn import DilatedCnnEncoder from combo.modules.token_embedders import TokenEmbedder -"""Embeddings.""" from typing import Optional - import torch import torch.nn as nn from combo import modules diff --git a/combo/nn/base.py b/combo/nn/base.py index 90a71e1012d577431103e1883c03eea87176b243..9e16607b57337fb18b03d9924ad84ff96ab4ffb0 100644 --- a/combo/nn/base.py +++ b/combo/nn/base.py @@ -1,14 +1,11 @@ -from typing import Dict, Optional, List, Union, Tuple +from typing import Optional, List, Union, Tuple import torch import torch.nn as nn -from combo.config.registry import Registry -from combo.config.from_parameters import FromParameters, register_arguments import combo.utils.checks as checks -from combo.data.vocabulary import Vocabulary -from combo.nn.utils import masked_cross_entropy -from combo.predictors.predictor import Predictor +from combo.config.from_parameters import FromParameters, register_arguments +from combo.config.registry import Registry @Registry.register("linear_layer") diff --git a/combo/predictors/predictor_model.py b/combo/predictors/predictor_model.py index d9e1b1ebf0e6b76d4d8f0ad0a98f8384a392a6f3..70025092004af25c289597cbc88cf4f2253c1fa0 100644 --- a/combo/predictors/predictor_model.py +++ b/combo/predictors/predictor_model.py @@ -3,25 +3,23 @@ Adapted from AllenNLP https://github.com/allenai/allennlp/blob/main/allennlp/predictors/predictor.py """ -from typing import List, Iterator, Dict, Tuple, Any, Union -import logging import json +import logging import re from contextlib import contextmanager +from typing import List, Iterator, Dict, Tuple, Any import numpy +import pytorch_lightning as pl import torch from overrides import overrides -from torch.utils.hooks import RemovableHandle from torch import Tensor from torch import backends - -import pytorch_lightning as pl +from torch.utils.hooks import RemovableHandle from combo.common.util import sanitize from combo.config import FromParameters from combo.data.batch import Batch -from combo.data.dataset_loaders.dataset_loader import TensorDict from combo.data.dataset_readers.dataset_reader import DatasetReader from combo.data.instance import JsonDict, Instance from combo.modules.model import Model diff --git a/combo/training/trainer.py b/combo/training/trainer.py deleted file mode 100644 index ebb8ff0c3d33f593a8cf80cdd7720d41a8189cc1..0000000000000000000000000000000000000000 --- a/combo/training/trainer.py +++ /dev/null @@ -1,13 +0,0 @@ -from pytorch_lightning import Trainer - - -class Callback: - pass - - -class TransferPatienceEpochCallback: - pass - - -class GradientDescentTrainer(Trainer): - pass