Skip to content
Snippets Groups Projects
Commit bcecfd40 authored by Maja Jablonska's avatar Maja Jablonska
Browse files

Delete obsolete _Token class

parent 93d9ac59
1 merge request!46Merge COMBO 3.0 into master
import logging
from dataclasses import dataclass
from typing import Optional
from combo.data import Token
logger = logging.getLogger(__name__)
@dataclass(init=False, repr=False)
class _Token(Token):
    """Extension of ``Token`` that carries a morphological-features string.

    Declared as a dataclass with ``init=False``/``repr=False`` so the
    hand-written ``__init__`` below (which must thread arguments through the
    base class) is used instead of a generated one.
    """
    # Extend the parent's __slots__ so instances remain __dict__-less while
    # gaining storage for the extra feats_ attribute.
    __slots__ = Token.__slots__ + ['feats_']
    # Morphological features (e.g. a CoNLL-U FEATS string); None when absent.
    feats_: Optional[str]

    def __init__(self, text: str = None, idx: int = None, idx_end: int = None, lemma_: str = None, pos_: str = None,
                 tag_: str = None, dep_: str = None, ent_type_: str = None, text_id: int = None, type_id: int = None,
                 feats_: str = None) -> None:
        # Forward every base-class field positionally to Token, then store the
        # one attribute this subclass adds.
        # NOTE(review): argument order must match Token.__init__ exactly —
        # confirm against the base class, which is not visible in this file.
        super().__init__(text, idx, idx_end, lemma_, pos_, tag_, dep_, ent_type_, text_id, type_id)
        self.feats_ = feats_
def get_slices_if_not_provided(vocab):
    """Placeholder lookup: ignores *vocab* and always yields ``None``.

    :param vocab: vocabulary object; not inspected by this implementation.
    :return: ``None`` unconditionally.
    """
    result = None
    return result
......@@ -3,20 +3,19 @@ Adapted from AllenNLP
https://github.com/allenai/allennlp/blob/main/allennlp/data/dataset_readers/conll2003.py
"""
from typing import Dict, List, Optional, Sequence, Iterable
import itertools
import logging
from typing import Dict, List, Optional, Sequence, Iterable
from combo.utils import ConfigurationError
from .dataset_reader import DatasetReader, PathOrStr
from combo.data.token_indexers.token_indexer import TokenIndexer, Token
from combo.data.token_indexers.single_id_token_indexer import SingleIdTokenIndexer
from combo.data.token_indexers.token_indexer import TokenIndexer, Token
from combo.utils import ConfigurationError
from .dataset_reader import DatasetReader
from .dataset_utils.span_utils import to_bioul
from .. import Instance
from ..fields import MetadataField, TextField, Field, SequenceLabelField
from ...utils.file_utils import cached_path
logger = logging.getLogger(__name__)
......
......@@ -12,7 +12,6 @@ from overrides import overrides
from combo import data
from combo.data import Vocabulary, fields, Instance, Token
from combo.data.dataset import _Token
from combo.data.dataset_readers.dataset_reader import DatasetReader
from combo.data.fields import Field
from combo.data.fields.adjacency_field import AdjacencyField
......@@ -120,12 +119,13 @@ class UniversalDependenciesDatasetReader(DatasetReader):
def text_to_instance(self, tree: conllu.TokenList) -> Instance:
fields_: Dict[str, Field] = {}
tree_tokens = [t for t in tree if isinstance(t["idx"], int)]
tokens = [_Token(t["token"],
pos_=t.get("upostag"),
tag_=t.get("xpostag"),
lemma_=t.get("lemma"),
feats_=t.get("feats"))
tree_tokens = [t for t in tree if isinstance(t["id"], int)]
tokens = [Token(text=t["token"],
upostag=t.get("upostag"),
xpostag=t.get("xpostag"),
lemma=t.get("lemma"),
feats=t.get("feats"))
for t in tree_tokens]
# features
......@@ -153,7 +153,7 @@ class UniversalDependenciesDatasetReader(DatasetReader):
label_namespace=target_name + "_labels")
elif target_name == "deps":
# Graphs require adding ROOT (AdjacencyField uses sequence length from TextField).
text_field_deps = TextField([_Token("ROOT")] + copy.deepcopy(tokens), self._token_indexers)
text_field_deps = TextField([Token("ROOT")] + copy.deepcopy(tokens), self._token_indexers)
enhanced_heads: List[Tuple[int, int]] = []
enhanced_deprels: List[str] = []
for idx, t in enumerate(tree_tokens):
......
......@@ -15,7 +15,7 @@ import glob
from torch.nn import Module
from combo.common.params import Params
from combo.data.dataset import DatasetReader
from combo.data.dataset_readers import DatasetReader
from combo.models.model import Model
from combo.utils import ConfigurationError
from combo.utils.file_utils import cached_path
......
......@@ -12,7 +12,7 @@ class ConllDatasetReaderTest(unittest.TestCase):
def test_tokenize_correct_tokens(self):
reader = ConllDatasetReader(coding_scheme='IOB2')
token = next(iter(reader('conll_test_file.txt')))
self.assertListEqual([str(t) for t in token['tokens'].tokens],
self.assertListEqual([str(t) for t in token['tokens'].tokens],
['SOCCER', '-', 'JAPAN', 'GET', 'LUCKY', 'WIN', ',',
'CHINA', 'IN', 'SURPRISE', 'DEFEAT', '.'])
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment