Skip to content
Snippets Groups Projects
Commit b6614125 authored by Maja Jabłońska's avatar Maja Jabłońska Committed by Martyna Wiącek
Browse files

Add get_slices_if_not_provided to data/dataset.py

parent 2b3c13dc
1 merge request!46Merge COMBO 3.0 into master
import logging import logging
from combo import data
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -7,5 +8,30 @@ logger = logging.getLogger(__name__) ...@@ -7,5 +8,30 @@ logger = logging.getLogger(__name__)
class DatasetReader:
    """Base class for dataset readers.

    Empty placeholder: it defines no attributes or methods of its own.
    """
class UniversalDependenciesDatasetReader(DatasetReader):
    """Dataset reader subclass of ``DatasetReader``.

    Empty placeholder: it adds nothing to ``DatasetReader`` yet.
    NOTE(review): the name suggests it is meant for Universal Dependencies
    data; no reading logic is implemented here -- confirm intended scope.
    """
\ No newline at end of file
def get_slices_if_not_provided(vocab: "data.Vocabulary"):
    """Return ``vocab.slices``, building and caching it on first use.

    If ``vocab`` already has a ``slices`` attribute, that value is returned
    unchanged. Otherwise, when the vocabulary has a ``"feats_labels"``
    namespace:

    1. For every token in that namespace other than ``"_"`` and
       ``"__PAD__"``, a companion token ``"<name>=None"`` is added to the
       namespace, where ``<name>`` is the part of the token before the
       first ``"="``.
    2. All indices of the namespace are grouped by that ``<name>`` prefix
       into a dict mapping name -> list of indices, in the iteration order
       of the index-to-token mapping.
    3. The dict is stored as ``vocab.slices`` and returned.

    The annotation is a string so that ``data.Vocabulary`` is not looked up
    when the function is defined.

    Args:
        vocab: Vocabulary object providing ``get_namespaces``,
            ``get_index_to_token_vocabulary`` and ``add_token_to_namespace``.
            It is mutated: tokens may be added and ``slices`` is set.

    Returns:
        The ``slices`` mapping, or ``None`` when ``vocab`` has neither a
        ``slices`` attribute nor a ``"feats_labels"`` namespace.
    """
    if hasattr(vocab, "slices"):
        return vocab.slices

    if "feats_labels" not in vocab.get_namespaces():
        return None

    # Snapshot the tokens before adding new ones, so the namespace is not
    # modified while it is being iterated.
    known_tokens = list(vocab.get_index_to_token_vocabulary("feats_labels").values())
    for token in known_tokens:
        if token not in ("_", "__PAD__"):
            empty_value = token.split("=")[0] + "=None"
            vocab.add_token_to_namespace(empty_value, "feats_labels")

    # There are 2 types of features: with (Case=Acc) or without assignment (None).
    # Here we group their indices by name (the part before the assignment sign).
    slices = {}
    for idx, token in vocab.get_index_to_token_vocabulary("feats_labels").items():
        slices.setdefault(token.split("=")[0], []).append(idx)

    vocab.slices = slices
    return vocab.slices
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment