Skip to content
Snippets Groups Projects

Release 1.0.4

21 files
+ 1110
216
Compare changes
  • Side-by-side
  • Inline

Files

+ 7
3
@@ -21,12 +21,13 @@ class Token:
deps: Optional[str] = None
misc: Optional[str] = None
semrel: Optional[str] = None
embeddings: Dict[str, List[float]] = field(default_factory=list, repr=False)
@dataclass
class Sentence:
tokens: List[Token] = field(default_factory=list)
sentence_embedding: List[float] = field(default_factory=list)
sentence_embedding: List[float] = field(default_factory=list, repr=False)
metadata: Dict[str, Any] = field(default_factory=collections.OrderedDict)
def to_json(self):
@@ -54,6 +55,7 @@ def sentence2conllu(sentence: Sentence, keep_semrel: bool = True) -> conllu.Toke
# Remove semrel to have default conllu format.
if not keep_semrel:
del token_dict["semrel"]
del token_dict["embeddings"]
tokens.append(token_dict)
# Range tokens must be a tuple, not a list; this is a conllu library requirement.
for t in tokens:
@@ -77,14 +79,16 @@ def tokens2conllu(tokens: List[str]) -> conllu.TokenList:
def conllu2sentence(conllu_sentence: conllu.TokenList,
sentence_embedding=None) -> Sentence:
sentence_embedding=None, embeddings=None) -> Sentence:
if embeddings is None:
embeddings = {}
if sentence_embedding is None:
sentence_embedding = []
tokens = []
for token in conllu_sentence.tokens:
tokens.append(
Token(
**token
**token, embeddings=embeddings[token["id"]]
)
)
return Sentence(
Loading