Skip to content
Snippets Groups Projects
Commit 77f1a90e authored by Bartlomiej
Browse files

Change open file to clarin json

parent 0456a3f2
No related branches found
No related tags found
1 merge request: !11 "Clarin json support"
Pipeline #14177 failed
--index-url https://pypi.clarin-pl.eu/simple/
clarin-json
nlp-ws
regex==2020.10.28
Babel==2.8.0
......
......@@ -73,43 +73,33 @@ class WiktorNERInputParser(InputParser):
Tuple[str, List[Tuple[int, int, Annotation]]]: Text and annotations.
"""
content_parsed = json.loads(content)
if "text" in content_parsed:
text = content_parsed["text"]
if content.text:
text = content.text
else:
text = ""
annotations = []
# Morphosyntactic annotations
if "tokens" in content_parsed:
for token in content_parsed["tokens"]:
if "position" in token:
token_start, token_end = token["position"]
if "lexemes" in token:
for lexeme in token["lexemes"]:
if "disamb" in lexeme and lexeme["disamb"] is True:
if "mstag" in lexeme:
lemma = lexeme.get("lemma", None)
if content.tokens:
for token in content.tokens():
if token.start and token.stop:
if token.lexemes:
for lexeme in token.lexemes:
if lexeme.disamb and lexeme.disamb is True:
if lexeme.pos:
if lexeme.lemma:
lemma = lexeme.lemma
else:
lemma = None
annotations.append(
(
token_start,
token_end,
token.start,
token.stop,
MorphosyntacticAnnotation(
lexeme["mstag"], lemma
lexeme.pos, lemma
),
)
)
# NER annotations
if "entities" in content_parsed:
for entity in content_parsed["entities"]:
if "positions" in entity:
entity_start, entity_end = entity["positions"]
if "type" in entity:
annotations.append(
(entity_start, entity_end, NerAnnotation(entity["type"]))
)
return text, annotations
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment