Skip to content
Snippets Groups Projects
Commit 77f1a90e authored by Bartlomiej
Browse files

Change open file to clarin json

parent 0456a3f2
1 merge request: !11 "Clarin json support"
Pipeline #14177 failed with stages in 17 seconds
--index-url https://pypi.clarin-pl.eu/simple/
clarin-json
nlp-ws
regex==2020.10.28
Babel==2.8.0
......@@ -6,4 +7,4 @@ bitarray==2.6.1
random-username==1.0.2
randominfo==2.0.2
hydra-core==1.3.1
lxml==4.9.2
\ No newline at end of file
lxml==4.9.2
......@@ -73,43 +73,33 @@ class WiktorNERInputParser(InputParser):
Tuple[str, List[Tuple[int, int, Annotation]]]: Text and annotations.
"""
content_parsed = json.loads(content)
if "text" in content_parsed:
text = content_parsed["text"]
if content.text:
text = content.text
else:
text = ""
annotations = []
# Morphosyntactic annotations
if "tokens" in content_parsed:
for token in content_parsed["tokens"]:
if "position" in token:
token_start, token_end = token["position"]
if "lexemes" in token:
for lexeme in token["lexemes"]:
if "disamb" in lexeme and lexeme["disamb"] is True:
if "mstag" in lexeme:
lemma = lexeme.get("lemma", None)
if content.tokens:
for token in content.tokens():
if token.start and token.stop:
if token.lexemes:
for lexeme in token.lexemes:
if lexeme.disamb and lexeme.disamb is True:
if lexeme.pos:
if lexeme.lemma:
lemma = lexeme.lemma
else:
lemma = None
annotations.append(
(
token_start,
token_end,
token.start,
token.stop,
MorphosyntacticAnnotation(
lexeme["mstag"], lemma
lexeme.pos, lemma
),
)
)
# NER annotations
if "entities" in content_parsed:
for entity in content_parsed["entities"]:
if "positions" in entity:
entity_start, entity_end = entity["positions"]
if "type" in entity:
annotations.append(
(entity_start, entity_end, NerAnnotation(entity["type"]))
)
return text, annotations
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment