Skip to content
Snippets Groups Projects
Commit 17aef862 authored by Paweł Walkowiak
Browse files

Rollback changes due to lib req

parent c6b2a2ee
1 merge request: !8 "Update to 0.7"
Pipeline #14467 passed with stages
in 3 minutes and 25 seconds
......@@ -3,4 +3,3 @@ nlp_ws
winer==0.7.4post1
awscli==1.22.57
PyYAML==5.3.1
clarin_json>=0.6
......@@ -30,31 +30,6 @@ class WinerWorker(nlp_ws.NLPWorker):
batch_size=self.batch_size)
self._model._tokenizer.model_max_length = 512
def _add_entities_to_document(self, documents):
    """Attach "ner" spans to each document and return the documents as a list.

    Each document is split into sentence inputs, run through
    ``self._model``, and the model output is aligned into entities.
    Every entity is turned into a ``Span`` and stored on the document
    under the ``"ner"`` key. Documents are modified in place.

    Args:
        documents: Iterable of document objects accepted by
            ``get_sentences_from_document`` and exposing ``set_spans``.

    Returns:
        A list with the same document objects, in input order.
    """
    annotated = []
    for doc in documents:
        # Split the document into plain and tokenized sentence inputs.
        plain, tokenized, starts = get_sentences_from_document(doc)

        # Run the model on the tokenized sentences.
        model_output = self._model.process(tokenized)

        # Align the model output into entities grouped per sentence.
        per_sentence = allign_into_entities(
            tokenized,
            model_output,
            inputs_sentences=plain,
            sentences_range=starts,
        )

        # Flatten the per-sentence groups into one list of spans.
        spans = []
        for sentence_entities in per_sentence:
            for entity in sentence_entities:
                spans.append(Span(**entity))

        doc.set_spans(spans, "ner")
        annotated.append(doc)
    return annotated
def process(
self,
input_path: str,
......@@ -74,9 +49,28 @@ class WinerWorker(nlp_ws.NLPWorker):
"""
# Read inputs and open output
F_ASCII = task_options.get("ensure_ascii", False)
clarin_json.process(
input_path,
self._add_entities_to_document,
output_path,
ensure_ascii=F_ASCII
)
with clarin_json.open(output_path, "w", ensure_ascii=F_ASCII) as fout:
with clarin_json.open(input_path, "r") as fin:
for document in fin:
# Pre-process document
plain_inputs, tokenized_inputs, sent_starts = \
get_sentences_from_document(document)
# Process data
aggregations = self._model.process(tokenized_inputs)
# Aggregate results to entities
entities = allign_into_entities(
tokenized_inputs,
aggregations,
inputs_sentences=plain_inputs,
sentences_range=sent_starts
)
document.set_spans(
[Span(**entity)
for sent_entities in entities
for entity in sent_entities],
"ner")
# Write processed document
fout.write(document)
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment