diff --git a/src/pipeline/sequential_jsonl.py b/src/pipeline/sequential_jsonl.py index dc76e97540caa15d806019e87513c26e7f659974..8418949b7c36ad8a6ebdd276479a3e4f5ebcc073 100644 --- a/src/pipeline/sequential_jsonl.py +++ b/src/pipeline/sequential_jsonl.py @@ -7,7 +7,7 @@ from src.input_parsers.interface import InputParser from src.pipeline.interface import Pipeline from src.replacers.interface import ReplacerInterface from src.suppressors.interface import Suppressor - +import clarin_json class SequentialJSONLPipeline(Pipeline): """Pipeline that runs the whole anonymization process on jsonl-splitted input. @@ -55,12 +55,10 @@ class SequentialJSONLPipeline(Pipeline): """ result = [] - with open(input_path, "r") as f: - for line in f.readlines(): - if line.strip() == "": - continue - parsed_input = self._input_parser.parse(line) + with clarin_json.open(input_path, 'r') as f: + for line in f: + parsed_input = self._input_parser.parse(line) detected_entities = [] for detector_name, detector in self._detectors.items(): detected_entities += detector.detect(