From 76cba42e3a3b8add03d1777fd80033d6d565fdaf Mon Sep 17 00:00:00 2001
From: Bartlomiej <bartlomiej.piotr.bojanowski@gmail.com>
Date: Wed, 18 Oct 2023 10:55:38 +0200
Subject: [PATCH] Read JSONL input with clarin_json in SequentialJSONLPipeline

---
 src/pipeline/sequential_jsonl.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/pipeline/sequential_jsonl.py b/src/pipeline/sequential_jsonl.py
index dc76e97..8418949 100644
--- a/src/pipeline/sequential_jsonl.py
+++ b/src/pipeline/sequential_jsonl.py
@@ -7,7 +7,7 @@ from src.input_parsers.interface import InputParser
 from src.pipeline.interface import Pipeline
 from src.replacers.interface import ReplacerInterface
 from src.suppressors.interface import Suppressor
-
+import clarin_json
 
 class SequentialJSONLPipeline(Pipeline):
     """Pipeline that runs the whole anonymization process on jsonl-splitted input.
@@ -55,12 +55,10 @@ class SequentialJSONLPipeline(Pipeline):
 
         """
         result = []
-        with open(input_path, "r") as f:
-            for line in f.readlines():
-                if line.strip() == "":
-                    continue
-                parsed_input = self._input_parser.parse(line)
 
+        with clarin_json.open(input_path, 'r') as f:
+            for line in f:
+                parsed_input = self._input_parser.parse(line)
                 detected_entities = []
                 for detector_name, detector in self._detectors.items():
                     detected_entities += detector.detect(
-- 
GitLab