diff --git a/src/tagger.py b/src/tagger.py index 3ad81480a3f3a9c9854938767a8179b5c099fc65..85c68ecdc0b9489b554ce8687ac714feaeef28f8 100644 --- a/src/tagger.py +++ b/src/tagger.py @@ -164,8 +164,6 @@ class TaggerWorker(nlp_ws.NLPWorker): if text.startswith("<?xml version='1.0' encoding='UTF-8'?>"): _tei_style = True - _log.error(f"{_dir_style=} {_tei_style=}") - _log.debug(f"Running LPMN: {json_lpmn}") chunk_size = int(self._chunking_limit * 0.5) @@ -235,7 +233,7 @@ class TaggerWorker(nlp_ws.NLPWorker): merge_type = src.utils.MergeType.JSON2TEI elif (lang, method) == ("pl", "tagger"): merge_type = src.utils.MergeType.PLAINTEXT2CCL - _log.error(f"Merge type: {merge_type}") + _log.debug(f"Merge type: {merge_type}") result_path = str(uuid.uuid4()) if output == JSON and linking \ else output_path @@ -273,8 +271,8 @@ class TaggerWorker(nlp_ws.NLPWorker): Path(destination_path).mkdir(parents=True, exist_ok=True) tei_list = src.utils.read_tei_file(input_path, _log) - _log.error(f'Paragraphs: {tei_list=}') - _log.error(f'Number of paragraphs: {len(tei_list)=}') + _log.debug(f'Paragraphs: {tei_list=}') + _log.debug(f'Number of paragraphs: {len(tei_list)=}') l_results = [] while len(tei_list) > 0: diff --git a/src/utils.py b/src/utils.py index 2f663687612537216d7baa851e69624e3ad2437b..393259e22f05c6244f02f46fdcefc11cea5cff95 100644 --- a/src/utils.py +++ b/src/utils.py @@ -147,7 +147,6 @@ def merge_splits(output_path: str, destination_path: str, os.remove(output_path) # create output file _log.debug(f"Creating output file: {output_path}") - # _log.debug(f"Created output file: {output_path}") # run tagger on each chunk subtask_args_queue_awaiting = [] for dbg_i, chunk in enumerate(splitted_corpus): @@ -184,10 +183,8 @@ def merge_splits(output_path: str, destination_path: str, subtask.run(blocking=False) l_results = [subtask.get_output_path() for subtask in subtasks] - _log.debug(f"Multiple results: {l_results}") for l_result in l_results: - _log.debug(f"Result of chunk: {l_result}") with open(l_result, "r") as f: if merge_type == MergeType.PLAINTEXT2CCL: file_content2 = f.read()