From 37ba9fb4564e0ff879215fa1af0cd627322e4197 Mon Sep 17 00:00:00 2001 From: NRopiak <norbert.ropiak@pwr.edu.pl> Date: Fri, 14 Jan 2022 11:14:17 +0100 Subject: [PATCH 1/3] Resolve #1 issue --- src/date2words.py | 2 +- src/wordifier.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/date2words.py b/src/date2words.py index 055e5f6..068f2d7 100644 --- a/src/date2words.py +++ b/src/date2words.py @@ -99,7 +99,7 @@ def date2words(date_match, tags=None): date_order = [day, *check_none(date_match['punct5']), month, *check_none(date_match['punct6'])] else: - date_order = [month] + date_order = [month, *check_none(date_match['punct6'])] if year: date_order = date_order + [year] date_order = list(map(lambda x: x if x else '', date_order)) diff --git a/src/wordifier.py b/src/wordifier.py index 0f4ed21..18d2788 100644 --- a/src/wordifier.py +++ b/src/wordifier.py @@ -118,8 +118,9 @@ class Wordifier: tags.append(tag) tok_id += 2 elif elem.tag == 'ns': - tok_id -= 1 - string_builder.pop() + if string_builder: + string_builder.pop() + tok_id -= 1 else: raise Exception('Unrecognized tag inside sentence: ' + elem.tag) return self._process_sentence(string_builder, tags) @@ -389,6 +390,7 @@ class Wordifier: replaced by words. """ + print('WORDIFY', self._wordify_tokens) wordify_tokens = self._join_tokens(self._wordify_tokens, string_builder) enum_special = enumerate(wordify_tokens) for i, special_token in enum_special: @@ -443,6 +445,7 @@ class Wordifier: replace = [] for match in matches: date_tags = self._get_match_tag(match, string_builder, tags) + print('MATCH', date2words(match, date_tags)) replace.append(date2words(match, date_tags)) matches = list(map(lambda m: m.group(0), matches)) builder, self._wordify_tokens = check_and_replace(string_builder, @@ -460,8 +463,11 @@ class Wordifier: str: Sentece with replaced special tokens. """ + print('A', string_builder) string_builder = self._handle_regexes(string_builder, tags) + print('B', string_builder) string_builder = self._handle_special_types(string_builder) + print('C', string_builder) if string_builder[0] and not string_builder[0][0].isupper(): string_builder[0] = string_builder[0].capitalize() return ''.join(string_builder) -- GitLab From c050eedbb653322f40c876298dce5f8a10d8f27f Mon Sep 17 00:00:00 2001 From: NRopiak <norbert.ropiak@pwr.edu.pl> Date: Fri, 14 Jan 2022 11:31:27 +0100 Subject: [PATCH 2/3] remove debugging prints --- src/wordifier.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/wordifier.py b/src/wordifier.py index 18d2788..cdb8462 100644 --- a/src/wordifier.py +++ b/src/wordifier.py @@ -390,7 +390,6 @@ class Wordifier: replaced by words. """ - print('WORDIFY', self._wordify_tokens) wordify_tokens = self._join_tokens(self._wordify_tokens, string_builder) enum_special = enumerate(wordify_tokens) for i, special_token in enum_special: @@ -445,7 +444,6 @@ class Wordifier: replace = [] for match in matches: date_tags = self._get_match_tag(match, string_builder, tags) - print('MATCH', date2words(match, date_tags)) replace.append(date2words(match, date_tags)) matches = list(map(lambda m: m.group(0), matches)) builder, self._wordify_tokens = check_and_replace(string_builder, @@ -463,11 +461,8 @@ class Wordifier: str: Sentece with replaced special tokens. """ - print('A', string_builder) string_builder = self._handle_regexes(string_builder, tags) - print('B', string_builder) string_builder = self._handle_special_types(string_builder) - print('C', string_builder) if string_builder[0] and not string_builder[0][0].isupper(): string_builder[0] = string_builder[0].capitalize() return ''.join(string_builder) -- GitLab From 33cbe93b9039af63a3e7dff3a7bc753fd6047aab Mon Sep 17 00:00:00 2001 From: NRopiak <norbert.ropiak@pwr.edu.pl> Date: Fri, 14 Jan 2022 13:19:08 +0100 Subject: [PATCH 3/3] Add deployment.yaml --- config.ini | 10 ++------- deployment.yaml | 57 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 8 deletions(-) create mode 100644 deployment.yaml diff --git a/config.ini b/config.ini index 2845245..beb8c9a 100755 --- a/config.ini +++ b/config.ini @@ -1,6 +1,5 @@ [service] tool = wordifier - root = /samba/requests/ rabbit_host = rabbitmq rabbit_user = test @@ -8,13 +7,8 @@ rabbit_password = test queue_prefix = nlp_ [tool] -workers_number = 5 -processed_lines = 1000 +workers_number = 2 [logging] port = 9998 -local_log_level = INFO - -[logging_levels] -__main__ = INFO - +local_log_level = INFO \ No newline at end of file diff --git a/deployment.yaml b/deployment.yaml new file mode 100644 index 0000000..a01efea --- /dev/null +++ b/deployment.yaml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: wordifier + name: wordifier + namespace: nlpworkers +spec: + replicas: 1 + selector: + matchLabels: + app: wordifier + template: + metadata: + labels: + app: wordifier + spec: + containers: + - image: clarinpl/wordifier:latest + name: wordifier + volumeMounts: + - name: samba + mountPath: /samba + - name: config + mountPath: /home/worker/config.ini + subPath: config.ini + volumes: + - name: samba + hostPath: + path: /samba + type: "" + - name: config + configMap: + name: wordifier-config-ini + +--- +apiVersion: v1 +data: + config.ini: |- + [service] + tool = wordifier + root = /samba/requests/ + rabbit_host = rabbit_host + rabbit_user = rabbit_user + rabbit_password = rabbit_password + queue_prefix = nlp_ + + [tool] + workers_number = 2 + + [logging] + port = 9998 + local_log_level = INFO +kind: ConfigMap +metadata: + name: wordifier-config-ini + namespace: nlpworkers -- GitLab