diff --git a/.gitignore b/.gitignore index b42935e91820cff7243a019e4c45ffdd0c08394f..e0975df1bfb05ad36f68a776f287e4278772e6af 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -model/* \ No newline at end of file +model/* +venv +.idea \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 9ba696ab71ff8aedb09d498117582328f8a2af2b..e5494286bd7f793a62d2827f0ea60e88b306b33c 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -2,30 +2,35 @@ image: 'clarinpl/python:3.6' cache: paths: - .tox + stages: - check_style - build -before_script: - - pip install tox==2.9.1 + +.check_style_template: + before_script: + - pip install tox==2.9.1 + pep8: + extends: .check_style_template stage: check_style script: - tox -v -e pep8 + docstyle: + extends: .check_style_template stage: check_style script: - tox -v -e docstyle + build_image: stage: build image: 'docker:18.09.7' only: - master - - gitlab-registry services: - 'docker:18.09.7-dind' - before_script: - - '' script: - docker build -t $CI_REGISTRY_IMAGE . - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY - - docker push $CI_REGISTRY_IMAGE \ No newline at end of file + - docker push $CI_REGISTRY_IMAGE diff --git a/config.ini b/config.ini index 2d732bc2d3d20180f92a8a0ce576ef81046f5e04..de4416dc69863f13222271273878bd7582256c39 100644 --- a/config.ini +++ b/config.ini @@ -7,6 +7,7 @@ rabbit_password = clarin123 [tool] model=model/plwn_dump_27-03-2018.sqlite workers_number = 1 +url = http://plwordnet.pwr.wroc.pl/ [logging] port = 9975 diff --git a/entrypoint.py b/entrypoint.py index 54d87efcd1b67163a74193332f31ed15dd0114e3..d53d5eac80813108e242ac965168bb2692f19856 100644 --- a/entrypoint.py +++ b/entrypoint.py @@ -18,4 +18,4 @@ location_params = \ cmd = main_cmd + location_params run(cmd, shell=True) -run(["python3.6", "main.py", "service"]) +run(["python3.6", "main.py"]) diff --git a/main.py b/main.py index 1f5183c8a897a995851fbbf8a3b71e51b8a1e346..918979250c7fc3f06f3c16084ed1970b9469f4fd 100644 --- a/main.py +++ b/main.py @@ -1,37 +1,8 @@ """Implementation of hask service.""" -import argparse - import lex_ws from src.plwordnet_worker import PLWordnetWorker -def get_args(): - """Gets command line arguments.""" - parser = argparse.ArgumentParser(description="Topic Modeling") - - subparsers = parser.add_subparsers(dest="algorithm") - subparsers.required = True - - subparsers.add_parser( - "service", - help="Run as a service" - ) - - return parser.parse_args() - - -def main(): - """Runs the program.""" - args = get_args() - - generators = { - "service": lambda: lex_ws.LexService.main(PLWordnetWorker), - } - - gen_fn = generators.get(args.algorithm, lambda: None) - gen_fn() - - if __name__ == "__main__": - main() + lex_ws.LexService.main(PLWordnetWorker) diff --git a/src/plwordnet_worker.py b/src/plwordnet_worker.py index 2d838679a90856e4657f87dcb66073b7186fce7c..450441038b423253d65f6a839fc4f4762c42e264 100644 --- a/src/plwordnet_worker.py +++ b/src/plwordnet_worker.py @@ -8,18 +8,28 @@ import lex_ws import plwn import sqlite3 + + print(sqlite3.sqlite_version) my_logger = logging.getLogger(__name__) def syn2str(synset): - """Turns synset into string.""" + """Turns synset into string. + + :param synset: Set of synonyms (WordNet Synset) + :type synset: Synset + """ return synset.lexical_units[0].lemma + ":" + \ str(synset.lexical_units[0].variant) def tuples2dict(tuples): - """Turns touples into dictionary.""" + """Turns tuples into dictionary. + + :param tuples: The tuples to be converted into dictionary. + :type tuples: set + """ nodes_my_dict = dict() nodes = list() links = list() @@ -31,16 +41,23 @@ def tuples2dict(tuples): nodes_my_dict[to] = len(nodes_my_dict) nodes.append({"id": nodes_my_dict[to], "name": to}) links.append( - {"source_id": nodes_my_dict[from_], "target_id": nodes_my_dict[to]}) + {"source_id": nodes_my_dict[from_], + "target_id": nodes_my_dict[to]}) return {"nodes": nodes, "links": links} class PLWordnetWorker(lex_ws.LexWorker): """Implements PLWordnet worker.""" + URL = "http://plwordnet.pwr.wroc.pl/" + @classmethod def static_init(cls, config): - """Initializes PLWordnet worker.""" + """Initializes PLWordnet worker. + + :param config: The service configuration dictionary. + :type config: dict + """ cls.config = config return @@ -49,31 +66,36 @@ class PLWordnetWorker(lex_ws.LexWorker): my_logger.info("Loading models...") start_time = time.time() model_path = self.config['tool']["model"] + self.url = self.config['tool'].get('url', self.URL) self.wn = plwn.load(model_path, "sqlite3") exec_time = time.time() - start_time print("Loading took: " + str(exec_time)) my_logger.info("Loading finished. Loading took: " + str(exec_time)) return - def process(self, input): - """Running lex process.""" + def process(self, input_): + """Running lex process. + + :param input_: The process input. + :type input_: dict + """ my_logger.info("Doing work!") - if "function" in input: - res = self._evaluate_function(input["function"], input) + if "function" in input_: + res = self._evaluate_function(input_["function"], input_) else: - res = self._evaluate_operation(input["task"], input) + res = self._evaluate_operation(input_["task"], input_) my_logger.info("Work done!") print(res) return res - def _evaluate_operation(self, operation_type, input): + def _evaluate_operation(self, operation_type, input_): wn = self.wn if operation_type == "synset": - id = input["id"] - return wn.synset_by_id(id).to_dict() + id_ = input_["id"] + return wn.synset_by_id(id_).to_dict() elif operation_type == "all" or not operation_type: - synsets = [syn.to_dict() for syn in wn.synsets(input["lexeme"])] + synsets = [syn.to_dict() for syn in wn.synsets(input_["lexeme"])] return {"synsets": synsets, "href": "http://plwordnet.pwr.wroc.pl/wordnet/"} @@ -121,40 +143,39 @@ class PLWordnetWorker(lex_ws.LexWorker): traceback.print_stack() return res - def _evaluate_function(self, function_type, input): + def _evaluate_function(self, function_type, input_): response = {} wn = self.wn if function_type == "list": - element = input["element"] - url = "http://plwordnet.pwr.wroc.pl/" + element = input_["element"] if "lemma" in element: - if "lang" not in element or element["lang"] not in ["pl", "en"]: + if ("lang" not in element or element["lang"] + not in ["pl", "en"]): return response res = wn.synsets(element["lemma"].replace("_", " ")) res = self._filter_by_lang(res, element["lang"]) if len(res) > 0: formats = ["json"] - url = "http://plwordnet.pwr.wroc.pl/wordnet/lemma/" + \ - element["lemma"] + url = self.url + "wordnet/lemma/" + element["lemma"] response = {"formats": formats, "url": url} elif "typeOfSynset" in element and \ "plwordnet" in element["typeOfSynset"]: try: res = wn.synset_by_id(int(element["synsetid"])) formats = ["json"] - url = "http://plwordnet.pwr.wroc.pl/" - response = {"formats": formats, "url": url} + response = {"formats": formats, "url": self.url} except Exception as ex: print(ex) pass return response elif function_type == 'get': - element = input["element"] + element = input_["element"] if "lemma" in element: - if "lang" not in element or element["lang"] not in ["pl", "en"]: + if ("lang" not in element or element["lang"] + not in ["pl", "en"]): return {} synsets = self._filter_by_lang(wn.synsets( element["lemma"].replace("_", " ")), element["lang"])