Commit 981ca4ed authored by Bartosz Walkowiak

Merge branch 'develop' into 'master'

Develop

Closes #2

See merge request !5
parents c7a95bb7 4c05429c
Pipeline #14610 passed with stages in 1 minute and 26 seconds
image: 'clarinpl/python:3.6'

cache:
  paths:
    - .tox

stages:
  - check_style
  - build

before_script:
  - pip install tox==2.9.1

.check_style_template:
  before_script:
    - pip install tox==2.9.1

pep8:
  extends: .check_style_template
  stage: check_style
  script:
    - tox -v -e pep8

docstyle:
  extends: .check_style_template
  stage: check_style
  script:
    - tox -v -e docstyle

build_image:
  stage: build
  image: 'docker:18.09.7'
@@ -22,8 +31,6 @@ build_image:
    - master
  services:
    - 'docker:18.09.7-dind'
  before_script:
    - ''
  script:
    - docker build -t $CI_REGISTRY_IMAGE .
    - docker login -u $CI_REGISTRY_USER -p $CI_REGISTRY_PASSWORD $CI_REGISTRY
......
@@ -18,4 +18,4 @@ location_params = \
cmd = main_cmd + location_params
run(cmd, shell=True)
run(["python3.6", "main.py", "service"])
run(["python3.6", "main.py"])
"""Implementation of hask service."""
import argparse

import lex_ws

from src.word2vec_worker import W2vWorker


def get_args():
    """Gets command line arguments."""
    parser = argparse.ArgumentParser(description="Topic Modeling")
    subparsers = parser.add_subparsers(dest="algorithm")
    subparsers.required = True
    subparsers.add_parser(
        "service",
        help="Run as a service"
    )
    return parser.parse_args()


def main():
    """Runs the program."""
    args = get_args()
    generators = {
        "service": lambda: lex_ws.LexService.main(W2vWorker),
    }
    gen_fn = generators.get(args.algorithm, lambda: None)
    gen_fn()


if __name__ == "__main__":
    main()
lex_ws.LexService.main(W2vWorker)
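
Editor's note, not part of the diff: the required subparser is why the launcher script above now has to append "service" on the command line. A minimal standalone sketch, assuming only the standard library:

# Minimal sketch: the subparser created in get_args() is marked required, so
# parsing an empty argument list exits with an argparse error, while
# ["service"] selects the service generator.
import argparse

parser = argparse.ArgumentParser(description="Topic Modeling")
subparsers = parser.add_subparsers(dest="algorithm")
subparsers.required = True
subparsers.add_parser("service", help="Run as a service")

print(parser.parse_args(["service"]).algorithm)  # prints: service
# parser.parse_args([]) would raise SystemExit, because a subcommand is mandatory.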
@@ -112,9 +112,9 @@ class W2vController:
                                           top_n=top_n)))
        return most_similar

    def __selectbestmodelformatlist(self, list):
    def __selectbestmodelformatlist(self, pos_neg_list):
        result = []
        for el in list:
        for el in pos_neg_list:
            result.append(self.__selectbestmodelformat(el))
        return result
@@ -144,17 +144,19 @@ class W2vController:
    def doc2vec(self, words, counts):
        """Returns the count-weighted average vector of the given words."""
        ind = 0
        sum = 0
        total_sum = 0
        for word, count in zip(words, counts):
            if self.__model.contains(word):
                ind = ind + count
                if count == 1:
                    sum = sum + self.__model.vector_representation(word)
                    total_sum = total_sum + \
                        self.__model.vector_representation(word)
                else:
                    sum = sum + self.__model.vector_representation(word) * count
                    total_sum = total_sum + self.__model.vector_representation(
                        word) * count
        if ind > 0:
            sum = sum / ind
        return sum.tolist()
            total_sum = total_sum / ind
        return total_sum.tolist()
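
Editor's note: to make the weighting above concrete, a minimal sketch assuming numpy, with made-up 3-dimensional vectors standing in for self.__model.vector_representation():

# Minimal sketch of the count-weighted averaging done in doc2vec().
import numpy as np

vectors = {"kot": np.array([1.0, 0.0, 0.0]),
           "pies": np.array([0.0, 1.0, 0.0])}
words, counts = ["kot", "pies"], [2, 1]

total_sum, ind = 0, 0
for word, count in zip(words, counts):
    if word in vectors:                              # stands in for model.contains(word)
        ind += count
        total_sum = total_sum + vectors[word] * count
if ind > 0:
    total_sum = total_sum / ind                      # count-weighted mean vector
print(total_sum.tolist())                            # approx. [0.667, 0.333, 0.0]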
    def similarity_betweens(self, first_words, second_words):
        """."""
@@ -230,7 +232,3 @@ class W2vController:
        else:
            word = splitted_phrase[0]
        return ModelFormatGenerator(word)


if __name__ == '__main__':
    pass

# __all__ = ["bar", "spam", "eggs"]
# from W2vModel import W2vModel
# from W2vController import W2vController
@@ -10,11 +10,10 @@ REPLACE_CHARACTERS_IN_WORD = {
ALL_SPEECH_PARTS = \
    ['noun', 'adj', 'verb', 'num', 'adv', 'pron', 'prep',
     'conj', 'interj', 'burk', 'qub', 'xxx', 'interp', 'aglt']
print(ALL_SPEECH_PARTS)

__noun, __adj, __verb, __num, __adv, __pron, __prep, __conj, \
    __interj, __burk, __qub, __xxx, __interp, __aglt = ALL_SPEECH_PARTS

# 'noun', 'adj', 'verb', 'num', 'adv', 'pron', 'prep', 'conj',
# 'interj', 'burk', 'qub', 'xxx', 'interp', 'aglt'

NKJP_POS_MAPPINGS = {
    'subst': __noun, 'depr': __noun, 'ger': __noun, 'brev': __noun,
    'adj': __adj, 'adja': __adj, 'adjp': __adj, 'adjc': __adj,
......
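
Editor's note: the full mapping table is truncated in this diff; a tiny self-contained excerpt, for illustration only, of how fine-grained NKJP tags collapse onto the coarse classes in ALL_SPEECH_PARTS:

# Illustrative excerpt, not the real NKJP_POS_MAPPINGS table.
coarse_noun, coarse_adj = 'noun', 'adj'
EXCERPT = {'subst': coarse_noun, 'depr': coarse_noun, 'ger': coarse_noun,
           'adja': coarse_adj, 'adjc': coarse_adj}
print(EXCERPT['ger'])   # prints: noun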
@@ -21,8 +21,6 @@ class W2vModel(object):
    def most_similar_full(self, positive, negative, top_n=10):
        """Returns the top_n words closest to positive and furthest from negative."""
        print(positive)
        print(negative)
        return self.__model.most_similar(positive, negative, topn=top_n)

    def most_similar(self, lwsp, top_n=10):
@@ -44,7 +42,3 @@ class W2vModel(object):
    def vocab_size(self):
        """Returns size of the model's vocabulary."""
        return len(self.__model.vocab)


if __name__ == '__main__':
    pass
@@ -7,7 +7,6 @@ import lex_ws
from src.w2v_service.W2vController import W2vController

my_logger = logging.getLogger(__name__)
_log = logging.getLogger(__name__)

MODELS_PATHS = {
@@ -22,29 +21,48 @@ class W2vWorker(lex_ws.LexWorker):
    @classmethod
    def static_init(cls, config):
        """Initializes W2vWorker worker."""
        # cls.__service = W2vService(model_type=config["tool"]["model_type"])
        """Initializes W2vWorker worker.

        :param config: Configuration dict storing info about model type to use.
        :type config: dict
        """
        my_logger.info("Loading models...")
        model_path = MODELS_PATHS[config["tool"]["model_type"]]
        cls.__model = W2vController(model_path)
        my_logger.info("Loading finished.")
        return
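
Editor's note: a hypothetical configuration for static_init(), for illustration only; the real keys of MODELS_PATHS are not shown in this diff, so the model_type value is a placeholder:

# "w2v_default" is a made-up key standing in for whatever MODELS_PATHS defines.
example_config = {"tool": {"model_type": "w2v_default"}}
# W2vWorker.static_init(example_config) would then resolve
# MODELS_PATHS["w2v_default"] and wrap that model path in a W2vController.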
    def process(self, input):
        """Running lex process."""
    def process(self, service_input):
        """Running lex process.

        :param service_input: Input containing information to process
        :type service_input: dict
        :return: Service results.
        :rtype: list | dict
        """
        my_logger.info("Doing work!")
        if "function" in input:
            res = self._evaluate_function(input["function"], input)
        if "function" in service_input:
            res = self._evaluate_function(
                service_input["function"], service_input)
        else:
            res = self._evaluate_operation(input["task"], input)
            res = self._evaluate_operation(service_input["task"], service_input)
        my_logger.info("Work done!")
        return res
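
Editor's note: two made-up inputs illustrating the dispatch above; a "function" key routes to _evaluate_function, otherwise the "task" key is handed to _evaluate_operation:

# Illustrative inputs only; field values are invented.
function_request = {"function": "getInfo"}
task_request = {"task": "vector", "lexeme": "kot"}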
    def _evaluate_function(self, function_type, input):
    def _evaluate_function(self, function_type, input_to_evaluate):
        """Evaluate input received by the service.

        :param function_type: function to be performed on input.
        :type function_type: str
        :param input_to_evaluate: Data to be evaluated by the worker.
        :type input_to_evaluate: dict
        :return: response from the model.
        :rtype: dict | list
        """
        response = {}
        if function_type == "list":
            element = input["element"]
            element = input_to_evaluate["element"]
            if "lang" not in element or element["lang"] != "pl":
                return response
            if "lemma" not in element:
@@ -57,13 +75,13 @@ class W2vWorker(lex_ws.LexWorker):
                    "json"], "url": "http://ws.clarin-pl.eu/w2vdemo.shtml"}
        elif function_type == "get":
            element = input["element"]
            element = input_to_evaluate["element"]
            if "lang" not in element or element["lang"] != "pl":
                return response
            if "lemma" not in element:
                return response
            element = input["element"]
            element = input_to_evaluate["element"]
            return self.__model.most_similar(element["lemma"])
        elif function_type == "getInfo":
            with open("src/info.json", "rt", encoding="utf8") as f:
@@ -72,26 +90,51 @@
        return response
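
Editor's note, values invented: the "get" branch above only answers for Polish elements that carry a lemma, otherwise the empty response is returned:

# Made-up "get" requests: only the first passes the lang/lemma checks above.
accepted = {"function": "get", "element": {"lang": "pl", "lemma": "kot"}}
rejected = {"function": "get", "element": {"lang": "en", "lemma": "cat"}}  # -> {}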
    def _evaluate_operation(self, operation_type, input):
    def _evaluate_operation(self, operation_type, input_to_evaluate):
        """Evaluates operation passed by task.

        :param operation_type: task which is supposed to be done.
            Can be one of: notmatch, kbestposneg, kbest, similarity,
            similarities, doc2vec, vector or all.
        :type operation_type: str
        :raise Exception: If the provided operation is not supported.
        :param input_to_evaluate: Data to be evaluated by the worker.
        :type input_to_evaluate: dict
        :return: task response
        :rtype: dict | list
        """
        model = self.__model
        if operation_type == "notmatch":
            return model.most_not_match(input["options"]["words"])
            return model.most_not_match(input_to_evaluate["options"]["words"])
        if operation_type == "kbestposneg":
            return model.most_similar_full(input["options"]["positive"],
                                           input["options"]["negative"])
            return model.most_similar_full(
                input_to_evaluate["options"]["positive"],
                input_to_evaluate["options"]["negative"]
            )
        if operation_type == "kbest":
            return model.most_similar(input["lexeme"])
            return model.most_similar(input_to_evaluate["lexeme"])
        elif operation_type == "similarity":
            return model.similarity_between(input["lexeme"], input["lexeme2"])
            return model.similarity_between(
                input_to_evaluate["lexeme"],
                input_to_evaluate["lexeme2"]
            )
        elif operation_type == "similarities":
            return model.similarity_betweens(input["lexeme1"], input["lexeme2"])
            return model.similarity_betweens(
                input_to_evaluate["lexeme1"],
                input_to_evaluate["lexeme2"]
            )
        elif operation_type == "doc2vec":
            return model.doc2vec(input["lexeme1"], input["counts1"])
            return model.doc2vec(
                input_to_evaluate["lexeme1"],
                input_to_evaluate["counts1"]
            )
        elif operation_type == "vector":
            return model.vector_representations(input["lexeme"])
            return model.vector_representations(input_to_evaluate["lexeme"])
        elif operation_type == "all" or not operation_type:
            return {"vector": model.vector_representations(input["lexeme"]),
                    "kbest": model.most_similar(input["lexeme"])}
            return {
                "vector": model.vector_representations(
                    input_to_evaluate["lexeme"]),
                "kbest": model.most_similar(input_to_evaluate["lexeme"])
            }
        else:
            raise Exception(
......
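
Editor's note: illustrative payload shapes for each operation handled above; the key names come from the diff, the values are made up:

# Illustrative payloads only; key names mirror _evaluate_operation().
operation_examples = {
    "vector": {"task": "vector", "lexeme": "kot"},
    "kbest": {"task": "kbest", "lexeme": "kot"},
    "similarity": {"task": "similarity", "lexeme": "kot", "lexeme2": "pies"},
    "similarities": {"task": "similarities",
                     "lexeme1": ["kot", "dom"], "lexeme2": ["pies", "okno"]},
    "doc2vec": {"task": "doc2vec",
                "lexeme1": ["kot", "pies"], "counts1": [2, 1]},
    "kbestposneg": {"task": "kbestposneg",
                    "options": {"positive": ["krol", "kobieta"],
                                "negative": ["mezczyzna"]}},
    "notmatch": {"task": "notmatch",
                 "options": {"words": ["kot", "pies", "okno"]}},
}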