Commit 4215795e authored by Tomasz Walkowiak

update

parent 0c465779
......@@ -14,6 +14,8 @@ from improvement import get_keywords
import nlp_ws
import logging
import os,shutil
_log = logging.getLogger(__name__)
......@@ -37,12 +39,11 @@ class EmbedRankWorker(nlp_ws.NLPWorker):
def saveResult(self,keywords_dict,outputFile):
def saveResult(self,keywords,scores,outputFile):
file = open(outputFile, 'w')
file.write('[')
if keywords_dict[0] is not None:
for idx in range(len(keywords_dict[0])):
element_dict = {'keyword': keywords_dict[0][idx], 'score': keywords_dict[1][idx], 'alias': keywords_dict[2][idx]}
for idx in range(len(keywords)):
element_dict = {'keyword': keywords[idx], 'score': keywords[idx]}
file.write(str(element_dict)+', ')
file.write(']')
......@@ -58,22 +59,23 @@ class EmbedRankWorker(nlp_ws.NLPWorker):
if os.path.isdir(inputFile):
shutil.copytree(inputFile,outputFile)
#_log.info(inputFile+"/text.ccl")
tagged,lemmas = self.ptagger.pos_tag_raw_text(inputFile+"/text.ccl")
_log.info(lemmas)
tagged, lemmas, raw_text = self.ptagger.pos_tag_raw_text(inputFile+"/text.ccl")
#_log.info(lemmas)
else:
_log.info("CCL")
#_log.info("CCL")
try:
os.makedirs(outputFile)
except:
pass
tagged,lemmas = self.ptagger.pos_tag_raw_text(inputFile)
#_log.info(lemmas)
tagged, lemmas, raw_text = self.ptagger.pos_tag_raw_text(inputFile)
shutil.copy2(inputFile,outputFile+"/text.ccl")
#log.info("tagging finished ")
text_obj = InputTextObj(tagged, 'en')
keywords_dict=MMRPhrase(self.sent2vec, text_obj, lemmas, N=int(taskOptions['N']), beta=0.55, alias_threshold=0.7,lemmatizer=self.lemmatizer)
self.saveResult(keywords_dict,outputFile+"/embedrank.json")
keywords, scores = get_keywords(tagged, lemmas, raw_text, self.sent2vec, self.lemmatizer, 'sect', int(taskOptions['N']))
self.saveResult(keywords,scores,outputFile+"/embedrank.json")
finally:
pass
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment