Commit 4548494f authored by leszeks

docstyle and pep8 fixes

parent 6982fd42
Pipeline #1761 failed with stage
in 30 seconds
"""Implementation of MeWex Worker."""
#!/usr/bin/python3
import os
import re
......@@ -11,26 +12,32 @@ from nlp_ws import NLPWorker, NLPService
class MewexWorker(NLPWorker):
"""Implements mewex worker."""
def init(self):
    """Build the cascade lemmatizer and keep it on the worker instance."""
    lemmatizer_factory = WrapLem.CascadeLemmatizer
    self._lemmatizer = lemmatizer_factory.assembleLemmatizer()
def process(self, input_path, task_options, output_path):
    """Run MeWeX on the input and post-process the resulting CSV.

    The MeWeX options are read from ``task_options['mewex_options']``
    (an empty dict is used when the key is missing or falsy). The call
    writes ``mewex.csv`` into ``output_path``; that file is then
    lemmatized into ``mewexlemmatized.csv`` and passed through
    ``cut_lines`` with a limit of 1000 to produce ``mewexshort.csv``.

    :param input_path: a single input file, or a directory whose
        entries are all used as input files.
    :param task_options: mapping of task options; only the
        ``'mewex_options'`` entry is read here.
    :param output_path: directory for the result files; created when
        it does not exist yet.
    """
    args = _parse_mewex_options(task_options.get('mewex_options') or {})
    args['input_files'] = (
        [os.path.join(input_path, f) for f in os.listdir(input_path)]
        if os.path.isdir(input_path)
        else (input_path,)
    )
    print(args['input_files'])

    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(output_path, exist_ok=True)

    raw_csv = os.path.join(output_path, "mewex.csv")
    lemmatized_csv = os.path.join(output_path, "mewexlemmatized.csv")
    short_csv = os.path.join(output_path, "mewexshort.csv")

    # Each step runs exactly once: the output path is assigned a single
    # time and the post-processing chain is not repeated.
    args['output_file'] = raw_csv
    mwl.call_mewex(**args)
    self.lemmatize(raw_csv, lemmatized_csv)
    # NOTE(review): presumably keeps only the first 1000 lines - confirm
    # against cut_lines.
    self.cut_lines(lemmatized_csv, short_csv, 1000)
def cut_lines(self,inf,outf,lines):
def cut_lines(self, inf, outf, lines):
"""."""
f = open(inf, "r")
copy = open(outf, "w")
n = 0
......@@ -43,10 +50,13 @@ class MewexWorker(NLPWorker):
copy.close()
def lemmatize(self, inf, outf):
"""."""
input_file = io.open(inf, "r", encoding="utf-8")
output_file = open(outf, "w")
next(input_file); next(input_file) # First two rows are header rows, so just skip them
output_file.write("Rank\tQuantity\tRealtion\tBase form\tLemmatized form\tAll forms\n")
next(input_file)
next(input_file) # First two rows are header rows, so just skip them
output_file.write(
"Rank\tQuantity\tRealtion\tBase form\tLemmatized form\tAll forms\n")
orthreg = re.compile(r'[0-9]+:([^(]+)\(([^)]+)\).*')
basereg = re.compile(r'[^:]+:([^ ]+)')
for line in input_file:
......@@ -57,7 +67,7 @@ class MewexWorker(NLPWorker):
orth = orthtuple[0][0].strip()
tag = orthtuple[0][1]
result = self._lemmatizer.lemmatizeS(orth, base, tag, False)
splited.insert(4,result)
splited.insert(4, result)
output_file.write('\t'.join(splited) + '\n')
input_file.close()
output_file.close()
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment