Commit bc7dad5c authored by Tomasz Walkowiak

Bug in reading excel fix

parent aa62320f
Pipeline #661 failed with stages
in 37 seconds
......@@ -18,6 +18,7 @@ class Model:
self.synsets = {}
if len(path) > 0:
self.read(path)
#print(self.synsets)
print("New model")
def read(self, path):
......@@ -28,6 +29,7 @@ class Model:
ws = wb.active
num = 0
for row in ws.iter_rows():
#print(num)
if num == 0:
num += 1
continue
......@@ -36,7 +38,9 @@ class Model:
if lemma is None or tag is None:
continue
var = row[1].value
synset = row[3].value
synset = row[3].value if len(row) > 3 else None
if synset is not None:
self.synsets[str(synset)] = tag
else:
......@@ -51,12 +55,13 @@ class Model:
class Category:
"""Category."""
def __init__(self):
def __init__(self,verbose=False):
"""Initialize Category."""
self.model = {}
self.modelpath = ""
self.verbose = verbose
def getModel(self, path):
def get_model(self, path):
"""."""
if self.modelpath == path:
return self.model
......@@ -75,14 +80,14 @@ class Category:
path = ""
if "path" in taskOptions:
path = XLSXpath + taskOptions["path"]
model = self.getModel(path)
model = self.get_model(path)
tree = ET.parse(inputFile)
stat = {"tokens": 0}
for token in tree.iter("tok"):
base = token.find("./lex/base").text
stat["tokens"] += 1
if len(base) == 0:
if base is None or len(base) == 0:
continue
found = False
for prop in token.iter("prop"):
......@@ -90,29 +95,42 @@ class Category:
id = str(prop.text)
if id in model.synsets:
self.inc(stat, model.synsets[id])
if self.verbose:
print("Synset %s, category %s" % (id,model.synsets[id]))
found = True
break
if prop.attrib["key"] == "sense:ukb:unitsstr":
for elem in prop.text.split(" "):
el = elem.split("(")[0]
if el in model.variants:
self.inc(stat, model.variants[el])
found = True
els = elem.split(" ")
for el in els:
el = el.split("(")[0]
if el in model.variants:
self.inc(stat, model.variants[el])
if self.verbose:
print("Variant %s, category %s" % (el,model.variants[el]))
found = True
break
if found:
break
if not found:
if base in model.lemmas:
self.inc(stat, model.lemmas[base])
if self.verbose:
print("Base %s, category %s" % (base, model.lemmas[base]))
if self.verbose:
print(stat)
with open(outputFile, "w") as f:
ujson.dump(stat, f)
def main():
"""Runs the program."""
cat = Category()
cat.process("../test/test1.ccl", {"path": "/test/test.xlsx"},
cat = Category(verbose=True)
cat.process("../test/test2.ccl", {"path": "/test/test.xlsx"},
"../test/test1_out.json", "..")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment