Skip to content
Snippets Groups Projects
Commit 52db134e authored by Tomasz Walkowiak's avatar Tomasz Walkowiak
Browse files

new version

parent b5986697
Branches
No related tags found
No related merge requests found
Pipeline #2569 passed
...@@ -111,10 +111,15 @@ class Category: ...@@ -111,10 +111,15 @@ class Category:
if base is None or len(base) == 0: if base is None or len(base) == 0:
continue continue
found = False found = False
mwe_base = base
for prop in token.iter("prop"): for prop in token.iter("prop"):
if prop.attrib["key"] == "mwe_base":
mwe_base = prop.text
print(mwe_base)
if prop.attrib["key"] == "sense:ukb:syns_id": if prop.attrib["key"] == "sense:ukb:syns_id":
id = str(prop.text) id = str(prop.text)
if id in model.synsets and model.synsets[id][1] == base: if (id in model.synsets and
model.synsets[id][1] == mwe_base):
self.add(stat, model.synsets[id][0], id, sentence) self.add(stat, model.synsets[id][0], id, sentence)
self.inc(stat, model.synsets[id][0]) self.inc(stat, model.synsets[id][0])
if self.verbose: if self.verbose:
...@@ -129,7 +134,7 @@ class Category: ...@@ -129,7 +134,7 @@ class Category:
for el in els: for el in els:
el = el.split("(")[0] el = el.split("(")[0]
if (el in model.variants and if (el in model.variants and
model.variants[el][1] == base): model.variants[el][1] == mwe_base):
self.add(stat, model.variants[el][0], el, self.add(stat, model.variants[el][0], el,
sentence) sentence)
self.inc(stat, model.variants[el][0]) self.inc(stat, model.variants[el][0])
...@@ -158,8 +163,8 @@ class Category: ...@@ -158,8 +163,8 @@ class Category:
def main(): def main():
"""Runs the program.""" """Runs the program."""
cat = Category(verbose=True) cat = Category(verbose=True)
cat.process("./test/test1.ccl", {"path": "/test/test.xlsx", "full": True}, cat.process("./test/tet3.ccl", {"path": "/test/as.xlsx", "full": True},
"./test/test1_out.json", ".") "./test/test3_out.json", ".")
if __name__ == "__main__": if __name__ == "__main__":
......
File added
{"tokens":43,"__SENTENCES__":{"Insight2":{"dowiedzie\u0107_si\u0119.2":["W szczeg\u00f3lno\u015bci kiedy dowiedzia\u0142am si\u0119 o implikacjach tego faktu."],"droga":["Jestem \u015bwiadoma, \u017ce nie jest to naj\u0142atwiejsza droga osi\u0105gni\u0119cia celu, ale domy\u015blam si\u0119, \u017ce musz\u0119 po prostu przestawi\u0107 si\u0119 do nowej rzeczywisto\u015bci."]}},"Insight2":2}
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE chunkList SYSTEM "ccl.dtd">
<chunkList>
<chunk id="1">
<sentence id="1">
<tok>
<orth>To</orth>
<lex disamb="1"><base>ten</base><ctag>adj:sg:nom:n:pos</ctag></lex>
<prop key="sense:ukb:syns_id">514</prop>
<prop key="sense:ukb:syns_rank">514/989.6925251215</prop>
<prop key="sense:ukb:unitsstr">dany.1(42:jak) ten.1(42:jak) następujący.1(42:jak) ów.1(42:jak)</prop>
</tok>
<tok>
<orth>wszystko</orth>
<lex disamb="1"><base>wszystko</base><ctag>subst:sg:nom:n</ctag></lex>
<prop key="sense:ukb:syns_id">37387</prop>
<prop key="sense:ukb:syns_rank">37387/414.5901564664 7954/404.2211765270</prop>
<prop key="sense:ukb:unitsstr">wszystko.2(46:sys) wszystkość.1(23:st) wszechrzecz.1(46:sys)</prop>
</tok>
<tok>
<orth>daje</orth>
<lex disamb="1"><base>dawać</base><ctag>fin:sg:ter:imperf</ctag></lex>
<prop key="sense:ukb:syns_id">3652</prop>
<prop key="sense:ukb:syns_rank">3652/62.5656318295 9022/59.6305764652 63252/55.7070352785 7066684/52.8673573050 63217/52.8024562563 57264/51.1886392563 2364/50.8602821768 193/50.2861288718 57730/49.8155029779 58557/48.9651475603 57263/48.0477814467 63246/47.8140177397 57261/47.5414226259 63247/47.3316630033 63253/47.1795902239 57728/46.4348217795 63243/46.1596627890 105357/45.9370605810 63251/45.8155337584 63249/45.7139318615</prop>
<prop key="sense:ukb:unitsstr">wyprawiać.3(39:sp) dawać.9(34:cwyt) wydawać.7(39:sp)</prop>
</tok>
<tok>
<orth>do</orth>
<lex disamb="1"><base>do</base><ctag>prep:gen</ctag></lex>
</tok>
<tok>
<orth>myślenia</orth>
<lex disamb="1"><base>myślenie</base><ctag>subst:sg:gen:n</ctag></lex>
<prop key="sense:ukb:syns_id">82697</prop>
<prop key="sense:ukb:syns_rank">82697/241.3117890115 82668/224.0688380158 8193/222.0741971430 2685/214.2743462330 83446/192.5855695056</prop>
<prop key="sense:ukb:unitsstr">uważanie.1(2:czy) sądzenie.1(2:czy) mniemanie.1(2:czy) myślenie.4(2:czy)</prop>
</tok>
<ns/>
<tok>
<orth>.</orth>
<lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
</tok>
</sentence>
<sentence id="2">
<tok>
<orth>W</orth>
<lex disamb="1"><base>w</base><ctag>prep:loc:nwok</ctag></lex>
<ann chan="mwe">1</ann>
</tok>
<tok>
<orth>szczególności</orth>
<lex disamb="1"><base>szczególność</base><ctag>subst:sg:loc:f</ctag></lex>
<ann chan="mwe">1</ann>
<prop key="mwe_base">w szczególności</prop>
</tok>
<tok>
<orth>kiedy</orth>
<lex disamb="1"><base>kiedy</base><ctag>adv</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>dowiedziała</orth>
<lex disamb="1"><base>dowiedzieć</base><ctag>praet:sg:f:perf</ctag></lex>
<ann chan="mwe">2</ann>
<prop key="mwe_base">dowiedzieć się</prop>
<prop key="sense:ukb:syns_id">81252</prop>
<prop key="sense:ukb:syns_rank">81252/504.5137356208 654/463.4789475583</prop>
<prop key="sense:ukb:unitsstr">wywiedzieć_się.1(30:cpor) dowiedzieć_się.2(30:cpor) zasięgnąć_języka.1(30:cpor)</prop>
</tok>
<ns/>
<tok>
<orth>m</orth>
<lex disamb="1"><base>być</base><ctag>aglt:sg:pri:perf:nwok</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>się</orth>
<lex disamb="1"><base>się</base><ctag>qub</ctag></lex>
<ann chan="mwe">2</ann>
</tok>
<tok>
<orth>o</orth>
<lex disamb="1"><base>o</base><ctag>prep:loc</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>implikacjach</orth>
<lex disamb="1"><base>implikacja</base><ctag>subst:pl:loc:f</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">105443</prop>
<prop key="sense:ukb:syns_rank">105443/395.6786754039 16952/368.6885921756 5138/328.5931724517</prop>
<prop key="sense:ukb:unitsstr">implikacja.3(6:umy)</prop>
</tok>
<tok>
<orth>tego</orth>
<lex disamb="1"><base>ten</base><ctag>adj:sg:gen:m3:pos</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">514</prop>
<prop key="sense:ukb:syns_rank">514/989.6925251215</prop>
<prop key="sense:ukb:unitsstr">dany.1(42:jak) ten.1(42:jak) następujący.1(42:jak) ów.1(42:jak)</prop>
</tok>
<tok>
<orth>faktu</orth>
<lex disamb="1"><base>fakt</base><ctag>subst:sg:gen:m3</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">3983</prop>
<prop key="sense:ukb:syns_rank">3983/956.1793738729</prop>
<prop key="sense:ukb:unitsstr">fakt.1(8:zdarz)</prop>
</tok>
<ns/>
<tok>
<orth>.</orth>
<lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
</sentence>
<sentence id="3">
<tok>
<orth>Jestem</orth>
<lex disamb="1"><base>być</base><ctag>fin:sg:pri:imperf</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">250925</prop>
<prop key="sense:ukb:syns_rank">250925/1066.1484180744 250920/157.1182144299 50321/148.1611167607 250912/123.3715549903 55138/117.8969452396 299/117.4991439625 57004/101.5662902050 250545/91.2308407392 250918/89.0776554420 250899/89.0562249430</prop>
<prop key="sense:ukb:unitsstr">to.2(40:cst) być.10(40:cst)</prop>
</tok>
<tok>
<orth>świadoma</orth>
<lex disamb="1"><base>świadomy</base><ctag>adj:sg:nom:f:pos</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">459709</prop>
<prop key="sense:ukb:syns_rank">459709/120.6780877163 459700/118.5048415002 3330/118.2788973985 57181/118.0540442235 239381/117.7675372389 470631/116.0052561855 248945/110.1600722548 470629/106.6813793794</prop>
<prop key="sense:ukb:unitsstr">świadomy.5(42:jak) przytomny.2(42:jak)</prop>
</tok>
<ns/>
<tok>
<orth>,</orth>
<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>że</orth>
<lex disamb="1"><base>że</base><ctag>comp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>nie</orth>
<lex disamb="1"><base>nie</base><ctag>qub</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>jest</orth>
<lex disamb="1"><base>być</base><ctag>fin:sg:ter:imperf</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">250925</prop>
<prop key="sense:ukb:syns_rank">250925/1066.1484180744 250920/157.1182144299 50321/148.1611167607 250912/123.3715549903 55138/117.8969452396 299/117.4991439625 57004/101.5662902050 250545/91.2308407392 250918/89.0776554420 250899/89.0562249430</prop>
<prop key="sense:ukb:unitsstr">to.2(40:cst) być.10(40:cst)</prop>
</tok>
<tok>
<orth>to</orth>
<lex disamb="1"><base>to</base><ctag>pred</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">250925</prop>
<prop key="sense:ukb:syns_rank">250925/1066.1484180744</prop>
<prop key="sense:ukb:unitsstr">to.2(40:cst) być.10(40:cst)</prop>
</tok>
<tok>
<orth>najłatwiejsza</orth>
<lex disamb="1"><base>łatwy</base><ctag>adj:sg:nom:f:sup</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">105150</prop>
<prop key="sense:ukb:syns_rank">105150/205.1227542286 103676/203.7348881992 241995/187.2598043373 9664/177.3395590384 249156/157.8947368421</prop>
<prop key="sense:ukb:unitsstr">łatwy.1(42:jak) lekki.4(42:jak)</prop>
</tok>
<tok>
<orth>droga</orth>
<lex disamb="1"><base>droga</base><ctag>subst:sg:nom:f</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">48193</prop>
<prop key="sense:ukb:syns_rank">48193/141.5652998018 21011/136.7958515690 54187/128.7796449000 3500/120.9795777076 65075/119.5514959981 54185/119.0750963693 54188/115.0738946444 407639/104.6219685971</prop>
<prop key="sense:ukb:unitsstr">droga.1(12:msc)</prop>
</tok>
<tok>
<orth>osiągnięcia</orth>
<lex disamb="1"><base>osiągnąć</base><ctag>ger:sg:gen:n:perf:aff</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>celu</orth>
<lex disamb="1"><base>cel</base><ctag>subst:sg:gen:m3</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">8053</prop>
<prop key="sense:ukb:syns_rank">8053/185.8792597717 5496/184.4028393086 5499/182.5878850837 5508/174.4650531098 5497/172.9432607478</prop>
<prop key="sense:ukb:unitsstr">cel.6(6:umy)</prop>
</tok>
<ns/>
<tok>
<orth>,</orth>
<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>ale</orth>
<lex disamb="1"><base>ale</base><ctag>conj</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>domyślam</orth>
<lex disamb="1"><base>domyślać</base><ctag>fin:sg:pri:imperf</ctag></lex>
<ann chan="mwe">1</ann>
<prop key="mwe_base">domyślać się</prop>
<prop key="sense:ukb:syns_id">610</prop>
<prop key="sense:ukb:syns_rank">610/904.8260658371</prop>
<prop key="sense:ukb:unitsstr">domyślać_się.1(29:cumy)</prop>
</tok>
<tok>
<orth>się</orth>
<lex disamb="1"><base>się</base><ctag>qub</ctag></lex>
<ann chan="mwe">1</ann>
</tok>
<ns/>
<tok>
<orth>,</orth>
<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>że</orth>
<lex disamb="1"><base>że</base><ctag>comp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>muszę</orth>
<lex disamb="1"><base>musieć</base><ctag>fin:sg:pri:imperf</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">1719</prop>
<prop key="sense:ukb:syns_rank">1719/599.1022615986 65832/437.2667132440</prop>
<prop key="sense:ukb:unitsstr">mieć.2(40:cst) potrzebować.1(29:cumy) musieć.1(29:cumy) być_zmuszonym.1(40:cst)</prop>
</tok>
<tok>
<orth>po</orth>
<lex disamb="1"><base>po</base><ctag>prep:acc</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>prostu</orth>
<lex disamb="1"><base>prosty</base><ctag>adjp</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">462640</prop>
<prop key="sense:ukb:syns_rank">462640/173.8602210669 105149/154.1190119385 229265/139.0985688119 9882/117.8738227237 9191/116.2883470833 436238/115.3964844008 2625/114.9498415307 436236/113.9829690078</prop>
<prop key="sense:ukb:unitsstr">prosty.8(42:jak)</prop>
</tok>
<tok>
<orth>przestawić</orth>
<lex disamb="1"><base>przestawić</base><ctag>inf:perf</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">7076039</prop>
<prop key="sense:ukb:syns_rank">7076039/156.0539961907 65719/140.3120008860 64984/138.8250390391 65722/132.9929934989 65721/127.1527860587 7076043/124.5850157296 65716/122.6844065408</prop>
<prop key="sense:ukb:unitsstr">przestawić.6(39:sp)</prop>
</tok>
<tok>
<orth>się</orth>
<lex disamb="1"><base>się</base><ctag>qub</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>do</orth>
<lex disamb="1"><base>do</base><ctag>prep:gen</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
<tok>
<orth>nowej</orth>
<lex disamb="1"><base>nowy</base><ctag>adj:sg:gen:f:pos</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">18360</prop>
<prop key="sense:ukb:syns_rank">18360/94.9723030717 9964/94.7982458323 420066/94.3473623395 434400/88.5311646149 420063/86.9479343233 434410/83.3012768377 420068/82.6418302074 420084/82.5752384821 9968/81.8199731028 400821/81.1790604021 434401/76.5367354704</prop>
<prop key="sense:ukb:unitsstr">nowy.2(42:jak)</prop>
</tok>
<tok>
<orth>rzeczywistości</orth>
<lex disamb="1"><base>rzeczywistość</base><ctag>subst:sg:gen:f</ctag></lex>
<ann chan="mwe">0</ann>
<prop key="sense:ukb:syns_id">2998</prop>
<prop key="sense:ukb:syns_rank">2998/316.1575631784 4682/313.9052513611 103373/290.5569598277</prop>
<prop key="sense:ukb:unitsstr">rzeczywistość.1(23:st) realia.1(23:st)</prop>
</tok>
<ns/>
<tok>
<orth>.</orth>
<lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
<ann chan="mwe">0</ann>
</tok>
</sentence>
</chunk>
</chunkList>
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment