From 2b1b7170aa0e4970239b6467adde6e897a5b7952 Mon Sep 17 00:00:00 2001
From: omekr <roman.kurc@pwr.wroc.pl>
Date: Fri, 17 Jun 2011 17:11:43 +0200
Subject: [PATCH] Add tests and fix problems

---
 libmwereader/mwe.cpp                     |   1 +
 libmwereader/mweparser.cpp               |   3 +
 libmwereader/mwereader.cpp               |   9 +-
 libmwereader/tests/data/fix_gap_mwe.xml  |  20 +-
 libmwereader/tests/data/fix_mwe.xml      |   2 +-
 libmwereader/tests/data/flex_gap_mwe.xml |  41 +-
 libmwereader/tests/data/test1.kipi.xml   | 470 +++++++++++++++++++++++
 libmwereader/tests/data/test1.xml        |   2 +
 libmwereader/tests/data/test_mwe.xml     |   2 +-
 libmwereader/tests/mwefunctional.cpp     |  45 ++-
 10 files changed, 552 insertions(+), 43 deletions(-)

diff --git a/libmwereader/mwe.cpp b/libmwereader/mwe.cpp
index ec4bbe0..4fcc72b 100644
--- a/libmwereader/mwe.cpp
+++ b/libmwereader/mwe.cpp
@@ -13,6 +13,7 @@ LexicalUnit::LexicalUnit(const std::string &base,
 	  base_(base),
 	  nowhere_(Wccl::Position())
 {
+
 	for(strmap::iterator iter = variables.begin();
 		iter != variables.end(); ++iter){
 		potential_bases_.insert(iter->second);
diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp
index 54e8b27..1edb9c6 100644
--- a/libmwereader/mweparser.cpp
+++ b/libmwereader/mweparser.cpp
@@ -63,6 +63,7 @@ namespace Corpus2 {
 					head_cond_);
 
 		if(group_type_ == "fix"){ // group_name_  -> lower case
+
 			mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head,
 											  variables_)));
 		} else if(group_type_ == "flex"){
@@ -85,6 +86,8 @@ namespace Corpus2 {
 				value = a.value;
 			}
 		}
+		if (value == "")
+			throw Wccl::WcclError("Attribute: "+name+" not found");
 		return value;
 	}
 
diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp
index 6b0c861..670d139 100644
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -66,12 +66,17 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 										i--;
 								}
 							}
-
+							new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
 							Corpus2::Token *tok = (*sent)[head];
 							tok->set_orth_utf8(new_orth_utf8);
 							foreach(Lexeme& lex, tok->lexemes())
-								if(lex.is_disamb())
+							{
+
+								if(lex.is_disamb()){
+
 									lex.set_lemma_utf8(pLU->get_base());
+								}
+							}
 
 							std::vector <Token*>::iterator del_iter = tokens.begin();
 							while (del_iter  != tokens.end()) {
diff --git a/libmwereader/tests/data/fix_gap_mwe.xml b/libmwereader/tests/data/fix_gap_mwe.xml
index 5c4e868..fe63652 100644
--- a/libmwereader/tests/data/fix_gap_mwe.xml
+++ b/libmwereader/tests/data/fix_gap_mwe.xml
@@ -1,21 +1,21 @@
 <?xml version='1.0' encoding='utf-8'?>
 <units_description tagset='kipi'>
-	<mwegroup name="AdjSubstFix" type="fix" class="subst">
+	<mwegroup name="SubstAdjFix" type="fix" class="subst">
 		<condition>
 			and(
-				inter(base[0],$s:Adj),
-				inter(base[1],$s:Subst),
+				inter(base[0],$s:Subst),
+				rlook(1, 10, $Pos, inter(base[$Pos],$s:Adj)),
 				setvar($Pos1, 0),
-				setvar($Pos2, 1),
-				inter(class[0],{adj}),
-				inter(class[1],{subst,ger,depr}),
-				agrpp(0,1,{nmb,gnd,cas})
+				setvar($Pos2, $Pos),
+				inter(class[0],{subst,ger,depr}),
+				inter(class[$Pos],{adj}),
+				agrpp(0,$Pos,{nmb,gnd,cas})
 			)
 		</condition>
 		<instances>
-			<MWE name="dobra wola">
-				<var name="Adj">dobry</var>
-				<var name="Subst">wola</var>
+			<MWE base="instrument muzyczny">
+				<var name="Subst">instrument</var>
+				<var name="Adj">muzyczny</var>
 				<head>inter(class[0],{subst,ger,depr})</head>
 			</MWE>
 		</instances>
diff --git a/libmwereader/tests/data/fix_mwe.xml b/libmwereader/tests/data/fix_mwe.xml
index 5c4e868..2ffbf3d 100644
--- a/libmwereader/tests/data/fix_mwe.xml
+++ b/libmwereader/tests/data/fix_mwe.xml
@@ -13,7 +13,7 @@
 			)
 		</condition>
 		<instances>
-			<MWE name="dobra wola">
+			<MWE base="dobra wola">
 				<var name="Adj">dobry</var>
 				<var name="Subst">wola</var>
 				<head>inter(class[0],{subst,ger,depr})</head>
diff --git a/libmwereader/tests/data/flex_gap_mwe.xml b/libmwereader/tests/data/flex_gap_mwe.xml
index 107c15e..ead2807 100644
--- a/libmwereader/tests/data/flex_gap_mwe.xml
+++ b/libmwereader/tests/data/flex_gap_mwe.xml
@@ -3,32 +3,31 @@
 <mwegroup name="SubstAdjSgFlex" type="flex" class="subst">
                 <condition>
                         or(
+                               and(
+									inter(base[0],$s:Subst),
+									rlook(1, 10, $Pos, inter(base[$Pos],$s:Adj)),
+									setvar($Pos1, 0),
+									setvar($Pos2, $Pos),
+									inter(class[0],{subst,ger,depr}),
+									inter(class[$Pos],{adj}),
+									agrpp(0,$Pos,{nmb,gnd,cas})
+								),//and
                                 and(
-                                        inter(base[1],$s:Adj),
-                                        inter(base[0],$s:Subst),
-                                        setvar($Pos1, 1),
-                                        setvar($Pos2, 0),
-                                        inter(class[1],{adj}),
-                                        equal(nmb[0], {sg}),
-                                        in(class[0],{subst,ger,depr}),
-                                        agrpp(0,1,{nmb,gnd,cas})
-                                ),//and
-                                and(
-                                        inter(base[0],$s:Adj),
-                                        inter(base[1],$s:Subst),
-                                        setvar($Pos1, 0),
-                                        setvar($Pos2, 1),
-                                        inter(class[0],{adj}),
-                                        equal(nmb[1],{sg}),
-                                        in(class[1],{subst,ger,depr}),
-                                        agrpp(0,1,{nmb,gnd,cas})
+									inter(base[0],$s:Adj),
+									rlook(1, 10, $Pos, inter(base[$Pos],$s:Subst)),		
+									setvar($Pos1, $Pos),
+									setvar($Pos2, 0),
+									inter(class[0],{adj}),
+									equal(nmb[$Pos],{sg}),
+									in(class[$Pos],{subst,ger,depr}),
+									agrpp(0,$Pos,{nmb,gnd,cas})
                                 )
                         )//or
                 </condition>
                 <instances>
-                        <MWE base="dzień dobry">
-                                <var name="Subst">dzień</var>
-                                <var name="Adj">dobry</var>
+                        <MWE base="praca naukowa">
+                                <var name="Subst">praca</var>
+                                <var name="Adj">naukowy</var>
                                 <head>in(class[0],{subst,ger,depr})</head>
                         </MWE>
 		</instances>
diff --git a/libmwereader/tests/data/test1.kipi.xml b/libmwereader/tests/data/test1.kipi.xml
index ea76d4f..5b20000 100644
--- a/libmwereader/tests/data/test1.kipi.xml
+++ b/libmwereader/tests/data/test1.kipi.xml
@@ -1587,5 +1587,475 @@
 </tok>
 </chunk>
 </chunk>
+<chunk type="p" xlink:href="#dv1p1">
+<chunk type="s">
+<tok>
+<orth>Pozycja</orth>
+<lex disamb="1"><base>pozycja</base><ctag>subst:sg:nom:f</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>mimo</orth>
+<lex><base>mima</base><ctag>subst:sg:voc:m1</ctag></lex>
+<lex disamb="1"><base>mimo</base><ctag>prep:gen</ctag></lex>
+</tok>
+<tok>
+<orth>iż</orth>
+<lex disamb="1"><base>iż</base><ctag>conj</ctag></lex>
+</tok>
+<tok>
+<orth>dotyczy</orth>
+<lex disamb="1"><base>dotyczyć</base><ctag>fin:sg:ter:imperf</ctag></lex>
+</tok>
+<tok>
+<orth>trudnych</orth>
+<lex><base>trudny</base><ctag>adj:pl:gen:m1:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:gen:m2:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:gen:m3:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:gen:f:pos</ctag></lex>
+<lex disamb="1"><base>trudny</base><ctag>adj:pl:gen:n:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:m1:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:m2:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:m3:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:f:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:n:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:acc:m1:pos</ctag></lex>
+</tok>
+<tok>
+<orth>zagadnień</orth>
+<lex disamb="1"><base>zagadnienie</base><ctag>subst:pl:gen:n</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>zawiera</orth>
+<lex disamb="1"><base>zawierać</base><ctag>fin:sg:ter:imperf</ctag></lex>
+</tok>
+<tok>
+<orth>jasne</orth>
+<lex disamb="1"><base>jasny</base><ctag>adj:sg:nom:n:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:sg:acc:n:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:m2:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:m3:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:f:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:n:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:m2:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:m3:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:f:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:n:pos</ctag></lex>
+</tok>
+<tok>
+<orth>i</orth>
+<lex disamb="1"><base>i</base><ctag>conj</ctag></lex>
+</tok>
+<tok>
+<orth>klarowne</orth>
+<lex><base>klarowny</base><ctag>adj:sg:nom:n:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:sg:acc:n:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:nom:m2:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:nom:m3:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:nom:f:pos</ctag></lex>
+<lex disamb="1"><base>klarowny</base><ctag>adj:pl:nom:n:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:m2:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:m3:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:f:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:n:pos</ctag></lex>
+</tok>
+<tok>
+<orth>pytania</orth>
+<lex><base>pytać</base><ctag>ger:sg:gen:n:imperf:aff</ctag></lex>
+<lex><base>pytanie</base><ctag>subst:sg:gen:n</ctag></lex>
+<lex disamb="1"><base>pytanie</base><ctag>subst:pl:nom:n</ctag></lex>
+<lex><base>pytanie</base><ctag>subst:pl:acc:n</ctag></lex>
+<lex><base>pytanie</base><ctag>subst:pl:voc:n</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>zrozumiałe</orth>
+<lex><base>zrozumiały</base><ctag>adj:sg:nom:n:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:sg:acc:n:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:nom:m2:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:nom:m3:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:nom:f:pos</ctag></lex>
+<lex disamb="1"><base>zrozumiały</base><ctag>adj:pl:nom:n:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:m2:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:m3:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:f:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:n:pos</ctag></lex>
+</tok>
+<tok>
+<orth>dla</orth>
+<lex disamb="1"><base>dla</base><ctag>prep:gen</ctag></lex>
+</tok>
+<tok>
+<orth>wszystkich</orth>
+<lex><base>wszystek</base><ctag>adj:pl:gen:m1:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:gen:m2:pos</ctag></lex>
+<lex disamb="1"><base>wszystek</base><ctag>adj:pl:gen:m3:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:gen:f:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:gen:n:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:m1:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:m2:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:m3:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:f:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:n:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:acc:m1:pos</ctag></lex>
+</tok>
+<tok>
+<orth>tych</orth>
+<lex disamb="1"><base>ten</base><ctag>adj:pl:gen:m1:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:m2:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:m3:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:f:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:n:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:m1:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:m2:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:m3:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:f:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:n:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:acc:m1:pos</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>którzy</orth>
+<lex disamb="1"><base>który</base><ctag>adj:pl:nom:m1:pos</ctag></lex>
+</tok>
+<tok>
+<orth>zajmują</orth>
+<lex disamb="1"><base>zajmować</base><ctag>fin:pl:ter:imperf</ctag></lex>
+</tok>
+<tok>
+<orth>się</orth>
+<lex disamb="1"><base>się</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>immunologią</orth>
+<lex disamb="1"><base>immunologia</base><ctag>subst:sg:inst:f</ctag></lex>
+</tok>
+<tok>
+<orth>z</orth>
+<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex>
+<lex><base>z</base><ctag>prep:inst:nwok</ctag></lex>
+<lex><base>z</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>racji</orth>
+<lex disamb="1"><base>racja</base><ctag>subst:sg:gen:f</ctag></lex>
+<lex><base>racja</base><ctag>subst:sg:dat:f</ctag></lex>
+<lex><base>racja</base><ctag>subst:sg:loc:f</ctag></lex>
+<lex><base>racja</base><ctag>subst:pl:gen:f</ctag></lex>
+</tok>
+<tok>
+<orth>studiów</orth>
+<lex disamb="1"><base>studium</base><ctag>subst:pl:gen:n</ctag></lex>
+<lex disamb="1"><base>studio</base><ctag>subst:pl:gen:n</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>pracy</orth>
+<lex disamb="1"><base>praca</base><ctag>subst:sg:gen:f</ctag></lex>
+<lex><base>praca</base><ctag>subst:sg:dat:f</ctag></lex>
+<lex><base>praca</base><ctag>subst:sg:loc:f</ctag></lex>
+</tok>
+<tok>
+<orth>z</orth>
+<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex>
+<lex><base>z</base><ctag>prep:inst:nwok</ctag></lex>
+<lex><base>z</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>natury</orth>
+<lex disamb="1"><base>natura</base><ctag>subst:sg:gen:f</ctag></lex>
+<lex><base>natura</base><ctag>subst:pl:nom:f</ctag></lex>
+<lex><base>natura</base><ctag>subst:pl:acc:f</ctag></lex>
+<lex><base>natura</base><ctag>subst:pl:voc:f</ctag></lex>
+</tok>
+<tok>
+<orth>naukowej</orth>
+<lex disamb="1"><base>naukowy</base><ctag>adj:sg:gen:f:pos</ctag></lex>
+<lex><base>naukowy</base><ctag>adj:sg:dat:f:pos</ctag></lex>
+<lex><base>naukowy</base><ctag>adj:sg:loc:f:pos</ctag></lex>
+</tok>
+<tok>
+<orth>czy</orth>
+<lex disamb="1"><base>czy</base><ctag>conj</ctag></lex>
+<lex><base>czy</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>zawodowej</orth>
+<lex disamb="1"><base>zawodowy</base><ctag>adj:sg:gen:f:pos</ctag></lex>
+<lex><base>zawodowy</base><ctag>adj:sg:dat:f:pos</ctag></lex>
+<lex><base>zawodowy</base><ctag>adj:sg:loc:f:pos</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>.</orth>
+<lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
+</tok>
+</chunk>
+</chunk>
+<chunk type="p" xlink:href="#dv1p1">
+<chunk type="s">
+<tok>
+<orth>Pozycja</orth>
+<lex disamb="1"><base>pozycja</base><ctag>subst:sg:nom:f</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>mimo</orth>
+<lex><base>mima</base><ctag>subst:sg:voc:m1</ctag></lex>
+<lex disamb="1"><base>mimo</base><ctag>prep:gen</ctag></lex>
+</tok>
+<tok>
+<orth>iż</orth>
+<lex disamb="1"><base>iż</base><ctag>conj</ctag></lex>
+</tok>
+<tok>
+<orth>dotyczy</orth>
+<lex disamb="1"><base>dotyczyć</base><ctag>fin:sg:ter:imperf</ctag></lex>
+</tok>
+<tok>
+<orth>trudnych</orth>
+<lex><base>trudny</base><ctag>adj:pl:gen:m1:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:gen:m2:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:gen:m3:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:gen:f:pos</ctag></lex>
+<lex disamb="1"><base>trudny</base><ctag>adj:pl:gen:n:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:m1:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:m2:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:m3:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:f:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:loc:n:pos</ctag></lex>
+<lex><base>trudny</base><ctag>adj:pl:acc:m1:pos</ctag></lex>
+</tok>
+<tok>
+<orth>zagadnień</orth>
+<lex disamb="1"><base>zagadnienie</base><ctag>subst:pl:gen:n</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>zawiera</orth>
+<lex disamb="1"><base>zawierać</base><ctag>fin:sg:ter:imperf</ctag></lex>
+</tok>
+<tok>
+<orth>jasne</orth>
+<lex disamb="1"><base>jasny</base><ctag>adj:sg:nom:n:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:sg:acc:n:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:m2:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:m3:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:f:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:nom:n:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:m2:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:m3:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:f:pos</ctag></lex>
+<lex><base>jasny</base><ctag>adj:pl:acc:n:pos</ctag></lex>
+</tok>
+<tok>
+<orth>i</orth>
+<lex disamb="1"><base>i</base><ctag>conj</ctag></lex>
+</tok>
+<tok>
+<orth>klarowne</orth>
+<lex><base>klarowny</base><ctag>adj:sg:nom:n:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:sg:acc:n:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:nom:m2:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:nom:m3:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:nom:f:pos</ctag></lex>
+<lex disamb="1"><base>klarowny</base><ctag>adj:pl:nom:n:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:m2:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:m3:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:f:pos</ctag></lex>
+<lex><base>klarowny</base><ctag>adj:pl:acc:n:pos</ctag></lex>
+</tok>
+<tok>
+<orth>pytania</orth>
+<lex><base>pytać</base><ctag>ger:sg:gen:n:imperf:aff</ctag></lex>
+<lex><base>pytanie</base><ctag>subst:sg:gen:n</ctag></lex>
+<lex disamb="1"><base>pytanie</base><ctag>subst:pl:nom:n</ctag></lex>
+<lex><base>pytanie</base><ctag>subst:pl:acc:n</ctag></lex>
+<lex><base>pytanie</base><ctag>subst:pl:voc:n</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>zrozumiałe</orth>
+<lex><base>zrozumiały</base><ctag>adj:sg:nom:n:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:sg:acc:n:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:nom:m2:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:nom:m3:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:nom:f:pos</ctag></lex>
+<lex disamb="1"><base>zrozumiały</base><ctag>adj:pl:nom:n:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:m2:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:m3:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:f:pos</ctag></lex>
+<lex><base>zrozumiały</base><ctag>adj:pl:acc:n:pos</ctag></lex>
+</tok>
+<tok>
+<orth>dla</orth>
+<lex disamb="1"><base>dla</base><ctag>prep:gen</ctag></lex>
+</tok>
+<tok>
+<orth>wszystkich</orth>
+<lex><base>wszystek</base><ctag>adj:pl:gen:m1:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:gen:m2:pos</ctag></lex>
+<lex disamb="1"><base>wszystek</base><ctag>adj:pl:gen:m3:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:gen:f:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:gen:n:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:m1:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:m2:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:m3:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:f:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:loc:n:pos</ctag></lex>
+<lex><base>wszystek</base><ctag>adj:pl:acc:m1:pos</ctag></lex>
+</tok>
+<tok>
+<orth>tych</orth>
+<lex disamb="1"><base>ten</base><ctag>adj:pl:gen:m1:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:m2:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:m3:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:f:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:gen:n:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:m1:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:m2:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:m3:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:f:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:loc:n:pos</ctag></lex>
+<lex><base>ten</base><ctag>adj:pl:acc:m1:pos</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>którzy</orth>
+<lex disamb="1"><base>który</base><ctag>adj:pl:nom:m1:pos</ctag></lex>
+</tok>
+<tok>
+<orth>zajmują</orth>
+<lex disamb="1"><base>zajmować</base><ctag>fin:pl:ter:imperf</ctag></lex>
+</tok>
+<tok>
+<orth>się</orth>
+<lex disamb="1"><base>się</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>immunologią</orth>
+<lex disamb="1"><base>immunologia</base><ctag>subst:sg:inst:f</ctag></lex>
+</tok>
+<tok>
+<orth>z</orth>
+<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex>
+<lex><base>z</base><ctag>prep:inst:nwok</ctag></lex>
+<lex><base>z</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>racji</orth>
+<lex disamb="1"><base>racja</base><ctag>subst:sg:gen:f</ctag></lex>
+<lex><base>racja</base><ctag>subst:sg:dat:f</ctag></lex>
+<lex><base>racja</base><ctag>subst:sg:loc:f</ctag></lex>
+<lex><base>racja</base><ctag>subst:pl:gen:f</ctag></lex>
+</tok>
+<tok>
+<orth>studiów</orth>
+<lex disamb="1"><base>studium</base><ctag>subst:pl:gen:n</ctag></lex>
+<lex disamb="1"><base>studio</base><ctag>subst:pl:gen:n</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>,</orth>
+<lex disamb="1"><base>,</base><ctag>interp</ctag></lex>
+</tok>
+<tok>
+<orth>naukowej</orth>
+<lex disamb="1"><base>naukowy</base><ctag>adj:sg:gen:f:pos</ctag></lex>
+<lex><base>naukowy</base><ctag>adj:sg:dat:f:pos</ctag></lex>
+<lex><base>naukowy</base><ctag>adj:sg:loc:f:pos</ctag></lex>
+</tok>
+<tok>
+<orth>z</orth>
+<lex disamb="1"><base>z</base><ctag>prep:gen:nwok</ctag></lex>
+<lex><base>z</base><ctag>prep:inst:nwok</ctag></lex>
+<lex><base>z</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>natury</orth>
+<lex disamb="1"><base>natura</base><ctag>subst:sg:gen:f</ctag></lex>
+<lex><base>natura</base><ctag>subst:pl:nom:f</ctag></lex>
+<lex><base>natura</base><ctag>subst:pl:acc:f</ctag></lex>
+<lex><base>natura</base><ctag>subst:pl:voc:f</ctag></lex>
+</tok>
+<tok>
+<orth>nie</orth>
+<lex><base>on</base><ctag>ppron3:sg:acc:n:ter:akc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:sg:acc:n:ter:nakc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:akc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:m2:ter:nakc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:akc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:m3:ter:nakc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:f:ter:akc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:f:ter:nakc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:n:ter:akc:praep</ctag></lex>
+<lex><base>on</base><ctag>ppron3:pl:acc:n:ter:nakc:praep</ctag></lex>
+<lex disamb="1"><base>nie</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>całkiem</orth>
+<lex disamb="1"><base>całkiem</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>pracy</orth>
+<lex disamb="1"><base>praca</base><ctag>subst:sg:gen:f</ctag></lex>
+<lex><base>praca</base><ctag>subst:sg:dat:f</ctag></lex>
+<lex><base>praca</base><ctag>subst:sg:loc:f</ctag></lex>
+</tok>
+<tok>
+<orth>czy</orth>
+<lex><base>czy</base><ctag>conj</ctag></lex>
+<lex disamb="1"><base>czy</base><ctag>qub</ctag></lex>
+</tok>
+<tok>
+<orth>zawodowej</orth>
+<lex disamb="1"><base>zawodowy</base><ctag>adj:sg:gen:f:pos</ctag></lex>
+<lex><base>zawodowy</base><ctag>adj:sg:dat:f:pos</ctag></lex>
+<lex><base>zawodowy</base><ctag>adj:sg:loc:f:pos</ctag></lex>
+</tok>
+<ns/>
+<tok>
+<orth>.</orth>
+<lex disamb="1"><base>.</base><ctag>interp</ctag></lex>
+</tok>
+</chunk>
+</chunk>
 </chunkList>
 </cesAna>
diff --git a/libmwereader/tests/data/test1.xml b/libmwereader/tests/data/test1.xml
index 6d37d57..c4b6b96 100644
--- a/libmwereader/tests/data/test1.xml
+++ b/libmwereader/tests/data/test1.xml
@@ -5,4 +5,6 @@
 <chunk type="p" xlink:href="#dv1p1">Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie podstaw programowych kształcenia w zawodach: górnik eksploatacji podziemnej, górnik odkrywkowej eksploatacji złóż, monter instalacji gazowych, monter instrumentów całkowicie muzycznych, monter sieci komunalnych, stolarz, technik hydrolog, technik instrumentów muzycznych, technik meteorolog i technik papiernictwa Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie sposobu i trybu organizowania indywidualnego obowiązkowego rocznego przygotowania przedszkolnego i indywidualnego nauczania dzieci i młodzieży Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie rodzajów innych form wychowania przedszkolnego, warunków tworzenia i organizowania tych form oraz sposobu ich działania Projekt rozporządzenia Ministra Edukacji Narodowej w sprawie rodzajów innych form wychowania przedszkolnego, warunków tworzenia i organizowania tych form oraz sposobu ich działania.</chunk>
 <chunk type="p" xlink:href="#dv1p1">Pozycja, mimo iż dotyczy trudnych zagadnień, zawiera jasne i klarowne pytania, zrozumiałe dla wszystkich tych, którzy zajmują się immunologią z racji studiów, pracy nie całkiem naukowej czy zawodowej.</chunk>
 <chunk type="p" xlink:href="#dv1p1">Pozycja, mimo iż dotyczy trudnych zagadnień, zawiera jasne i klarowne pytania, zrozumiałe dla wszystkich tych, którzy zajmują się immunologią z racji studiów, naukowej nie całkiem pracy czy zawodowej.</chunk>
+<chunk type="p" xlink:href="#dv1p1">Pozycja, mimo iż dotyczy trudnych zagadnień, zawiera jasne i klarowne pytania, zrozumiałe dla wszystkich tych, którzy zajmują się immunologią z racji studiów, pracy z natury naukowej czy zawodowej.</chunk>
+<chunk type="p" xlink:href="#dv1p1">Pozycja, mimo iż dotyczy trudnych zagadnień, zawiera jasne i klarowne pytania, zrozumiałe dla wszystkich tych, którzy zajmują się immunologią z racji studiów, naukowej z natury nie całkiem pracy czy zawodowej.</chunk>
 </chunkList></cesAna>
diff --git a/libmwereader/tests/data/test_mwe.xml b/libmwereader/tests/data/test_mwe.xml
index 85368ac..635b1f1 100644
--- a/libmwereader/tests/data/test_mwe.xml
+++ b/libmwereader/tests/data/test_mwe.xml
@@ -44,7 +44,7 @@
 			)
 		</condition>
 		<instances>
-			<MWE name="dobre imię">
+			<MWE base="dobre imię">
 				<var name="Adj">dobry</var>
 				<var name="Subst">imię</var>
 				<head>inter(class[0],{subst,ger,depr})</head>
diff --git a/libmwereader/tests/mwefunctional.cpp b/libmwereader/tests/mwefunctional.cpp
index 9ec4901..141d3fc 100644
--- a/libmwereader/tests/mwefunctional.cpp
+++ b/libmwereader/tests/mwefunctional.cpp
@@ -37,7 +37,6 @@ BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture)
 	const Corpus2::Tagset& tset = Corpus2::get_named_tagset("kipi");
 	Corpus2::MWEReader mwr(tset, test_corpus.string());
 	mwr.set_option("inner:xces");
-
 	mwr.set_option("mwefile:"+ (data_dir / "fix_mwe.xml").string());
 
 	Corpus2::Sentence::Ptr s1 = mwr.get_next_sentence();	
@@ -71,8 +70,7 @@ BOOST_FIXTURE_TEST_CASE( fix_no_gap , Fixture)
 	mwr.set_option("mwefile:"+ (data_dir / "fix_mwe.xml").string());
 	Corpus2::Sentence::Ptr s1 = mwr.get_next_sentence();	
 	Corpus2::Token* mwu = s1->operator[](4);
-	std::string a = mwu->orth_utf8();	
-	BOOST_CHECK(a == "dobrej woli");
+	BOOST_CHECK(mwu->orth_utf8() == "dobrej woli");
 	BOOST_CHECK(mwu->get_preferred_lexeme(tset).lemma_utf8() == "dobra wola");
 
 }
@@ -88,10 +86,10 @@ BOOST_FIXTURE_TEST_CASE( flex_no_gap , Fixture)
 	mwr.get_next_sentence();
 	Corpus2::Sentence::Ptr s2 = mwr.get_next_sentence();
 	Corpus2::Sentence::Ptr s3 = mwr.get_next_sentence();
-	Corpus2::Token* mwu = s2->operator[](13);
+	Corpus2::Token* mwu = s2->operator[](14);
 	BOOST_CHECK(mwu->orth_utf8() == "dzień dobry");
 	BOOST_CHECK(mwu->get_preferred_lexeme(tset).lemma_utf8() == "dzień dobry");
-	Corpus2::Token* mwu2 = s3->operator[](13);
+	Corpus2::Token* mwu2 = s3->operator[](14);
 	BOOST_CHECK(mwu2->orth_utf8() == "dobry dzień");
 	BOOST_CHECK(mwu2->get_preferred_lexeme(tset).lemma_utf8() == "dzień dobry");
 }
@@ -109,7 +107,8 @@ BOOST_FIXTURE_TEST_CASE( fix_gap , Fixture)
 	mwr.get_next_sentence();
 	Corpus2::Sentence::Ptr s4 = mwr.get_next_sentence();
 	Corpus2::Token* mwu = s4->operator[](27);
-	BOOST_CHECK(mwu->orth_utf8() == "instumentów muzycznych");
+
+	BOOST_CHECK(mwu->orth_utf8() == "instrumentów muzycznych");
 	BOOST_CHECK(mwu->get_preferred_lexeme(tset).lemma_utf8() == "instrument muzyczny");
 }
 BOOST_FIXTURE_TEST_CASE( flex_gap , Fixture)
@@ -119,7 +118,7 @@ BOOST_FIXTURE_TEST_CASE( flex_gap , Fixture)
 	Corpus2::MWEReader mwr(tset, test_corpus.string());
 	mwr.set_option("inner:xces");
 
-	mwr.set_option("mwefile:"+ (data_dir / "flex_mwe.xml").string());
+	mwr.set_option("mwefile:"+ (data_dir / "flex_gap_mwe.xml").string());
 	mwr.get_next_sentence();
 	mwr.get_next_sentence();
 	mwr.get_next_sentence();
@@ -129,8 +128,38 @@ BOOST_FIXTURE_TEST_CASE( flex_gap , Fixture)
 	Corpus2::Token* mwu = s5->operator[](27);
 	BOOST_CHECK(mwu->orth_utf8() == "pracy naukowej");
 	BOOST_CHECK(mwu->get_preferred_lexeme(tset).lemma_utf8() == "praca naukowa");
-	Corpus2::Token* mwu2 = s6->operator[](27);
+	Corpus2::Token* mwu2 = s6->operator[](29);
 	BOOST_CHECK(mwu2->orth_utf8() == "naukowej pracy");
 	BOOST_CHECK(mwu2->get_preferred_lexeme(tset).lemma_utf8() == "praca naukowa");
 }
+// Checks whether the head ends up in the right position when the noun sits in the middle of the gapped MWE.
+BOOST_FIXTURE_TEST_CASE( flex_gap_noun , Fixture)
+{
+	BOOST_MESSAGE("=====================\ntest: finding flex_gap_noun mwe");
+	const Corpus2::Tagset& tset = Corpus2::get_named_tagset("kipi");
+	Corpus2::MWEReader mwr(tset, test_corpus.string());
+	mwr.set_option("inner:xces");
+
+	mwr.set_option("mwefile:"+ (data_dir / "flex_gap_mwe.xml").string()); // MWE definitions used by this test
+	mwr.get_next_sentence(); // the first six sentences are read and discarded;
+	mwr.get_next_sentence(); // only the 7th and 8th sentences are inspected below
+	mwr.get_next_sentence();
+	mwr.get_next_sentence();
+	mwr.get_next_sentence();
+	mwr.get_next_sentence();
+	Corpus2::Sentence::Ptr s7 = mwr.get_next_sentence();
+	Corpus2::Token* mwu = s7->operator[](27); // token index where the merged unit is expected in sentence 7
+	BOOST_CHECK(mwu->orth_utf8() == "pracy naukowej");
+	BOOST_CHECK(mwu->get_preferred_lexeme(tset).lemma_utf8() == "praca naukowa");
+	Corpus2::Sentence::Ptr s8 = mwr.get_next_sentence();
+	Corpus2::Token* mwu2 = s8->operator[](31); // token index where the merged unit is expected in sentence 8
+	std::string a = mwu2->orth_utf8();
+	BOOST_MESSAGE("+++++"+a); // diagnostic output of the orth actually found at that index
+	BOOST_CHECK(mwu2->orth_utf8() == "naukowej pracy");
+	BOOST_CHECK(mwu2->get_preferred_lexeme(tset).lemma_utf8() == "praca naukowa"); // lemma is the MWE base regardless of word order
+
+}
+
+
+
 BOOST_AUTO_TEST_SUITE_END()
-- 
GitLab