From 9ac9fb0dd969326eafb84023b56d73932fb02c69 Mon Sep 17 00:00:00 2001
From: Bartosz Broda <bartosz.broda@gmail.com>
Date: Fri, 5 Aug 2011 15:35:08 +0200
Subject: [PATCH] MWEReader: fix loop index adjustment when deleting merged tokens; skip tokens without lexemes

---
 libmwereader/mwereader.cpp | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp
index e2399fa..6586c39 100644
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 	Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc)
 	{
 
-		for(int i = 0; i < sc.size(); ++i){
+		size_t sssize = sc.size();
+		for(int i = 0; i < sc.size() ;++i){
 			sc.set_position(i);
 			Corpus2::Token *pToken = sc.at(i);
+			//std::cout << "AAAAAAAAAAA" << i << " ---- " << sc.size()  << std::endl;
+			//if(!pToken)
+				//continue;
 			//std::cout << pToken->orth_utf8() << " ";
 			std::vector<Lexeme>& lexemes = pToken->lexemes();
+			if(!lexemes.size()){
+				sssize = sc.size();
+				continue;
+			}
 			foreach(const Lexeme& lex, lexemes){
 				if(lex.is_disamb()){
 					std::string base = lex.lemma_utf8();
 					const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
-					std::cout << "potential " << potential.size() << std::endl;
+					//std::cout << "potential " << potential.size() << std::endl;
 					foreach(LexicalUnit::Ptr pLU, potential){
 						std::set<int> positions;
 						int head;
@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 							Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
 
 							std::vector<Token*> &tokens = sent->tokens();
+							int orig_i = i;
 							foreach(const int &pos, positions){
 								Token* tok = tokens [pos];
 								new_orth_utf8 += tok->orth_utf8() + " ";
 								if(pos != head){
 									delete tok;
 									tokens[pos] = NULL;
-									if(pos >= i)
+									//std::cout << "BBBB " << pos  << " " << i << std::endl;
+
+									if(orig_i > pos)
+									{
 										i--;
+										std::cout << "\nTUTUXXXXXX\n";
+									}
+									//std::cout << "XBBBB " << pos  << " " << i << std::endl;
 								}
 							}
 							new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 					}
 				}
 			}
+			sssize = sc.size();
 		}
 
 
-- 
GitLab