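bugfix? (tentative: changes when the loop index is decremented after merged MWE tokens are deleted in MWEReader::process_sentence, and skips tokens that have no lexemes)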

9ac9fb0d · Bartosz Broda · e5f3b925 · 9ac9fb0d
Commit 9ac9fb0d authored 13 years ago by Bartosz Broda
--- a/libmwereader/mwereader.cpp
+++ b/libmwereader/mwereader.cpp
@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 	Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc)
 	{
-		for(int i = 0; i < sc.size(); ++i){
+		size_t sssize = sc.size();
+		for(int i = 0; i < sc.size() ;++i){
 			sc.set_position(i);
 			Corpus2::Token *pToken = sc.at(i);
+			//std::cout << "AAAAAAAAAAA" << i << " ---- " << sc.size()  << std::endl;
+			//if(!pToken)
+				//continue;
 			//std::cout << pToken->orth_utf8() << " ";
 			std::vector<Lexeme>& lexemes = pToken->lexemes();
+			if(!lexemes.size()){
+				sssize = sc.size();
+				continue;
+			}
 			foreach(const Lexeme& lex, lexemes){
 				if(lex.is_disamb()){
 					std::string base = lex.lemma_utf8();
 					const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
-					std::cout << "potential " << potential.size() << std::endl;
+					//std::cout << "potential " << potential.size() << std::endl;
 					foreach(LexicalUnit::Ptr pLU, potential){
 						std::set<int> positions;
 						int head;
@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 							Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
 							std::vector<Token*> &tokens = sent->tokens();
+							int orig_i = i;
 							foreach(const int &pos, positions){
 								Token* tok = tokens [pos];
 								new_orth_utf8 += tok->orth_utf8() + " ";
 								if(pos != head){
 									delete tok;
 									tokens[pos] = NULL;
-									if(pos >= i)
+									//std::cout << "BBBB " << pos  << " " << i << std::endl;
+									if(orig_i > pos)
+									{
 										i--;
+										std::cout << "\nTUTUXXXXXX\n";
+									}
+									//std::cout << "XBBBB " << pos  << " " << i << std::endl;
 								}
 							}
 							new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
 					}
 				}
 			}
+			sssize = sc.size();
 		}