Skip to content
Snippets Groups Projects
Commit 9ac9fb0d authored by Bartosz Broda
Browse files

bugfix?

parent e5f3b925
Branches
No related merge requests found
...@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc) Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc)
{ {
for(int i = 0; i < sc.size(); ++i){ size_t sssize = sc.size();
for(int i = 0; i < sc.size() ;++i){
sc.set_position(i); sc.set_position(i);
Corpus2::Token *pToken = sc.at(i); Corpus2::Token *pToken = sc.at(i);
//std::cout << "AAAAAAAAAAA" << i << " ---- " << sc.size() << std::endl;
//if(!pToken)
//continue;
//std::cout << pToken->orth_utf8() << " "; //std::cout << pToken->orth_utf8() << " ";
std::vector<Lexeme>& lexemes = pToken->lexemes(); std::vector<Lexeme>& lexemes = pToken->lexemes();
if(!lexemes.size()){
sssize = sc.size();
continue;
}
foreach(const Lexeme& lex, lexemes){ foreach(const Lexeme& lex, lexemes){
if(lex.is_disamb()){ if(lex.is_disamb()){
std::string base = lex.lemma_utf8(); std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base); const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl; //std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){ foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions; std::set<int> positions;
int head; int head;
...@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr(); Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
std::vector<Token*> &tokens = sent->tokens(); std::vector<Token*> &tokens = sent->tokens();
int orig_i = i;
foreach(const int &pos, positions){ foreach(const int &pos, positions){
Token* tok = tokens [pos]; Token* tok = tokens [pos];
new_orth_utf8 += tok->orth_utf8() + " "; new_orth_utf8 += tok->orth_utf8() + " ";
if(pos != head){ if(pos != head){
delete tok; delete tok;
tokens[pos] = NULL; tokens[pos] = NULL;
if(pos >= i) //std::cout << "BBBB " << pos << " " << i << std::endl;
if(orig_i > pos)
{
i--; i--;
std::cout << "\nTUTUXXXXXX\n";
}
//std::cout << "XBBBB " << pos << " " << i << std::endl;
} }
} }
new_orth_utf8.erase(new_orth_utf8.size()-1, 1); new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
...@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
} }
} }
} }
sssize = sc.size();
} }
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment