Skip to content
Snippets Groups Projects
Commit 9ac9fb0d authored by Bartosz Broda
Browse files

bugfix?

parent e5f3b925
No related branches found
No related tags found
No related merge requests found
...@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc) Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc)
{ {
size_t sssize = sc.size();
for(int i = 0; i < sc.size() ;++i){ for(int i = 0; i < sc.size() ;++i){
sc.set_position(i); sc.set_position(i);
Corpus2::Token *pToken = sc.at(i); Corpus2::Token *pToken = sc.at(i);
//std::cout << "AAAAAAAAAAA" << i << " ---- " << sc.size() << std::endl;
//if(!pToken)
//continue;
//std::cout << pToken->orth_utf8() << " "; //std::cout << pToken->orth_utf8() << " ";
std::vector<Lexeme>& lexemes = pToken->lexemes(); std::vector<Lexeme>& lexemes = pToken->lexemes();
if(!lexemes.size()){
sssize = sc.size();
continue;
}
foreach(const Lexeme& lex, lexemes){ foreach(const Lexeme& lex, lexemes){
if(lex.is_disamb()){ if(lex.is_disamb()){
std::string base = lex.lemma_utf8(); std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base); const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl; //std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){ foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions; std::set<int> positions;
int head; int head;
...@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr(); Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
std::vector<Token*> &tokens = sent->tokens(); std::vector<Token*> &tokens = sent->tokens();
int orig_i = i;
foreach(const int &pos, positions){ foreach(const int &pos, positions){
Token* tok = tokens [pos]; Token* tok = tokens [pos];
new_orth_utf8 += tok->orth_utf8() + " "; new_orth_utf8 += tok->orth_utf8() + " ";
if(pos != head){ if(pos != head){
delete tok; delete tok;
tokens[pos] = NULL; tokens[pos] = NULL;
if(pos >= i) //std::cout << "BBBB " << pos << " " << i << std::endl;
if(orig_i > pos)
{
i--; i--;
std::cout << "\nTUTUXXXXXX\n";
}
//std::cout << "XBBBB " << pos << " " << i << std::endl;
} }
} }
new_orth_utf8.erase(new_orth_utf8.size()-1, 1); new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
...@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
} }
} }
} }
sssize = sc.size();
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment