Skip to content
Snippets Groups Projects
Commit 9ac9fb0d authored by Bartosz Broda
Browse files

bugfix?

parent e5f3b925
Branches
No related merge requests found
......@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc)
{
for(int i = 0; i < sc.size(); ++i){
size_t sssize = sc.size();
for(int i = 0; i < sc.size() ;++i){
sc.set_position(i);
Corpus2::Token *pToken = sc.at(i);
//std::cout << "AAAAAAAAAAA" << i << " ---- " << sc.size() << std::endl;
//if(!pToken)
//continue;
//std::cout << pToken->orth_utf8() << " ";
std::vector<Lexeme>& lexemes = pToken->lexemes();
if(!lexemes.size()){
sssize = sc.size();
continue;
}
foreach(const Lexeme& lex, lexemes){
if(lex.is_disamb()){
std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl;
//std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions;
int head;
......@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
std::vector<Token*> &tokens = sent->tokens();
int orig_i = i;
foreach(const int &pos, positions){
Token* tok = tokens [pos];
new_orth_utf8 += tok->orth_utf8() + " ";
if(pos != head){
delete tok;
tokens[pos] = NULL;
if(pos >= i)
//std::cout << "BBBB " << pos << " " << i << std::endl;
if(orig_i > pos)
{
i--;
std::cout << "\nTUTUXXXXXX\n";
}
//std::cout << "XBBBB " << pos << " " << i << std::endl;
}
}
new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
......@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
}
}
}
sssize = sc.size();
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment