Skip to content
Snippets Groups Projects
Commit 9ac9fb0d authored by Bartosz Broda
Browse files

bugfix?

parent e5f3b925
No related branches found
No related tags found
No related merge requests found
......@@ -40,16 +40,24 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Sentence::Ptr MWEReader::process_sentence(Wccl::SentenceContext & sc)
{
size_t sssize = sc.size();
for(int i = 0; i < sc.size() ;++i){
sc.set_position(i);
Corpus2::Token *pToken = sc.at(i);
//std::cout << "AAAAAAAAAAA" << i << " ---- " << sc.size() << std::endl;
//if(!pToken)
//continue;
//std::cout << pToken->orth_utf8() << " ";
std::vector<Lexeme>& lexemes = pToken->lexemes();
if(!lexemes.size()){
sssize = sc.size();
continue;
}
foreach(const Lexeme& lex, lexemes){
if(lex.is_disamb()){
std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl;
//std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions;
int head;
......@@ -61,14 +69,21 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
std::vector<Token*> &tokens = sent->tokens();
int orig_i = i;
foreach(const int &pos, positions){
Token* tok = tokens [pos];
new_orth_utf8 += tok->orth_utf8() + " ";
if(pos != head){
delete tok;
tokens[pos] = NULL;
if(pos >= i)
//std::cout << "BBBB " << pos << " " << i << std::endl;
if(orig_i > pos)
{
i--;
std::cout << "\nTUTUXXXXXX\n";
}
//std::cout << "XBBBB " << pos << " " << i << std::endl;
}
}
new_orth_utf8.erase(new_orth_utf8.size()-1, 1);
......@@ -84,6 +99,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
}
}
}
sssize = sc.size();
}
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment