Skip to content
Snippets Groups Projects
Commit b068b156 authored by Grzegorz Kostkowski
Browse files

Fix bugs

parent 57168df6
Branches fix-words-ann
1 merge request: !10 Fix words and phrases annotation bugs
Pipeline #3905 passed with stage
in 7 minutes and 58 seconds
......@@ -155,7 +155,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
// move context position to next token after MWE elements
int curr_position = sentence_ctx->get_position();
if (curr_position + all.size() < sentence->size()) {
sentence_ctx->set_position(sentence_ctx->get_position() + all.size());
sentence_ctx->set_position(sentence_ctx->get_position() + all.size() - 1);
}
}
......@@ -171,6 +171,8 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
// TODO: pass annotated sentence to methods
// AnnotatedSentencePtr ann_sentence = AnnotatedSentence::wrap_sentence(sentence);
int annotation_number = 0;
// keeps position of last annotated token
int last_ann_pos = -1;
SentenceContextPtr sc = boost::make_shared<Wccl::SentenceContext>(sentence);
for (sc->goto_start(); sc->is_current_inside(); sc->advance())
......@@ -184,6 +186,10 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
if(lex.is_disamb()){
std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
// variables holding values for selected lu
LexicalUnit::Ptr sel_pLU;
std::set<int> sel_positions;
int sel_head;
BOOST_FOREACH (LexicalUnit::Ptr pLU, potential)
{
bool ok = true;
......@@ -202,20 +208,45 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
int head;
bool is_here = pLU->IsHere(sc, positions, head);
if(is_here) {
if (annotate) {
add_mwe_channel(
sc, head, positions,
++annotation_number,
pLU->get_base());
}
else {
sc = clone_sentence_add_mwe(
sc, head, positions,
pLU->get_base());
if (positions.size() > sel_positions.size()) {
sel_pLU = pLU;
sel_positions = positions;
sel_head = head;
}
}
}
}
if (sel_pLU) {
int first_curr_pos;
int last_curr_pos;
if(!sel_positions.empty())
first_curr_pos = *sel_positions.begin();
// annotate only if the tokens have not already been
// annotated; the check below relies on the first token and
// prevents re-annotating the last token in the
// sentence (from the previous annotation)
if (first_curr_pos > last_ann_pos) {
if (annotate) {
add_mwe_channel(
sc,
sel_head,
sel_positions,
++annotation_number,
sel_pLU->get_base()
);
}
else {
sc = clone_sentence_add_mwe(
sc,
sel_head,
sel_positions,
sel_pLU->get_base()
);
}
last_curr_pos = *sel_positions.rbegin();
last_ann_pos = last_curr_pos;
}
}
}
}
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment