Skip to content
Snippets Groups Projects
Commit 20edcc90 authored by omekr
Browse files

test against converted multiword units

parent 45e923f3
No related branches found
No related tags found
No related merge requests found
#include "mwe.h" #include "mwe.h"
#include <boost/algorithm/string.hpp> #include <boost/algorithm/string.hpp>
#include <libwccl/values/strset.h> #include <libwccl/values/strset.h>
#include <boost/algorithm/string/predicate.hpp>
namespace Corpus2{ namespace Corpus2{
...@@ -27,11 +28,14 @@ LexicalUnit::LexicalUnit(const std::string &base, ...@@ -27,11 +28,14 @@ LexicalUnit::LexicalUnit(const std::string &base,
bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc, bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
std::set<int> &out_position, int &head_pos) const std::set<int> &out_position, int &head_pos) const
{ {
// set variables // set variables, skip vars with names starting with '!'
for(variables_map::const_iterator ivars = variables_.begin(); for(variables_map::const_iterator ivars = variables_.begin();
ivars != variables_.end(); ++ivars){ ivars != variables_.end(); ++ivars){
if(!boost::starts_with(ivars->first, "!")){
std::cout << ivars->first << " " << std::endl;
condition_->set<Wccl::StrSet>(ivars->first, ivars->second); condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
} }
}
// fire up the operator // fire up the operator
boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc); boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
...@@ -102,6 +106,7 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu) ...@@ -102,6 +106,7 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
{ {
foreach(const std::string& base, lu->get_potential_bases()){ foreach(const std::string& base, lu->get_potential_bases()){
value_type::iterator find = index_.find(base); value_type::iterator find = index_.find(base);
std::cout << "b:"<<base<<std::endl;
if(find == index_.end()){ // not found -> create new one if(find == index_.end()){ // not found -> create new one
luvec v; luvec v;
v.push_back(lu); v.push_back(lu);
...@@ -113,6 +118,8 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu) ...@@ -113,6 +118,8 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
} }
const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){ const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){
std::cout << "index " << index_.size()<< std::endl;
std::cout << "sb:"<<base<<std::endl;
value_type::iterator find = index_.find(base); value_type::iterator find = index_.find(base);
if(find == index_.end()){ // not found -> return empty if(find == index_.end()){ // not found -> return empty
return empty_; return empty_;
......
...@@ -34,9 +34,9 @@ namespace Corpus2 { ...@@ -34,9 +34,9 @@ namespace Corpus2 {
if(search != where.end()) if(search != where.end())
return search->second; return search->second;
//std::cout << " dddddddddddddd "<< cond << std::endl;
BoolOpPtr op = parser_.parseBoolOperator(cond); BoolOpPtr op = parser_.parseBoolOperator(cond);
//std::cout << " dddddddddddddd $$$$" << cond << std::endl;
where[cond] = op; where[cond] = op;
return op; return op;
...@@ -57,24 +57,28 @@ namespace Corpus2 { ...@@ -57,24 +57,28 @@ namespace Corpus2 {
void MWEParser::create_mwe() void MWEParser::create_mwe()
{ {
print_current_mwe(true); print_current_mwe(true);
//std::cout << " kupa cond" << std::endl;
MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition( MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition(
wccl_operator_); wccl_operator_);
//std::cout << " kupa head" << std::endl;
MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition( MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
head_cond_); head_cond_);
//std::cout << " kupa " << std::endl;
if(group_type_ == "fix"){ // group_name_ -> lower case if(group_type_ == "fix"){ // group_name_ -> lower case
//std::cout << " kupa fix" << std::endl;
mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head, mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head,
variables_))); variables_)));
} else if(group_type_ == "flex"){ } else if(group_type_ == "flex"){
//std::cout << " kupa flex" << std::endl;
mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head, mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head,
variables_))); variables_)));
} else { } else {
throw Wccl::WcclError("Unknown type of lexical unit:" throw Wccl::WcclError("Unknown type of lexical unit:"
+ group_type_); + group_type_);
} }
//std::cout << " kupa clear" << std::endl;
variables_.clear(); variables_.clear();
//std::cout << "po kupie " << std::endl;
} }
std::string MWEParser::get_attribute(const AttributeList& attributes, std::string MWEParser::get_attribute(const AttributeList& attributes,
...@@ -109,8 +113,9 @@ namespace Corpus2 { ...@@ -109,8 +113,9 @@ namespace Corpus2 {
void MWEParser::on_start_element(const Glib::ustring &name, void MWEParser::on_start_element(const Glib::ustring &name,
const AttributeList& attributes) const AttributeList& attributes)
{ {
std::cout << "about to check" << std::endl;
std::cout << state_ << ": " << name << std::endl; std::cout << state_ << ": " << name << std::endl;
std::cout << "done with check" << std::endl;
if(state_ == NONE && name == "units_description"){ if(state_ == NONE && name == "units_description"){
tagset_ = get_attribute(attributes, "tagset"); tagset_ = get_attribute(attributes, "tagset");
mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_))); mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
......
...@@ -47,10 +47,13 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -47,10 +47,13 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
if(lex.is_disamb()){ if(lex.is_disamb()){
std::string base = lex.lemma_utf8(); std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base); const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){ foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions; std::set<int> positions;
int head; int head;
//std::cout << " is " << std::endl;
bool is_here = pLU->IsHere(sc, positions, head); bool is_here = pLU->IsHere(sc, positions, head);
//std::cout << " is out" << std::endl;
if(is_here){ if(is_here){
std::string new_orth_utf8; std::string new_orth_utf8;
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr(); Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
......
...@@ -31,6 +31,8 @@ struct Fixture{ ...@@ -31,6 +31,8 @@ struct Fixture{
}; };
BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture) BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture)
{ {
BOOST_MESSAGE("test: finding preferred lexeme"); BOOST_MESSAGE("test: finding preferred lexeme");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment