Skip to content
Snippets Groups Projects
Commit 20edcc90 authored by omekr's avatar omekr
Browse files

test against converted multiword units

parent 45e923f3
No related merge requests found
#include "mwe.h"
#include <boost/algorithm/string.hpp>
#include <libwccl/values/strset.h>
#include <boost/algorithm/string/predicate.hpp>
namespace Corpus2{
......@@ -27,11 +28,14 @@ LexicalUnit::LexicalUnit(const std::string &base,
bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
std::set<int> &out_position, int &head_pos) const
{
// set variables
// set variables, skip vars with names starting with '!'
for(variables_map::const_iterator ivars = variables_.begin();
ivars != variables_.end(); ++ivars){
condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
}
ivars != variables_.end(); ++ivars){
if(!boost::starts_with(ivars->first, "!")){
std::cout << ivars->first << " " << std::endl;
condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
}
}
// fire up the operator
boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
......@@ -102,6 +106,7 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
{
foreach(const std::string& base, lu->get_potential_bases()){
value_type::iterator find = index_.find(base);
std::cout << "b:"<<base<<std::endl;
if(find == index_.end()){ // not found -> create new one
luvec v;
v.push_back(lu);
......@@ -113,6 +118,8 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
}
const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){
std::cout << "index " << index_.size()<< std::endl;
std::cout << "sb:"<<base<<std::endl;
value_type::iterator find = index_.find(base);
if(find == index_.end()){ // not found -> return empty
return empty_;
......
......@@ -34,9 +34,9 @@ namespace Corpus2 {
if(search != where.end())
return search->second;
//std::cout << " dddddddddddddd "<< cond << std::endl;
BoolOpPtr op = parser_.parseBoolOperator(cond);
//std::cout << " dddddddddddddd $$$$" << cond << std::endl;
where[cond] = op;
return op;
......@@ -57,24 +57,28 @@ namespace Corpus2 {
void MWEParser::create_mwe()
{
print_current_mwe(true);
//std::cout << " kupa cond" << std::endl;
MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition(
wccl_operator_);
//std::cout << " kupa head" << std::endl;
MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
head_cond_);
//std::cout << " kupa " << std::endl;
if(group_type_ == "fix"){ // group_name_ -> lower case
//std::cout << " kupa fix" << std::endl;
mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head,
variables_)));
} else if(group_type_ == "flex"){
//std::cout << " kupa flex" << std::endl;
mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head,
variables_)));
} else {
throw Wccl::WcclError("Unknown type of lexical unit:"
+ group_type_);
}
//std::cout << " kupa clear" << std::endl;
variables_.clear();
//std::cout << "po kupie " << std::endl;
}
std::string MWEParser::get_attribute(const AttributeList& attributes,
......@@ -109,8 +113,9 @@ namespace Corpus2 {
void MWEParser::on_start_element(const Glib::ustring &name,
const AttributeList& attributes)
{
std::cout << "about to check" << std::endl;
std::cout << state_ << ": " << name << std::endl;
std::cout << "done with check" << std::endl;
if(state_ == NONE && name == "units_description"){
tagset_ = get_attribute(attributes, "tagset");
mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
......
......@@ -47,10 +47,13 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
if(lex.is_disamb()){
std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions;
int head;
//std::cout << " is " << std::endl;
bool is_here = pLU->IsHere(sc, positions, head);
//std::cout << " is out" << std::endl;
if(is_here){
std::string new_orth_utf8;
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
......
......@@ -31,6 +31,8 @@ struct Fixture{
};
BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture)
{
BOOST_MESSAGE("test: finding preferred lexeme");
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment