Skip to content
Snippets Groups Projects
Commit 20edcc90 authored by omekr
Browse files

test against converted multiword units

parent 45e923f3
No related branches found
No related tags found
No related merge requests found
#include "mwe.h"
#include <boost/algorithm/string.hpp>
#include <libwccl/values/strset.h>
#include <boost/algorithm/string/predicate.hpp>
namespace Corpus2{
......@@ -27,11 +28,14 @@ LexicalUnit::LexicalUnit(const std::string &base,
bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc,
std::set<int> &out_position, int &head_pos) const
{
// set variables
// set variables, skip vars with names starting with '!'
for(variables_map::const_iterator ivars = variables_.begin();
ivars != variables_.end(); ++ivars){
if(!boost::starts_with(ivars->first, "!")){
std::cout << ivars->first << " " << std::endl;
condition_->set<Wccl::StrSet>(ivars->first, ivars->second);
}
}
// fire up the operator
boost::shared_ptr<const Wccl::Bool> pResult = condition_->apply(sc);
......@@ -102,6 +106,7 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
{
foreach(const std::string& base, lu->get_potential_bases()){
value_type::iterator find = index_.find(base);
std::cout << "b:"<<base<<std::endl;
if(find == index_.end()){ // not found -> create new one
luvec v;
v.push_back(lu);
......@@ -113,6 +118,8 @@ void MWEIndex::add_lexicalunit(LexicalUnit::Ptr lu)
}
const MWEIndex::luvec& MWEIndex::get_potential_lu(const std::string &base){
std::cout << "index " << index_.size()<< std::endl;
std::cout << "sb:"<<base<<std::endl;
value_type::iterator find = index_.find(base);
if(find == index_.end()){ // not found -> return empty
return empty_;
......
......@@ -34,9 +34,9 @@ namespace Corpus2 {
if(search != where.end())
return search->second;
//std::cout << " dddddddddddddd "<< cond << std::endl;
BoolOpPtr op = parser_.parseBoolOperator(cond);
//std::cout << " dddddddddddddd $$$$" << cond << std::endl;
where[cond] = op;
return op;
......@@ -57,24 +57,28 @@ namespace Corpus2 {
/**
 * Turns the multiword-unit description collected so far into a lexical
 * unit and registers it in the MWE index.
 *
 * Steps, in order:
 *  1. calls print_current_mwe(true);
 *  2. obtains the main condition from wccl_operator_ and the head
 *     condition from head_cond_ through the MWE builder;
 *  3. creates a FixedLU (group type "fix") or a FlexLU (group type
 *     "flex") and adds it to mwe_index_;
 *  4. clears variables_.
 *
 * @throws Wccl::WcclError when group_type_ is neither "fix" nor "flex".
 *         In that case variables_ is left untouched.
 */
void MWEParser::create_mwe()
{
    print_current_mwe(true);

    // Both conditions are requested before the group type is inspected,
    // so any failure inside the builder surfaces ahead of the
    // "unknown type" error below.
    MWEBuilder::BoolOpPtr main_op =
        mwe_builder_->get_mwe_condition(wccl_operator_);
    MWEBuilder::BoolOpPtr head_op =
        mwe_builder_->get_head_condition(head_cond_);

    // NOTE(review): the previous comment here read "group_name_ -> lower
    // case"; the comparison is case-sensitive as written -- confirm
    // whether normalisation is expected upstream.
    const bool is_fixed = (group_type_ == "fix");
    if(!is_fixed && !(group_type_ == "flex")){
        throw Wccl::WcclError("Unknown type of lexical unit:"
                              + group_type_);
    }

    if(is_fixed){
        mwe_index_.add_lexicalunit(LexicalUnit::Ptr(
            new FixedLU(mwe_base_, main_op, head_op, variables_)));
    } else {
        mwe_index_.add_lexicalunit(LexicalUnit::Ptr(
            new FlexLU(mwe_base_, main_op, head_op, variables_)));
    }

    // Reset the per-unit variables so they do not carry over to the next
    // unit built by this parser.
    variables_.clear();
}
std::string MWEParser::get_attribute(const AttributeList& attributes,
......@@ -109,8 +113,9 @@ namespace Corpus2 {
void MWEParser::on_start_element(const Glib::ustring &name,
const AttributeList& attributes)
{
std::cout << "about to check" << std::endl;
std::cout << state_ << ": " << name << std::endl;
std::cout << "done with check" << std::endl;
if(state_ == NONE && name == "units_description"){
tagset_ = get_attribute(attributes, "tagset");
mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
......
......@@ -47,10 +47,13 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
if(lex.is_disamb()){
std::string base = lex.lemma_utf8();
const MWEIndex::luvec& potential = mwe_index_.get_potential_lu(base);
std::cout << "potential " << potential.size() << std::endl;
foreach(LexicalUnit::Ptr pLU, potential){
std::set<int> positions;
int head;
//std::cout << " is " << std::endl;
bool is_here = pLU->IsHere(sc, positions, head);
//std::cout << " is out" << std::endl;
if(is_here){
std::string new_orth_utf8;
Corpus2::Sentence::Ptr sent = sc.get_sentence_ptr();
......
......@@ -31,6 +31,8 @@ struct Fixture{
};
BOOST_FIXTURE_TEST_CASE( preferred_lexeme, Fixture)
{
BOOST_MESSAGE("test: finding preferred lexeme");
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment