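// NOTE: the words "Newer" and "Older" appeared here as bare text; they appear
// to be navigation labels captured from a web blame view, not part of the code.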
#include "mweparser.h"
#include <libpwrutils/foreach.h>
#include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h>
#include <boost/make_shared.hpp>
#include <boost/algorithm/string.hpp>
}
/**
 * Returns the boolean operator for a head condition.
 *
 * Forwards to get_condition() with head_conditions_ as the cache.
 *
 * @param headcond  textual form of the head condition
 * @return the operator obtained from get_condition()
 */
MWEBuilder::BoolOpPtr MWEBuilder::get_head_condition(
        const std::string & headcond)
{
    BoolOpPtr head_op = get_condition(headcond, head_conditions_);
    return head_op;
}
/**
 * Returns the boolean operator for a main (MWE-level) condition.
 *
 * Forwards to get_condition() with main_conditions_ as the cache.
 *
 * @param cond  textual form of the condition
 * @return the operator obtained from get_condition()
 */
MWEBuilder::BoolOpPtr MWEBuilder::get_mwe_condition(
        const std::string &cond)
{
    BoolOpPtr main_op = get_condition(cond, main_conditions_);
    return main_op;
}
/**
 * Looks up the operator for a condition string, parsing and caching it on
 * a miss.
 *
 * @param cond   textual condition; used verbatim as the cache key
 * @param where  cache that is consulted and, on a miss, updated
 * @return the entry already stored under cond, or otherwise the result of
 *         parser_.parseBoolOperator(cond), which is stored in where first
 */
MWEBuilder::BoolOpPtr MWEBuilder::get_condition(
        const std::string & cond, value_type& where)
{
    value_type::iterator search = where.find(cond);
    if (search != where.end())
        return search->second;

    // The cache entry is created only after the parser call has returned.
    BoolOpPtr op = parser_.parseBoolOperator(cond);
    where[cond] = op;
    return op;
}
//////////////////////////////////////////////////////////////////////
/**
 * Creates a parser whose state machine starts in the NONE state.
 *
 * @param index  used to initialise mwe_index_
 */
MWEParser::MWEParser(MWEIndex &index)
    : BasicSaxParser(), state_(NONE), mwe_index_(index)
{
    // NOTE(review): no constructor body was present in this listing; an empty
    // one is assumed here -- confirm against the repository version.
}
/** Destructor; currently performs no explicit work. */
MWEParser::~MWEParser()
{
    // TODO: decide whether anything has to be released here.
}
void MWEParser::create_mwe()
{
MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition(
wccl_operator_);
MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
head_cond_);
Bartosz Broda
committed
if(group_type_ == "fix"){ // group_name_ -> lower case
mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head,
variables_)));
Bartosz Broda
committed
} else if(group_type_ == "flex"){
mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head,
variables_)));
Bartosz Broda
committed
} else {
throw Wccl::WcclError("Unknown type of lexical unit:"
+ group_type_);
}
variables_.clear();
/**
 * Fetches the value of a named attribute from an attribute list.
 *
 * The whole list is scanned; if several entries carry the requested name,
 * the value of the last one is used.
 *
 * @param attributes  attributes to search
 * @param name        attribute name to look for
 * @return the attribute's value
 * @throws Wccl::WcclError if no entry matches, or if the resulting value
 *         is an empty string
 */
std::string MWEParser::get_attribute(const AttributeList& attributes,
        const std::string &name) const
{
    std::string found;
    foreach (const Attribute& attr, attributes) {
        if (attr.name == name) {
            found = attr.value;
        }
    }

    if (!found.empty())
        return found;

    throw Wccl::WcclError("Attribute: "+name+" not found");
}
void MWEParser::parse_mwegroup_attributes(const AttributeList& attributes)
if (a.name == "name") {
group_name_ = a.value;
} else if(a.name == "type"){
group_type_ = a.value;
Bartosz Broda
committed
boost::algorithm::to_lower(group_type_);
} else if(a.name == "class"){
group_class_ = a.value;
}
/**
 * Handles an opening element and advances the parser state machine.
 *
 * Transitions (current state + element name -> new state):
 *  - NONE      + units_description -> UNITSDESC; reads "tagset" and
 *                                     creates mwe_builder_
 *  - UNITSDESC + mwegroup          -> MWEGROUP; reads the group attributes
 *  - MWEGROUP  + condition         -> CONDITION; sets grab_characters_ and
 *                                     clears the buffer
 *  - MWEGROUP  + instances         -> INSTANCES
 *  - INSTANCES + MWE               -> MWE; reads "base" into mwe_base_
 *  - MWE       + var               -> VAR; reads "name" into var_name_,
 *                                     sets grab_characters_, clears the buffer
 *  - MWE       + head              -> HEAD; sets grab_characters_ and
 *                                     clears the buffer
 * Any other state/name combination is ignored.
 *
 * @param name        element name
 * @param attributes  attributes of the element
 * @throws Wccl::WcclError from get_attribute() when "tagset", "base" or
 *         "name" is required but missing or empty
 */
void MWEParser::on_start_element(const Glib::ustring &name,
        const AttributeList& attributes)
{
    //std::cout << state_ << ": " << name << std::endl;
    if (state_ == NONE && name == "units_description") {
        tagset_ = get_attribute(attributes, "tagset");
        mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
        // Without this transition the mwegroup branch below can never match.
        state_ = UNITSDESC;
    } else if (state_ == UNITSDESC && name == "mwegroup") {
        parse_mwegroup_attributes(attributes);
        state_ = MWEGROUP;
    } else if (state_ == MWEGROUP && name == "condition") {
        state_ = CONDITION;
        grab_characters_ = true;
        clear_buf();
    } else if (state_ == MWEGROUP && name == "instances") {
        state_ = INSTANCES;
    } else if (state_ == INSTANCES && name == "MWE") {
        state_ = MWE;
        mwe_base_ = get_attribute(attributes, "base");
    } else if (state_ == MWE && name == "var") {
        state_ = VAR;
        var_name_ = get_attribute(attributes, "name");
        grab_characters_ = true;
        clear_buf();
    } else if (state_ == MWE && name == "head") {
        state_ = HEAD;
        grab_characters_ = true;
        clear_buf();
    }
}
/**
 * Handles a closing element and moves the state machine back to the
 * enclosing state.
 *
 * Transitions (current state + element name -> new state):
 *  - any       + units_description -> NONE
 *  - CONDITION + condition         -> MWEGROUP; captured text is stored in
 *                                     wccl_operator_
 *  - MWEGROUP  + mwegroup          -> UNITSDESC
 *  - INSTANCES + instances         -> MWEGROUP
 *  - MWE       + MWE               -> INSTANCES
 *  - VAR       + var               -> MWE; captured text is stored in
 *                                     variables_[var_name_]
 *  - HEAD      + head              -> MWE; captured text is stored in
 *                                     head_cond_
 * Any other combination is reported on std::cerr and the state is kept.
 *
 * @param name  element name
 */
void MWEParser::on_end_element(const Glib::ustring &name)
{
    //std::cout << "/" << state_ << ": " << name << std::endl;
    if (name == "units_description") {
        state_ = NONE;
    } else if (state_ == CONDITION && name == "condition") {
        wccl_operator_ = finish_get_text();
        state_ = MWEGROUP;
    } else if (state_ == MWEGROUP && name == "mwegroup") {
        // Return to the enclosing state so that a following mwegroup is
        // accepted by on_start_element().
        state_ = UNITSDESC;
    } else if (state_ == INSTANCES && name == "instances") {
        state_ = MWEGROUP;
    } else if (state_ == MWE && name == "MWE") {
        state_ = INSTANCES;
        // NOTE(review): create_mwe() is not called from any handler in this
        // listing; confirm where the collected MWE is added to the index.
    } else if (state_ == VAR && name == "var") {
        state_ = MWE;
        variables_[var_name_] = finish_get_text();
    } else if (state_ == HEAD && name == "head") {
        state_ = MWE;
        head_cond_ = finish_get_text();
    } else {
        std::cerr << "Wrong state_:" << state_ << " for name: "
                  << name << std::endl;
    }
}
void MWEParser::print_current_mwe(bool with_condition, std::ostream &out)
out << "Forma podstawowa: " << mwe_base_ << "\nZmienne: ";
foreach(str_map::value_type &i, variables_)
out << i.first << ": " << i.second << ", ";
out << "\nWarunek głowy: " << head_cond_ << "\n";
if(with_condition){
out << "Grupa jednostek: " << group_name_ << std::endl;
out << "Operator: " << wccl_operator_ << std::endl;
}
}
/**
 * Ends character capture and returns the captured text.
 *
 * Sets grab_characters_ to false.
 *
 * @return the contents of get_buf() with leading and trailing whitespace
 *         removed
 */
std::string MWEParser::finish_get_text()
{
    std::string str = get_buf();
    boost::algorithm::trim(str);
    grab_characters_ = false;
    return str;
}
//std::pair<wcclptr, wcclptr, map<string,string>, string, type>