// mweparser.cpp
// Author: Bartosz Broda (revision f7411288)
#include "mweparser.h"

#include <iostream>
#include <string>

#include <libpwrutils/foreach.h>
#include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h>
#include <boost/make_shared.hpp>
#include <boost/algorithm/string.hpp>
namespace Corpus2 {
/**
 * Builds a parser whose state machine starts in NONE, i.e. before any
 * recognised element has been opened.
 */
MWEParser::MWEParser()
    : BasicSaxParser()
    , state_(NONE)
{
}
std::string MWEParser::get_attribute(const AttributeList& attributes,
const std::string &name) const
{
std::string value;
foreach (const Attribute& a, attributes) {
if (a.name == name) {
value = a.value;
}
}
return value;
}
void MWEParser::parse_mwegroup_attributes(const AttributeList& attributes)
{
foreach (const Attribute& a, attributes) {
if (a.name == "name") {
group_name_ = a.value;
} else if(a.name == "type"){
group_type_ = a.value;
} else if(a.name == "class"){
group_class_ = a.value;
}
}
}
/**
 * Start-element callback: advances the parser state machine.
 *
 * Recognised transitions (current state + element name -> new state):
 *   NONE      + units_description -> UNITSDESC  (reads "tagset")
 *   UNITSDESC + mwegroup          -> MWEGROUP   (reads group attributes)
 *   MWEGROUP  + condition         -> CONDITION  (starts text capture)
 *   MWEGROUP  + instances         -> INSTANCES
 *   INSTANCES + MWE               -> MWE        (reads "base")
 *   MWE       + var               -> VAR        (reads "name", starts text capture)
 *   MWE       + head              -> HEAD       (starts text capture)
 * Any other combination leaves the state untouched.
 *
 * Data collected for one group / one MWE is reset when that element is
 * opened, so values from a previous sibling cannot leak into the next one.
 *
 * @param name       name of the element being opened
 * @param attributes attributes of that element
 */
void MWEParser::on_start_element(const Glib::ustring &name,
        const AttributeList& attributes)
{
    // Trace output: current state and the element being opened.
    std::cout << state_ << ": " << name << std::endl;

    if (state_ == NONE && name == "units_description") {
        tagset_ = get_attribute(attributes, "tagset");
        state_ = UNITSDESC;
    } else if (state_ == UNITSDESC && name == "mwegroup") {
        // A group without a <condition> child must not reuse the operator
        // text read for the previous group.
        wccl_operator_.clear();
        parse_mwegroup_attributes(attributes);
        state_ = MWEGROUP;
    } else if (state_ == MWEGROUP && name == "condition") {
        state_ = CONDITION;
        // Presumably makes the base parser accumulate character data into
        // its buffer; the text is collected in on_end_element.
        grab_characters_ = true;
        clear_buf();
    } else if (state_ == MWEGROUP && name == "instances") {
        state_ = INSTANCES;
    } else if (state_ == INSTANCES && name == "MWE") {
        state_ = MWE;
        mwe_base_ = get_attribute(attributes, "base");
        // Start this MWE with no variables and no head condition; otherwise
        // entries from the previous MWE would still be present.
        variables_.clear();
        head_cond_.clear();
    } else if (state_ == MWE && name == "var") {
        state_ = VAR;
        var_name_ = get_attribute(attributes, "name");
        grab_characters_ = true;
        clear_buf();
    } else if (state_ == MWE && name == "head") {
        state_ = HEAD;
        grab_characters_ = true;
        clear_buf();
    }
}
/**
 * End-element callback: moves the state machine back to the enclosing
 * state and stores the text gathered for condition / var / head elements.
 *
 * Closing "units_description" always returns to NONE. Every other closing
 * tag must match the current state; if it does not, a diagnostic is written
 * to std::cerr and the state is left unchanged.
 *
 * @param name name of the element being closed
 */
void MWEParser::on_end_element(const Glib::ustring &name)
{
    // Trace output: current state and the element being closed.
    std::cout << "/" << state_ << ": " << name << std::endl;

    if (name == "units_description") {
        state_ = NONE;
        return;
    }

    switch (state_) {
    case CONDITION:
        if (name == "condition") {
            wccl_operator_ = finish_get_text();
            std::cout << wccl_operator_ << std::endl;
            state_ = MWEGROUP;
            return;
        }
        break;
    case MWEGROUP:
        if (name == "mwegroup") {
            state_ = UNITSDESC;
            return;
        }
        break;
    case INSTANCES:
        if (name == "instances") {
            state_ = MWEGROUP;
            return;
        }
        break;
    case MWE:
        if (name == "MWE") {
            state_ = INSTANCES;
            // TODO: create the unit; for now only report what was collected
            std::cout << "Tworzenie jednostki: " << mwe_base_ << " dla ";
            for (str_map::iterator var = variables_.begin();
                    var != variables_.end(); ++var) {
                std::cout << var->first << ": " << var->second << ", ";
            }
            std::cout << "\nhead: " << head_cond_ << "\nop: "
                      << wccl_operator_ << std::endl;
            std::cout << "MWE Group name: " << group_name_ << std::endl;
            return;
        }
        break;
    case VAR:
        if (name == "var") {
            state_ = MWE;
            variables_[var_name_] = finish_get_text();
            return;
        }
        break;
    case HEAD:
        if (name == "head") {
            state_ = MWE;
            head_cond_ = finish_get_text();
            return;
        }
        break;
    default:
        break;
    }

    // No transition matched the (state, element) pair.
    std::cerr << "Wrong state_:" << state_ << " for name: "
              << name << std::endl;
}
/**
 * Ends text capture for the current element.
 *
 * Reads the buffer via get_buf(), sets grab_characters_ to false and
 * returns the buffered text with leading and trailing whitespace removed.
 *
 * @return trimmed copy of the buffered text
 */
std::string MWEParser::finish_get_text()
{
    // Read the buffer before touching the capture flag, as the original did.
    const std::string raw = get_buf();
    grab_characters_ = false;
    return boost::algorithm::trim_copy(raw);
}
} // ns Corpus2