#include "mweparser.h"

#include <libpwrutils/foreach.h>
#include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h>
#include <boost/make_shared.hpp>
#include <boost/algorithm/string.hpp>

#include <iostream>
#include <string>

namespace Corpus2 {

/// Creates a parser whose state machine starts in the NONE state.
MWEParser::MWEParser()
	: BasicSaxParser(), state_(NONE)
{
}

/**
 * Looks up an attribute by name.
 *
 * @param attributes attribute list of the element being opened
 * @param name       name of the attribute to look for
 * @return the value of the attribute called @p name, or an empty string
 *         when the list holds no such attribute. If the name occurs more
 *         than once, the last occurrence wins.
 */
std::string MWEParser::get_attribute(const AttributeList& attributes,
		const std::string &name) const
{
	std::string value;
	foreach (const Attribute& a, attributes) {
		if (a.name == name) {
			value = a.value;
		}
	}
	return value;
}

/**
 * Stores the "name", "type" and "class" attributes of a <mwegroup> element
 * in group_name_, group_type_ and group_class_. Other attributes are ignored.
 *
 * All three members are cleared first, so an attribute missing from this
 * group ends up empty instead of keeping the value of the previous group.
 *
 * @param attributes attribute list of the <mwegroup> element
 */
void MWEParser::parse_mwegroup_attributes(const AttributeList& attributes)
{
	group_name_.clear();
	group_type_.clear();
	group_class_.clear();

	foreach (const Attribute& a, attributes) {
		if (a.name == "name") {
			group_name_ = a.value;
		} else if (a.name == "type") {
			group_type_ = a.value;
		} else if (a.name == "class") {
			group_class_ = a.value;
		}
	}
}

/**
 * SAX callback for an opening tag; advances the state machine.
 *
 * Transitions handled (current state + element -> new state):
 *   NONE      + units_description -> UNITSDESC (reads "tagset")
 *   UNITSDESC + mwegroup          -> MWEGROUP  (reads group attributes)
 *   MWEGROUP  + condition         -> CONDITION (starts collecting text)
 *   MWEGROUP  + instances         -> INSTANCES
 *   INSTANCES + MWE               -> MWE       (reads "base")
 *   MWE       + var               -> VAR       (reads "name", collects text)
 *   MWE       + head              -> HEAD      (starts collecting text)
 *
 * Any other state/element combination is ignored and leaves the state
 * unchanged.
 *
 * @param name       name of the element being opened
 * @param attributes attributes of that element
 */
void MWEParser::on_start_element(const Glib::ustring &name,
		const AttributeList& attributes)
{
	// Debug trace of the state machine.
	std::cout << state_ << ": " << name << std::endl;

	if (state_ == NONE && name == "units_description") {
		tagset_ = get_attribute(attributes, "tagset");
		state_ = UNITSDESC;
	} else if (state_ == UNITSDESC && name == "mwegroup") {
		parse_mwegroup_attributes(attributes);
		// A group without a <condition> must not inherit the operator
		// text collected for the previous group.
		wccl_operator_.clear();
		state_ = MWEGROUP;
	} else if (state_ == MWEGROUP && name == "condition") {
		state_ = CONDITION;
		grab_characters_ = true;
		clear_buf();
	} else if (state_ == MWEGROUP && name == "instances") {
		state_ = INSTANCES;
	} else if (state_ == INSTANCES && name == "MWE") {
		state_ = MWE;
		mwe_base_ = get_attribute(attributes, "base");
		// Start every unit from a clean slate; otherwise <var>/<head>
		// values of an earlier <MWE> would leak into this one.
		variables_.clear();
		head_cond_.clear();
	} else if (state_ == MWE && name == "var") {
		state_ = VAR;
		var_name_ = get_attribute(attributes, "name");
		grab_characters_ = true;
		clear_buf();
	} else if (state_ == MWE && name == "head") {
		state_ = HEAD;
		grab_characters_ = true;
		clear_buf();
	}
}

/**
 * SAX callback for a closing tag; unwinds the state machine and stores the
 * text collected for <condition>, <var> and <head> elements.
 *
 * Closing <units_description> resets the state to NONE regardless of the
 * current state. A closing tag that matches none of the handled
 * state/element pairs is reported on std::cerr and otherwise ignored.
 *
 * @param name name of the element being closed
 */
void MWEParser::on_end_element(const Glib::ustring &name)
{
	// Debug trace of the state machine.
	std::cout << "/" << state_ << ": " << name << std::endl;

	if (name == "units_description") {
		state_ = NONE;
	} else if (state_ == CONDITION && name == "condition") {
		wccl_operator_ = finish_get_text();
		std::cout << wccl_operator_ << std::endl;
		state_ = MWEGROUP;
	} else if (state_ == MWEGROUP && name == "mwegroup") {
		state_ = UNITSDESC;
	} else if (state_ == INSTANCES && name == "instances") {
		state_ = MWEGROUP;
	} else if (state_ == MWE && name == "MWE") {
		state_ = INSTANCES;
		// TODO: create the unit (for now the collected data is only printed)
		std::cout << "Tworzenie jednostki: " << mwe_base_ << " dla ";
		foreach (str_map::value_type &i, variables_) {
			std::cout << i.first << ": " << i.second << ", ";
		}
		std::cout << "\nhead: " << head_cond_ << "\nop: "
				<< wccl_operator_ << std::endl;
		std::cout << "MWE Group name: " << group_name_ << std::endl;
	} else if (state_ == VAR && name == "var") {
		state_ = MWE;
		variables_[var_name_] = finish_get_text();
	} else if (state_ == HEAD && name == "head") {
		state_ = MWE;
		head_cond_ = finish_get_text();
	} else {
		std::cerr << "Wrong state_:" << state_ << " for name: "
				<< name << std::endl;
	}
}

/**
 * Ends text collection for the current element.
 *
 * @return the buffered text with leading and trailing whitespace removed
 */
std::string MWEParser::finish_get_text()
{
	std::string str = get_buf();
	boost::algorithm::trim(str);
	grab_characters_ = false;
	return str;
}

} // ns Corpus2