// mweparser.cpp
#include "mweparser.h"

#include <libpwrutils/foreach.h>

#include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h>
#include <boost/make_shared.hpp>
#include <boost/algorithm/string.hpp>

namespace Corpus2 {

	/**
	 * Default constructor.
	 *
	 * Initialises the BasicSaxParser base and puts the state machine in
	 * NONE, the state in which on_start_element() waits for the
	 * <units_description> element.
	 */
	MWEParser::MWEParser()
		: BasicSaxParser()
		, state_(NONE)
	{
	}

	/**
	 * Looks up the value of the attribute called `name`.
	 *
	 * The whole list is always scanned, so if the name occurs more than
	 * once the value of the last occurrence wins.
	 *
	 * @param attributes attribute list of the element being processed
	 * @param name       attribute name to search for
	 * @return the attribute value, or an empty string if nothing matched
	 */
	std::string MWEParser::get_attribute(const AttributeList& attributes,
										 const std::string &name) const
	{
		std::string found;
		foreach (const Attribute& attr, attributes) {
			if (!(attr.name == name)) {
				continue;
			}
			// Keep going on purpose: a later match overrides an earlier one.
			found = attr.value;
		}
		return found;
	}

	void MWEParser::parse_mwegroup_attributes(const AttributeList& attributes)
	{
		foreach (const Attribute& a, attributes) {
			if (a.name == "name") {
				group_name_ = a.value;
			} else if(a.name == "type"){
				group_type_ = a.value;
			} else if(a.name == "class"){
				group_class_ = a.value;
			}
		}

	}

	/**
	 * SAX callback invoked for every opening tag; advances the state
	 * machine according to the current state and the element name.
	 *
	 * Recognised transitions:
	 *   NONE      + units_description -> UNITSDESC (reads `tagset`)
	 *   UNITSDESC + mwegroup          -> MWEGROUP  (reads group attributes)
	 *   MWEGROUP  + condition         -> CONDITION (starts text capture)
	 *   MWEGROUP  + instances         -> INSTANCES
	 *   INSTANCES + MWE               -> MWE       (reads `base`)
	 *   MWE       + var               -> VAR       (reads `name`, starts text capture)
	 *   MWE       + head              -> HEAD      (starts text capture)
	 *
	 * Any other state/name combination is ignored. Data collected for the
	 * previous <mwegroup> / <MWE> is discarded when a new one opens so
	 * that it cannot leak into the element being read now.
	 *
	 * @param name       name of the element that has just been opened
	 * @param attributes attributes of that element
	 */
	void MWEParser::on_start_element(const Glib::ustring &name,
			const AttributeList& attributes)
	{
		// Debug trace of the state machine.
		std::cout << state_ << ": " << name << std::endl;

		if(state_ == NONE && name == "units_description"){
			tagset_ = get_attribute(attributes, "tagset");
			state_ = UNITSDESC;
		} else if(state_ == UNITSDESC && name == "mwegroup"){
			// A group without a <condition> must not inherit the operator
			// text read for the previous group.
			wccl_operator_.clear();
			parse_mwegroup_attributes(attributes);
			state_ = MWEGROUP;
		} else if(state_ == MWEGROUP && name == "condition"){
			state_ = CONDITION;
			grab_characters_ = true;
			clear_buf();
		} else if(state_ == MWEGROUP && name == "instances"){
			state_ = INSTANCES;
		} else if(state_ == INSTANCES && name == "MWE"){
			state_ = MWE;
			// Start from a clean slate: <var>/<head> children of an earlier
			// <MWE> would otherwise still be present for this one.
			variables_.clear();
			head_cond_.clear();
			mwe_base_ = get_attribute(attributes, "base");
		} else if(state_ == MWE && name == "var"){
			state_ = VAR;
			var_name_ = get_attribute(attributes, "name");
			grab_characters_ = true;
			clear_buf();
		} else if(state_ == MWE && name == "head"){
			state_ = HEAD;
			grab_characters_ = true;
			clear_buf();
		}

	}

	/**
	 * SAX callback invoked for every closing tag; moves the state machine
	 * back to the enclosing element's state.
	 *
	 * Closing <units_description> always resets the state to NONE. For
	 * every other tag the transition is taken only when the tag matches
	 * the current state; <condition>, <var> and <head> additionally store
	 * the captured text. Closing <MWE> currently only prints the collected
	 * data. A tag that does not fit the current state is reported on
	 * std::cerr.
	 *
	 * @param name name of the element that has just been closed
	 */
	void MWEParser::on_end_element(const Glib::ustring &name)
	{
		// Debug trace of the state machine.
		std::cout << "/" << state_ << ": " << name << std::endl;

		// The root element is handled first, independently of the state.
		if (name == "units_description") {
			state_ = NONE;
			return;
		}

		switch (state_) {
		case CONDITION:
			if (name == "condition") {
				wccl_operator_ = finish_get_text();
				std::cout << wccl_operator_ << std::endl;
				state_ = MWEGROUP;
				return;
			}
			break;

		case MWEGROUP:
			if (name == "mwegroup") {
				state_ = UNITSDESC;
				return;
			}
			break;

		case INSTANCES:
			if (name == "instances") {
				state_ = MWEGROUP;
				return;
			}
			break;

		case MWE:
			if (name == "MWE") {
				state_ = INSTANCES;
				// TODO: create the unit
				std::cout << "Tworzenie jednostki: " << mwe_base_ << " dla ";
				foreach (str_map::value_type &entry, variables_) {
					std::cout << entry.first << ": " << entry.second << ", ";
				}
				std::cout << "\nhead: " << head_cond_ << "\nop: "
						<< wccl_operator_ << std::endl;
				std::cout << "MWE Group name: " << group_name_ << std::endl;
				return;
			}
			break;

		case VAR:
			if (name == "var") {
				state_ = MWE;
				variables_[var_name_] = finish_get_text();
				return;
			}
			break;

		case HEAD:
			if (name == "head") {
				state_ = MWE;
				head_cond_ = finish_get_text();
				return;
			}
			break;

		default:
			break;
		}

		// No transition matched: the closing tag is unexpected here.
		std::cerr << "Wrong state_:" << state_ << " for name: "
				<< name << std::endl;
	}

	/**
	 * Ends text capture for the current element.
	 *
	 * Takes the contents of the character buffer, switches
	 * grab_characters_ off and returns the text with leading and trailing
	 * whitespace removed.
	 *
	 * @return trimmed copy of the buffered text
	 */
	std::string MWEParser::finish_get_text()
	{
		const std::string raw = get_buf();
		grab_characters_ = false;
		return boost::algorithm::trim_copy(raw);
	}

} // ns Corpus2