#include "mweparser.h"

#include <libpwrutils/foreach.h>
#include <libcorpus2/tagsetmanager.h>
#include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h>
#include <boost/make_shared.hpp>
#include <boost/algorithm/string.hpp>

namespace Corpus2 {

	// Constructs a builder for the given tagset.
	// The tagset initialises tagset_ and is also handed to the constructor
	// of parser_. The body itself does nothing.
	MWEBuilder::MWEBuilder(const Tagset& tagset)
		: tagset_(tagset), parser_(tagset)
	{
	}

	// Returns the operator for the head condition text `headcond`.
	// Delegates to get_condition() with head_conditions_ as the store.
	MWEBuilder::BoolOpPtr MWEBuilder::get_head_condition(
		const std::string & headcond)
	{
		BoolOpPtr head_op = get_condition(headcond, head_conditions_);
		return head_op;
	}
	// Returns the operator for the main MWE condition text `cond`.
	// Delegates to get_condition() with main_conditions_ as the store.
	MWEBuilder::BoolOpPtr MWEBuilder::get_mwe_condition(
		const std::string &cond)
	{
		BoolOpPtr main_op = get_condition(cond, main_conditions_);
		return main_op;
	}
	// Returns the operator associated with the condition text `cond`.
	//
	// `where` maps condition text to an already parsed operator. On a hit
	// the stored operator is returned. On a miss the text is parsed with
	// parser_.parseBoolOperator(), the result is stored in `where` under
	// `cond`, and then returned.
	//
	// Nothing is caught here, so anything thrown by the parser propagates
	// to the caller; in that case `where` is left unmodified because the
	// store happens only after parsing succeeds.
	MWEBuilder::BoolOpPtr MWEBuilder::get_condition(
		const std::string & cond, value_type& where)
	{
		value_type::iterator search = where.find(cond);

		if(search != where.end())
			return search->second;

		BoolOpPtr op = parser_.parseBoolOperator(cond);

		where[cond] = op;

		return op;
	}
	//////////////////////////////////////////////////////////////////////

	// Constructs a parser that adds the lexical units it reads to `index`.
	// The state machine starts in NONE; mwe_index_ is bound to `index`.
	// NOTE(review): the constructor body was absent in this copy of the
	// file; an empty body is assumed -- confirm against the repository.
	MWEParser::MWEParser(MWEIndex &index)
		: BasicSaxParser(), state_(NONE), mwe_index_(index)
	{
	}
	// Destructor. The body is empty: no explicit cleanup is performed here.
	MWEParser::~MWEParser()
	{
		// TODO: something to do?
	}

	void MWEParser::create_mwe()
	{
		//print_current_mwe(true);
		MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition(
					wccl_operator_);
		MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition(
					head_cond_);
		if(group_type_ == "fix"){ // group_name_  -> lower case
omekr's avatar
omekr committed

			mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head,
											  variables_)));
			mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head,
											variables_)));
		} else {
			throw Wccl::WcclError("Unknown type of lexical unit:"
									+ group_type_);
		}
	std::string MWEParser::get_attribute(const AttributeList& attributes,
										 const std::string &name) const
	{
		std::string value;
		foreach (const Attribute& a, attributes) {
			if (a.name == name) {
				value = a.value;
			}
		}
omekr's avatar
omekr committed
		if (value == "")
			throw Wccl::WcclError("Attribute: "+name+" not found");
		return value;
	}

	void MWEParser::parse_mwegroup_attributes(const AttributeList& attributes)
Bartosz Broda's avatar
Bartosz Broda committed
	{
		foreach (const Attribute& a, attributes) {
			if (a.name == "name") {
				group_name_ = a.value;
			} else if(a.name == "type"){
				group_type_ = a.value;
				boost::algorithm::to_lower(group_type_);
			} else if(a.name == "class"){
				group_class_ = a.value;
Bartosz Broda's avatar
Bartosz Broda committed
			}
		}
Bartosz Broda's avatar
Bartosz Broda committed
	}

	// Handles the start of an XML element and advances the state machine.
	//
	// Transitions (current state + element name -> new state):
	//   NONE      + units_description -> UNITSDESC  (reads "tagset", creates
	//                                                mwe_builder_)
	//   UNITSDESC + mwegroup          -> MWEGROUP   (reads group attributes)
	//   MWEGROUP  + condition         -> CONDITION  (starts text capture)
	//   MWEGROUP  + instances         -> INSTANCES
	//   INSTANCES + MWE               -> MWE        (reads "base")
	//   MWE       + var               -> VAR        (reads "name", starts
	//                                                text capture)
	//   MWE       + head              -> HEAD       (starts text capture)
	//
	// Any other combination is ignored. get_attribute() throws
	// Wccl::WcclError when a required attribute is missing.
	void MWEParser::on_start_element(const Glib::ustring &name,
			const AttributeList& attributes)
	{
		//std::cout << state_ << ": " << name << std::endl;

		if(state_ == NONE && name == "units_description"){
			tagset_ = get_attribute(attributes, "tagset");
			mwe_builder_ = boost::make_shared<MWEBuilder>(
					Corpus2::get_named_tagset(tagset_));
			state_ = UNITSDESC;
		} else if(state_ == UNITSDESC && name == "mwegroup"){
			parse_mwegroup_attributes(attributes);
			state_ = MWEGROUP;
		} else if(state_ == MWEGROUP && name == "condition"){
			state_ = CONDITION;
			grab_characters_ = true;
			clear_buf();
		} else if(state_ == MWEGROUP && name == "instances"){
			state_ = INSTANCES;
		} else if(state_ == INSTANCES && name == "MWE"){
			state_ = MWE;
			mwe_base_ = get_attribute(attributes, "base");
		} else if(state_ == MWE && name == "var"){
			state_ = VAR;
			var_name_ = get_attribute(attributes, "name");
			grab_characters_ = true;
			clear_buf();
		} else if(state_ == MWE && name == "head"){
			state_ = HEAD;
			grab_characters_ = true;
			clear_buf();
		}
	}

	// Handles the end of an XML element and advances the state machine.
	//
	// Transitions (current state + element name -> new state):
	//   (any)     + units_description -> NONE
	//   CONDITION + condition         -> MWEGROUP  (stores wccl_operator_)
	//   MWEGROUP  + mwegroup          -> UNITSDESC
	//   INSTANCES + instances         -> MWEGROUP
	//   MWE       + MWE               -> INSTANCES (then calls create_mwe())
	//   VAR       + var               -> MWE       (stores variables_[var_name_])
	//   HEAD      + head              -> MWE       (stores head_cond_)
	//
	// Any other combination writes a diagnostic to std::cerr and leaves the
	// state unchanged.
	void MWEParser::on_end_element(const Glib::ustring &name)
	{
		//std::cout << "/" << state_ << ": " << name << std::endl;

		if(name == "units_description"){
			state_ = NONE;
		} else if(state_ == CONDITION && name == "condition"){
			wccl_operator_ = finish_get_text();
			state_ = MWEGROUP;
		} else if(state_ == MWEGROUP && name == "mwegroup"){
			state_ = UNITSDESC;
		} else if(state_ == INSTANCES && name == "instances"){
			state_ = MWEGROUP;
		} else if(state_ == MWE && name == "MWE"){
			state_ = INSTANCES;
			create_mwe();
		} else if(state_ == VAR && name == "var"){
			state_ = MWE;
			variables_[var_name_] = finish_get_text();
		} else if(state_ == HEAD && name == "head"){
			state_ = MWE;
			head_cond_ = finish_get_text();
		} else{
			std::cerr << "Wrong state_:" << state_ << " for name: "
					<< name << std::endl;
		}
	}
	void MWEParser::print_current_mwe(bool with_condition, std::ostream &out)
		out << "Forma podstawowa: " << mwe_base_ << "\nZmienne: ";

		foreach(str_map::value_type &i, variables_)
			out << i.first << ": " << i.second << ", ";
		out << "\nWarunek głowy: " << head_cond_ << "\n";
			out << "Grupa jednostek: " << group_name_ << std::endl;
			out << "Operator: " << wccl_operator_ << std::endl;
	// Ends text capture and returns the captured text.
	//
	// Takes the current contents of the buffer via get_buf(), strips
	// leading and trailing whitespace, and clears grab_characters_.
	// The buffer itself is not cleared here.
	std::string MWEParser::finish_get_text()
	{
		std::string str = get_buf();
		boost::algorithm::trim(str);
		grab_characters_ = false;
		return str;
	}

		//std::pair<wcclptr, wcclptr, map<string,string>, string, type>
} // ns Corpus2