// mweparser.h
// NOTE: the repository web-page residue that preceded the include guard
// (navigation text and blame attribution to Bartosz Broda) is kept only as
// this comment, because it is not C++ source.
#ifndef LIBMWEREADER_MWEPARSER_H
#define LIBMWEREADER_MWEPARSER_H

#include <libcorpus2/io/reader.h>
#include <libcorpus2/io/sax.h>

namespace Corpus2 {

/// String-to-string map; MWEParser uses it for its variables_ member (name -> value).
typedef std::map<std::string, std::string> str_map;

// Blame attribution from the repository web view: Bartosz Broda.
/**
 * SAX parser declaration for MWE description XML.
 * NOTE(review): MWE presumably stands for "multi-word expression" -- confirm
 * against the reader documentation.
 *
 * Derives from BasicSaxParser and declares the element start/end callbacks
 * together with the state that is kept between callbacks. Every member
 * function is only declared here; the definitions live outside this header.
 */
class MWEParser : public BasicSaxParser
{
public:
	MWEParser();

protected:
	/// Callback for an opening element: receives the element name and its
	/// attribute list.
	void on_start_element(const Glib::ustring &name,
						  const AttributeList& attributes);

	/// Callback for a closing element: receives the element name.
	void on_end_element(const Glib::ustring &name);

	/// NOTE(review): presumably hands back the text gathered for the current
	/// element -- confirm in the implementation file.
	std::string finish_get_text();

	/// Retrieves the attribute called `name` from `attributes` (used e.g. for
	/// the tagset= attribute). What is returned for a missing attribute is
	/// decided by the implementation, which is not visible in this header.
	std::string get_attribute(const AttributeList& attributes,
							  const std::string &name) const;

	/// NOTE(review): presumably reads the attributes of an <mwegroup> element
	/// into the group_* members below -- confirm in the implementation file.
	void parse_mwegroup_attributes(const AttributeList& attributes);

	/// tagset name used in wccl operators
	std::string tagset_;

	/// Position of the parser inside the document structure.
	enum States{NONE,  // not started
				UNITSDESC, // in <units_description>
				MWEGROUP, // in <mwegroup>
				CONDITION, // in <condition>
				INSTANCES, // <instances>
				MWE, // start of MWE, <MWE>
				VAR, // <var> of <MWE>
				HEAD, // <head> condition of MWE
			};
	/// Current parser state (one of States).
	States state_;

	str_map variables_; // name -> val

	// String members holding parse results. Their exact contents are set by
	// the implementation file; the names suggest the WCCL operator text, the
	// MWE base form, the current <var> name, the <mwegroup> attributes and the
	// <head> condition -- TODO confirm.
	std::string wccl_operator_;
	std::string mwe_base_;
	std::string var_name_;
	std::string group_name_;
	std::string group_type_;
	std::string group_class_;
	std::string head_cond_;
};

} // ns Corpus2

#endif // LIBMWEREADER_MWEPARSER_H