#ifndef LIBMWEREADER_MWEPARSER_H #define LIBMWEREADER_MWEPARSER_H #include <libcorpus2/io/reader.h> #include <libcorpus2/io/sax.h> namespace Corpus2 { typedef std::map<std::string, std::string> str_map; class MWEParser : public BasicSaxParser { public: MWEParser(); protected: void on_start_element(const Glib::ustring &name, const AttributeList& attributes); void on_end_element(const Glib::ustring &name); std::string finish_get_text(); /// retrives tagset= attribute std::string get_attribute(const AttributeList& attributes, const std::string &name) const; void parse_mwegroup_attributes(const AttributeList& attributes); /// tagset name used in wccl operators std::string tagset_; enum States{NONE, // not started UNITSDESC, // in <units_description MWEGROUP, // in <mwegroup> CONDITION, // in <condition> INSTANCES, // <instances> MWE, // start of MWE, <MWE> VAR, // <var> of <MWE> HEAD, // <head> condition of MWE }; States state_; str_map variables_; // name -> val std::string wccl_operator_; std::string mwe_base_; std::string var_name_; std::string group_name_; std::string group_type_; std::string group_class_; std::string head_cond_; }; } // ns Corpus2 #endif // LIBMWEREADER_MWEPARSER_H