# Patch summary (git unified diff, five files under libmwereader/).
#
# mweparser.cpp
#   - adds #include <libcorpus2/tagsetmanager.h>
#   - adds MWEBuilder::MWEBuilder(const Tagset&), whose only action is to bind
#     the tagset_ reference member to its argument
#   - replaces the default MWEParser constructor with MWEParser(MWEIndex&),
#     which additionally binds the new mwe_index_ reference member
#   - in the branch taken when state_ == NONE and name == "units_description",
#     creates mwe_builder_ (a boost::shared_ptr<MWEBuilder>) from
#     Corpus2::get_named_tagset(tagset_) before state_ becomes UNITSDESC
# mweparser.h
#   - adds #include <boost/unordered_map.hpp> and #include "mwe.h"
#   - MWEBuilder gains a public constructor, the typedef value_type
#     (boost::unordered_map<std::string, std::string>), and private members
#     tagset_, main_conditions_ and head_conditions_
#   - MWEParser's constructor now takes MWEIndex&; members mwe_index_ and
#     mwe_builder_ are added
# mwereader.cpp
#   - MWEReader::load_mwes passes mwe_index_ to the MWEParser it constructs
# mwereader.h
#   - turns the commented-out "MWEIndex mwe_index_;" into a real member
# mwertest.cpp
#   - the start-up message now also prints ac
#   - main creates a local MWEIndex temp_index and hands it to the parser
#
# NOTE(review): hunk headers and +/- markers sit inline here instead of at the
#   start of lines; the original line breaks and indentation look lost, so
#   this text probably will not apply as-is -- confirm against the commit.
# NOTE(review): tagset_ and mwe_index_ are reference members, so the referenced
#   Tagset / MWEIndex must outlive the MWEBuilder / MWEParser. The lifetime of
#   the object returned by get_named_tagset is not visible here -- confirm.
# NOTE(review): the comments on main_conditions_ / head_conditions_ say
#   "str -> ptr to ccl operator", but value_type maps std::string to
#   std::string -- confirm which one is intended.
# NOTE(review): main in mwertest.cpp passes av[1] to parse_file and no check of
#   ac is visible in the hunk -- TODO confirm behaviour when run without
#   arguments.
# NOTE(review): mweparser.h uses boost::shared_ptr but no
#   <boost/shared_ptr.hpp> include appears in the visible hunk; presumably it
#   arrives transitively -- verify.
diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp index 6cedc52b394d1bfd2d4bbc360eb2eda8fff6246d..add215e060f68460248faf1a1e63176be2b091ce 100644 --- a/libmwereader/mweparser.cpp +++ b/libmwereader/mweparser.cpp @@ -1,6 +1,7 @@ #include "mweparser.h" #include <libpwrutils/foreach.h> +#include <libcorpus2/tagsetmanager.h> #include <libxml++/libxml++.h> #include <libxml2/libxml/parser.h> @@ -9,8 +10,14 @@ namespace Corpus2 { - MWEParser::MWEParser() - : BasicSaxParser(), state_(NONE) + MWEBuilder::MWEBuilder(const Tagset& tagset) + : tagset_(tagset) + { + + } + + MWEParser::MWEParser(MWEIndex &index) + : BasicSaxParser(), state_(NONE), mwe_index_(index) { } @@ -47,6 +54,7 @@ namespace Corpus2 { if(state_ == NONE && name == "units_description"){ tagset_ = get_attribute(attributes, "tagset"); + mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_))); state_ = UNITSDESC; } else if(state_ == UNITSDESC && name == "mwegroup"){ parse_mwegroup_attributes(attributes); diff --git a/libmwereader/mweparser.h b/libmwereader/mweparser.h index 81313e526ecf69be853f4618ac585169c41a8311..85cde5f40bb06b2068efc3e9419c50546067414d 100644 --- a/libmwereader/mweparser.h +++ b/libmwereader/mweparser.h @@ -3,18 +3,31 @@ #include <libcorpus2/io/reader.h> #include <libcorpus2/io/sax.h> +#include <boost/unordered_map.hpp> + +#include "mwe.h" namespace Corpus2 { class MWEBuilder { +public: + MWEBuilder(const Tagset& tagset); + typedef boost::unordered_map<std::string, std::string> value_type; + +private: + const Tagset& tagset_; + /// str -> ptr to ccl operator + value_type main_conditions_; + /// str -> ptr to ccl operator + value_type head_conditions_; }; class MWEParser : public BasicSaxParser { public: - MWEParser(); + MWEParser(MWEIndex &index); protected: typedef std::map<std::string, std::string> str_map; @@ -56,6 +69,9 @@ protected: std::string group_type_; std::string group_class_; std::string head_cond_; + + MWEIndex 
&mwe_index_; + boost::shared_ptr<MWEBuilder> mwe_builder_; }; } // ns Corpus2 diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index b8e09098971e5737e2fde6d5dd05f900ea309bf1..d7b38c7be546de7bcab295a41783b60012af2fdc 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -73,7 +73,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( void MWEReader::load_mwes(const std::string &filename) { - MWEParser parser; + MWEParser parser(mwe_index_); parser.parse_file(filename); } diff --git a/libmwereader/mwereader.h b/libmwereader/mwereader.h index 7b8b4d9e39d4820b505f04b3fa1c5102c308db18..4c65c2e16c9741df6dbdd68a5a4638911a0b80a2 100644 --- a/libmwereader/mwereader.h +++ b/libmwereader/mwereader.h @@ -49,7 +49,7 @@ public: private: void load_mwes(const std::string& filename); - //MWEIndex mwe_index_; + MWEIndex mwe_index_; /// ptr to inner reader doing the real work of reading a corpus TokenReaderPtr inner_reader_; /// path for inner reader diff --git a/libmwereader/mwertest.cpp b/libmwereader/mwertest.cpp index cb78a83ebf5cabf6df32e1018e0603dac7fcd40d..c5b9cebb1e2172e2e1095db476e5cd1aeecf65ef 100644 --- a/libmwereader/mwertest.cpp +++ b/libmwereader/mwertest.cpp @@ -5,8 +5,10 @@ int main(int ac, char**av) { using namespace Corpus2; - std::cout << "Starting tests" << std::endl; + std::cout << "Starting tests... " << ac<< std::endl; - MWEParser parser; + MWEIndex temp_index; + + MWEParser parser(temp_index); parser.parse_file(av[1]); }