Skip to content
Snippets Groups Projects
Commit 44625c67 authored by Bartosz Broda's avatar Bartosz Broda
Browse files

some work on mwe parsing

parent 304d2f43
No related branches found
No related tags found
No related merge requests found
#include "mweparser.h" #include "mweparser.h"
#include <libpwrutils/foreach.h> #include <libpwrutils/foreach.h>
#include <libcorpus2/tagsetmanager.h>
#include <libxml++/libxml++.h> #include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h> #include <libxml2/libxml/parser.h>
...@@ -9,8 +10,14 @@ ...@@ -9,8 +10,14 @@
namespace Corpus2 { namespace Corpus2 {
MWEParser::MWEParser() MWEBuilder::MWEBuilder(const Tagset& tagset)
: BasicSaxParser(), state_(NONE) : tagset_(tagset)
{
}
MWEParser::MWEParser(MWEIndex &index)
: BasicSaxParser(), state_(NONE), mwe_index_(index)
{ {
} }
...@@ -47,6 +54,7 @@ namespace Corpus2 { ...@@ -47,6 +54,7 @@ namespace Corpus2 {
if(state_ == NONE && name == "units_description"){ if(state_ == NONE && name == "units_description"){
tagset_ = get_attribute(attributes, "tagset"); tagset_ = get_attribute(attributes, "tagset");
mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
state_ = UNITSDESC; state_ = UNITSDESC;
} else if(state_ == UNITSDESC && name == "mwegroup"){ } else if(state_ == UNITSDESC && name == "mwegroup"){
parse_mwegroup_attributes(attributes); parse_mwegroup_attributes(attributes);
......
...@@ -3,18 +3,31 @@ ...@@ -3,18 +3,31 @@
#include <libcorpus2/io/reader.h> #include <libcorpus2/io/reader.h>
#include <libcorpus2/io/sax.h> #include <libcorpus2/io/sax.h>
#include <boost/unordered_map.hpp>
#include "mwe.h"
namespace Corpus2 { namespace Corpus2 {
class MWEBuilder class MWEBuilder
{ {
public:
MWEBuilder(const Tagset& tagset);
typedef boost::unordered_map<std::string, std::string> value_type;
private:
const Tagset& tagset_;
/// str -> ptr to ccl operator
value_type main_conditions_;
/// str -> ptr to ccl operator
value_type head_conditions_;
}; };
class MWEParser : public BasicSaxParser class MWEParser : public BasicSaxParser
{ {
public: public:
MWEParser(); MWEParser(MWEIndex &index);
protected: protected:
typedef std::map<std::string, std::string> str_map; typedef std::map<std::string, std::string> str_map;
...@@ -56,6 +69,9 @@ protected: ...@@ -56,6 +69,9 @@ protected:
std::string group_type_; std::string group_type_;
std::string group_class_; std::string group_class_;
std::string head_cond_; std::string head_cond_;
MWEIndex &mwe_index_;
boost::shared_ptr<MWEBuilder> mwe_builder_;
}; };
} // ns Corpus2 } // ns Corpus2
......
...@@ -73,7 +73,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( ...@@ -73,7 +73,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
void MWEReader::load_mwes(const std::string &filename) void MWEReader::load_mwes(const std::string &filename)
{ {
MWEParser parser; MWEParser parser(mwe_index_);
parser.parse_file(filename); parser.parse_file(filename);
} }
......
...@@ -49,7 +49,7 @@ public: ...@@ -49,7 +49,7 @@ public:
private: private:
void load_mwes(const std::string& filename); void load_mwes(const std::string& filename);
//MWEIndex mwe_index_; MWEIndex mwe_index_;
/// ptr to inner reader doing the real work of reading a corpus /// ptr to inner reader doing the real work of reading a corpus
TokenReaderPtr inner_reader_; TokenReaderPtr inner_reader_;
/// path for inner reader /// path for inner reader
......
...@@ -5,8 +5,10 @@ ...@@ -5,8 +5,10 @@
int main(int ac, char**av) int main(int ac, char**av)
{ {
using namespace Corpus2; using namespace Corpus2;
std::cout << "Starting tests" << std::endl; std::cout << "Starting tests... " << ac<< std::endl;
MWEParser parser; MWEIndex temp_index;
MWEParser parser(temp_index);
parser.parse_file(av[1]); parser.parse_file(av[1]);
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment