Skip to content
Snippets Groups Projects
Commit 44625c67 authored by Bartosz Broda's avatar Bartosz Broda
Browse files

some work on mwe parsing

parent 304d2f43
Branches
No related merge requests found
#include "mweparser.h"
#include <libpwrutils/foreach.h>
#include <libcorpus2/tagsetmanager.h>
#include <libxml++/libxml++.h>
#include <libxml2/libxml/parser.h>
......@@ -9,8 +10,14 @@
namespace Corpus2 {
MWEParser::MWEParser()
: BasicSaxParser(), state_(NONE)
/// Creates a builder bound to @p tagset. The tagset is kept by reference
/// (tagset_ is a reference member), so it is not copied here.
MWEBuilder::MWEBuilder(const Tagset& tagset)
	: tagset_(tagset)
{}
/// Creates a parser that starts in the NONE state and keeps a reference to
/// @p index in mwe_index_.
MWEParser::MWEParser(MWEIndex &index)
	: BasicSaxParser()
	, state_(NONE)
	, mwe_index_(index)
{}
......@@ -47,6 +54,7 @@ namespace Corpus2 {
if(state_ == NONE && name == "units_description"){
tagset_ = get_attribute(attributes, "tagset");
mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_)));
state_ = UNITSDESC;
} else if(state_ == UNITSDESC && name == "mwegroup"){
parse_mwegroup_attributes(attributes);
......
......@@ -3,18 +3,31 @@
#include <libcorpus2/io/reader.h>
#include <libcorpus2/io/sax.h>
#include <boost/unordered_map.hpp>
#include "mwe.h"
namespace Corpus2 {
/**
 * Holds a reference to a tagset together with two string-keyed condition maps.
 * NOTE(review): nothing visible in this view fills or reads the maps yet.
 */
class MWEBuilder
{
public:
/// Binds the builder to the given tagset; the tagset is stored by reference.
MWEBuilder(const Tagset& tagset);
/// Map type shared by both condition tables below (string -> string).
typedef boost::unordered_map<std::string, std::string> value_type;
private:
/// Tagset passed to the constructor; held by reference, not copied.
const Tagset& tagset_;
/// str -> ptr to ccl operator
/// NOTE(review): value_type currently maps to std::string, not a pointer — confirm intent.
value_type main_conditions_;
/// str -> ptr to ccl operator
/// NOTE(review): same mismatch with value_type as for main_conditions_ — confirm intent.
value_type head_conditions_;
};
class MWEParser : public BasicSaxParser
{
public:
MWEParser();
MWEParser(MWEIndex &index);
protected:
typedef std::map<std::string, std::string> str_map;
......@@ -56,6 +69,9 @@ protected:
std::string group_type_;
std::string group_class_;
std::string head_cond_;
MWEIndex &mwe_index_;
boost::shared_ptr<MWEBuilder> mwe_builder_;
};
} // ns Corpus2
......
......@@ -73,7 +73,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>(
/// Runs an MWEParser over the file at @p filename.
void MWEReader::load_mwes(const std::string &filename)
{
// NOTE(review): `parser` is declared on both of the next two lines; these look
// like the removed and added sides of a diff rendered together — confirm which
// one is live (the second passes mwe_index_ to the parser).
MWEParser parser;
MWEParser parser(mwe_index_);
parser.parse_file(filename);
}
......
......@@ -49,7 +49,7 @@ public:
private:
void load_mwes(const std::string& filename);
//MWEIndex mwe_index_;
MWEIndex mwe_index_;
/// ptr to inner reader doing the real work of reading a corpus
TokenReaderPtr inner_reader_;
/// path for inner reader
......
......@@ -5,8 +5,10 @@
/// Test driver: prints a start message and runs MWEParser over the file named
/// by the first command-line argument.
int main(int ac, char**av)
{
using namespace Corpus2;
// NOTE(review): the two prints and the two `parser` declarations below look
// like the removed and added sides of a diff rendered together — confirm.
std::cout << "Starting tests" << std::endl;
std::cout << "Starting tests... " << ac<< std::endl;
MWEParser parser;
MWEIndex temp_index;
MWEParser parser(temp_index);
// NOTE(review): av[1] is read without checking ac first.
parser.parse_file(av[1]);
}
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment