// datarule.cpp
#include "datadriven.h"
#include <antlr/RecognitionException.hpp>
#include <libpwrutils/util.h>
#include <libpwrutils/foreach.h>
#include <libpwrutils/pathsearch.h>
#include <libcorpus2/util/settings.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/io/xcesreader.h>
#include <libcorpus2/io/cclreader.h>
#include <libcorpus2/ann/annotatedsentence.h>
#include <libwccl/sentencecontext.h>
#include <libwccl/parser/Parser.h>
#include <libwccl/ops/funexeccontext.h>
#include <fstream>
#include <boost/filesystem/fstream.hpp>
#include <iostream>
#include <iomanip>
#include <map>
#include <set>
#include <sstream>
#include <boost/algorithm/string.hpp>
#include <boost/bind.hpp>
#include <boost/filesystem.hpp>
#include <boost/foreach.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/test/unit_test.hpp>
#include <boost/test/parameterized_test.hpp>
using boost::filesystem::directory_iterator;
using boost::filesystem::exists;
using boost::filesystem::is_directory;
using boost::filesystem::path;
using boost::filesystem::ifstream;
namespace {
/**
 * Plain aggregate bundling one tagset name with three filesystem paths.
 *
 * NOTE(review): judging by the member names, this describes a single
 * rule-comparison test case (input corpus, rule file, expected output);
 * the code that fills and consumes it is not visible here -- confirm.
 */
struct rule_compare_test
{
// Tagset identifier (presumably a name resolvable by the tagset manager -- confirm).
std::string tagset;
// Path of the corpus file (presumably the input the rule is run on -- confirm).
path corpus_file;
// Path of the rule file (presumably WCCL source to parse -- confirm).
path rule_file;
// Path of the output corpus (presumably the expected result to compare against -- confirm).
path output_corpus;
};
// Map from a string key to a shared Corpus2::Chunk; get_corpus() looks
// entries up by the corpus path string it is given.
typedef std::map<std::string, boost::shared_ptr<Corpus2::Chunk> > corpus_cache_t;
// File-local cache instance (lives in the anonymous namespace), consulted by
// get_corpus() before any reader is constructed for a path.
corpus_cache_t corpus_cache;
boost::shared_ptr<Corpus2::Chunk> get_corpus(const std::string& path, const Corpus2::Tagset& tagset)
{
corpus_cache_t::const_iterator i;
i = corpus_cache.find(path);
if (i != corpus_cache.end()) {
return i->second;
} else {
boost::shared_ptr<Corpus2::Chunk> chunk;
boost::filesystem::path fp(path);
std::string fn = fp.filename();
if (fn.find("ccl") == fn.npos) {
Corpus2::XcesReader xr(tagset, path);
chunk = xr.get_next_chunk();
} else {
Corpus2::CclReader cr(tagset, path);
chunk = cr.get_next_chunk();
}