Skip to content
Snippets Groups Projects
Commit bd777f2f authored by Adam Radziszewski's avatar Adam Radziszewski
Browse files

Merge branch 'master' of nlp.pwr.wroc.pl:wccl

parents bb165fb0 4592e3f3
Branches
No related merge requests found
# Find module for the Loki library, meant to be loaded via find_package(Loki ...).
#
# Looks for the header loki/LokiExport.h and a library named "loki".
# Result variables:
#   LOKI_INCLUDE_DIR - directory that contains loki/LokiExport.h (cache, advanced)
#   LOKI_LIBRARY     - path of the loki library (cache, advanced)
#   LOKI_FOUND       - TRUE when both the header and the library were located
FIND_PATH(LOKI_INCLUDE_DIR loki/LokiExport.h /usr/include /usr/local/include)
FIND_LIBRARY(LOKI_LIBRARY NAMES loki PATHS /usr/lib /usr/local/lib)
MARK_AS_ADVANCED(LOKI_LIBRARY)
MARK_AS_ADVANCED(LOKI_INCLUDE_DIR)

IF (LOKI_INCLUDE_DIR AND LOKI_LIBRARY)
    SET(LOKI_FOUND TRUE)
ENDIF (LOKI_INCLUDE_DIR AND LOKI_LIBRARY)

# find_package() derives the *_FIND_QUIETLY / *_FIND_REQUIRED variable names from
# the package name exactly as written in the call, i.e. "Loki". The upper-case
# spelling is still honoured so callers that set LOKI_FIND_QUIETLY by hand keep
# working.
SET(_loki_quiet FALSE)
IF (Loki_FIND_QUIETLY OR LOKI_FIND_QUIETLY)
    SET(_loki_quiet TRUE)
ENDIF (Loki_FIND_QUIETLY OR LOKI_FIND_QUIETLY)

IF (LOKI_FOUND)
    IF (NOT _loki_quiet)
        MESSAGE(STATUS "Found LOKI: ${LOKI_LIBRARY}")
    ENDIF (NOT _loki_quiet)
ELSE (LOKI_FOUND)
    IF (Loki_FIND_REQUIRED)
        # A missing required package is always reported, QUIET or not.
        MESSAGE(FATAL_ERROR "Could not find Loki-lib")
    ELSE (Loki_FIND_REQUIRED)
        IF (NOT _loki_quiet)
            MESSAGE(STATUS "Loki not found")
        ENDIF (NOT _loki_quiet)
    ENDIF (Loki_FIND_REQUIRED)
ENDIF (LOKI_FOUND)

UNSET(_loki_quiet)
......@@ -5,6 +5,9 @@ include_directories(${LibXML++_INCLUDE_DIRS})
link_directories(${LibXML++_LIBRARY_DIRS})
set(LIBS ${LIBS} ${LibXML++_LIBRARIES})
# Loki is mandatory. Use the header directory and library path that the find
# module located instead of the bare name "loki", so an installation outside
# the compiler's default search paths (e.g. /usr/local) still builds and links.
find_package(Loki REQUIRED QUIET)
include_directories(${LOKI_INCLUDE_DIR})
set(LIBS ${LIBS} ${LOKI_LIBRARY})
include_directories( ${CMAKE_SOURCE_DIR} )
add_definitions(-DLIBWCCL_WCCLRUN_DATA_DIR="${PROJECT_SOURCE_DIR}/")
......
......@@ -7,6 +7,8 @@
#include <libwccl/parser/Parser.h>
#include <libwccl/ops/rulesequence.h>
#include <libcorpus2/tagsetmanager.h>
#include <libcorpus2/util/tokentimer.h>
#include <boost/bind.hpp>
#include <boost/algorithm/string.hpp>
......@@ -20,6 +22,7 @@
namespace {
bool quiet = false;
bool progress = false;
struct options {
bool first;
......@@ -39,7 +42,10 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
ret = parser.parseRuleSequence(is);
if (ret) {
std::cerr << ret->size() << "\n";
if (!quiet) {
std::cerr << "Loaded " << ret->size() << " rule(s) from "
<< filename << "\n";
}
std::copy(ret->begin(), ret->end(), std::back_inserter(rules));
return true;
} else {
......@@ -69,13 +75,27 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru
/**
 * Apply the rule sequence to an XCES input stream and write the result.
 *
 * The stream is read chunk by chunk. Each sentence of a chunk is run through
 * the rules -- repeatedly via execute_until_done() when opts.until_done is
 * set (bounded by opts.until_done_iterations), otherwise once -- and then
 * counted by the global token timer. After its sentences have been handled,
 * the chunk is passed to the writer.
 *
 * When opts.first is set, processing stops after the first sentence of the
 * first chunk; that chunk is still written out in full.
 *
 * When the file-level `progress` flag is set, the timer's check_slice() is
 * called after every sentence and stats() is called once at the end.
 *
 * @param writer  destination for the processed chunks
 * @param tagset  tagset handed to the XCES reader
 * @param rules   rule sequence executed on every sentence
 * @param is      input stream holding the XCES data
 * @param opts    processing options (first, until_done, until_done_iterations)
 */
void do_stream(boost::shared_ptr<Corpus2::TokenWriter> writer, const Corpus2::Tagset& tagset, Wccl::RuleSequence& rules,
		std::istream& is, const options& opts)
{
	// Exactly one reader may consume the stream; the earlier sentence-based
	// loop is gone so nothing competes with the chunk-based reader below.
	Corpus2::XcesReader reader(tagset, is);
	Corpus2::TokenTimer& timer = Corpus2::global_timer();
	while (boost::shared_ptr<Corpus2::Chunk> c = reader.get_next_chunk()) {
		foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) {
			if (opts.until_done) {
				rules.execute_until_done(s, opts.until_done_iterations);
			} else {
				rules.execute_once(s);
			}
			timer.count_sentence(*s);
			if (progress) {
				timer.check_slice();
			}
			// Only the first sentence is wanted: leave the sentence loop.
			if (opts.first) break;
		}
		writer->write_chunk(*c);
		// ...and leave the chunk loop too, after the chunk has been written.
		if (opts.first) break;
	}
	if (progress) {
		timer.stats();
	}
}
......@@ -106,6 +126,8 @@ int main(int argc, char** argv)
"Files to load, looking at the extension to determine type\n")
("output-format,o", value(&output_format)->default_value("xces"),
writers_help.c_str())
("progress,p", value(&progress)->zero_tokens(),
"Show progress info")
("quiet,q", value(&quiet)->zero_tokens(),
"Suppress messages\n")
("until-done,u", value(&opts.until_done)->zero_tokens(),
......@@ -161,8 +183,11 @@ int main(int argc, char** argv)
}
}
if (!rules.empty()) {
Corpus2::TokenTimer& timer = Corpus2::global_timer();
timer.register_signal_handler();
boost::shared_ptr<Corpus2::TokenWriter> writer;
writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset)); foreach (const std::string& f, corpora_files) {
writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset));
foreach (const std::string& f, corpora_files) {
std::ifstream ifs(f.c_str());
if (ifs.good()) {
do_stream(writer, tagset, rules, ifs, opts);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment