Skip to content
Snippets Groups Projects
Commit 4dbd3b95 authored by ilor
Browse files

fix wccl-match and wccl-rules to use the ann reader option instead of manually...

Fix wccl-match and wccl-rules to use the reader's "ann" option instead of manually wrapping sentences as AnnotatedSentences.
parent 5767ba52
Branches
No related merge requests found
......@@ -83,7 +83,12 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) {
foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) {
boost::shared_ptr<Corpus2::AnnotatedSentence> as;
as = Corpus2::AnnotatedSentence::wrap_sentence(s);
as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s);
if (!as) {
std::cerr << "Did not get an AnnotatedSentence from reader,"
"'ann'' option broken?\n";
return;
}
foreach (const boost::shared_ptr<Wccl::MatchRule>& r, rules_) {
r->apply(as);
......@@ -93,9 +98,9 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
if (progress) {
timer.check_slice();
}
writer->write_sentence(*as);
//writer->write_sentence(*as);
}
//writer->write_chunk(*c);
writer->write_chunk(*c);
}
if (progress) {
timer.stats();
......@@ -107,7 +112,8 @@ void usage(char* name)
std::cerr << "This program runs WCCL match rules.\n";
std::cerr << "Usage " << name << " [OPTIONS] FILES\n"
<< "Files ending with .xml are treated as corpora, otherwise \n"
<< "as CCL files. Use - to read corpus from stdin (as with -I)\n";
<< "as CCL files. Use - to read corpus from stdin (as with -I)\n"
<< "Note: the ann option is implied on all input formats\n";
}
int main(int argc, char** argv)
......@@ -208,10 +214,12 @@ int main(int argc, char** argv)
boost::shared_ptr<Corpus2::TokenReader> reader;
foreach (std::string cf, corpora_files) {
reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, cf);
reader->set_option("ann");
runner.apply_rules(reader, writer);
}
if (corpus_stdin) {
reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin);
reader->set_option("ann");
runner.apply_rules(reader, writer);
}
}
......
......@@ -80,7 +80,12 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) {
foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) {
boost::shared_ptr<Corpus2::AnnotatedSentence> as;
as = Corpus2::AnnotatedSentence::wrap_sentence(s);
as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(s);
if (!as) {
std::cerr << "Did not get an AnnotatedSentence from reader,"
"'ann'' option broken?\n";
return;
}
if (opts.until_done) {
rules.execute_until_done(as, opts.until_done_iterations);
} else {
......@@ -91,9 +96,9 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
timer.check_slice();
}
if (opts.first) break;
writer->write_sentence(*as);
//writer->write_sentence(*as);
}
//writer->write_chunk(*c);
writer->write_chunk(*c);
if (opts.first) break;
}
if (progress) {
......@@ -106,7 +111,8 @@ void usage(char* name)
std::cerr << "This program runs WCCL disambiguation rules.\n";
std::cerr << "Usage " << name << " [OPTIONS] FILES\n"
<< "Files ending with .xml are treated as corpora, otherwise \n"
<< "as CCL files. Use - to read corpus from stdin (as with -I)\n";
<< "as CCL files. Use - to read corpus from stdin (as with -I)\n"
<< "Note: the ann option is implied on all input formats\n";
}
......@@ -215,10 +221,12 @@ int main(int argc, char** argv)
boost::shared_ptr<Corpus2::TokenReader> reader;
foreach (const std::string& f, corpora_files) {
reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, f);
reader->set_option("ann");
apply_rules(reader, writer, rules, opts);
}
if (corpus_stdin) {
reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin);
reader->set_option("ann");
apply_rules(reader, writer, rules, opts);
}
}
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment