From e9277e51b678c5766ad621512f07c946349cbe92 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Tue, 12 Apr 2011 17:20:47 +0200 Subject: [PATCH] add mark and unmark operators, tweak wcclrules to work with annotated data --- libwccl/CMakeLists.txt | 2 + libwccl/ops/tagactions/mark.cpp | 93 ++++++++++++++++++++ libwccl/ops/tagactions/mark.h | 88 +++++++++++++++++++ libwccl/ops/tagactions/unmark.cpp | 60 +++++++++++++ libwccl/ops/tagactions/unmark.h | 67 +++++++++++++++ libwccl/parser/grammar.g | 38 +++++++++ tests/CMakeLists.txt | 1 + tests/mark.cpp | 137 ++++++++++++++++++++++++++++++ wcclrules/main.cpp | 38 +++++---- 9 files changed, 508 insertions(+), 16 deletions(-) create mode 100644 libwccl/ops/tagactions/mark.cpp create mode 100644 libwccl/ops/tagactions/mark.h create mode 100644 libwccl/ops/tagactions/unmark.cpp create mode 100644 libwccl/ops/tagactions/unmark.h create mode 100644 tests/mark.cpp diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt index fd26bba..89ee70f 100644 --- a/libwccl/CMakeLists.txt +++ b/libwccl/CMakeLists.txt @@ -67,9 +67,11 @@ SET(libwccl_STAT_SRC ops/rulesequence.cpp ops/tagaction.cpp ops/tagactions/delete.cpp + ops/tagactions/mark.cpp ops/tagactions/relabel.cpp ops/tagactions/select.cpp ops/tagactions/unify.cpp + ops/tagactions/unmark.cpp ops/tagrule.cpp parser/grammar.g parser/Parser.cpp diff --git a/libwccl/ops/tagactions/mark.cpp b/libwccl/ops/tagactions/mark.cpp new file mode 100644 index 0000000..42eea2d --- /dev/null +++ b/libwccl/ops/tagactions/mark.cpp @@ -0,0 +1,93 @@ +#include <libwccl/ops/tagactions/mark.h> +#include <libpwrutils/foreach.h> +#include <libcorpus2/ann/annotatedsentence.h> +#include <sstream> + +namespace Wccl { + +Bool Mark::execute(const ActionExecContext& context) const +{ + SentenceContext& sc = context.sentence_context(); + + const boost::shared_ptr<const Position>& range_left = pos_begin_->apply(context); + if (range_left->get_value() == Position::Nowhere) { + return Bool(false); + } + const boost::shared_ptr<const Position>& range_right = pos_end_->apply(context); + if (range_right->get_value() == Position::Nowhere) { + return Bool(false); + } + int abs_left, abs_right; + if (!sc.validate_range(*range_left, *range_right, abs_left, abs_right)) { + return Bool(false); + } + + int abs_head; + if (pos_head_) { + const boost::shared_ptr<const Position>& head = pos_head_->apply(context); + if (head->get_value() == Position::Nowhere) { + return Bool(false); + } + abs_head = sc.get_abs_position(*head); + if (abs_head < abs_left || abs_head > abs_right) { + return Bool(false); + } + } else { + abs_head = abs_left; + } + + boost::shared_ptr<Corpus2::AnnotatedSentence> as; + as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr()); + if (!as) { + throw WcclError("Operator needs an annotated sentence"); + } + if (!as->has_channel(chan_name_)) { + as->create_channel(chan_name_); + } + Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_); + + int segment_idx = channel.get_new_segment_index(); + //std::cerr << "Marking " << chan_name_ << " from " << abs_left << " to " + // << abs_right << " as " << segment_idx << "\n"; + + for (int i = abs_left; i <= abs_right; ++i) { + if (channel.get_segment_at(i) > 0) { + //std::cerr << "There already is an annotation at " << i + // << " (" << channel.get_segment_at(i) << ")\n"; + //throw WcclError("Mark action would overwrite existing annotation"); + return Bool(false); + } + } + + for (int i = abs_left; i <= abs_right; ++i) { + channel.set_segment_at(i, segment_idx); + channel.set_head_at(i, false); + } + channel.set_head_at(abs_head, true); + return Bool(true); +} + +std::string Mark::to_string(const Corpus2::Tagset& tagset) const +{ + std::ostringstream os; + os << name() << "(" << pos_begin_->to_string(tagset) << ", " + << pos_end_->to_string(tagset); + if (pos_head_) { + os << ", " << pos_head_->to_string(tagset); + } + os << ", \"" << chan_name_ << "\")"; + return os.str(); +} + +std::ostream& Mark::write_to(std::ostream& os) const +{ + os << name() << "(" << *pos_begin_ << ", " << *pos_end_; + if (pos_head_) { + os << ", " << *pos_head_ ; + } + os << ", \"" << chan_name_ << "\")"; + return os; +} + + +} /* end ns Wccl */ diff --git a/libwccl/ops/tagactions/mark.h b/libwccl/ops/tagactions/mark.h new file mode 100644 index 0000000..de0e471 --- /dev/null +++ b/libwccl/ops/tagactions/mark.h @@ -0,0 +1,88 @@ +#ifndef LIBWCCL_OPS_TAGACTIONS_MARK_H +#define LIBWCCL_OPS_TAGACTIONS_MARK_H + +#include <libwccl/ops/tagaction.h> +#include <libwccl/values/position.h> +#include <libwccl/values/bool.h> +#include <libwccl/ops/function.h> + +namespace Wccl { + +/** + * Action to mark an annotation fragment on a channel. + */ +class Mark : public TagAction +{ +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + + Mark( + const PosFunctionPtr& pos_begin, + const PosFunctionPtr& pos_end, + const PosFunctionPtr& pos_head, + const std::string& chan_name) + : pos_begin_(pos_begin), + pos_end_(pos_end), + pos_head_(pos_head), + chan_name_(chan_name) + { + BOOST_ASSERT(pos_begin_); + BOOST_ASSERT(pos_end_); + BOOST_ASSERT(!chan_name.empty()); + } + + Mark( + const PosFunctionPtr& pos_begin, + const PosFunctionPtr& pos_end, + const std::string& chan_name) + : pos_begin_(pos_begin), + pos_end_(pos_end), + pos_head_(), + chan_name_(chan_name) + { + BOOST_ASSERT(pos_begin_); + BOOST_ASSERT(pos_end_); + BOOST_ASSERT(!chan_name.empty()); + } + + /** + * @returns Name of the function. + */ + std::string name() const { + return "mark"; + } + + /** + * @returns String representation of the Action + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + + /** + * Executes the Action on given context: Marks an annotation within + * the given channel on the range supplied. It is an error if there + * already is an annotation anywhere in the range in the channel. + * The Range is trimmed to sentence boundaries. + * No action is made in case of an invalid/empty range. + * @returns True if there were any changes made; False otherwise + */ + Bool execute(const ActionExecContext &context) const; + +protected: + /** + * Writes string representation of the Action to + * an output stream. + * @returns Stream written to. + * @note May be incomplete and/or containt internal info. + */ + std::ostream& write_to(std::ostream& ostream) const; + +private: + const PosFunctionPtr pos_begin_; + const PosFunctionPtr pos_end_; + const PosFunctionPtr pos_head_; + const std::string chan_name_; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_TAGACTIONS_MARK_H diff --git a/libwccl/ops/tagactions/unmark.cpp b/libwccl/ops/tagactions/unmark.cpp new file mode 100644 index 0000000..e090cbc --- /dev/null +++ b/libwccl/ops/tagactions/unmark.cpp @@ -0,0 +1,60 @@ +#include <libwccl/ops/tagactions/unmark.h> +#include <libpwrutils/foreach.h> +#include <libcorpus2/ann/annotatedsentence.h> +#include <sstream> + +namespace Wccl { + +Bool Unmark::execute(const ActionExecContext& context) const +{ + SentenceContext& sc = context.sentence_context(); + + const boost::shared_ptr<const Position>& position = pos_->apply(context); + if (position->get_value() == Position::Nowhere) { + return Bool(false); + } + + int abs_pos = sc.get_abs_position(*position); + if (!sc.is_inside(abs_pos)) { + return Bool(false); + } + + boost::shared_ptr<Corpus2::AnnotatedSentence> as; + as = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(sc.get_sentence_ptr()); + if (!as) { + throw WcclError("Operator needs an annotated sentence"); + } + if (!as->has_channel(chan_name_)) { + return Bool(false); + } + Corpus2::AnnotationChannel& channel = as->get_channel(chan_name_); + + int segment_idx = channel.get_segment_at(abs_pos); + if (segment_idx == 0) { + return Bool(false); + } + + for (int i = 0; i < channel.size(); ++i) { + if (channel.segments()[i] == segment_idx) { + channel.set_segment_at(i, 0); + } + } + return Bool(true); +} + +std::string Unmark::to_string(const Corpus2::Tagset& tagset) const +{ + std::ostringstream os; + os << name() << "(" << pos_->to_string(tagset) << ", \"" + << chan_name_ << "\")"; + return os.str(); +} + +std::ostream& Unmark::write_to(std::ostream& os) const +{ + os << name() << "(" << *pos_ << ", \"" << chan_name_ << "\")"; + return os; +} + + +} /* end ns Wccl */ diff --git a/libwccl/ops/tagactions/unmark.h b/libwccl/ops/tagactions/unmark.h new file mode 100644 index 0000000..c344fc0 --- /dev/null +++ b/libwccl/ops/tagactions/unmark.h @@ -0,0 +1,67 @@ +#ifndef LIBWCCL_OPS_TAGACTIONS_UNMARK_H +#define LIBWCCL_OPS_TAGACTIONS_UNMARK_H + +#include <libwccl/ops/tagaction.h> +#include <libwccl/values/position.h> +#include <libwccl/values/bool.h> +#include <libwccl/ops/function.h> + +namespace Wccl { + +/** + * Action to unmark (delete) an annotation passing through a token. + */ +class Unmark : public TagAction +{ +public: + typedef boost::shared_ptr<Function<Position> > PosFunctionPtr; + + Unmark( + const PosFunctionPtr& pos, + const std::string& chan_name) + : pos_(pos), + chan_name_(chan_name) + { + BOOST_ASSERT(pos_); + BOOST_ASSERT(!chan_name.empty()); + } + + /** + * @returns Name of the function. + */ + std::string name() const { + return "unmark"; + } + + /** + * @returns String representation of the Action + */ + std::string to_string(const Corpus2::Tagset& tagset) const; + +protected: + /** + * Writes string representation of the Action to + * an output stream. + * @returns Stream written to. + * @note May be incomplete and/or containt internal info. + */ + std::ostream& write_to(std::ostream& ostream) const; + + /** + * Executes the Action on given context: Marks an annotation within + * the given channel on the range supplied. It is an error if there + * already is an annotation anywhere in the range in the channel. + * The Range is trimmed to sentence boundaries. + * No action is made in case of an invalid/empty range. + * @returns True if there were any changes made; False otherwise + */ + Bool execute(const ActionExecContext &context) const; + +private: + const PosFunctionPtr pos_; + const std::string chan_name_; +}; + +} /* end ns Wccl */ + +#endif // LIBWCCL_OPS_TAGACTIONS_UNMARK_H diff --git a/libwccl/parser/grammar.g b/libwccl/parser/grammar.g index 376de76..3bf431b 100644 --- a/libwccl/parser/grammar.g +++ b/libwccl/parser/grammar.g @@ -66,6 +66,8 @@ header { #include <libwccl/ops/tagactions/delete.h> #include <libwccl/ops/tagactions/select.h> #include <libwccl/ops/tagactions/relabel.h> + #include <libwccl/ops/tagactions/mark.h> + #include <libwccl/ops/tagactions/unmark.h> // Match operators #include <libwccl/values/tokenmatch.h> @@ -1485,6 +1487,8 @@ action // | act = action_unify [tagset, vars] // + | act = action_mark [tagset, vars] + | act = action_unmark [tagset, vars] ; // Action sequence - the actions are separated with commas: @@ -1697,6 +1701,40 @@ action_unify } ; +// ---------------------------------------------------------------------------- +// Mark action +action_mark + [const Corpus2::Tagset& tagset, Variables& vars] + returns [boost::shared_ptr<Mark> action] +{ + boost::shared_ptr<Function<Position> > pos_begin, pos_end, pos_head; +} + : "mark" LPAREN + pos_begin = position_operator [tagset, vars] COMMA + pos_end = position_operator [tagset, vars] COMMA + (pos_head = position_operator [tagset, vars] COMMA)? + chan_name: STRING + RPAREN { + action.reset(new Mark(pos_begin, pos_end, pos_head, ((antlr::Token*)chan_name)->getText())); + } +; + +// ---------------------------------------------------------------------------- +// Unmark action +action_unmark + [const Corpus2::Tagset& tagset, Variables& vars] + returns [boost::shared_ptr<Unmark> action] +{ + boost::shared_ptr<Function<Position> > pos; +} + : "unmark" LPAREN + pos = position_operator [tagset, vars] COMMA + chan_name: STRING + RPAREN { + action.reset(new Unmark(pos, ((antlr::Token*)chan_name)->getText())); + } +; + // ---------------------------------------------------------------------------- // ---------------------------------------------------------------------------- // Match rules diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 5ff0f38..4cd9787 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -27,6 +27,7 @@ add_executable(tests getorth.cpp logicalpredicates.cpp main.cpp + mark.cpp match.cpp position.cpp positionpredicates.cpp diff --git a/tests/mark.cpp b/tests/mark.cpp new file mode 100644 index 0000000..23e52b7 --- /dev/null +++ b/tests/mark.cpp @@ -0,0 +1,137 @@ +#include <boost/test/unit_test.hpp> +#include <boost/bind.hpp> +#include <libcorpus2/ann/annotatedsentence.h> +#include <libwccl/ops/functions/constant.h> + +#include <libwccl/ops/tagactions/mark.h> + +using namespace Wccl; + +BOOST_AUTO_TEST_SUITE(mark) + +struct MarkFix +{ + MarkFix() + : as(boost::make_shared<Corpus2::AnnotatedSentence>()), + sc(as), + cx(sc, boost::make_shared<Variables>()), + pos_zero(0), + pos_one(1), + pos_minus_one(-1), + nowhere(Position::Nowhere), + begin(Position::Begin), + end(Position::End), + pos_zero_constant(new Constant<Position>(pos_zero)), + pos_one_constant(new Constant<Position>(pos_one)), + pos_minus_one_constant(new Constant<Position>(pos_minus_one)), + nowhere_constant(new Constant<Position>(nowhere)), + begin_constant(new Constant<Position>(begin)), + end_constant(new Constant<Position>(end)) + { + as->append(new Corpus2::Token(UnicodeString::fromUTF8("t1"), PwrNlp::Whitespace::Newline)); + as->append(new Corpus2::Token(UnicodeString::fromUTF8("t2"), PwrNlp::Whitespace::Newline)); + as->append(new Corpus2::Token(UnicodeString::fromUTF8("t3"), PwrNlp::Whitespace::Newline)); + as->append(new Corpus2::Token(UnicodeString::fromUTF8("t4"), PwrNlp::Whitespace::Newline)); + as->append(new Corpus2::Token(UnicodeString::fromUTF8("t5"), PwrNlp::Whitespace::Newline)); + as->create_channel("ch1"); + as->get_channel("ch1").set_segment_at(2, 1); + as->get_channel("ch1").set_segment_at(3, 1); + as->get_channel("ch1").set_head_at(3, true); + } + + boost::shared_ptr<Corpus2::AnnotatedSentence> as; + SentenceContext sc; + Corpus2::Tagset tagset; + ActionExecContext cx; + + Position pos_zero; + Position pos_one; + Position pos_minus_one; + Position nowhere; + Position begin; + Position end; + boost::shared_ptr<Function<Position> > pos_zero_constant; + boost::shared_ptr<Function<Position> > pos_one_constant; + boost::shared_ptr<Function<Position> > pos_minus_one_constant; + boost::shared_ptr<Function<Position> > nowhere_constant; + boost::shared_ptr<Function<Position> > begin_constant; + boost::shared_ptr<Function<Position> > end_constant; +}; + +BOOST_FIXTURE_TEST_CASE(mark_empty, MarkFix) +{ + boost::shared_ptr<Corpus2::AnnotatedSentence> as_clone = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(as->clone_shared()); + Mark mark(pos_minus_one_constant, pos_minus_one_constant, "ch1"); + BOOST_CHECK(!mark.execute(cx)); + //BOOST_CHECK((*as) == (*as_clone)); + BOOST_CHECK_EQUAL(as->get_channel("ch1").dump_alpha(), "__aA_"); +} + +BOOST_FIXTURE_TEST_CASE(mark_already_there, MarkFix) +{ + boost::shared_ptr<Corpus2::AnnotatedSentence> as_clone = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(as->clone_shared()); + sc.set_position(1); + Mark mark(begin_constant, pos_one_constant, "ch1"); + BOOST_CHECK(!mark.execute(cx)); + BOOST_CHECK_EQUAL(as->get_channel("ch1").dump_alpha(), "__aA_"); +} + +BOOST_FIXTURE_TEST_CASE(mark_begin, MarkFix) +{ + boost::shared_ptr<Corpus2::AnnotatedSentence> as_clone = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(as->clone_shared()); + sc.set_position(1); + Mark mark(begin_constant, pos_minus_one_constant, "ch1"); + BOOST_CHECK(mark.execute(cx)); + BOOST_CHECK_EQUAL(as->get_channel("ch1").dump_alpha(), "B_aA_"); +} + +BOOST_FIXTURE_TEST_CASE(mark_begin_head0, MarkFix) +{ + boost::shared_ptr<Corpus2::AnnotatedSentence> as_clone = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(as->clone_shared()); + sc.set_position(0); + Mark mark(begin_constant, pos_one_constant, pos_zero_constant, "ch1"); + BOOST_CHECK(mark.execute(cx)); + BOOST_CHECK_EQUAL(as->get_channel("ch1").dump_alpha(), "BbaA_"); +} + +BOOST_FIXTURE_TEST_CASE(mark_begin_head1, MarkFix) +{ + boost::shared_ptr<Corpus2::AnnotatedSentence> as_clone = boost::dynamic_pointer_cast<Corpus2::AnnotatedSentence>(as->clone_shared()); + sc.set_position(0); + Mark mark(begin_constant, pos_one_constant, pos_one_constant, "ch1"); + BOOST_CHECK(mark.execute(cx)); + BOOST_CHECK_EQUAL(as->get_channel("ch1").dump_alpha(), "bBaA_"); +} + +BOOST_FIXTURE_TEST_CASE(mark_other, MarkFix) +{ + sc.set_position(1); + Mark mark(begin_constant, pos_one_constant, begin_constant, "ch2"); + BOOST_CHECK(mark.execute(cx)); + BOOST_CHECK_EQUAL(as->get_channel("ch2").dump_alpha(), "Aaa__"); +} + + +//------ to_string test cases ------- + +BOOST_FIXTURE_TEST_CASE(mark_to_string, MarkFix) +{ + Mark mark(begin_constant, end_constant, "ch"); + BOOST_CHECK_EQUAL("mark(begin, end, \"ch\")", mark.to_string(tagset)); + Mark mark2(begin_constant, pos_one_constant, "ch2"); + BOOST_CHECK_EQUAL("mark(begin, 1, \"ch2\")", mark2.to_string(tagset)); + Mark mark3(pos_minus_one_constant, end_constant, "ch3"); + BOOST_CHECK_EQUAL("mark(-1, end, \"ch3\")", mark3.to_string(tagset)); +} + +BOOST_FIXTURE_TEST_CASE(mark_to_string_head, MarkFix) + { + Mark mark(begin_constant, end_constant, pos_one_constant, "ch"); + BOOST_CHECK_EQUAL("mark(begin, end, 1, \"ch\")", mark.to_string(tagset)); + Mark mark2(begin_constant, pos_one_constant, pos_zero_constant, "ch2"); + BOOST_CHECK_EQUAL("mark(begin, 1, 0, \"ch2\")", mark2.to_string(tagset)); + Mark mark3(pos_minus_one_constant, end_constant, end_constant, "ch3"); + BOOST_CHECK_EQUAL("mark(-1, end, end, \"ch3\")", mark3.to_string(tagset)); +} + +BOOST_AUTO_TEST_SUITE_END() diff --git a/wcclrules/main.cpp b/wcclrules/main.cpp index 7b83fe0..2126e5d 100644 --- a/wcclrules/main.cpp +++ b/wcclrules/main.cpp @@ -72,25 +72,28 @@ bool load_more_rules(Wccl::Parser& parser, const std::string& filename, Wccl::Ru return false; } -void do_stream(boost::shared_ptr<Corpus2::TokenWriter> writer, const Corpus2::Tagset& tagset, Wccl::RuleSequence& rules, - std::istream& is, const options& opts) +void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader, + boost::shared_ptr<Corpus2::TokenWriter> writer, Wccl::RuleSequence& rules, + const options& opts) { - Corpus2::XcesReader reader(tagset, is); Corpus2::TokenTimer& timer = Corpus2::global_timer(); - while (boost::shared_ptr<Corpus2::Chunk> c = reader.get_next_chunk()) { + while (boost::shared_ptr<Corpus2::Chunk> c = reader->get_next_chunk()) { foreach (boost::shared_ptr<Corpus2::Sentence>& s, c->sentences()) { + boost::shared_ptr<Corpus2::AnnotatedSentence> as; + as = Corpus2::AnnotatedSentence::wrap_sentence(s); if (opts.until_done) { - rules.execute_until_done(s, opts.until_done_iterations); + rules.execute_until_done(as, opts.until_done_iterations); } else { - rules.execute_once(s); + rules.execute_once(as); } - timer.count_sentence(*s); + timer.count_sentence(*as); if (progress) { timer.check_slice(); } if (opts.first) break; + writer->write_sentence(*as); } - writer->write_chunk(*c); + //writer->write_chunk(*c); if (opts.first) break; } if (progress) { @@ -102,6 +105,7 @@ void do_stream(boost::shared_ptr<Corpus2::TokenWriter> writer, const Corpus2::Ta int main(int argc, char** argv) { std::string tagset_load = "kipi"; + std::string input_format; std::string output_format; options opts; opts.first = false; @@ -111,8 +115,10 @@ int main(int argc, char** argv) bool corpus_stdin = true; using boost::program_options::value; + std::string readers = boost::algorithm::join(Corpus2::TokenReader::available_reader_types_help(), " "); + std::string readers_help = "Input format, any of: " + readers + "\n"; std::string writers = boost::algorithm::join(Corpus2::TokenWriter::available_writer_types_help(), " "); - std::string writers_help = "Output format, any of: " + writers + "\n"; + std::string writers_help = "Output format, any of: " + writers + "\n";; boost::program_options::options_description desc("Allowed options"); desc.add_options() @@ -124,6 +130,8 @@ int main(int argc, char** argv) "CCL rule files\n") ("files,f", value(&files), "Files to load, looking at the extension to determine type\n") + ("input-format,i", value(&input_format)->default_value("xces"), + readers_help.c_str()) ("output-format,o", value(&output_format)->default_value("xces"), writers_help.c_str()) ("progress,p", value(&progress)->zero_tokens(), @@ -189,16 +197,14 @@ int main(int argc, char** argv) timer.register_signal_handler(); boost::shared_ptr<Corpus2::TokenWriter> writer; writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset)); + boost::shared_ptr<Corpus2::TokenReader> reader; foreach (const std::string& f, corpora_files) { - std::ifstream ifs(f.c_str()); - if (ifs.good()) { - do_stream(writer, tagset, rules, ifs, opts); - } else { - std::cerr << "Error reading corpus from " << f << "\n"; - } + reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, f); + apply_rules(reader, writer, rules, opts); } if (corpus_stdin) { - do_stream(writer, tagset, rules, std::cin, opts); + reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin); + apply_rules(reader, writer, rules, opts); } } } catch (PwrNlp::PwrNlpError& e) { -- GitLab