diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 09a03e47c7faddfae67e49a00521b281acadf8c4..96f80e90a2eef8edfc1575fd763d5ba914dff277 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -58,12 +58,14 @@ SET(libcorpus2_STAT_SRC
 	io/cclreader.cpp
 	io/cclwriter.cpp
 	io/fastxces.cpp
+	io/nonewriter.cpp
 	io/orthwriter.cpp
 	io/plainwriter.cpp
 	io/premorphwriter.cpp
 	io/reader.cpp
 	io/rft.cpp
 	io/sax.cpp
+	io/statwriter.cpp
 	io/writer.cpp
 	io/xces.cpp
 	io/xcescommon.cpp
diff --git a/libcorpus2/io/nonewriter.cpp b/libcorpus2/io/nonewriter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..2aaf4eecd5ef0be0c4367637bc368a1ad289902b
--- /dev/null
+++ b/libcorpus2/io/nonewriter.cpp
@@ -0,0 +1,31 @@
+#include <libcorpus2/io/nonewriter.h>
+
+namespace Corpus2 {
+
+// Registers NoneWriter with TokenWriter under the name "none".
+bool NoneWriter::registered = TokenWriter::register_writer<NoneWriter>(
+		"none");
+
+NoneWriter::NoneWriter(std::ostream& os, const Tagset& tagset,
+		const string_range_vector& params)
+	: TokenWriter(os, tagset, params)
+{
+}
+
+// No-op: nothing is written for a token.
+void NoneWriter::write_token(const Token&)
+{
+}
+
+// No-op: nothing is written for a sentence.
+void NoneWriter::write_sentence(const Sentence&)
+{
+}
+
+// No-op: nothing is written for a chunk.
+void NoneWriter::write_chunk(const Chunk&)
+{
+}
+
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/nonewriter.h b/libcorpus2/io/nonewriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..8d6a7190fc1e4255966823a0516967a17a1ac116
--- /dev/null
+++ b/libcorpus2/io/nonewriter.h
@@ -0,0 +1,35 @@
+#ifndef LIBCORPUS2_IO_NONEWRITER_H
+#define LIBCORPUS2_IO_NONEWRITER_H
+
+#include <libcorpus2/io/writer.h>
+
+namespace Corpus2 {
+
+/**
+ * Token writer that discards everything it is given: write_token,
+ * write_sentence and write_chunk are all no-ops. Registered under the
+ * writer name "none".
+ */
+class NoneWriter : public TokenWriter
+{
+public:
+	/// Forwards all arguments to the TokenWriter constructor.
+	NoneWriter(std::ostream& os, const Tagset& tagset,
+			const string_range_vector& params);
+
+	/// Does nothing.
+	void write_token(const Token& t);
+
+	/// Does nothing.
+	void write_sentence(const Sentence& s);
+
+	/// Does nothing.
+	void write_chunk(const Chunk& c);
+
+	/// Value returned by TokenWriter::register_writer<NoneWriter>("none").
+	static bool registered;
+};
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_NONEWRITER_H
diff --git a/libcorpus2/io/plainwriter.cpp b/libcorpus2/io/plainwriter.cpp
index 45b156bc166c7105cd3645a4e023a1710523b31d..225afe450d266e5f16373061238551d20626ef46 100644
--- a/libcorpus2/io/plainwriter.cpp
+++ b/libcorpus2/io/plainwriter.cpp
@@ -19,7 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 namespace Corpus2 {
 
-bool PlainWriter::registered = PlainWriter::register_writer<PlainWriter>(
+bool PlainWriter::registered = TokenWriter::register_writer<PlainWriter>(
 		"plain");
 
 PlainWriter::PlainWriter(std::ostream& os, const Tagset& tagset,
diff --git a/libcorpus2/io/statwriter.cpp b/libcorpus2/io/statwriter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b77fbf77a974b8d2894b45a7783cd4d6d42948f8
--- /dev/null
+++ b/libcorpus2/io/statwriter.cpp
@@ -0,0 +1,64 @@
+#include <libcorpus2/io/statwriter.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+#include <iomanip>
+#include <libpwrutils/foreach.h>
+
+namespace Corpus2 {
+
+// Registers StatWriter with TokenWriter under the name "stat".
+bool StatWriter::registered = TokenWriter::register_writer<StatWriter>(
+		"stat");
+
+StatWriter::StatWriter(std::ostream& os, const Tagset& tagset,
+		const string_range_vector& params)
+	: TokenWriter(os, tagset, params)
+{
+}
+
+// Writes a one-character summary of the token: the first code point of
+// its orth encoded as UTF-8, or "~" when the orth is empty.
+void StatWriter::write_token(const Token& t)
+{
+	if (t.orth().length() == 0) {
+		os() << "~";
+	} else {
+		// char32At() yields the whole code point; charAt() would return
+		// only a lone surrogate for characters outside the BMP.
+		UnicodeString o1(t.orth().char32At(0));
+		os() << PwrNlp::to_utf8(o1);
+	}
+}
+
+
+// Writes a "tokens" row with one character per token, then, if the
+// sentence is an AnnotatedSentence, one row per channel (the channel's
+// key followed by its dump_alpha() output), then an empty line.
+void StatWriter::write_sentence(const Sentence& s)
+{
+	os() << std::setw(8) << "tokens" << " ";
+	foreach (const Token* t, s.tokens()) {
+		write_token(*t);
+	}
+	os() << "\n";
+	const AnnotatedSentence* as = dynamic_cast<const AnnotatedSentence*>(&s);
+	if (as) {
+		foreach (const AnnotatedSentence::chan_map_t::value_type& vt, as->all_channels()) {
+			os() << std::setw(8) << vt.first << " ";
+			os() << vt.second.dump_alpha();
+			os() << "\n";
+		}
+	}
+	os() << "\n";
+}
+
+// Writes every sentence of the chunk, followed by one extra newline.
+void StatWriter::write_chunk(const Chunk& c)
+{
+	foreach (const Sentence::Ptr& s, c.sentences()) {
+		write_sentence(*s);
+	}
+	os() << "\n";
+}
+
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/statwriter.h b/libcorpus2/io/statwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..a022b9f4f6e1f67eaf864193039d54c89b2c1497
--- /dev/null
+++ b/libcorpus2/io/statwriter.h
@@ -0,0 +1,36 @@
+#ifndef LIBCORPUS2_IO_STATWRITER_H
+#define LIBCORPUS2_IO_STATWRITER_H
+
+#include <libcorpus2/io/writer.h>
+
+namespace Corpus2 {
+
+/**
+ * Token writer producing a compact overview of each sentence: a "tokens"
+ * row holding the first code point of every token's orth ("~" for an
+ * empty orth) and, for annotated sentences, one row per channel.
+ * Registered under the writer name "stat".
+ */
+class StatWriter : public TokenWriter
+{
+public:
+	/// Forwards all arguments to the TokenWriter constructor.
+	StatWriter(std::ostream& os, const Tagset& tagset,
+			const string_range_vector& params);
+
+	/// Writes the first code point of the token's orth, or "~" if empty.
+	void write_token(const Token& t);
+
+	/// Writes the "tokens" row and, for annotated sentences, channel rows.
+	void write_sentence(const Sentence& s);
+
+	/// Writes each sentence of the chunk, then one extra newline.
+	void write_chunk(const Chunk& c);
+
+	/// Value returned by TokenWriter::register_writer<StatWriter>("stat").
+	static bool registered;
+};
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_STATWRITER_H