From 903e88c7ede46e04e29ec8311d5ccbad14af1f1e Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Tue, 12 Apr 2011 17:16:08 +0200
Subject: [PATCH] add two new writers: a dummy "none" writer that disables
 output and a "stat" writer that outputs general info only about token counts
 and annotation info

---
 libcorpus2/CMakeLists.txt     |  2 ++
 libcorpus2/io/nonewriter.cpp  | 27 +++++++++++++++++
 libcorpus2/io/nonewriter.h    | 25 ++++++++++++++++
 libcorpus2/io/plainwriter.cpp |  2 +-
 libcorpus2/io/statwriter.cpp  | 55 +++++++++++++++++++++++++++++++++++
 libcorpus2/io/statwriter.h    | 25 ++++++++++++++++
 6 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 libcorpus2/io/nonewriter.cpp
 create mode 100644 libcorpus2/io/nonewriter.h
 create mode 100644 libcorpus2/io/statwriter.cpp
 create mode 100644 libcorpus2/io/statwriter.h

diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 09a03e4..96f80e9 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -58,12 +58,14 @@ SET(libcorpus2_STAT_SRC
 	io/cclreader.cpp
 	io/cclwriter.cpp
 	io/fastxces.cpp
+	io/nonewriter.cpp
 	io/orthwriter.cpp
 	io/plainwriter.cpp
 	io/premorphwriter.cpp
 	io/reader.cpp
 	io/rft.cpp
 	io/sax.cpp
+	io/statwriter.cpp
 	io/writer.cpp
 	io/xces.cpp
 	io/xcescommon.cpp
diff --git a/libcorpus2/io/nonewriter.cpp b/libcorpus2/io/nonewriter.cpp
new file mode 100644
index 0000000..2aaf4ee
--- /dev/null
+++ b/libcorpus2/io/nonewriter.cpp
@@ -0,0 +1,26 @@
+#include <libcorpus2/io/nonewriter.h>
+
+namespace Corpus2 {
+
+// Registers this writer with TokenWriter under the name "none"; evaluated
+// during static initialization of this translation unit.
+bool NoneWriter::registered =
+	TokenWriter::register_writer<NoneWriter>("none");
+
+// Forwards all arguments to the TokenWriter base constructor; this class
+// adds no state of its own.
+NoneWriter::NoneWriter(std::ostream& os, const Tagset& tagset,
+		const string_range_vector& params)
+	: TokenWriter(os, tagset, params)
+{}
+
+// Intentionally empty: the token is ignored and nothing is written.
+void NoneWriter::write_token(const Token& /*t*/) {}
+
+// Intentionally empty: the sentence is ignored and nothing is written.
+void NoneWriter::write_sentence(const Sentence& /*s*/) {}
+
+// Intentionally empty: the chunk is ignored and nothing is written.
+void NoneWriter::write_chunk(const Chunk& /*c*/) {}
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/nonewriter.h b/libcorpus2/io/nonewriter.h
new file mode 100644
index 0000000..8d6a719
--- /dev/null
+++ b/libcorpus2/io/nonewriter.h
@@ -0,0 +1,35 @@
+#ifndef LIBCORPUS2_IO_NONEWRITER_H
+#define LIBCORPUS2_IO_NONEWRITER_H
+
+#include <libcorpus2/io/writer.h>
+
+namespace Corpus2 {
+
+/**
+ * A TokenWriter whose write functions are all no-ops, so this class itself
+ * writes nothing to the output stream. Registered with TokenWriter under
+ * the name "none".
+ */
+class NoneWriter : public TokenWriter
+{
+public:
+	/// Forwards all arguments to the TokenWriter constructor
+	NoneWriter(std::ostream& os, const Tagset& tagset,
+			const string_range_vector& params);
+
+	/// Does nothing; the token is ignored
+	void write_token(const Token& t);
+
+	/// Does nothing; the sentence is ignored
+	void write_sentence(const Sentence& s);
+
+	/// Does nothing; the chunk is ignored
+	void write_chunk(const Chunk& c);
+
+	/// Result of registering this class via TokenWriter::register_writer
+	static bool registered;
+};
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_NONEWRITER_H
diff --git a/libcorpus2/io/plainwriter.cpp b/libcorpus2/io/plainwriter.cpp
index 45b156b..225afe4 100644
--- a/libcorpus2/io/plainwriter.cpp
+++ b/libcorpus2/io/plainwriter.cpp
@@ -19,7 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 namespace Corpus2 {
 
-bool PlainWriter::registered = PlainWriter::register_writer<PlainWriter>(
+bool PlainWriter::registered = TokenWriter::register_writer<PlainWriter>(
 	"plain");
 
 PlainWriter::PlainWriter(std::ostream& os, const Tagset& tagset,
diff --git a/libcorpus2/io/statwriter.cpp b/libcorpus2/io/statwriter.cpp
new file mode 100644
index 0000000..b77fbf7
--- /dev/null
+++ b/libcorpus2/io/statwriter.cpp
@@ -0,0 +1,68 @@
+#include <libcorpus2/io/statwriter.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+#include <iomanip>
+#include <libpwrutils/foreach.h>
+
+namespace Corpus2 {
+
+// Registers this writer with TokenWriter under the name "stat"; evaluated
+// during static initialization of this translation unit.
+bool StatWriter::registered = TokenWriter::register_writer<StatWriter>(
+	"stat");
+
+// Forwards all arguments to the TokenWriter base constructor.
+StatWriter::StatWriter(std::ostream& os, const Tagset& tagset,
+		const string_range_vector& params)
+	: TokenWriter(os, tagset, params)
+{
+}
+
+// Writes a one-character summary of a token: the first character of its
+// orth, or "~" when the orth is empty.
+void StatWriter::write_token(const Token& t)
+{
+	if (t.orth().isEmpty()) {
+		os() << "~";
+	} else {
+		// char32At() returns the whole code point at offset 0; charAt()
+		// would return only the first UTF-16 code unit, i.e. a lone
+		// surrogate for characters outside the BMP.
+		UnicodeString o1(t.orth().char32At(0));
+		os() << PwrNlp::to_utf8(o1);
+	}
+}
+
+// Writes a "tokens" line holding one character per token and then, when
+// the sentence is an AnnotatedSentence, one line per annotation channel
+// with the channel's key followed by its dump_alpha() output. Line labels
+// are padded to width 8; a blank line terminates the sentence.
+void StatWriter::write_sentence(const Sentence& s)
+{
+	os() << std::setw(8) << "tokens" << " ";
+	foreach (const Token* t, s.tokens()) {
+		write_token(*t);
+	}
+	os() << "\n";
+	// Channel lines are only written for annotated sentences
+	const AnnotatedSentence* as = dynamic_cast<const AnnotatedSentence*>(&s);
+	if (as) {
+		foreach (const AnnotatedSentence::chan_map_t::value_type& vt, as->all_channels()) {
+			os() << std::setw(8) << vt.first << " ";
+			os() << vt.second.dump_alpha();
+			os() << "\n";
+		}
+	}
+	os() << "\n";
+}
+
+// Writes every sentence of the chunk, followed by an extra blank line.
+void StatWriter::write_chunk(const Chunk& c)
+{
+	// Bind by reference to avoid copying Sentence::Ptr on each iteration
+	foreach (const Sentence::Ptr& s, c.sentences()) {
+		write_sentence(*s);
+	}
+	os() << "\n";
+}
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/statwriter.h b/libcorpus2/io/statwriter.h
new file mode 100644
index 0000000..a022b9f
--- /dev/null
+++ b/libcorpus2/io/statwriter.h
@@ -0,0 +1,36 @@
+#ifndef LIBCORPUS2_IO_STATWRITER_H
+#define LIBCORPUS2_IO_STATWRITER_H
+
+#include <libcorpus2/io/writer.h>
+
+namespace Corpus2 {
+
+/**
+ * A TokenWriter that prints a compact overview instead of full token data:
+ * each token is reduced to the first character of its orth, and annotated
+ * sentences additionally get one line per annotation channel. Registered
+ * with TokenWriter under the name "stat".
+ */
+class StatWriter : public TokenWriter
+{
+public:
+	/// Forwards all arguments to the TokenWriter constructor
+	StatWriter(std::ostream& os, const Tagset& tagset,
+			const string_range_vector& params);
+
+	/// Writes the first character of the token's orth, or "~" if it is empty
+	void write_token(const Token& t);
+
+	/// Writes a "tokens" line and, for annotated sentences, channel lines
+	void write_sentence(const Sentence& s);
+
+	/// Writes all sentences of the chunk followed by an empty line
+	void write_chunk(const Chunk& c);
+
+	/// Result of registering this class via TokenWriter::register_writer
+	static bool registered;
+};
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_STATWRITER_H
-- 
GitLab