From a7ae417fef47bf63a077cae080769a4d5182c133 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Sat, 15 Jun 2013 13:27:52 +0200
Subject: [PATCH] new writer: line (simple chunk line drawings)

---
 CMakeLists.txt               |   2 +-
 libcorpus2/CMakeLists.txt    |   1 +
 libcorpus2/io/linewriter.cpp | 118 +++++++++++++++++++++++++++++++++++
 libcorpus2/io/linewriter.h   |  44 +++++++++++++
 4 files changed, 164 insertions(+), 1 deletion(-)
 create mode 100644 libcorpus2/io/linewriter.cpp
 create mode 100644 libcorpus2/io/linewriter.h

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 09c8d60..f60324a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ PROJECT(Corpus2Library)
 
 set(corpus2_ver_major "1")
 set(corpus2_ver_minor "3")
-set(corpus2_ver_patch "3")
+set(corpus2_ver_patch "4")
 
 cmake_minimum_required(VERSION 2.8.0)
 
diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index dacd420..c1f931c 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -58,6 +58,7 @@ SET(libcorpus2_STAT_SRC
 	io/helpers.cpp
 	io/fastxces.cpp
 	io/iob-chan.cpp
+	io/linewriter.cpp
 	io/nonewriter.cpp
 	io/orthwriter.cpp
 	io/pathwriter.cpp
diff --git a/libcorpus2/io/linewriter.cpp b/libcorpus2/io/linewriter.cpp
new file mode 100644
index 0000000..deb8650
--- /dev/null
+++ b/libcorpus2/io/linewriter.cpp
@@ -0,0 +1,118 @@
+/*
+	Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+	Part of the libcorpus2 project
+
+	This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+	This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
+*/
+
+#include <libcorpus2/io/linewriter.h>
+#include <libcorpus2/io/linewriter.h>
+#include <libcorpus2/ann/annotatedsentence.h>
+#include <iomanip>
+#include <boost/foreach.hpp>
+
+namespace Corpus2 {
+
+bool LineWriter::registered = TokenWriter::register_writer<LineWriter>(
+	"line"); // writer name handed to register_writer; the result is kept in 'registered'
+
+LineWriter::LineWriter(std::ostream& os, const Tagset& tagset,
+		const string_range_vector& params)
+	: TokenWriter(os, tagset, params) // all three arguments go unchanged to the base class
+{
+}
+
+void LineWriter::write_token(const Token& t)
+{
+	os() << t.orth_utf8(); // only the orth (UTF-8): no tag, no separator, no newline
+}
+
+
+// Draws one sentence as line art: a "Tokens" row holding the orths, then (for
+// an AnnotatedSentence) one row per channel where tokens inside a chunk are
+// underlined, heads with a double line. An empty line ends the drawing.
+void LineWriter::write_sentence(const Sentence& s)
+{
+	const std::string tok_name("Tokens");
+	const std::string empty_char(" ");
+	const std::string chunk_char("\xe2\x94\x80"); // U+2500 as hardcoded utf-8
+	const std::string head_char("\xe2\x95\x90"); // U+2550 as hardcoded utf-8
+
+	const AnnotatedSentence* as = dynamic_cast<const AnnotatedSentence*>(&s);
+	// get longest channel name for padding
+	int name_padding = tok_name.length();
+	if (as) {
+		BOOST_FOREACH(const AnnotatedSentence::chan_map_t::value_type& vt, as->all_channels()) {
+			const int that_len = vt.first.length();
+			if (that_len > name_padding) name_padding = that_len;
+		}
+	}
+
+	// dump token orths and remember orth lengths (they set the column widths)
+	// NOTE(review): presumably one output column per orth().length() unit -- confirm
+	std::vector<int> orth_lens;
+	os() << std::left << std::setw(name_padding) << tok_name;
+	BOOST_FOREACH(const Token* t, s.tokens()) {
+		os() << empty_char << t->orth_utf8();
+		orth_lens.push_back(t->orth().length());
+	}
+	os() << "\n";
+	const int n_tokens = static_cast<int>(orth_lens.size());
+
+	// dump channel line representations
+	if (as) {
+		// get_channel()/make_iob_from_segments() below are called on a non-const
+		// sentence, hence the cast. NOTE(review): this touches the caller's object.
+		AnnotatedSentence* hax = const_cast<AnnotatedSentence*>(as);
+		BOOST_FOREACH(const AnnotatedSentence::chan_map_t::value_type& vt, hax->all_channels()) {
+			os() << std::left << std::setw(name_padding) << vt.first;
+			// use IOB2 representation internally
+			AnnotationChannel &chan = hax->get_channel(vt.first);
+			chan.make_iob_from_segments();
+			IOB::Enum last_tag = IOB::O;
+			// stop at n_tokens so orth_lens is never indexed out of range
+			for (int idx = 0; idx < n_tokens && idx < chan.size(); idx++) {
+				const IOB::Enum this_tag = chan.get_iob_at(idx);
+				// join to the previous token only when the same chunk continues;
+				// a B directly after a chunk opens a new one, so leave a gap
+				if (last_tag != IOB::O && this_tag == IOB::I) {
+					os() << chunk_char;
+				}
+				else {
+					os() << empty_char;
+				}
+				// underline the token itself, one character per orth unit
+				const std::string* now = &empty_char;
+				if (this_tag != IOB::O) {
+					now = chan.is_head_at(idx) ? &head_char : &chunk_char;
+				}
+				for (int line_pos = orth_lens[idx]; line_pos > 0; line_pos--) {
+					os() << *now;
+				}
+				last_tag = this_tag;
+			}
+			os() << "\n";
+		}
+	}
+	os() << "\n";
+}
+
+// Writes each sentence of the chunk via write_sentence(), then one extra "\n".
+void LineWriter::write_chunk(const Chunk& c)
+{
+	// bind by reference: copying the Ptr on every iteration is needless work
+	BOOST_FOREACH(const Sentence::Ptr& s, c.sentences()) write_sentence(*s);
+	os() << "\n";
+}
+
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/linewriter.h b/libcorpus2/io/linewriter.h
new file mode 100644
index 0000000..a86c2e9
--- /dev/null
+++ b/libcorpus2/io/linewriter.h
@@ -0,0 +1,44 @@
+/*
+	Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+	Part of the libcorpus2 project
+
+	This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU Lesser General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+	This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
+*/
+
+#ifndef LIBCORPUS2_IO_LINEWRITER_H
+#define LIBCORPUS2_IO_LINEWRITER_H
+
+#include <libcorpus2/io/writer.h>
+
+namespace Corpus2 {
+
+/**
+ * A writer that produces simple text (UTF-8) line drawing representation
+ * of syntactic annotation in channels. Registered under the name "line". */
+class LineWriter : public TokenWriter
+{
+public:
+	LineWriter(std::ostream& os, const Tagset& tagset,
+			const string_range_vector& params);
+	/** Writes just the orth of the token (UTF-8), nothing else. */
+	void write_token(const Token& t);
+	/** Writes the token row and one line-drawing row per annotation channel. */
+	void write_sentence(const Sentence& s);
+	/** Writes every sentence of the chunk, then one extra newline. */
+	void write_chunk(const Chunk& c);
+	/** Result of the static register_writer call made for this writer. */
+	static bool registered;
+};
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_IO_LINEWRITER_H
-- 
GitLab