diff --git a/CMakeLists.txt b/CMakeLists.txt
index 302b8f652c8cf5c540b09bae4527732866c57439..a9f18ce1eee33ef2c21ccfa1f071f827c79dc4d9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -69,15 +69,17 @@ FIND_PATH(LIBCORPUS2_SRC_DATA_DIR
 )
 MARK_AS_ADVANCED(LIBCORPUS2_SRC_DATA_DIR)
 
-add_subdirectory(libpwrutils)
-add_subdirectory(libcorpus2)
-add_subdirectory(libcorpus2_whole)
 if(CORPUS2_BUILD_POLIQARP)
     add_subdirectory(poliqarp-library)
     add_subdirectory(poliqarp)
+    add_definitions( -DWITH_POLIQARP )
 else()
     message(STATUS "Not building Poliqarp library and wrapper")
 endif(CORPUS2_BUILD_POLIQARP)
+
+add_subdirectory(libpwrutils)
+add_subdirectory(libcorpus2)
+add_subdirectory(libcorpus2_whole)
 add_subdirectory(corpus2tools)
 add_subdirectory(tests)
diff --git a/corpus2data/nkjp.tagset b/corpus2data/nkjp.tagset
index c3a2ccc9cfae4691cb8f08b106910fe7efa4aed5..b1e0be196d37fca157d7345ec409d5fd1ebc2990 100644
--- a/corpus2data/nkjp.tagset
+++ b/corpus2data/nkjp.tagset
@@ -35,8 +35,8 @@ depr nmb cas gnd
 ger nmb cas gnd asp ngt
 ppron12 nmb cas gnd per [acn]
 ppron3 nmb cas gnd per [acn] [ppr]
-num nmb cas gnd acm
-numcol nmb cas gnd acm
+num nmb cas gnd [acm]
+numcol nmb cas gnd [acm]
 adj nmb cas gnd deg
 pact nmb cas gnd asp ngt
 ppas nmb cas gnd asp ngt
diff --git a/corpus2data/sgjp.tagset b/corpus2data/sgjp.tagset
index 95a6d09c718970ef8225eef46b59a4e3230d7340..fec42f8d5ef6581825b98381f750d10dc8e418c3 100644
--- a/corpus2data/sgjp.tagset
+++ b/corpus2data/sgjp.tagset
@@ -35,8 +35,8 @@ depr nmb cas gnd
 ger nmb cas gnd asp ngt
 ppron12 nmb cas gnd per [acn]
 ppron3 nmb cas gnd per [acn] [ppr]
-num nmb cas gnd acm
-numcol nmb cas gnd acm
+num nmb cas gnd [acm]
+numcol nmb cas gnd [acm]
 adj nmb cas gnd deg
 pact nmb cas gnd asp ngt
 ppas nmb cas gnd asp ngt
diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index d5ccdc18c16c25e2b67dfb00daa582e70c72f0be..ad7900137f2e7ea296900b654c6ab5bdf12300e0 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -53,6 +53,7 @@ SET(libcorpus2_STAT_SRC
     tokenmetadata.cpp
     io/cclreader.cpp
     io/cclwriter.cpp
+    io/conllwriter.cpp
     io/helpers.cpp
     io/fastxces.cpp
     io/iob-chan.cpp
diff --git a/libcorpus2/io/cclreader.cpp b/libcorpus2/io/cclreader.cpp
index 34094a83f57add41f87dd3b078ca222a830fcd80..97b066f0ea2467069d3805c6fb9712de6d343de8 100644
--- a/libcorpus2/io/cclreader.cpp
+++ b/libcorpus2/io/cclreader.cpp
@@ -309,6 +309,8 @@ void CclReader::set_option(const std::string& option)
         impl_->set_autogen_sent_id(true);
     } else if (option == "autogen_chunk_id") {
         impl_->set_autogen_chunk_id(true);
+    } else if (option == "no_warn_unexpected_xml") {
+        impl_->set_warn_on_unexpected(false);
     } else {
         BufferedChunkReader::set_option(option);
     }
@@ -320,11 +322,13 @@ std::string CclReader::get_option(const std::string& option) const
     if (option == "disamb_only") {
         return impl_->get_disamb_only() ? option : "";
     } else if (option == "no_warn_inconsistent") {
-        return impl_->get_warn_on_inconsistent() ? option : "";
+        return impl_->get_warn_on_inconsistent() ? "" : option;
     } else if (option == "autogen_sent_id") {
-        return impl_->get_autogen_sent_id() ? "autogen_sent_id" : "";
+        return impl_->get_autogen_sent_id() ? option : "";
     } else if (option == "autogen_chunk_id") {
-        return impl_->get_autogen_chunk_id() ? "autogen_chunk_id" : "";
+        return impl_->get_autogen_chunk_id() ? option : "";
+    } else if (option == "no_warn_unexpected_xml") {
+        return impl_->get_warn_on_unexpected() ? "" : option;
     }
     return BufferedChunkReader::get_option(option);
 }
diff --git a/libcorpus2/io/conllwriter.cpp b/libcorpus2/io/conllwriter.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8910e617c6dd89f792bb02f66b3c3fce6a43e528
--- /dev/null
+++ b/libcorpus2/io/conllwriter.cpp
@@ -0,0 +1,85 @@
+#include "conllwriter.h"
+#include <libpwrutils/foreach.h>
+#include <boost/algorithm/string.hpp>
+
+
+namespace Corpus2 {
+
+bool ConllWriter::registered = TokenWriter::register_writer<ConllWriter>("conll");
+
+ConllWriter::ConllWriter(std::ostream& os, const Tagset& tagset,
+        const string_range_vector& params)
+    : TokenWriter(os, tagset, params)
+{
+    myTagset=tagset;
+}
+
+ConllWriter::~ConllWriter()
+{
+    finish();
+}
+
+void ConllWriter::write_token(const Token &t)
+{
+    os()<<t.orth_utf8()<<"\t";
+    Lexeme lex = t.get_preferred_lexeme(myTagset);
+    os()<<lex.lemma_utf8()+"\t";
+    std::string tag = myTagset.tag_to_string(lex.tag());
+    std::vector<std::string> strs;
+    boost::split(strs, tag, boost::is_any_of(":"));
+    os()<<strs[0]<<"\t"<<strs[0]<<"\t";
+    if(strs.size()>1)
+    {
+        size_t i;
+        for(i=1;i<strs.size()-1;i++)
+        {
+            os()<<strs[i]<<"|";
+        }
+        os()<<strs[i]<<"\t_\t_\t_\t_";
+    }
+    else
+        os()<<"_\t_\t_\t_\t_";
+}
+
+void ConllWriter::write_sentence(const Sentence& s)
+{
+    int i=1;
+    foreach (const Token* t, s.tokens()) {
+        os()<<i<<"\t";
+        write_token(*t);
+        os()<<"\n";
+        i++;
+    }
+    os()<<"\n";
+}
+
+void ConllWriter::write_chunk(const Chunk &c)
+{
+    foreach (const Sentence::ConstPtr& s, c.sentences()) {
+        write_sentence(*s);
+    }
+}
+
+void ConllWriter::do_header()
+{
+
+}
+
+void ConllWriter::do_footer()
+{
+}
+
+std::string ConllWriter::convert_tag(std::string tag)
+{
+    if(tag.compare("adja")==0||tag.compare("adjc")==0||tag.compare("adjp")==0||tag.compare("padj")==0||tag.compare("pact")==0||tag.compare("ppas")==0)
+        return "adj";
+    if(tag.compare("padv")==0||tag.compare("pant")==0||tag.compare("pcon")==0)
+        return "adv";
+    if(tag.compare("bedzie")==0||tag.compare("fin")==0||tag.compare("imps")==0||tag.compare("impt")==0||tag.compare("inf")==0||tag.compare("praet")==0||tag.compare("pred")==0||tag.compare("winien")==0)
+        return "verb";
+    if(tag.compare("psubst")==0||tag.compare("depr")==0||tag.compare("ger")==0||tag.compare("ppron3")==0||tag.compare("ppron12")==0||tag.compare("siebie")==0)
+        return "subst";
+    return tag;
+}
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/io/conllwriter.h b/libcorpus2/io/conllwriter.h
new file mode 100644
index 0000000000000000000000000000000000000000..a2132b4ca6223b1805cd4d2c0e4cedcda285c936
--- /dev/null
+++ b/libcorpus2/io/conllwriter.h
@@ -0,0 +1,36 @@
+#ifndef CONLLWRITER_H
+#define CONLLWRITER_H
+
+#include <libcorpus2/io/writer.h>
+
+namespace Corpus2 {
+
+class ConllWriter : public TokenWriter
+{
+public:
+    ConllWriter(std::ostream& os, const Tagset& tagset,
+            const string_range_vector& params);
+
+    ~ConllWriter();
+
+    void write_token(const Token &t);
+
+    void write_sentence(const Sentence &s);
+
+    void write_chunk(const Chunk &c);
+
+    static bool registered;
+
+protected:
+    void do_header();
+
+    void do_footer();
+private:
+    Tagset myTagset;
+    std::string convert_tag(std::string tag);
+
+};
+
+} /* end ns Corpus2 */
+
+#endif // CONLLWRITER_H
diff --git a/libcorpus2/io/rft.cpp b/libcorpus2/io/rft.cpp
index 8f28008dafccacd006d0656fa87a81844df67257..d7366c370986b6baea9a01eedb292c6925b85fc2 100644
--- a/libcorpus2/io/rft.cpp
+++ b/libcorpus2/io/rft.cpp
@@ -19,6 +19,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 #include <libpwrutils/foreach.h>
 #include <boost/algorithm/string.hpp>
+#include <boost/algorithm/string/predicate.hpp>
 #include <boost/make_shared.hpp>
 #include <fstream>
@@ -143,7 +144,7 @@ Sentence::Ptr RftReader::actual_next_sentence()
     while (is().good()) {
         std::getline(is(), line);
         if (line.empty()
-                || (mbt_dialect_ && line.find_first_of("<utt>") == 0)) { // TODO: check
+                || (mbt_dialect_ && boost::starts_with(line, "<utt>"))) {
             return s;
         } else {
             size_t tab = line.find('\t');
diff --git a/libcorpus2_whole/CMakeLists.txt b/libcorpus2_whole/CMakeLists.txt
index e4f6ca987c7e19c00b239ed53ac4f8f4f859f433..8312adfcf37a6c84b92fb401ed487c45179f894f 100644
--- a/libcorpus2_whole/CMakeLists.txt
+++ b/libcorpus2_whole/CMakeLists.txt
@@ -13,11 +13,16 @@ SET(libcorpus2_whole_SRC
     io/relreader.cpp
     io/documentreader.cpp
     io/documentcorpusreader.cpp
-    io/poliqarpdocumentreader.cpp
-    io/poliqarpcorpusreader.cpp
     io/corpusreader.cpp
 )
 
+if(CORPUS2_BUILD_POLIQARP)
+    SET(libcorpus2_whole_SRC ${libcorpus2_whole_SRC}
+        io/poliqarpdocumentreader.cpp
+        io/poliqarpcorpusreader.cpp
+    )
+endif(CORPUS2_BUILD_POLIQARP)
+
 file(GLOB_RECURSE INCS "*.h")
 
 if(WIN32)
diff --git a/libcorpus2_whole/io/corpusreader.cpp b/libcorpus2_whole/io/corpusreader.cpp
index 56538ad5115a7ce386243bb1214378502f634a99..bbe0154920f040469480b842bc99c09e891b266a 100644
--- a/libcorpus2_whole/io/corpusreader.cpp
+++ b/libcorpus2_whole/io/corpusreader.cpp
@@ -1,7 +1,11 @@
 #include <libcorpus2_whole/io/corpusreader.h>
-#include <libcorpus2_whole/io/poliqarpcorpusreader.h>
 #include <libcorpus2_whole/io/documentcorpusreader.h>
 
+#ifdef WITH_POLIQARP
+#include <libcorpus2_whole/io/poliqarpcorpusreader.h>
+#endif
+
+
 namespace Corpus2 {
 namespace whole{
@@ -24,14 +28,16 @@ boost::shared_ptr<Corpus> CorpusReader::read(const std::string& corpus_file_path
 //
 boost::shared_ptr<CorpusReaderI> CorpusReader::get_corpus_reader_by_type()
 {
-    if (corpus_type_ == "poliqarp") {
-        return boost::shared_ptr<PoliqarpCorpusReader>(
-            new PoliqarpCorpusReader(tagset_));
-    } else if (corpus_type_ == "document") {
+    if (corpus_type_ == "document") {
         return boost::shared_ptr<DocumentCorpusReader>(
             new DocumentCorpusReader(tagset_));
+#ifdef WITH_POLIQARP
+    } else if (corpus_type_ == "poliqarp") {
+        return boost::shared_ptr<PoliqarpCorpusReader>(
+            new PoliqarpCorpusReader(tagset_));
+#endif
     }
-    throw Corpus2Error(corpus_type_ + " is unknown reader type!");
+    throw Corpus2Error(corpus_type_ + " is an unknown reader type!");
 }
 
 } // whole ns
diff --git a/libcorpus2_whole/io/documentcorpusreader.cpp b/libcorpus2_whole/io/documentcorpusreader.cpp
index 12d035904f1e8113c28b1fff413df38bd6264872..a85e97cc4a79fe7b3a01679d55fbd416b0a5a995 100644
--- a/libcorpus2_whole/io/documentcorpusreader.cpp
+++ b/libcorpus2_whole/io/documentcorpusreader.cpp
@@ -35,8 +35,7 @@ boost::shared_ptr<Corpus> DocumentCorpusReader::read(const std::string& corpus_f
             continue;
         } else if (splitted_line.size() == 1) {
-            // maybe exception?
-            continue;
+            throw Corpus2Error("DocumentReader requires paths to both annotations and relations");
         }
 
         ann_path = splitted_line[0];
diff --git a/libcorpus2_whole/io/documentreader.cpp b/libcorpus2_whole/io/documentreader.cpp
index 8aa1a79a421c4e08add2b75b48040fed6ad974af..c30a4fbadf8960189c9585e0284cf69546131f96 100644
--- a/libcorpus2_whole/io/documentreader.cpp
+++ b/libcorpus2_whole/io/documentreader.cpp
@@ -31,6 +31,10 @@ namespace whole {
         const std::string &annot_path, const std::string &rela_path)
 {
     ccl_reader_ = boost::make_shared<CclReader>(tagset, annot_path);
+    // prevent the underlying CCL reader from complaining about
+    // relation XML tags unknown to the reader itself
+    // (in case annot_path and rela_path point to the same file)
+    ccl_reader_->set_option("no_warn_unexpected_xml");
     rel_reader_ = boost::make_shared<RelationReader>(rela_path);
 }
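Side note, not part of the patch: the no_warn_unexpected_xml option set above is the one this diff adds to CclReader::set_option, and with the corrected get_option polarity a no_* option now reports its own name exactly when the corresponding warning is disabled. A rough sketch of toggling it directly, assuming the SWIG wrapper exposes set_option/get_option as the C++ TokenReader interface does and that the CCL reader is registered under the name 'ccl'; the path is hypothetical:

    import corpus2

    tagset = corpus2.get_named_tagset('nkjp')
    # one CCL file carrying both the annotations and the <rel> markup
    rdr = corpus2.TokenReader.create_path_reader('ccl', tagset, 'doc.xml')
    rdr.set_option('no_warn_unexpected_xml')  # silence warnings on unknown XML tags
    assert rdr.get_option('no_warn_unexpected_xml') == 'no_warn_unexpected_xml'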
+""" + + + +def go(): + parser = OptionParser(usage=descr) + parser.add_option('-i', '--input-format', type='string', action='store', + dest='input_format', default='xces', + help='set the input format; default: xces') + parser.add_option('-o', '--output-format', type='string', action='store', + dest='output_format', default='xces', + help='set the output format; default: xces') + parser.add_option('-t', '--tagset', type='string', action='store', + dest='tagset', default='nkjp', + help='set the tagset used in input; default: nkjp') + parser.add_option('-q', '--quiet', action='store_false', default=True, dest='verbose') + parser.add_option('-d', '--debug', action='store_true', dest='debug_mode') + (options, args) = parser.parse_args() + + if len(args) != 3: + print 'You need to provide a TAGOUT, MORPHO and OUTPUT files.' + print 'See --help for details.' + print + sys.exit(1) + + tag_fn, mor_fn, out_fn = args + tagset = corpus2.get_named_tagset(options.tagset) + + tag_rdr = corpus2.TokenReader.create_path_reader(options.input_format, tagset, tag_fn) + mor_rdr = corpus2.TokenReader.create_path_reader(options.input_format, tagset, mor_fn) + + writer = corpus2.TokenWriter.create_path_writer(options.output_format, out_fn, tagset) + + while True: + mor_sent = mor_rdr.get_next_sentence() + tag_sent = tag_rdr.get_next_sentence() + assert (not mor_sent) == (not tag_sent) + if not mor_sent: + break + for mor_tok, tag_tok in zip(mor_sent.tokens(), tag_sent.tokens()): + assert unicode(mor_tok.orth()) == unicode(tag_tok.orth()), unicode(tag_tok.orth()) + tag_tok.set_wa(mor_tok.wa()) + writer.write_sentence(tag_sent) + + writer.finish() + +if __name__ == '__main__': + go() diff --git a/utils/corptext.py b/utils/corptext.py new file mode 100755 index 0000000000000000000000000000000000000000..36a67d72659835f9a1f354748e312be608d0de61 --- /dev/null +++ b/utils/corptext.py @@ -0,0 +1,57 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +descr = """%prog [options] INPUT OUTPUT + +Reads input and saves as plain text. By default, paragraphs are separated with +two newlines, sentence division is not marked.""" + +from optparse import OptionParser +import sys, codecs +import corpus2 + +def go(): + parser = OptionParser(usage=descr) + parser.add_option('-i', '--input-format', type='string', action='store', + dest='input_format', default='xces', + help='set the input format; default: xces') + parser.add_option('-t', '--tagset', type='string', action='store', + dest='tagset', default='nkjp', + help='set the tagset used in input; default: nkjp') + parser.add_option('-s', '--sent-sep', type='string', action='store', + dest='sent_sep', default='', + help='set the sentence separator; default: (empty)') + parser.add_option('-p', '--par-sep', type='string', action='store', + dest='par_sep', default='\n\n', + help='set the sentence separator; default: (two newlines)') + (options, args) = parser.parse_args() + if len(args) != 2: + print 'Need to provide input and output.' + print 'See --help for details.' 
+ print + sys.exit(1) + + fn_input, fn_output = args + + with codecs.open(fn_output, 'wb', 'utf-8') as out: + tagset = corpus2.get_named_tagset(options.tagset) + rdr = corpus2.TokenReader.create_path_reader(options.input_format, tagset, fn_input) + first = True + while True: + par = rdr.get_next_chunk() + if options.par_sep: + first = True # if non-empty par separator, skip pre-spaces + if not par: + break + for sent in par.sentences(): + if options.sent_sep: + first = True # if non-empty sent sep, skip pre-spaces + for tok in sent.tokens(): + if not first and tok.after_space(): + out.write(' ') + out.write(unicode(tok.orth())) + first = False + out.write(options.sent_sep) + out.write(options.par_sep) + +if __name__ == '__main__': + go() diff --git a/utils/parfolds.py b/utils/parfolds.py new file mode 100755 index 0000000000000000000000000000000000000000..d10ed210957322185d0d487ec5ce961e082d3123 --- /dev/null +++ b/utils/parfolds.py @@ -0,0 +1,63 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +descr = """%prog [options] INPUT OUTDIR + +Generates paragraph-wise folds.""" + +from optparse import OptionParser +import sys, codecs, os +import corpus2 + +def go(): + parser = OptionParser(usage=descr) + parser.add_option('-i', '--input-format', type='string', action='store', + dest='input_format', default='xces', + help='set the input format; default: xces') + parser.add_option('-o', '--output-format', type='string', action='store', + dest='output_format', default='xces', + help='set the output format; default: xces') + parser.add_option('-t', '--tagset', type='string', action='store', + dest='tagset', default='nkjp', + help='set the tagset used in input; default: nkjp') + parser.add_option('-f', '--num-folds', type='int', action='store', + dest='num_folds', default='10', + help='set the number of folds (default: 10)') + + (options, args) = parser.parse_args() + if len(args) != 2: + print 'Need to provide input and output.' + print 'See --help for details.' + print + sys.exit(1) + + fold_nums = range(options.num_folds) + fn_input, fold_dir = args + + tagset = corpus2.get_named_tagset(options.tagset) + rdr = corpus2.TokenReader.create_path_reader(options.input_format, tagset, fn_input) + fold_test = [corpus2.TokenWriter.create_path_writer( + options.output_format, + os.path.join(fold_dir, 'test%02d.xml' % (num + 1)), tagset) + for num in fold_nums] + fold_train = [corpus2.TokenWriter.create_path_writer( + options.output_format, + os.path.join(fold_dir, 'train%02d.xml' % (num + 1)), tagset) + for num in fold_nums] + first = True + fold_now = 0 + while True: + par = rdr.get_next_chunk() + if not par: + break + fold_test[fold_now].write_chunk(par) + for other_num in fold_nums: + if other_num != fold_now: + fold_train[other_num].write_chunk(par) + + fold_now = (fold_now + 1) % options.num_folds + + for w in fold_test: w.finish() + for w in fold_train: w.finish() + +if __name__ == '__main__': + go()