diff --git a/libcorpus2/io/xcesreader.cpp b/libcorpus2/io/xcesreader.cpp
index d0e18afa133b06ca82352cfcba848fd64da6c620..a0861e2a1bc234f08220e065213425b71167b59e 100644
--- a/libcorpus2/io/xcesreader.cpp
+++ b/libcorpus2/io/xcesreader.cpp
@@ -18,6 +18,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 #include <libcorpus2/io/sax.h>
 #include <libpwrutils/foreach.h>
 #include <libxml++/libxml++.h>
+#include <libxml2/libxml/parser.h>
 #include <boost/make_shared.hpp>
 #include <fstream>
 
@@ -37,6 +38,8 @@ protected:
 			const AttributeList& attributes);
 	void on_end_element(const Glib::ustring & name);
 
+	void finish_sentence();
+
 	const Tagset& tagset_;
 
 	enum state_t { XS_NONE, XS_CHUNK, XS_SENTENCE, XS_TOK, XS_ORTH, XS_LEX,
@@ -45,6 +48,8 @@ protected:
 
 	bool chunkless_;
 
+	bool out_of_chunk_;
+
 	PwrNlp::Whitespace::Enum wa_;
 
 	Glib::ustring sbuf_;
@@ -105,7 +110,7 @@ XcesReaderImpl::XcesReaderImpl(const Tagset& tagset,
 		std::deque< boost::shared_ptr<Chunk> >& obuf,
 		bool disamb_only, bool disamb_sh)
 	: BasicSaxParser()
-	, tagset_(tagset), state_(XS_NONE), chunkless_(false)
+	, tagset_(tagset), state_(XS_NONE), chunkless_(false), out_of_chunk_(false)
 	, wa_(PwrNlp::Whitespace::Newline)
 	, sbuf_(), tok_(NULL), sent_(), chunk_(), obuf_(obuf)
 	, disamb_only_(disamb_only), disamb_sh_(disamb_sh)
@@ -127,6 +132,10 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name,
 				type = a.value;
 			}
 		}
+		if (out_of_chunk_) {
+			finish_sentence();
+			out_of_chunk_ = false;
+		}
 		if (state_ == XS_NONE) {
 			if (type == "s") {
 				//throw XcesError("Top level <chunk> is type=\"s\"");
@@ -191,6 +200,31 @@ void XcesReaderImpl::on_start_element(const Glib::ustring &name,
 		clear_buf();
 	} else if (name == "ns") {
 		wa_ = PwrNlp::Whitespace::None;
+	} else if (name == "tok" && state_ == XS_NONE) {
+		std::cerr << "Warning: out-of-chunk token, assuming sentence start on line ";
+		std::cerr << this->context_->input->line << "\n";
+		chunkless_ = true;
+		out_of_chunk_ = true;
+		chunk_ = boost::make_shared<Chunk>();
+		sent_ = boost::make_shared<Sentence>();
+		state_ = XS_TOK;
+		tok_ = new Token();
+		tok_->set_wa(wa_);
+		wa_ = PwrNlp::Whitespace::Space;
+	}
+}
+
+void XcesReaderImpl::finish_sentence()
+{
+	chunk_->append(sent_); // the sentence being built now belongs to chunk_
+	sent_.reset();
+	if (!chunkless_) {
+		state_ = XS_CHUNK; // chunk_ is kept; it is emitted when its own </chunk> arrives
+	} else {
+		obuf_.push_back(chunk_); // chunkless mode: emit chunk_ immediately
+		chunk_.reset();
+		state_ = XS_NONE;
+		chunkless_ = false;
 	}
 }
 
@@ -216,16 +250,7 @@ void XcesReaderImpl::on_end_element(const Glib::ustring &name)
 		tok_ = NULL;
 		state_ = XS_SENTENCE;
 	} else if (state_ == XS_SENTENCE && name == "chunk") {
-		chunk_->append(sent_);
-		sent_.reset();
-		if (chunkless_) {
-			obuf_.push_back(chunk_);
-			chunk_.reset();
-			state_ = XS_NONE;
-			chunkless_ = false;
-		} else {
-			state_ = XS_CHUNK;
-		}
+		finish_sentence();
 	} else if (state_ == XS_CHUNK && name == "chunk") {
 		obuf_.push_back(chunk_);
 		chunk_.reset();
diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp
index 00f31f4ed289bd5d9ee371563fc73c85cf7e3e15..b732690e713d9138195fc069e13da361605377ca 100644
--- a/libcorpus2/tagset.cpp
+++ b/libcorpus2/tagset.cpp
@@ -141,6 +141,7 @@ namespace {
 			std::vector< mask_t > & current,
 			const std::vector<mask_t> & to_add, mask_t to_add_attr)
 	{
+		if (to_add.empty()) return;
 		size_t current_size = current.size();
 		for (size_t ai = 1; ai < to_add.size(); ++ai) {
 			for (size_t oi = 0; oi < current_size; ++oi) {
@@ -176,6 +177,7 @@ void Tagset::parse_tag(const string_range_vector &fields, bool allow_extra,
 			std::vector<mask_t> values;
 			mask_t amask;
 			foreach (string_range& dot, dots) {
+				if (dot.empty()) continue;
 				mask_t v = get_value_mask(boost::copy_range<std::string>(dot));
 				mask_t curr = get_attribute_mask(get_value_attribute(v));
 
@@ -281,6 +283,16 @@ Tag Tagset::make_tag(idx_t pos_idx, mask_t values, bool allow_extra) const
 	//		<< " of " << pos_required_attributes_idx_[pos_idx].size() << "\n";
 	size_t has_req = PwrNlp::count_bits_set(required_values & values);
 	if (has_req != pos_required_attributes_idx_[pos_idx].size()) {
+		foreach (idx_t a, get_pos_attributes(pos_idx)) {
+			if (pos_requires_attribute(pos_idx, a)) {
+				mask_t amask = get_attribute_mask(a);
+				if ((values & amask).none()) {
+					throw TagParseError("Required attribute missing",
+						tag_to_string(Tag(get_pos_mask(pos_idx), values)),
+						get_attribute_name(a), id_string());
+				}
+			}
+		}
 		throw TagParseError("Required attribute missing",
 				tag_to_string(Tag(get_pos_mask(pos_idx), values)),
 				get_pos_name(pos_idx), id_string());
diff --git a/tagset-tool/main.cpp b/tagset-tool/main.cpp
index 791d5ccd603d483697ee6135a14c5adb28b64542..c51a991152e9dfe8358a8a9b1a26f4be1d5d3f9b 100644
--- a/tagset-tool/main.cpp
+++ b/tagset-tool/main.cpp
@@ -92,11 +92,11 @@ void tagset_info(const Corpus2::Tagset& tagset)
 	std::cerr << "Corpus2::Tagset loaded: "
 		<< tagset.pos_count() << " POSes, "
 		<< tagset.attribute_count() << " attributes, "
-		<< tagset.value_count() << " values [";
+		<< tagset.value_count() << " values [ ";
 	for (Corpus2::idx_t a = 0; a < tagset.attribute_count(); ++a) {
 		std::cerr << tagset.get_attribute_values(a).size() << " ";
 	}
-	std::cerr << "\n";
+	std::cerr << "]\n";
 	std::cerr << "Size is " << tagset.size()
 		<< " (extra size is " << tagset.size_extra() << ")\n";
 	std::cerr << "POSes: ";