diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index 3cf132509924ddcdf060b5915fee5a6f04323cce..e6873f90dbf24a466b871192bf2aa983a9089f76 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -2,8 +2,8 @@
 PROJECT(corpus2)
 
 set(corpus2_ver_major "0")
-set(corpus2_ver_minor "0")
-set(corpus2_ver_patch "2")
+set(corpus2_ver_minor "1")
+set(corpus2_ver_patch "0")
 
 
 if(NOT LIBCORPUS2_SRC_DATA_DIR)
diff --git a/libcorpus2/lexeme.cpp b/libcorpus2/lexeme.cpp
index cd5361328ed333c2f5b30770c86cc8004325203f..7c1a8355611102516621c68354e9523994833096 100644
--- a/libcorpus2/lexeme.cpp
+++ b/libcorpus2/lexeme.cpp
@@ -19,7 +19,7 @@ Lexeme Lexeme::create(const UnicodeString& lemma, const Tag& tag)
 
 bool Lexeme::is_null() const
 {
-	return lemma().length() == 0 || !tag().has_valid_tagset();
+	return lemma().length() == 0 || tag().is_null();
 }
 
 bool Lexeme::operator<(const Lexeme& other) const
diff --git a/libcorpus2/tag.cpp b/libcorpus2/tag.cpp
index d3f6fe4506cbb46e09cb3278015273c33603bf32..945b5969de08c1d57d4144bdf3bb8f796f025620 100644
--- a/libcorpus2/tag.cpp
+++ b/libcorpus2/tag.cpp
@@ -2,74 +2,58 @@
 #include <libcorpus2/tagsetmanager.h>
 
 #include <libpwrutils/foreach.h>
+#include <libpwrutils/util.h>
 
 #include <cstring>
 #include <sstream>
 
 #include <boost/functional/hash.hpp>
 
-namespace Corpus2 {
-
-Tag::Tag()
-	: pos_id_(-1), tagset_id_(-1)
-{
-}
+#include <bitset>
 
-Tag::Tag(tagset_idx_t tagset_id, pos_idx_t pos)
-	: pos_id_(pos), tagset_id_(tagset_id)
-{
-}
+namespace Corpus2 {
 
-Tag::Tag(tagset_idx_t tagset_id, pos_idx_t pos,
-		const std::vector<value_idx_t> &values)
-	: pos_id_(pos), values_(values), tagset_id_(tagset_id)
+int Tag::pos_count() const
 {
+	return PwrNlp::count_bits_set(pos_);
 }
 
-bool Tag::has_valid_tagset() const
+int Tag::get_pos_index() const
 {
-	return tagset_id_ != static_cast<tagset_idx_t>(-1)
-		&& TagsetManagerSingleton::Instance().get_cache_entry(tagset_id());
+	if (pos_ == 0) return -1;
+	return PwrNlp::lowest_bit(pos_);
 }
 
 std::string Tag::raw_dump() const
 {
 	std::ostringstream ss;
 	ss << "[";
-	ss << static_cast<int>(tagset_id_) << "#" << static_cast<int>(pos_id_);
-	foreach (value_idx_t v, values_) {
-		ss << ":" << static_cast<int>(v) ;
-	}
+	// Debug dump of the raw masks in the form "[<pos>:<values>]"; both masks
+	// are written with mask_t's stream insertion operator.
+	//ss << static_cast<int>(tagset_id_);
+	ss << "" << pos_;
+	ss << ":" << values_;
 	ss << "]";
 	return ss.str();
 }
 
 bool Tag::operator<(const Tag& other) const
 {
-	return tagset_id_ < other.tagset_id_
-			|| (tagset_id_ == other.tagset_id_
-				&& (pos_id_ < other.pos_id_
-				|| (pos_id_ == other.pos_id_
-					&& (values_.size() < other.values_.size()
-					|| (values_.size() == other.values_.size()
-						&& memcmp(&values_[0], &other.values_[0],
-							std::min(values_.size(),
-								other.values_.size())) < 0)))));
+	return pos_ < other.pos_ ||
+		(pos_ == other.pos_ &&
+		 values_ < other.values_);
 }
 
 bool Tag::operator ==(const Tag& other) const
 {
-	return tagset_id_ == other.tagset_id_
-			&& pos_id_ == other.pos_id_
-			&& values_ == other.values_;
+	return pos_ == other.pos_ && values_ == other.values_;
 }
 
 size_t hash_value(const Tag& tag)
 {
 	std::size_t seed = 0;
-	boost::hash_combine(seed, tag.pos_id_);
-	boost::hash_combine(seed, tag.tagset_id_);
-	boost::hash_combine(seed, tag.values_);
+	boost::hash_combine(seed, tag.get_pos());
+	boost::hash_combine(seed, tag.get_values());
 	return seed;
 }
 
diff --git a/libcorpus2/tag.h b/libcorpus2/tag.h
index 6db43d2aac0f6aaa74ff44d956ad18d4e824d229..716188d10c7bcd3bb75363936335beea1f53bffe 100644
--- a/libcorpus2/tag.h
+++ b/libcorpus2/tag.h
@@ -3,20 +3,19 @@
 
 #include <string>
 #include <vector>
-
+#include <cassert>
 #include <boost/cstdint.hpp>
 #include <boost/strong_typedef.hpp>
 #include <boost/operators.hpp>
+#include <libpwrutils/bitset.h>
 
 namespace Corpus2 {
 
 class Tagset;
 
 /// Typedefs for the string -> index mappings
-typedef boost::uint8_t idx_t;
-BOOST_STRONG_TYPEDEF(idx_t, pos_idx_t);
-BOOST_STRONG_TYPEDEF(idx_t, attribute_idx_t);
-BOOST_STRONG_TYPEDEF(idx_t, value_idx_t);
+typedef boost::int8_t idx_t;
+typedef PwrNlp::bitset<64> mask_t;
 BOOST_STRONG_TYPEDEF(boost::uint32_t, tagset_idx_t);
 
 /**
@@ -31,52 +30,100 @@ BOOST_STRONG_TYPEDEF(boost::uint32_t, tagset_idx_t);
  * This allows more sanity checking, esp. during tagset conversion.
  */
 class Tag
-	: boost::equality_comparable<Tag>, boost::less_than_comparable<Tag>
+//	: boost::equality_comparable<Tag>, boost::less_than_comparable<Tag>
 {
 public:
 	/// Empty tag constructor
-	Tag();
+	Tag()
+		: pos_(0), values_(0)
+	{
+	}
 
 	/// Tagset-and-POS (no values) constructor
-	Tag(tagset_idx_t tagset_id, pos_idx_t pos);
+	explicit Tag(mask_t pos)
+		: pos_(pos), values_(0)
+	{
+	}
 
 	/// Tagset-POS-values constructor
-	Tag(tagset_idx_t tagset_id, pos_idx_t pos,
-			const std::vector<value_idx_t>& values);
+	Tag(mask_t pos, mask_t values)
+		: pos_(pos), values_(values)
+	{
+	}
+
+	bool is_null() const {
+		return pos_ == 0 && values_ == 0;
+	}
+
+	int pos_count() const;
+
+	int get_pos_index() const;
 
 	/// POS (part-of-speech) accesor
-	pos_idx_t pos_id() const {
-		return pos_id_;
+	mask_t get_pos() const {
+		return pos_;
 	}
 
 	/// POS setter
-	void set_pos_id(pos_idx_t v) {
-		pos_id_ = v;
+	void set_pos(mask_t v) {
+		pos_ = v;
+	}
+
+	void add_pos(mask_t v) {
+		pos_ |= v;
 	}
 
 	/// values accesor
-	const std::vector<value_idx_t>& values() const {
+	mask_t get_values() const {
 		return values_;
 	}
 
+	mask_t get_values_for(mask_t mask) const {
+		return values_ & mask;
+	}
+
 	/// values accesor -- nonconst reference
-	std::vector<value_idx_t>& values() {
-		return values_;
+	void set_values(mask_t v) {
+		values_ = v;
 	}
 
-	/// debug aid, dump the tag's internal numeric representation
-	std::string raw_dump() const;
+	void add_values(mask_t v) {
+		values_ |= v;
+	}
+
+	void add_values_masked(mask_t value, mask_t mask) {
+		//values_ = (values_ & ~mask) | (value & mask);
+		//see http://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
+		values_ = values_ ^ ((values_ ^ value) & mask);
+	}
+
+	Tag& combine_with(const Tag& other) {
+		pos_ |= other.pos_;
+		values_ |= other.values_;
+		return *this;
+	}
+
+	Tag get_combined(const Tag& other) const {
+		Tag t(*this);
+		return t.combine_with(other);
+	}
 
-	/// tagset id accesor
-	tagset_idx_t tagset_id() const {
-		return tagset_id_;
+	Tag& mask_with(const Tag& other) {
+		pos_ &= other.pos_;
+		values_ &= other.values_;
+		return *this;
 	}
 
-	bool has_valid_tagset() const;
+	Tag get_masked(const Tag& other) const {
+		Tag t(*this);
+		return t.mask_with(other);
+	}
+
+	/// debug aid, dump the tag's internal numeric representation
+	std::string raw_dump() const;
 
 	/**
-	 * Tag comparison. Tags sort by tagset id, then pos id, and finally
-	 * value-by-value. Boost is used to provide other comparison operators.
+	 * Tag comparison.
 	 */
 	bool operator<(const Tag& other) const;
 
@@ -87,17 +134,15 @@ public:
 
 private:
 	/// the POS id
-	pos_idx_t pos_id_;
-\
-	/// the values
-	std::vector<value_idx_t> values_;
+	mask_t pos_;
 
-	/// the tagset id
-	tagset_idx_t tagset_id_;
+	/// the values
+	mask_t values_;
 
-	friend size_t hash_value(const Tag &tag);
 };
 
+size_t hash_value(const Tag &tag);
+
 } /* end ns Corpus2 */
 
 
diff --git a/libcorpus2/tagset.cpp b/libcorpus2/tagset.cpp
index dd193b397e6061e29f6bb75cf3427b141d58cf06..538d2c240a764256ec904bd25caa4f3a73c0fc43 100644
--- a/libcorpus2/tagset.cpp
+++ b/libcorpus2/tagset.cpp
@@ -6,11 +6,13 @@
 #include <libcorpus2/tagsetparser.h>
 
 #include <libpwrutils/foreach.h>
+#include <libpwrutils/util.h>
 
 #include <boost/algorithm/string.hpp>
 #include <boost/strong_typedef.hpp>
 #include <boost/algorithm/string.hpp>
 #include <boost/bind.hpp>
+#include <boost/pending/lowest_bit.hpp>
 
 #include <sstream>
 #include <iostream>
@@ -71,12 +73,11 @@ Tagset::Tagset()
 {
 }
 
-Tagset::Tagset(const char *s)
-	: id_(++next_id_)
+Tagset Tagset::from_data(const char *s)
 {
 	std::stringstream ss;
 	ss << s;
-	*this = TagsetParser::load_ini(ss);
+	return TagsetParser::load_ini(ss);
 }
 
 std::string Tagset::id_string() const
@@ -104,19 +105,19 @@ void Tagset::parse_tag(const string_range &s, bool allow_extra,
 
 namespace {
 	void append_to_multi_tag(
-			std::vector< std::vector<value_idx_t> > & current,
-			const std::vector<value_idx_t> & to_add)
+			std::vector< mask_t > & current,
+			const std::vector<mask_t> & to_add, mask_t to_add_attr)
 	{
-		foreach (std::vector<value_idx_t>& o, current) {
-			o.push_back(to_add[0]);
-		}
 		size_t current_size = current.size();
 		for (size_t ai = 1; ai < to_add.size(); ++ai) {
 			for (size_t oi = 0; oi < current_size; ++oi) {
 				current.push_back(current[oi]);
-				current.back().back() = to_add[ai];
+				current.back() = (current.back() & ~to_add_attr) | to_add[ai];
 			}
 		}
+		for (size_t i = 0; i < current_size; ++i) {
+			current[i] |= to_add[0];
+		}
 	}
 }
 
@@ -126,41 +127,54 @@ void Tagset::parse_tag(const string_range_vector &fields, bool allow_extra,
 	if (fields.empty()) {
 		throw TagParseError("No POS", "", "", id_string());
 	}
-	pos_idx_t pos_id = pos_dict_.get_id(fields[0]);
-	if (!pos_dict_.is_id_valid(pos_id)) {
+	idx_t pos_idx = get_pos_index(fields[0]);
+	if (pos_idx < 0) {
 		throw TagParseError("Invalid POS",
 				boost::copy_range<std::string>(fields[0]), "",
 				id_string());
 	}
-	std::vector< std::vector<value_idx_t> > opts(1);
+	std::vector< mask_t > all_variants;
+	all_variants.push_back(0);
 	for (size_t fi = 1; fi < fields.size(); ++fi) {
 		const string_range& r = fields[fi];
 		if (r.size() != 1 || *r.begin() != '_') {
 			string_range_vector dots;
 			boost::algorithm::split(dots, r, boost::is_any_of("."));
-			std::vector<value_idx_t> values;
+			std::vector<mask_t> values;
+			mask_t amask;
 			foreach (string_range& dot, dots) {
-				value_idx_t v = value_dict_.get_id(dot);
-				if (!value_dict_.is_id_valid(v)) {
+				mask_t v = get_value_mask(boost::copy_range<std::string>(dot));
+				mask_t curr = get_attribute_mask(get_value_attribute(v));
+
+
+				if (amask.none()) {
+					amask = curr;
+				} else if (amask != curr) {
+					throw TagParseError("Values from two attributes split by dot",
+							boost::copy_range<std::string>(r), "",
+							id_string());
+				}
+				if (v.none()) {
 					throw TagParseError("Unknown attribute value",
 							boost::copy_range<std::string>(r), "",
 							id_string());
 				}
 				values.push_back(v);
 			}
-			append_to_multi_tag(opts, values);
+			append_to_multi_tag(all_variants, values, amask);
 		} else if (!r.empty()) { // underscore handling
-			if (fi - 1 >= pos_attributes_[pos_id].size()) {
+			if (fi - 1 >= pos_attributes_[pos_idx].size()) {
 				throw TagParseError(
 						"Underscore beyond last attribute for this POS",
 						"", "", id_string());
 			}
-			attribute_idx_t attr = pos_attributes_[pos_id][fi - 1];
-			append_to_multi_tag(opts, attribute_values_[attr]);
+			idx_t attr = pos_attributes_[pos_idx][fi - 1];
+			mask_t amask = get_attribute_mask(attr);
+			append_to_multi_tag(all_variants, attribute_values_[attr], amask);
 		} // else empty, do nothing
 	}
-	foreach (std::vector<value_idx_t>& opt, opts) {
-		sink(make_tag(pos_id, opt, allow_extra));
+	foreach (mask_t variant, all_variants) {
+		sink(make_tag(pos_idx, variant, allow_extra));
 	}
 }
 
@@ -196,108 +210,101 @@ Tag Tagset::parse_simple_tag(const string_range_vector &ts,
 		throw TagParseError("Empty POS+attribute list", "", "",
 				id_string());
 	}
-	pos_idx_t pos_id = pos_dict_.get_id(ts[0]);
-	if (!pos_dict_.is_id_valid(pos_id)) {
+	idx_t pos_idx = get_pos_index(ts[0]);
+	if (pos_idx < 0) {
 		throw TagParseError("Invalid POS",
 				boost::copy_range<std::string>(ts[0]), "", id_string());
 	}
-	const std::vector<bool>& valid_attrs_mask =
-			get_pos_valid_attributes(pos_id);
-	Tag tag(id_, pos_id);
-	std::vector<value_idx_t> vvv(attribute_dict_.size(),
-			static_cast<value_idx_t>(0));
-	tag.values().swap(vvv);
-
+	mask_t values = 0;
 	for (size_t i = 1; i < ts.size(); ++i) {
 		if (!ts[i].empty()) {
-			value_idx_t val_id = value_dict_.get_id(ts[i]);
-			if (!value_dict_.is_id_valid(val_id)) {
-				attribute_idx_t a = attribute_dict_.get_id(ts[i]);
-				if (attribute_dict_.is_id_valid(a)) {
-					tag.values()[a] = 0;
+			mask_t val = get_value_mask(boost::copy_range<std::string>(ts[i]));
+			if (val == 0) {
+				mask_t a = get_attribute_mask(ts[i]);
+				if (a != 0) {
+					values &= (~a);
 				} else {
 					throw TagParseError("Unknown attribute value",
 							boost::copy_range<std::string>(ts[i]), "",
 							id_string());
 				}
 			} else {
-				attribute_idx_t attr_id = get_value_attribute(val_id);
-				if (valid_attrs_mask[attr_id] || allow_extra) {
-					tag.values()[attr_id] = val_id;
-				}
+				mask_t a = get_attribute_mask(get_value_attribute(val));
+				values = (values & ~a) | val;
 			}
 		}
 	}
-	return tag;
+
+	return make_tag(pos_idx, values, allow_extra);
 }
 
-Tag Tagset::make_tag(pos_idx_t pos, const std::vector<value_idx_t>& values,
-		bool allow_extra) const
+Tag Tagset::make_tag(idx_t pos_idx, mask_t values, bool allow_extra) const
 {
-	const std::vector<bool>& valid_attrs_mask =
-			get_pos_valid_attributes(pos);
-	Tag tag(id_, pos);
-	std::vector<value_idx_t> vvv(attribute_dict_.size(),
-			static_cast<value_idx_t>(0));
-	tag.values().swap(vvv);
-
-	for (size_t i = 0; i < values.size(); ++i) {
-		value_idx_t val_id = values[i];
-		attribute_idx_t attr_id = get_value_attribute(val_id);
-		if (valid_attrs_mask[attr_id] || allow_extra) {
-			tag.values()[attr_id] = val_id;
-		} else {
-			throw TagParseError("Attribute not valid for this POS",
-					attribute_dict_.get_string(attr_id),
-					pos_dict_.get_string(pos), id_string());
-		}
+	mask_t required_values = get_pos_required_mask(pos_idx);
+	//std::cerr << values << "\n";
+	//std::cerr << required_values << "\n";
+	//std::cerr << (required_values & values) << "\n";
+	//std::cerr << PwrNlp::count_bits_set(required_values & values)
+	//		<< " of " << pos_required_attributes_idx_[pos_idx].size() << "\n";
+	size_t has_req = PwrNlp::count_bits_set(required_values & values); // required bits present
+	if (has_req != pos_required_attributes_idx_[pos_idx].size()) {
+		throw TagParseError("Required attribute missing",
+				tag_to_string(Tag(get_pos_mask(pos_idx), values)),
+				get_pos_name(pos_idx), id_string());
+	}
+	mask_t valid_values = get_pos_value_mask(pos_idx);
+	mask_t invalid = values & ~valid_values; // value bits this POS does not allow
+	if (invalid.any() && !allow_extra) { // lowest_bit yields a bit index: rebuild the one-bit mask
+		mask_t first_invalid = static_cast<mask_t>(1) << PwrNlp::lowest_bit(invalid);
+		throw TagParseError("Attribute not valid for this POS",
+				get_value_name(first_invalid),
+				get_pos_name(pos_idx), id_string());
 	}
-	return tag;
+	// check singularity?
+	return Tag(get_pos_mask(pos_idx), values);
 }
 
 Tag Tagset::make_ign_tag() const
 {
-	pos_idx_t ign_pos = pos_dictionary().get_id("ign");
-	assert(pos_dictionary().is_id_valid(ign_pos));
-	Tag tag(id_, ign_pos);
-	tag.values().resize(attribute_dict_.size(),
-			static_cast<value_idx_t>(0));
-	return tag;
+	mask_t ign_pos_mask = get_pos_mask("ign");
+	assert(ign_pos_mask.any());
+	return Tag(ign_pos_mask);
 }
 
 bool Tagset::validate_tag(const Tag &t, bool allow_extra,
 		std::ostream* os) const
 {
-	if (!pos_dict_.is_id_valid(t.pos_id())) {
+	if (t.pos_count() != 1) {
 		if (os) {
-			(*os) << " POS not valid : " << (int) t.pos_id();
+			(*os) << " POS not singular :  " << t.pos_count();
 		}
 		return false;
 	}
-	std::vector<bool> valid = get_pos_valid_attributes(t.pos_id());
-	std::vector<bool> required = get_pos_required_attributes(t.pos_id());
-	if (t.values().size() < attribute_dict_.size()) {
+	size_t ts = tag_size(t);
+	if (ts != 1) {
 		if (os) {
-			(*os) << " Values size below tagset attribute count: "
-				<< t.values().size() << "<" << attribute_dict_.size();
+			(*os) << " Tag not singular :  " << ts;
 		}
 		return false;
 	}
-	if (!allow_extra && t.values().size() > attribute_dict_.size()) {
+
+	idx_t pos_idx = t.get_pos_index();
+	if (!pos_dict_.is_id_valid(pos_idx)) {
 		if (os) {
-			(*os) << " Values size above tagset attribute count"
-				<< t.values().size() << ">" << attribute_dict_.size();
+			(*os) << " POS not valid : " << (int)pos_idx;
 		}
 		return false;
 	}
-	for (attribute_idx_t i = static_cast<attribute_idx_t>(0);
-			i < t.values().size(); ++i) {
-		value_idx_t v = t.values()[i];
-		if (v == 0) {
+	std::vector<bool> valid = get_pos_attributes_flag(pos_idx);
+	std::vector<bool> required = get_pos_required_attributes(pos_idx);
+
+	for (idx_t i = 0; i < attribute_count(); ++i) {
+		mask_t value = t.get_values_for(get_attribute_mask(i));
+		if (value == 0) {
 			if (required[i]) {
 				if (os) {
-					(*os)  << " Required attribuite "
-						<< attribute_dictionary().get_string(i)
+					(*os)  << " Required attribute "
+						<< get_attribute_name(i)
 						<< " missing";
 				}
 				return false;
@@ -306,27 +313,9 @@ bool Tagset::validate_tag(const Tag &t, bool allow_extra,
 			if (!valid[i] && !allow_extra) {
 				if (os) {
 					(*os) << " Extra attribute value: "
-						<< value_dictionary().get_string(v)
+						<< get_value_name(value)
 						<< " (attribute "
-						<< attribute_dictionary().get_string(i) << ")";
-				}
-				return false;
-			}
-			if (!value_dict_.is_id_valid(v)) {
-				if (os) {
-					(*os) << " Invalid value at attribite "
-						<< attribute_dictionary().get_string(i);
-				}
-				return false;
-			}
-			attribute_idx_t a = value_attribute_[v];
-			if (a != i) {
-				if (os) {
-					(*os) << " Value does not match attribute, got "
-						<< value_dictionary().get_string(v) << " ("
-						<< attribute_dictionary().get_string(a) << ") in"
-						<< attribute_dictionary().get_string(i)
-						<< "'s position";
+						<< get_attribute_name(i) << ")";
 				}
 				return false;
 			}
@@ -338,23 +327,25 @@ bool Tagset::validate_tag(const Tag &t, bool allow_extra,
 std::string Tagset::tag_to_string(const Tag &tag) const
 {
 	std::ostringstream ss;
-	ss << pos_dict_.get_string(tag.pos_id());
-	const std::vector<attribute_idx_t>& attrs =
-			get_pos_attributes(tag.pos_id());
-	foreach (const attribute_idx_t& a, attrs) {
-		if (pos_required_attributes_[tag.pos_id()][a] ||
-				tag.values()[a] > 0) {
+	idx_t pos_idx = tag.get_pos_index();
+	ss << get_pos_name(pos_idx);
+	const std::vector<idx_t>& attrs = get_pos_attributes(pos_idx);
+	foreach (const idx_t& a, attrs) {
+		mask_t value = tag.get_values_for(get_attribute_mask(a));
+		if (pos_requires_attribute(pos_idx, a) || value.any()) {
 			ss << ":";
-			if (tag.values()[a] > 0) {
-				ss << value_dict_.get_string(tag.values()[a]);
+			if (value.any()) {
+				ss << get_value_name(value);
 			}
 		}
 	}
 	// print extra attributes
-	for (size_t i = 0; i < attribute_dict_.size(); ++i) {
-		if (tag.values()[i] > 0 &&
-				!pos_valid_attributes_[tag.pos_id()][i]) {
-			ss << ":" << value_dict_.get_string(tag.values()[i]);
+	for (idx_t a = 0; a < attribute_count(); ++a) {
+		if (!pos_has_attribute(pos_idx, a)) {
+			mask_t value = tag.get_values_for(get_attribute_mask(a));
+			if (value.any()) {
+				ss << ":" << get_value_name(value);
+			}
 		}
 	}
 	return ss.str();
@@ -363,65 +354,247 @@ std::string Tagset::tag_to_string(const Tag &tag) const
 std::string Tagset::tag_to_no_opt_string(const Tag &tag) const
 {
 	std::ostringstream ss;
-	ss << pos_dict_.get_string(tag.pos_id());
-	const std::vector<attribute_idx_t>& attrs =
-			get_pos_attributes(tag.pos_id());
-	foreach (const attribute_idx_t& a, attrs) {
+	idx_t pos_idx = tag.get_pos_index();
+	ss << get_pos_name(pos_idx);
+	const std::vector<idx_t>& attrs = get_pos_attributes(pos_idx);
+	foreach (const idx_t& a, attrs) {
+		mask_t value = tag.get_values_for(get_attribute_mask(a));
 		ss << ":";
-		if (tag.values()[a] > 0) {
-			ss << value_dict_.get_string(tag.values()[a]);
+		if (value.any()) {
+			ss << get_value_name(value);
 		} else {
-			ss << attribute_dict_.get_string(a);
+			ss << get_attribute_name(a);
 		}
 	}
 	return ss.str();
 }
 
-attribute_idx_t Tagset::get_value_attribute(value_idx_t id) const
+size_t Tagset::tag_size(const Tag& tag) const
 {
-	if (!value_dict_.is_id_valid(id)) {
-		std::stringstream ss;
-		ss << "get_value_attribute fail " << (int)id;
-		throw Corpus2Error(ss.str());
+	size_t s = PwrNlp::count_bits_set(tag.get_pos());
+	foreach (mask_t attribute_mask, all_attribute_masks()) {
+		mask_t values = tag.get_values_for(attribute_mask);
+		size_t x = PwrNlp::count_bits_set(values);
+		if (x > 1) {
+			s *= x;
+		}
 	}
-	return value_attribute_[id];
+	return s;
 }
 
-const std::vector<value_idx_t>& Tagset::get_attribute_values(
-		attribute_idx_t a) const
+bool Tagset::tag_is_singular(const Tag& tag) const
 {
-	assert(attribute_dict_.is_id_valid(a));
-	return attribute_values_[a];
+	if (PwrNlp::count_bits_set(tag.get_pos()) != 1) return false;
+	foreach (mask_t attribute_mask, all_attribute_masks()) {
+		mask_t values = tag.get_values_for(attribute_mask);
+		if (PwrNlp::count_bits_set(values) > 1) return false;
+	}
+	return true;
 }
 
-const std::vector<attribute_idx_t>& Tagset::get_pos_attributes(
-		pos_idx_t pos) const
+std::vector<Tag> Tagset::split_tag(const Tag& tag) const
+{
+	std::vector<Tag> tags;
+	mask_t pos = tag.get_pos();
+	while (pos.any()) { // seed the result with one values-free tag per POS bit set
+		idx_t pos_idx = PwrNlp::lowest_bit(pos);
+		mask_t pos_mask = static_cast<mask_t>(1) << pos_idx;
+		pos ^= pos_mask;
+		tags.push_back(Tag(pos_mask));
+	}
+
+	for (idx_t a = 0; a < attribute_count(); ++a) {
+		mask_t ma = get_attribute_mask(a);
+		mask_t v = tag.get_values_for(ma);
+		if (ma.any()) {
+			bool dup = false;
+			size_t sz = tags.size(); // tags built before this attribute was processed
+			foreach (mask_t vm, get_attribute_values(a)) {
+				if ((v & vm).any()) {
+					if (dup) { // 2nd+ value: keep copies holding the previous value
+						for (size_t i = 0; i < sz; ++i) {
+							tags.push_back(tags[i]);
+						}
+					}
+					dup = true;
+					for (size_t i = 0; i < sz; ++i) { // overwrite within ma, never OR, to stay singular
+						tags[i].add_values_masked(vm, ma);
+					}
+				}
+			}
+		}
+	}
+	return tags;
+}
+
+idx_t Tagset::get_pos_index(const string_range& pos) const
+{
+	return pos_dict_.get_id(pos);
+}
+
+const std::string& Tagset::get_pos_name(idx_t pos) const
+{
+	return pos_dict_.get_string(pos);
+}
+
+const std::string& Tagset::get_pos_name(mask_t pos) const
+{
+	return pos_dict_.get_string(get_pos_index(pos));
+}
+
+mask_t Tagset::get_pos_mask(const string_range& pos) const
+{
+	return get_pos_mask(get_pos_index(pos));
+}
+
+mask_t Tagset::get_pos_mask(idx_t pos) const
+{
+	if (pos >= 0) {
+		return static_cast<mask_t>(1) << pos;
+	} else {
+		return 0;
+	}
+}
+
+idx_t Tagset::get_pos_index(mask_t pos) const
+{
+	if (pos.none()) {
+		return -1;
+	} else {
+		return PwrNlp::lowest_bit(pos);
+	}
+}
+
+idx_t Tagset::get_attribute_index(const string_range& a) const
+{
+	return attribute_dict_.get_id(a);
+}
+
+const std::string& Tagset::get_attribute_name(idx_t a) const
+{
+	return attribute_dict_.get_string(a);
+}
+
+const std::vector<mask_t>& Tagset::get_attribute_values(idx_t a) const
+{
+	static std::vector<mask_t> null_vec;
+	if (a < 0 || a >= attribute_count()) {
+		return null_vec;
+	} else {
+		return attribute_values_[a];
+	}
+}
+
+mask_t Tagset::get_attribute_mask(idx_t a) const
+{
+	if (a < 0 || a >= attribute_count()) {
+		return 0;
+	} else {
+		return attribute_masks_[a];
+	}
+}
+
+mask_t Tagset::get_attribute_mask(const string_range& a) const
+{
+	return get_attribute_mask(get_attribute_index(a));
+}
+
+mask_t Tagset::get_value_mask(const std::string& v) const
+{
+	std::map<std::string, mask_t>::const_iterator ci;
+	ci = string_to_value_mask_.find(v);
+	if (ci == string_to_value_mask_.end()) {
+		return 0;
+	} else {
+		return ci->second;
+	}
+}
+
+const std::string& Tagset::get_value_name(mask_t v) const
+{
+	static std::string nullstr;
+	std::map<mask_t, std::string>::const_iterator ci;
+	ci = value_mask_to_string_.find(v);
+	if (ci == value_mask_to_string_.end()) {
+		return nullstr;
+	} else {
+		return ci->second;
+	}
+}
+
+idx_t Tagset::get_value_attribute(mask_t v) const
+{
+	std::map<mask_t, idx_t>::const_iterator ci;
+	ci = value_mask_to_attribute_index_.find(v);
+	if (ci == value_mask_to_attribute_index_.end()) {
+		return -1;
+	} else {
+		return ci->second;
+	}
+}
+
+const std::vector<idx_t>& Tagset::get_pos_attributes(idx_t pos) const
 {
 	assert(pos_dict_.is_id_valid(pos));
 	return pos_attributes_[pos];
 }
 
-const std::vector<bool>& Tagset::get_pos_valid_attributes(
-		pos_idx_t pos) const
+const std::vector<bool>& Tagset::get_pos_attributes_flag(
+		idx_t pos) const
 {
 	assert(pos_dict_.is_id_valid(pos));
 	return pos_valid_attributes_[pos];
 }
 
 const std::vector<bool>& Tagset::get_pos_required_attributes(
-		pos_idx_t pos) const
+		idx_t pos) const
 {
 	assert(pos_dict_.is_id_valid(pos));
 	return pos_required_attributes_[pos];
 }
 
+bool Tagset::pos_requires_attribute(idx_t pos, idx_t attribute) const
+{
+	return pos_required_attributes_[pos][attribute];
+}
+
+bool Tagset::pos_has_attribute(idx_t pos, idx_t attribute) const
+{
+	return pos_valid_attributes_[pos][attribute];
+}
+
+mask_t Tagset::get_pos_value_mask(idx_t pos) const
+{
+	return pos_valid_value_masks_[pos];
+}
+
+mask_t Tagset::get_pos_required_mask(idx_t pos) const
+{
+	return pos_required_value_masks_[pos];
+}
+
+int Tagset::pos_count() const
+{
+	return pos_dict_.size();
+}
+
+int Tagset::attribute_count() const
+{
+	return attribute_dict_.size();
+}
+
+int Tagset::value_count() const
+{
+	return value_mask_to_string_.size();
+}
+
 size_t Tagset::size() const
 {
 	size_t sum = 0;
 	for (size_t p = 0; p < pos_dict_.size(); ++p) {
 		size_t pos_size = 1;
 		for (size_t i = 0; i < pos_attributes_[p].size(); ++i) {
-			attribute_idx_t a = pos_attributes_[p][i];
+			idx_t a = pos_attributes_[p][i];
 			if (pos_required_attributes_[p][a]) {
 				pos_size *= attribute_values_[a].size();
 			} else {
@@ -461,14 +634,14 @@ void Tagset::lexemes_into_token(Token& tok, const UnicodeString& lemma,
 	}
 }
 
-size_t Tagset::get_original_pos_index(pos_idx_t pos) const
+int Tagset::get_original_pos_index(idx_t pos) const
 {
-	std::map<pos_idx_t, size_t>::const_iterator i =
+	std::map<idx_t, int>::const_iterator i =
 			original_pos_indices_.find(pos);
 	if (i != original_pos_indices_.end()) {
 		return i->second;
 	} else {
-		return static_cast<size_t>(-1);
+		return -1;
 	}
 }
 
diff --git a/libcorpus2/tagset.h b/libcorpus2/tagset.h
index 1ec350e12d8559ed9b5d8e2306450b067996cd81..44cfbf25bb4effbc1906e070ef545cc57a1a7d2b 100644
--- a/libcorpus2/tagset.h
+++ b/libcorpus2/tagset.h
@@ -99,10 +99,10 @@ public:
 	Tagset();
 
 	/**
-	 * Tagset convenience constructor, parse a string as if it were the
+	 * Tagset convenience creation function, parse a string as if it were the
 	 * contents of a tagset ini file
 	 */
-	explicit Tagset(const char*);
+	static Tagset from_data(const char*);
 
 	/**
 	 * Tag parsing -- functional version, whole tag string.
@@ -210,8 +210,7 @@ public:
 	 * The values are assumed to be valid in this tagset, but are checked
 	 * for correctness with regards to the POS.
 	 */
-	Tag make_tag(pos_idx_t pos, const std::vector<value_idx_t>& values,
-			bool allow_extra) const;
+	Tag make_tag(idx_t pos, mask_t values, bool allow_extra) const;
 
 	/**
 	 * Convenience function for creating a 'ign' (ignored) tag within this
@@ -251,39 +250,144 @@ public:
 	 */
 	std::string tag_to_no_opt_string(const Tag &tag) const;
 
+	/**
+	 * Compute the number of singular tags that can be represented by the given
+	 * tag, with the following restrictions:
+	 * - the tags must be sub-tags of the given tag
+	 * - the tags must have a value for every attribute where the given tag has
+	 *   a non-zero value
+	 *
+	 * @returns 0 if the tag is null, 1 if the tag is singular, otherwise the
+	 *          number of different singular tags conforming to the
+	 *          restrictions above that can be constructed from the given tag.
+	 */
+	size_t tag_size(const Tag& tag) const;
+
+	/**
+	 * Check if a tag actually represents only one tag.
+	 *
+	 * A tag is singular if it:
+	 *  - has exactly one POS bit set
+	 *  - has at most one bit set in each attribute.
+	 * Note that the tag might be invalid, this is not checked.
+	 */
+	bool tag_is_singular(const Tag& tag) const;
+
+	/**
+	 * Split a tag into a vector of singular tags. Validity is not checked.
+	 *
+	 * @see tag_size
+	 *
+	 * @returns a vector of tags, each of which is singular, with size equal
+	 *          to tag_size called on the tag being split. Each returned tag
+	 *          in the resulting vector is a sub-tag of the original tag, and
+	 *          all not-empty attributes of the original tag are not empty in
+	 *          the split tag as well.
+	 */
+	std::vector<Tag> split_tag(const Tag& tag) const;
+
 	/// POS name <-> index dictionary getter
-	const SymbolDictionary<pos_idx_t>& pos_dictionary() const {
+	const SymbolDictionary<idx_t>& pos_dictionary() const {
 		return pos_dict_;
 	}
 
 	/// attribute name <-> index dictionary getter
-	const SymbolDictionary<attribute_idx_t>& attribute_dictionary() const {
+	const SymbolDictionary<idx_t>& attribute_dictionary() const {
 		return attribute_dict_;
 	}
 
-	/// value name <-> index dictionary getter
-	const SymbolDictionary<value_idx_t>& value_dictionary() const {
-		return value_dict_;
-	}
+	/// POS name -> index mapping
+	/// @returns -1 on invalid name
+	idx_t get_pos_index(const string_range& pos) const;
+
+	/// POS index -> name
+	/// @returns empty string on invalid index
+	const std::string& get_pos_name(idx_t pos) const;
+
+	/// POS mask -> name
+	/// @returns empty string on invalid index
+	const std::string& get_pos_name(mask_t pos) const;
+
+	/// POS name -> mask mapping
+	/// @return null mask on invalid name
+	mask_t get_pos_mask(const string_range& pos) const;
+
+	/// POS index -> mask mapping
+	/// @return null mask on invalid index
+	mask_t get_pos_mask(idx_t pos) const;
+
+	/// POS mask -> index mapping
+	/// @return -1 on empty mask, unspecified if more than one POS is set
+	idx_t get_pos_index(mask_t pos) const;
+
+
+	/// Attribute name -> index mapping
+	/// @returns -1 on invalid name
+	idx_t get_attribute_index(const string_range& a) const;
+
+	/// Attribute index -> name
+	/// @returns empty string on invalid index
+	const std::string& get_attribute_name(idx_t pos) const;
+
+	/// Value mask -> attribute index mapping.
+	/// if the value mask contains values from more than one attribute,
+	/// behavior is not well defined
+	/// @return -1 on invalid mask
+	idx_t get_value_attribute(mask_t v) const;
 
-	/// Getter for the value -> attribute mapping
-	attribute_idx_t get_value_attribute(value_idx_t id) const;
+	/// Attribute index -> vector of valid value masks mapping
+	/// @return empty vector on invalid index
+	const std::vector<mask_t>& get_attribute_values(idx_t a) const;
+
+	/// Attribute index -> combined value mask
+	/// @return null mask on invalid index
+	mask_t get_attribute_mask(idx_t a) const;
+
+	/// Attribute name -> combined value mask
+	/// @return null mask on invalid name
+	mask_t get_attribute_mask(const string_range& a) const;
+
+
+	/// Value name -> mask
+	/// @returns null mask on invalid name
+	mask_t get_value_mask(const std::string& v) const;
+
+	/// Value mask -> name
+	/// @returns empty string on invalid mask
+	const std::string& get_value_name(mask_t v) const;
 
-	/// Getter for the attribute -> valid values mapping
-	const std::vector<value_idx_t>& get_attribute_values(
-			attribute_idx_t a) const;
 
 	/// Getter for the pos -> valid attributes (in order) mapping
-	const std::vector<attribute_idx_t>& get_pos_attributes(
-			pos_idx_t pos) const;
+	/// Valid attributes are both the required and optional attributes.
+	/// Generally the optional ones should be after the required ones.
+	const std::vector<idx_t>& get_pos_attributes(idx_t pos) const;
 
 	/// Getter for the pos -> valid attributes flag vector
-	const std::vector<bool>& get_pos_valid_attributes(
-			pos_idx_t pos) const;
+	const std::vector<bool>& get_pos_attributes_flag(idx_t pos) const;
 
 	/// Getter for the pos -> required attributes flag vector
-	const std::vector<bool>& get_pos_required_attributes(
-			pos_idx_t pos) const;
+	const std::vector<bool>& get_pos_required_attributes(idx_t pos) const;
+
+	/// @returns true if the given pos _requires_ the given attribute
+	bool pos_requires_attribute(idx_t pos, idx_t attribute) const;
+
+	/// @returns true if the given pos _allows_ the given attribute
+	bool pos_has_attribute(idx_t pos, idx_t attribute) const;
+
+	/// Getter for a mask covering all valid values for a given pos
+	mask_t get_pos_value_mask(idx_t pos) const;
+
+	/// Getter for a mask covering all required attributes of a pos
+	mask_t get_pos_required_mask(idx_t pos) const;
+
+	/// The number of POSes in this tagset
+	int pos_count() const;
+
+	/// The number of attributes in this tagset
+	int attribute_count() const;
+
+	/// The number of values in this tagset
+	int value_count() const;
 
 	/**
 	 * Tagset cardinality counter -- the number of different valid tags
@@ -341,7 +445,52 @@ public:
 	}
 
 	/// get the original index of the POS in the tagset definition
-	size_t get_original_pos_index(pos_idx_t pos) const;
+	int get_original_pos_index(idx_t pos) const;
+
+	/// Helper iterator class for the mask ranges: ++ shifts the held mask left by one bit, -- right
+	struct mask_iterator
+	{
+		typedef mask_t value_type;
+		typedef std::forward_iterator_tag iterator_category;
+		typedef int difference_type;
+		typedef const mask_t *pointer;
+		typedef const mask_t &reference;
+		mask_iterator(const mask_iterator &i): i_(i.i_) {}
+		mask_iterator(const mask_t& i) : i_(i) {}
+		// Prefix forms shift in place; postfix forms shift too but return the previous position.
+		mask_iterator &operator++() { i_ <<= 1; return *this; }
+		mask_iterator operator++(int) { mask_iterator c(*this); i_ <<= 1; return c; }
+		mask_iterator &operator--() { i_ >>= 1; return *this; }
+		mask_iterator operator--(int) { mask_iterator c(*this); i_ >>= 1; return c; }
+
+		const mask_t &operator*() const { return i_; }
+
+		bool operator==(const mask_iterator &i) const { return i_ == i.i_; }
+		bool operator!=(const mask_iterator &i) const { return i_ != i.i_; }
+
+	private:
+		mask_t i_;
+	};
+
+	/// Range getter for all the valid POS masks, in order, compatible with
+	/// boost's foreach
+	/// It is possible to use a foreach (mask_t m, tagset.all_*_masks()) {...}
+	boost::iterator_range<mask_iterator> all_pos_masks() const {
+		return boost::iterator_range<mask_iterator>(static_cast<mask_t>(1),
+				static_cast<mask_t>(1) << pos_count());
+	}
+
+	/// Range getter for all valid value masks
+	boost::iterator_range<mask_iterator> all_value_masks() const {
+		return boost::iterator_range<mask_iterator>(static_cast<mask_t>(1),
+				static_cast<mask_t>(1) << value_count());
+	}
+
+	/// Getter for the per-attribute combined value masks
+	const std::vector<mask_t>& all_attribute_masks() const {
+		return attribute_masks_;
+	}
+
 
 private:
 	/// Temporary solution to allow splitting the parser into a separate
@@ -358,34 +507,49 @@ private:
 	static tagset_idx_t next_id_;
 
 	/// String - number dictionary for the POS names
-	SymbolDictionary<pos_idx_t> pos_dict_;
+	SymbolDictionary<idx_t> pos_dict_;
 
 	/// String - number dictionary for the attribute names
-	SymbolDictionary<attribute_idx_t> attribute_dict_;
+	SymbolDictionary<idx_t> attribute_dict_;
 
-	/// String - number dictionary for the attribute values
-	SymbolDictionary<value_idx_t> value_dict_;
+	/// Value names to masks
+	std::map<std::string, mask_t> string_to_value_mask_;
+
+	/// Value masks to names
+	std::map<mask_t, std::string> value_mask_to_string_;
 
 	/// The original indices of the POSes in the tagset definition
-	std::map<pos_idx_t, size_t> original_pos_indices_;
+	std::map<idx_t, int> original_pos_indices_;
+
+	/// mapping from attribute indices to valid value masks
+	std::vector< std::vector<mask_t> > attribute_values_;
 
-	/// mapping from attribute indices to valid value indices
-	std::vector< std::vector<value_idx_t> > attribute_values_;
+	/// Attribute index to combined value mask
+	std::vector<mask_t> attribute_masks_;
 
-	/// reverse mapping, from a value index to the respective attribute
+	/// reverse mapping, from a value mask to the respective attribute
 	/// index (values are assumed to be unique and not shared between
 	/// attributes)
-	std::vector<attribute_idx_t> value_attribute_;
+	std::map<mask_t, idx_t> value_mask_to_attribute_index_;
 
 	/// POS to valid attribute indices mapping
 	/// The order of the attributes is important, as it affects string
 	/// output and the behavior of the _ special character in parsing
-	std::vector< std::vector<attribute_idx_t> > pos_attributes_;
+	std::vector< std::vector<idx_t> > pos_attributes_;
+
+	/// POS to required attribute indices
+	std::vector< std::vector<idx_t> > pos_required_attributes_idx_;
+
+	/// POS to combined valid attribute value mask
+	std::vector<mask_t> pos_valid_value_masks_;
+
+	/// POS to combined required attribute value mask
+	std::vector<mask_t> pos_required_value_masks_;
 
-	/// Flags for attributes which are valid for a given POS
+	/// Flags for attribute indices which are valid for a given POS
 	std::vector< std::vector<bool> > pos_valid_attributes_;
 
-	/// Flags for attributes which are required for a given POS
+	/// Flags for attribute indices which are required for a given POS
 	std::vector< std::vector<bool> > pos_required_attributes_;
 };
 
diff --git a/libcorpus2/tagsetparser.cpp b/libcorpus2/tagsetparser.cpp
index 3d167cd172ada4f20e2a2bec8597507d20a6b3de..3cf0e9edabdde05b6af6600127e4bb089079012e 100644
--- a/libcorpus2/tagsetparser.cpp
+++ b/libcorpus2/tagsetparser.cpp
@@ -36,7 +36,7 @@ Tagset TagsetParser::load_ini(std::istream &is)
 	std::set<std::string> symbols(values);
 	typedef std::map< std::string, std::deque<std::string> > vmap_t;
 	vmap_t vmap;
-	typedef std::map< std::string, std::vector<attribute_idx_t> > pmap_t;
+	typedef std::map< std::string, std::vector<idx_t> > pmap_t;
 	pmap_t pmap;
 	typedef std::map< std::string, std::vector<bool> > reqmap_t;
 	reqmap_t reqmap;
@@ -76,26 +76,32 @@ Tagset TagsetParser::load_ini(std::istream &is)
 		}
 	}
 
-	std::vector<std::string> vec;
-	std::copy(values.begin(), values.end(),
-			std::inserter(vec, vec.begin()));
-	if (vec[0] != "@null") {
-		throw TagsetParseError("First value not '@null'", line_no, vec[0]);
+	if (*values.begin() != "@null") {
+		throw TagsetParseError("First value not '@null'", line_no,
+			*values.begin());
 	}
-	tagset.value_dict_.load_sorted_data(vec);
 
-	vec.clear();
-	tagset.value_attribute_.resize(values.size());
+	mask_t current_value = 1;
+	std::vector<std::string> vec;
+	idx_t current_attribute_index = 0;
 	foreach (const vmap_t::value_type v, vmap) {
+		mask_t attribute_mask = 0;
 		vec.push_back(v.first);
 		tagset.attribute_values_.resize(
 				tagset.attribute_values_.size() + 1);
 		foreach (const std::string& s, v.second) {
-			tagset.attribute_values_.back().push_back(
-					tagset.value_dict_.get_id(s));
-			value_idx_t v = tagset.value_dict_.get_id(s);
-			tagset.value_attribute_[v] = vec.size() - 1;
+			tagset.attribute_values_.back().push_back(current_value);
+			tagset.value_mask_to_attribute_index_.insert(
+					std::make_pair(current_value, current_attribute_index));
+			tagset.string_to_value_mask_.insert(
+					std::make_pair(s, current_value));
+			tagset.value_mask_to_string_.insert(
+					std::make_pair(current_value, s));
+			attribute_mask |= current_value;
+			current_value <<= 1;
 		}
+		tagset.attribute_masks_.push_back(attribute_mask);
+		++current_attribute_index;
 	}
 	tagset.attribute_dict_.load_sorted_data(vec);
 
@@ -111,7 +117,7 @@ Tagset TagsetParser::load_ini(std::istream &is)
 				throw TagsetParseError("Duplicate symbol", line_no, v[0]);
 			}
 			poses_plain.push_back(v[0]);
-			std::vector<attribute_idx_t>& pattrs = pmap[v[0]];
+			std::vector<idx_t>& pattrs = pmap[v[0]];
 			std::vector<bool>& req_mask = reqmap[v[0]];
 			req_mask.resize(tagset.attribute_dict_.size());
 			v.pop_front();
@@ -122,7 +128,7 @@ Tagset TagsetParser::load_ini(std::istream &is)
 					required = false;
 					s = s.substr(1, s.size() - 2);
 				}
-				attribute_idx_t a = tagset.attribute_dict_.get_id(s);
+				idx_t a = tagset.attribute_dict_.get_id(s);
 				if (!tagset.attribute_dict_.is_id_valid(a)) {
 					throw TagsetParseError("Attribute name invalid",
 							line_no, s);
@@ -137,20 +143,31 @@ Tagset TagsetParser::load_ini(std::istream &is)
 	vec.clear();
 	foreach (const pmap_t::value_type v, pmap) {
 		vec.push_back(v.first);
+		mask_t valid(0);
+		mask_t required(0);
 		tagset.pos_attributes_.push_back(v.second);
+		tagset.pos_required_attributes_idx_.resize(
+				tagset.pos_required_attributes_idx_.size() + 1);
 		tagset.pos_valid_attributes_.push_back(
 				std::vector<bool>(tagset.attribute_values_.size(), false));
-		foreach (attribute_idx_t a, v.second) {
+		foreach (idx_t a, v.second) {
+			valid |= tagset.get_attribute_mask(a);
+			if (reqmap[v.first][a]) {
+				required |= tagset.get_attribute_mask(a);
+				tagset.pos_required_attributes_idx_.back().push_back(a);
+			}
 			tagset.pos_valid_attributes_.back()[a] = true;
 		}
 		tagset.pos_required_attributes_.push_back(reqmap[v.first]);
+		tagset.pos_valid_value_masks_.push_back(valid);
+		tagset.pos_required_value_masks_.push_back(required);
 	}
 	tagset.pos_dict_.load_sorted_data(vec);
 	if (tagset.pos_dict_.size() == 0) {
-		throw TagsetParseError("No POS in tagset", 0, "");
+		throw TagsetParseError("No POS in tagset", line_no, "");
 	}
 	for (size_t i = 0; i < poses_plain.size(); ++i) {
-		pos_idx_t p = tagset.pos_dictionary().get_id(poses_plain[i]);
+		idx_t p = tagset.pos_dictionary().get_id(poses_plain[i]);
 		tagset.original_pos_indices_.insert(std::make_pair(p,i));
 	}
 
@@ -161,20 +178,20 @@ void TagsetParser::save_ini(const Tagset &tagset, std::ostream &os)
 {
 	os << "# Autogenerated by Corpus2\n\n";
 	os << "[ATTR]\n";
-	attribute_idx_t a(0);
+	idx_t a(0);
 	while (tagset.attribute_dict_.is_id_valid(a)) {
 		os << tagset.attribute_dict_.get_string(a) << "\t= ";
-		foreach (value_idx_t v, tagset.get_attribute_values(a)) {
-			os << tagset.value_dict_.get_string(v) << " ";
+		foreach (mask_t m, tagset.get_attribute_values(a)) {
+			os << tagset.get_value_name(m) << " ";
 		}
 		os << "\n";
 		++a;
 	}
 	os << "\n[POS]\n";
-	pos_idx_t p(0);
+	idx_t p(0);
 	while (tagset.pos_dict_.is_id_valid(p)) {
 		os << tagset.pos_dict_.get_string(p) << "\t= ";
-		foreach (attribute_idx_t a, tagset.get_pos_attributes(p)) {
+		foreach (idx_t a, tagset.get_pos_attributes(p)) {
 			if (tagset.pos_required_attributes_[p][a]) {
 				os << tagset.attribute_dict_.get_string(a) << " ";
 			} else {
diff --git a/libcorpus2/token.cpp b/libcorpus2/token.cpp
index ac86d5c6886992ca0f5fd80bc4bb3430e356babb..1b07c3fd3fbec6f48561674aa6f215ad63668f20 100644
--- a/libcorpus2/token.cpp
+++ b/libcorpus2/token.cpp
@@ -37,9 +37,9 @@ struct preferred_lexeme_cmp
 	bool operator()(const Lexeme& l1, const Lexeme& l2) const {
 		return (!l1.is_disamb() && l2.is_disamb())
 				|| (l1.is_disamb() == l2.is_disamb()
-				&& (tagset->get_original_pos_index(l1.tag().pos_id()) >
-					tagset->get_original_pos_index(l2.tag().pos_id())
-				|| (l1.tag().pos_id() == l2.tag().pos_id()
+				&& (tagset->get_original_pos_index(l1.tag().get_pos_index()) >
+					tagset->get_original_pos_index(l2.tag().get_pos_index())
+				|| (l1.tag().get_pos() == l2.tag().get_pos()
 				&& l1 < l2)));
 	}
 };
@@ -89,14 +89,14 @@ bool Token::remove_duplicate_lexemes()
 	return old_size != lexemes_.size();
 }
 
-bool Token::orth_pos_match(pos_idx_t pos, const UnicodeString &orth) const
+bool Token::orth_pos_match(mask_t pos, const UnicodeString &orth) const
 {
 	if (orth.length() > 0) {
 		if (orth.caseCompare(orth_, 0) != 0) return false;
 	}
-	if (pos != static_cast<pos_idx_t>(-1)) {
+	if (pos.any()) {
 		foreach (const Lexeme& lex, lexemes_) {
-			if (lex.tag().pos_id() != pos) return false;
+			if (lex.tag().get_pos() != pos) return false;
 		}
 	}
 	return true;
diff --git a/libcorpus2/token.h b/libcorpus2/token.h
index 6ea73cdc878daf9d556878bbc87cbd114dbea91f..2961eb7e32a6b7d99c5ea7e16e1ba98a4ff61747 100644
--- a/libcorpus2/token.h
+++ b/libcorpus2/token.h
@@ -117,7 +117,7 @@ public:
 	 * @returns true if the orth and lexemes pass the check, false
 	 * otherwise
 	 */
-	bool orth_pos_match(pos_idx_t pos, const UnicodeString& orth) const;
+	bool orth_pos_match(mask_t pos, const UnicodeString& orth) const;
 
 private:
 	/// The orth (actual encountered form)
diff --git a/libcorpus2/util/symboldictionary.h b/libcorpus2/util/symboldictionary.h
index f703a8f9899b470261a921e82991c4b98e59f593..f0027a95a472213f7b5da65365c7d71ac0e974bd 100644
--- a/libcorpus2/util/symboldictionary.h
+++ b/libcorpus2/util/symboldictionary.h
@@ -39,7 +39,7 @@ public:
 	bool is_id_valid(IndexT idx) const;
 
 	/// Getter for the size of this dictionary
-	size_t  size() const;
+	size_t size() const;
 
 	/**
 	 * Get the index for a given string identifier, const char* version.
@@ -158,7 +158,7 @@ template <typename IndexT>
 const std::string& SymbolDictionary<IndexT>::get_string(IndexT id) const
 {
 	size_t idx = static_cast<size_t>(id);
-	if (id < data_.size()) {
+	if (idx < data_.size()) {
 		return data_[idx];
 	} else {
 		return nullstr;
diff --git a/libcorpus2/version.in b/libcorpus2/version.in
index eacb822dc141beeb2d4bcdbbb11f1df08d411e59..87022b142a71294f04a51fad2b566e5e7c41d613 100644
--- a/libcorpus2/version.in
+++ b/libcorpus2/version.in
@@ -1,9 +1,9 @@
 #ifndef LIBCORPUS2_VERSION_H
 #define LIBCORPUS2_VERSION_H
 
-#define LIBCORPUS2_VERSION_MAJOR @ver_major@
-#define LIBCORPUS2_VERSION_MINOR @ver_minor@
-#define LIBCORPUS2_VERSION_PATCH @ver_patch@
+#define LIBCORPUS2_VERSION_MAJOR @corpus2_ver_major@
+#define LIBCORPUS2_VERSION_MINOR @corpus2_ver_minor@
+#define LIBCORPUS2_VERSION_PATCH @corpus2_ver_patch@
 #define LIBCORPUS2_VERSION "@LIBCORPUS2_VERSION@"
 
 #endif
diff --git a/libpwrutils/CMakeLists.txt b/libpwrutils/CMakeLists.txt
index f6c221d3e2c9c495603043bbcda95ca8933d7a14..b7e137017971a96234fb7dc87fcf81b176bbeb6b 100644
--- a/libpwrutils/CMakeLists.txt
+++ b/libpwrutils/CMakeLists.txt
@@ -4,7 +4,7 @@ PROJECT(pwrutils)
 
 set(pwrutils_ver_major "0")
 set(pwrutils_ver_minor "0")
-set(pwrutils_ver_patch "1")
+set(pwrutils_ver_patch "2")
 
 set(LIBPWRUTILS_VERSION
 	"${pwrutils_ver_major}.${pwrutils_ver_minor}.${pwrutils_ver_patch}")
diff --git a/libpwrutils/bitset.h b/libpwrutils/bitset.h
new file mode 100644
index 0000000000000000000000000000000000000000..fa4ddc3a420613a0e4994fdb01d8295738da006d
--- /dev/null
+++ b/libpwrutils/bitset.h
@@ -0,0 +1,112 @@
+#ifndef PWRNLP_BITSET_H
+#define PWRNLP_BITSET_H
+
+#include <libpwrutils/foreach.h>
+#include <boost/range.hpp>
+#include <bitset>
+#include <boost/functional/hash.hpp>
+#include <boost/pending/lowest_bit.hpp>
+#include <climits>
+#include <limits>
+
+namespace PwrNlp {
+
+using std::bitset;
+
+static const size_t ulong_bits = sizeof(unsigned long) * CHAR_BIT;
+
+typedef bitset<ulong_bits> word_bitset;
+
+
+/**
+ * Count set bits in an integral type (NOTE: presumably T must be unsigned).
+ * http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+ */
+template <typename T> inline
+int count_bits_set(T v)
+{
+	v = v - ((v >> 1) & (T)~(T)0/3);                              // per-2-bit counts
+	v = (v & (T)~(T)0/15*3) + ((v >> 2) & (T)~(T)0/15*3);         // per-4-bit counts
+	v = (v + (v >> 4)) & (T)~(T)0/255*15;                         // per-byte counts
+	return (T)(v * ((T)~(T)0/255)) >> (sizeof(T) - 1) * CHAR_BIT; // top byte = total
+}
+
+template <size_t S> inline
+size_t count_bits_set(const std::bitset<S>& b)
+{
+	return b.count();
+}
+
+template <size_t S> inline
+size_t lowest_bit(const bitset<S>& b)
+{
+	// GCC (libstdc++) specific; documented to return S when no bit is set
+	return b._Find_first();
+}
+
+/**
+ * Get index of lowest set bit in an integral type, or size_t(-1) for zero
+ */
+inline size_t lowest_bit(const unsigned long long& t)
+{
+	if (t <= 0) return static_cast<size_t>(-1); // t is unsigned, so this means t == 0
+	return boost::lowest_bit(t);
+}
+
+inline size_t lowest_bit(const unsigned long& t)
+{
+	if (t <= 0) return static_cast<size_t>(-1); // t is unsigned, so this means t == 0
+	return boost::lowest_bit(t);
+}
+
+} /* end ns PwrNlp */
+
+namespace std {
+/// Hash a bitset by folding its ulong-sized words into the seed, lowest word first.
+template<size_t S> inline
+size_t hash_value(bitset<S> b)
+{
+	size_t seed = 0;
+	const bitset<S> mask(std::numeric_limits<unsigned long>::max());
+	while (b.any()) { // stops once the remaining (higher) bits are all zero
+		boost::hash_combine(seed, (b & mask).to_ulong());
+		b >>= PwrNlp::ulong_bits;
+	}
+	return seed;
+}
+
+template<> inline
+size_t hash_value(bitset<PwrNlp::ulong_bits> b)
+{
+	size_t seed = 0;
+	boost::hash_combine(seed, b.to_ulong());
+	return seed;
+}
+/// Orders bitsets by comparing ulong-sized words, least significant word first.
+template<size_t S> inline
+bool operator<(bitset<S> left, bitset<S> right)
+{
+	const bitset<S> mask(std::numeric_limits<unsigned long>::max());
+	while (left.any()) {
+		unsigned long l1 = (left & mask).to_ulong();
+		unsigned long r1 = (right & mask).to_ulong();
+		if (l1 < r1) {
+			return true;
+		} else if (l1 > r1) {
+			return false;
+		}
+		left >>= PwrNlp::ulong_bits;
+		right >>= PwrNlp::ulong_bits;
+	}
+	return right.any(); // left has no bits remaining: less only if right still has some
+}
+
+template<> inline
+bool operator<(bitset<PwrNlp::ulong_bits> left, bitset<PwrNlp::ulong_bits> right)
+{
+	return left.to_ulong() < right.to_ulong();
+}
+
+}
+
+#endif // PWRNLP_BITSET_H
diff --git a/libpwrutils/util.h b/libpwrutils/util.h
index b1bb7fb3e6db2283e77e2ed520629977e6d4835c..52fdf1cac9bc480b52015f930bb82b0bc045cd1a 100644
--- a/libpwrutils/util.h
+++ b/libpwrutils/util.h
@@ -21,6 +21,7 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 
 #include <iostream>
 #include <string>
+#include <climits>
 
 namespace PwrNlp {
 
@@ -76,6 +77,8 @@ void utf8_string_to_uchar_container(const std::string& s,
 	}
 }
 
+
+
 } /* end ns PwrNlp */
 
 #endif // PWRNLP_UTIL_H
diff --git a/tagset-tool/main.cpp b/tagset-tool/main.cpp
index 553bc4db2caa43ed0efef2b60c584935a5941faa..4ae2cdb3b7841d323c605ce4a3e8607646ab2bb9 100644
--- a/tagset-tool/main.cpp
+++ b/tagset-tool/main.cpp
@@ -74,9 +74,13 @@ void libedit_read_loop(boost::function<void (const std::string&)>& line_cb)
 void tagset_info(const Corpus2::Tagset& tagset)
 {
 	std::cerr << "Corpus2::Tagset loaded: "
-		<< tagset.pos_dictionary().size() << " POSes, "
-		<< tagset.attribute_dictionary().size() << " attributes, "
-		<< tagset.value_dictionary().size() << " values\n";
+		<< tagset.pos_count() << " POSes, "
+		<< tagset.attribute_count() << " attributes, "
+		<< tagset.value_count() << " values [";
+	for (Corpus2::idx_t a = 0; a < tagset.attribute_count(); ++a) { // per-attribute value counts
+		std::cerr << tagset.get_attribute_values(a).size() << " ";
+	}
+	std::cerr << "]\n"; // closes the "[" printed after the total value count
 	std::cerr << "Size is " << tagset.size()
 		<< " (extra size is " << tagset.size_extra() << ")\n";
 	std::cerr << "POSes: ";
@@ -86,50 +90,61 @@ void tagset_info(const Corpus2::Tagset& tagset)
 	std::cerr << "\n";
 }
 
-void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s)
+void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s,
+		bool internals)
 {
-	Corpus2::pos_idx_t pos = tagset.pos_dictionary().get_id(s);
-	Corpus2::attribute_idx_t atr = tagset.attribute_dictionary().get_id(s);
-	Corpus2::value_idx_t val = tagset.value_dictionary().get_id(s);
-	if (tagset.pos_dictionary().is_id_valid(pos)) {
+	Corpus2::idx_t pos = tagset.pos_dictionary().get_id(s);
+	Corpus2::idx_t atr = tagset.attribute_dictionary().get_id(s);
+	Corpus2::mask_t val = tagset.get_value_mask(s);
+	if (pos >= 0) {
+		if (internals) {
+			std::cout << tagset.get_pos_mask(pos) << " (" << (int)pos << ")\n";
+		}
 		std::cout << s << " -> POS ->" ;
-		foreach (Corpus2::attribute_idx_t a, tagset.get_pos_attributes(pos)) {
+		foreach (Corpus2::idx_t a, tagset.get_pos_attributes(pos)) {
 			std::string astr = tagset.attribute_dictionary().get_string(a);
-			if (tagset.get_pos_required_attributes(pos)[a]) {
+			if (tagset.pos_requires_attribute(pos, a)) {
 				std::cout << " " << astr;
 			} else {
 				std::cout << " [" << astr << "]";
 			}
 		}
 		std::cout << "\n";
-	} else if (tagset.attribute_dictionary().is_id_valid(atr)) {
+	} else if (atr >= 0) { // index 0 is a valid attribute; same validity test as for pos above
+		if (internals) {
+			std::cout << tagset.get_attribute_mask(atr) << " (" << (int)atr << ")\n";
+		}
 		std::cout << s << " -> attribute ->";
-		foreach (Corpus2::value_idx_t v, tagset.get_attribute_values(atr)) {
-			std::cout << " " << tagset.value_dictionary().get_string(v);
+		foreach (Corpus2::mask_t v, tagset.get_attribute_values(atr)) {
+			std::cout << " " << tagset.get_value_name(v);
 		}
 		std::cout << "\nIn POSes:";
-		for (Corpus2::pos_idx_t p = (Corpus2::pos_idx_t)(0); p < tagset.pos_dictionary().size(); ++p) {
-			if (tagset.get_pos_valid_attributes(p)[atr]) {
+		for (Corpus2::idx_t p = 0; p < tagset.pos_count(); ++p) {
+			if (tagset.pos_has_attribute(p,atr)) {
 				std::cout << " " << tagset.pos_dictionary().get_string(p);
-				if (!tagset.get_pos_required_attributes(p)[atr]) {
+				if (!tagset.pos_requires_attribute(p, atr)) {
 					std::cout << "?";
 				}
 			}
 		}
 		std::cout << "\n";
-	} else if (tagset.value_dictionary().is_id_valid(val)) {
-		Corpus2::attribute_idx_t a = tagset.get_value_attribute(val);
+	} else if (val.any()) {
+		Corpus2::idx_t a = tagset.get_value_attribute(val);
+		if (internals) {
+			std::cout << val << " (" << PwrNlp::lowest_bit(val) << ")\n";
+			std::cout << tagset.get_attribute_mask(a) << " (" << (int)a << ")\n";
+		}
 		std::cout << s << " -> value -> attribute ";
 		std::cout << tagset.attribute_dictionary().get_string(a);
 		std::cout << " .";
-		foreach (Corpus2::value_idx_t v, tagset.get_attribute_values(a)) {
-			std::cout << " " << tagset.value_dictionary().get_string(v);
+		foreach (Corpus2::mask_t v, tagset.get_attribute_values(a)) {
+			std::cout << " " << tagset.get_value_name(v);
 		}
 		std::cout << "\nIn POSes:";
-		for (Corpus2::pos_idx_t p = (Corpus2::pos_idx_t)(0); p < tagset.pos_dictionary().size(); ++p) {
-			if (tagset.get_pos_valid_attributes(p)[a]) {
+		for (Corpus2::idx_t p = 0; p < tagset.pos_count(); ++p) {
+			if (tagset.pos_has_attribute(p, a)) {
 				std::cout << " " << tagset.pos_dictionary().get_string(p);
-				if (!tagset.get_pos_required_attributes(p)[a]) {
+				if (!tagset.pos_requires_attribute(p, a)) {
 					std::cout << "?";
 				}
 			}
@@ -140,7 +155,8 @@ void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s)
 	}
 }
 
-void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort, const std::string& s)
+void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort,
+		const std::string& s, bool internals)
 {
 	try {
 		Corpus2::Token t;
@@ -152,6 +168,9 @@ void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort, const
 			if (validate) {
 				tagset.validate_tag(lex.tag(), false, &ss);
 			}
+			if (internals) {
+				ss << "\n" << lex.tag().raw_dump() << "";
+			}
 			out.push_back(ss.str());
 		}
 		if (sort) {
@@ -167,7 +186,7 @@ void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort, const
 int main(int argc, char** argv)
 {
 	std::string tagset_load, tagset_save;
-	bool quiet = false;
+	bool quiet = false, internals = false;
 	bool parse = false, validate = false, sort = false;
 	using boost::program_options::value;
 
@@ -181,6 +200,8 @@ int main(int argc, char** argv)
 			 "Suppress startup info\n")
 			("parse,p", value(&parse)->zero_tokens(),
 			 "Parse complex tag strings mode")
+			("internals,i", value(&internals)->zero_tokens(),
+			 "Output internal representations")
 			("validate,v", value(&validate)->zero_tokens(),
 			 "Validate parsed tags")
 			("sort,s", value(&sort)->zero_tokens(),
@@ -232,13 +253,13 @@ int main(int argc, char** argv)
 					std::cerr << "(Tag parse mode)\n";
 				}
 				_prompt = "tag-parse> ";
-				f = boost::bind(&tag_parse_cb, boost::ref(tagset), validate, sort, _1);
+				f = boost::bind(&tag_parse_cb, boost::ref(tagset), validate, sort, _1, internals);
 			} else {
 				if (!quiet) {
 					std::cerr << "(Tagset query mode)\n";
 				}
 				_prompt = "tagset-query> ";
-				f = boost::bind(&tagset_query_cb, boost::ref(tagset), _1);
+				f = boost::bind(&tagset_query_cb, boost::ref(tagset), _1, internals);
 			}
 #ifdef HAVE_LIBEDIT
 			libedit_read_loop(f);
diff --git a/tests/basic.cpp b/tests/basic.cpp
index d9651327aca94202a2fe8232d1f6a8c7e6081ec5..d8a8e8ffba451d45b09748443d820a147316e4a2 100644
--- a/tests/basic.cpp
+++ b/tests/basic.cpp
@@ -20,7 +20,7 @@ BOOST_AUTO_TEST_CASE( token_dup_lexemes )
 {
 	Corpus2::Token t(UnicodeString::fromUTF8("ZZ"), PwrNlp::Whitespace::ManySpaces);
 	//Corpus2::Tagset tagset(tagsetstr1);
-	Corpus2::Tag t1(Corpus2::tagset_idx_t(0), Corpus2::pos_idx_t(0));
+	Corpus2::Tag t1(Corpus2::mask_t(0));
 	Corpus2::Lexeme l1(UnicodeString::fromUTF8("aaa"), t1);
 	Corpus2::Lexeme l2(UnicodeString::fromUTF8("bbb"), t1);
 	BOOST_CHECK(!t.check_duplicate_lexemes());
diff --git a/tests/tag_split.cpp b/tests/tag_split.cpp
index c0d93c35bf5f825c8eb08497991a724f8ae517b0..1f4ca44dc1bf89e6fed6fd98d8d615b10dc32af0 100644
--- a/tests/tag_split.cpp
+++ b/tests/tag_split.cpp
@@ -12,8 +12,9 @@ struct F {
 			"A tag tog other a3 \n"
 			"B data thing tag-thing thang\n"
 			"C a b c \n"
-			"[POS]\n some A B [C]\n";
-		tagset.reset(new Corpus2::Tagset(tagset_string));
+			"[POS]\n some A B [C]\n same A B \n P3 [A] [B]\n";
+		tagset.reset(new Corpus2::Tagset());
+		*tagset = Corpus2::Tagset::from_data(tagset_string);
 	}
 	boost::shared_ptr<Corpus2::Tagset> tagset;
 
@@ -22,10 +23,15 @@ struct F {
 		std::set<std::string> actual;
 		std::vector<Corpus2::Tag> tags;
 		Corpus2::Token t;
-		tagset->lexemes_into_token(t, UnicodeString(), s);
+		try {
+			tagset->lexemes_into_token(t, UnicodeString(), s);
+		} catch (Corpus2::TagParseError& e) {
+			throw;
+		}
+
 		foreach (const Corpus2::Lexeme& lex, t.lexemes()) {
 			const Corpus2::Tag& tag = lex.tag();
-			BOOST_WARN(tagset->validate_tag(tag, false));
+			BOOST_WARN(tagset->validate_tag(tag, false, &std::cerr));
 			actual.insert(tagset->tag_to_string(tag));
 			tags.push_back(tag);
 		}
@@ -89,9 +95,9 @@ BOOST_FIXTURE_TEST_CASE( dots_plus, F )
 
 BOOST_FIXTURE_TEST_CASE( missing, F )
 {
-	const char tag[] = "some:data";
+	const char tag[] = "P3:data";
 	std::set<std::string> r;
-	r.insert("some::data");
+	r.insert("P3:data");
 	check_split(tag, r);
 }
 
@@ -158,4 +164,35 @@ BOOST_FIXTURE_TEST_CASE( underscore_dots, F )
 
 	check_split(tag, r);
 }
+
+
+BOOST_FIXTURE_TEST_CASE( tag_size, F )
+{
+	Corpus2::Tag t = tagset->parse_simple_tag("some:tag:data", false);
+	Corpus2::Tag t2 = tagset->parse_simple_tag("some:tog:data", false);
+	Corpus2::Tag t3 = tagset->parse_simple_tag("same:tag:data", false);
+	BOOST_CHECK(tagset->tag_is_singular(t));
+	BOOST_CHECK_EQUAL(tagset->tag_size(t), 1);
+	BOOST_CHECK(tagset->tag_is_singular(t2));
+	BOOST_CHECK_EQUAL(tagset->tag_size(t2), 1);
+	BOOST_CHECK(tagset->tag_is_singular(t3));
+	BOOST_CHECK_EQUAL(tagset->tag_size(t3), 1);
+	t.add_values(t2.get_values());
+	BOOST_CHECK(!tagset->tag_is_singular(t));
+	BOOST_CHECK_EQUAL(tagset->tag_size(t), 2);
+	t.add_pos(t3.get_pos());
+	BOOST_CHECK(!tagset->tag_is_singular(t));
+	BOOST_CHECK_EQUAL(tagset->tag_size(t), 4);
+	Corpus2::Tag t4 = tagset->parse_simple_tag("same:other:thang", true);
+	t.add_values(t4.get_values() & tagset->get_attribute_mask(std::string("A")));
+	BOOST_CHECK_EQUAL(tagset->tag_size(t), 6);
+	std::vector<Corpus2::Tag> tags = tagset->split_tag(t);
+	BOOST_CHECK_EQUAL(tags.size(), 6);
+	Corpus2::Tag tt;
+	foreach (Corpus2::Tag t, tags) {
+		tt.combine_with(t);
+	}
+	BOOST_CHECK(tt == t);
+}
+
 BOOST_AUTO_TEST_SUITE_END()
diff --git a/tests/tagset_parse.cpp b/tests/tagset_parse.cpp
index 892e072945c95a86f4654f71c7dea018803a70fe..d5faa7d1792748ac4adc94545f99d22a5371da45 100644
--- a/tests/tagset_parse.cpp
+++ b/tests/tagset_parse.cpp
@@ -26,19 +26,23 @@ BOOST_AUTO_TEST_CASE( empty )
 
 BOOST_AUTO_TEST_CASE( minimal )
 {
+	Corpus2::Tagset t;
 	try {
-		parse(PRE POSA);
+		t = parse(PRE POSA);
 	} catch (Corpus2::TagsetParseError& e) {
 		BOOST_FAIL(e.info());
 	}
+	BOOST_CHECK_EQUAL(t.pos_count(), 1);
 }
 BOOST_AUTO_TEST_CASE( minimal_nonewline )
 {
+	Corpus2::Tagset t;
 	try {
-		parse(PRE "[POS]\n POS1");
+		t = parse(PRE "[POS]\n POS1");
 	} catch (Corpus2::TagsetParseError& e) {
 		BOOST_FAIL(e.info());
 	}
+	BOOST_CHECK_EQUAL(t.pos_count(), 1);
 }
 
 BOOST_AUTO_TEST_CASE( dupe_val )