From 986765559e5a77f21c128451e2f132577f6981bd Mon Sep 17 00:00:00 2001
From: ilor <kailoran@gmail.com>
Date: Thu, 21 Oct 2010 17:19:17 +0200
Subject: [PATCH] modernize tagset-tool, add internals output mode

---
 tagset-tool/main.cpp | 45 ++++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/tagset-tool/main.cpp b/tagset-tool/main.cpp
index 0462e8e..a9640ed 100644
--- a/tagset-tool/main.cpp
+++ b/tagset-tool/main.cpp
@@ -90,32 +90,39 @@ void tagset_info(const Corpus2::Tagset& tagset)
 	std::cerr << "\n";
 }
 
-void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s)
+void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s,
+		bool internals)
 {
 	Corpus2::idx_t pos = tagset.pos_dictionary().get_id(s);
 	Corpus2::idx_t atr = tagset.attribute_dictionary().get_id(s);
 	Corpus2::mask_t val = tagset.get_value_mask(s);
-	if (tagset.pos_dictionary().is_id_valid(pos)) {
+	if (pos >= 0) {
+		if (internals) {
+			std::cout << tagset.get_pos_mask(pos) << " (" << (int)pos << ")\n";
+		}
 		std::cout << s << " -> POS ->" ;
 		foreach (Corpus2::idx_t a, tagset.get_pos_attributes(pos)) {
 			std::string astr = tagset.attribute_dictionary().get_string(a);
-			if (tagset.get_pos_required_attributes(pos)[a]) {
+			if (tagset.pos_requires_attribute(pos, a)) {
 				std::cout << " " << astr;
 			} else {
 				std::cout << " [" << astr << "]";
 			}
 		}
 		std::cout << "\n";
-	} else if (tagset.attribute_dictionary().is_id_valid(atr)) {
+	} else if (atr > 0) {
+		if (internals) {
+			std::cout << tagset.get_attribute_mask(atr) << " (" << (int)atr << ")\n";
+		}
 		std::cout << s << " -> attribute ->";
 		foreach (Corpus2::mask_t v, tagset.get_attribute_values(atr)) {
 			std::cout << " " << tagset.get_value_name(v);
 		}
 		std::cout << "\nIn POSes:";
-		for (Corpus2::idx_t p = 0; p < tagset.pos_dictionary().size(); ++p) {
-			if (tagset.get_pos_attributes_flag(p)[atr]) {
+		for (Corpus2::idx_t p = 0; p < tagset.pos_count(); ++p) {
+			if (tagset.pos_has_attribute(p,atr)) {
 				std::cout << " " << tagset.pos_dictionary().get_string(p);
-				if (!tagset.get_pos_required_attributes(p)[atr]) {
+				if (!tagset.pos_requires_attribute(p, atr)) {
 					std::cout << "?";
 				}
 			}
@@ -123,6 +130,10 @@ void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s)
 		std::cout << "\n";
 	} else if (val.any()) {
 		Corpus2::idx_t a = tagset.get_value_attribute_index(val);
+		if (internals) {
+			std::cout << val << " (" << PwrNlp::lowest_bit(val) << ")\n";
+			std::cout << tagset.get_attribute_mask(a) << " (" << (int)a << ")\n";
+		}
 		std::cout << s << " -> value -> attribute ";
 		std::cout << tagset.attribute_dictionary().get_string(a);
 		std::cout << " .";
@@ -130,10 +141,10 @@ void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s)
 			std::cout << " " << tagset.get_value_name(v);
 		}
 		std::cout << "\nIn POSes:";
-		for (Corpus2::idx_t p = 0; p < tagset.pos_dictionary().size(); ++p) {
-			if (tagset.get_pos_attributes_flag(p)[a]) {
+		for (Corpus2::idx_t p = 0; p < tagset.pos_count(); ++p) {
+			if (tagset.pos_has_attribute(p, a)) {
 				std::cout << " " << tagset.pos_dictionary().get_string(p);
-				if (!tagset.get_pos_required_attributes(p)[a]) {
+				if (!tagset.pos_requires_attribute(p, a)) {
 					std::cout << "?";
 				}
 			}
@@ -144,7 +155,8 @@ void tagset_query_cb(const Corpus2::Tagset& tagset, const std::string& s)
 	}
 }
 
-void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort, const std::string& s)
+void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort,
+		const std::string& s, bool internals)
 {
 	try {
 		Corpus2::Token t;
@@ -156,6 +168,9 @@ void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort, const
 			if (validate) {
 				tagset.validate_tag(lex.tag(), false, &ss);
 			}
+			if (internals) {
+				ss << "\n" << lex.tag().raw_dump() << "";
+			}
 			out.push_back(ss.str());
 		}
 		if (sort) {
@@ -171,7 +186,7 @@ void tag_parse_cb(const Corpus2::Tagset& tagset, bool validate, bool sort, const
 int main(int argc, char** argv)
 {
 	std::string tagset_load, tagset_save;
-	bool quiet = false;
+	bool quiet = false, internals = false;
 	bool parse = false, validate = false, sort = false;
 	using boost::program_options::value;
 
@@ -185,6 +200,8 @@ int main(int argc, char** argv)
 			 "Suppress startup info\n")
 			("parse,p", value(&parse)->zero_tokens(),
 			 "Parse complex tag strings mode")
+			("internals,i", value(&internals)->zero_tokens(),
+			 "Output internal representations")
 			("validate,v", value(&validate)->zero_tokens(),
 			 "Validate parsed tags")
 			("sort,s", value(&sort)->zero_tokens(),
@@ -236,13 +253,13 @@ int main(int argc, char** argv)
 					std::cerr << "(Tag parse mode)\n";
 				}
 				_prompt = "tag-parse> ";
-				f = boost::bind(&tag_parse_cb, boost::ref(tagset), validate, sort, _1);
+				f = boost::bind(&tag_parse_cb, boost::ref(tagset), validate, sort, _1, internals);
 			} else {
 				if (!quiet) {
 					std::cerr << "(Tagset query mode)\n";
 				}
 				_prompt = "tagset-query> ";
-				f = boost::bind(&tagset_query_cb, boost::ref(tagset), _1);
+				f = boost::bind(&tagset_query_cb, boost::ref(tagset), _1, internals);
 			}
 #ifdef HAVE_LIBEDIT
 			libedit_read_loop(f);
-- 
GitLab