From 712d1481ac504f7286e7a60f7da00562c69712b7 Mon Sep 17 00:00:00 2001
From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl>
Date: Wed, 13 Jul 2011 12:56:24 +0200
Subject: [PATCH] starter of convenience functions for tagging

---
 libcorpus2/CMakeLists.txt |  1 +
 libcorpus2/tagging.cpp    | 44 +++++++++++++++++++++++++++++++++++++++
 libcorpus2/tagging.h      | 40 +++++++++++++++++++++++++++++++++++
 swig/corpus2.i            |  1 +
 swig/tagging.i            | 24 +++++++++++++++++++++
 5 files changed, 110 insertions(+)
 create mode 100644 libcorpus2/tagging.cpp
 create mode 100644 libcorpus2/tagging.h
 create mode 100644 swig/tagging.i

diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index d00090a..9d07000 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -50,6 +50,7 @@ SET(libcorpus2_STAT_SRC
 	lexeme.cpp
 	sentence.cpp
 	tag.cpp
+	tagging.cpp
 	tagset.cpp
 	tagsetmanager.cpp
 	tagsetparser.cpp
diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp
new file mode 100644
index 0000000..b98aee1
--- /dev/null
+++ b/libcorpus2/tagging.cpp
@@ -0,0 +1,44 @@
+/*
+	Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+	Part of the libcorpus2 project
+
+	This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+	This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the LICENSE and COPYING files for more details.
+*/
+
+#include <libcorpus2/tagging.h>
+#include <libcorpus2/tagsetmanager.h>
+
+// #include <libpwrutils/foreach.h>
+
+namespace Corpus2 {
+
+Tag get_attribute_mask(const Tagset& tagset, std::string attr_name)
+{
+	if(attr_name.empty())
+	{
+		// empty attr_name: build the all-POS mask by adding every POS index of the tagset
+		Tag t;
+
+		for (idx_t pos_idx = 0; pos_idx < tagset.pos_count(); ++pos_idx) {
+			t.add_pos(tagset.get_pos_mask(pos_idx));
+		}
+
+		return t;
+	}
+	else
+	{
+		return Tag(0, tagset.get_attribute_mask(attr_name));
+	}
+}
+
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h
new file mode 100644
index 0000000..b767a85
--- /dev/null
+++ b/libcorpus2/tagging.h
@@ -0,0 +1,40 @@
+/*
+	Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+	Part of the libcorpus2 project
+
+	This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+	This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+	See the LICENSE and COPYING files for more details.
+*/
+
+#ifndef LIBCORPUS2_TAGGING_H
+#define LIBCORPUS2_TAGGING_H
+
+#include <libcorpus2/tag.h>
+#include <libcorpus2/tagset.h>
+
+namespace Corpus2 {
+
+/**
+  * Creates a mask having all values of the given attribute set.
+  * Pass an empty string to get a mask for all POS values.
+  * The resulting object should only be used for masking as it won't be
+  * a valid tag.
+  *
+  * @arg tagset tagset to use
+  * @arg attr_name attribute as defined in tagset or empty string for POS
+  * @return mask for given attribute
+  */
+Tag get_attribute_mask(const Tagset& tagset,
+					   const std::string attr_name);
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_TAGGING_H
diff --git a/swig/corpus2.i b/swig/corpus2.i
index 4c181af..1763c5c 100644
--- a/swig/corpus2.i
+++ b/swig/corpus2.i
@@ -20,6 +20,7 @@
 %include "tokenreader.i"
 %include "tokenwriter.i"
 %include "libpwrnlperror.i"
+%include "tagging.i"
 
 %{
 #include <libcorpus2/util/settings.h>
diff --git a/swig/tagging.i b/swig/tagging.i
new file mode 100644
index 0000000..8d2301a
--- /dev/null
+++ b/swig/tagging.i
@@ -0,0 +1,24 @@
+#ifndef SWIG_LIBCORPUS2_TAGGING_I
+#define SWIG_LIBCORPUS2_TAGGING_I
+
+%module libcorpustagging
+%{
+  #include <libcorpus2/tagging.h>
+%}
+
+%include "std_string.i"
+
+%include "tag.i"
+%include "tagset.i"
+
+namespace Corpus2 {
+
+Tag get_attribute_mask(const Tagset& tagset,
+					   const std::string attr_name);
+}
+
+using namespace std;
+using namespace Corpus2;
+
+
+#endif /* SWIG_LIBCORPUS2_TAGGING_I */
-- 
GitLab