From 712d1481ac504f7286e7a60f7da00562c69712b7 Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Wed, 13 Jul 2011 12:56:24 +0200 Subject: [PATCH] starter of convenience functions for tagging --- libcorpus2/CMakeLists.txt | 1 + libcorpus2/tagging.cpp | 44 +++++++++++++++++++++++++++++++++++++++ libcorpus2/tagging.h | 40 +++++++++++++++++++++++++++++++++++ swig/corpus2.i | 1 + swig/tagging.i | 24 +++++++++++++++++++++ 5 files changed, 110 insertions(+) create mode 100644 libcorpus2/tagging.cpp create mode 100644 libcorpus2/tagging.h create mode 100644 swig/tagging.i diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt index d00090a..9d07000 100644 --- a/libcorpus2/CMakeLists.txt +++ b/libcorpus2/CMakeLists.txt @@ -50,6 +50,7 @@ SET(libcorpus2_STAT_SRC lexeme.cpp sentence.cpp tag.cpp + tagging.cpp tagset.cpp tagsetmanager.cpp tagsetparser.cpp diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp new file mode 100644 index 0000000..b98aee1 --- /dev/null +++ b/libcorpus2/tagging.cpp @@ -0,0 +1,44 @@ +/* + Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski + Part of the libcorpus2 project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. + + See the LICENSE and COPYING files for more details.
+*/ + +#include <libcorpus2/tagging.h> +#include <libcorpus2/tagsetmanager.h> + +// #include <libpwrutils/foreach.h> + +namespace Corpus2 { + +Tag get_attribute_mask(const Tagset& tagset, std::string attr_name) +{ + if(attr_name.empty()) + { + // return all-POS mask + Tag t; + + for (idx_t pos_idx = 0; pos_idx < tagset.pos_count(); ++pos_idx) { + t.add_pos(tagset.get_pos_mask(pos_idx)); + } + + return t; + } + else + { + return Tag(0, tagset.get_attribute_mask(attr_name)); + } +} + + +} /* end ns Corpus2 */ diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h new file mode 100644 index 0000000..b767a85 --- /dev/null +++ b/libcorpus2/tagging.h @@ -0,0 +1,40 @@ +/* + Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski + Part of the libcorpus2 project + + This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3 of the License, or (at your option) +any later version. + + This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. + + See the LICENSE and COPYING files for more details. +*/ + +#ifndef LIBCORPUS2_TAGGING_H +#define LIBCORPUS2_TAGGING_H + +#include <libcorpus2/tag.h> +#include <libcorpus2/tagset.h> + +namespace Corpus2 { + +/** + * Creates a mask having all values of the given attribute set. + * Pass an empty string to get a mask for all POS values. + * The resulting object should only be used for masking as it won't be + * a valid tag.
+ * + * @arg tagset tagset to use + * @arg attr_name attribute as defined in tagset or empty string for POS + * @return mask for given attribute + */ +Tag get_attribute_mask(const Tagset& tagset, + const std::string attr_name); + +} /* end ns Corpus2 */ + +#endif // LIBCORPUS2_TAGGING_H diff --git a/swig/corpus2.i b/swig/corpus2.i index 4c181af..1763c5c 100644 --- a/swig/corpus2.i +++ b/swig/corpus2.i @@ -20,6 +20,7 @@ %include "tokenreader.i" %include "tokenwriter.i" %include "libpwrnlperror.i" +%include "tagging.i" %{ #include <libcorpus2/util/settings.h> diff --git a/swig/tagging.i b/swig/tagging.i new file mode 100644 index 0000000..8d2301a --- /dev/null +++ b/swig/tagging.i @@ -0,0 +1,24 @@ +#ifndef SWIG_LIBCORPUS2_TAGGING_I +#define SWIG_LIBCORPUS2_TAGGING_I + +%module libcorpustagging +%{ + #include <libcorpus2/tagging.h> +%} + +%include "std_string.i" + +%include "tag.i" +%include "tagset.i" + +namespace Corpus2 { + +Tag get_attribute_mask(const Tagset& tagset, + const std::string attr_name); +} + +using namespace std; +using namespace Corpus2; + + +#endif /* SWIG_LIBCORPUS2_TAGGING_I */ -- GitLab