diff --git a/libcorpus2/CMakeLists.txt b/libcorpus2/CMakeLists.txt
index d00090a5dd567ca3db2f9b680198852812060682..9d0700052bd15aeca89a6ea792c221b0ed070424 100644
--- a/libcorpus2/CMakeLists.txt
+++ b/libcorpus2/CMakeLists.txt
@@ -50,6 +50,7 @@ SET(libcorpus2_STAT_SRC
 	lexeme.cpp
 	sentence.cpp
 	tag.cpp
+	tagging.cpp
 	tagset.cpp
 	tagsetmanager.cpp
 	tagsetparser.cpp
diff --git a/libcorpus2/tagging.cpp b/libcorpus2/tagging.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b98aee1c6d5e677eed92002d2209d3c38927d824
--- /dev/null
+++ b/libcorpus2/tagging.cpp
@@ -0,0 +1,44 @@
+/*
+    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+    Part of the libcorpus2 project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#include <libcorpus2/tagging.h>
+#include <libcorpus2/tagsetmanager.h>
+
+namespace Corpus2 {
+
+/*
+ * Builds the masking-only Tag described in tagging.h: a mask for the
+ * attribute attr_name, or one accumulated over every POS when it is empty.
+ */
+Tag get_attribute_mask(const Tagset& tagset, std::string attr_name)
+{
+	if (!attr_name.empty()) {
+		// Named attribute: the tagset supplies the mask for that name.
+		return Tag(0, tagset.get_attribute_mask(attr_name));
+	}
+
+	// Empty name: add the mask of every POS index to a single tag.
+	Tag all_pos;
+	idx_t idx = 0;
+	while (idx < tagset.pos_count()) {
+		all_pos.add_pos(tagset.get_pos_mask(idx));
+		++idx;
+	}
+
+	return all_pos;
+}
+
+} /* end ns Corpus2 */
diff --git a/libcorpus2/tagging.h b/libcorpus2/tagging.h
new file mode 100644
index 0000000000000000000000000000000000000000..b767a853fdad6d800827e53ed94c643215baf5a5
--- /dev/null
+++ b/libcorpus2/tagging.h
@@ -0,0 +1,40 @@
+/*
+    Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
+    Part of the libcorpus2 project
+
+    This program is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3 of the License, or (at your option)
+any later version.
+
+    This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+or FITNESS FOR A PARTICULAR PURPOSE.
+
+    See the LICENSE and COPYING files for more details.
+*/
+
+#ifndef LIBCORPUS2_TAGGING_H
+#define LIBCORPUS2_TAGGING_H
+
+#include <libcorpus2/tag.h>
+#include <libcorpus2/tagset.h>
+
+namespace Corpus2 {
+
+/**
+ * Creates a mask having all values of the given attribute set.
+ * Pass an empty string to get a mask for all POS values.
+ * The resulting object should only be used for masking as it won't be
+ * a valid tag.
+ *
+ * @arg tagset tagset to use
+ * @arg attr_name attribute as defined in tagset or empty string for POS
+ * @return mask for given attribute
+ */
+Tag get_attribute_mask(const Tagset& tagset,
+		const std::string attr_name);
+
+} /* end ns Corpus2 */
+
+#endif // LIBCORPUS2_TAGGING_H
diff --git a/swig/corpus2.i b/swig/corpus2.i
index 4c181af88f9a2ad626b50bdbda1f1495599acede..1763c5c39cf97524533ab2b5417c97f0a2b9bd8d 100644
--- a/swig/corpus2.i
+++ b/swig/corpus2.i
@@ -20,6 +20,7 @@
 %include "tokenreader.i"
 %include "tokenwriter.i"
 %include "libpwrnlperror.i"
+%include "tagging.i"
 
 %{
 #include <libcorpus2/util/settings.h>
diff --git a/swig/tagging.i b/swig/tagging.i
new file mode 100644
index 0000000000000000000000000000000000000000..8d2301a690fd17c615467df646776bd40f4f30cf
--- /dev/null
+++ b/swig/tagging.i
@@ -0,0 +1,24 @@
+#ifndef SWIG_LIBCORPUS2_TAGGING_I
+#define SWIG_LIBCORPUS2_TAGGING_I
+
+%module libcorpustagging
+%{
+	#include <libcorpus2/tagging.h>
+%}
+
+%include "std_string.i"
+
+%include "tag.i"
+%include "tagset.i"
+
+namespace Corpus2 {
+
+Tag get_attribute_mask(const Tagset& tagset,
+		const std::string attr_name);
+}
+
+using namespace std;
+using namespace Corpus2;
+
+
+#endif /* SWIG_LIBCORPUS2_TAGGING_I */