Skip to content
Snippets Groups Projects
Commit b7d34d61 authored by ilor's avatar ilor
Browse files

Add the tagset-symbol-getter operator (GetSymbols), bump required Corpus2...

Add the tagset-symbol-getter operator (GetSymbols), bump required Corpus2 version to 1.0.2 since new features are needed.
parent 925af5c9
No related merge requests found
......@@ -4,7 +4,7 @@ PROJECT(wccl)
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/include/ )
find_package(Corpus2 0.1.2 REQUIRED)
find_package(Corpus2 1.0.2 REQUIRED)
set(LIBS ${LIBS} ${Corpus2_LIBRARY})
find_package(PwrUtils 0.0.3 REQUIRED)
......@@ -39,6 +39,7 @@ SET(libwccl_STAT_SRC
ops/functions/strset/getorth.cpp
ops/functions/strset/tolower.cpp
ops/functions/strset/toupper.cpp
ops/functions/tset/getsymbols.cpp
parser/grammar.g
parser/Parser.cpp
parser/ParserException.cpp
......
#include <libwccl/ops/functions/tset/getsymbols.h>
#include <libwccl/ops/formatters.h>
#include <libwccl/ops/functions/constant.h>
namespace Wccl {
/**
 * Builds the tagset-aware textual form of this operator by delegating to
 * UnaryFunctionFormatter, passing the position argument expression and
 * "[" / "]" as the opening and closing brackets.
 */
std::string GetSymbols::to_string(const Corpus2::Tagset& tagset) const
{
	std::string formatted =
		UnaryFunctionFormatter::to_string(tagset, *this, *pos_expr_, "[", "]");
	return formatted;
}
/**
 * Builds the tagset-independent textual form of this operator by delegating
 * to UnaryFunctionFormatter, passing the position argument expression and
 * "[" / "]" as the opening and closing brackets.
 */
std::string GetSymbols::to_raw_string() const
{
	std::string formatted =
		UnaryFunctionFormatter::to_raw_string(*this, *pos_expr_, "[", "]");
	return formatted;
}
/**
 * Name of this operator for the given tagset: the attribute name the tagset
 * reports for the values stored in the mask.
 */
std::string GetSymbols::name(const Corpus2::Tagset &tagset) const
{
	std::string attribute_name = tagset.get_attribute_name(mask_.get_values());
	return attribute_name;
}
/**
 * Name of this operator when no tagset is available: the raw dump of the
 * mask tag.
 */
std::string GetSymbols::raw_name() const
{
	std::string dumped_mask = mask_.raw_dump();
	return dumped_mask;
}
/**
 * Evaluates the position expression, fetches the token at that position from
 * the sentence in the execution context, and returns the tagset symbols of
 * that token restricted to the mask this operator was constructed with.
 *
 * @param context execution context supplying the sentence context and the
 *        environment for evaluating the position expression.
 * @returns the result of the default TSet function when the position is
 *          outside the sentence or the current position is not inside it;
 *          otherwise a TSet holding the union of the tags of all lexemes of
 *          the token, masked with mask_.
 */
GetSymbols::BaseRetValPtr GetSymbols::apply_internal(const FunExecContext& context) const
{
	const boost::shared_ptr<const Position>& pos = pos_expr_->apply(context);
	const SentenceContext& sc = context.sentence_context();
	if (pos->is_outside(sc) || !sc.is_current_inside()) {
		return detail::DefaultFunction<TSet>()->apply(context);
	}

	const Corpus2::Token* token = sc.at(*pos);

	// Union of the tags of every lexeme of the token.
	Corpus2::Tag symbols;
	foreach (const Corpus2::Lexeme& lexeme, token->lexemes()) {
		symbols.combine_with(lexeme.tag());
	}

	// Keep only the symbols selected by the mask. Previously mask_ was
	// stored but never applied, so every symbol of the token was returned
	// regardless of the attribute requested.
	// NOTE(review): relies on Corpus2::Tag::mask_with -- confirm it is
	// available in the required Corpus2 version.
	symbols.mask_with(mask_);

	boost::shared_ptr<TSet> tset = boost::make_shared<TSet>();
	tset->combine_with(symbols);
	return tset;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H
#define LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H
#include <libwccl/values/tset.h>
#include <libwccl/values/position.h>
#include <libwccl/ops/function.h>
namespace Wccl {
/**
 * Operator that takes a position expression and a tag mask and yields
 * a tagset symbol set (TSet) for the token at that position.
 */
class GetSymbols : public Function<TSet> {
public:
typedef boost::shared_ptr<Function<Position> > PosFunctionPtr;
/**
 * @param pos_expr expression yielding the position to inspect; asserted
 *        to be non-null.
 * @param mask tag mask stored with the operator; it is also used to derive
 *        the operator's name.
 */
GetSymbols(const PosFunctionPtr& pos_expr, const Corpus2::Tag& mask)
: pos_expr_(pos_expr), mask_(mask)
{
BOOST_ASSERT(pos_expr_);
}
/**
 * @returns String representation of the function in the form of:
 * "attribute_name[pos_expr_string]"
 */
std::string to_string(const Corpus2::Tagset& tagset) const;
/**
 * @returns String representation of the function in the form of:
 * "attribute_name[pos_expr_string]"
 * @note This version does not require a tagset, but will be incomplete
 * and/or contain internal info.
 */
std::string to_raw_string() const;
/**
 * @returns Name of the function that does not require a tagset: the raw
 * dump of the mask.
 */
std::string raw_name() const;
/**
 * @returns Name of the function for the given tagset: the attribute name
 * the tagset reports for the values of the mask.
 */
std::string name(const Corpus2::Tagset& tagset) const;
protected:
// Expression evaluated to obtain the position of the token to inspect.
const PosFunctionPtr pos_expr_;
// Tag mask given at construction.
Corpus2::Tag mask_;
/**
 * Gets a position from the argument expression, then gets the
 * word at that position from the Sentence in the SentenceContext,
 * then gets the tagset symbols matching the mask of the word.
 *
 * @returns A tagset symbol set of the word if position pointed to
 * lies within boundaries of the Sentence. Empty Tset otherwise.
 */
BaseRetValPtr apply_internal(const FunExecContext& context) const;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_FUNCTIONS_TSET_GETSYMBOLS_H
......@@ -61,6 +61,15 @@ public:
return tag_ == other.tag_;
}
/**
 * Combines the given tag into the tag held by this set, in place.
 */
void combine_with(const Corpus2::Tag& other)
{
	this->tag_.combine_with(other);
}
/**
 * Combines the tag held by another set into the tag held by this set,
 * in place.
 */
void combine_with(const TSet& other)
{
	// Delegate to the Corpus2::Tag overload with the other set's value.
	combine_with(other.get_value());
}
std::string to_string(const Corpus2::Tagset &) const;
std::string to_raw_string() const;
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment