Skip to content
Snippets Groups Projects
Commit 261c5b03 authored by Adam Wardynski's avatar Adam Wardynski
Browse files

text() match condition, matching given text.

parent ce4d1483
Branches
No related merge requests found
......@@ -65,6 +65,7 @@ SET(libwccl_STAT_SRC
ops/match/conditions/conjconditions.cpp
ops/match/conditions/isannotatedas.cpp
ops/match/conditions/longest.cpp
ops/match/conditions/matchtext.cpp
ops/match/conditions/oneof.cpp
ops/match/conditions/optionalmatch.cpp
ops/match/conditions/repeatedmatch.cpp
......
#include <libwccl/ops/match/conditions/matchtext.h>
#include <sstream>
#include <libpwrutils/util.h>
namespace Wccl {
/**
 * Tries to match text_ against the sentence, starting at the current
 * position of the sentence context.
 *
 * The orths of consecutive tokens are glued together, with a single space
 * inserted before every token whose wa() is anything other than
 * PwrNlp::Whitespace::None. Tokens are consumed only while the fragment is
 * still shorter than text_, so the fragment is compared as soon as it is
 * long enough to possibly be equal.
 *
 * @param context execution context supplying the sentence context.
 * @returns a default-constructed MatchResult when the fragment differs from
 *          text_; otherwise a MatchResult wrapping a MatchVector that holds
 *          one TokenMatch per consumed token. On success the sentence
 *          position is moved just past the consumed tokens.
 */
MatchResult MatchText::apply(const ActionExecContext& context) const
{
    SentenceContext& sc = context.sentence_context();
    const int orig_iter = sc.get_position();

    // NOTE(review): sc.at(orig_iter) is dereferenced unconditionally; this
    // presumes the current position always points at a token - confirm
    // against SentenceContext::at().
    UnicodeString sent_frag(sc.at(orig_iter)->orth());
    int iter_pos = orig_iter + 1;

    // Strictly-less comparison: once the fragment has reached the length of
    // text_, appending another token could only make a matching fragment
    // stop matching (with "<=" a match was possible only at sentence end).
    while (sent_frag.length() < text_.length() && iter_pos < sc.size()) {
        if (sc.at(iter_pos)->wa() != PwrNlp::Whitespace::None) {
            sent_frag += " ";
        }
        sent_frag += sc.at(iter_pos)->orth();
        ++iter_pos;
    }

    if (sent_frag != text_) {
        return MatchResult();
    }

    // Record every token in [orig_iter, iter_pos) as part of the match.
    boost::shared_ptr<MatchVector> v(new MatchVector());
    for (int i = orig_iter; i < iter_pos; ++i) {
        v->append(boost::shared_ptr<TokenMatch>(new TokenMatch(i)));
    }

    // increase current sentence position to the point after the matched tokens.
    sc.set_position(orig_iter + v->size());
    return MatchResult(v);
}
/**
 * Builds the textual form of this condition: the condition name followed by
 * the UTF-8 encoded text in double quotes, wrapped in parentheses.
 * The tagset argument is not used.
 */
std::string MatchText::to_string(const Corpus2::Tagset&) const
{
    std::ostringstream buffer;
    buffer << name();
    buffer << "(\"";
    buffer << PwrNlp::to_utf8(text_);
    buffer << "\")";
    return buffer.str();
}
/**
 * Streams the condition name followed by the UTF-8 encoded text in double
 * quotes, wrapped in parentheses, and hands back the same stream.
 */
std::ostream& MatchText::write_to(std::ostream& os) const
{
    os << name();
    os << "(\"";
    os << PwrNlp::to_utf8(text_);
    os << "\")";
    return os;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
#define LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
#include <libwccl/ops/match/matchcondition.h>
#include <libwccl/ops/function.h>
#include <unicode/unistr.h>
namespace Wccl {
/**
 * Match condition named "text": holds a fixed, non-empty piece of text
 * that the condition is applied with.
 */
class MatchText : public MatchCondition
{
public:
    /**
     * Stores a copy of the text to match.
     * An empty text trips a BOOST_ASSERT.
     */
    MatchText(const UnicodeString& text)
        : text_(text)
    {
        BOOST_ASSERT(!text_.isEmpty());
    }

    /**
     * @returns The name of this condition, "text".
     */
    std::string name() const
    {
        return std::string("text");
    }

    /**
     * Runs the condition within the given execution context.
     * When a match is found, the current sentence position is advanced
     * by the number of matched tokens.
     */
    MatchResult apply(const ActionExecContext& context) const;

    /**
     * @returns String representation of the condition.
     */
    std::string to_string(const Corpus2::Tagset& tagset) const;

protected:
    /**
     * Writes the string representation of the condition to
     * an output stream.
     * @returns The stream that was written to.
     * @note May be incomplete and/or contain internal info.
     */
    std::ostream& write_to(std::ostream& ostream) const;

private:
    /// The text this condition was constructed with.
    const UnicodeString text_;
};
} /* end ns Wccl */
#endif // LIBWCCL_OPS_MATCH_CONDITIONS_MATCHTEXT_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment