Skip to content
Snippets Groups Projects
Commit a8e745f5 authored by ilor's avatar ilor
Browse files

Merge branch 'master' of nlp.pwr.wroc.pl:wccl

parents 10349ee3 1aa187c4
No related branches found
No related tags found
No related merge requests found
...@@ -77,6 +77,7 @@ SET(libwccl_STAT_SRC ...@@ -77,6 +77,7 @@ SET(libwccl_STAT_SRC
ops/match/conditions/repeatedmatch.cpp ops/match/conditions/repeatedmatch.cpp
ops/match/conditions/tokencondition.cpp ops/match/conditions/tokencondition.cpp
ops/matchrule.cpp ops/matchrule.cpp
ops/matchrulesequence.cpp
ops/opsequence.cpp ops/opsequence.cpp
ops/tagaction.cpp ops/tagaction.cpp
ops/tagactions/delete.cpp ops/tagactions/delete.cpp
......
#include <libwccl/ops/matchrulesequence.h>
#include <libpwrutils/foreach.h>
namespace Wccl {
/**
 * Applies every rule held by this sequence to the given sentence,
 * in the order the rules are stored.
 * @param sentence Sentence to apply the rules to.
 * @throws InvalidArgument if the pointer is null or the sentence is empty.
 */
void MatchRuleSequence::apply_all(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence)
{
	// Short-circuit keeps empty() from being called through a null pointer.
	const bool usable = sentence && !sentence->empty();
	if (!usable) {
		throw InvalidArgument("sentence", "Received an empty sentence.");
	}
	for (iterator rule = begin(); rule != end(); ++rule) {
		rule->apply(sentence);
	}
}
/**
 * Builds the textual form of the whole section: "match_rules(" followed by
 * a newline, then each rule's to_string(tagset) joined with ";\n", then ")".
 * @param tagset Tagset passed on to every rule's to_string().
 * @returns The assembled string.
 */
std::string MatchRuleSequence::to_string(const Corpus2::Tagset& tagset) const
{
	std::ostringstream out;
	out << "match_rules(\n";
	// Empty before the first rule, ";\n" before every following one.
	const char* separator = "";
	for (const_iterator rule = begin(); rule != end(); ++rule) {
		out << separator << rule->to_string(tagset);
		separator = ";\n";
	}
	out << ")";
	return out.str();
}
std::ostream& MatchRuleSequence::write_to(std::ostream &os) const
{
os << "match_rules(\n";
for (size_t i = 0; i < size(); ++i) {
if (i != 0) {
os << ";\n";
}
os << at(i);
}
os << ")";
return os;
}
} /* end ns Wccl */
#ifndef LIBWCCL_OPS_MATCHRULESEQUENCE_H
#define LIBWCCL_OPS_MATCHRULESEQUENCE_H
#include <libwccl/ops/matchrule.h>
namespace Wccl {
/**
 * Represents a sequence of parsed WCCL Match rules. It's a convenience wrapper around
 * a vector of MatchRule objects that allows applying all contained MatchRules
 * one by one, in the order they are stored.
 * @note The class methods are not thread-safe
 */
class MatchRuleSequence : public std::vector<MatchRule>, public Expression
{
public:
/**
 * Creates a sequence holding copies of the given rules.
 * @param rules Rules to copy into the sequence.
 */
MatchRuleSequence(const std::vector<MatchRule>& rules);
/**
 * Creates an empty sequence.
 */
MatchRuleSequence();
/**
 * Applies all contained MatchRules to the given sentence, one after another.
 * This is the function-call form of apply_all() and simply forwards to it;
 * nothing is returned.
 * @param sentence Sentence to apply the rules to.
 * @throws InvalidArgument if the sentence pointer is null or the sentence is empty.
 * @see apply_all() - equivalent method; the \link operator()() operator() \endlink allows
 * more convenient functional notation, however if you only have a pointer
 * you might prefer the apply_all() method as shown below. The choice is yours.
 * \code
 * ruleseq(sentence);
 * // versus
 * ruleseq.apply_all(sentence);
 * // or if you have a pointer...
 * (*ruleseq_ptr)(sentence);
 * // versus
 * ruleseq_ptr->apply_all(sentence);
 * \endcode
 */
void operator()(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence);
/**
 * Applies all contained MatchRules to the given sentence, one after another,
 * in the order they are stored. Nothing is returned.
 * @param sentence Sentence to apply the rules to.
 * @throws InvalidArgument if the sentence pointer is null or the sentence is empty.
 * @see \link operator()() operator() \endlink - an equivalent of this method that
 * allows functional notation, treating MatchRuleSequence directly as a function object
 */
void apply_all(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence);
/**
 * Returns the textual form of the sequence: "match_rules(" and a newline,
 * the to_string() of each rule separated by ";\n", and a closing ")".
 * @param tagset Tagset passed to each rule's to_string().
 */
std::string to_string(const Corpus2::Tagset& tagset) const;
protected:
/**
 * Writes the sequence to the stream in the same "match_rules( ... )" layout
 * as to_string(), inserting each rule with its stream operator.
 * @param os Stream to write to.
 * @returns The stream that was passed in.
 */
std::ostream& write_to(std::ostream& os) const;
};
//
//--- implementation details ---
//
// Copy the supplied rules into the underlying vector.
inline
MatchRuleSequence::MatchRuleSequence(const std::vector<MatchRule>& rules)
	: std::vector<MatchRule>(rules)
{
}
// Start with an empty underlying vector.
inline
MatchRuleSequence::MatchRuleSequence()
	: std::vector<MatchRule>()
{
}
// Function-call form of apply_all(): forwards the sentence unchanged.
inline
void MatchRuleSequence::operator()(const boost::shared_ptr<Corpus2::AnnotatedSentence>& sentence)
{
	this->apply_all(sentence);
}
} /* end ns Wccl */
#endif // LIBWCCL_OPS_MATCHRULESEQUENCE_H
...@@ -6,15 +6,15 @@ ...@@ -6,15 +6,15 @@
namespace Wccl { namespace Wccl {
/** /**
* Represents a sequence of parsed WCCL Rules. It's a conveniency wrapper around * Represents a sequence of parsed WCCL tag rules. It's a conveniency wrapper around
* vector of Rule objects, that allows automatic execution of all contained Rules * vector of TagRule objects, that allows automatic execution of all contained TagRules
* for all positions of a Sentence. * for all positions of a Sentence.
* @note The class methods are not thread-safe * @note The class methods are not thread-safe
*/ */
class TagRuleSequence : public std::vector<TagRule>, public Expression class TagRuleSequence : public std::vector<TagRule>, public Expression
{ {
public: public:
TagRuleSequence(std::vector<TagRule> rules); TagRuleSequence(const std::vector<TagRule>& rules);
TagRuleSequence(); TagRuleSequence();
...@@ -77,7 +77,7 @@ protected: ...@@ -77,7 +77,7 @@ protected:
//--- implementation details --- //--- implementation details ---
// //
inline inline
TagRuleSequence::TagRuleSequence(std::vector<TagRule> rules) TagRuleSequence::TagRuleSequence(const std::vector<TagRule>& rules)
: std::vector<TagRule>(rules) { : std::vector<TagRule>(rules) {
} }
......
...@@ -1915,6 +1915,7 @@ match_rules_section [WcclFile& wccl_file] ...@@ -1915,6 +1915,7 @@ match_rules_section [WcclFile& wccl_file]
{ {
ParsingScope scope(wccl_file); ParsingScope scope(wccl_file);
boost::shared_ptr<MatchRule> match_rule; boost::shared_ptr<MatchRule> match_rule;
boost::shared_ptr<MatchRuleSequence> rule_seq = boost::make_shared<MatchRuleSequence>();
} }
: "match_rules" { : "match_rules" {
if (wccl_file.has_match_rules()) { if (wccl_file.has_match_rules()) {
...@@ -1923,16 +1924,16 @@ match_rules_section [WcclFile& wccl_file] ...@@ -1923,16 +1924,16 @@ match_rules_section [WcclFile& wccl_file]
} }
LPAREN LPAREN
match_rule = match_rule_operator [scope] { match_rule = match_rule_operator [scope] {
wccl_file.add_match_rule(match_rule); rule_seq->push_back(*match_rule);
scope.reset_variables(); scope.reset_variables();
} }
( (
SEMI match_rule = match_rule_operator [scope] { SEMI match_rule = match_rule_operator [scope] {
wccl_file.add_match_rule(match_rule); rule_seq->push_back(*match_rule);
scope.reset_variables(); scope.reset_variables();
} }
)* )*
RPAREN RPAREN { wccl_file.set_match_rules(rule_seq); }
; ;
any_operator_section any_operator_section
......
...@@ -36,6 +36,14 @@ boost::shared_ptr<const TagRuleSequence> WcclFile::get_tag_rules_ptr() const ...@@ -36,6 +36,14 @@ boost::shared_ptr<const TagRuleSequence> WcclFile::get_tag_rules_ptr() const
return tag_rules_; return tag_rules_;
} }
boost::shared_ptr<const MatchRuleSequence> WcclFile::get_match_rules_ptr() const
{
if (!has_match_rules()) {
throw WcclError("There are no match rules.");
}
return match_rules_;
}
std::ostream& WcclFile::write_to(std::ostream& os) const std::ostream& WcclFile::write_to(std::ostream& os) const
{ {
if (has_lexicons()) { if (has_lexicons()) {
...@@ -51,14 +59,7 @@ std::ostream& WcclFile::write_to(std::ostream& os) const ...@@ -51,14 +59,7 @@ std::ostream& WcclFile::write_to(std::ostream& os) const
os << tag_rules_->to_string(tagset_) << '\n'; os << tag_rules_->to_string(tagset_) << '\n';
} }
if (has_match_rules()) { if (has_match_rules()) {
os << "match_rules(\n"; os << match_rules_->to_string(tagset_) << '\n';
for (size_t i = 0; i < match_rules_.size(); ++i) {
if (i != 0) {
os << ";\n";
}
os << match_rules_[i]->to_string(tagset_);
}
os << "\n)\n";
} }
return os; return os;
} }
......
...@@ -8,13 +8,19 @@ ...@@ -8,13 +8,19 @@
#include <libwccl/values/tset.h> #include <libwccl/values/tset.h>
#include <libwccl/wcclfileopsections.h> #include <libwccl/wcclfileopsections.h>
#include <libwccl/ops/tagrulesequence.h> #include <libwccl/ops/tagrulesequence.h>
#include <libwccl/ops/matchrule.h> #include <libwccl/ops/matchrulesequence.h>
#include <libwccl/lexicon/lexicons.h> #include <libwccl/lexicon/lexicons.h>
#include <libwccl/exception.h> #include <libwccl/exception.h>
#include <libpwrutils/pathsearch.h> #include <libpwrutils/pathsearch.h>
namespace Wccl { namespace Wccl {
/**
* Class representing contents of parsed WCCL files
* - imported lexicons, any typed and untyped named
* operator sections, up to one tag rules section,
* and up to one match rules section.
*/
class WcclFile class WcclFile
: WcclFileOpSections<UntypedOpSequence>, : WcclFileOpSections<UntypedOpSequence>,
WcclFileOpSections<OpSequence<StrSet> >, WcclFileOpSections<OpSequence<StrSet> >,
...@@ -26,6 +32,9 @@ class WcclFile ...@@ -26,6 +32,9 @@ class WcclFile
public: public:
WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path); WcclFile(const Corpus2::Tagset& tagset, const std::string& search_path);
//
// Untyped and typed operator sections: @X:"sectioname" ( op1; op2 )
//
const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections(); const std::vector<boost::shared_ptr<UntypedOpSequence> >& untyped_sections();
template<class T> template<class T>
const typename std::vector<boost::shared_ptr<OpSequence<T> > >& sections(); const typename std::vector<boost::shared_ptr<OpSequence<T> > >& sections();
...@@ -76,14 +85,6 @@ public: ...@@ -76,14 +85,6 @@ public:
FunctionalOpSequence::name_op_v_t gen_all_op_pairs(); FunctionalOpSequence::name_op_v_t gen_all_op_pairs();
FunctionalOpSequence::name_op_v_c_t gen_all_op_pairs() const; FunctionalOpSequence::name_op_v_c_t gen_all_op_pairs() const;
void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon);
bool has_lexicon(const std::string& name) const;
bool has_lexicons() const;
boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const;
const Lexicon& get_lexicon(const std::string& name) const;
boost::shared_ptr<const Lexicons> get_lexicons_ptr() const;
const Lexicons& get_lexicons() const;
void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section); void add_untyped_section(const boost::shared_ptr<UntypedOpSequence>& section);
void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section); void add_untyped_section(const boost::shared_ptr<const UntypedOpSequence>& section);
void add_untyped_section(const UntypedOpSequence& section); void add_untyped_section(const UntypedOpSequence& section);
...@@ -94,6 +95,20 @@ public: ...@@ -94,6 +95,20 @@ public:
template<class T> template<class T>
void add_section(const OpSequence<T>& section); void add_section(const OpSequence<T>& section);
//
// Lexicons, import sections: import("path", "name")
//
void import_lexicon(const boost::shared_ptr<Lexicon>& lexicon);
bool has_lexicon(const std::string& name) const;
bool has_lexicons() const;
boost::shared_ptr<const Lexicon> get_lexicon_ptr(const std::string& name) const;
const Lexicon& get_lexicon(const std::string& name) const;
boost::shared_ptr<const Lexicons> get_lexicons_ptr() const;
const Lexicons& get_lexicons() const;
//
// Tag rules section: tag_rules ( rule1; rule2 )
//
bool has_tag_rules() const; bool has_tag_rules() const;
void set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules); void set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules);
...@@ -102,10 +117,19 @@ public: ...@@ -102,10 +117,19 @@ public:
boost::shared_ptr<TagRuleSequence> get_tag_rules_ptr(); boost::shared_ptr<TagRuleSequence> get_tag_rules_ptr();
boost::shared_ptr<const TagRuleSequence> get_tag_rules_ptr() const; boost::shared_ptr<const TagRuleSequence> get_tag_rules_ptr() const;
//
// Match rules section: match_rules ( apply1; apply2 )
//
bool has_match_rules() const; bool has_match_rules() const;
void add_match_rule(const boost::shared_ptr<MatchRule>& match_rule);
const std::vector<boost::shared_ptr<MatchRule> >& get_match_rules();
void set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules);
const MatchRuleSequence& get_match_rules() const;
boost::shared_ptr<MatchRuleSequence> get_match_rules_ptr();
boost::shared_ptr<const MatchRuleSequence> get_match_rules_ptr() const;
//
// Miscellaneous
//
friend std::ostream& operator<<(std::ostream& ostream, const WcclFile& wccl_file); friend std::ostream& operator<<(std::ostream& ostream, const WcclFile& wccl_file);
std::string to_string() const; std::string to_string() const;
...@@ -117,7 +141,7 @@ private: ...@@ -117,7 +141,7 @@ private:
std::ostream& write_to(std::ostream& ostream) const; std::ostream& write_to(std::ostream& ostream) const;
std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_; std::vector<boost::shared_ptr<FunctionalOpSequence> > all_sections_;
boost::shared_ptr<TagRuleSequence> tag_rules_; boost::shared_ptr<TagRuleSequence> tag_rules_;
std::vector<boost::shared_ptr<MatchRule> > match_rules_; boost::shared_ptr<MatchRuleSequence> match_rules_;
boost::shared_ptr<Lexicons> lexicons_; boost::shared_ptr<Lexicons> lexicons_;
const Corpus2::Tagset& tagset_; const Corpus2::Tagset& tagset_;
PwrNlp::PathSearcher<Wccl::FileNotFound> path_; PwrNlp::PathSearcher<Wccl::FileNotFound> path_;
...@@ -410,19 +434,22 @@ void WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules ...@@ -410,19 +434,22 @@ void WcclFile::set_tag_rules(const boost::shared_ptr<TagRuleSequence>& tag_rules
inline inline
bool WcclFile::has_match_rules() const bool WcclFile::has_match_rules() const
{ {
return !match_rules_.empty(); return match_rules_;
} }
inline inline
void WcclFile::add_match_rule(const boost::shared_ptr<MatchRule>& match_rule) const MatchRuleSequence& WcclFile::get_match_rules() const
{ {
match_rules_.push_back(match_rule); return *get_match_rules_ptr();
} }
inline inline
const std::vector<boost::shared_ptr<MatchRule> >& WcclFile::get_match_rules() void WcclFile::set_match_rules(const boost::shared_ptr<MatchRuleSequence>& match_rules)
{ {
return match_rules_; if (has_match_rules()) {
throw WcclError("Match rules already added.");
}
match_rules_ = match_rules;
} }
inline inline
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment