Newer
Older
//don't try to add all the headers inside our namespace
#include <libwccl/parser/ParserException.h>
#include <libwccl/parser/parsingscope.h>
#include <cstdio>
#include <antlr/Token.hpp>
#include <boost/lexical_cast.hpp>
// values/variables
#include <libwccl/variables.h>
#include <libwccl/values/bool.h>
#include <libwccl/values/tset.h>
#include <libwccl/values/strset.h>
#include <libwccl/values/position.h>
// sentence context
#include <libwccl/sentencecontext.h>
// operators
#include <libwccl/ops/operator.h>
#include <libwccl/ops/functions/constant.h>
#include <libwccl/ops/functions/vargetter.h>
#include <libwccl/ops/functions/conditional.h>
#include <libwccl/ops/functions/setops.h>
#include <libwccl/ops/functions/bool/varsetter.h>
ilor
committed
#include <libwccl/ops/functions/bool/predicates/debug.h>
#include <libwccl/ops/functions/bool/predicates/ambiguous.h>
#include <libwccl/ops/functions/bool/predicates/issingular.h>
#include <libwccl/ops/functions/bool/predicates/or.h>
#include <libwccl/ops/functions/bool/predicates/nor.h>
#include <libwccl/ops/functions/bool/predicates/and.h>
#include <libwccl/ops/functions/bool/predicates/regex.h>
#include <libwccl/ops/functions/bool/predicates/intersects.h>
#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
#include <libwccl/ops/functions/bool/predicates/isinside.h>
#include <libwccl/ops/functions/bool/predicates/isoutside.h>
#include <libwccl/ops/functions/bool/predicates/isempty.h>
#include <libwccl/ops/functions/bool/predicates/equals.h>
#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
#include <libwccl/ops/functions/bool/predicates/anntoken.h>
#include <libwccl/ops/functions/strset/affix.h>
#include <libwccl/ops/functions/strset/getorth.h>
#include <libwccl/ops/functions/strset/toupper.h>
#include <libwccl/ops/functions/strset/tolower.h>
#include <libwccl/ops/functions/strset/getlemmas.h>
#include <libwccl/ops/functions/strset/lextranslator.h>
#include <libwccl/ops/functions/strset/propval.h>
Adam Radziszewski
committed
#include <libwccl/ops/functions/strset/anninter.h>
#include <libwccl/ops/functions/tset/getsymbols.h>
#include <libwccl/ops/functions/tset/getwordclass.h>
#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
#include <libwccl/ops/functions/position/relativeposition.h>
#include <libwccl/ops/functions/position/lasttoken.h>
#include <libwccl/ops/functions/position/firsttoken.h>
#include <libwccl/ops/functions/bool/iterations/only.h>
#include <libwccl/ops/functions/bool/iterations/atleast.h>
#include <libwccl/ops/functions/bool/iterations/leftlook.h>
#include <libwccl/ops/functions/bool/iterations/rightlook.h>
#include <libwccl/ops/functions/bool/iterations/skip.h>
ilor
committed
#include <libwccl/ops/matchrule.h>
#include <libwccl/ops/tagrulesequence.h>
#include <libwccl/ops/tagactions/unify.h>
#include <libwccl/ops/tagactions/delete.h>
#include <libwccl/ops/tagactions/select.h>
#include <libwccl/ops/tagactions/relabel.h>
#include <libwccl/ops/tagactions/mark.h>
#include <libwccl/ops/tagactions/unmark.h>
// Match operators
#include <libwccl/values/tokenmatch.h>
#include <libwccl/values/annotationmatch.h>
#include <libwccl/values/matchvector.h>
#include <libwccl/ops/match/applyoperator.h>
#include <libwccl/ops/match/conditions/optionalmatch.h>
#include <libwccl/ops/match/conditions/repeatedmatch.h>
#include <libwccl/ops/match/conditions/isannotatedas.h>
#include <libwccl/ops/match/conditions/matchtext.h>
#include <libwccl/ops/match/conditions/conjconditions.h>
#include <libwccl/ops/match/conditions/tokencondition.h>
#include <libwccl/ops/match/conditions/oneof.h>
#include <libwccl/ops/match/conditions/longest.h>
#include <libwccl/ops/match/actions/markmatch.h>
#include <libwccl/ops/match/actions/unmarkmatch.h>
#include <libwccl/ops/match/actions/setpropmatch.h>
#include <libwccl/ops/match/actions/overwritematch.h>
#include <libwccl/ops/functions/match/submatch.h>
// Wccl whole file syntax
#include <libwccl/wcclfile.h>
#include <libwccl/lexicon/lexiconparser.h>
// Unicode String
#include <unicode/uniset.h>
#include <unicode/unistr.h>
ANTLR_BEGIN_NAMESPACE(Wccl)
Paweł Kędzia
committed
genHashLines = false;
namespace = "Wccl";
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
exportVocab = ANTLRExpr;
const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
Paweł Kędzia
committed
// Converts a STRING token to a UnicodeString and drops its first and last
// character (presumably the enclosing quotes - the code does not check them).
// The text comes from token_ref_to_ustring(); when it is shorter than three
// characters an empty string is returned instead.
const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
    const UnicodeString quoted = token_ref_to_ustring(rstr);
    const int quoted_len = quoted.length();
    if (quoted_len >= 3) {
        UnicodeString inner;
        // Copy everything between the first and the last character.
        quoted.extract(1, quoted_len - 2, inner);
        return inner;
    }
    return "";
}
Paweł Kędzia
committed
//
// Returns the token text with one pair of enclosing grave accents removed.
// Text that is shorter than two characters, or that is not both started and
// ended by a grave accent, is returned unchanged.
const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
    const std::string text = token_ref_to_std_string(rstr);
    const std::string::size_type n = text.size();
    const bool wrapped = n >= 2 && text[0] == '`' && text[n - 1] == '`';
    return wrapped ? text.substr(1, n - 2) : text;
}
//
const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
return (((antlr::Token*)rstr)->getText());
//
int token_ref_to_int(antlr::RefToken& rstr) {
return atoi(((antlr::Token*)rstr)->getText().c_str());
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope.
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
Paweł Kędzia
committed
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<Operator<StrSet> > res]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
: res = functional_operator_strset [scope]
EOF
// ----------------------------------------------------------------------------
// Rule for parsing bool operator with scope.
// Returns boost::shared_ptr<Operator<Bool> >
parse_bool_operator
Paweł Kędzia
committed
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<Operator<Bool> > res]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
: res = functional_operator_bool [scope]
EOF
// ----------------------------------------------------------------------------
// Rule for parsing symbol set operator with scope.
// Returns boost::shared_ptr<Operator<TSet> >
parse_symset_operator
Paweł Kędzia
committed
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<Operator<TSet> > res]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
: res = functional_operator_symset [scope]
EOF
// ----------------------------------------------------------------------------
// Rule for parsing position operator with scope.
// Returns boost::shared_ptr<Operator<Position> >
parse_position_operator
returns [boost::shared_ptr<Operator<Position> > res]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
: res = functional_operator_position [scope]
EOF
;
// ----------------------------------------------------------------------------
// Rule for parsing match operator with scope.
// Returns boost::shared_ptr<Operator<Match> >
parse_match_operator
[const Corpus2::Tagset &tagset]
returns [boost::shared_ptr<Operator<Match> > res]
{
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
: res = functional_operator_match [scope]
EOF
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing single WCCL Rule
Adam Radziszewski
committed
parse_single_tag_rule
[const Corpus2::Tagset &tagset]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
Adam Radziszewski
committed
: rle = tag_rule [scope]
Adam Radziszewski
committed
// Rule for parsing tag rule section in the wccl file
// Returns boost::shared_ptr<TagRuleSequence>
Adam Radziszewski
committed
parse_tag_rule_sequence
[const Corpus2::Tagset& tagset]
returns [boost::shared_ptr<TagRuleSequence> rule_seq]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
Adam Radziszewski
committed
: rule_seq = tag_rules[scope]
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing the match rules
ilor
committed
// Returns boost::shared_ptr<Matching::MatchRule>
parse_match_rule
[const Corpus2::Tagset& tagset]
ilor
committed
returns [boost::shared_ptr<Matching::MatchRule> ret_match]
Lexicons empty_lex;
ParsingScope scope(tagset, empty_lex);
: ret_match = match_rule_operator[scope]
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Rule for parsing wccl files
parse_wccl_file
[const Corpus2::Tagset& tagset, const std::string search_path]
returns [boost::shared_ptr<WcclFile> wccl_file]
{
wccl_file = boost::make_shared<WcclFile>(tagset, search_path);
: (imports_section [*wccl_file])?
(wccl_file_section [*wccl_file])+
EOF
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Single or multiple (comma separated) elements in string set, may be:
// 'a' "a" [] ['a'] ['a', 'b'] ["a"] ["a", "b"] ['a', "b"]
// Parsing strset literal and returning plain strset value.
// Returns boost::shared_ptr<StrSet>
strset_literal
returns [boost::shared_ptr<StrSet> s_set]
Paweł Kędzia
committed
{
s_set.reset(new StrSet());
Paweł Kędzia
committed
}
: s0: STRING {
s_set->insert(token_ref_to_ustring(s0));
| LBRACKET
(
s1: STRING {
s_set->insert(token_ref_to_ustring(s1));
}
(
COMMA s2: STRING {
s_set->insert(token_ref_to_ustring(s2));
}
)*
)?
RBRACKET
;
// Returns boost::shared_ptr<Constant<StrSet> >
strset_value
returns [boost::shared_ptr<Constant<StrSet> > val]
boost::shared_ptr<StrSet> set;
: set = strset_literal {
val.reset(new Constant<StrSet>(*set));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
// Element of sym set. This rule inserts element into symbol set
// using corresponding tagset.
// WARNING! This rule can throw ParserException! Be careful!
symset_elem
[const Corpus2::Tagset& tagset, boost::shared_ptr<TSet>& t_set]
}
catch(Corpus2::TagParseError &e) {
throw(ParserException(e.info()));
}
// Symset literal. Symset element may be:
// a, `a ` (this is guaranteed by lexer rule - SYMBOL) or {a} {`a`} {a, b}
// {`a`, `b`} {a, `b`} {`a`, b}
// Parsing symset literal and returning plain symset value.
// Returns boost::shared_ptr<TSet>
symset_literal
returns [boost::shared_ptr<TSet> t_set]
Paweł Kędzia
committed
{
t_set.reset(new TSet());
Paweł Kędzia
committed
}
: symset_elem [tagset, t_set]
(
symset_elem [tagset, t_set] (COMMA symset_elem [tagset, t_set])*
)?
// Symset value, as constant symbol set
// Returns boost::shared_ptr<Constant<TSet> >
symset_value
returns [boost::shared_ptr<Constant<TSet> > val]
boost::shared_ptr<TSet> set;
: set = symset_literal [tagset] {
val.reset(new Constant<TSet>(*set));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
// Bool literal: the keyword True or False.
// Parses the literal and returns the plain bool value.
// Returns boost::shared_ptr<Bool>
bool_literal
    returns [boost::shared_ptr<Bool> val]
    : "True"  { val.reset(new Bool(true));  }
    | "False" { val.reset(new Bool(false)); }
;
// Bool value, as a constant bool value
// Returns boost::shared_ptr<Constant<Bool> >
bool_value
returns [boost::shared_ptr<Constant<Bool> > val]
Paweł Kędzia
committed
{
boost::shared_ptr<Bool> bool_lit;
Paweł Kędzia
committed
}
: bool_lit = bool_literal {
val.reset(new Constant<Bool>(*bool_lit));
Paweł Kędzia
committed
}
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
// Position literal may be:
// (+|-)?(0-9)+ or begin or end or nowhere
// Parsing position literal and returning plain position value.
// returns boost::shared_ptr<Position>
Paweł Kędzia
committed
position_literal
returns [boost::shared_ptr<Position> val]
val.reset(new Position(Position(i)));
val.reset(new Position(Position(Position::Begin)));
Paweł Kędzia
committed
}
val.reset(new Position(Position(Position::End)));
val.reset(new Position(Position(Position::Nowhere)));
Paweł Kędzia
committed
}
;
// Position as constant position value
// Returns boost::shared_ptr<Constant<Position> >
Paweł Kędzia
committed
position_value
returns [boost::shared_ptr<Constant<Position> > val]
Paweł Kędzia
committed
{
boost::shared_ptr<Position> pos_lit;
Paweł Kędzia
committed
}
: pos_lit = position_literal {
val.reset(new Constant<Position>(*pos_lit));
// ----------------------------------------------------------------------------
// Match literal: wraps the MatchData parsed by match_data_literal in a Match
// value. Used in match operators, e.g. TOK[position] and ANN[position, name].
// Returns boost::shared_ptr<Match>
match_literal
    returns [boost::shared_ptr<Match> val]
{
    boost::shared_ptr<MatchData> m;
}
    : m = match_data_literal {
        val.reset(new Match(m));
    }
;
// Constant match value: a Constant function built from the parsed Match
// literal.
// Returns boost::shared_ptr<Constant<Match> >
match_value_const
    returns [boost::shared_ptr<Constant<Match> > val]
{
    boost::shared_ptr<Match> m;
}
    : m = match_literal {
        val.reset(new Constant<Match>(*m));
    }
;
// ----------------------------------------------------------------------------
// Match data literal used in match operators, such as TOK[position] and ANN[position, name]
// Returns boost::shared_ptr<MatchData>
Adam Wardynski
committed
match_data_literal
returns [boost::shared_ptr<MatchData> val]
Adam Wardynski
committed
: val = token_match_literal
| val = ann_match_literal
| val = match_vector_literal
Adam Wardynski
committed
// token match literal - TOK[position]
// Returns boost::shared_ptr<TokenMatch>
Adam Wardynski
committed
token_match_literal
returns [boost::shared_ptr<TokenMatch> val]
{
}
: "TOK" LBRACKET u: UNSIGNED_INT RBRACKET {
val.reset(new TokenMatch(token_ref_to_int(u)));
Adam Wardynski
committed
// annotation match literal - ANN[position, name]
// Returns boost::shared_ptr<AnnotationMatch>
Adam Wardynski
committed
ann_match_literal
returns [boost::shared_ptr<AnnotationMatch> val]
{
}
: "ANN" LBRACKET u : UNSIGNED_INT COMMA channel : STRING RBRACKET {
val.reset(new AnnotationMatch(token_ref_to_int(u), token_ref_to_std_string(channel)));
Adam Wardynski
committed
// Match vector literal: MATCH() or MATCH(token, ann, MATCH())
// Starts from an empty MatchVector; the optional items between the
// parentheses are appended to it by match_vector_literal_item.
// Returns boost::shared_ptr<MatchVector>
match_vector_literal
    returns [boost::shared_ptr<MatchVector> val]
{
    val.reset(new MatchVector());
}
    : "MATCH" LPAREN (match_vector_literal_item [val])? RPAREN
;
// Body of the MATCH value: one or more comma-separated match literals.
// The rule returns nothing; each parsed literal is appended, in source order,
// to the MatchVector passed in by reference.
match_vector_literal_item [boost::shared_ptr<MatchVector>& mvector]
{
    boost::shared_ptr<Match> m_val;
}
    : m_val = match_literal {
        mvector->append(m_val);
    }
    (
        COMMA m_val = match_literal {
            mvector->append(m_val);
        }
    )*
;
// ----------------------------------------------------------------------------
// Integer literal, signed or unsigned: 1, +1, -1
// Returns the value converted by token_ref_to_int (initialised to 0).
number
    returns [int ret]
{
    ret = 0;
}
    : signed_tok: SIGNED_INT {
        ret = token_ref_to_int(signed_tok);
    }
    | unsigned_tok: UNSIGNED_INT {
        ret = token_ref_to_int(unsigned_tok);
    }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VARIABLES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Position: $Name
// Gets the Position variable from Variables, creating it first if it is not there yet.
// Returns boost::shared_ptr<VariableAccessor<Position> >
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<Position> > pos_acc]
Paweł Kędzia
committed
: POS_PREFIX n: SYMBOL {
vars.get_put<Position>(str_token_rem_grav(n));
VariableAccessor<Position> acc =
vars.create_accessor<Position>(str_token_rem_grav(n));
pos_acc.reset(new VariableAccessor<Position>(acc));
// VarGetter for Position variable. This rule wraps position_variable_acc.
// Returns boost::shared_ptr<VarGetter<Position> >
[Variables& vars]
returns [boost::shared_ptr<VarGetter<Position> > op]
boost::shared_ptr<VariableAccessor<Position> > pos_acc;
}
: pos_acc = position_variable_acc [vars] {
op.reset(new VarGetter<Position>(*pos_acc));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// String set, $s:name
// This expression gets variable of StrSet type from the scope's Variables.
// Creates one first if it's not yet there.
//
// Returns boost::shared_ptr<VariableAccessor<StrSet> >
strset_variable_acc
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<StrSet> > strset_acc]
Paweł Kędzia
committed
: STR_PREFIX n: SYMBOL {
vars.get_put<StrSet>(str_token_rem_grav(n));
VariableAccessor<StrSet> acc =
vars.create_accessor<StrSet>(str_token_rem_grav(n));
strset_acc.reset(new VariableAccessor<StrSet>(acc));
// Vargetter for StrSet variable. This rule wrapped strset_variable_acc.
// Returns boost::shared_ptr<VarGetter<StrSet> >
strset_variable
[Variables& vars]
returns [boost::shared_ptr<VarGetter<StrSet> > op]
boost::shared_ptr<VariableAccessor<StrSet> > strset_acc;
: strset_acc = strset_variable_acc [vars] {
op.reset(new VarGetter<StrSet>(*strset_acc));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Symbol set: $t:name
// This expression gets variable of TSet type from the scope's Variables.
// Creates one first if it's not there yet.
//
// Returns boost::shared_ptr<VariableAccessor<TSet> >
symset_variable_acc
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<TSet> > symset_acc]
Paweł Kędzia
committed
: TST_PREFIX n: SYMBOL {
vars.get_put<TSet>(str_token_rem_grav(n));
VariableAccessor<TSet> acc =
vars.create_accessor<TSet>(str_token_rem_grav(n));
symset_acc.reset(new VariableAccessor<TSet>(acc));
// Vargetter for symbol set variable. This rule wrapped symset_variable_acc
// Returns boost::shared_ptr<VarGetter<TSet> >
symset_variable
[Variables& vars]
returns [boost::shared_ptr<VarGetter<TSet> > op]
boost::shared_ptr<VariableAccessor<TSet> > symset_acc;
: symset_acc = symset_variable_acc [vars] {
op.reset(new VarGetter<TSet>(*symset_acc));
Paweł Kędzia
committed
// ----------------------------------------------------------------------------
Paweł Kędzia
committed
// Bool: $b:name
// This expression gets variable of Bool type from the scope's Variables.
// Creates one first if it's not there yet.
// Returns boost::shared_ptr<VariableAccessor<Bool> >
bool_variable_acc
[Variables& vars]
returns [boost::shared_ptr<VariableAccessor<Bool> > bool_acc]
Paweł Kędzia
committed
: BOOL_PREFIX n: SYMBOL {
vars.get_put<Bool>(str_token_rem_grav(n));
VariableAccessor<Bool> acc =
vars.create_accessor<Bool>(str_token_rem_grav(n));
bool_acc.reset(new VariableAccessor<Bool>(acc));
// Vargetter for bool variable. It is only wrapper for bool_variable_acc
// Returns boost::shared_ptr<VarGetter<Bool> >
bool_variable
[Variables& vars]
returns [boost::shared_ptr<VarGetter<Bool> > op]
boost::shared_ptr<VariableAccessor<Bool> > bool_acc;
: bool_acc = bool_variable_acc [vars] {
op.reset(new VarGetter<Bool>(*bool_acc));
// ----------------------------------------------------------------------------
// Match: $m:name
// Fetches the Match variable with the given name from vars (get_put is
// called first, so the variable is created when it is not there yet) and
// wraps an accessor to it. Enclosing grave accents are removed from the name.
// Returns boost::shared_ptr<VariableAccessor<Match> >
match_vector_variable_acc
    [Variables& vars]
    returns [boost::shared_ptr<VariableAccessor<Match> > mvv_acc]
    : MATCH_VECTOR_PREFIX name_tok: SYMBOL {
        const std::string var_name = str_token_rem_grav(name_tok);
        vars.get_put<Match>(var_name);
        mvv_acc.reset(
            new VariableAccessor<Match>(vars.create_accessor<Match>(var_name)));
    }
;
// VarGetter for a Match variable; wraps the accessor produced by
// match_vector_variable_acc.
// Returns boost::shared_ptr<VarGetter<Match> >
match_vector_variable
    [Variables& vars]
    returns [boost::shared_ptr<VarGetter<Match> > mvv]
{
    boost::shared_ptr<VariableAccessor<Match> > accessor;
}
    : accessor = match_vector_variable_acc [vars] {
        mvv.reset(new VarGetter<Match>(*accessor));
    }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Symbol set (tagset) operators
// Returns boost::shared_ptr<Function<TSet> >
///////////////////////////////////////////////////////////////////////////////
symset_operator
returns [boost::shared_ptr<Function<TSet> > ret]
: ret = symset_condition [scope]
| {LA(1)==SYMBOL && LA(2)==LBRACKET}? (ret = symset_getsymbol [scope])
| ret = symset_var_val [scope]
| ret = symset_class [scope]
| ret = symset_range [scope]
| ret = symset_catflt [scope]
| ret = symset_agrflt [scope]
| ret = symset_union [scope]
| ret = symset_intersection [scope]
//
| LPAREN ret = symset_operator [scope] RPAREN
// ----------------------------------------------------------------------------
// Comma-separated list of one or more symset operators.
// Returns a shared vector holding the parsed operators in source order.
symset_operator_comma_sep
    [ParsingScope& scope]
    returns
        [boost::shared_ptr<std::vector<boost::shared_ptr<Function<TSet> > > > ret_v]
{
    typedef boost::shared_ptr<Function<TSet> > TSetFunctionPtr;
    TSetFunctionPtr item;
    ret_v.reset(new std::vector<TSetFunctionPtr>);
}
    : item = symset_operator [scope] { ret_v->push_back(item); }
      ( COMMA item = symset_operator [scope] { ret_v->push_back(item); } )*
;
// ----------------------------------------------------------------------------
// Wraps the parsed Function<TSet>, together with the scope's variables,
// into an Operator<TSet>.
functional_operator_symset
    [ParsingScope& scope]
    returns [boost::shared_ptr<Operator<TSet> > op]
{
    boost::shared_ptr<Function<TSet> > fn;
}
    : fn = symset_operator [scope] {
        op.reset(new Operator<TSet>(fn, scope.variables()));
    }
;
// ----------------------------------------------------------------------------
symset_var_val
returns [boost::shared_ptr<Function<TSet> > op]
: op = symset_variable [scope.variables()]
| op = symset_value [scope.tagset()]
// ----------------------------------------------------------------------------
// Condition of the symset value:
// if (Bool, TSet, TSet)
// ? TSet ? Bool : {}
symset_condition
returns [boost::shared_ptr<Function<TSet> > op]
boost::shared_ptr<Function<Bool> > test;
boost::shared_ptr<Function<TSet> > p_true, p_false;
: "if" LPAREN test = bool_operator [scope] COMMA
p_true = symset_operator [scope]
(COMMA p_false = symset_operator [scope])?
RPAREN {
Paweł Kędzia
committed
if (p_false) {
op.reset(new Conditional<TSet>(test, p_true, p_false));
Paweł Kędzia
committed
}
else {
op.reset(new Conditional<TSet>(test, p_true));
Paweł Kędzia
committed
}
}
Paweł Kędzia
committed
| Q_MARK
(p_true = symset_operator [scope])
Paweł Kędzia
committed
Q_MARK
(test = bool_operator [scope]) {
op.reset(new Conditional<TSet>(test, p_true));
Paweł Kędzia
committed
}
// ----------------------------------------------------------------------------
// GetSymbol operator may be cas, m1, f, sg...
// WARNING! This rule can throw ParserException! Be careful!
symset_getsymbol
returns [boost::shared_ptr<Function<TSet> > op]
{
Corpus2::Tag tag;
boost::shared_ptr<Wccl::Function<Position> > position;
}
: t: SYMBOL LBRACKET position = position_operator [scope] RBRACKET {
tag = scope.tagset().parse_symbol(str_token_rem_grav(t));
}
catch(Corpus2::TagParseError &e) {
throw(ParserException(e.info()));
}
op.reset(new Wccl::GetSymbols(tag, position));
}
;
// ----------------------------------------------------------------------------
// Class operator.
symset_class
returns [boost::shared_ptr<Function<TSet> > ret]
{
boost::shared_ptr<Function<Position> > pos;
}
: "class" LBRACKET pos = position_operator [scope] RBRACKET {
ret.reset(new GetWordClass(pos));
}
;
// ----------------------------------------------------------------------------
// Range operator: range(class, begin, end) or range({...}, begin, end)
returns [boost::shared_ptr<Function<TSet> > ret]
{
boost::shared_ptr<TSet> tset;
boost::shared_ptr<Function<Position> > p1, p2;
}
(tset = symset_literal [scope.tagset()] | tag_class: "class") COMMA
p1 = position_operator [scope] COMMA
p2 = position_operator [scope]
ret.reset(new GetSymbolsInRange(Corpus2::Tag::all_pos_mask, p1, p2));
ret.reset(new GetSymbolsInRange(tset->get_value(), p1, p2));
// ----------------------------------------------------------------------------
// Catflt operator
symset_catflt
returns [boost::shared_ptr<Function<TSet> > ret]
{
boost::shared_ptr<Function<TSet> > selector, mask;
boost::shared_ptr<Function<Position> > position;
}
: "catflt" LPAREN
position = position_operator [scope] COMMA
selector = symset_operator [scope] COMMA
mask = symset_operator [scope]
RPAREN {
ret.reset(new CatFilter(position, selector, mask));
}
;
// ----------------------------------------------------------------------------
// Agrflt operator
symset_agrflt
returns [boost::shared_ptr<Function<TSet> > ret]
{
boost::shared_ptr<Function<TSet> > attr, mask;
boost::shared_ptr<Function<Position> > lpos, rpos;
}
: "agrflt" LPAREN
lpos = position_operator [scope] COMMA
rpos = position_operator [scope] COMMA
attr = symset_operator [scope] COMMA
mask = symset_operator [scope]
ret.reset(new AgrFilter(lpos, rpos, attr, mask, scope.tagset()));
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
// ----------------------------------------------------------------------------
// Union operator: union(symset, symset, ...)
// Builds a SetUnion<TSet> over the comma-separated symset operators.
symset_union
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<TSet> > ret]
{
    boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > operands;
}
    : "union" LPAREN operands = symset_operator_comma_sep [scope] RPAREN {
        ret.reset(new SetUnion<TSet>(operands));
    }
;
// ----------------------------------------------------------------------------
// Intersection operator: intersection(symset, symset, ...)
// Builds a SetIntersection<TSet> over the comma-separated symset operators.
symset_intersection
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<TSet> > ret]
{
    boost::shared_ptr< std::vector< boost::shared_ptr<Function<TSet> > > > operands;
}
    : "intersection" LPAREN operands = symset_operator_comma_sep [scope] RPAREN {
        ret.reset(new SetIntersection<TSet>(operands));
    }
;
///////////////////////////////////////////////////////////////////////////////
// Position operator
// Returns boost::shared_ptr<Function<Position> >
///////////////////////////////////////////////////////////////////////////////
position_operator
returns [boost::shared_ptr<Function<Position> > ret]
( ret = position_var_val [scope.variables()]
| ret = position_condition [scope]
| ret = position_first_token [scope]
| ret = position_last_token [scope]
| LPAREN ret = position_operator [scope] RPAREN
)
( // if there is SIGNED_INT after the position, it is actually a relative position
i: SIGNED_INT {
ret.reset(new RelativePosition(ret, token_ref_to_int(i)));
}
)?
;
// ----------------------------------------------------------------------------
// Wraps the parsed Function<Position>, together with the scope's variables,
// into an Operator<Position>.
functional_operator_position
    [ParsingScope& scope]
    returns [boost::shared_ptr<Operator<Position> > op]
{
    boost::shared_ptr<Function<Position> > fn;
}
    : fn = position_operator [scope] {
        op.reset(new Operator<Position>(fn, scope.variables()));
    }
;
// ----------------------------------------------------------------------------
// Either a constant position value or a position variable taken from vars;
// both are returned through the common Function<Position> pointer.
position_var_val
    [Variables& vars]
    returns [boost::shared_ptr<Function<Position> > ret]
    : ( ret = position_value
      | ret = position_variable [vars]
      )
;
// ----------------------------------------------------------------------------
// Condition of the position value
// if (Bool, Position, Position)
// ? Position ? Bool : 0
position_condition
returns [boost::shared_ptr<Function<Position> > op]
{
boost::shared_ptr<Function<Bool> > test;
boost::shared_ptr<Function<Position> > p_true, p_false;
}
: "if" LPAREN test = bool_operator [scope] COMMA
p_true = position_operator [scope]
(COMMA p_false = position_operator [scope])?
RPAREN {
if (p_false) {
op.reset(new Conditional<Position>(test, p_true, p_false));
}
else {
op.reset(new Conditional<Position>(test, p_true));
}
}
| Q_MARK
p_true = position_operator [scope]
Q_MARK
test = bool_operator [scope] {
op.reset(new Conditional<Position>(test, p_true));
}
// ----------------------------------------------------------------------------
// Position of the first token in a match:
// first(Match)
position_first_token [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Position> > ret]
{
    boost::shared_ptr<Function<Match> > match_fn;
}
    : "first" LPAREN match_fn = match_operator [scope] RPAREN {
        ret.reset(new FirstToken(match_fn));
    }
;
// ----------------------------------------------------------------------------
// Position of the last token in a match:
// last(Match)
position_last_token [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Position> > ret]
{
    boost::shared_ptr<Function<Match> > match_fn;
}
    : "last" LPAREN match_fn = match_operator [scope] RPAREN {
        ret.reset(new LastToken(match_fn));
    }
;
///////////////////////////////////////////////////////////////////////////////
// String set operators: dispatches to one of the concrete strset rules
// below, or to a parenthesised strset operator.
// Returns boost::shared_ptr<Function<StrSet> >
///////////////////////////////////////////////////////////////////////////////
strset_operator [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
    : ret = strset_orth [scope]
    | ret = strset_base [scope]
    | ret = strset_prop [scope]
    | ret = strset_lower [scope]
    | ret = strset_upper [scope]
    | ret = strset_affix [scope]
    | ret = strset_var_val [scope]
    | ret = strset_condition [scope]
    | ret = strset_union [scope]
    | ret = strset_intersection [scope]
    | ret = strset_anninter [scope]
    //
    | LPAREN ret = strset_operator [scope] RPAREN
;
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
// ----------------------------------------------------------------------------
// Comma-separated list of one or more strset operators.
// Returns a shared vector holding the parsed operators in source order.
strset_operator_comma_sep
    [ParsingScope& scope]
    returns
        [boost::shared_ptr< std::vector<boost::shared_ptr<Function<StrSet> > > > ret_v]
{
    typedef boost::shared_ptr<Function<StrSet> > StrSetFunctionPtr;
    StrSetFunctionPtr item;
    ret_v.reset(new std::vector<StrSetFunctionPtr>);
}
    : item = strset_operator [scope] { ret_v->push_back(item); }
      ( COMMA item = strset_operator [scope] { ret_v->push_back(item); } )*
;
// ----------------------------------------------------------------------------
// Wraps the parsed Function<StrSet>, together with the scope's variables,
// into an Operator<StrSet>.
functional_operator_strset
    [ParsingScope& scope]
    returns [boost::shared_ptr<Operator<StrSet> > op]
{
    boost::shared_ptr<Function<StrSet> > fn;
}
    : fn = strset_operator [scope] {
        op.reset(new Operator<StrSet>(fn, scope.variables()));
    }
;
// ----------------------------------------------------------------------------
// Orth operator.
// Syntax: orth[Position]
// scope is forwarded to position_operator; the result is a GetOrth function
// built over the parsed position.
strset_orth
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<Position> > pos;
}
    : "orth" LBRACKET pos = position_operator [scope] RBRACKET {
        ret.reset(new GetOrth(pos));
    }
;
// ----------------------------------------------------------------------------
// Base operator.
// Syntax: base[Position]
// scope is forwarded to position_operator; the result is a GetLemmas function
// built over the parsed position.
strset_base
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<Position> > pos;
}
    : "base" LBRACKET pos = position_operator [scope] RBRACKET {
        ret.reset(new GetLemmas(pos));
    }
;
// ----------------------------------------------------------------------------
// Token-level property value operator.
// Syntax: prop(Position, StrSet)
// Builds a PropVal from the parsed position and the parsed strset of keys.
strset_prop
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<Position> > token_pos;
    boost::shared_ptr<Function<StrSet> > prop_keys;
}
    : "prop" LPAREN
          token_pos = position_operator [scope]
          COMMA
          prop_keys = strset_operator [scope]
      RPAREN
      { ret.reset(new PropVal(token_pos, prop_keys)); }
;
// ----------------------------------------------------------------------------
// Lower operator.
// Syntax: lower(StrSet)
// Wraps the parsed strset operator in a ToLower function.
strset_lower
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<StrSet> > o_ret;
}
    : "lower" LPAREN o_ret = strset_operator [scope] RPAREN {
        ret.reset(new ToLower(o_ret));
    }
;
// ----------------------------------------------------------------------------
// Upper operator.
// Syntax: upper(StrSet)
// Wraps the parsed strset operator in a ToUpper function.
strset_upper
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<Function<StrSet> > o_ret;
}
    : "upper" LPAREN o_ret = strset_operator [scope] RPAREN {
        ret.reset(new ToUpper(o_ret));
    }
;
// ----------------------------------------------------------------------------
// Affix operator.
// Syntax: affix(StrSet, number)
// Builds an Affix from the parsed strset operator and the parsed number.
strset_affix
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    int offset = 0;
    boost::shared_ptr<Function<StrSet> > o_ret;
}
    : "affix" LPAREN
            o_ret = strset_operator [scope] COMMA offset = number
        RPAREN {
            ret.reset(new Affix(o_ret, offset));
        }
;
// ----------------------------------------------------------------------------
// A wrapper for strset value and strset variable.
// Variables are resolved against scope.variables().
strset_var_val
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > op]
    : op = strset_value
    | op = strset_variable [scope.variables()]
;
// ----------------------------------------------------------------------------
// Condition of the strset value
//  if (Bool, StrSet, StrSet)
//  ? StrSet ? Bool : []
// In the "if" form the third argument is optional; when it is absent (and in
// the "?" form) the two-argument Conditional<StrSet> constructor is used.
strset_condition
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > op]
{
    boost::shared_ptr<Function<Bool> > test;
    boost::shared_ptr<Function<StrSet> > p_true, p_false;
}
    : "if" LPAREN test = bool_operator [scope] COMMA
            p_true = strset_operator [scope]
            (COMMA p_false = strset_operator [scope])?
        RPAREN {
            if (p_false) {
                op.reset(new Conditional<StrSet>(test, p_true, p_false));
            }
            else {
                op.reset(new Conditional<StrSet>(test, p_true));
            }
        }
    | Q_MARK
            p_true = strset_operator [scope]
        Q_MARK
            test = bool_operator [scope] {
            op.reset(new Conditional<StrSet>(test, p_true));
        }
;
// ----------------------------------------------------------------------------
// Union operator over string sets.
// Syntax: union(StrSet, ...)
// The comma-separated operands are passed as one vector to SetUnion<StrSet>.
strset_union
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<Function<StrSet> > > > operands;
}
    : "union" LPAREN operands = strset_operator_comma_sep [scope] RPAREN
      { ret.reset(new SetUnion<StrSet>(operands)); }
;
// ----------------------------------------------------------------------------
// Intersection operator over string sets.
// Syntax: intersection(StrSet, ...)
// The comma-separated operands are passed as one vector to
// SetIntersection<StrSet>.
strset_intersection
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > ret]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<Function<StrSet> > > > operands;
}
    : "intersection" LPAREN operands = strset_operator_comma_sep [scope] RPAREN
      { ret.reset(new SetIntersection<StrSet>(operands)); }
;
// ----------------------------------------------------------------------------
// anninter operator.
// Syntax: anninter(Position, StrSet)
// Builds an AnnInter from the parsed position and the parsed strset operator.
strset_anninter
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > op]
{
    boost::shared_ptr<Function<Position> > pos;
    boost::shared_ptr<Function<StrSet> > in_strs;
}
    : "anninter"
        LPAREN
            pos = position_operator [scope]
            COMMA
            in_strs = strset_operator [scope]
        RPAREN
        {
            op.reset(new AnnInter(pos, in_strs));
        }
;
// ----------------------------------------------------------------------------
// Lexicon translation operator.
// Syntax: lex(StrSet, "lexicon name")
// Builds a LexTranslator from the parsed strset operator and the lexicon
// pointer obtained from scope.lexicons() under the given name.
// A WcclError raised while the rule runs is rethrown as a ParserException
// carrying the same what() text.
strset_lex
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<StrSet> > op]
{
    boost::shared_ptr<Function<StrSet> > s;
}
    : "lex" LPAREN s = strset_operator [scope] COMMA name : STRING RPAREN
    {
        op.reset(new LexTranslator(
            s,
            scope.lexicons().get_ptr(token_ref_to_std_string(name))));
    }
    // Caught by const reference so that a derived error is not sliced
    // before what() is called.
    exception catch [const WcclError& ex] {
        throw ParserException(ex.what());
    }
;
///////////////////////////////////////////////////////////////////////////////
// Boolean operators.
// Returns boost::shared_ptr<Function<Bool> >
///////////////////////////////////////////////////////////////////////////////
// Dispatches to one of the boolean rules listed below; a parenthesised bool
// operator is accepted as well. The scope is handed on unchanged.
bool_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
    : ret = bool_and        [scope]
    | ret = bool_or         [scope]
    | ret = bool_nor        [scope]
    | ret = bool_var_val    [scope]
    | ret = bool_regex      [scope]
    | ret = bool_inout      [scope]
    | ret = bool_condition  [scope]
    // setvar:
    | ret = setvar_operator [scope]
    // empty
    | ret = empty_operator  [scope]
    // equal/in/inter:
    | ret = equal_operator  [scope]
    | ret = in_operator     [scope]
    | ret = inter_operator  [scope]
    | ret = bool_iteration  [scope]
    // agreement
    | ret = bool_agreement  [scope]
    //
    | ret = bool_ann        [scope]
    | ret = bool_annsub     [scope]
    // singular/amb
    | ret = bool_ambiguous  [scope]
    | ret = bool_singular   [scope]
    // debug operators
    | ret = debug_print_operator [scope]
    | LPAREN ret = bool_operator [scope] RPAREN
;
// ----------------------------------------------------------------------------
// Wraps a parsed Function<Bool> into an Operator<Bool>.
// The operator is constructed from the function and scope.variables().
functional_operator_bool
    [ParsingScope& scope]
    returns [boost::shared_ptr<Operator<Bool> > op]
{
    boost::shared_ptr<Function<Bool> > fn_body;
}
    : fn_body = bool_operator [scope] {
        op.reset(new Operator<Bool>(fn_body, scope.variables()));
    }
;
// ----------------------------------------------------------------------------
// comma-separated predicates (bool operators)
// Every parsed predicate is appended, in order of appearance, to a freshly
// allocated vector that is handed back through ret_v.
bool_operator_comma_sep
    [ParsingScope& scope]
    returns
    [boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v]
{
    boost::shared_ptr<Function<Bool> > pred;
    ret_v.reset(
        new std::vector<boost::shared_ptr<Function<Bool> > >
    );
}
    : pred = bool_operator [scope] {
        ret_v->push_back(pred);
    }
    (
        COMMA pred = bool_operator [scope] {
            ret_v->push_back(pred);
        }
    )*
;
// ----------------------------------------------------------------------------
// And operator.
// Syntax: and(Bool, ...)
// The comma-separated predicates are passed as one vector to And.
bool_and
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
    : "and" LPAREN ret_v = bool_operator_comma_sep [scope] RPAREN {
        op.reset(new And(ret_v));
    }
;
// ----------------------------------------------------------------------------
// Or operator
// Syntax: or(Bool, ...)
// The comma-separated predicates are passed as one vector to Or.
bool_or
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
    : "or" LPAREN ret_v = bool_operator_comma_sep [scope] RPAREN {
        op.reset(new Or(ret_v));
    }
;
// ----------------------------------------------------------------------------
// Nor/Not operator
// Syntax: not(Bool, ...)
// The comma-separated predicates are passed as one vector to Nor.
bool_nor
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > ret_v;
}
    : "not" LPAREN ret_v = bool_operator_comma_sep [scope] RPAREN {
        op.reset(new Nor(ret_v));
    }
;
// ----------------------------------------------------------------------------
// Wrapper for bool value and bool variable.
// Variables are resolved against scope.variables().
bool_var_val
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
    : op = bool_value
    | op = bool_variable [scope.variables()]
;
// ----------------------------------------------------------------------------
// Regex operator
// Syntax: regex(StrSet, "pattern")
// Builds a Regex from the parsed strset operator and the pattern string
// converted with token_ref_to_ustring.
bool_regex
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<StrSet> > expr;
}
    : "regex"
        LPAREN
            expr = strset_operator [scope] COMMA reg: STRING
        RPAREN {
            op.reset(new Regex(expr, token_ref_to_ustring(reg)));
        }
;
// ----------------------------------------------------------------------------
// Input/output operator
// Syntax: inside(Position) | outside(Position)
// Builds IsInside or IsOutside over the parsed position.
bool_inout
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Position> > ret_pos;
}
    : "inside"  LPAREN ret_pos = position_operator [scope] RPAREN {
        op.reset(new IsInside(ret_pos));
    }
    | "outside" LPAREN ret_pos = position_operator [scope] RPAREN {
        op.reset(new IsOutside(ret_pos));
    }
;
// ----------------------------------------------------------------------------
// if (Bool, Bool, Bool)
// ? Bool ? Bool : False
// In the "if" form the third argument is optional; when it is absent (and in
// the "?" form) the two-argument Conditional<Bool> constructor is used.
bool_condition
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Bool> > test, p_true, p_false;
}
    : "if" LPAREN test = bool_operator [scope] COMMA
            p_true = bool_operator [scope]
            (COMMA p_false = bool_operator [scope])?
        RPAREN {
            if (p_false) {
                op.reset(new Conditional<Bool>(test, p_true, p_false));
            }
            else {
                op.reset(new Conditional<Bool>(test, p_true));
            }
        }
    | Q_MARK
            p_true = bool_operator [scope]
        Q_MARK
            test = bool_operator [scope] {
            op.reset(new Conditional<Bool>(test, p_true));
        }
;
// ----------------------------------------------------------------------------
// Equal operator
// Syntax: equal(X, X) where X is a position, symset, strset or bool operator.
// Syntactic predicates try position, then symset, then strset; the bool form
// is the fallback. The matching Equals<T> instantiation is created.
equal_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > t1, t2;
    boost::shared_ptr<Function<Bool> > b1, b2;
    boost::shared_ptr<Function<StrSet> > s1, s2;
    boost::shared_ptr<Function<Position> > p1, p2;
}
    : "equal" LPAREN
    (
        (position_operator [scope]) =>
        (
            p1 = position_operator [scope] COMMA
            p2 = position_operator [scope] {
                op.reset(new Equals<Position>(p1, p2));
            }
        )
    |
        (symset_operator [scope]) =>
        (
            t1 = symset_operator [scope] COMMA
            t2 = symset_operator [scope] {
                op.reset(new Equals<TSet>(t1, t2));
            }
        )
    |
        (strset_operator [scope]) =>
        (
            s1 = strset_operator [scope] COMMA
            s2 = strset_operator [scope] {
                op.reset(new Equals<StrSet>(s1, s2));
            }
        )
    |
        (
            b1 = bool_operator [scope] COMMA
            b2 = bool_operator [scope] {
                op.reset(new Equals<Bool>(b1, b2));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// In operator
// Syntax: in(X, X) where X is a symset or strset operator.
// A syntactic predicate tries the symset form first; the strset form is the
// fallback. The matching IsSubsetOf<T> instantiation is created.
in_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > t1, t2;
    boost::shared_ptr<Function<StrSet> > s1, s2;
}
    :
    "in" LPAREN
    (
        (symset_operator [scope]) =>
        (
            t1 = symset_operator [scope] COMMA
            t2 = symset_operator [scope] {
                op.reset(new IsSubsetOf<TSet>(t1, t2));
            }
        )
    |
        (
            s1 = strset_operator [scope] COMMA
            s2 = strset_operator [scope] {
                op.reset(new IsSubsetOf<StrSet>(s1, s2));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Inter operator
// Syntax: inter(X, X) where X is a symset or strset operator.
// A syntactic predicate tries the symset form first; the strset form is the
// fallback. The matching Intersects<T> instantiation is created.
inter_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > t1, t2;
    boost::shared_ptr<Function<StrSet> > s1, s2;
}
    :
    "inter" LPAREN
    (
        (symset_operator [scope]) =>
        (
            t1 = symset_operator [scope] COMMA
            t2 = symset_operator [scope] {
                op.reset(new Intersects<TSet>(t1, t2));
            }
        )
    |
        (
            s1 = strset_operator [scope] COMMA
            s2 = strset_operator [scope] {
                op.reset(new Intersects<StrSet>(s1, s2));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Annotation operator.
// Syntax: ann(Match, "channel") | ann(Match, Match, "channel")
// The second match is optional; the three-argument Ann constructor is used
// only when it was parsed.
bool_ann
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr< Function<Match> > match_from;
    boost::shared_ptr< Function<Match> > match_to;
}
    : "ann" LPAREN
            match_from = match_operator [scope] COMMA
            (match_to = match_operator [scope] COMMA)?
            channel : STRING
        RPAREN {
            if (match_to) {
                op.reset(new Ann(match_from, match_to, token_ref_to_std_string(channel)));
            }
            else {
                op.reset(new Ann(match_from, token_ref_to_std_string(channel)));
            }
        }
;
// ----------------------------------------------------------------------------
// Annotation-sub operator.
// Syntax: annsub(Match, "channel") | annsub(Match, Match, "channel")
// The second match is optional; the three-argument AnnSub constructor is used
// only when it was parsed.
bool_annsub
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr< Function<Match> > match_from;
    boost::shared_ptr< Function<Match> > match_to;
}
    : "annsub" LPAREN
            match_from = match_operator [scope] COMMA
            (match_to = match_operator [scope] COMMA)?
            channel : STRING
        RPAREN {
            if (match_to) {
                op.reset(new AnnSub(match_from, match_to, token_ref_to_std_string(channel)));
            }
            else {
                op.reset(new AnnSub(match_from, token_ref_to_std_string(channel)));
            }
        }
;
// ----------------------------------------------------------------------------
// Ambiguity check.
// Syntax: ambiguous(X) where X is a position, symset or strset operator.
// Syntactic predicates select the argument kind in that order and the
// matching IsAmbiguous<T> instantiation is created.
bool_ambiguous
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
{
    boost::shared_ptr<Function<TSet> > symset_fn;
    boost::shared_ptr<Function<StrSet> > strset_fn;
    boost::shared_ptr<Function<Position> > pos_fn;
}
    : "ambiguous" LPAREN
      (
          (position_operator [scope]) =>
          ( pos_fn = position_operator [scope]
            { ret.reset(new IsAmbiguous<Position>(pos_fn)); }
          )
        | (symset_operator [scope]) =>
          ( symset_fn = symset_operator [scope]
            { ret.reset(new IsAmbiguous<TSet>(symset_fn)); }
          )
        | (strset_operator [scope]) =>
          ( strset_fn = strset_operator [scope]
            { ret.reset(new IsAmbiguous<StrSet>(strset_fn)); }
          )
      )
      RPAREN
;
// ----------------------------------------------------------------------------
// Tag singularity check.
// Syntax: singular(SymSet)
// Builds an IsSingular from the parsed symset operator and scope.tagset().
bool_singular
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
{
    boost::shared_ptr< Function<TSet> > tags;
}
    : "singular" LPAREN tags = symset_operator [scope] RPAREN
      { ret.reset(new IsSingular(tags, scope.tagset())); }
;
// ----------------------------------------------------------------------------
// Debug printing:
// Syntax: debug(X) where X is a position, symset, strset, bool or match
// operator. Syntactic predicates try the first four kinds in that order; the
// match form is the fallback. Every branch wraps the parsed function in a
// DebugPrint.
debug_print_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
{
    boost::shared_ptr<FunctionBase> v;
}
    : "debug" LPAREN
    (
        (position_operator [scope]) =>
        (
            v = position_operator [scope] {
                ret.reset(new DebugPrint(v));
            }
        )
    |
        (symset_operator [scope]) =>
        (
            v = symset_operator [scope] {
                ret.reset(new DebugPrint(v));
            }
        )
    |
        (strset_operator [scope]) =>
        (
            v = strset_operator [scope] {
                ret.reset(new DebugPrint(v));
            }
        )
    |
        (bool_operator [scope]) =>
        (
            v = bool_operator [scope] {
                ret.reset(new DebugPrint(v));
            }
        )
    |
        (
            v = match_operator [scope] {
                ret.reset(new DebugPrint(v));
            }
        )
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Iterations:
//  only(Position, Position, $Var, Bool)
//  atleast(Position, Position, $Var, Bool, number)
//  llook(Position, Position, $Var, Bool)   - arguments are read as rpos, lpos
//  rlook(Position, Position, $Var, Bool)
//  skip(Position, $Var, Bool, number)
// The position variable accessor is resolved against scope.variables() and
// passed to the created operator by dereference.
bool_iteration
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
{
    int min_match = 0;
    int offset = 0;
    boost::shared_ptr<Function<Bool> > expr;
    boost::shared_ptr<Function<Position> > lpos, rpos;
    boost::shared_ptr<VariableAccessor<Position> > pacc;
}
    : "only" LPAREN
            lpos = position_operator [scope] COMMA
            rpos = position_operator [scope] COMMA
            pacc = position_variable_acc [scope.variables()] COMMA
            expr = bool_operator [scope]
        RPAREN {
            ret.reset(new Only(lpos, rpos, *pacc, expr));
        }
    | "atleast" LPAREN
            lpos = position_operator [scope] COMMA
            rpos = position_operator [scope] COMMA
            pacc = position_variable_acc [scope.variables()] COMMA
            expr = bool_operator [scope] COMMA
            min_match = number
        RPAREN {
            ret.reset(new AtLeast(lpos, rpos, *pacc, expr, min_match));
        }
    | "llook" LPAREN //note inverted rpos/lpos order
            rpos = position_operator [scope] COMMA
            lpos = position_operator [scope] COMMA
            pacc = position_variable_acc [scope.variables()] COMMA
            expr = bool_operator [scope]
        RPAREN {
            ret.reset(new LeftLook(lpos, rpos, *pacc, expr));
        }
    | "rlook" LPAREN
            lpos = position_operator [scope] COMMA
            rpos = position_operator [scope] COMMA
            pacc = position_variable_acc [scope.variables()] COMMA
            expr = bool_operator [scope]
        RPAREN {
            ret.reset(new RightLook(lpos, rpos, *pacc, expr));
        }
    | "skip" LPAREN
            lpos = position_operator [scope] COMMA
            pacc = position_variable_acc [scope.variables()] COMMA
            expr = bool_operator [scope] COMMA
            offset = number
        RPAREN {
            ret.reset(new Skip(lpos, *pacc, expr, offset));
        }
;
// ----------------------------------------------------------------------------
// Agreement operator: agr, agrpp, wagr
// Syntax: <keyword>(Position, Position, SymSet)
// Creates StrongAgreement, PointAgreement or WeakAgreement respectively, each
// receiving the two positions, the symset and scope.tagset().
bool_agreement
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
{
    boost::shared_ptr<Function<TSet> > expr;
    boost::shared_ptr<Function<Position> > lpos, rpos;
}
    : "agr" LPAREN
            lpos = position_operator [scope] COMMA
            rpos = position_operator [scope] COMMA
            expr = symset_operator [scope]
        RPAREN {
            ret.reset(new StrongAgreement(lpos, rpos, expr, scope.tagset()));
        }
    | "agrpp" LPAREN
            lpos = position_operator [scope] COMMA
            rpos = position_operator [scope] COMMA
            expr = symset_operator [scope]
        RPAREN {
            ret.reset(new PointAgreement(lpos, rpos, expr, scope.tagset()));
        }
    | "wagr" LPAREN
            lpos = position_operator [scope] COMMA
            rpos = position_operator [scope] COMMA
            expr = symset_operator [scope]
        RPAREN {
            ret.reset(new WeakAgreement(lpos, rpos, expr, scope.tagset()));
        }
;
// ----------------------------------------------------------------------------
// Parse operator on L1 level
// Dispatches to the phrase annotation or phrase iteration rule and returns
// whatever the chosen rule produced.
bool_phrase
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
    : ret = bool_phrase_annotation [scope]
    | ret = bool_phrase_iteration  [scope]
;
// ----------------------------------------------------------------------------
// Annotation operator: phrase, phrase_beg, phrase_end, phrase_whole, phrase_pp
// The alternatives that still carry an action build an AnnToken from a parsed
// position, a channel name string and one of the AnnToken::O_* selectors
// (O_ANY, O_FIRST, O_LAST, O_HEAD). The two-position alternatives only have a
// TODO action.
// NOTE(review): this rule is incomplete as it stands. Only the "isannhead" and
// "isannpp" keyword lines are present; the keyword/LPAREN lines of the other
// alternatives, the "RPAREN {" / "}" lines around the actions, the leading ':'
// and the [ParsingScope& scope] argument appear to be missing, and stray
// author/"committed" lines are interleaved. Restore from the upstream grammar
// rather than guessing the keywords.
bool_phrase_annotation
returns [boost::shared_ptr<Function<Bool> > ret]
{
boost::shared_ptr<Function<Position> > lpos, rpos;
}
lpos = position_operator [scope] COMMA n1: STRING
Adam Radziszewski
committed
ret.reset(new AnnToken(lpos, token_ref_to_std_string(n1), AnnToken::O_ANY));
lpos = position_operator [scope] COMMA n2: STRING
Adam Radziszewski
committed
ret.reset(new AnnToken(lpos, token_ref_to_std_string(n2), AnnToken::O_FIRST));
lpos = position_operator [scope] COMMA n3: STRING
Adam Radziszewski
committed
ret.reset(new AnnToken(lpos, token_ref_to_std_string(n3), AnnToken::O_LAST));
| "isannhead" LPAREN
lpos = position_operator [scope] COMMA n4: STRING
Adam Radziszewski
committed
ret.reset(new AnnToken(lpos, token_ref_to_std_string(n4), AnnToken::O_HEAD));
lpos = position_operator [scope] COMMA
rpos = position_operator [scope] COMMA n5: STRING
| "isannpp" LPAREN
lpos = position_operator [scope] COMMA
rpos = position_operator [scope] COMMA n6: STRING
RPAREN {
// TODO
}
;
// ----------------------------------------------------------------------------
// Phrase iteration operator: lphrase, rphrase
// Each visible alternative parses a position operator, a position variable
// (resolved against scope.variables()) and a string; the only action present
// is a TODO placeholder, so `ret` is never assigned here.
// NOTE(review): this rule is incomplete as it stands. The keyword/LPAREN
// lines, the leading ':' and '|', the first alternative's string and
// "RPAREN { ... }" tail, and the [ParsingScope& scope] argument appear to be
// missing. Restore from the upstream grammar rather than guessing.
bool_phrase_iteration
returns [boost::shared_ptr<Function<Bool> > ret]
{
boost::shared_ptr<Function<Position> > position;
boost::shared_ptr<VarGetter<Position> > var_position;
}
position = position_operator [scope] COMMA
var_position = position_variable [scope.variables()] COMMA
position = position_operator [scope] COMMA
var_position = position_variable [scope.variables()] COMMA
n2: STRING
RPAREN {
// TODO
}
;
// ----------------------------------------------------------------------------
// Setvar operator
// Syntax: setvar(<variable>, <operator>)
// The argument list is parsed by one of the typed *_setvar rules, tried in
// the order position, bool, strset, symset.
// Returns boost::shared_ptr<Function<Bool> >
// ----------------------------------------------------------------------------
setvar_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > ret]
    : "setvar" LPAREN
    (
          ret = position_setvar [scope]
        | ret = bool_setvar     [scope]
        | ret = strset_setvar   [scope]
        | ret = symset_setvar   [scope]
    )
    RPAREN
;
// ----------------------------------------------------------------------------
// Setvar for position
// Parses `<position variable accessor> COMMA <position operator>` and builds
// a VarSetter<Position> from the dereferenced accessor and the operator.
position_setvar
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Position> > ret_op;
    boost::shared_ptr<VariableAccessor<Position> > ret_acc;
}
    : ret_acc = position_variable_acc [scope.variables()]
        COMMA
        ret_op = position_operator [scope] {
            op.reset(new VarSetter<Position>(*ret_acc, ret_op));
        }
;
// ----------------------------------------------------------------------------
// Setvar for bool
// Parses `<bool variable accessor> COMMA <bool operator>` and builds a
// VarSetter<Bool> from the dereferenced accessor and the operator.
bool_setvar
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Bool> > ret_op;
    boost::shared_ptr<VariableAccessor<Bool> > ret_acc;
}
    : ret_acc = bool_variable_acc [scope.variables()]
        COMMA
        ret_op = bool_operator [scope] {
            op.reset(new VarSetter<Bool>(*ret_acc, ret_op));
        }
;
// ----------------------------------------------------------------------------
// Setvar for strset
// Parses `<strset variable accessor> COMMA <strset operator>` and builds a
// VarSetter<StrSet> from the dereferenced accessor and the operator.
strset_setvar
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<StrSet> > ret_op;
    boost::shared_ptr<VariableAccessor<StrSet> > ret_acc;
}
    : ret_acc = strset_variable_acc [scope.variables()]
        COMMA
        ret_op = strset_operator [scope] {
            op.reset(new VarSetter<StrSet>(*ret_acc, ret_op));
        }
;
// ----------------------------------------------------------------------------
// Setvar for symset
// Parses `<symset variable accessor> COMMA <symset operator>` and builds a
// VarSetter<TSet> from the dereferenced accessor and the operator.
symset_setvar
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > ret_op;
    boost::shared_ptr<VariableAccessor<TSet> > ret_acc;
}
    : ret_acc = symset_variable_acc [scope.variables()]
        COMMA
        ret_op = symset_operator [scope] {
            op.reset(new VarSetter<TSet>(*ret_acc, ret_op));
        }
;
// ----------------------------------------------------------------------------
// empty() operator
// Returns boost::shared_ptr<Function<Bool> >
// As written, the group between LPAREN and RPAREN holds no alternatives, so
// `op` is never assigned by this rule.
// NOTE(review): the alternatives were presumably lost; the commented-out
// variant directly below dispatches to match_empty / symset_empty /
// strset_empty. Confirm the exact form (and any syntactic predicates) against
// the upstream grammar before restoring.
// NOTE(review): no [ParsingScope& scope] argument is declared here, although
// bool_operator invokes this rule with [scope] - confirm.
//----------------------------------------------------------------------------
empty_operator
returns [boost::shared_ptr<Function<Bool> > op]
: "empty" LPAREN
(
)
RPAREN
;
/*
empty_operator
returns [boost::shared_ptr<Function<Bool> > op]
: "empty" LPAREN
(
op = match_empty [scope]
| op = symset_empty [scope]
| op = strset_empty [scope]
)
RPAREN
;
*/
//----------------------------------------------------------------------------
// match empty() operator
// Parses a match operator and wraps it in IsEmpty<Match>.
// Returns boost::shared_ptr<Function<Bool> >
match_empty
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<Match> > arg;
}
    : arg = match_operator [scope] {
        op.reset(new IsEmpty<Match>(arg));
    }
;
//----------------------------------------------------------------------------
// SymSet empty() operator
// Parses a symset operator and wraps it in IsEmpty<TSet>.
// Returns boost::shared_ptr<Function<Bool> >
symset_empty
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<TSet> > arg;
}
    : arg = symset_operator [scope] {
        op.reset(new IsEmpty<TSet>(arg));
    }
;
//----------------------------------------------------------------------------
// Strset empty() operator
// Parses a strset operator and wraps it in IsEmpty<StrSet>.
// Returns boost::shared_ptr<Function<Bool> >
strset_empty
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Bool> > op]
{
    boost::shared_ptr<Function<StrSet> > arg;
}
    : arg = strset_operator [scope] {
        op.reset(new IsEmpty<StrSet>(arg));
    }
;
///////////////////////////////////////////////////////////////////////////////
// Match functional operators,
// which return boost::shared_ptr<Function<Match> >
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// A wrapper for match variable and match value.
// Variables are resolved against scope.variables().
match_var_val [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Match> > ret]
    : ret = match_vector_variable [scope.variables()]
    | ret = match_value_const
;
///////////////////////////////////////////////////////////////////////////////
// Match operators.
// Returns boost::shared_ptr<Function<Match> >
///////////////////////////////////////////////////////////////////////////////
// Primary forms:
//  - a match variable or match constant (match_var_val),
//  - an optional "M" keyword, accepted only when the next token is LITERAL_M
//    or COLON: submatch 1 of the "_M" variable,
//  - "MA": submatch 2 of the "_M" variable,
//  - a parenthesised match operator.
// Any number of `COLON UNSIGNED_INT` suffixes may follow; each one wraps the
// current result in a further Submatch with the given index.
match_operator
    [ParsingScope& scope]
    returns [boost::shared_ptr<Function<Match> > ret]
{
    //
}
    :
    ( ret = match_var_val [scope]
    | {LA(1)==LITERAL_M || LA(1)==COLON}? ("M")? {
            ret.reset(new VarGetter<Match>(scope.variables().create_accessor<Match>("_M")));
            ret.reset(new Submatch(ret, 1));
        }
    | "MA" {
            ret.reset(new VarGetter<Match>(scope.variables().create_accessor<Match>("_M")));
            ret.reset(new Submatch(ret, 2));
        }
    | LPAREN ret = match_operator [scope] RPAREN
    )
    ( // if there's a colon after the match, we have a submatch reference
        COLON i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
    )*
;
// ----------------------------------------------------------------------------
// Wraps a parsed Function<Match> into an Operator<Match>.
// The operator is constructed from the function and scope.variables().
functional_operator_match
    [ParsingScope& scope]
    returns [boost::shared_ptr<Operator<Match> > op]
{
    boost::shared_ptr<Function<Match> > fn_body;
}
    : fn_body = match_operator [scope] {
        op.reset(new Operator<Match>(fn_body, scope.variables()));
    }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// WCCL FILE PARSING RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// One or more import statements; each is applied to wccl_file.
imports_section [WcclFile& wccl_file]
    :
    (
        import [wccl_file]
    )+
;
// Single import statement.
// Syntax: import("file path", "lexicon name")
// The lexicon is parsed by LexiconParser::parse_lexicon, which receives
// wccl_file.path(), the lexicon name and the file path (in that order), and
// the result is registered through wccl_file.import_lexicon().
import [WcclFile& wccl_file]
    : "import"
        LPAREN
            file_path : STRING
            COMMA
            lexicon_name : STRING
        RPAREN
        {
            wccl_file.import_lexicon(
                LexiconParser::parse_lexicon(
                    wccl_file.path(),
                    token_ref_to_std_string(lexicon_name),
                    token_ref_to_std_string(file_path)));
        }
;
// ----------------------------------------------------------------------------
// A single section of a WCCL file: an operator section, the tag rules section
// or the match rules section. wccl_file is handed on to the chosen rule.
wccl_file_section [WcclFile& wccl_file]
    : any_operator_section [wccl_file]
    | tag_rules_section    [wccl_file]
    | match_rules_section  [wccl_file]
;
// Tag rules section of a WCCL file.
// The rule sequence is parsed first; afterwards a ParserException is thrown
// if wccl_file already has tag rules, otherwise the sequence is stored with
// set_tag_rules().
tag_rules_section [WcclFile& wccl_file]
{
    boost::shared_ptr<TagRuleSequence> parsed_rules;
}
    : parsed_rules = parse_tag_rule_sequence [wccl_file.tagset()]
      {
          if (!wccl_file.has_tag_rules()) {
              wccl_file.set_tag_rules(parsed_rules);
          }
          else {
              throw ParserException("Only one tag_rules section allowed in a WCCL file.");
          }
      }
;
// Match rules section of a WCCL file.
// Syntax: match_rules( rule ; rule ; ... )
// A ParserException is thrown right after the keyword if wccl_file already
// has match rules. Each parsed rule is copied into the sequence and the
// scope's variables are reset before the next rule; the finished sequence is
// stored with set_match_rules().
match_rules_section [WcclFile& wccl_file]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Matching::MatchRule> match_rule;
    boost::shared_ptr<Matching::MatchRuleSequence> rule_seq = boost::make_shared<Matching::MatchRuleSequence>();
}
    : "match_rules" {
        if (wccl_file.has_match_rules()) {
            throw ParserException("Only one match_rules section allowed in a WCCL file.");
        }
    }
    LPAREN
        match_rule = match_rule_operator [scope] {
            rule_seq->push_back(*match_rule);
            scope.reset_variables();
        }
        (
            SEMI match_rule = match_rule_operator [scope] {
                rule_seq->push_back(*match_rule);
                scope.reset_variables();
            }
        )*
    RPAREN { wccl_file.set_match_rules(rule_seq); }
;
// Operator section of any type (untyped, bool, symset, strset, position or
// match). The parsed sequence is added to wccl_file with add_section().
any_operator_section
    [WcclFile& wccl_file]
{
    boost::shared_ptr<UntypedOpSequence> untyped_seq;
    boost::shared_ptr<OpSequence<Bool> > bool_seq;
    boost::shared_ptr<OpSequence<TSet> > symset_seq;
    boost::shared_ptr<OpSequence<StrSet> > strset_seq;
    boost::shared_ptr<OpSequence<Position> > pos_seq;
    boost::shared_ptr<OpSequence<Match> > m_seq;
}
    : untyped_seq = untyped_operator_sequence [wccl_file] {
            wccl_file.add_section(untyped_seq);
        }
    | bool_seq = bool_operator_sequence [wccl_file] {
            wccl_file.add_section(bool_seq);
        }
    | symset_seq = symset_operator_sequence [wccl_file] {
            wccl_file.add_section(symset_seq);
        }
    | strset_seq = strset_operator_sequence [wccl_file] {
            wccl_file.add_section(strset_seq);
        }
    | pos_seq = position_operator_sequence [wccl_file] {
            wccl_file.add_section(pos_seq);
        }
    | m_seq = match_operator_sequence [wccl_file] {
            wccl_file.add_section(m_seq);
        }
;
// Named sequence of bool operators.
// Syntax: BOOL_SECTION_PREFIX "name" ( op ; op ; ... )
// Each parsed operator is appended to the sequence and the scope's variables
// are reset before the next operator is parsed.
bool_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<Bool> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<Bool> > op;
}
    : BOOL_SECTION_PREFIX name: STRING {
            seq.reset(new OpSequence<Bool>(token_ref_to_std_string(name)));
        }
        LPAREN
            op = functional_operator_bool [scope] { seq->append(op); scope.reset_variables(); }
            (SEMI op = functional_operator_bool [scope] { seq->append(op); scope.reset_variables(); })*
        RPAREN
;
// Named sequence of symset operators.
// Syntax: TST_SECTION_PREFIX "name" ( op ; op ; ... )
// Each parsed operator is appended to the sequence and the scope's variables
// are reset before the next operator is parsed.
symset_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<TSet> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<TSet> > op;
}
    : TST_SECTION_PREFIX name: STRING {
            seq.reset(new OpSequence<TSet>(token_ref_to_std_string(name)));
        }
        LPAREN
            op = functional_operator_symset [scope] { seq->append(op); scope.reset_variables(); }
            (SEMI op = functional_operator_symset [scope] { seq->append(op); scope.reset_variables(); })*
        RPAREN
;
// Named sequence of strset operators.
// Syntax: STR_SECTION_PREFIX "name" ( op ; op ; ... )
// Each parsed operator is appended to the sequence and the scope's variables
// are reset before the next operator is parsed.
strset_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<StrSet> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<StrSet> > op;
}
    : STR_SECTION_PREFIX name: STRING {
            seq.reset(new OpSequence<StrSet>(token_ref_to_std_string(name)));
        }
        LPAREN
            op = functional_operator_strset [scope] { seq->append(op); scope.reset_variables(); }
            (SEMI op = functional_operator_strset [scope] { seq->append(op); scope.reset_variables(); })*
        RPAREN
;
// Named sequence of position operators.
// Syntax: POS_SECTION_PREFIX "name" ( op ; op ; ... )
// Each parsed operator is appended to the sequence and the scope's variables
// are reset before the next operator is parsed.
position_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<Position> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<Position> > op;
}
    : POS_SECTION_PREFIX name: STRING {
            seq.reset(new OpSequence<Position>(token_ref_to_std_string(name)));
        }
        LPAREN
            op = functional_operator_position [scope] { seq->append(op); scope.reset_variables(); }
            (SEMI op = functional_operator_position [scope] { seq->append(op); scope.reset_variables(); })*
        RPAREN
;
// Named sequence of operators of any type.
// Syntax: AT_MARK "name" ( op ; op ; ... )
// Each parsed operator is appended to the sequence and the scope's variables
// are reset before the next operator is parsed.
untyped_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<UntypedOpSequence> seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<FunctionalOperator> op;
}
    : AT_MARK name: STRING {
            seq.reset(new UntypedOpSequence(token_ref_to_std_string(name)));
        }
        LPAREN
            op = functional_operator_any [scope] { seq->append(op); scope.reset_variables(); }
            (SEMI op = functional_operator_any [scope] { seq->append(op); scope.reset_variables(); })*
        RPAREN
;
// Named sequence of match operators.
// Syntax: MATCH_SECTION_PREFIX "name" ( op ; op ; ... )
// Each parsed operator is appended to the sequence and the scope's variables
// are reset before the next operator is parsed.
match_operator_sequence
    [const WcclFile& wccl_file]
    returns [boost::shared_ptr<OpSequence<Match> > seq]
{
    ParsingScope scope(wccl_file);
    boost::shared_ptr<Operator<Match> > op;
}
    : MATCH_SECTION_PREFIX name: STRING {
            seq.reset(new OpSequence<Match>(token_ref_to_std_string(name)));
        }
        LPAREN
            op = functional_operator_match [scope] { seq->append(op); scope.reset_variables(); }
            (SEMI op = functional_operator_match [scope] { seq->append(op); scope.reset_variables(); })*
        RPAREN
;
// Functional operator of any type.
// Syntactic predicates try position, symset, strset and match operators in
// that order; the bool operator is the fallback. The result is whatever the
// matching functional_operator_* wrapper produced.
functional_operator_any
    [ParsingScope& scope]
    returns [boost::shared_ptr<FunctionalOperator> op]
    : (position_operator [scope]) => op = functional_operator_position [scope]
    | (symset_operator [scope])   => op = functional_operator_symset [scope]
    | (strset_operator [scope])   => op = functional_operator_strset [scope]
    | (match_operator [scope])    => op = functional_operator_match [scope]
    | op = functional_operator_bool [scope]
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Single action such as select, delete, relabel or unify
// (mark and unmark are dispatched here as well).
action
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagAction> act]
    : act = action_select  [scope]
    | act = action_delete  [scope]
    | act = action_relabel [scope]
    | act = action_unify   [scope]
    | act = action_mark    [scope]
    | act = action_unmark  [scope]
;
// Action sequence - the actions are separated with commas:
//  select(...), select(...), delete(...)
// Every parsed action is appended, in order of appearance, to a freshly
// allocated vector that is handed back through v_act.
action_sequence
    [ParsingScope& scope]
    returns [boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > v_act]
{
    boost::shared_ptr<TagAction> act;
    v_act.reset(new std::vector<boost::shared_ptr<TagAction> >);
}
    : act = action [scope] {
        v_act->push_back(act);
    }
    (
        COMMA act = action [scope] {
            v_act->push_back(act);
        }
    )*
;
// ----------------------------------------------------------------------------
// Single rule:
//  rule(NAME, ACTIONS) or rule(NAME, COND, ACTIONS)
// The condition is optional; the TagRule constructor that takes a condition
// is used only when one was parsed. Both forms receive the rule name,
// scope.variables() and the action vector.
tag_rule
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagRule> rle]
{
    boost::shared_ptr<Function<Bool> > condition;
    boost::shared_ptr<std::vector<boost::shared_ptr<TagAction> > > actions;
}
    : "rule" LPAREN name: STRING COMMA
            (condition = bool_operator [scope] COMMA)?
            actions = action_sequence [scope]
        RPAREN {
            if (condition) {
                rle.reset(
                    new TagRule(token_ref_to_std_string(name), scope.variables(), actions, condition));
            }
            else {
                rle.reset(
                    new TagRule(token_ref_to_std_string(name), scope.variables(), actions));
            }
        }
;
// Rule sequence
// One or more tag rules separated with SEMI. Each parsed rule is copied into
// a freshly allocated TagRuleSequence and the scope's variables are reset
// before the next rule is parsed.
tag_rule_sequence
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagRuleSequence> rule_seq]
{
    boost::shared_ptr<TagRule> rle;
    rule_seq.reset(new TagRuleSequence());
}
    : rle = tag_rule [scope] {
        rule_seq->push_back(*rle);
        scope.reset_variables();
    }
    (
        SEMI rle = tag_rule [scope] {
            rule_seq->push_back(*rle);
            scope.reset_variables();
        }
    )*
;
// ----------------------------------------------------------------------------
// This is wrapper for tag_rule_sequence in rules section in the wccl file
// Syntax: tag_rules( <tag rule sequence> )
tag_rules
    [ParsingScope& scope]
    returns [boost::shared_ptr<TagRuleSequence> rule_seq]
    : "tag_rules" LPAREN rule_seq = tag_rule_sequence [scope] RPAREN {
        //
    }
;
// ----------------------------------------------------------------------------
// Select action:
// select(position, predicate) or select(predicate);
// Builds a Select from the parsed predicate and, when one is given, the
// position operator.
action_select
returns [boost::shared_ptr<Select> action]
{
boost::shared_ptr<Function<Position> > pos;
boost::shared_ptr<Function<Bool> > condition;
}
: "select" LPAREN
(
// Syntactic predicate: this alternative is taken only when a position
// operator can be parsed at this point.
(position_operator [scope]) =>
pos = position_operator [scope] COMMA
condition = bool_operator [scope] {
// select(position, condition);
action.reset(new Select(condition, pos));
}
)
|
(
condition = bool_operator [scope] {
// select(condition);
action.reset(new Select(condition));
}
)
)
RPAREN
;
// ----------------------------------------------------------------------------
// Delete action:
//   delete(position, predicate) or delete(predicate)
// Builds a Delete from the parsed predicate and, when one is given, the
// position operator. Both alternatives must assign `action`; otherwise the
// rule hands an empty pointer back to its caller.
action_delete
	[ParsingScope& scope]
	returns [boost::shared_ptr<Delete> action]
{
	boost::shared_ptr<Function<Position> > pos;
	boost::shared_ptr<Function<Bool> > condition;
}
	: "delete" LPAREN
	(
		// Syntactic predicate: this alternative is taken only when a position
		// operator can be parsed at this point.
		(position_operator [scope]) =>
		(
			pos = position_operator [scope] COMMA
			condition = bool_operator [scope] {
				// delete(position, condition);
				action.reset(new Delete(condition, pos));
			}
		)
	|
		(
			condition = bool_operator [scope] {
				// delete(condition);
				action.reset(new Delete(condition));
			}
		)
	)
	RPAREN
;
// ----------------------------------------------------------------------------
// Relabel action
// relabel(pos, symset, predicate) or relabel(symset, predicate)
// Builds a Relabel from the replacement symset, the predicate and, when one is
// given, the position operator.
action_relabel
returns [boost::shared_ptr<Relabel> action]
{
boost::shared_ptr<Function<Position> > pos;
boost::shared_ptr<Function<Bool> > condition;
boost::shared_ptr<Function<TSet> > replace_with;
}
: "relabel" LPAREN
(
// Syntactic predicate: this alternative is taken only when a position
// operator can be parsed at this point.
(position_operator [scope]) =>
pos = position_operator [scope] COMMA
replace_with = symset_operator [scope] COMMA
condition = bool_operator [scope] {
// relabel(pos, symset, predicate)
action.reset(new Relabel(replace_with, condition, pos));
}
)
|
(
replace_with = symset_operator [scope] COMMA
condition = bool_operator [scope] {
// relabel(symset, predicate)
action.reset(new Relabel(replace_with, condition));
}
)
)
RPAREN
;
// ----------------------------------------------------------------------------
// Unify action
// unify(pos_begin, pos_end, attribs)
// Two position operators followed by a symset operator; all three are passed
// to the Unify constructor in that order.
action_unify
returns [boost::shared_ptr<Unify> action]
{
boost::shared_ptr<Function<TSet> > attribs_expr;
boost::shared_ptr<Function<Position> > pos_begin, pos_end;
}
: "unify" LPAREN
pos_begin = position_operator [scope] COMMA
pos_end = position_operator [scope] COMMA
attribs_expr = symset_operator [scope]
RPAREN {
action.reset(new Unify(pos_begin, pos_end, attribs_expr));
}
;
// ----------------------------------------------------------------------------
// Mark action
// mark(pos_begin, pos_end, chan_name) or
// mark(pos_begin, pos_end, pos_head, chan_name)
// chan_name is a STRING token whose text is passed to Mark.
action_mark
returns [boost::shared_ptr<Mark> action]
{
boost::shared_ptr<Function<Position> > pos_begin, pos_end, pos_head;
}
: "mark" LPAREN
pos_begin = position_operator [scope] COMMA
pos_end = position_operator [scope] COMMA
(pos_head = position_operator [scope] COMMA)?
chan_name: STRING
RPAREN {
// pos_head is still an empty pointer here when the optional head position
// was not given; it is passed to Mark either way.
action.reset(new Mark(pos_begin, pos_end, pos_head, ((antlr::Token*)chan_name)->getText()));
}
;
// ----------------------------------------------------------------------------
// Unmark action
// unmark(pos, chan_name)
// chan_name is a STRING token whose text is passed to Unmark together with
// the parsed position operator.
action_unmark
returns [boost::shared_ptr<Unmark> action]
{
boost::shared_ptr<Function<Position> > pos;
}
: "unmark" LPAREN
pos = position_operator [scope] COMMA
chan_name: STRING
RPAREN {
action.reset(new Unmark(pos, ((antlr::Token*)chan_name)->getText()));
}
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Match rules
// A match rule is a single apply operator; the resulting MatchRule is built
// from the scope's variables and that operator.
ilor
committed
// Returns boost::shared_ptr<Matching::MatchRule>
ilor
committed
returns [boost::shared_ptr<Matching::MatchRule> ret_op]
ilor
committed
boost::shared_ptr<Matching::ApplyOperator> apply;
// Touch the Match variable "_M" in the scope's variables - presumably
// get_put creates it when it does not exist yet (confirm against Variables).
scope.variables().get_put<Match>("_M");
: apply = match_apply_operator [scope] {
ilor
committed
ret_op = boost::make_shared<Matching::MatchRule>(scope.variables(), apply);
ilor
committed
}
;
// Match apply operator:
// apply(match(), cond(conditions), actions(actions))
// apply(match(), actions(actions))
// The cond(...) part is optional; match(...) and actions(...) are required.
ilor
committed
// Returns boost::shared_ptr<Matching::ApplyOperator>
ilor
committed
returns [boost::shared_ptr<Matching::ApplyOperator> ret_op]
// Accessor for the Match variable "_M"; it is handed to the ApplyOperator.
VariableAccessor<Match> matches = scope.variables().create_accessor<Match>("_M");;
ilor
committed
boost::shared_ptr<Matching::ConjConditions> match_cond;
boost::shared_ptr<std::vector<boost::shared_ptr<Matching::MatchAction> > > actions;
boost::shared_ptr<std::vector<boost::shared_ptr<Function<Bool> > > > conditions;
: "apply" LPAREN
"match" LPAREN match_cond = match_condition [scope] RPAREN COMMA
("cond" LPAREN conditions = bool_operator_comma_sep [scope] RPAREN COMMA)?
"actions" LPAREN actions = match_action_comma_sep [scope] RPAREN
RPAREN {
// conditions is empty when no cond(...) part was given, so choose the
// ApplyOperator constructor that matches what was parsed.
if (conditions) {
ret_op.reset(
ilor
committed
new Matching::ApplyOperator(matches, match_cond, actions, conditions)
);
}
else {
ret_op.reset(
ilor
committed
new Matching::ApplyOperator(matches, match_cond, actions)
);
}
}
;
// Match conditions. Wraps the vector of match conditions produced by
// match_condition_in in a single Matching::ConjConditions object.
match_condition
ilor
committed
returns [boost::shared_ptr<Matching::ConjConditions> condition]
ilor
committed
std::vector<boost::shared_ptr<const Matching::MatchCondition> > m_cond;
: m_cond = match_condition_in [scope] {
ilor
committed
condition.reset(new Matching::ConjConditions(m_cond));
}
;
// Match conditions: one or more match_cond_all items separated by commas.
// Each parsed condition is appended to the returned vector in input order.
ilor
committed
// Returns std::vector< boost::shared_ptr<const Matching::MatchCondition> >
ilor
committed
returns [std::vector< boost::shared_ptr<const Matching::MatchCondition> > ret]
ilor
committed
boost::shared_ptr<const Matching::MatchCondition> r_cond;
: r_cond = match_cond_all[scope] {
ret.push_back(r_cond);
}
(
COMMA
r_cond = match_cond_all[scope] {
ret.push_back(r_cond);
}
)*
;
// Match variants variant(v1), variant(v2), ...
// One or more variant(...) items separated by commas; the body of each is
// parsed with match_condition.
ilor
committed
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > >
match_variants
ilor
committed
returns [boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > > variants]
ilor
committed
// Allocate the (initially empty) result vector.
variants.reset(new std::vector<boost::shared_ptr<Matching::ConjConditions> >());
ilor
committed
boost::shared_ptr<Matching::ConjConditions> variant;
: "variant" LPAREN variant = match_condition [scope] RPAREN {
}
(
COMMA "variant" LPAREN variant = match_condition [scope] RPAREN {
}
)*
;
ilor
committed
// Any single match condition: dispatches to the optional, repeat, token,
// oneof or longest condition rule and returns its result through the common
// MatchCondition pointer.
// Returns boost::shared_ptr<const Matching::MatchCondition>
ilor
committed
returns [boost::shared_ptr<const Matching::MatchCondition> ret]
: ret = match_cond_optional [scope]
| ret = match_cond_repeate [scope]
| ret = match_cond_token [scope]
| ret = match_cond_oneof [scope]
| ret = match_cond_longest [scope]
// Match condition - token (wraps a L0 predicate)
// The parsed bool operator is wrapped in a Matching::TokenCondition.
ilor
committed
// Returns boost::shared_ptr<const Matching::TokenCondition>
ilor
committed
returns [boost::shared_ptr<const Matching::TokenCondition> ret]
{
boost::shared_ptr<Function<Bool> > bool_op;
}
: bool_op = bool_operator [scope] {
ilor
committed
ret = boost::make_shared<Matching::TokenCondition>(bool_op);
// Match condition - optional
// optional(conditions): the inner conditions are parsed with match_condition
// and wrapped in a Matching::OptionalMatch.
ilor
committed
// Returns boost::shared_ptr<Matching::OptionalMatch>
ilor
committed
returns [boost::shared_ptr<Matching::OptionalMatch> mtch]
ilor
committed
boost::shared_ptr<Matching::ConjConditions> m_cond;
: "optional" LPAREN m_cond = match_condition [scope] RPAREN {
ilor
committed
mtch.reset(new Matching::OptionalMatch(m_cond));
ilor
committed
// Match condition - repeat
// repeat(conditions): the inner conditions are parsed with match_condition
// and wrapped in a Matching::RepeatedMatch.
// Returns boost::shared_ptr<Matching::RepeatedMatch>
ilor
committed
returns [boost::shared_ptr<Matching::RepeatedMatch> mtch]
ilor
committed
boost::shared_ptr<Matching::ConjConditions> m_cond;
: "repeat" LPAREN m_cond = match_condition [scope] RPAREN {
ilor
committed
mtch.reset(new Matching::RepeatedMatch(m_cond));
ilor
committed
// Match condition - is
// is(annotation_name): annotation_name is a STRING token; its text is passed
// to Matching::IsAnnotatedAs.
// Returns boost::shared_ptr<Matching::IsAnnotatedAs>
ilor
committed
returns [boost::shared_ptr<Matching::IsAnnotatedAs> mtch]
: "is" LPAREN annotation_name: STRING RPAREN {
ilor
committed
mtch.reset(new Matching::IsAnnotatedAs(token_ref_to_std_string(annotation_name)));
ilor
committed
// Match condition - text
// text(txt): txt is a STRING token; it is converted with token_ref_to_ustring
// and passed to Matching::MatchText.
// Returns boost::shared_ptr<Matching::MatchText>
ilor
committed
returns [boost::shared_ptr<Matching::MatchText> mtch]
: "text" LPAREN txt: STRING RPAREN {
ilor
committed
mtch.reset(new Matching::MatchText(token_ref_to_ustring(txt)));
// Match condition - oneof(variant(v1), variant(v2), ...)
// The variants are parsed with match_variants and passed to Matching::OneOf.
ilor
committed
// Returns boost::shared_ptr<Matching::OneOf>
ilor
committed
returns [boost::shared_ptr<Matching::OneOf> onf]
ilor
committed
boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > > variants;
: "oneof" LPAREN variants = match_variants [scope] RPAREN {
ilor
committed
onf.reset(new Matching::OneOf(variants));
// Match condition - longest(variant(v1), variant(v2), ...)
// The variants are parsed with match_variants and passed to Matching::Longest.
ilor
committed
// Returns boost::shared_ptr<Matching::Longest>
ilor
committed
returns [boost::shared_ptr<Matching::Longest> lng]
ilor
committed
boost::shared_ptr<std::vector<boost::shared_ptr<Matching::ConjConditions> > > variants;
: "longest" LPAREN variants = match_variants [scope] RPAREN {
ilor
committed
lng.reset(new Matching::Longest(variants));
// ----------------------------------------------------------------------------
// Match actions. A match action is one of mark, unmark, remark or setprop;
// each alternative delegates to the corresponding match_*_action rule.
ilor
committed
// Returns boost::shared_ptr<Matching::MatchAction>
match_action
ilor
committed
returns [boost::shared_ptr<Matching::MatchAction> m_act]
: m_act = match_mark_action [scope]
| m_act = match_unmark_action [scope]
| m_act = match_remark_action [scope]
| m_act = match_setprop_action [scope]
;
// Match mark action
// Takes a mandatory match_from operator, an optional match_to operator and,
// nested inside that, an optional head_match operator. The annotation name
// passed to MarkMatch is the text of the annotation_name token.
match_mark_action
ilor
committed
returns [boost::shared_ptr<Matching::MarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_to;
boost::shared_ptr<Function<Match> > match_from;
boost::shared_ptr<Function<Match> > head_match;
match_from = match_operator[scope] COMMA
( match_to = match_operator[scope] COMMA
( head_match = match_operator[scope] COMMA )?
// Choose the MarkMatch constructor by which optional operators are set:
// only match_from; match_from and match_to; or all three.
if (!match_to) {
m_act.reset(
ilor
committed
new Matching::MarkMatch(
match_from,
((antlr::Token*)annotation_name)->getText()));
} else {
if (!head_match) {
m_act.reset(
ilor
committed
new Matching::MarkMatch(
match_from,
match_to,
((antlr::Token*)annotation_name)->getText()));
} else {
m_act.reset(
ilor
committed
new Matching::MarkMatch(
match_from,
match_to,
head_match,
((antlr::Token*)annotation_name)->getText()));
}
;
// Match unmark action
// unmark(match_at, annotation_name)
// annotation_name is a STRING token whose text is passed to UnmarkMatch
// together with the parsed match operator.
ilor
committed
// Returns boost::shared_ptr<Matching::UnmarkMatch>
match_unmark_action
ilor
committed
returns [boost::shared_ptr<Matching::UnmarkMatch> m_act]
{
boost::shared_ptr<Function<Match> > match_at;
}
: "unmark" LPAREN
match_at = match_operator[scope] COMMA
annotation_name : STRING
RPAREN {
m_act.reset(
ilor
committed
new Matching::UnmarkMatch(
match_at,
((antlr::Token*)annotation_name)->getText()));
}
;
2810
2811
2812
2813
2814
2815
2816
2817
2818
2819
2820
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840
2841
2842
2843
2844
2845
2846
2847
2848
2849
2850
// Overwrite-match mark action ("remark"):
//   remark(from, annotation)
//   remark(from, to, annotation)
//   remark(from, to, head, annotation)
// Returns boost::shared_ptr<Matching::OverwriteMatch>
match_remark_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::OverwriteMatch> m_act]
{
	boost::shared_ptr<Function<Match> > range_begin;
	boost::shared_ptr<Function<Match> > range_end;
	boost::shared_ptr<Function<Match> > range_head;
}
	: "remark" LPAREN
		range_begin = match_operator[scope] COMMA
		( range_end = match_operator[scope] COMMA
			( range_head = match_operator[scope] COMMA )?
		)?
		annotation_name : STRING
	RPAREN {
		// Text of the annotation token, shared by all three constructor forms.
		const std::string annotation = ((antlr::Token*)annotation_name)->getText();

		// Pick the OverwriteMatch constructor by which optional operators
		// were actually given.
		if (!range_end) {
			m_act.reset(new Matching::OverwriteMatch(range_begin, annotation));
		} else if (!range_head) {
			m_act.reset(new Matching::OverwriteMatch(range_begin, range_end, annotation));
		} else {
			m_act.reset(
				new Matching::OverwriteMatch(range_begin, range_end, range_head, annotation));
		}
	}
;
2851
2852
2853
2854
2855
2856
2857
2858
2859
2860
2861
2862
2863
2864
2865
2866
2867
2868
2869
2870
2871
// Match setprop action:
//   setprop(match, key, value)
// key and value are STRING tokens; their text is handed to PropMatch along
// with the parsed match operator.
// Returns boost::shared_ptr<Matching::PropMatch>
match_setprop_action
	[ParsingScope& scope]
	returns [boost::shared_ptr<Matching::PropMatch> m_act]
{
	boost::shared_ptr<Function<Match> > target;
}
	: "setprop" LPAREN
		target = match_operator[scope] COMMA
		key_name : STRING COMMA
		value_name : STRING
	RPAREN {
		const std::string key = ((antlr::Token*)key_name)->getText();
		const std::string value = ((antlr::Token*)value_name)->getText();
		m_act.reset(new Matching::PropMatch(target, key, value));
	}
;
// Match actions separated by commas (one or more).
// Each parsed action is appended to a newly allocated vector in input order.
ilor
committed
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<Matching::MatchAction> > >
match_action_comma_sep
ilor
committed
returns [boost::shared_ptr<std::vector<boost::shared_ptr<Matching::MatchAction> > > r_vec]
ilor
committed
boost::shared_ptr<Matching::MatchAction> act;
// Allocate the result vector before the first action is parsed.
r_vec.reset(
ilor
committed
new std::vector<boost::shared_ptr<Matching::MatchAction> >
);
}
: act = match_action [scope] {
r_vec->push_back(act);
}
(
COMMA act = match_action [scope] {
r_vec->push_back(act);
}
)*
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer definition. The token vocabulary is exported under the name ANTLRExpr
// and the accepted character range is '\3'..'\377'.
class ANTLRLexer extends Lexer;
options {
exportVocab = ANTLRExpr;
charVocabulary = '\3'..'\377';
// Quoted string: double- or single-quoted, with no line break inside.
// The '!' suffix drops the quote characters from the token text.
: '"'! (~('"' | '\n' | '\r'))* '"'!
| '\''! (~('\'' | '\n' | '\r'))* '\''!
// Signed integer: a sign, then optional spaces/tabs (dropped from the token
// text by '!'), then one or more digits.
: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
// Unsigned integer: one or more decimal digits, no sign.
UNSIGNED_INT
options {
paraphrase = "Unsigned integer";
}
: ('0'..'9')+
;
// Prefix tokens. Each rule matches one fixed literal; the paraphrase is the
// human-readable name ANTLR shows for the token in error messages.

// "$s:" - string prefix.
STR_PREFIX
options {
paraphrase = "String prefix";
}
Paweł Kędzia
committed
: "$s:"
;
// "$t:" prefix.
TST_PREFIX
options {
Paweł Kędzia
committed
: "$t:"
;
// "$b:" - bool prefix.
BOOL_PREFIX
options {
paraphrase = "Bool prefix";
}
Paweł Kędzia
committed
: "$b:"
Paweł Kędzia
committed
// Position prefix: a bare '$'.
paraphrase = "Position prefix";
Paweł Kędzia
committed
: '$'
// "$m:" - match vector prefix.
MATCH_VECTOR_PREFIX
options {
paraphrase = "Match vector prefix";
}
: "$m:"
;
2995
2996
2997
2998
2999
3000
3001
3002
3003
3004
3005
3006
3007
3008
3009
3010
3011
3012
3013
3014
3015
3016
3017
3018
3019
3020
3021
3022
3023
3024
3025
3026
3027
3028
3029
// Section prefixes of a wccl file. Each rule matches one fixed "@x:" literal;
// the paraphrase is the name ANTLR shows for the token in error messages.

// "@s:" - string operators section.
STR_SECTION_PREFIX
options {
paraphrase = "Wccl file string operators section prefix";
}
: "@s:"
;
// "@p:" - position operators section.
POS_SECTION_PREFIX
options {
paraphrase = "Wccl file position operators section prefix";
}
: "@p:"
;
// "@b:" - bool operators section.
BOOL_SECTION_PREFIX
options {
paraphrase = "Wccl file bool operators section prefix";
}
: "@b:"
;
// "@t:" - symset operators section.
TST_SECTION_PREFIX
options {
paraphrase = "Wccl file symset operators section prefix";
}
: "@t:"
;
// "@m:" - match operators section prefix of a wccl file.
// The paraphrase is the name ANTLR shows for this token in error messages,
// so it must name the match section rather than the symset section.
MATCH_SECTION_PREFIX
options {
	paraphrase = "Wccl file match operators section prefix";
}
	: "@m:"
;
// Bracket and punctuation tokens. Each rule matches exactly the literal named
// in its paraphrase, which is what ANTLR shows in error messages.
RBRACKET
options {
paraphrase = "']'";
}
: ']'
LPAREN
options {
paraphrase = "'('";
}
: '('
RPAREN
options {
paraphrase = "')'";
}
: ')'
LCURLY
options {
paraphrase = "'{'";
}
: '{'
RCURLY
options {
paraphrase = "'}'";
}
: '}'
AT_MARK
options {
paraphrase = "'@'";
}
: '@'
// Two-character arrow "->".
ARROW
options {
paraphrase = "->";
}
: "->"
;
Adam Wardynski
committed
// Single ':' character.
COLON
options {
paraphrase = ":";
}
: ':'
;
// Single ';' character; the parser uses SEMI to separate tag rules.
SEMI
options {
paraphrase = ";";
}
: ';'
;
// Identifier: a letter or underscore followed by letters, digits or
// underscores - either bare or wrapped in backticks.
: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
// Whitespace alternatives: tab, form feed and line terminators.
| '\t'
| '\f'
|
// newline() is called for every line terminator so the lexer's line counter
// stays correct; "\r\n" is counted as a single line break.
( "\r\n"
| '\r'
| '\n'
) { newline(); }
// Whitespace is never passed on to the parser.
) { $setType(antlr::Token::SKIP); }
paraphrase = "Single line comment";
}
// Single line comment: everything from the two slashes up to, but not
// including, the line terminator. The token is skipped.
: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
;
// Multi line comment. Its content is consumed by the loop below and the whole
// token is skipped, so the parser never sees it.
ML_COMMENT
options {
paraphrase = "Multi line comment";
( // TODO: test it and add reference to the site it's taken from!
/* This actually works OK despite the ambiguity that
'\r' '\n' can be matched in one alternative or by matching
'\r' in one iteration and '\n' in another.. But
this is really matched just by one rule per (...)*
loop iteration, so it's OK.
This is exactly how they do it all over the web - just
turn off the warning for this particular token.*/
options {
generateAmbigWarnings = false;
}
// A '*' belongs to the comment body only when the next character is not '/'
// (semantic predicate on the second lookahead character).
: { LA(2)!='/' }? '*'
// Line terminators inside the comment still advance the line counter.
| '\r' '\n' { newline(); }
| '\r' { newline(); }
| '\n' { newline(); }
)*
{ $setType(antlr::Token::SKIP); }
//DSEPARATOR
//options {
// paraphrase = "':-'";
//}
// : ":-"
//;