Skip to content
Snippets Groups Projects
grammar.g 66.4 KiB
Newer Older
	returns [boost::shared_ptr<std::vector<ConjConditions> > variants]
{
	variants.reset(new std::vector<ConjConditions>());

	boost::shared_ptr<ConjConditions> variant;
}
	: "variant" LPAREN variant = match_condition [tagset, vars] RPAREN {
		// TODO
		// variants->push_back(variant);
	} 
	(
		COMMA "variant" LPAREN variant = match_condition [tagset, vars] RPAREN {
			// TODO
			// variants->push_back(variant);
		}
	)*
;

// Any one of the match conditions. The value produced by the chosen
// alternative is handed back converted to the common MatchCondition type.
// Returns boost::shared_ptr<const MatchCondition>
match_cond_all
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const MatchCondition> ret]
	: ret = match_cond_optional [tagset, vars]
	| ret = match_cond_repeate  [tagset, vars]
	| ret = match_cond_token    [tagset, vars]
	| ret = match_cond_oneof    [tagset, vars]
	| ret = match_cond_longest  [tagset, vars]
	| ret = match_cond_is
	| ret = match_cond_text
;

// Match condition - token: a single L0 boolean predicate wrapped in a
// TokenCondition.
// Returns boost::shared_ptr<const TokenCondition>
match_cond_token
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<const TokenCondition> ret]
{
	boost::shared_ptr<Function<Bool> > predicate;
}
	: predicate = bool_operator [tagset, vars]
	{
		ret = boost::make_shared<TokenCondition>(predicate);
	}
;


// Match condition - optional(match_condition)
// Wraps the parsed inner condition in an OptionalMatch.
// Returns boost::shared_ptr<OptionalMatch>
match_cond_optional
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OptionalMatch> mtch]
{
	boost::shared_ptr<ConjConditions> inner;
}
	: "optional" LPAREN inner = match_condition [tagset, vars] RPAREN
	{
		mtch = boost::make_shared<OptionalMatch>(inner);
	}
;

// Match condition - repeat(match_condition)
// Wraps the parsed inner condition in a RepeatedMatch.
// Returns boost::shared_ptr<RepeatedMatch>
match_cond_repeate
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<RepeatedMatch> mtch]
{
	boost::shared_ptr<ConjConditions> m_cond;
}
	: "repeat" LPAREN m_cond = match_condition [tagset, vars] RPAREN {
		mtch.reset(new RepeatedMatch(m_cond));
	}
;

// Match condition - is("annotation name")
// The STRING token's text becomes the annotation name of an IsAnnotatedAs.
// Returns boost::shared_ptr<IsAnnotatedAs>
match_cond_is
	returns [boost::shared_ptr<IsAnnotatedAs> mtch]
	: "is" LPAREN ann : STRING RPAREN
	{
		mtch = boost::make_shared<IsAnnotatedAs>(token_ref_to_std_string(ann));
	}
;

// Match condition - text("some text")
// The STRING token is converted with token_ref_to_ustring and wrapped in a
// MatchText.
// Returns boost::shared_ptr<MatchText>
match_cond_text
	returns [boost::shared_ptr<MatchText> mtch]
	: "text" LPAREN literal : STRING RPAREN
	{
		mtch = boost::make_shared<MatchText>(token_ref_to_ustring(literal));
	}
;

// Match condition - oneof(variant(v1), variant(v2), ...)
// Builds a OneOf from the variant list parsed by match_variants.
// Returns boost::shared_ptr<OneOf>
match_cond_oneof
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<OneOf> onf]
{
	boost::shared_ptr<std::vector<ConjConditions> > alternatives;
}
	: "oneof" LPAREN alternatives = match_variants [tagset, vars] RPAREN
	{
		onf = boost::make_shared<OneOf>(alternatives);
	}
;

// Match condition - longest(variant(v1), variant(v2), ...)
// Builds a Longest from the variant list parsed by match_variants.
// Returns boost::shared_ptr<Longest>
match_cond_longest
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Longest> lng]
{
	boost::shared_ptr<std::vector<ConjConditions> > alternatives;
}
	: "longest" LPAREN alternatives = match_variants [tagset, vars] RPAREN
	{
		lng = boost::make_shared<Longest>(alternatives);
	}
;

// ----------------------------------------------------------------------------

// A single match action: either a "mark" or an "unmark" action.
// Returns boost::shared_ptr<MatchAction>
match_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MatchAction> m_act]
	:
	( m_act = match_mark_action   [tagset, vars]
	| m_act = match_unmark_action [tagset, vars]
	)
;

// Match mark action. Accepted forms:
//   mark(from, "annotation name")
//   mark(from, to, "annotation name")
//   mark(from, to, head, "annotation name")
// Returns boost::shared_ptr<MarkMatch>
match_mark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<MarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_to;
	boost::shared_ptr<Function<Match> > match_from;
	boost::shared_ptr<Function<Match> > head_match;
}
	: "mark" LPAREN
			match_from = match_fit[tagset, vars] COMMA
			( match_to  = match_fit[tagset, vars] COMMA
				( head_match = match_fit[tagset, vars] COMMA )?
			)?
			annotation_name : STRING
		RPAREN {
			// RefToken forwards operator-> to the wrapped antlr::Token, so the
			// text can be read once here without a C-style cast.
			const std::string name = annotation_name->getText();

			// match_to / head_match stay null when their optional part of
			// the argument list was not present; pick the matching overload.
			if (!match_to) {
				m_act.reset(new MarkMatch(match_from, name));
			} else if (!head_match) {
				m_act.reset(new MarkMatch(match_from, match_to, name));
			} else {
				m_act.reset(
					new MarkMatch(match_from, match_to, head_match, name));
			}
		}
;

// Match unmark action - unmark(match, "annotation name")
// Returns boost::shared_ptr<UnmarkMatch>
match_unmark_action
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<UnmarkMatch> m_act]
{
	boost::shared_ptr<Function<Match> > match_at;
}
	: "unmark" LPAREN
				match_at = match_fit[tagset, vars] COMMA
				annotation_name : STRING
			RPAREN {
				// RefToken forwards operator-> to the wrapped antlr::Token,
				// so no C-style cast is needed to read the token text.
				m_act.reset(
					new UnmarkMatch(match_at, annotation_name->getText()));
			}
;

// One or more match actions separated by commas, collected in parse order.
// Returns boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > >
match_action_comma_sep
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<std::vector<boost::shared_ptr<MatchAction> > > r_vec]
{
	boost::shared_ptr<MatchAction> action;

	// The result vector exists (empty) before the first action is parsed.
	r_vec = boost::make_shared<std::vector<boost::shared_ptr<MatchAction> > >();
}
	: action = match_action [tagset, vars] { r_vec->push_back(action); }
	  ( COMMA action = match_action [tagset, vars] { r_vec->push_back(action); } )*
;

Paweł Kędzia's avatar
Paweł Kędzia committed
// A match-valued expression. It is one of:
//   - whatever match_var_val accepts,
//   - the keyword M, which reads the Match variable named "_M" from vars,
//   - a parenthesised match_fit.
// An ARROW followed by an unsigned integer wraps the result so far in a
// Submatch with that integer as its index.
// Returns boost::shared_ptr<Function<Match> >
match_fit
  [const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
{
	// (no local state needed)
}
	:
	( ret = match_var_val [tagset, vars]
	| "M" { ret.reset(new VarGetter<Match>(vars.create_accessor<Match>("_M")));	}
	| LPAREN ret = match_fit [tagset, vars] RPAREN
	)
	( // if there's an arrow after the match, we have a submatch reference
		ARROW i: UNSIGNED_INT { ret.reset(new Submatch(ret, token_ref_to_int(i))); }
	// NOTE(review): the subrule opened above is never closed in this copy of
	// the file; whether it should end with ")?" or ")*" cannot be told from
	// here - confirm against the original grammar.
;

// A match value given either as a match vector variable or as a match
// constant.
// Returns boost::shared_ptr<Function<Match> >
match_var_val
	[const Corpus2::Tagset& tagset, Variables& vars]
	returns [boost::shared_ptr<Function<Match> > ret]
	: ret = match_vector_variable [vars]
	| ret = match_value_const
;

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
class ANTLRLexer extends Lexer;
options {
	exportVocab    = ANTLRExpr;
	charVocabulary = '\3'..'\377';
	// Literal testing is off by default; rules that need it turn it on
	// themselves (SYMBOL does).
	testLiterals   = false;
}

// String literal delimited by double or single quotes. The delimiters are
// excluded from the token text ('!'), and the body may not contain the
// delimiter itself or a line break.
STRING
options {
	paraphrase = "a string";
}
	: '"'!  (~('"'  | '\n' | '\r'))* '"'!
	| '\''! (~('\'' | '\n' | '\r'))* '\''!
;

// Integer with a mandatory sign. Spaces or tabs between the sign and the
// digits are accepted but excluded from the token text ('!').
SIGNED_INT
options {
	paraphrase = "Signed integer";
}
	: ('-'|'+') (' '!|'\t'!)* ('0'..'9')+
;

// One or more decimal digits, with no sign.
UNSIGNED_INT
options {
	paraphrase = "Unsigned integer";
}
	: ('0'..'9')+ 
;	


rk's avatar
rk committed
QUOT_MARK
options {
	paraphrase = "Quote";
rk's avatar
rk committed

APOS_MARK
options {
	paraphrase = "Apostrophe";
rk's avatar
rk committed
// Question mark.
Q_MARK
options {
	paraphrase = "Question mark";
}
	: '?'
;

// Exclamation mark.
E_MARK
options {
	paraphrase = "Exclamation mark";
}
	: '!'
;

STR_PREFIX
options {
	paraphrase = "String prefix";
}
	paraphrase = "Symset prefix";
;

BOOL_PREFIX
options {
	paraphrase = "Bool prefix";
}
rk's avatar
rk committed

POS_PREFIX
options {
// The literal character sequence "$m:".
MATCH_VECTOR_PREFIX
options {
	paraphrase = "Match vector prefix";
}
	: "$m:"
;

// Left square bracket.
LBRACKET
options {
	paraphrase = "'['";
}
	: '['
;

// Right square bracket.
RBRACKET
options {
	paraphrase = "']'";
}
	: ']'
;

// Left parenthesis.
LPAREN
options {
	paraphrase = "'('";
}
	: '('
;

// Right parenthesis.
RPAREN
options {
	paraphrase = "')'";
}
	: ')'
;

// Left curly brace.
LCURLY
options {
	paraphrase = "'{'";
}
	: '{'
;

// Right curly brace.
RCURLY
options {
	paraphrase = "'}'";
}
	: '}'
;

// At sign.
AT_MARK
options {
	paraphrase = "'@'";
}
	: '@'
;

// Comma.
COMMA
options {
	paraphrase = "','";
}
	: ','
;

// Identifier: a letter or underscore followed by any number of letters,
// digits or underscores. The second form is the same identifier enclosed in
// backticks; the backticks remain part of the token text.
// Literal testing is switched on for this rule (it is off lexer-wide).
SYMBOL
options {
	paraphrase = "Symbol";
	testLiterals = true;
}
	: ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')*
	| '`' ('a'..'z' | 'A'..'Z' | '_') ('a'..'z' | 'A'..'Z' | '_' | '0'..'9')* '`'
;

WS
	  	| '\t'
	  	| '\f'
	  	| 
			( "\r\n"
				| '\r'
				| '\n'
			) { newline(); } 
		) { $setType(antlr::Token::SKIP); } 
rk's avatar
rk committed

// Single-line comment: "//" followed by everything up to, but not
// including, the next line break. The token type is set to SKIP.
COMMENT
options {
	paraphrase = "Single line comment";
}
	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP);  }
;

ML_COMMENT
options {
	paraphrase = "Multi line comment";
rk's avatar
rk committed
}
	(			// TODO: test it and add reference to the site it's taken from!
				/* This actually works OK despite the ambiguity that
				'\r' '\n' can be matched in one alternative or by matching
				'\r' in one iteration and '\n' in another.. But 
				this is really matched just by one rule per (...)* 
				loop iteration, so it's OK.
				This is exactly how they do it all over the web - just
				turn off the warning for this particular token.*/
      : { LA(2)!='/' }? '*'
      | '\r' '\n' { newline(); }
      | '\r' { newline(); }
      | '\n' { newline(); }
      | ~('*'|'\n'|'\r')
rk's avatar
rk committed

// Hash sign.
HASH
options {
	paraphrase = "'#'";
}
	: '#'
;

//DSEPARATOR
//options { 
//	paraphrase = "':-'"; 
//}
//	: ":-" 
//;