Skip to content
Snippets Groups Projects
Commit f741ad7b authored by Adam Wardynski's avatar Adam Wardynski
Browse files

Parsing WcclFile.

parent 4b53e952
No related branches found
No related tags found
No related merge requests found
......@@ -95,6 +95,9 @@ header {
#include <libwccl/ops/match/actions/unmarkmatch.h>
#include <libwccl/ops/functions/match/submatch.h>
// Wccl whole file syntax
#include <libwccl/wcclfile.h>
// Unicode String
#include <unicode/uniset.h>
#include <unicode/unistr.h>
......@@ -183,7 +186,7 @@ parse_strset_operator
: body = strset_operator [scope] {
res.reset(new Operator<StrSet>(body, scope.variables()));
}
EOF
(EOF | SEMI)
;
// ----------------------------------------------------------------------------
......@@ -199,7 +202,7 @@ parse_bool_operator
: body = bool_operator [scope] {
res.reset(new Operator<Bool>(body, scope.variables()));
}
EOF
(EOF | SEMI)
;
// ----------------------------------------------------------------------------
......@@ -215,7 +218,7 @@ parse_symset_operator
: body = symset_operator [scope] {
res.reset(new Operator<TSet>(body, scope.variables()));
}
EOF
(EOF | SEMI)
;
// ----------------------------------------------------------------------------
......@@ -231,7 +234,23 @@ parse_position_operator
: body = position_operator [scope] {
res.reset(new Operator<Position>(body, scope.variables()));
}
EOF
(EOF | SEMI)
;
// ----------------------------------------------------------------------------
// Entry rule: parses one match operator together with its own parsing scope.
// The operator has to be followed by end of input or by a semicolon.
// Returns boost::shared_ptr<Operator<Match> >
parse_match_operator
	[const Corpus2::Tagset &tagset]
	returns [boost::shared_ptr<Operator<Match> > res]
{
	// Scope created for this operator only; its variables are handed
	// over to the resulting Operator object.
	ParsingScope op_scope(tagset);
	boost::shared_ptr<Function<Match> > match_body;
}
	: match_body = match_operator [op_scope]
	  {
		res.reset(
			new Operator<Match>(match_body, op_scope.variables()));
	  }
	  ( EOF | SEMI )
;
// ----------------------------------------------------------------------------
......@@ -272,6 +291,25 @@ parse_match_rule
: ret_match = match_rule_operator[scope]
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Top-level rule for a whole wccl file.
// Accepts any number of operator sections, optionally followed by a single
// rule sequence and more operator sections, and finally end of input.
// Returns boost::shared_ptr<WcclFile> built for the given tagset.
parse_wccl_file
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<WcclFile> wccl_file]
{
	wccl_file = boost::make_shared<WcclFile>(tagset);
	boost::shared_ptr<TagRuleSequence> tag_rules;
}
	: ( any_operator_section [*wccl_file] )*
	  (
		tag_rules = parse_rule_sequence [tagset]
		{
			// Store the parsed tag rules in the file being built.
			wccl_file->set_tag_rules(tag_rules);
		}
		( any_operator_section [*wccl_file] )*
	  )?
	  EOF
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// VALUES
......@@ -1764,6 +1802,139 @@ match_operator
)*
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// WCCL FILE PARSING RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// A single named operator section of a wccl file.
// Whichever kind of section matches is parsed into a sequence (using the
// tagset of the given file) and then added to that file.
any_operator_section
	[WcclFile& wccl_file]
{
	boost::shared_ptr<UntypedOpSequence> untyped_ops;
	boost::shared_ptr<OpSequence<Bool> > bool_ops;
	boost::shared_ptr<OpSequence<TSet> > symset_ops;
	boost::shared_ptr<OpSequence<StrSet> > strset_ops;
	boost::shared_ptr<OpSequence<Position> > position_ops;
	boost::shared_ptr<OpSequence<Match> > match_ops;
}
	: untyped_ops = untyped_operator_sequence [wccl_file.tagset()]
	  { wccl_file.add_untyped_section(untyped_ops); }
	| bool_ops = bool_operator_sequence [wccl_file.tagset()]
	  { wccl_file.add_section(bool_ops); }
	| symset_ops = symset_operator_sequence [wccl_file.tagset()]
	  { wccl_file.add_section(symset_ops); }
	| strset_ops = strset_operator_sequence [wccl_file.tagset()]
	  { wccl_file.add_section(strset_ops); }
	| position_ops = position_operator_sequence [wccl_file.tagset()]
	  { wccl_file.add_section(position_ops); }
	| match_ops = match_operator_sequence [wccl_file.tagset()]
	  { wccl_file.add_section(match_ops); }
;
// Named section of bool operators:
//   BOOL_SECTION_PREFIX STRING LPAREN operator+ RPAREN
// The STRING token supplies the name of the created sequence; every parsed
// operator is appended to it in order of appearance.
bool_operator_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<OpSequence<Bool> > seq]
{
	boost::shared_ptr<Operator<Bool> > parsed_op;
}
	: BOOL_SECTION_PREFIX section_name: STRING
	  {
		seq.reset(
			new OpSequence<Bool>(token_ref_to_std_string(section_name)));
	  }
	  LPAREN
	  (
		parsed_op = parse_bool_operator [tagset]
		{ seq->append(parsed_op); }
	  )+
	  RPAREN
;
// Named section of symset operators:
//   TST_SECTION_PREFIX STRING LPAREN operator+ RPAREN
// The STRING token supplies the name of the created sequence; every parsed
// operator is appended to it in order of appearance.
symset_operator_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<OpSequence<TSet> > seq]
{
	boost::shared_ptr<Operator<TSet> > parsed_op;
}
	: TST_SECTION_PREFIX section_name: STRING
	  {
		seq.reset(
			new OpSequence<TSet>(token_ref_to_std_string(section_name)));
	  }
	  LPAREN
	  (
		parsed_op = parse_symset_operator [tagset]
		{ seq->append(parsed_op); }
	  )+
	  RPAREN
;
// Named section of string set operators:
//   STR_SECTION_PREFIX STRING LPAREN operator+ RPAREN
// The STRING token supplies the name of the created sequence; every parsed
// operator is appended to it in order of appearance.
strset_operator_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<OpSequence<StrSet> > seq]
{
	boost::shared_ptr<Operator<StrSet> > parsed_op;
}
	: STR_SECTION_PREFIX section_name: STRING
	  {
		seq.reset(
			new OpSequence<StrSet>(token_ref_to_std_string(section_name)));
	  }
	  LPAREN
	  (
		parsed_op = parse_strset_operator [tagset]
		{ seq->append(parsed_op); }
	  )+
	  RPAREN
;
// Named section of position operators:
//   POS_SECTION_PREFIX STRING LPAREN operator+ RPAREN
// The STRING token supplies the name of the created sequence; every parsed
// operator is appended to it in order of appearance.
position_operator_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<OpSequence<Position> > seq]
{
	boost::shared_ptr<Operator<Position> > parsed_op;
}
	: POS_SECTION_PREFIX section_name: STRING
	  {
		seq.reset(
			new OpSequence<Position>(token_ref_to_std_string(section_name)));
	  }
	  LPAREN
	  (
		parsed_op = parse_position_operator [tagset]
		{ seq->append(parsed_op); }
	  )+
	  RPAREN
;
// Named section of operators of arbitrary type:
//   AT_MARK STRING LPAREN operator+ RPAREN
// The STRING token supplies the name of the created sequence; each operator
// is parsed by any_operator and appended in order of appearance.
untyped_operator_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<UntypedOpSequence> seq]
{
	boost::shared_ptr<FunctionalOperator> parsed_op;
}
	: AT_MARK section_name: STRING
	  {
		seq.reset(
			new UntypedOpSequence(token_ref_to_std_string(section_name)));
	  }
	  LPAREN
	  (
		parsed_op = any_operator [tagset]
		{ seq->append(parsed_op); }
	  )+
	  RPAREN
;
// Named section of match operators:
//   MATCH_SECTION_PREFIX STRING LPAREN operator+ RPAREN
// The STRING token supplies the name of the created sequence; every parsed
// operator is appended to it in order of appearance.
match_operator_sequence
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<OpSequence<Match> > seq]
{
	boost::shared_ptr<Operator<Match> > parsed_op;
}
	: MATCH_SECTION_PREFIX section_name: STRING
	  {
		seq.reset(
			new OpSequence<Match>(token_ref_to_std_string(section_name)));
	  }
	  LPAREN
	  (
		parsed_op = parse_match_operator [tagset]
		{ seq->append(parsed_op); }
	  )+
	  RPAREN
;
// Parses a single operator whose type is not known in advance.
// Syntactic predicates try, in order, a position, symset, strset and match
// operator; the first one that matches selects the corresponding typed
// parse_*_operator rule. If none matches, the input is parsed as a bool
// operator.
// Returns boost::shared_ptr<FunctionalOperator> holding the parsed operator.
any_operator
	[const Corpus2::Tagset& tagset]
	returns [boost::shared_ptr<FunctionalOperator> op]
{
	// Throwaway scope needed only as an argument for the predicates below.
	// It is deliberately a plain local: a function-local static would be
	// constructed once, from the tagset of the very first call, and then be
	// reused for every later call even when a different tagset is passed.
	ParsingScope _s(tagset);
}
	: (position_operator [_s]) => op = parse_position_operator [tagset]
	| (symset_operator [_s]) => op = parse_symset_operator [tagset]
	| (strset_operator [_s]) => op = parse_strset_operator [tagset]
	| (match_operator [_s]) => op = parse_match_operator [tagset]
	| op = parse_bool_operator [tagset]
;
// ----------------------------------------------------------------------------
// ----------------------------------------------------------------------------
// Tagging actions and rules:
......@@ -2417,6 +2588,41 @@ options {
: "$m:"
;
// Lexer token for the literal "@s:". It opens a string operators section
// (see strset_operator_sequence).
STR_SECTION_PREFIX
options {
paraphrase = "Wccl file string operators section prefix";
}
: "@s:"
;
// Lexer token for the literal "@p:". It opens a position operators section
// (see position_operator_sequence).
POS_SECTION_PREFIX
options {
paraphrase = "Wccl file position operators section prefix";
}
: "@p:"
;
// Lexer token for the literal "@b:". It opens a bool operators section
// (see bool_operator_sequence).
BOOL_SECTION_PREFIX
options {
paraphrase = "Wccl file bool operators section prefix";
}
: "@b:"
;
// Lexer token for the literal "@t:". It opens a symset operators section
// (see symset_operator_sequence).
TST_SECTION_PREFIX
options {
paraphrase = "Wccl file symset operators section prefix";
}
: "@t:"
;
// Lexer token for the literal "@m:". It opens a match operators section
// (see match_operator_sequence).
// The paraphrase names match operators; it previously repeated the symset
// wording of TST_SECTION_PREFIX, which made the two tokens indistinguishable
// by their paraphrase.
MATCH_SECTION_PREFIX
options {
	paraphrase = "Wccl file match operators section prefix";
}
	: "@m:"
;
LBRACKET
options {
paraphrase = "'['";
......@@ -2466,6 +2672,7 @@ options {
: '@'
;
COMMA
options {
paraphrase = "','";
......@@ -2487,6 +2694,13 @@ options {
: ':'
;
// Lexer token for a single semicolon. The parse_*_operator rules accept it
// as an alternative to EOF after an operator.
// The paraphrase is quoted the same way as for the other punctuation tokens
// in this grammar (e.g. "'['" and "','").
SEMI
options {
	paraphrase = "';'";
}
	: ';'
;
SYMBOL
options {
paraphrase = "Symbol";
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment