Skip to content
Snippets Groups Projects
grammar.g 80 KiB
Newer Older
rk's avatar
rk committed
header {
Paweł Kędzia's avatar
Paweł Kędzia committed
	//don't try to add all the headers inside our namespace
	#include <libwccl/parser/ParserException.h>
	#include <libwccl/parser/parsingscope.h>
rk's avatar
rk committed

	#include <cstdio>
	#include <antlr/Token.hpp>
	#include <boost/lexical_cast.hpp>
	// values/variables
	#include <libwccl/variables.h>
	#include <libwccl/values/bool.h>
	#include <libwccl/values/tset.h>
	#include <libwccl/values/strset.h>
	#include <libwccl/values/position.h>
	
	// sentence context
	#include <libwccl/sentencecontext.h>

	// operators
	#include <libwccl/ops/operator.h>

	#include <libwccl/ops/functions/constant.h>
	#include <libwccl/ops/functions/vargetter.h>
	#include <libwccl/ops/functions/conditional.h>

	#include <libwccl/ops/functions/setops.h>
	#include <libwccl/ops/functions/bool/varsetter.h>
	#include <libwccl/ops/functions/bool/predicates/debug.h>
	#include <libwccl/ops/functions/bool/predicates/ambiguous.h>
	#include <libwccl/ops/functions/bool/predicates/issingular.h>
	#include <libwccl/ops/functions/bool/predicates/or.h>
	#include <libwccl/ops/functions/bool/predicates/nor.h>
	#include <libwccl/ops/functions/bool/predicates/and.h>
	#include <libwccl/ops/functions/bool/predicates/regex.h>
	#include <libwccl/ops/functions/bool/predicates/intersects.h>
	#include <libwccl/ops/functions/bool/predicates/issubsetof.h>
	#include <libwccl/ops/functions/bool/predicates/isinside.h>
	#include <libwccl/ops/functions/bool/predicates/isoutside.h>
	#include <libwccl/ops/functions/bool/predicates/isempty.h>
	#include <libwccl/ops/functions/bool/predicates/equals.h>
	#include <libwccl/ops/functions/bool/predicates/weakagreement.h>
	#include <libwccl/ops/functions/bool/predicates/pointagreement.h>
	#include <libwccl/ops/functions/bool/predicates/strongagreement.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/annsub.h>
ilor's avatar
ilor committed
	#include <libwccl/ops/functions/bool/predicates/ann.h>

	#include <libwccl/ops/functions/strset/affix.h>
	#include <libwccl/ops/functions/strset/getorth.h>
	#include <libwccl/ops/functions/strset/toupper.h>
	#include <libwccl/ops/functions/strset/tolower.h>
	#include <libwccl/ops/functions/strset/getlemmas.h>
	#include <libwccl/ops/functions/strset/lextranslator.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/agrfilter.h>
Adam Radziszewski's avatar
Adam Radziszewski committed
	#include <libwccl/ops/functions/tset/catfilter.h>
	#include <libwccl/ops/functions/tset/getsymbols.h>
	#include <libwccl/ops/functions/tset/getwordclass.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	#include <libwccl/ops/functions/tset/getsymbolsinrange.h>
	#include <libwccl/ops/functions/position/relativeposition.h>
	#include <libwccl/ops/functions/position/lasttoken.h>
	#include <libwccl/ops/functions/position/firsttoken.h>
	#include <libwccl/ops/functions/bool/iterations/only.h>
	#include <libwccl/ops/functions/bool/iterations/atleast.h>
	#include <libwccl/ops/functions/bool/iterations/leftlook.h>
	#include <libwccl/ops/functions/bool/iterations/rightlook.h>
Adam Wardynski's avatar
Adam Wardynski committed
	#include <libwccl/ops/tagrule.h>
	#include <libwccl/ops/tagrulesequence.h>
	#include <libwccl/ops/tagactions/unify.h>
	#include <libwccl/ops/tagactions/delete.h>
	#include <libwccl/ops/tagactions/select.h>
	#include <libwccl/ops/tagactions/relabel.h>
	#include <libwccl/ops/tagactions/mark.h>
	#include <libwccl/ops/tagactions/unmark.h>
	// Match operators
	#include <libwccl/values/tokenmatch.h>
	#include <libwccl/values/annotationmatch.h>
	#include <libwccl/values/matchvector.h>
	#include <libwccl/ops/match/applyoperator.h>
	#include <libwccl/ops/match/conditions/optionalmatch.h>
	#include <libwccl/ops/match/conditions/repeatedmatch.h>
	#include <libwccl/ops/match/conditions/isannotatedas.h>
	#include <libwccl/ops/match/conditions/matchtext.h>
	#include <libwccl/ops/match/conditions/conjconditions.h>
	#include <libwccl/ops/match/conditions/tokencondition.h>
	#include <libwccl/ops/match/conditions/oneof.h>
	#include <libwccl/ops/match/conditions/longest.h>
	#include <libwccl/ops/match/actions/markmatch.h>
	#include <libwccl/ops/match/actions/unmarkmatch.h>
	#include <libwccl/ops/functions/match/submatch.h>
Adam Wardynski's avatar
Adam Wardynski committed
	// Wccl whole file syntax
	#include <libwccl/wcclfile.h>
	
	#include <libwccl/lexicon/lexiconparser.h>

rk's avatar
rk committed
	// Unicode String
	#include <unicode/uniset.h>
	#include <unicode/unistr.h>
Paweł Kędzia's avatar
Paweł Kędzia committed
	// start our namespace again
	ANTLR_BEGIN_NAMESPACE(Wccl)
rk's avatar
rk committed
}

options {
	language = "Cpp";
//	genHashLines = true;
rk's avatar
rk committed
}

// ----------------------------------------------------------------------------
rk's avatar
rk committed
// ANTLR PARSER
// ----------------------------------------------------------------------------
rk's avatar
rk committed
class ANTLRParser extends Parser;
options {
rk's avatar
rk committed
	buildAST = false;
rk's avatar
rk committed
	defaultErrorHandler = false;
}
{
private:
	// 
rk's avatar
rk committed
	// Converts the text of the given token, read as UTF-8, into a
	// UnicodeString and then applies UnicodeString::unescape() to it.
	//
	// rstr - reference-counted handle to the token whose text is converted.
	// Returns the unescaped token text.
	const UnicodeString token_ref_to_ustring(antlr::RefToken& rstr) const {
		// The ref-counted handle exposes the token through operator->,
		// so no pointer cast is needed to reach getText().
		return UnicodeString::fromUTF8(rstr->getText().c_str()).unescape();
	}
	// Converts a token to a UnicodeString (via token_ref_to_ustring) and
	// drops its first and last character.
	// NOTE(review): presumably those are the enclosing quote characters of a
	// string literal token - confirm against the lexer rule.
	//
	// rstr - reference-counted handle to the token.
	// Returns the inner text, or an empty string when the converted text is
	// shorter than three characters.
	const UnicodeString str_token_ref_to_ustring(antlr::RefToken& rstr) const {
		const UnicodeString ustr = token_ref_to_ustring(rstr);

		if (ustr.length() < 3) {
			// Default-construct the empty result instead of converting from
			// a narrow "" literal: that conversion goes through a codepage
			// converter and is rejected when ICU is built with
			// UNISTR_FROM_STRING_EXPLICIT=explicit.
			return UnicodeString();
		}

		// Copy everything except the first and the last character.
		UnicodeString ret_ustr;
		ustr.extract(1, ustr.length() - 2, ret_ustr);

		return ret_ustr;
	}
Paweł Kędzia's avatar
Paweł Kędzia committed
	// Returns the token text with one pair of enclosing grave accents (`)
	// removed. Text shorter than two characters, or text that does not both
	// start and end with a grave accent, is returned unchanged.
	//
	// rstr - reference-counted handle to the token.
	const std::string str_token_rem_grav(antlr::RefToken& rstr) const {
		const std::string text = token_ref_to_std_string(rstr);
		const size_t size = text.length();

		const bool wrapped =
			size >= 2 && text[0] == '`' && text[size - 1] == '`';

		return wrapped ? text.substr(1, size - 2) : text;
	}
	//
rk's avatar
rk committed
	// Returns the text of the given token exactly as reported by getText().
	//
	// rstr - reference-counted handle to the token.
	const std::string token_ref_to_std_string(antlr::RefToken& rstr) const {
		// operator-> on the ref-counted handle yields the underlying token,
		// so no pointer cast is needed.
		return rstr->getText();
	}
	//
	// Converts the text of the given token to an int using atoi, so the
	// result follows atoi's rules (e.g. 0 when the text does not start with
	// a number; no error is reported).
	//
	// rstr - reference-counted handle to the token.
	int token_ref_to_int(antlr::RefToken& rstr) {
		// operator-> on the ref-counted handle yields the underlying token,
		// so no pointer cast is needed.
		return atoi(rstr->getText().c_str());
	}

///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// "GLOBAL" RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ----------------------------------------------------------------------------
// Rule for parsing string set operator with scope. 
// Returns boost::shared_ptr<Operator<StrSet> >
parse_strset_operator
	returns [boost::shared_ptr<Operator<StrSet> > res]
{
	// Init-action: build a parsing scope with an empty lexicon set, then hand
	// it to the string-set operator rule below.
	// NOTE(review): `tagset` is not declared in the visible part of this rule;
	// presumably it is supplied as a rule argument - confirm and restore the
	// argument declaration if it is missing.
	Lexicons empty_lex;
	ParsingScope scope(tagset, empty_lex);
}
	: res = functional_operator_strset [scope]
	EOF
;
// ----------------------------------------------------------------------------
// Rule for parsing bool operator with scope. 
// Returns boost::shared_ptr<Operator<Bool> > 
Loading full blame...