From 3ce9a0ab2c56964821dcecf0a61b30651aa94123 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pawe=C5=82=20K=C4=99dzia?= <Pawel.Kedzia@pwr.wroc.pl>
Date: Mon, 18 Feb 2013 14:46:26 +0100
Subject: [PATCH] Single and multi line comment handling added to lexicon grammar

---
 libwccl/lexicon/lexfilegrammar.g | 35 +++++++++++++++++++++++++++++++-
 tests/data/indecl.lex            |  4 ++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/libwccl/lexicon/lexfilegrammar.g b/libwccl/lexicon/lexfilegrammar.g
index ce36648..2efc4b1 100644
--- a/libwccl/lexicon/lexfilegrammar.g
+++ b/libwccl/lexicon/lexfilegrammar.g
@@ -28,7 +28,7 @@ options {
 // ----------------------------------------------------------------------------
 class ANTLRLexiconParser extends Parser;
 options {
-	k = 1;
+	k = 2; // NOTE(review): this raises the parser lookahead; confirm it was not meant for the lexer options
 	buildAST = false;
 	exportVocab = ANTLRLexicons;
 	defaultErrorHandler = false;
@@ -113,3 +113,36 @@ NEWLINE
 		| '\n'
 		) { newline(); $setType(antlr::Token::SKIP); }
 ;
+
+COMMENT
+options {
+	paraphrase = "Single line comment";
+}
+	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); } // stops before the line break, which is not consumed here
+;
+
+ML_COMMENT
+options {
+	paraphrase = "Multi line comment";
+}
+	: "/*"
+	(			// TODO: add tests for this loop and cite the source it was adapted from.
+				/* Comment body: consumes input up to the closing delimiter.
+				A '*' is accepted here only when the predicate sees that the
+				character after it is not '/'. Every line-break form calls
+				newline(). '\r' '\n' could also be matched as '\r' in one
+				iteration and '\n' in the next, but each iteration picks just
+				one alternative, so the ambiguity warning is turned off below.
+				NOTE(review): looks adapted from the ANTLR 2 example Java grammar - confirm. */
+				options {
+					generateAmbigWarnings = false;
+				}
+				: { LA(2)!='/' }? '*'
+				| '\r' '\n' { newline(); }
+				| '\r' { newline(); }
+				| '\n' { newline(); }
+				| ~('*'|'\n'|'\r')
+	)*
+	"*/"
+	{ $setType(antlr::Token::SKIP); } // same SKIP token type that COMMENT and NEWLINE set
+;
diff --git a/tests/data/indecl.lex b/tests/data/indecl.lex
index 97474c1..e4a533c 100644
--- a/tests/data/indecl.lex
+++ b/tests/data/indecl.lex
@@ -1,3 +1,7 @@
+/**
+ * ML COMMENT
+ */
+// Single line comment
 by part
 och interj
 ach interj
-- 
GitLab