diff --git a/libwccl/lexicon/lexfilegrammar.g b/libwccl/lexicon/lexfilegrammar.g
index ce3664818000e66f3eff7c68d0cb0d9cf31ecb56..2efc4b1a9d9ad00ec9f8647d55b0c1455276c0c2 100644
--- a/libwccl/lexicon/lexfilegrammar.g
+++ b/libwccl/lexicon/lexfilegrammar.g
@@ -28,7 +28,7 @@ options {
 // ----------------------------------------------------------------------------
 class ANTLRLexiconParser extends Parser;
 options {
-	k = 1;
+	k = 2;
 	buildAST = false;
 	exportVocab = ANTLRLexicons;
 	defaultErrorHandler = false;
@@ -113,3 +113,36 @@ NEWLINE
 		| '\n'
 		) { newline(); $setType(antlr::Token::SKIP); }
 ;
+
+COMMENT	// "//" followed by everything up to, but not including, the line end
+options {
+	paraphrase = "Single line comment";
+}
+	: "//" (~('\n'|'\r'))* { $setType(antlr::Token::SKIP); }
+;
+
+ML_COMMENT	// block comment; line ends inside it still call newline()
+options {
+	paraphrase = "Multi line comment";
+}
+	: "/*"
+	(			// TODO: test this and add a reference to the site it was taken from!
+				/* The alternatives below are ambiguous: '\r' '\n' can be
+				matched by one alternative in a single iteration, or as
+				'\r' in one iteration and '\n' in the next. Each iteration
+				of the (...)* loop still matches exactly one alternative,
+				so the original author considered this OK. This is a
+				widely used form of the rule; the ambiguity warning is
+				simply turned off below (generateAmbigWarnings = false). */
+		options {
+			generateAmbigWarnings = false;
+		}
+		: { LA(2)!='/' }? '*'	// a '*' that is not followed by '/'
+		| '\r' '\n' { newline(); }
+		| '\r' { newline(); }
+		| '\n' { newline(); }
+		| ~('*'|'\n'|'\r')	// any character other than '*', '\n' and '\r'
+	)*
+	"*/"
+	{ $setType(antlr::Token::SKIP); }	// set the token type to SKIP
+;
diff --git a/tests/data/indecl.lex b/tests/data/indecl.lex
index 97474c1014105692c029a72cbacc69b70aa6b483..e4a533cd561649ad2b8c03d076915b4b473a51b5 100644
--- a/tests/data/indecl.lex
+++ b/tests/data/indecl.lex
@@ -1,3 +1,7 @@
+/**
+ * ML COMMENT
+ */
+// Single line comment
 by	part
 och	interj
 ach	interj