Skip to content
Snippets Groups Projects
Commit f1d0815d authored by Adam Wardynski's avatar Adam Wardynski
Browse files

Parser for lexicon files.

parent 2d156086
Branches
No related merge requests found
...@@ -27,7 +27,9 @@ endif(WIN32) ...@@ -27,7 +27,9 @@ endif(WIN32)
SET(libwccl_STAT_SRC SET(libwccl_STAT_SRC
exception.cpp exception.cpp
lexicon/lexfilegrammar.g
lexicon/lexicon.cpp lexicon/lexicon.cpp
lexicon/lexiconparser.cpp
lexicon/lexicons.cpp lexicon/lexicons.cpp
ops/formatters.cpp ops/formatters.cpp
ops/functions/bool/iteration.cpp ops/functions/bool/iteration.cpp
...@@ -105,6 +107,8 @@ SET(libwccl_STAT_SRC ...@@ -105,6 +107,8 @@ SET(libwccl_STAT_SRC
SET(libwccl_STAT_SRC ${libwccl_STAT_SRC} SET(libwccl_STAT_SRC ${libwccl_STAT_SRC}
${PROJECT_BINARY_DIR}/parser/ANTLRLexer.cpp ${PROJECT_BINARY_DIR}/parser/ANTLRLexer.cpp
${PROJECT_BINARY_DIR}/parser/ANTLRParser.cpp ${PROJECT_BINARY_DIR}/parser/ANTLRParser.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
) )
include_directories(${PROJECT_BINARY_DIR}/parser) include_directories(${PROJECT_BINARY_DIR}/parser)
...@@ -119,8 +123,24 @@ ADD_CUSTOM_COMMAND( ...@@ -119,8 +123,24 @@ ADD_CUSTOM_COMMAND(
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/parser COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/parser
COMMAND runantlr -o parser ${PROJECT_SOURCE_DIR}/parser/grammar.g COMMAND runantlr -o parser ${PROJECT_SOURCE_DIR}/parser/grammar.g
DEPENDS ${PROJECT_SOURCE_DIR}/parser/grammar.g DEPENDS ${PROJECT_SOURCE_DIR}/parser/grammar.g
COMMENT "-- Generating ANTLR parser cpp/hpp/txt files") COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for WCCL ---")
# Make the ANTLR-generated lexicon lexer/parser headers (written below into
# ${PROJECT_BINARY_DIR}/lexicon) reachable via #include.
include_directories(${PROJECT_BINARY_DIR}/lexicon)
# Generate the lexer/parser sources for lexicon files from lexfilegrammar.g.
# The rule re-runs whenever the grammar file changes (DEPENDS). It first wipes
# the previously generated directory, then invokes runantlr with the relative
# output directory "lexicon".
# NOTE(review): the relative "-o lexicon" only matches the OUTPUT paths when the
# command runs with ${PROJECT_BINARY_DIR} as its working directory -- confirm.
ADD_CUSTOM_COMMAND(
OUTPUT
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.hpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.hpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.hpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.txt
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/lexicon
COMMAND runantlr -o lexicon ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
DEPENDS ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for lexicon files ---")
file(GLOB_RECURSE INCS "*.h") file(GLOB_RECURSE INCS "*.h")
if(WIN32) if(WIN32)
......
header {
// Code in this section is copied verbatim into the generated sources.
// Close the namespace that ANTLR has already opened, so that the headers
// included below are not declared inside it.
ANTLR_END_NAMESPACE
#include <antlr/Token.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include <libwccl/lexicon/lexicon.h>
// ICU Unicode string support (UnicodeString is used by the parser rules).
#include <unicode/uniset.h>
#include <unicode/unistr.h>
// Re-open our namespace so that the generated code that follows lands in Wccl.
ANTLR_BEGIN_NAMESPACE(Wccl)
}
// File-wide generator options: emit C++ code placed in the Wccl namespace.
options {
language = "Cpp";
// Do not emit #line directives pointing back at this grammar file.
genHashLines = false;
namespace = "Wccl";
// genHashLines = true;  // alternative setting, kept for reference
}
// ----------------------------------------------------------------------------
// ANTLR PARSER FOR LEXICON FILES
// ----------------------------------------------------------------------------
// Parser for lexicon files. It consumes the STRING and TAB tokens produced by
// ANTLRLexiconLexer and fills a Lexicon object.
class ANTLRLexiconParser extends Parser;
options {
// One token of lookahead.
k = 1;
// No AST is constructed; the rule actions build the Lexicon directly.
buildAST = false;
// Token vocabulary name; it determines the ANTLRLexiconsTokenTypes file names.
exportVocab = ANTLRLexicons;
// No generated try/catch around rules: parse errors leave the parser as
// exceptions, so the caller is responsible for handling them.
defaultErrorHandler = false;
}
{
private:
// Converts the text of a token, interpreted as UTF-8, into a UnicodeString
// and resolves escape sequences in it via UnicodeString::unescape().
const UnicodeString to_ustring(antlr::RefToken& rstr) const {
return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
}
// Returns the text of a token unchanged, as a std::string.
const std::string to_std_string(antlr::RefToken& rstr) const {
return (((antlr::Token*)rstr)->getText());
}
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// PARSER RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Entry rule: parses a whole lexicon file.
// Parameter name: passed to the constructor of the Lexicon that is created.
// Returns a shared pointer to that Lexicon after every entry up to EOF has
// been inserted into it. A file with no entries yields the freshly
// constructed Lexicon.
parse_lexicon_file
[const std::string& name]
returns [boost::shared_ptr<Lexicon> lex]
{
lex = boost::make_shared<Lexicon>(name);
}
: (lexicon_entry [*lex])*
EOF
;
// A single lexicon entry: a key string, optionally followed by a TAB and a
// value string. The entry is inserted into the lexicon passed in as lex.
lexicon_entry
[Lexicon& lex]
{
UnicodeString key, value;
}
: key = ustring (TAB value = ustring)? {
// An empty value (in particular when the optional TAB part was absent)
// selects the key-only insert; otherwise the key/value insert is used.
if (value.isEmpty()) {
lex.insert(key);
} else {
lex.insert(key, value);
}
}
;
// Matches one STRING token and returns its text converted by to_ustring()
// (UTF-8 decoded, escape sequences resolved).
ustring
returns [UnicodeString us]
: s : STRING { us = to_ustring(s); }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER FOR LEXICON FILES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer for lexicon files: splits the input into STRING and TAB tokens and
// skips line breaks.
class ANTLRLexiconLexer extends Lexer;
options {
// Same vocabulary name as the one the parser declares.
exportVocab = ANTLRLexicons;
// Accepted input characters: codes 3 through octal 377 (255).
charVocabulary = '\3'..'\377';
// Matched text is not checked against a literals table.
testLiterals = false;
// Two characters of lookahead (the NEWLINE rule has a two-character
// alternative, "\r\n", next to the single '\r').
k = 2;
}
// One or more consecutive characters other than tab, line feed and carriage
// return. The paraphrase is the name ANTLR uses for this token in error
// messages.
STRING
options {
paraphrase = "a string";
}
: (~( '\t' | '\n' | '\r'))+
;
// A single tab character; the parser uses it as the separator between the
// key and the value of an entry.
TAB
options {
paraphrase = "a tabulator";
}
: '\t'
;
// A line break in any of the forms CR LF, CR or LF. The action advances the
// lexer's line counter via newline() and marks the token as SKIP, so the
// parser never receives it.
NEWLINE
: ( "\r\n"
| '\r'
| '\n'
) { newline(); $setType(antlr::Token::SKIP); }
;
\ No newline at end of file
#include <libwccl/lexicon/lexiconparser.h>
#include "ANTLRLexiconParser.hpp"
#include "ANTLRLexiconLexer.hpp"
#include <libwccl/lexicon/lexicon.h>
#include <libwccl/exception.h>
#include <fstream>
namespace Wccl {

/**
 * Reads a lexicon file and builds a Lexicon from its contents.
 *
 * The file is opened as an input stream and handed to the ANTLR-generated
 * lexer and parser for lexicon files.
 *
 * @param lexicon_name name forwarded to the parser's parse_lexicon_file rule
 * @param path         location of the lexicon file to read
 * @return shared pointer to the Lexicon produced by the parser
 * @throws Wccl::FileNotFound when the stream opened on path is not in a
 *         good state
 */
boost::shared_ptr<Lexicon> LexiconParser::parse_lexicon(
	const std::string& lexicon_name,
	const std::string& path)
{
	std::ifstream lexicon_stream(path.c_str());
	const bool stream_usable = lexicon_stream.good();
	if (!stream_usable) {
		throw Wccl::FileNotFound(path, "", __FUNCTION__);
	}

	// The lexer reads straight from the stream; the parser pulls tokens from it.
	ANTLRLexiconLexer token_source(lexicon_stream);
	ANTLRLexiconParser grammar_parser(token_source);

	boost::shared_ptr<Lexicon> lexicon =
		grammar_parser.parse_lexicon_file(lexicon_name);
	return lexicon;
}

} /* end ns Wccl */
#ifndef LIBWCCL_LEXICON_LEXICONPARSER_H
#define LIBWCCL_LEXICON_LEXICONPARSER_H

// std::string appears in the public interface below, so include it here
// instead of relying on another header to pull it in.
#include <string>

#include <boost/shared_ptr.hpp>

namespace Wccl {

class Lexicon;

/**
 * Entry point for reading lexicon files into Lexicon objects.
 */
class LexiconParser
{
public:
	/**
	 * Parses the lexicon file found at the given path.
	 *
	 * @param lexicon_name name to give to the resulting lexicon
	 * @param path         location of the lexicon file
	 * @return shared pointer to the parsed Lexicon
	 */
	static boost::shared_ptr<Lexicon> parse_lexicon(
		const std::string& lexicon_name,
		const std::string& path);
};

} /* end ns Wccl */

#endif // LIBWCCL_LEXICON_LEXICONPARSER_H
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment