diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 165786b645d69ff7417ef1a01a0aff1e9a0959ea..2e4b2bc29aedb37db29bb5b85cdfe8ddd9d1a24a 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -27,7 +27,9 @@ endif(WIN32)
 
 SET(libwccl_STAT_SRC
 	exception.cpp
+	lexicon/lexfilegrammar.g
 	lexicon/lexicon.cpp
+	lexicon/lexiconparser.cpp
 	lexicon/lexicons.cpp
 	ops/formatters.cpp
 	ops/functions/bool/iteration.cpp
@@ -105,6 +107,8 @@ SET(libwccl_STAT_SRC
 SET(libwccl_STAT_SRC ${libwccl_STAT_SRC}
 	${PROJECT_BINARY_DIR}/parser/ANTLRLexer.cpp
 	${PROJECT_BINARY_DIR}/parser/ANTLRParser.cpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
 )
 
 include_directories(${PROJECT_BINARY_DIR}/parser)
@@ -119,8 +123,24 @@ ADD_CUSTOM_COMMAND(
 	COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/parser
 	COMMAND runantlr -o parser ${PROJECT_SOURCE_DIR}/parser/grammar.g
 	DEPENDS ${PROJECT_SOURCE_DIR}/parser/grammar.g
-	COMMENT "-- Generating ANTLR parser cpp/hpp/txt files")
+	COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for WCCL ---")
 
+include_directories(${PROJECT_BINARY_DIR}/lexicon)
+
+ADD_CUSTOM_COMMAND(
+	OUTPUT
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.hpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.hpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.hpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.txt
+	COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/lexicon
+	COMMAND runantlr -o lexicon ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
+	DEPENDS ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
+	COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for lexicon files ---")
+
+
 file(GLOB_RECURSE INCS "*.h")
 
 if(WIN32)
diff --git a/libwccl/lexicon/lexfilegrammar.g b/libwccl/lexicon/lexfilegrammar.g
new file mode 100644
index 0000000000000000000000000000000000000000..a0cb15940c72835dd84269a0d463c1510badb48e
--- /dev/null
+++ b/libwccl/lexicon/lexfilegrammar.g
@@ -0,0 +1,124 @@
+header {
+	//don't try to add all the headers inside our namespace
+	ANTLR_END_NAMESPACE
+
+	#include <antlr/Token.hpp>
+
+	#include <boost/shared_ptr.hpp>
+	#include <boost/make_shared.hpp>
+
+	#include <libwccl/lexicon/lexicon.h>
+
+	// Unicode String
+	#include <unicode/uniset.h>
+	#include <unicode/unistr.h>
+
+	// start our namespace again
+	ANTLR_BEGIN_NAMESPACE(Wccl)
+}
+
+options {
+	language = "Cpp";
+	genHashLines = false;
+	namespace = "Wccl";
+// genHashLines = true;
+}
+
+// ----------------------------------------------------------------------------
+// ANTLR PARSER FOR LEXICON FILES
+// ----------------------------------------------------------------------------
+class ANTLRLexiconParser extends Parser;
+options {
+	k = 1;
+	buildAST = false;
+	exportVocab = ANTLRLexicons;
+	defaultErrorHandler = false;
+}
+
+{
+private:
+	// Convert a token's UTF-8 text to a UnicodeString and unescape it.
+	const UnicodeString to_ustring(antlr::RefToken& rstr) const {
+		return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
+	}
+	// Return a token's text unchanged as a std::string.
+	const std::string to_std_string(antlr::RefToken& rstr) const {
+		return (((antlr::Token*)rstr)->getText());
+	}
+}
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// PARSER RULES
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// Rule for a whole file: any number of entries up to EOF, all inserted into
+// a new Lexicon created with the given name.
+parse_lexicon_file
+	[const std::string& name]
+	returns [boost::shared_ptr<Lexicon> lex]
+{
+	lex = boost::make_shared<Lexicon>(name);
+}
+	: (lexicon_entry [*lex])*
+	  EOF
+;
+
+// Rule for one entry: a key, optionally followed by a TAB and a value.
+// When the value is empty, only the key is inserted.
+lexicon_entry
+	[Lexicon& lex]
+{
+	UnicodeString key, value;
+}
+	: key = ustring (TAB value = ustring)? {
+		if (value.isEmpty()) {
+			lex.insert(key);
+		} else {
+			lex.insert(key, value);
+		}
+	}
+;
+
+// Rule for a single STRING token, returned as an unescaped UnicodeString.
+ustring
+	returns [UnicodeString us]
+	: s : STRING { us = to_ustring(s); }
+;
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// ANTLR LEXER FOR LEXICON FILES
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+class ANTLRLexiconLexer extends Lexer;
+options {
+	exportVocab = ANTLRLexicons;
+	charVocabulary = '\3'..'\377';
+	testLiterals = false;
+	k = 2;
+}
+
+STRING
+options {
+	paraphrase = "a string";
+}
+	: (~( '\t' | '\n' | '\r'))+
+;
+
+TAB
+options {
+	paraphrase = "a tabulator";
+}
+	: '\t'
+;
+
+// Line breaks are counted and then skipped, so the parser never sees them.
+// NOTE(review): because of that, a key followed by a TAB and nothing else would
+// take the first string of the next line as its value - confirm this is intended.
+NEWLINE
+	: ( "\r\n"
+	  | '\r'
+	  | '\n'
+	  ) { newline(); $setType(antlr::Token::SKIP); }
+;
diff --git a/libwccl/lexicon/lexiconparser.cpp b/libwccl/lexicon/lexiconparser.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..77f75611309587d2b7c0d8e1274c76244f4d8bb7
--- /dev/null
+++ b/libwccl/lexicon/lexiconparser.cpp
@@ -0,0 +1,28 @@
+#include <libwccl/lexicon/lexiconparser.h>
+#include "ANTLRLexiconParser.hpp"
+#include "ANTLRLexiconLexer.hpp"
+
+#include <libwccl/lexicon/lexicon.h>
+#include <libwccl/exception.h>
+
+#include <fstream>
+
+namespace Wccl {
+
+boost::shared_ptr<Lexicon> LexiconParser::parse_lexicon(
+	const std::string& lexicon_name,
+	const std::string& path)
+{
+	std::ifstream is(path.c_str());
+	// Report an unusable stream up front instead of handing it to the lexer.
+	if (!is.good()) {
+		throw Wccl::FileNotFound(path, "", __FUNCTION__);
+	}
+
+	// The generated lexer reads the stream; the parser pulls tokens from the lexer.
+	ANTLRLexiconLexer lexer(is);
+	ANTLRLexiconParser parser(lexer);
+	return parser.parse_lexicon_file(lexicon_name);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/lexicon/lexiconparser.h b/libwccl/lexicon/lexiconparser.h
new file mode 100644
index 0000000000000000000000000000000000000000..521d19a6c794d45456961ad872dbd74219feab6d
--- /dev/null
+++ b/libwccl/lexicon/lexiconparser.h
@@ -0,0 +1,36 @@
+#ifndef LIBWCCL_LEXICON_LEXICONPARSER_H
+#define LIBWCCL_LEXICON_LEXICONPARSER_H
+
+#include <boost/shared_ptr.hpp>
+
+#include <string>
+
+namespace Wccl {
+
+class Lexicon;
+
+/**
+ * Entry point for reading lexicon files into Lexicon objects.
+ */
+class LexiconParser
+{
+public:
+	/**
+	 * Parse the file at the given path into a newly created Lexicon.
+	 *
+	 * @param lexicon_name name given to the created Lexicon
+	 * @param path location of the lexicon file to read
+	 * @returns the Lexicon holding the entries read from the file
+	 * @throws FileNotFound if the file stream is not usable after opening
+	 *
+	 * NOTE(review): the lexicon grammar turns off ANTLR's default error
+	 * handler, so malformed input presumably surfaces as an ANTLR
+	 * exception - confirm callers are prepared for that.
+	 */
+	static boost::shared_ptr<Lexicon> parse_lexicon(
+		const std::string& lexicon_name,
+		const std::string& path);
+};
+} /* end ns Wccl */
+
+#endif // LIBWCCL_LEXICON_LEXICONPARSER_H