Skip to content
Snippets Groups Projects
Commit f1d0815d authored by Adam Wardynski
Browse files

Parser for lexicon files.

parent 2d156086
No related branches found
No related tags found
No related merge requests found
......@@ -27,7 +27,9 @@ endif(WIN32)
SET(libwccl_STAT_SRC
exception.cpp
lexicon/lexfilegrammar.g
lexicon/lexicon.cpp
lexicon/lexiconparser.cpp
lexicon/lexicons.cpp
ops/formatters.cpp
ops/functions/bool/iteration.cpp
......@@ -105,6 +107,8 @@ SET(libwccl_STAT_SRC
# Append the ANTLR-generated sources from the build tree (WCCL parser and
# lexicon-file parser) to libwccl_STAT_SRC.
SET(libwccl_STAT_SRC ${libwccl_STAT_SRC}
${PROJECT_BINARY_DIR}/parser/ANTLRLexer.cpp
${PROJECT_BINARY_DIR}/parser/ANTLRParser.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
)
include_directories(${PROJECT_BINARY_DIR}/parser)
......@@ -119,7 +123,23 @@ ADD_CUSTOM_COMMAND(
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/parser
COMMAND runantlr -o parser ${PROJECT_SOURCE_DIR}/parser/grammar.g
DEPENDS ${PROJECT_SOURCE_DIR}/parser/grammar.g
COMMENT "-- Generating ANTLR parser cpp/hpp/txt files")
COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for WCCL ---")
include_directories(${PROJECT_BINARY_DIR}/lexicon)
# Generate the lexicon-file lexer/parser sources from lexicon/lexfilegrammar.g.
# The lexicon output directory in the build tree is removed first, then
# runantlr is invoked with "-o lexicon". The grammar file is the only declared
# dependency of the generated files.
# NOTE(review): "-o lexicon" is a relative path, so this presumably relies on
# the command running with PROJECT_BINARY_DIR as its working directory - confirm.
ADD_CUSTOM_COMMAND(
OUTPUT
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.hpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.hpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.hpp
${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.txt
COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/lexicon
COMMAND runantlr -o lexicon ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
DEPENDS ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for lexicon files ---")
file(GLOB_RECURSE INCS "*.h")
......
header {
// Close the currently open namespace first, so that the headers included
// below do not end up inside our namespace.
ANTLR_END_NAMESPACE
#include <antlr/Token.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/make_shared.hpp>
#include <libwccl/lexicon/lexicon.h>
// Unicode string support (UnicodeString is used by the parser actions)
#include <unicode/uniset.h>
#include <unicode/unistr.h>
// Reopen our namespace for the code that follows
ANTLR_BEGIN_NAMESPACE(Wccl)
}
// File-wide options: generate C++ code placed in namespace Wccl.
options {
language = "Cpp";
genHashLines = false;
namespace = "Wccl";
// Alternative setting kept for reference (currently disabled):
// genHashLines = true;
}
// ----------------------------------------------------------------------------
// ANTLR PARSER FOR LEXICON FILES
// ----------------------------------------------------------------------------
// Parser for lexicon files. It shares the ANTLRLexicons token vocabulary with
// ANTLRLexiconLexer and does not build an AST.
class ANTLRLexiconParser extends Parser;
options {
k = 1;
buildAST = false;
exportVocab = ANTLRLexicons;
// NOTE(review): with the default error handler disabled, parse errors
// presumably reach the caller as ANTLR exceptions - confirm.
defaultErrorHandler = false;
}
{
private:
// Convert the token's text, interpreted as UTF-8, to a UnicodeString and
// return the result of UnicodeString::unescape() applied to it.
const UnicodeString to_ustring(antlr::RefToken& rstr) const {
return UnicodeString::fromUTF8(((antlr::Token*)rstr)->getText().c_str()).unescape();
}
// Return the token's text as a std::string, without any conversion.
const std::string to_std_string(antlr::RefToken& rstr) const {
return (((antlr::Token*)rstr)->getText());
}
}
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// PARSER RULES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Entry rule: parse a whole lexicon file.
// A new Lexicon is created with the given name before any input is consumed;
// each entry matched up to EOF is then added to it by lexicon_entry.
// Returns a shared pointer to that Lexicon.
parse_lexicon_file
[const std::string& name]
returns [boost::shared_ptr<Lexicon> lex]
{
lex = boost::make_shared<Lexicon>(name);
}
: (lexicon_entry [*lex])*
EOF
;
// One lexicon entry: a key string, optionally followed by TAB and a value
// string. The entry is inserted into the Lexicon passed in by reference.
lexicon_entry
[Lexicon& lex]
{
UnicodeString key, value;
}
: key = ustring (TAB value = ustring)? {
// An empty value (e.g. the optional TAB part was not matched) means
// the entry is inserted with the key alone.
if (value.isEmpty()) {
lex.insert(key);
} else {
lex.insert(key, value);
}
}
;
// A single STRING token, returned as a UnicodeString built by to_ustring().
ustring
returns [UnicodeString us]
: s : STRING { us = to_ustring(s); }
;
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// ANTLR LEXER FOR LEXICON FILES
///////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Lexer for lexicon files; exports its tokens to the ANTLRLexicons vocabulary
// used by ANTLRLexiconParser.
class ANTLRLexiconLexer extends Lexer;
options {
exportVocab = ANTLRLexicons;
// Character values 3..255 (octal 377) are accepted.
// NOTE(review): presumably chosen so that UTF-8 input passes through as
// raw bytes - confirm.
charVocabulary = '\3'..'\377';
testLiterals = false;
k = 2;
}
// One or more consecutive characters that are neither tab, LF nor CR.
STRING
options {
paraphrase = "a string";
}
: (~( '\t' | '\n' | '\r'))+
;
// A single tab character; the parser uses it to separate a key from its value.
TAB
options {
paraphrase = "a tabulator";
}
: '\t'
;
// Line terminator: CRLF, a lone CR, or a lone LF.
// The action calls newline() and marks the token as SKIP.
// NOTE(review): since the token is skipped, the parser rules presumably never
// see line boundaries, so entries are not strictly tied to lines - confirm
// this is intended.
NEWLINE
: ( "\r\n"
| '\r'
| '\n'
) { newline(); $setType(antlr::Token::SKIP); }
;
\ No newline at end of file
#include <libwccl/lexicon/lexiconparser.h>
#include "ANTLRLexiconParser.hpp"
#include "ANTLRLexiconLexer.hpp"
#include <libwccl/lexicon/lexicon.h>
#include <libwccl/exception.h>
#include <fstream>
namespace Wccl {
/**
 * Read the lexicon file located at `path` and build a Lexicon named
 * `lexicon_name` from its contents.
 *
 * Throws Wccl::FileNotFound if the file stream is not in a good state right
 * after being opened.
 */
boost::shared_ptr<Lexicon> LexiconParser::parse_lexicon(
	const std::string& lexicon_name,
	const std::string& path)
{
	std::ifstream lexicon_file(path.c_str());
	const bool readable = lexicon_file.good();
	if (!readable) {
		throw Wccl::FileNotFound(path, "", __FUNCTION__);
	}

	// The lexer reads from the stream; the parser pulls tokens from the lexer.
	ANTLRLexiconLexer token_source(lexicon_file);
	ANTLRLexiconParser lexicon_parser(token_source);
	boost::shared_ptr<Lexicon> parsed =
		lexicon_parser.parse_lexicon_file(lexicon_name);
	return parsed;
}
} /* end ns Wccl */
#ifndef LIBWCCL_LEXICON_LEXICONPARSER_H
#define LIBWCCL_LEXICON_LEXICONPARSER_H

// std::string is part of this header's public interface, so include it
// directly instead of relying on a transitive include.
#include <string>

#include <boost/shared_ptr.hpp>

namespace Wccl {

class Lexicon;

/**
 * Entry point for reading lexicon files into Lexicon objects.
 */
class LexiconParser
{
public:
	/**
	 * Parse the lexicon file at the given path.
	 *
	 * @param lexicon_name name given to the resulting Lexicon
	 * @param path path of the lexicon file to read
	 * @returns shared pointer to the Lexicon built from the file
	 * @throws FileNotFound if the file cannot be opened for reading
	 */
	static boost::shared_ptr<Lexicon> parse_lexicon(
		const std::string& lexicon_name,
		const std::string& path);
};

} /* end ns Wccl */

#endif // LIBWCCL_LEXICON_LEXICONPARSER_H
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment