From f1d0815dd017b666f7b12ff3bc4442e1fb981e04 Mon Sep 17 00:00:00 2001
From: Adam Wardynski <award@.(win7-laptop)>
Date: Fri, 29 Apr 2011 21:13:21 +0200
Subject: [PATCH] Parser for lexicon files.

---
 libwccl/CMakeLists.txt            |  22 +++++-
 libwccl/lexicon/lexfilegrammar.g  | 116 ++++++++++++++++++++++++++++++
 libwccl/lexicon/lexiconparser.cpp |  26 +++++++
 libwccl/lexicon/lexiconparser.h   |  19 +++++
 4 files changed, 182 insertions(+), 1 deletion(-)
 create mode 100644 libwccl/lexicon/lexfilegrammar.g
 create mode 100644 libwccl/lexicon/lexiconparser.cpp
 create mode 100644 libwccl/lexicon/lexiconparser.h

diff --git a/libwccl/CMakeLists.txt b/libwccl/CMakeLists.txt
index 165786b..2e4b2bc 100644
--- a/libwccl/CMakeLists.txt
+++ b/libwccl/CMakeLists.txt
@@ -27,7 +27,9 @@ endif(WIN32)
 
 SET(libwccl_STAT_SRC
 	exception.cpp
+	lexicon/lexfilegrammar.g
 	lexicon/lexicon.cpp
+	lexicon/lexiconparser.cpp
 	lexicon/lexicons.cpp
 	ops/formatters.cpp
 	ops/functions/bool/iteration.cpp
@@ -105,6 +107,8 @@ SET(libwccl_STAT_SRC
 SET(libwccl_STAT_SRC ${libwccl_STAT_SRC}
 	${PROJECT_BINARY_DIR}/parser/ANTLRLexer.cpp
 	${PROJECT_BINARY_DIR}/parser/ANTLRParser.cpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
+	${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
 )
 
 include_directories(${PROJECT_BINARY_DIR}/parser)
@@ -119,8 +123,24 @@ ADD_CUSTOM_COMMAND(
 	COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/parser
 	COMMAND runantlr -o parser ${PROJECT_SOURCE_DIR}/parser/grammar.g
 	DEPENDS ${PROJECT_SOURCE_DIR}/parser/grammar.g
-	COMMENT "-- Generating ANTLR parser cpp/hpp/txt files")
+	COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for WCCL ---")
 
+include_directories(${PROJECT_BINARY_DIR}/lexicon)
+
+# Generate the lexicon-file lexer/parser with runantlr; the rule reruns when lexfilegrammar.g changes.
+ADD_CUSTOM_COMMAND(
+	OUTPUT
+		${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.cpp
+		${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconLexer.hpp
+		${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.cpp
+		${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconParser.hpp
+		${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.hpp
+		${PROJECT_BINARY_DIR}/lexicon/ANTLRLexiconsTokenTypes.txt
+	COMMAND ${CMAKE_COMMAND} -E remove_directory ${PROJECT_BINARY_DIR}/lexicon # output directory is removed before each run
+	COMMAND runantlr -o lexicon ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g # NOTE(review): "-o lexicon" is a relative path; confirm the command's working directory
+	DEPENDS ${PROJECT_SOURCE_DIR}/lexicon/lexfilegrammar.g
+	COMMENT "-- Generating ANTLR parser cpp/hpp/txt files for lexicon files ---")
+	
 file(GLOB_RECURSE INCS "*.h")
 
 if(WIN32)
diff --git a/libwccl/lexicon/lexfilegrammar.g b/libwccl/lexicon/lexfilegrammar.g
new file mode 100644
index 0000000..a0cb159
--- /dev/null
+++ b/libwccl/lexicon/lexfilegrammar.g
@@ -0,0 +1,116 @@
+header {
+	//don't try to add all the headers inside our namespace
+	ANTLR_END_NAMESPACE
+
+	#include <antlr/Token.hpp>
+	
+	#include <boost/shared_ptr.hpp>
+	#include <boost/make_shared.hpp>
+	
+	#include <libwccl/lexicon/lexicon.h>
+	
+	// Unicode String
+	#include <unicode/uniset.h>
+	#include <unicode/unistr.h>
+
+	// start our namespace again
+	ANTLR_BEGIN_NAMESPACE(Wccl)
+}
+
+options {
+	language = "Cpp"; // generate C++ sources
+	genHashLines = false;
+	namespace = "Wccl"; // generated classes are placed in namespace Wccl
+	// genHashLines = true would insert #line directives pointing generated code back at this grammar
+}
+
+// ----------------------------------------------------------------------------
+// ANTLR PARSER FOR LEXICON FILES
+// ----------------------------------------------------------------------------
+class ANTLRLexiconParser extends Parser;
+options {
+	k = 1;
+	buildAST = false;
+	exportVocab = ANTLRLexicons;
+	defaultErrorHandler = false;
+}
+
+{
+private:
+	// Token text, read as UTF-8, as a UnicodeString with escape sequences resolved.
+	const UnicodeString to_ustring(antlr::RefToken& rstr) const {
+		return UnicodeString::fromUTF8(static_cast<antlr::Token*>(rstr)->getText().c_str()).unescape();
+	}
+	// Token text as a plain std::string, with no conversion applied.
+	const std::string to_std_string(antlr::RefToken& rstr) const {
+		return static_cast<antlr::Token*>(rstr)->getText();
+	}
+}
+
+// ----------------------------------------------------------------------------
+// PARSER RULES
+// ----------------------------------------------------------------------------
+
+// Start rule: creates a Lexicon called `name` and feeds it every entry up to EOF.
+parse_lexicon_file
+	[const std::string& name]
+	returns [boost::shared_ptr<Lexicon> lex]
+{
+	lex = boost::make_shared<Lexicon>(name);
+}
+	: (lexicon_entry [*lex])* EOF
+;
+
+// One entry: a key, optionally followed by TAB and a value; an empty value means key-only.
+lexicon_entry
+	[Lexicon& lex]
+{
+	UnicodeString key, value;
+}
+	: key = ustring (TAB value = ustring)? {
+		if (!value.isEmpty()) {
+			lex.insert(key, value);
+		} else {
+			lex.insert(key);
+		}
+	}
+;
+
+// A single STRING token converted to a UnicodeString via to_ustring.
+ustring
+	returns [UnicodeString us]
+	: s : STRING { us = to_ustring(s); }
+;
+// ----------------------------------------------------------------------------
+// ANTLR LEXER FOR LEXICON FILES
+// ----------------------------------------------------------------------------
+class ANTLRLexiconLexer extends Lexer;
+options {
+	exportVocab    = ANTLRLexicons;
+	charVocabulary = '\3'..'\377'; // accepted input: character codes 3 through 255 (octal escapes)
+	testLiterals   = false;
+	k              = 2; // two characters of lookahead
+}
+
+// A non-empty run of characters other than tab, line feed and carriage return.
+STRING
+options {
+	paraphrase = "a string";
+}
+	: (~( '\t' | '\n' | '\r'))+
+;
+
+// A single tab character; the parser uses it to separate a key from its value.
+TAB
+options {
+	paraphrase = "a tabulator";
+}
+	: '\t'
+;
+
+NEWLINE // line breaks never reach the parser: the token type is set to SKIP
+	: ( "\r\n"
+		| '\r'
+		| '\n'
+		) { newline(); $setType(antlr::Token::SKIP); } // newline() is called for every line break matched
+;
\ No newline at end of file
diff --git a/libwccl/lexicon/lexiconparser.cpp b/libwccl/lexicon/lexiconparser.cpp
new file mode 100644
index 0000000..77f7561
--- /dev/null
+++ b/libwccl/lexicon/lexiconparser.cpp
@@ -0,0 +1,26 @@
+#include <libwccl/lexicon/lexiconparser.h>
+#include "ANTLRLexiconParser.hpp"
+#include "ANTLRLexiconLexer.hpp"
+
+#include <libwccl/lexicon/lexicon.h>
+#include <libwccl/exception.h>
+
+#include <fstream>
+
+namespace Wccl {
+
+/// Opens the file at `path` and parses it into a Lexicon named `lexicon_name`.
+boost::shared_ptr<Lexicon> LexiconParser::parse_lexicon(
+	const std::string& lexicon_name, const std::string& path)
+{
+	std::ifstream input(path.c_str());
+	if (input.good()) {
+		ANTLRLexiconLexer lexer(input);
+		ANTLRLexiconParser parser(lexer);
+		return parser.parse_lexicon_file(lexicon_name);
+	}
+	// The stream is not in a good state after opening: reported as FileNotFound.
+	throw Wccl::FileNotFound(path, "", __FUNCTION__);
+}
+
+} /* end ns Wccl */
diff --git a/libwccl/lexicon/lexiconparser.h b/libwccl/lexicon/lexiconparser.h
new file mode 100644
index 0000000..521d19a
--- /dev/null
+++ b/libwccl/lexicon/lexiconparser.h
@@ -0,0 +1,32 @@
+#ifndef LIBWCCL_LEXICON_LEXICONPARSER_H
+#define LIBWCCL_LEXICON_LEXICONPARSER_H
+
+#include <string>
+
+#include <boost/shared_ptr.hpp>
+
+namespace Wccl {
+
+class Lexicon;
+
+/**
+ * Loads lexicon files into Lexicon objects.
+ */
+class LexiconParser
+{
+public:
+	/**
+	 * Parses the lexicon file at the given path.
+	 * Errors raised by the generated ANTLR lexer/parser are not caught here.
+	 * @param lexicon_name name given to the resulting Lexicon
+	 * @param path location of the lexicon file to read
+	 * @return shared pointer to the Lexicon filled from the file
+	 * @throws FileNotFound if the file cannot be opened for reading
+	 */
+	static boost::shared_ptr<Lexicon> parse_lexicon(
+		const std::string& lexicon_name,
+		const std::string& path);
+};
+} /* end ns Wccl */
+
+#endif // LIBWCCL_LEXICON_LEXICONPARSER_H
-- 
GitLab