// Adam Radziszewski authored 5b4080b2
// wccl-parser.cpp 8.39 KiB
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <libwccl/values/strset.h>
#include <libwccl/parser/Parser.h>
#include <libcorpus2/tagsetmanager.h>
// not checking for HAVE_VERSION..., no reason for its absence
#include <libwccl/version.h>
#include <boost/bind.hpp>
#include <boost/make_shared.hpp>
#include <boost/program_options.hpp>
#include <libcorpus2/io/xcesreader.h>
#include <antlr/NoViableAltException.hpp>
#include <antlr/MismatchedTokenException.hpp>
// ----------------------------------------------------------------------------
#ifdef HAVE_LIBEDIT
#include <histedit.h>
#endif
#ifdef _WINDOWS
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
#endif
/**
* @desc A simple command line tester for testing operators
*/
namespace {
// Text printed in front of every line of interactive input.
const char* _prompt =
    "Enter any operator expression: ";
} // anonymous namespace
/**
 * Plain iostream read loop: prints the prompt on std::cout, reads one line
 * from std::cin and passes it to the callback, until told to stop.
 *
 * @param line_cb callback invoked with each line read; returning true ends
 *                the loop
 *
 * The loop also ends once std::cin is no longer readable. A read that fails
 * (end of input or stream error before any character was extracted) is not
 * forwarded to the callback, so it never receives a spurious empty line.
 */
void std_read_loop(boost::function<bool (const std::string&)>& line_cb)
{
    while (std::cin.good()) {
        std::cout << _prompt << std::flush;
        std::string s;
        if (!std::getline(std::cin, s)) {
            // nothing was read: do not hand a made-up empty line to the callback
            return;
        }
        if (line_cb(s)) {
            return;
        }
    }
}
#if defined(HAVE_LIBEDIT) && !defined(_WINDOWS) //non-Windows libedit read loop
/**
 * Prompt callback handed to libedit through EL_PROMPT.
 * The EditLine argument is unused.
 * @return the text stored in _prompt
 */
const char* query_prompt(EditLine* /*el*/)
{
    return _prompt;
}
/**
 * Interactive read loop backed by libedit (line editing plus history).
 *
 * Every line obtained from el_gets is passed to the callback with its
 * trailing newline removed. The loop ends when the callback returns true or
 * when el_gets returns NULL or an empty string. Lines for which the callback
 * returned false are appended to the history.
 *
 * If either the editor or its history cannot be initialised, the function
 * falls back to std_read_loop.
 *
 * @param line_cb callback invoked with each line; returning true ends the loop
 */
void read_loop(boost::function<bool (const std::string&)>& line_cb)
{
    EditLine *el = el_init("wccl-parser", stdin, stdout, stderr);
    if (el == NULL) {
        // el_init reports failure with NULL; every later el_* call needs a valid handle
        std::cerr << "EditLine init error\n";
        std_read_loop(line_cb);
        return;
    }
    el_set(el, EL_PROMPT, &query_prompt);
    el_set(el, EL_EDITOR, "emacs");

    History* myhistory = history_init();
    if (myhistory == NULL) {
        std::cerr << "EditLine history init error\n";
        el_end(el);
        std_read_loop(line_cb);
        return;
    }
    HistEvent ev;
    history(myhistory, &ev, H_SETSIZE, 1024);
    el_set(el, EL_HIST, history, myhistory);

    bool more = true;
    while (more) {
        int count;
        const char *line = el_gets(el, &count); // line usually ends with \n
        if (line == NULL || line[0] == 0) {
            more = false;
        } else {
            std::string s(line);
            // Remove the newline only when it is really there: input that
            // ended in the middle of a line must keep its last character.
            if (!s.empty() && s[s.size() - 1] == '\n') {
                s.erase(s.size() - 1);
            }
            if (line_cb(s)) {
                more = false;
            } else {
                history(myhistory, &ev, H_ENTER, line);
            }
        }
    }
    history_end(myhistory);
    el_end(el);
}
#elif defined(_WINDOWS) // windows console read loop, doing UNICODE
/**
 * Scoped guard for the console input code page.
 *
 * The constructor stores the code page reported by GetConsoleCP() and then
 * switches to the requested one; the destructor switches back to the stored
 * value.
 */
struct ConsoleCPSetter
{
    /// code page that was active when the guard was created
    UINT original_CP;

    /// @param cp code page to activate for the lifetime of the guard
    explicit ConsoleCPSetter(UINT cp)
        : original_CP(GetConsoleCP())
    {
        SetConsoleCP(cp);
    }

    ~ConsoleCPSetter() {
        SetConsoleCP(original_CP);
    }

private:
    // Not copyable: a copy would restore the code page a second time.
    // Declared but intentionally left undefined (pre-C++11 idiom).
    ConsoleCPSetter(const ConsoleCPSetter&);
    ConsoleCPSetter& operator=(const ConsoleCPSetter&);
};
/**
 * Interactive read loop for the Windows console, reading wide characters.
 *
 * The console input code page is set to CP_UTF8 for the duration of the loop.
 * Input is fetched with ReadConsoleW in chunks of at most 1023 characters;
 * chunks are concatenated until one comes back shorter than the maximum
 * (after trailing CR/LF characters have been stripped). The assembled line
 * is converted with PwrNlp::to_utf8 and passed to the callback.
 *
 * When ReadConsoleW fails with ERROR_INVALID_HANDLE, stdin is assumed to be
 * redirected and the line is read with getline from std::wcin instead.
 *
 * @param line_cb callback invoked with each line; returning true ends the loop
 */
void read_loop(boost::function<bool (const std::string&)>& line_cb)
{
    HANDLE stdinhandle = GetStdHandle(STD_INPUT_HANDLE);
    WCHAR buffer[1024];
    // one slot is kept free for the terminating zero written below
    const DWORD max = sizeof(buffer)/sizeof(WCHAR) - 1;
    ConsoleCPSetter cp(CP_UTF8);
    bool more = true;
    while (more) {
        std::cout << _prompt << std::flush;
        std::wstring s;
        while (more) {
            DWORD read = 0;
            if (ReadConsoleW(stdinhandle, buffer, max, &read, NULL)) {
                buffer[read] = L'\0';
                // The read > 0 guard stops the index from wrapping below zero
                // when nothing was read or the chunk holds only CR/LF.
                while (read > 0
                        && (buffer[read - 1] == L'\n' || buffer[read - 1] == L'\r')) {
                    buffer[--read] = L'\0';
                }
                // a completely filled chunk means the line continues
                more = (read == max);
                s += buffer;
            } else {
                if (GetLastError() == ERROR_INVALID_HANDLE) {
                    // assume stdin redirect, fallback to standard getline
                    getline(std::wcin, s);
                }
                more = false;
            }
        }
        UnicodeString us(s.c_str());
        more = !line_cb(PwrNlp::to_utf8(us));
    }
}
#else // standard io read loop
/**
 * Read loop for builds that use neither the libedit nor the Windows console
 * variant: simply forwards to std_read_loop.
 *
 * @param line_cb callback passed through unchanged
 */
inline void read_loop(boost::function<bool (const std::string&)>& line_cb)
{
    std_read_loop(line_cb);
}
#endif
/**
 * Clears the terminal by running the platform's shell command
 * ("cls" when _WINDOWS is defined, "clear" otherwise).
 *
 * @return whatever std::system returns for that command
 */
int clear_screen()
{
#ifdef _WINDOWS
    const char* const command = "cls";
#else
    const char* const command = "clear";
#endif
    return std::system(command);
}
bool process_line(const std::string& line, Wccl::Parser& parser,
Wccl::SentenceContext& sc, bool all_positions, bool dump_variables)
{
if (line.empty() || line == "exit" || line == "quit") {
return true;
} else if (line == "clear" || line == "cls") {
if (clear_screen()) {}
return false;
}
boost::shared_ptr<const Wccl::Value> retVal;
boost::shared_ptr<Wccl::FunctionalOperator> retOp;
try {
retOp = parser.parseAnyOperator(line);
if (retOp) {
int pb, pe;
if (all_positions) {
pb = 0;
pe = sc.get_sentence().empty() ? 1 : sc.size();
} else {
pb = sc.get_position();
pe = sc.get_position() + 1;
}
for (int i = pb; i < pe; ++i) {
sc.set_position(i);
retVal = retOp->base_apply(sc);
if (retVal) {
std::cerr << "[" << std::setw(2) << sc.get_position() << "] "
<< "Parsed expression: "
<< retVal->to_string(parser.tagset())
<< std::endl;
if (dump_variables) {
retOp->dump_variables(std::cerr, parser.tagset());
}
} else {
std::cerr << "Problem while parsing -- "
<< "retVal is NULL!" << std::endl;
}
}
} else {
std::cerr << "Problem while parsing -- "
<< "parser returned NULL!" << std::endl;
}
} catch (antlr::MismatchedTokenException &e) {
std::cerr << e.getFileLineColumnString()
<< " " << e.getMessage() << std::endl;
} catch(antlr::NoViableAltException &e) {
std::cerr << e.getFileLineColumnString()
<< " " << e.getMessage() << std::endl;
} catch (Wccl::InvalidVariableName &e) {
std::cerr << "Wccl::InvalidVariableName " << e.info() << std::endl;
} catch (Wccl::VariableTypeMismatch &e) {
std::cerr << "Wccl::VariableTypeMismatch " << e.info() << std::endl;
} catch (Wccl::WcclError& e) {
std::cerr << "Wccl::WcclError:" << e.info() << std::endl;
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << "PwrNlp::PwrNlpError " << e.info() << std::endl;
} catch (antlr::ANTLRException& e) {
std::cerr << "Antlr error " << e.getMessage() << std::endl;
}
return false;
}
int main(int argc, char** argv)
{
std::string tagset_load = "kipi";
std::string query = "";
std::string sentence_load = "";
std::string query_load = "";
std::string position = "0";
bool quiet = false;
bool dump_variables = false;
using boost::program_options::value;
boost::program_options::options_description desc("Allowed options");
desc.add_options()
("tagset,t", value(&tagset_load),
"Tagset to use\n")
("sentence,s", value(&sentence_load),
"Sentence to load (XCES)\n")
("position,p", value(&position),
"Position in the sentence to use, 'all' iterates through the sentence\n")
("query-file,f", value(&query_load),
"Run query from file (disables interohaiactive mode) (- is stdin)\n")
("query,Q", value(&query),
"Query to run (disables interactive mode)\n")
("quiet,q", value(&quiet)->zero_tokens(),
"Suppress messages\n")
("variables,V", value(&dump_variables)->zero_tokens(),
"Dump variables after running each query\n")
("help,h", "Show help")
("version", "print version string")
;
boost::program_options::variables_map vm;
boost::program_options::positional_options_description p;
p.add("query", -1);
try {
boost::program_options::store(
boost::program_options::command_line_parser(argc, argv)
.options(desc).positional(p).run(), vm);
} catch (boost::program_options::error& e) {
std::cerr << e.what() << std::endl;
return 2;
}
boost::program_options::notify(vm);
if (vm.count("help")) {
std::cout << desc << "\n";
return 1;
}
if (vm.count("version")) {
std::cout << "wccl-parser (libwccl) " << LIBWCCL_VERSION << "\n";
return 0;
}
try {
const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
boost::shared_ptr<Corpus2::Sentence> sentence;
if (sentence_load.empty()) {
sentence = boost::make_shared<Corpus2::Sentence>();
sentence->append(new Corpus2::Token("", PwrNlp::Whitespace::ManySpaces));
} else {
std::ifstream ifs(sentence_load.c_str());
if (ifs.good()) {
Corpus2::XcesReader reader(tagset, ifs, false);
sentence = reader.get_next_sentence();
std::cerr << "Sentence loaded, " << sentence->size()
<< " tokens.\n";
} else {
throw Wccl::FileNotFound(sentence_load, "", "Sentence");
}
}
Wccl::SentenceContext sc(sentence);
int pos = atoi(position.c_str());
sc.set_position(pos);
Wccl::Parser parser(tagset);
if (!query.empty()) {
process_line(query, parser, sc, position == "all", dump_variables);
return 0;
} else if (!query_load.empty()) {
std::stringstream ss;
if (query_load == "-") {
ss << std::cin.rdbuf();
} else {
std::ifstream qf(query_load.c_str());
if (qf.good()) {
ss << qf.rdbuf();
} else {
throw Wccl::FileNotFound(sentence_load, "", "Query file");
}
}
process_line(ss.str(), parser, sc, position == "all", dump_variables);
return 0;
}
boost::function<bool (const std::string&)> f;
f = boost::bind(&process_line, _1, boost::ref(parser), boost::ref(sc),
position == "all", dump_variables);
read_loop(f);
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << e.info() << std::endl;
return 2;
}
return 0;
}