diff --git a/wcclparser/main.cpp b/wcclparser/main.cpp index f1ddd3e7a31e5c73427c19cff5ef67f935387e36..aed4a47d274549a704a008b0723fb4f8a722b674 100644 --- a/wcclparser/main.cpp +++ b/wcclparser/main.cpp @@ -1,4 +1,5 @@ #include <cstdlib> +#include <fstream> #include <libwccl/values/strset.h> #include <libwccl/parser/Parser.h> @@ -6,6 +7,7 @@ #include <boost/bind.hpp> #include <boost/program_options.hpp> +#include <libcorpus2/io/xcesreader.h> #include <antlr/MismatchedTokenException.hpp> // ---------------------------------------------------------------------------- @@ -86,7 +88,7 @@ void libedit_read_loop(boost::function<bool (const std::string&)>& line_cb) } #endif -bool process_line(const std::string& line, Parser& parser) +bool process_line(const std::string& line, Parser& parser, Wccl::SentenceContext& sc) { if (line.empty() || line == "exit" || line == "quit") { return true; @@ -97,10 +99,6 @@ bool process_line(const std::string& line, Parser& parser) boost::shared_ptr<const Wccl::Value> retVal; boost::shared_ptr<ANTLRParserResultBase> retOp; - boost::shared_ptr<Corpus2::Sentence> sentence; - sentence.reset(new Corpus2::Sentence); - Wccl::SentenceContext sc(sentence); - try { retOp = parser.parseAnyOperator(line); if (retOp) { @@ -138,6 +136,8 @@ int main(int argc, char** argv) { std::string tagset_load = "kipi"; std::string query = ""; + std::string sentence_load = ""; + std::string position = "0"; bool quiet = false; using boost::program_options::value; @@ -145,6 +145,10 @@ int main(int argc, char** argv) desc.add_options() ("tagset,t", value(&tagset_load), "Tagset to use\n") + ("sentence,s", value(&sentence_load), + "Sentence to load (XCES)\n") + ("position,p", value(&position), + "Position in the sentence to use, 'all' iterates through the sentence\n") ("query,Q", value(&query), "Query to run (disables interactive mode)\n") ("quiet,q", value(&quiet)->zero_tokens(), @@ -171,31 +175,40 @@ int main(int argc, char** argv) } try { - Corpus2::get_named_tagset(tagset_load); - } catch (Corpus2::FileNotFound& e) { - std::cerr << e.info() << std::endl; - return 2; - } - const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load); - - Parser parser(tagset); - - if (!query.empty()) { - process_line(query, parser); - return 0; - } - - if (clear_screen()) { - // - } - - boost::function<bool (const std::string&)> f; - f = boost::bind(&process_line, _1, boost::ref(parser)); + const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load); + boost::shared_ptr<Corpus2::Sentence> sentence; + if (sentence_load.empty()) { + sentence.reset(new Corpus2::Sentence); + } else { + std::ifstream ifs(sentence_load.c_str()); + if (ifs.good()) { + Corpus2::XcesReader reader(tagset, ifs, false); + sentence.reset(reader.get_next_sentence()); + std::cerr << "Sentence loaded, " << sentence->size() + << " tokens.\n"; + } else { + throw Wccl::FileNotFound(sentence_load, "", "Sentence"); + } + } + Wccl::SentenceContext sc(sentence); + int pos = atoi(position.c_str()); + sc.set_position(pos); + Parser parser(tagset); + if (!query.empty()) { + process_line(query, parser, sc); + return 0; + } + boost::function<bool (const std::string&)> f; + f = boost::bind(&process_line, _1, boost::ref(parser), boost::ref(sc)); #ifdef HAVE_LIBEDIT - libedit_read_loop(f); + libedit_read_loop(f); #else - std_read_loop(f); + std_read_loop(f); #endif + } catch (PwrNlp::PwrNlpError& e) { + std::cerr << e.info() << std::endl; + return 2; + } return 0; }