Skip to content
Snippets Groups Projects
Commit 011ca5d1 authored by ilor's avatar ilor
Browse files

add raw operator string support in wcclrun

parent 5014ffab
Branches
No related merge requests found
...@@ -48,6 +48,9 @@ public: ...@@ -48,6 +48,9 @@ public:
} }
bool load_more_operators(const std::string &filename); bool load_more_operators(const std::string &filename);
bool load_operator_string(const std::string &op_string);
const std::vector< boost::shared_ptr<Wccl::FunctionalOperator> >& operators() const { const std::vector< boost::shared_ptr<Wccl::FunctionalOperator> >& operators() const {
return ops_; return ops_;
} }
...@@ -102,6 +105,27 @@ bool Runner::load_more_operators(const std::string& filename) ...@@ -102,6 +105,27 @@ bool Runner::load_more_operators(const std::string& filename)
return false; return false;
} }
bool Runner::load_operator_string(const std::string& op_string)
{
boost::shared_ptr<Wccl::FunctionalOperator> retOp;
try {
retOp = parser_.parseAnyOperator(op_string);
if (retOp) {
op_names_.push_back(op_string);
ops_.push_back(retOp);
return true;
} else {
std::cerr << "Problem while parsing -- "
<< "parser returned NULL!" << std::endl;
}
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << e.scope() << " error: " << e.info() << std::endl;
} catch (antlr::ANTLRException& e) {
std::cerr << "Antlr error " << e.getMessage() << std::endl;
}
return false;
}
void Runner::do_sentence(const boost::shared_ptr<Corpus2::Sentence>& sentence) void Runner::do_sentence(const boost::shared_ptr<Corpus2::Sentence>& sentence)
{ {
Wccl::SentenceContext sc(sentence); Wccl::SentenceContext sc(sentence);
...@@ -197,7 +221,7 @@ int main(int argc, char** argv) ...@@ -197,7 +221,7 @@ int main(int argc, char** argv)
{ {
std::string tagset_load = "kipi"; std::string tagset_load = "kipi";
bool first = false; bool first = false;
std::vector<std::string> corpora_files, ccl_files, files; std::vector<std::string> corpora_files, files, operator_strings;
bool corpus_stdin = false; bool corpus_stdin = false;
using boost::program_options::value; using boost::program_options::value;
...@@ -207,8 +231,8 @@ int main(int argc, char** argv) ...@@ -207,8 +231,8 @@ int main(int argc, char** argv)
"Tagset to use") "Tagset to use")
("corpus,c", value(&corpora_files), ("corpus,c", value(&corpora_files),
"Corpus file to load (XCES)") "Corpus file to load (XCES)")
("ccl-file,C", value(&ccl_files), ("ccl-operator,C", value(&operator_strings),
"CCL query file") "CCL operator file or string")
("files,f", value(&files), ("files,f", value(&files),
"Files to load, looking at the extension to determine type") "Files to load, looking at the extension to determine type")
("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(), ("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(),
...@@ -248,7 +272,8 @@ int main(int argc, char** argv) ...@@ -248,7 +272,8 @@ int main(int argc, char** argv)
if (vm.count("help")) { if (vm.count("help")) {
std::cerr << "Usage " << argv[0] << " [OPTIONS] FILES\n" std::cerr << "Usage " << argv[0] << " [OPTIONS] FILES\n"
<< "Files ending with .xml are treated as corpora, otherwise \n" << "Files ending with .xml are treated as corpora, otherwise \n"
<< "as CCL files. Use - to read corpus from stdin (as with -I)\n"; << "as CCL files. Use - to read corpus from stdin (as with -I)\n"
<< "Files not ending with an extension are treated as raw operator strings\n";
std::cout << desc << "\n"; std::cout << desc << "\n";
return 1; return 1;
} }
...@@ -259,23 +284,27 @@ int main(int argc, char** argv) ...@@ -259,23 +284,27 @@ int main(int argc, char** argv)
} else if (boost::algorithm::ends_with(f, ".xml")) { } else if (boost::algorithm::ends_with(f, ".xml")) {
corpora_files.push_back(f); corpora_files.push_back(f);
} else { } else {
ccl_files.push_back(f); operator_strings.push_back(f);
} }
} }
if (corpora_files.empty() || (ccl_files.empty() && !output_orths)) { if (corpora_files.empty() || (operator_strings.empty() && !output_orths)) {
std::cerr << "Nothing to do, try " << argv[0] << " -h\n"; std::cerr << "Nothing to do, try " << argv[0] << " -h\n";
return 2; return 2;
} }
try { try {
const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load); const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
Runner runner(tagset); Runner runner(tagset);
foreach (const std::string& f, ccl_files) { foreach (const std::string& f, operator_strings) {
size_t sz = runner.operators().size(); if (boost::algorithm::ends_with(f, ".ccl")) {
if (!runner.load_more_operators(f)) { size_t sz = runner.operators().size();
std::cerr << "Warning: error while parsing " << f << "\n"; if (!runner.load_more_operators(f)) {
} std::cerr << "Warning: error while parsing " << f << "\n";
if (runner.operators().size() == sz) { }
std::cerr << "Warning: no operators loaded from " << f << "\n"; if (runner.operators().size() == sz) {
std::cerr << "Warning: no operators loaded from " << f << "\n";
}
} else {
runner.load_operator_string(f);
} }
} }
if (!runner.operators().empty()) { if (!runner.operators().empty()) {
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment