Skip to content
Snippets Groups Projects
Commit 00376006 authored by ilor's avatar ilor
Browse files

wcclrun -- a tool to run ccl operators on a sentence and get all results

parent 01e8ddc1
No related branches found
No related tags found
No related merge requests found
......@@ -59,4 +59,5 @@ endif(MSVC OR BORLAND)
# Component directories processed by the top-level build; wcclrun is the
# tool for running CCL operators on a sentence.
add_subdirectory(libwccl)
add_subdirectory(wcclparser)
add_subdirectory(wcclrun)
add_subdirectory(tests)
# Build configuration for wcclrun, a tool that runs CCL operators on a
# sentence and gets all results.
PROJECT( wcclrun )

# libedit is optional: when found, HAVE_LIBEDIT is defined and its libraries
# are appended to the link list.
find_package(Libedit)
if (Libedit_FOUND)
    message(STATUS "Building with libedit")
    add_definitions( -DHAVE_LIBEDIT )
    set(LIBS ${LIBS} ${Libedit_LIBRARIES})
endif (Libedit_FOUND)

find_package(LibXML++ REQUIRED)
include_directories(${LibXML++_INCLUDE_DIRS})
link_directories(${LibXML++_LIBRARY_DIRS})
set(LIBS ${LIBS} ${LibXML++_LIBRARIES})

include_directories( ${CMAKE_SOURCE_DIR} )

# Boost search paths have to be registered before the target is created:
# link_directories() only applies to targets added after the call.
include_directories(${Boost_INCLUDE_DIR})
link_directories(${Boost_LIBRARY_DIRS})

add_definitions(-DLIBWCCL_WCCLRUN_DATA_DIR="${PROJECT_SOURCE_DIR}/")

add_executable(wcclrun
    main.cpp
)

target_link_libraries (wcclrun wccl ${Boost_LIBRARIES} antlr ${LIBS})

# The executable is only installed on UNIX platforms.
if(UNIX)
    install(TARGETS wcclrun
        RUNTIME DESTINATION bin
    )
endif(UNIX)
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <libwccl/values/strset.h>
#include <libwccl/parser/Parser.h>
#include <libcorpus2/tagsetmanager.h>
#include <boost/bind.hpp>
#include <boost/algorithm/string.hpp>
#include <boost/make_shared.hpp>
#include <boost/program_options.hpp>
#include <libcorpus2/io/xcesreader.h>
#include <antlr/NoViableAltException.hpp>
#include <antlr/MismatchedTokenException.hpp>
/**
 * Parse an operator from a CCL file and append it to ops.
 *
 * The file is opened and passed to parser.parseAnyOperator(). A non-null
 * result is pushed onto ops; a null result is reported on std::cerr.
 * Every exception type listed below (including the FileNotFound thrown here
 * when the file cannot be opened) is reported on std::cerr and swallowed,
 * so in those cases ops is left unchanged and the function returns normally.
 *
 * @param filename path of the CCL file to read
 * @param parser   parser used to build the operator
 * @param ops      vector that receives the parsed operator
 */
void load_more_operators(const std::string& filename, Wccl::Parser& parser,
        std::vector< boost::shared_ptr<Wccl::FunctionalOperator> >& ops)
{
    try {
        std::ifstream is(filename.c_str());
        if (!is.good()) {
            throw Wccl::FileNotFound(filename, "", __FUNCTION__);
        }
        boost::shared_ptr<Wccl::FunctionalOperator> retOp =
                parser.parseAnyOperator(is);
        if (retOp) {
            ops.push_back(retOp);
        } else {
            std::cerr << "Problem while parsing -- "
                << "parser returned NULL!" << std::endl;
        }
    } catch (antlr::MismatchedTokenException &e) {
        std::cerr << e.getFileLineColumnString()
            << " " << e.getMessage() << std::endl;
    } catch(antlr::NoViableAltException &e) {
        std::cerr << e.getFileLineColumnString()
            << " " << e.getMessage() << std::endl;
    } catch (Wccl::InvalidVariableName &e) {
        std::cerr << "Wccl::InvalidVariableName " << e.info() << std::endl;
    } catch (Wccl::VariableTypeMismatch &e) {
        std::cerr << "Wccl::VariableTypeMismatch " << e.info() << std::endl;
    } catch (Wccl::WcclError& e) {
        std::cerr << "Wccl::WcclError:" << e.info() << std::endl;
    } catch (PwrNlp::PwrNlpError& e) {
        std::cerr << "PwrNlp::PwrNlpError " << e.info() << std::endl;
    } catch (antlr::ANTLRException& e) {
        // Generic ANTLR failures come last so the more specific handlers
        // above get the first chance to report position information.
        std::cerr << "Antlr error " << e.getMessage() << std::endl;
    }
}
/**
 * Scope guard for stream formatting flags.
 *
 * The constructor records the stream's current flags(); the destructor
 * writes them back, undoing any setf()/unsetf() done in between.
 * Only the flags are saved.
 */
class streamsave
{
public:
    /// Remember the current formatting flags of os.
    explicit streamsave(std::ostream& os) : os_(os), flags_(os.flags()) {}
    /// Put the remembered flags back on the stream.
    ~streamsave() { os_.flags(flags_); }
private:
    // Not copyable (declared, never defined): a copy would restore the
    // same flags a second time when it goes out of scope.
    streamsave(const streamsave&);
    streamsave& operator=(const streamsave&);

    std::ostream& os_;
    std::ios_base::fmtflags flags_;
};
/**
 * Print a header row to std::cout: "## ", an "orth" title in a field of
 * width 20, then one "operator N" title per operator, numbered from 1.
 *
 * std::cout's formatting flags are restored on return.
 *
 * @param tagset not used by this function
 * @param ops    operators; only their count is used
 */
void do_head(const Corpus2::Tagset& tagset,
        const std::vector< boost::shared_ptr<Wccl::FunctionalOperator> >& ops)
{
    (void)tagset;
    streamsave sv(std::cout);
    std::cout << "## ";
    std::cout << std::setw(20) << "orth";
    for (size_t i = 1; i <= ops.size(); ++i) {
        std::cout << " ";
        // The two-argument setf clears the other adjustfield bits first.
        // The one-argument form only ORs the bit in, which would leave both
        // left and right set and make the "left" request ineffective.
        std::cout.setf(std::ios::right, std::ios::adjustfield);
        std::cout << std::setw(15) << "operator ";
        std::cout.setf(std::ios::left, std::ios::adjustfield);
        std::cout << std::setw(5) << i;
    }
    std::cout << "\n";
}
/**
 * Apply every operator at every token position of a sentence and print the
 * results to std::cout as a table.
 *
 * Each output row starts with the 1-based token number, followed by the
 * token's orth and then one cell per operator holding the string form of
 * the value it returned at that position. Every cell is padded with
 * trailing spaces to the length of the longest entry in its column.
 *
 * @param sentence sentence to process
 * @param tagset   tagset used to render operator results as strings
 * @param ops      operators to apply, in column order
 */
void do_sentence(const boost::shared_ptr<Corpus2::Sentence>& sentence,
        const Corpus2::Tagset& tagset,
        const std::vector< boost::shared_ptr<Wccl::FunctionalOperator> >& ops)
{
    Wccl::SentenceContext sc(sentence);
    std::vector< std::vector< UnicodeString > > outputs(sentence->size());
    // Column 0 is the orth and columns 1..ops.size() are the operators, so
    // one more slot than there are operators is needed.
    std::vector<int> lengths(ops.size() + 1, 0);
    streamsave sv(std::cout);

    // First pass: collect all cells and the maximum length of each column.
    for (size_t i = 0; i < sentence->size(); ++i) {
        sc.set_position(i);
        UnicodeString orth = sentence->tokens()[i]->orth();
        outputs[i].push_back(orth);
        lengths[0] = std::max(lengths[0], orth.length());
        for (size_t k = 0; k < ops.size(); ++k) {
            const size_t column = k + 1;
            boost::shared_ptr<const Wccl::Value> v = ops[k]->base_apply(sc);
            UnicodeString vstr = v->to_string_u(tagset);
            lengths[column] = std::max(lengths[column], vstr.length());
            outputs[i].push_back(vstr);
        }
    }

    // Second pass: print the rows with each cell padded to its column width.
    for (size_t i = 0; i < sentence->size(); ++i) {
        std::cout << std::setw(2) << (i + 1) << " ";
        for (size_t oi = 0; oi < outputs[i].size(); ++oi) {
            UnicodeString u = outputs[i][oi];
            u.padTrailing(lengths[oi]);
            std::cout << PwrNlp::to_utf8(u) << " ";
        }
        std::cout << "\n";
    }
}
/**
 * Read sentences from an XCES corpus file and run do_sentence() on each,
 * writing a blank line to std::cout after every sentence.
 *
 * @param filename path of the corpus file
 * @param tagset   tagset passed to the reader and to do_sentence()
 * @param ops      operators passed to do_sentence()
 * @param first    when true, stop after the first sentence
 */
void do_file(const std::string& filename, const Corpus2::Tagset& tagset,
        const std::vector< boost::shared_ptr<Wccl::FunctionalOperator> >& ops,
        bool first)
{
    Corpus2::XcesReader reader(tagset, filename);
    // Header output through do_head(tagset, ops) is currently switched off.
    for (Corpus2::Sentence::Ptr sent = reader.get_next_sentence(); sent;
            sent = reader.get_next_sentence()) {
        do_sentence(sent, tagset, ops);
        std::cout << "\n";
        if (first) {
            break;
        }
    }
}
/**
 * Entry point: parse the command line, load operators from the CCL files
 * and run them over the corpus files.
 *
 * Files given with -f or as positional arguments are sorted by name: those
 * ending in ".xml" are treated as corpora, everything else as CCL files.
 * Corpora are only processed when at least one operator was loaded.
 *
 * @return 0 on normal completion, 1 after printing help, 2 when the command
 *         line cannot be parsed or a PwrNlp::PwrNlpError is caught
 */
int main(int argc, char** argv)
{
    std::string tagset_load = "kipi";
    bool first = false;
    std::vector<std::string> corpora_files, ccl_files, files;
    // Accepted on the command line; not consulted anywhere in this function.
    bool quiet = false;
    using boost::program_options::value;

    boost::program_options::options_description desc("Allowed options");
    desc.add_options()
            ("tagset,t", value(&tagset_load),
             "Tagset to use\n")
            ("corpus,c", value(&corpora_files),
             "Corpus file to load (XCES)\n")
            ("ccl-file,C", value(&ccl_files),
             "CCL query file\n")
            ("files,f", value(&files),
             "Files to load, look at extension to determine type\n")
            ("quiet,q", value(&quiet)->zero_tokens(),
             "Suppress messages\n")
            ("first-sentence-only,1", value(&first)->zero_tokens(),
             "Only process first sentence\n")
            ("help,h", "Show help")
            ;
    boost::program_options::variables_map vm;
    boost::program_options::positional_options_description p;
    p.add("files", -1);

    try {
        boost::program_options::store(
            boost::program_options::command_line_parser(argc, argv)
            .options(desc).positional(p).run(), vm);
        // notify() is kept inside the try block so that any option error it
        // raises is reported like a parse error rather than escaping main.
        boost::program_options::notify(vm);
    } catch (boost::program_options::error& e) {
        std::cerr << e.what() << std::endl;
        return 2;
    }

    if (vm.count("help")) {
        std::cout << desc << "\n";
        return 1;
    }

    // Classify the generic file arguments by extension.
    foreach (const std::string& f, files) {
        if (boost::algorithm::ends_with(f, ".xml")) {
            corpora_files.push_back(f);
        } else {
            ccl_files.push_back(f);
        }
    }

    try {
        const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
        std::vector< boost::shared_ptr<Wccl::FunctionalOperator> > operators;
        Wccl::Parser parser(tagset);
        foreach (const std::string& f, ccl_files) {
            load_more_operators(f, parser, operators);
        }
        if (!operators.empty()) {
            foreach (const std::string& f, corpora_files) {
                do_file(f, tagset, operators, first);
            }
        }
    } catch (PwrNlp::PwrNlpError& e) {
        std::cerr << e.info() << std::endl;
        return 2;
    }
    return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment.