Skip to content
Snippets Groups Projects
Commit 0d1e2404 authored by ilor's avatar ilor
Browse files

usage tweaks in wccl-rules and wccl-match

parent 72b7a918
Branches
No related merge requests found
......@@ -102,6 +102,14 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
}
}
/**
 * Print a one-line description of the program followed by a short
 * command-line usage summary to standard error.
 *
 * @param name program name inserted into the usage line
 *             (callers in this file pass argv[0])
 */
void usage(char* name)
{
    std::ostream& err = std::cerr;
    // Banner and synopsis line.
    err << "This program runs WCCL match rules.\n"
        << "Usage " << name << " [OPTIONS] FILES\n";
    // Explanation of how the positional FILES arguments are classified.
    err << "Files ending with .xml are treated as corpora, otherwise \n"
           "as CCL files. Use - to read corpus from stdin (as with -I)\n";
}
int main(int argc, char** argv)
{
std::string tagset_load = "kipi";
......@@ -130,6 +138,8 @@ int main(int argc, char** argv)
"CCL rule files\n")
("files,f", value(&files),
"Files to load, looking at the extension to determine type\n")
("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(),
"Read corpus from stdin")
("input-format,i", value(&input_format)->default_value("xces"),
readers_help.c_str())
("output-format,o", value(&output_format)->default_value("ccl"),
......@@ -161,21 +171,29 @@ int main(int argc, char** argv)
boost::program_options::notify(vm);
if (vm.count("help")) {
std::cerr << "Usage " << argv[0] << " [OPTIONS] FILES\n"
<< "Files ending with .xml are treated as corpora, otherwise \n"
<< "as CCL files. Use - to read corpus from stdin (as with -I)";
usage(argv[0]);
std::cout << desc << "\n";
return 1;
}
foreach (const std::string& f, files) {
if (boost::algorithm::ends_with(f, ".xml")) {
if (f == "-") {
corpus_stdin = true;
} else if (boost::algorithm::ends_with(f, ".xml")) {
corpora_files.push_back(f);
} else {
ccl_files.push_back(f);
}
}
// consider stdin only when no corpus files given
corpus_stdin = corpus_stdin && corpora_files.empty();
if (ccl_files.empty() || (corpora_files.empty() && !corpus_stdin)) {
usage(argv[0]);
return 2;
}
try {
const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
MatchRunner runner(tagset);
......@@ -188,8 +206,14 @@ int main(int argc, char** argv)
boost::shared_ptr<Corpus2::TokenWriter> writer;
writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset));
boost::shared_ptr<Corpus2::TokenReader> reader;
reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, corpora_files[0]);
runner.apply_rules(reader, writer);
foreach (std::string cf, corpora_files) {
reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, cf);
runner.apply_rules(reader, writer);
}
if (corpus_stdin) {
reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin);
runner.apply_rules(reader, writer);
}
}
} catch (PwrNlp::PwrNlpError& e) {
std::cerr << e.info() << std::endl;
......
......@@ -101,6 +101,14 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
}
}
/**
 * Write the program banner and a brief usage summary to standard error.
 *
 * @param name program name shown in the "Usage" line
 *             (callers in this file pass argv[0])
 */
void usage(char* name)
{
    // Fixed text kept in named constants so the output statement reads as
    // banner / synopsis / file-classification help.
    static const char banner[] =
        "This program runs WCCL disambiguation rules.\n";
    static const char files_help[] =
        "Files ending with .xml are treated as corpora, otherwise \n"
        "as CCL files. Use - to read corpus from stdin (as with -I)\n";

    std::cerr << banner
              << "Usage " << name << " [OPTIONS] FILES\n"
              << files_help;
}
int main(int argc, char** argv)
{
......@@ -130,6 +138,8 @@ int main(int argc, char** argv)
"CCL rule files\n")
("files,f", value(&files),
"Files to load, looking at the extension to determine type\n")
("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(),
"Read corpus from stdin")
("input-format,i", value(&input_format)->default_value("xces"),
readers_help.c_str())
("output-format,o", value(&output_format)->default_value("xces"),
......@@ -161,23 +171,28 @@ int main(int argc, char** argv)
boost::program_options::notify(vm);
if (vm.count("help")) {
std::cerr << "Usage " << argv[0] << " [OPTIONS] FILES\n"
<< "Files ending with .xml are treated as corpora, otherwise \n"
<< "as CCL files. Use - to read corpus from stdin (as with -I)";
usage(argv[0]);
std::cout << desc << "\n";
return 1;
}
foreach (const std::string& f, files) {
if (boost::algorithm::ends_with(f, ".xml")) {
if (f == "-") {
corpus_stdin = true;
} else if (boost::algorithm::ends_with(f, ".xml")) {
corpora_files.push_back(f);
} else {
ccl_files.push_back(f);
}
}
// consider stdin only when no corpus files given
corpus_stdin = corpus_stdin && corpora_files.empty();
// consider stdin only when no corpus files given
corpus_stdin = corpus_stdin && corpora_files.empty();
if (ccl_files.empty() || (corpora_files.empty() && !corpus_stdin)) {
usage(argv[0]);
return 2;
}
try {
const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment