Patch: let wccl-match and wccl-rules take the corpus from stdin.

Both tools get:
  * a usage() helper that writes a one-line program description and the
    FILES conventions to stderr; --help calls it before printing the option
    table, and it is also shown (exit code 2) when no CCL file or no corpus
    source was given;
  * a -I / --corpus-from-stdin option; a FILES entry of "-" sets the same flag;
  * stdin is only honoured when no .xml corpus file was named.
wccl-match additionally applies the rules to every corpus file given instead
of only corpora_files[0], and then to stdin when the flag is still set.

Review changes relative to the submitted patch (line counts unchanged):
  * usage() takes const char* -- it only reads the program name;
  * the corpus loop in wccl-match binds each path by const reference, like
    the existing loop over `files`, instead of copying the string.

NOTE(review): the submitted text had lost its line breaks and indentation.
Hunk bodies were re-split to match the @@ line counts; indentation is
reconstructed (tabs assumed, and the whitespace-only -/+ pair in
wccl-rules.cpp is shown as spaces -> tabs) -- confirm against the working
tree. The index lines still carry the blob hashes of the submitted patch.

diff --git a/wccl-apps/wccl-match.cpp b/wccl-apps/wccl-match.cpp
index a2da6ede3e90b62ea67688751823b81c5ecc913c..2f5d7521eb1579bf4dbfa8d10f70182fb518894d 100644
--- a/wccl-apps/wccl-match.cpp
+++ b/wccl-apps/wccl-match.cpp
@@ -102,6 +102,14 @@ void MatchRunner::apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
 	}
 }
 
+void usage(const char* name)
+{
+	std::cerr << "This program runs WCCL match rules.\n";
+	std::cerr << "Usage " << name << " [OPTIONS] FILES\n"
+		<< "Files ending with .xml are treated as corpora, otherwise \n"
+		<< "as CCL files. Use - to read corpus from stdin (as with -I)\n";
+}
+
 int main(int argc, char** argv)
 {
 	std::string tagset_load = "kipi";
@@ -130,6 +138,8 @@ int main(int argc, char** argv)
 			 "CCL rule files\n")
 			("files,f", value(&files),
 			 "Files to load, looking at the extension to determine type\n")
+			("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(),
+			 "Read corpus from stdin")
 			("input-format,i", value(&input_format)->default_value("xces"),
 			 readers_help.c_str())
 			("output-format,o", value(&output_format)->default_value("ccl"),
@@ -161,21 +171,29 @@ int main(int argc, char** argv)
 	boost::program_options::notify(vm);
 
 	if (vm.count("help")) {
-		std::cerr << "Usage " << argv[0] << " [OPTIONS] FILES\n"
-			<< "Files ending with .xml are treated as corpora, otherwise \n"
-			<< "as CCL files. Use - to read corpus from stdin (as with -I)";
+		usage(argv[0]);
 		std::cout << desc << "\n";
 		return 1;
 	}
 
 	foreach (const std::string& f, files) {
-		if (boost::algorithm::ends_with(f, ".xml")) {
+		if (f == "-") {
+			corpus_stdin = true;
+		} else if (boost::algorithm::ends_with(f, ".xml")) {
 			corpora_files.push_back(f);
 		} else {
 			ccl_files.push_back(f);
 		}
 	}
 
+	// consider stdin only when no corpus files given
+	corpus_stdin = corpus_stdin && corpora_files.empty();
+
+	if (ccl_files.empty() || (corpora_files.empty() && !corpus_stdin)) {
+		usage(argv[0]);
+		return 2;
+	}
+
 	try {
 		const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
 		MatchRunner runner(tagset);
@@ -188,8 +206,14 @@ int main(int argc, char** argv)
 			boost::shared_ptr<Corpus2::TokenWriter> writer;
 			writer.reset(Corpus2::TokenWriter::create(output_format, std::cout, tagset));
 			boost::shared_ptr<Corpus2::TokenReader> reader;
-			reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, corpora_files[0]);
-			runner.apply_rules(reader, writer);
+			foreach (const std::string& cf, corpora_files) {
+				reader = Corpus2::TokenReader::create_path_reader(input_format, tagset, cf);
+				runner.apply_rules(reader, writer);
+			}
+			if (corpus_stdin) {
+				reader = Corpus2::TokenReader::create_stream_reader(input_format, tagset, std::cin);
+				runner.apply_rules(reader, writer);
+			}
 		}
 	} catch (PwrNlp::PwrNlpError& e) {
 		std::cerr << e.info() << std::endl;
diff --git a/wccl-apps/wccl-rules.cpp b/wccl-apps/wccl-rules.cpp
index 2126e5dbaa3aacf486ca5de4405b06fa62bd7261..e25257cabddb07a3ce520a683a05882e78108f16 100644
--- a/wccl-apps/wccl-rules.cpp
+++ b/wccl-apps/wccl-rules.cpp
@@ -101,6 +101,14 @@ void apply_rules(boost::shared_ptr<Corpus2::TokenReader> reader,
 	}
 }
 
+void usage(const char* name)
+{
+	std::cerr << "This program runs WCCL disambiguation rules.\n";
+	std::cerr << "Usage " << name << " [OPTIONS] FILES\n"
+		<< "Files ending with .xml are treated as corpora, otherwise \n"
+		<< "as CCL files. Use - to read corpus from stdin (as with -I)\n";
+}
+
 int main(int argc, char** argv)
 {
 
@@ -130,6 +138,8 @@ int main(int argc, char** argv)
 			 "CCL rule files\n")
 			("files,f", value(&files),
 			 "Files to load, looking at the extension to determine type\n")
+			("corpus-from-stdin,I", value(&corpus_stdin)->zero_tokens(),
+			 "Read corpus from stdin")
 			("input-format,i", value(&input_format)->default_value("xces"),
 			 readers_help.c_str())
 			("output-format,o", value(&output_format)->default_value("xces"),
@@ -161,23 +171,28 @@ int main(int argc, char** argv)
 	boost::program_options::notify(vm);
 
 	if (vm.count("help")) {
-		std::cerr << "Usage " << argv[0] << " [OPTIONS] FILES\n"
-			<< "Files ending with .xml are treated as corpora, otherwise \n"
-			<< "as CCL files. Use - to read corpus from stdin (as with -I)";
+		usage(argv[0]);
 		std::cout << desc << "\n";
 		return 1;
 	}
 
 	foreach (const std::string& f, files) {
-		if (boost::algorithm::ends_with(f, ".xml")) {
+		if (f == "-") {
+			corpus_stdin = true;
+		} else if (boost::algorithm::ends_with(f, ".xml")) {
 			corpora_files.push_back(f);
 		} else {
 			ccl_files.push_back(f);
 		}
 	}
 
-    // consider stdin only when no corpus files given
-    corpus_stdin = corpus_stdin && corpora_files.empty();
+	// consider stdin only when no corpus files given
+	corpus_stdin = corpus_stdin && corpora_files.empty();
+
+	if (ccl_files.empty() || (corpora_files.empty() && !corpus_stdin)) {
+		usage(argv[0]);
+		return 2;
+	}
 
 	try {
 		const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);