Skip to content
Snippets Groups Projects
Select Git revision
  • 322d2ca9c02e9e3b4622951bc92bd23fdbf18d0a
  • master default protected
  • develop protected
  • feat_remove_attr
  • python2.7
  • python3.8
6 results

corpus2get.cpp

Blame
  • user avatar
    Adam Radziszewski authored
    move corpus2get.cpp out of the main build system as it generates warnings and is not flexible enough to replace Python corpus-get
    d94bbc58
    History
    corpus2get.cpp 3.15 KiB
    /*
        Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
        Part of the libcorpus2 project
    
        This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option)
    any later version.
    
        This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE. 
    
        See the LICENSE and COPYING files for more details.
    */
    
    #include <libcorpus2/util/settings.h>
    #include <libcorpus2/tagsetparser.h>
    #include <libcorpus2/tagsetmanager.h>
    #include <libcorpus2/token.h>
    #include <libcorpus2/sentence.h>
    
    #include <libcorpus2/io/reader.h>
    #include <libcorpus2/io/writer.h>
    
    #include <boost/foreach.hpp>
    
    #include <boost/program_options.hpp>
    #include <boost/bind.hpp>
    #include <boost/algorithm/string.hpp>
    
    #include <algorithm>
    #include <fstream>
    #include <iterator>
    
    #ifdef HAVE_LIBEDIT
    #include <histedit.h>
    #endif
    
    #ifdef HAVE_CONFIG_D_H
    #include <libcorpus2/config_d.h>
    #endif
    
    // not checking for HAVE_VERSION, there is no reason it shouldn't be there
    #include <libcorpus2/version.h>
    
    int main(int argc, char** argv)
    {
    	std::string tagset_load, file;
    	bool quiet = false, internals = false;
    	bool parse = false, validate = false, sort = false;
    	using boost::program_options::value;
    
    	boost::program_options::options_description desc("Allowed options");
    	desc.add_options()
    			("tagset,t", value(&tagset_load)->default_value("kipi"),
    			 "tagset")
    			("file", value(&file),
    			 "file to load")
    			("help,h", "Show help")
    			("version", "print version string")
    			;
    	boost::program_options::variables_map vm;
    	boost::program_options::positional_options_description p;
    	p.add("file", -1);
    
    	try {
    		boost::program_options::store(
    			boost::program_options::command_line_parser(argc, argv)
    			.options(desc).positional(p).run(), vm);
    	} catch (boost::program_options::error& e) {
    		std::cerr << e.what() << "\n";
    		return 2;
    	}
    	boost::program_options::notify(vm);
    
    	if (vm.count("help")) {
    		std::cout << desc << "\n";
    		std::cout << "Available tagsets: ";
    		std::cout << Corpus2::available_tagsets() << "\n";
    		std::cout << "Available readers: ";
    		std::cout << boost::algorithm::join(Corpus2::TokenReader::available_reader_types(), " ") << "\n";
    		std::cout << "Available writers: ";
    		std::cout << boost::algorithm::join(Corpus2::TokenWriter::available_writer_types(), " ") << "\n";
    		return 1;
    	}
    	if (vm.count("version")) {
    		std::cout << "corpus-read (libcorpus2) " << LIBCORPUS2_VERSION << "\n";
    		return 0;
    	}
    
    	try {
    		const Corpus2::Tagset& tagset = Corpus2::get_named_tagset(tagset_load);
    		const Corpus2::TokenReader::TokenReaderPtr reader = Corpus2::TokenReader::create_path_reader(
    			"xces", tagset, file);
    		const Corpus2::TokenWriter::TokenWriterPtr writer = Corpus2::TokenWriter::create_stream_writer(
    			"xces", std::cout, tagset);
    		while (Corpus2::Sentence::Ptr sentence = reader->get_next_sentence()) {
    			writer->write_sentence(*sentence);
    		}
    	} catch (PwrNlp::PwrNlpError& e) {
    		std::cerr << "Error: " << e.info() << "\n";
    	}
    	return 0;
    }