Skip to content
Snippets Groups Projects
Select Git revision
  • a5674e4b5cd6784ca8ac259ef7f4af94d5c1a224
  • master default protected
  • fix-words-ann
  • wccl-rules-migration
  • develop
5 results

wcclfile.h

Blame
  • iob-chan.h 2.09 KiB
    /*
    	Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
    	Part of the libcorpus2 project
    
    	This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option)
    any later version.
    
    	This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE.
    
    	See the LICENSE and COPYING files for more details.
    */
    
    #ifndef LIBSORPUS2_IO_IOB_CHAN_H
    #define LIBCORPUS2_IO_IOB_CHAN_H
    
    #include <libcorpus2/io/reader.h>
    #include <libcorpus2/io/writer.h>
    #include <boost/scoped_ptr.hpp>
    
    namespace Corpus2 {
    
    /**
     * Simple writer class to output token in RFTagger-compatible corpora form.
     *
     * One token per line, token line consists of the orth, followed by the
     * tag, followed by newline (one tag per token only). Each sentence
     * is followed by a blank line.
     *
     * The first lexeme is used. No-lexeme tokens trigger a warning unless
     * nowarn is passed.
     */
    class IobChanWriter : public TokenWriter
    {
    public:
    	IobChanWriter(std::ostream& os, const Tagset& tagset,
    			const string_range_vector& params);
    
    	void write_token(const Token& t);
    
    	void write_sentence(const Sentence& s);
    
    	void write_chunk(const Chunk &p);
    
    	void write_tag(const Tag& tag);
    
    	static bool registered;
    
    private:
    	bool warn_on_no_lexemes_;
    
    	bool force_;
    };
    
    class IobChanReader : public BufferedSentenceReader
    {
    public:
    	IobChanReader(const Tagset& tagset, std::istream& is);
    
    	IobChanReader(const Tagset& tagset, const std::string& filename);
    
    	std::istream& is() {
    		return *is_;
    	}
    
    	void set_option(const std::string& option);
    
    	std::string get_option(const std::string& option) const;
    
    	static bool registered;
    
    protected:
    	/// BufferedSentenceReader override
    	Sentence::Ptr actual_next_sentence();
    
    	std::istream* is_;
    	boost::scoped_ptr<std::istream> is_owned_;
    
    	/// Whether to mark all incoming tags as disambiguated
    	bool disamb_;
    };
    
    } /* end ns Corpus2 */
    
    #endif // LIBCORPUS2_IO_IOB_CHAN_H