Skip to content
Snippets Groups Projects
Select Git revision
  • 967dd4d1c1b5d25d4b62910c5dcbc8b89b78ac7e
  • master default protected
  • fix-words-ann
  • wccl-rules-migration
  • develop
5 results

FindPwrUtils.cmake

Blame
  • orthwriter.cpp 1.79 KiB
    /*
        Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
        Part of the libcorpus2 project
    
        This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option)
    any later version.
    
        This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE. 
    
        See the LICENSE and COPYING files for more details.
    */
    
    #include <libcorpus2/io/orthwriter.h>
    #include <libpwrutils/foreach.h>
    
    namespace Corpus2 {
    
    /*
     * Static registration hook: initializing this flag calls
     * TokenWriter::register_writer<OrthWriter> with the class id "orth".
     * The second argument lists "actual_ws" and "end_nl", the same two
     * parameter names the OrthWriter constructor checks for.
     * NOTE(review): presumably this makes the writer selectable by the name
     * "orth" and advertises its options -- confirm against
     * TokenWriter::register_writer.
     */
    bool OrthWriter::registered = TokenWriter::register_writer<OrthWriter>(
    		"orth", "actual_ws,end_nl");
    
    /*
     * Constructs the writer, forwarding the stream, tagset and parameter list
     * to the TokenWriter base.
     *
     * Both options start switched off. The parameter list is then scanned and
     * each option is switched on when its name appears:
     *   "actual_ws" -> actual_ws_
     *   "end_nl"    -> end_nl_
     * Parameters with any other name are skipped by this constructor.
     */
    OrthWriter::OrthWriter(std::ostream& os, const Tagset& tagset,
    		const string_range_vector& params)
    	: TokenWriter(os, tagset, params), actual_ws_(false), end_nl_(false)
    {
    	foreach (const string_range& raw_param, params) {
    		// Materialize the range as a std::string so it can be compared
    		// against the option names.
    		const std::string option_name =
    				boost::copy_range<std::string>(raw_param);
    		const bool is_actual_ws = (option_name == "actual_ws");
    		const bool is_end_nl = (option_name == "end_nl");
    		if (is_actual_ws) {
    			actual_ws_ = true;
    		}
    		if (is_end_nl) {
    			end_nl_ = true;
    		}
    	}
    }
    
    /*
     * On destruction, writes one trailing newline to the output stream,
     * but only when the "end_nl" option was enabled.
     */
    OrthWriter::~OrthWriter()
    {
    	if (!end_nl_) {
    		return;
    	}
    	os() << "\n";
    }
    
    /*
     * Writes a single token: first the text form of its preceding-whitespace
     * value (t.wa() converted by PwrNlp::Whitespace::to_whitespace), then the
     * token's UTF-8 orth.
     */
    void OrthWriter::write_token(const Token &t)
    {
    	os() << PwrNlp::Whitespace::to_whitespace(t.wa());
    	os() << t.orth_utf8();
    }
    
    /*
     * Writes all tokens of a sentence in order.
     *
     * Every token goes through write_token (whitespace + orth), with one
     * exception: when actual_ws_ is off, the first token is written as its
     * bare orth, without its preceding whitespace. When actual_ws_ is off,
     * a newline is appended after the tokens -- also for an empty sentence.
     */
    void OrthWriter::write_sentence(const Sentence &s)
    {
    	const size_t token_count = s.tokens().size();
    	for (size_t idx = 0; idx < token_count; ++idx) {
    		const bool bare_first_token = (idx == 0) && !actual_ws_;
    		if (bare_first_token) {
    			// Sentence-initial token: skip its whitespace prefix.
    			os() << s[0]->orth_utf8();
    		} else {
    			write_token(*s[idx]);
    		}
    	}
    	if (!actual_ws_) {
    		os() << "\n";
    	}
    }
    
    void OrthWriter::write_chunk(const Chunk &c)
    {
    	foreach (const Sentence::Ptr& s, c.sentences()) {
    		write_sentence(*s);
    		if (!actual_ws_) {
    			os() << "\n";
    		}
    	}
    }
    
    } /* end ns Corpus2 */