Skip to content
Snippets Groups Projects
Select Git revision
  • a0fa6b8a34d87396c1acb40e55c1c9f22388c9f2
  • master default protected
  • develop protected
  • feat_remove_attr
  • python2.7
  • python3.8
6 results

exception.h

Blame
  • xcescommon.cpp 3.82 KiB
    /*
        Copyright (C) 2010 Tomasz Śniatowski, Adam Radziszewski
        Part of the libcorpus2 project
    
        This program is free software; you can redistribute it and/or modify it
    under the terms of the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your option)
    any later version.
    
        This program is distributed in the hope that it will be useful, but
    WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
    or FITNESS FOR A PARTICULAR PURPOSE. 
    
        See the LICENSE.CORPUS2, LICENSE.POLIQARP, COPYING.LESSER and COPYING files for more details.
    */
    
    #include <libcorpus2/io/xcescommon.h>
    #include <boost/foreach.hpp>
    #include <sstream>
    
    namespace Corpus2 {
    
    namespace {
    	/// Output-stream indentation helper: inserts the one-character string
    	/// " " into os exactly `indent` times (nothing at all when `indent`
    	/// is zero or negative) and returns os so that callers can continue
    	/// the insertion chain on the same line.
    	std::ostream& osi(std::ostream& os, int indent) {
    		int remaining = indent;
    		while (remaining > 0) {
    			os << " ";
    			--remaining;
    		}
    		return os;
    	}
    
    	/**
    	 * Write a single lexeme to os as a one-line <lex> element of the form
    	 * <lex><base>LEMMA</base><ctag>TAG</ctag></lex> followed by a newline.
    	 *
    	 * LEMMA is l.lemma_utf8() and TAG is tagset.tag_to_string(l.tag());
    	 * both are written through encode_xml_entities_into.
    	 *
    	 * @param os             stream receiving the markup
    	 * @param tagset         tagset used to turn the lexeme's tag into text
    	 * @param l              lexeme to write
    	 * @param output_disamb  when true, a lexeme for which l.is_disamb()
    	 *                       holds gets the opening tag <lex disamb="1">
    	 */
    	inline void lexeme_as_xces_xml(std::ostream& os, const Tagset& tagset,
    			const Lexeme& l, bool output_disamb)
    	{
    		// is_disamb() is consulted only if the caller asked for disamb
    		// markers at all.
    		const bool mark_disamb = output_disamb && l.is_disamb();
    		os << (mark_disamb ? "<lex disamb=\"1\">" : "<lex>");

    		os << "<base>";
    		encode_xml_entities_into(os, l.lemma_utf8());
    		os << "</base>" << "<ctag>";
    		encode_xml_entities_into(os, tagset.tag_to_string(l.tag()));
    		os << "</ctag>" << "</lex>\n";
    	}
    }
    
    /**
     * Write the opening part of a token element to os.
     *
     * If t.wa() equals PwrNlp::Whitespace::None, an indented "<ns/>" line is
     * written first. Then the indented opening tag follows: plain "<tok>",
     * or, when whitespace_info is set, "<tok ws="...">" carrying
     * PwrNlp::Whitespace::to_string(t.wa()) as the attribute value.
     * Each line written ends with a newline and is preceded by `indent`
     * spaces.
     */
    void token_as_xces_xml_head(std::ostream& os,
    		const Token& t, int indent, bool whitespace_info /* = false */)
    {
    	const bool no_whitespace = (t.wa() == PwrNlp::Whitespace::None);
    	if (no_whitespace) {
    		osi(os, indent) << "<ns/>\n";
    	}

    	// Both variants of the opening tag start at the same indentation.
    	std::ostream& line = osi(os, indent);
    	if (whitespace_info) {
    		line << "<tok ws=\""
    			<< PwrNlp::Whitespace::to_string(t.wa()) << "\">\n";
    	} else {
    		line << "<tok>\n";
    	}
    }
    
    void token_as_xces_xml_body(std::ostream& os, const Tagset& tagset,
    		const Token& t, int indent, bool output_disamb /* = false */,
    		bool sort /* = false */)
    {
    	osi(os, indent) << "<orth>";
    	encode_xml_entities_into(os, t.orth_utf8());
    	os << "</orth>\n";
    	if (!sort) {