diff --git a/CMakeLists.txt b/CMakeLists.txt index 75783dd3c7e5d6dce8316c2750ff6892e2230929..01a8890f1d320c1fa5346b472fdd441902caa2df 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ PROJECT(Corpus2Library) set(corpus2_ver_major "1") set(corpus2_ver_minor "3") -set(corpus2_ver_patch "1") +set(corpus2_ver_patch "2") cmake_minimum_required(VERSION 2.8.0) diff --git a/libcorpus2/io/premorphwriter.cpp b/libcorpus2/io/premorphwriter.cpp index c47e84df8090e3b1ad6c5610fbc9fd9106fe72c6..e3307948b8d9fe6d36a671540c9f28922be09876 100644 --- a/libcorpus2/io/premorphwriter.cpp +++ b/libcorpus2/io/premorphwriter.cpp @@ -17,6 +17,9 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libcorpus2/io/premorphwriter.h> #include <boost/foreach.hpp> +// for entity encoding +#include <libcorpus2/io/xcescommon.h> + namespace Corpus2 { bool PremorphWriter::registered = TokenWriter::register_writer<PremorphWriter>( @@ -42,14 +45,15 @@ PremorphWriter::~PremorphWriter() void PremorphWriter::write_token(const Token &t) { - os() << PwrNlp::Whitespace::to_whitespace(t.wa()) << t.orth_utf8(); + os() << PwrNlp::Whitespace::to_whitespace(t.wa()); + encode_xml_entities_into(os(), t.orth_utf8()); } void PremorphWriter::write_sentence(const Sentence &s) { os() << "<chunk type=\"s\">"; if (!s.tokens().empty()) { - os() << s[0]->orth_utf8(); + encode_xml_entities_into(os(), s[0]->orth_utf8()); } for (size_t i = 1; i < s.tokens().size(); ++i) { write_token(*s[i]);