From 1e5502b4a97ed9d73d19889a2829f2900dc4a8ce Mon Sep 17 00:00:00 2001 From: Adam Radziszewski <adam.radziszewski@pwr.wroc.pl> Date: Tue, 16 Apr 2013 13:35:13 +0200 Subject: [PATCH] fix premorph writer: escape XML entities --- CMakeLists.txt | 2 +- libcorpus2/io/premorphwriter.cpp | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 75783dd..01a8890 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ PROJECT(Corpus2Library) set(corpus2_ver_major "1") set(corpus2_ver_minor "3") -set(corpus2_ver_patch "1") +set(corpus2_ver_patch "2") cmake_minimum_required(VERSION 2.8.0) diff --git a/libcorpus2/io/premorphwriter.cpp b/libcorpus2/io/premorphwriter.cpp index c47e84d..e330794 100644 --- a/libcorpus2/io/premorphwriter.cpp +++ b/libcorpus2/io/premorphwriter.cpp @@ -17,6 +17,9 @@ or FITNESS FOR A PARTICULAR PURPOSE. #include <libcorpus2/io/premorphwriter.h> #include <boost/foreach.hpp> +// for entity encoding +#include <libcorpus2/io/xcescommon.h> + namespace Corpus2 { bool PremorphWriter::registered = TokenWriter::register_writer<PremorphWriter>( @@ -42,14 +45,15 @@ PremorphWriter::~PremorphWriter() void PremorphWriter::write_token(const Token &t) { - os() << PwrNlp::Whitespace::to_whitespace(t.wa()) << t.orth_utf8(); + os() << PwrNlp::Whitespace::to_whitespace(t.wa()); + encode_xml_entities_into(os(), t.orth_utf8()); } void PremorphWriter::write_sentence(const Sentence &s) { os() << "<chunk type=\"s\">"; if (!s.tokens().empty()) { - os() << s[0]->orth_utf8(); + encode_xml_entities_into(os(), s[0]->orth_utf8()); } for (size_t i = 1; i < s.tokens().size(); ++i) { write_token(*s[i]); -- GitLab