diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp
index 0816839e69873f82ec81219d13819b36c30b193f..bde08712fb9d61efcc950c985efb4ed0ae6092cf 100644
--- a/libcorpus2/io/xmlreader.cpp
+++ b/libcorpus2/io/xmlreader.cpp
@@ -19,6 +19,8 @@ or FITNESS FOR A PARTICULAR PURPOSE.
 #include <libxml++/libxml++.h>
 #include <libxml2/libxml/parser.h>
 #include <boost/make_shared.hpp>
+#include <boost/algorithm/string.hpp>
+
 #include <fstream>
 
 namespace Corpus2 {
@@ -228,15 +230,21 @@ void XmlReader::on_end_element(const Glib::ustring &name)
 {
 	//std::cerr << "/" << name << state_ << "\n";
 	if (state_ == STATE_ORTH && name == "orth") {
-		tok_->set_orth(UnicodeString::fromUTF8(get_buf()));
+		std::string tmp_buf = get_buf();
+		boost::trim(tmp_buf);
+		tok_->set_orth(UnicodeString::fromUTF8(tmp_buf));
 		grab_characters_ = false;
 		state_ = STATE_TOK;
 	} else if (state_ == STATE_LEMMA && name == "base") {
-		tok_->lexemes().back().set_lemma(UnicodeString::fromUTF8(get_buf()));
+		std::string tmp_buf = get_buf();
+		boost::trim(tmp_buf);
+		tok_->lexemes().back().set_lemma(UnicodeString::fromUTF8(tmp_buf));
 		grab_characters_ = false;
 		state_ = STATE_LEX;
 	} else if (state_ == STATE_TAG && name == "ctag") {
-		Tag tag = base_reader_.parse_tag(get_buf());
+		std::string tmp_buf = get_buf();
+		boost::trim(tmp_buf);
+		Tag tag = base_reader_.parse_tag(tmp_buf);
 		tok_->lexemes().back().set_tag(tag);
 		grab_characters_ = false;
 		state_ = STATE_LEX;