From ad9740b4627d1025d2d12a5363b42bd8243be756 Mon Sep 17 00:00:00 2001 From: ilor <kailoran@gmail.com> Date: Fri, 25 Feb 2011 10:22:05 +0100 Subject: [PATCH] loose tag parsing option in xmlreader --- libcorpus2/io/xmlreader.cpp | 4 +++- libcorpus2/io/xmlreader.h | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp index 7a51708..a9b8e0c 100644 --- a/libcorpus2/io/xmlreader.cpp +++ b/libcorpus2/io/xmlreader.cpp @@ -33,6 +33,7 @@ XmlReader::XmlReader(const Tagset& tagset, , sbuf_(), tok_(NULL), sent_(), chunk_(), obuf_(obuf) , disamb_only_(false), disamb_sh_(false) , warn_on_inconsistent_(true), warn_on_unexpected_(true) + , loose_tag_parsing_(false) { } @@ -221,7 +222,8 @@ void XmlReader::on_end_element(const Glib::ustring &name) grab_characters_ = false; state_ = STATE_LEX; } else if (state_ == STATE_TAG && name == "ctag") { - Tag tag = tagset_.parse_simple_tag(get_buf(), true); + Tag tag = tagset_.parse_simple_tag(get_buf(), + loose_tag_parsing_ ? Tagset::ParseLoose : Tagset::ParseDefault); tok_->lexemes().back().set_tag(tag); grab_characters_ = false; state_ = STATE_LEX; diff --git a/libcorpus2/io/xmlreader.h b/libcorpus2/io/xmlreader.h index fe88533..f2b9b14 100644 --- a/libcorpus2/io/xmlreader.h +++ b/libcorpus2/io/xmlreader.h @@ -51,6 +51,10 @@ public: warn_on_unexpected_ = v; } + void set_loose_tag_parsing(bool v) { + loose_tag_parsing_ = v; + } + protected: std::string get_type_from_attributes(const AttributeList& attributes) const; @@ -130,6 +134,9 @@ protected: /// Tag name for sentence objects, customized in child class ctors std::string sentence_tag_name_; + + /// Flag to disable strict tag correctness checking + bool loose_tag_parsing_; }; } /* end ns Corpus2 */ -- GitLab