// Patch summary (descriptive preamble placed before the first `diff --git` header; not part of any hunk).
//
// Purpose: make the tag-parsing mode used by XmlReader configurable at runtime.
//
// libcorpus2/io/xmlreader.cpp
//   * XmlReader constructor: the new member loose_tag_parsing_ is added to the
//     initializer list with the value false.
//   * XmlReader::on_end_element, branch `state_ == STATE_TAG && name == "ctag"`:
//     the second argument of tagset_.parse_simple_tag(get_buf(), ...) changes from
//     the literal `true` to Tagset::ParseLoose when loose_tag_parsing_ is set,
//     and Tagset::ParseDefault otherwise.
//     NOTE(review): Tagset::ParseLoose / Tagset::ParseDefault are declared outside
//     this patch; whether ParseDefault is equivalent to the former `true` argument
//     cannot be seen from here -- confirm against the Tagset declaration.
//
// libcorpus2/io/xmlreader.h
//   * Adds the public inline setter set_loose_tag_parsing(bool v), which assigns
//     v to loose_tag_parsing_.
//   * Adds the protected member `bool loose_tag_parsing_`, documented in the patch
//     as "Flag to disable strict tag correctness checking".
//
// NOTE(review): the patch text below is stored on a single physical line; the
// original line breaks of the hunks are not visible here, so it presumably has
// to be re-flowed from the upstream commit before patch tools can apply it.
diff --git a/libcorpus2/io/xmlreader.cpp b/libcorpus2/io/xmlreader.cpp index 7a517089c00cd0c05f836b619f8f256322e84b6f..a9b8e0c4414c911309acb049694c9341685157d2 100644 --- a/libcorpus2/io/xmlreader.cpp +++ b/libcorpus2/io/xmlreader.cpp @@ -33,6 +33,7 @@ XmlReader::XmlReader(const Tagset& tagset, , sbuf_(), tok_(NULL), sent_(), chunk_(), obuf_(obuf) , disamb_only_(false), disamb_sh_(false) , warn_on_inconsistent_(true), warn_on_unexpected_(true) + , loose_tag_parsing_(false) { } @@ -221,7 +222,8 @@ void XmlReader::on_end_element(const Glib::ustring &name) grab_characters_ = false; state_ = STATE_LEX; } else if (state_ == STATE_TAG && name == "ctag") { - Tag tag = tagset_.parse_simple_tag(get_buf(), true); + Tag tag = tagset_.parse_simple_tag(get_buf(), + loose_tag_parsing_ ? Tagset::ParseLoose : Tagset::ParseDefault); tok_->lexemes().back().set_tag(tag); grab_characters_ = false; state_ = STATE_LEX; diff --git a/libcorpus2/io/xmlreader.h b/libcorpus2/io/xmlreader.h index fe885337f096b5f7d3561fefa00e7fd5676cb7f1..f2b9b14dc4122797f9222ae86213cf417a754fa0 100644 --- a/libcorpus2/io/xmlreader.h +++ b/libcorpus2/io/xmlreader.h @@ -51,6 +51,10 @@ public: warn_on_unexpected_ = v; } + void set_loose_tag_parsing(bool v) { + loose_tag_parsing_ = v; + } + protected: std::string get_type_from_attributes(const AttributeList& attributes) const; @@ -130,6 +134,9 @@ protected: /// Tag name for sentence objects, customized in child class ctors std::string sentence_tag_name_; + + /// Flag to disable strict tag correctness checking + bool loose_tag_parsing_; }; } /* end ns Corpus2 */