diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp index 6ab3f5e210527936afb2bfcca364e39efba81117..96c5268e90c59ad372c0d871a6a0dd7dbede57d7 100644 --- a/libmwereader/mweparser.cpp +++ b/libmwereader/mweparser.cpp @@ -51,9 +51,7 @@ namespace Corpus2 { if(search != where.end()) return search->second; - //std::cout << " dddddddddddddd "<< cond << std::endl; BoolOpPtr op = parser_.parseBoolOperator(cond); - //std::cout << " dddddddddddddd $$$$" << cond << std::endl; where[cond] = op; return op; @@ -75,25 +73,19 @@ namespace Corpus2 { { MWEBuilder::BoolOpPtr main = mwe_builder_->get_mwe_condition( wccl_operator_); - //std::cout << " kupa head" << std::endl; MWEBuilder::BoolOpPtr head = mwe_builder_->get_head_condition( head_cond_); - //std::cout << " kupa " << std::endl; if(group_type_ == "fix"){ // group_name_ -> lower case - //std::cout << " kupa fix" << std::endl; mwe_index_.add_lexicalunit( LexicalUnit::Ptr(new FixedLU(mwe_base_, main, head, variables_))); } else if(group_type_ == "flex"){ - //std::cout << " kupa flex" << std::endl; mwe_index_.add_lexicalunit(LexicalUnit::Ptr(new FlexLU(mwe_base_, main, head, variables_))); } else { throw Wccl::WcclError("Unknown type of lexical unit:" + group_type_); } - //std::cout << " kupa clear" << std::endl; variables_.clear(); - //std::cout << "po kupie " << std::endl; } std::string MWEParser::get_attribute(const AttributeList& attributes, @@ -128,7 +120,6 @@ namespace Corpus2 { void MWEParser::on_start_element(const Glib::ustring &name, const AttributeList& attributes) { - //std::cout << state_ << ": " << name << std::endl; if(state_ == NONE && name == "units_description"){ tagset_ = get_attribute(attributes, "tagset"); mwe_builder_ = boost::shared_ptr<MWEBuilder>(new MWEBuilder(Corpus2::get_named_tagset(tagset_))); @@ -160,8 +151,6 @@ namespace Corpus2 { void MWEParser::on_end_element(const Glib::ustring &name) { - //std::cout << "/" << state_ << ": " << name << std::endl; - if(name == "units_description"){ state_ = NONE; } else if(state_ == CONDITION && name == "condition"){ @@ -207,8 +196,11 @@ namespace Corpus2 { boost::algorithm::trim(str); grab_characters_ = false; return str; + } - //std::pair<wcclptr, wcclptr, map<string,string>, string, type> + Corpus2::Tagset MWEParser::get_tagset() const + { + return Corpus2::get_named_tagset(tagset_); } } // ns Corpus2 diff --git a/libmwereader/mweparser.h b/libmwereader/mweparser.h index c42df372759cb445e69e5672a4ddf47ca2e54953..8489d6bc61d771c36bb219777a7942307b4d9532 100644 --- a/libmwereader/mweparser.h +++ b/libmwereader/mweparser.h @@ -60,6 +60,8 @@ public: MWEParser(MWEIndex &index); ~MWEParser(); + Corpus2::Tagset get_tagset() const; + protected: void create_mwe(); typedef std::map<std::string, std::string> str_map; diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index 821cbf715b68a8fe212879e1f6e025a684ce0c85..c30fad49fdd97d50bc09ec2074c1d7a619b3f808 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -216,6 +216,9 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( parser.parse_file(filename); mwes_counter++; + if(parser.get_tagset().name() != tagset().name()) + throw std::runtime_error( "Tagset in mwe file does not match reader tagset!" ); + }