From fcea786ea5ee59203eb09155359cfd8a6d62ecfa Mon Sep 17 00:00:00 2001 From: Bartosz Broda <bartosz.broda@gmail.com> Date: Fri, 10 Jun 2011 10:42:18 +0200 Subject: [PATCH] add more mwe for ad hoc testing, printing of partial parses --- libmwereader/mweparser.cpp | 25 +++++++++++++++++-------- libmwereader/mweparser.h | 4 +++- libmwereader/mwereader.cpp | 2 +- libmwereader/test_mwe.xml | 23 +++++++++++++++++++++++ 4 files changed, 44 insertions(+), 10 deletions(-) diff --git a/libmwereader/mweparser.cpp b/libmwereader/mweparser.cpp index ab85753..60ccdb0 100644 --- a/libmwereader/mweparser.cpp +++ b/libmwereader/mweparser.cpp @@ -12,7 +12,6 @@ namespace Corpus2 { MWEParser::MWEParser() : BasicSaxParser(), state_(NONE) { - } std::string MWEParser::get_attribute(const AttributeList& attributes, @@ -82,7 +81,6 @@ namespace Corpus2 { state_ = NONE; } else if(state_ == CONDITION && name == "condition"){ wccl_operator_ = finish_get_text(); - std::cout << wccl_operator_ << std::endl; state_ = MWEGROUP; } else if(state_ == MWEGROUP && name == "mwegroup"){ state_ = UNITSDESC; @@ -91,12 +89,8 @@ namespace Corpus2 { } else if(state_ == MWE && name == "MWE"){ state_ = INSTANCES; // TODO: tworzenie jednostki - std::cout << "Tworzenie jednostki: " << mwe_base_ << " dla "; - foreach(str_map::value_type &i, variables_) - std::cout << i.first << ": " << i.second << ", "; - std::cout << "\nhead: " << head_cond_ << "\nop: " - << wccl_operator_ << std::endl; - std::cout << "MWE Group name: " << group_name_ << std::endl; + print_current_mwe(true); + } else if(state_ == VAR && name == "var"){ state_ = MWE; variables_[var_name_] = finish_get_text(); @@ -109,6 +103,21 @@ namespace Corpus2 { } } + void MWEParser::print_current_mwe(bool with_condition) + { + std::cout << "Forma podstawowa: " << mwe_base_ << "\nZmienne: "; + + foreach(str_map::value_type &i, variables_) + std::cout << i.first << ": " << i.second << ", "; + std::cout << "\nWarunek głowy: " << head_cond_ << "\n"; + if(with_condition){ + std::cout << "Grupa jednostek: " << group_name_ << std::endl; + std::cout << "Operator: " << wccl_operator_ << std::endl; + + } + + } + std::string MWEParser::finish_get_text() { std::string str = get_buf(); diff --git a/libmwereader/mweparser.h b/libmwereader/mweparser.h index 442e1a8..4ccdaf3 100644 --- a/libmwereader/mweparser.h +++ b/libmwereader/mweparser.h @@ -6,7 +6,6 @@ namespace Corpus2 { -typedef std::map<std::string, std::string> str_map; class MWEParser : public BasicSaxParser { @@ -14,12 +13,15 @@ public: MWEParser(); protected: + typedef std::map<std::string, std::string> str_map; void on_start_element(const Glib::ustring &name, const AttributeList& attributes); void on_end_element(const Glib::ustring &name); std::string finish_get_text(); + void print_current_mwe(bool with_condition = false); + /// retrives tagset= attribute std::string get_attribute(const AttributeList& attributes, diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index f6bb63b..6fe0d1e 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -9,7 +9,7 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( MWEReader::MWEReader(const Tagset &tagset, const std::string &filename) : TokenReader(tagset), inner_filename_(filename) { - // TODO implementataion + // TODO implementataion? } MWEReader::~MWEReader() diff --git a/libmwereader/test_mwe.xml b/libmwereader/test_mwe.xml index 78c3d1a..07bb808 100644 --- a/libmwereader/test_mwe.xml +++ b/libmwereader/test_mwe.xml @@ -16,6 +16,29 @@ <var name="Subst2">paweł</var> <head>inter(cas[0], {nom})</head> </MWE> + <MWE base="waga netto"> + <var name="Subst1">waga</var> + <var name="Subst2">netto</var> + <head>inter(base[0], "waga")</head> + </MWE> + </instances> + </mwegroup> + <mwegroup name="AdjSubstFix" type="fix" class="subst"> + <condition> + and( + inter(base[0],{$Adj}), + inter(class[0],{adj}), + inter(base[1],$Subst), + inter(class[1],{subst,ger,depr}), + agrpp(0,1,{nmb,gnd,cas}) + ) + </condition> + <instances> + <MWE name="dobre imię"> + <var name="Adj">dobry</var> + <var name="Subst">imię</var> + <head>inter(class[0],{subst,ger,depr})</head> + </MWE> </instances> </mwegroup> -- GitLab