From d35e12c3b1a195d28ee710624f72ae5650b68b25 Mon Sep 17 00:00:00 2001 From: Bartosz Broda <bartosz.broda@gmail.com> Date: Tue, 14 Jun 2011 09:06:52 +0200 Subject: [PATCH] add searching for head of an mwe --- libmwereader/mwe.cpp | 23 ++++++++++++++++++++--- libmwereader/mwe.h | 2 +- libmwereader/mwereader.cpp | 3 ++- libmwereader/test_mwe.xml | 2 +- 4 files changed, 24 insertions(+), 6 deletions(-) diff --git a/libmwereader/mwe.cpp b/libmwereader/mwe.cpp index cc1f537..5799d2c 100644 --- a/libmwereader/mwe.cpp +++ b/libmwereader/mwe.cpp @@ -24,7 +24,7 @@ LexicalUnit::LexicalUnit(const std::string &base, } bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc, - std::set<size_t> &out_position) const + std::set<size_t> &out_position, int &head_pos) const { // set variables for(variables_map::const_iterator ivars = variables_.begin(); @@ -37,6 +37,10 @@ bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc, if(pResult->get_value() == false) return false; + bool found_head = false; + + Wccl::SentenceContext sc2(sc.get_sentence_ptr()); + // fill up positions foreach(const std::string&varname, condition_->valid_variable_names()){ if(boost::algorithm::starts_with(varname, "Pos")){ @@ -46,11 +50,24 @@ bool LexicalUnit::IsHere(const Wccl::SentenceContext &sc, errmsg += " Offending unit: " + base_; throw Wccl::WcclError(errmsg); } - out_position.insert( sc.get_abs_position(pos) ); + int abs_pos = sc.get_abs_position(pos); + out_position.insert( abs_pos ); + if(!found_head){ + sc2.set_position(abs_pos); + if(head_cond_->apply(sc2)->get_value()){ + head_pos = abs_pos; + found_head = true; + } + } } } - // TODO head position + if(!found_head){ + std::string errmsg("MWE found, but no head for it."); + errmsg += " Offending unit: " + base_; + throw Wccl::WcclError(errmsg); + } + return true; } diff --git a/libmwereader/mwe.h b/libmwereader/mwe.h index 77a38fe..439323f 100644 --- a/libmwereader/mwe.h +++ b/libmwereader/mwe.h @@ -35,7 +35,7 @@ public: * \returns true if this lexical unit was found here */ virtual bool IsHere(const Wccl::SentenceContext& sc, - std::set<size_t> &out_positions) const; + std::set<size_t> &out_positions, int &head_pos) const; const std::string & get_base() const{ return base_;} const variables_map & get_variables() const{ return variables_;} diff --git a/libmwereader/mwereader.cpp b/libmwereader/mwereader.cpp index 26e4c6a..e6328be 100644 --- a/libmwereader/mwereader.cpp +++ b/libmwereader/mwereader.cpp @@ -46,7 +46,8 @@ bool MWEReader::registered = TokenReader::register_path_reader<MWEReader>( std::cout << "# "; foreach(LexicalUnit::Ptr pLU, potential){ std::set<size_t> positions; - bool is_here = pLU->IsHere(sc, positions); + int head; + bool is_here = pLU->IsHere(sc, positions, head); if(is_here) std::cout << "** " << pLU->get_base() << "** "; } diff --git a/libmwereader/test_mwe.xml b/libmwereader/test_mwe.xml index 43a302e..020213f 100644 --- a/libmwereader/test_mwe.xml +++ b/libmwereader/test_mwe.xml @@ -17,7 +17,7 @@ <MWE base="Jan Paweł"> <var name="Subst1">jan</var> <var name="Subst2">paweł</var> - <head>inter(cas[0], {nom})</head> + <head>inter(base[0], "jan")</head> </MWE> <MWE base="waga netto"> <var name="Subst1">waga</var> -- GitLab