diff --git a/poliqarp-library/sakura/poliqarp.c b/poliqarp-library/sakura/poliqarp.c index 7d1a5c5bc595c09ff273ebd1bc122d6652ec37c8..123876a92232dd7747aef9ed522baf0a0c056085 100644 --- a/poliqarp-library/sakura/poliqarp.c +++ b/poliqarp-library/sakura/poliqarp.c @@ -31,7 +31,7 @@ #define POLIQARP_MAJOR_VERSION 1 #define POLIQARP_MINOR_VERSION 3 -#define POLIQARP_REVISION_NUMBER 11 +#define POLIQARP_REVISION_NUMBER 12 #define POLIQARP_LIBRARY_NAME "sakura" const int poliqarp_major_version = POLIQARP_MAJOR_VERSION; diff --git a/poliqarp-library/sakura/poliqarp.h b/poliqarp-library/sakura/poliqarp.h index a779d677f2e0b6c0678e39883e6cbfe2a756640d..f5926aec689df3a8678113a55807fe275eff0959 100644 --- a/poliqarp-library/sakura/poliqarp.h +++ b/poliqarp-library/sakura/poliqarp.h @@ -376,6 +376,8 @@ int poliqarp_resize_match_buffer(struct poliqarp_match_buffer *buffer, struct poliqarp_match { size_t start; /** Offset of the first segment that belongs to this match. */ + size_t withinEnd; + size_t withinStart; size_t end; /** Offset of one-past-end segment in this match. */ size_t focus; /** Offset (relative to start of corpus) of focus point. */ size_t document; /** Document identifier associated with this match. */ diff --git a/poliqarp-library/sakura/query.c b/poliqarp-library/sakura/query.c index 84135531c77800e839e8333998277028a03eb97d..57943f1e938a10dde81e4aec923f60d34f46837a 100644 --- a/poliqarp-library/sakura/query.c +++ b/poliqarp-library/sakura/query.c @@ -683,6 +683,8 @@ look_breakout: match.start = ctx.m_start; assert(ctx.m_end != (size_t)-1); match.end = ctx.m_end; + match.withinStart = ctx.subdocument.corpus_low; + match.withinEnd = ctx.subdocument.corpus_high; match.focus = ctx.m_focus == (size_t)-1 ? ctx.m_start : ctx.m_focus; match.document = corpus->document.current - 1; pthread_mutex_lock(&result->mutex); diff --git a/poliqarp-library/sakura/value.h b/poliqarp-library/sakura/value.h index 7775847b377af3350c45e23944b8162f4d400526..62455b289167c19af92d8450d6b1a587c8908b35 100644 --- a/poliqarp-library/sakura/value.h +++ b/poliqarp-library/sakura/value.h @@ -229,7 +229,7 @@ found_true: * @param pos Position for which the value is calculated. */ static inline bool poliqarp_value_eval(const struct poliqarp_value *this, - const struct poliqarp_corpus *corpus, + const struct poliqarp_corpus * corpus, const struct poliqarp_binary_segment *pos) { switch (this->domain) { diff --git a/poliqarp/c2pqtest.cpp b/poliqarp/c2pqtest.cpp index 29a111455ef93900e4a4d82599392f4aa5131d70..371567536a4c972877a2dfda1315f2cbc2fb33d2 100644 --- a/poliqarp/c2pqtest.cpp +++ b/poliqarp/c2pqtest.cpp @@ -18,7 +18,7 @@ int main(int argc, char** argv) //while (Corpus2::Token* t = pqc.get_next_focus_token()) { // writer->write_token_dispose(t); //} - while (Corpus2::Sentence::Ptr s = pqc.get_next_match_sequence()) { + while (Corpus2::Sentence::Ptr s = pqc.get_next_match_sequence(false)) { writer->write_sentence(*s); } } diff --git a/poliqarp/pqclient.cpp b/poliqarp/pqclient.cpp index 494b4cd2738df868080e0635bc828ed09eb97b64..90f1e91f4faea20503944a9c173f854031f63fe5 100644 --- a/poliqarp/pqclient.cpp +++ b/poliqarp/pqclient.cpp @@ -115,11 +115,12 @@ Token* PoliqarpClient::get_next_focus_token() } }; -Sentence::Ptr PoliqarpClient::get_next_match_sequence() +Sentence::Ptr PoliqarpClient::get_next_match_sequence(bool getWholeSentence) { poliqarp_match match; if (next_match(match)) { - return get_token_range(match.start, match.end); + if(getWholeSentence) return get_token_range(match.withinStart, match.withinEnd); + else return get_token_range(match.start, match.end); } else { return Sentence::Ptr(); } diff --git a/poliqarp/pqclient.h b/poliqarp/pqclient.h index 0420849e352c3546784e55ddf42fd4e46a80ab04..266186e693f5fbd6133f7767552fa5acf72363a9 100644 --- a/poliqarp/pqclient.h +++ b/poliqarp/pqclient.h @@ -31,7 +31,7 @@ public: bool next_match(poliqarp_match& match); Token* get_next_focus_token(); - Sentence::Ptr get_next_match_sequence(); + Sentence::Ptr get_next_match_sequence(bool getWholeSentence); boost::shared_ptr<Chunk> get_next_document(); Token* get_token(size_t pos); diff --git a/poliqarp/pqreader.cpp b/poliqarp/pqreader.cpp index 6334b4fb2b09080c0083514794770cd5fcadbe89..2fb4c83fbe34293ec40e1546e059563e57c33df6 100644 --- a/poliqarp/pqreader.cpp +++ b/poliqarp/pqreader.cpp @@ -20,7 +20,7 @@ bool PoliqarpReader::registered = TokenReader::register_path_reader<PoliqarpRead PoliqarpReader::PoliqarpReader(const Tagset &tagset, const std::string &filename) : TokenReader(tagset), pq_(new PoliqarpClient(tagset, filename)), - executed_(false), mode_(PQ_SENTENCES) + executed_(false), mode_(PQ_SENTENCES), getWholeSentence(false) { pq_->compile_query("[]+ within s"); } @@ -33,7 +33,7 @@ void PoliqarpReader::set_query(const std::string &query) { pq_->compile_query(query); mode_ = PQ_MANUAL; - executed_ = false; + executed_ = false; } void PoliqarpReader::execute() @@ -50,8 +50,8 @@ Token* PoliqarpReader::get_next_token() Sentence::Ptr PoliqarpReader::get_next_sentence() { - if (!executed_) execute(); - return pq_->get_next_match_sequence(); + if (!executed_) execute(); + return pq_->get_next_match_sequence(getWholeSentence); } boost::shared_ptr<Chunk> PoliqarpReader::get_next_chunk() @@ -74,11 +74,18 @@ void PoliqarpReader::set_option(const std::string &option) pq_->compile_query("[]"); mode_ = PQ_TOKENS; executed_ = false; - } else { + } else if (option == "getWholeSentence") { + setGetWholeSentence(true); + }else { TokenReader::set_option(option); } } +void PoliqarpReader::setGetWholeSentence(bool getWholeSentence_) +{ + getWholeSentence = getWholeSentence_; +} + std::string PoliqarpReader::get_option(const std::string& option) const { return TokenReader::get_option(option); diff --git a/poliqarp/pqreader.h b/poliqarp/pqreader.h index a8fb40ba06311f2d5913ae53382dd3fe174f44ed..e7dc398623070a0b54194d6a91cdf5f393ebb44d 100644 --- a/poliqarp/pqreader.h +++ b/poliqarp/pqreader.h @@ -40,6 +40,8 @@ public: boost::shared_ptr<Chunk> get_next_chunk(); + void setGetWholeSentence(bool getWholeSentence_); + void set_option(const std::string& option); std::string get_option(const std::string& option) const; @@ -51,9 +53,11 @@ protected: boost::scoped_ptr<PoliqarpClient> pq_; - bool executed_; + bool executed_; PQ_MODE mode_; + + bool getWholeSentence; }; } /* end ns Corpus2 */