From 5debff88a2eb295fe25da8764e4cdcb41f244275 Mon Sep 17 00:00:00 2001 From: minda <minda@kalafior.(none)> Date: Wed, 18 Jan 2012 10:08:02 +0100 Subject: [PATCH] Changes in poliqarp library - new option to retrieve the whole sentence --- poliqarp-library/sakura/poliqarp.c | 2 +- poliqarp-library/sakura/poliqarp.h | 2 ++ poliqarp-library/sakura/query.c | 2 ++ poliqarp-library/sakura/value.h | 2 +- poliqarp/c2pqtest.cpp | 2 +- poliqarp/pqclient.cpp | 5 +++-- poliqarp/pqclient.h | 2 +- poliqarp/pqreader.cpp | 17 ++++++++++++----- poliqarp/pqreader.h | 6 +++++- 9 files changed, 28 insertions(+), 12 deletions(-) diff --git a/poliqarp-library/sakura/poliqarp.c b/poliqarp-library/sakura/poliqarp.c index 7d1a5c5..123876a 100644 --- a/poliqarp-library/sakura/poliqarp.c +++ b/poliqarp-library/sakura/poliqarp.c @@ -31,7 +31,7 @@ #define POLIQARP_MAJOR_VERSION 1 #define POLIQARP_MINOR_VERSION 3 -#define POLIQARP_REVISION_NUMBER 11 +#define POLIQARP_REVISION_NUMBER 12 #define POLIQARP_LIBRARY_NAME "sakura" const int poliqarp_major_version = POLIQARP_MAJOR_VERSION; diff --git a/poliqarp-library/sakura/poliqarp.h b/poliqarp-library/sakura/poliqarp.h index a779d67..f5926ae 100644 --- a/poliqarp-library/sakura/poliqarp.h +++ b/poliqarp-library/sakura/poliqarp.h @@ -376,6 +376,8 @@ int poliqarp_resize_match_buffer(struct poliqarp_match_buffer *buffer, struct poliqarp_match { size_t start; /** Offset of the first segment that belongs to this match. */ + size_t withinEnd; + size_t withinStart; size_t end; /** Offset of one-past-end segment in this match. */ size_t focus; /** Offset (relative to start of corpus) of focus point. */ size_t document; /** Document identifier associated with this match. 
*/ diff --git a/poliqarp-library/sakura/query.c b/poliqarp-library/sakura/query.c index 8413553..57943f1 100644 --- a/poliqarp-library/sakura/query.c +++ b/poliqarp-library/sakura/query.c @@ -683,6 +683,8 @@ look_breakout: match.start = ctx.m_start; assert(ctx.m_end != (size_t)-1); match.end = ctx.m_end; + match.withinStart = ctx.subdocument.corpus_low; + match.withinEnd = ctx.subdocument.corpus_high; match.focus = ctx.m_focus == (size_t)-1 ? ctx.m_start : ctx.m_focus; match.document = corpus->document.current - 1; pthread_mutex_lock(&result->mutex); diff --git a/poliqarp-library/sakura/value.h b/poliqarp-library/sakura/value.h index 7775847..62455b2 100644 --- a/poliqarp-library/sakura/value.h +++ b/poliqarp-library/sakura/value.h @@ -229,7 +229,7 @@ found_true: * @param pos Position for which the value is calculated. */ static inline bool poliqarp_value_eval(const struct poliqarp_value *this, - const struct poliqarp_corpus *corpus, + const struct poliqarp_corpus * corpus, const struct poliqarp_binary_segment *pos) { switch (this->domain) { diff --git a/poliqarp/c2pqtest.cpp b/poliqarp/c2pqtest.cpp index 29a1114..3715675 100644 --- a/poliqarp/c2pqtest.cpp +++ b/poliqarp/c2pqtest.cpp @@ -18,7 +18,7 @@ int main(int argc, char** argv) //while (Corpus2::Token* t = pqc.get_next_focus_token()) { // writer->write_token_dispose(t); //} - while (Corpus2::Sentence::Ptr s = pqc.get_next_match_sequence()) { + while (Corpus2::Sentence::Ptr s = pqc.get_next_match_sequence(false)) { writer->write_sentence(*s); } } diff --git a/poliqarp/pqclient.cpp b/poliqarp/pqclient.cpp index 494b4cd..90f1e91 100644 --- a/poliqarp/pqclient.cpp +++ b/poliqarp/pqclient.cpp @@ -115,11 +115,12 @@ Token* PoliqarpClient::get_next_focus_token() } }; -Sentence::Ptr PoliqarpClient::get_next_match_sequence() +Sentence::Ptr PoliqarpClient::get_next_match_sequence(bool getWholeSentence) { poliqarp_match match; if (next_match(match)) { - return get_token_range(match.start, match.end); + 
if(getWholeSentence) return get_token_range(match.withinStart, match.withinEnd); + else return get_token_range(match.start, match.end); } else { return Sentence::Ptr(); } diff --git a/poliqarp/pqclient.h b/poliqarp/pqclient.h index 0420849..266186e 100644 --- a/poliqarp/pqclient.h +++ b/poliqarp/pqclient.h @@ -31,7 +31,7 @@ public: bool next_match(poliqarp_match& match); Token* get_next_focus_token(); - Sentence::Ptr get_next_match_sequence(); + Sentence::Ptr get_next_match_sequence(bool getWholeSentence); boost::shared_ptr<Chunk> get_next_document(); Token* get_token(size_t pos); diff --git a/poliqarp/pqreader.cpp b/poliqarp/pqreader.cpp index 6334b4f..2fb4c83 100644 --- a/poliqarp/pqreader.cpp +++ b/poliqarp/pqreader.cpp @@ -20,7 +20,7 @@ bool PoliqarpReader::registered = TokenReader::register_path_reader<PoliqarpRead PoliqarpReader::PoliqarpReader(const Tagset &tagset, const std::string &filename) : TokenReader(tagset), pq_(new PoliqarpClient(tagset, filename)), - executed_(false), mode_(PQ_SENTENCES) + executed_(false), mode_(PQ_SENTENCES), getWholeSentence(false) { pq_->compile_query("[]+ within s"); } @@ -33,7 +33,7 @@ void PoliqarpReader::set_query(const std::string &query) { pq_->compile_query(query); mode_ = PQ_MANUAL; - executed_ = false; + executed_ = false; } void PoliqarpReader::execute() @@ -50,8 +50,8 @@ Token* PoliqarpReader::get_next_token() Sentence::Ptr PoliqarpReader::get_next_sentence() { - if (!executed_) execute(); - return pq_->get_next_match_sequence(); + if (!executed_) execute(); + return pq_->get_next_match_sequence(getWholeSentence); } boost::shared_ptr<Chunk> PoliqarpReader::get_next_chunk() @@ -74,11 +74,18 @@ void PoliqarpReader::set_option(const std::string &option) pq_->compile_query("[]"); mode_ = PQ_TOKENS; executed_ = false; - } else { + } else if (option == "getWholeSentence") { + setGetWholeSentence(true); + }else { TokenReader::set_option(option); } } +void PoliqarpReader::setGetWholeSentence(bool getWholeSentence_) +{ + 
getWholeSentence = getWholeSentence_; +} + std::string PoliqarpReader::get_option(const std::string& option) const { return TokenReader::get_option(option); diff --git a/poliqarp/pqreader.h b/poliqarp/pqreader.h index a8fb40b..e7dc398 100644 --- a/poliqarp/pqreader.h +++ b/poliqarp/pqreader.h @@ -40,6 +40,8 @@ public: boost::shared_ptr<Chunk> get_next_chunk(); + void setGetWholeSentence(bool getWholeSentence_); + void set_option(const std::string& option); std::string get_option(const std::string& option) const; @@ -51,9 +53,11 @@ protected: boost::scoped_ptr<PoliqarpClient> pq_; - bool executed_; + bool executed_; PQ_MODE mode_; + + bool getWholeSentence; }; } /* end ns Corpus2 */ -- GitLab