Skip to content
Snippets Groups Projects
Select Git revision
  • 6e102b044f6aad89a73004036216c9d19ea17c23
  • master default protected
  • vertical_relations
  • lu_without_semantic_frames
  • hierarchy
  • additional-unification-filters
  • v0.1.1
  • v0.1.0
  • v0.0.9
  • v0.0.8
  • v0.0.7
  • v0.0.6
  • v0.0.5
  • v0.0.4
  • v0.0.3
  • v0.0.2
  • v0.0.1
17 results

reset_db.sh

Blame
  • pqclient.cpp 5.11 KiB
    #include "pqclient.h"
    #include <boost/make_shared.hpp>
    #include <boost/lexical_cast.hpp>
    
    extern "C" {
    	/*
    	 * Notification hook with C linkage that deliberately does nothing.
    	 * NOTE(review): presumably the Poliqarp library expects its client to
    	 * provide this symbol at link time -- confirm against the library headers.
    	 */
    	void async_notify_new_results(void* /* session */) {}
    }
    
    namespace Corpus2
    {
    
    /**
     * Initialises the Poliqarp library, opens the corpus found at `path`,
     * creates the match buffer and caches the corpus size.
     *
     * @param tagset tagset later used by get_token() to parse tag strings
     * @param path   corpus path handed to poliqarp_open_corpus()
     * @throws Corpus2Error when the library cannot be initialised or the
     *         corpus cannot be opened; the message comes from Poliqarp
     */
    PoliqarpClient::PoliqarpClient(const Tagset& tagset, const std::string path)
    	: tagset_(tagset)
    {
    	query_compiled_ = false;
    	count_so_far_ = 0;
    	err_ = 0;
    	curr_chunk_doc_id_ = 0;
    	// next_match() inspects these before any query has been executed, so
    	// they must start from a defined "no matches buffered" state.
    	buffer_pos_ = 0;
    	info_.used = 0;
    	poliqarp_error error = poliqarp_error_none;
    	if (poliqarp_create("", &error) != 0) {
    		throw Corpus2Error(poliqarp_error_message_get(&error));
    	}
    	progress_init(&progress_);
    	if (poliqarp_open_corpus(&corpus_, path.c_str(), &progress_, &error) == -1) {
    		// The destructor is not run for an object whose constructor throws,
    		// so the library initialised above has to be shut down here.
    		// Copy the message first in case it is owned by library state.
    		const std::string message = poliqarp_error_message_get(&error);
    		poliqarp_destroy();
    		throw Corpus2Error(message);
    	}
    	poliqarp_create_match_buffer(&buffer_, 1000);
    	poliqarp_corpus_info cinfo;
    	poliqarp_get_corpus_info(&corpus_, &cinfo);
    	corpus_size_ = cinfo.num_segments;
    }
    
    /**
     * Releases everything acquired by the constructor and compile_query().
     *
     * Resources are torn down in reverse order of acquisition: the compiled
     * query and the match buffer were created while the corpus was open, so
     * they are destroyed before the corpus is closed, and the library itself
     * is shut down last.
     */
    PoliqarpClient::~PoliqarpClient()
    {
    	if (query_compiled_) {
    		poliqarp_destroy_query(&query_);
    		query_compiled_ = false;
    	}
    	poliqarp_destroy_match_buffer(&buffer_);
    	poliqarp_close_corpus(&corpus_);
    	poliqarp_destroy();
    }
    
    void PoliqarpClient::compile_query(const std::string & q)
    {
    	count_so_far_ = 0;
    	last_query_ = q;
    	if (query_compiled_) {
    		poliqarp_destroy_query(&query_);
    		query_compiled_ = false;
    	}
    	poliqarp_error error = poliqarp_error_none;
    	if (q.empty()) {
    		throw Corpus2Error("EmptyQuery");
    	} else if (poliqarp_create_query(&query_, q.c_str(), &corpus_,
    			0, NULL, NULL, &error) == -1) {
    		throw Corpus2Error(std::string("QueryFailed: ") + poliqarp_error_message_get(&error));
    	} else {
    		query_compiled_ = true;
    	}
    }
    
    /**
     * Recompiles the query most recently passed to compile_query(), which
     * also resets the match counter.
     *
     * @throws Corpus2Error whatever compile_query() throws for that text
     */
    void PoliqarpClient::reset_query()
    {
    	const std::string& previous = last_query_;
    	compile_query(previous);
    }
    
    /**
     * Fetches the next batch of matches for the compiled query into the
     * match buffer.
     *
     * The buffer is emptied first, then refilled via poliqarp_produce();
     * afterwards the buffer info is refreshed, the running match count is
     * increased by the number of buffered matches and the read position is
     * rewound to the start of the buffer.
     *
     * @throws Corpus2Error "Query not compiled" when no query is compiled,
     *         "query execution error" when poliqarp_produce() reports
     *         failure, "buffer read error" when the buffer info cannot be read
     */
    void PoliqarpClient::execute_query()
    {
    	if (!query_compiled_) {
    		throw Corpus2Error("Query not compiled");
    	}
    	poliqarp_forget(&buffer_);
    	// A non-zero return from either call is treated as failure.
    	if (poliqarp_produce(&buffer_, 1000, &query_, &progress_, NULL, 0, 1000)) {
    		throw Corpus2Error("query execution error");
    	}
    	if (poliqarp_get_match_buffer_info(&buffer_, &info_)) {
    		throw Corpus2Error("buffer read error");
    	}
    	count_so_far_ += buffer_.used;
    	buffer_pos_ = 0;
    }
    
    
    /**
     * Copies the next buffered match into `match`.
     *
     * When every buffered match has been consumed and the last batch filled
     * the buffer completely (info_.used == buffer_.capacity), another batch
     * is fetched through execute_query() before reading.
     *
     * @param match receives the match on success
     * @return true when a match was written to `match`, false when there
     *         are no (more) matches
     * @throws Corpus2Error propagated from execute_query() during a refill
     */
    bool PoliqarpClient::next_match(poliqarp_match& match)
    {
    	if (!(info_.used > 0)) {
    		return false;
    	}
    	const bool batch_exhausted = !(buffer_pos_ < info_.used);
    	if (batch_exhausted) {
    		// A partially filled batch means the query has no further results.
    		if (!(info_.used == buffer_.capacity)) {
    			return false;
    		}
    		poliqarp_forget(&buffer_);
    		execute_query();
    		if (!(info_.used > 0)) {
    			return false;
    		}
    	}
    	poliqarp_get_match(&buffer_, &match, buffer_pos_++);
    	return true;
    }
    
    /**
     * Advances to the next match and builds the token at its `focus`
     * position.
     *
     * @return a newly allocated Token (get_token() releases ownership to the
     *         caller), or NULL when there are no more matches
     */
    Token* PoliqarpClient::get_next_focus_token()
    {
    	poliqarp_match current;
    	if (!next_match(current)) {
    		return NULL;
    	}
    	return get_token(current.focus);
    }
    
    /**
     * Advances to the next match and returns its tokens, positions
     * `start` up to but excluding `end`, as a sentence.
     *
     * @return the sentence for the match, or an empty Sentence::Ptr when
     *         there are no more matches
     */
    Sentence::Ptr PoliqarpClient::get_next_match_sequence()
    {
    	poliqarp_match current;
    	if (!next_match(current)) {
    		return Sentence::Ptr();
    	}
    	return get_token_range(current.start, current.end);
    }
    
    /**
     * Builds a Token for the corpus segment at position `pos`.
     *
     * The token gets the segment text as its orth, a Space whitespace marker
     * when the segment reports `space_before`, and one lexeme per entry of
     * the set returned by poliqarp_get_disambiguated_interpretations().
     *
     * @param pos segment position passed to poliqarp_get_segment()
     * @return a newly allocated Token; ownership passes to the caller
     */
    Token* PoliqarpClient::get_token(size_t pos)
    {
    	poliqarp_segment seg;
    	poliqarp_get_segment(&seg, &corpus_, pos);
    	poliqarp_segment_info seg_info;
    	poliqarp_get_segment_info(&seg, &seg_info);
    	poliqarp_interpretation_set interps;
    	poliqarp_get_disambiguated_interpretations(&seg, &interps);
    	poliqarp_interpretation_set_info interps_info;
    	poliqarp_get_interpretation_set_info(&interps, &interps_info);
    	// Held by a smart pointer so the token is freed if anything below throws.
    	std::auto_ptr<Token> token(new Token());
    	if (seg_info.space_before) {
    		token->set_wa(PwrNlp::Whitespace::Space);
    	}
    	token->set_orth_utf8(seg_info.text);
    	size_t idx = 0;
    	while (idx < interps_info.size) {
    		poliqarp_interpretation interp;
    		poliqarp_get_interpretation(&interps, &interp, idx);
    		poliqarp_interpretation_info interp_info;
    		poliqarp_get_interpretation_info(&interp, &interp_info);
    		Tag parsed_tag = tagset_.parse_simple_tag(interp_info.tag);
    		Lexeme lexeme(UnicodeString::fromUTF8(interp_info.base), parsed_tag);
    		lexeme.set_disamb(interp.disamb);
    		token->add_lexeme(lexeme);
    		++idx;
    	}
    	return token.release();
    }
    
    /**
     * Collects consecutive matches that share one document id into a chunk.
     *
     * Each match contributes one sentence; the chunk's "id" attribute is
     * "ch" followed by the document id. The first match that belongs to a
     * different document is pushed back so the next call starts with it.
     *
     * @return the chunk, or an empty pointer when there are no more matches
     */
    boost::shared_ptr<Chunk> PoliqarpClient::get_next_document()
    {
    	poliqarp_match current;
    	if (!next_match(current)) {
    		return boost::shared_ptr<Chunk>();
    	}
    	boost::shared_ptr<Chunk> doc = boost::make_shared<Chunk>();
    	const size_t doc_id = current.document;
    	doc->set_attribute("id", "ch" + boost::lexical_cast<std::string>(doc_id));
    	doc->append(get_token_range(current.start, current.end));
    	for (;;) {
    		if (!next_match(current)) {
    			break;
    		}
    		if (current.document != doc_id) {
    			// Un-read the match: step the buffer position back by one.
    			--buffer_pos_;
    			break;
    		}
    		doc->append(get_token_range(current.start, current.end));
    	}
    	return doc;
    }
    
    /**
     * Builds a sentence from the tokens at positions [from, to).
     *
     * @param from first segment position (inclusive)
     * @param to   end segment position (exclusive)
     * @return a new sentence; it holds no tokens when `from >= to`
     */
    Sentence::Ptr PoliqarpClient::get_token_range(size_t from, size_t to)
    {
    	Sentence::Ptr sentence = boost::make_shared<Sentence>();
    	size_t pos = from;
    	while (pos < to) {
    		sentence->append(get_token(pos));
    		++pos;
    	}
    	return sentence;
    }
    
    /**
     * @return the running match count accumulated by execute_query() and
     *         only_count_results() since the last compile_query()
     */
    size_t PoliqarpClient::get_count_of_matches_so_far()
    {
    	const size_t total = count_so_far_;
    	return total;
    }
    
    /**
     * Runs the compiled query to exhaustion, counting matches without
     * reading them.
     *
     * Batches are produced repeatedly; each non-empty batch is added to the
     * running count and then discarded. Counting stops quietly at the first
     * empty batch or the first failing Poliqarp call. The count is not reset
     * here, so it continues from whatever was accumulated since the last
     * compile_query().
     *
     * @return the running match count; unchanged when no query is compiled
     */
    size_t PoliqarpClient::only_count_results()
    {
    	if (!query_compiled_) {
    		return count_so_far_;
    	}
    	// Both calls are treated as returning 0 on success, matching
    	// execute_query(). The loop previously continued only on a non-zero
    	// poliqarp_produce() result, so successful batches were never counted.
    	while (poliqarp_produce(&buffer_, 1000, &query_, &progress_, NULL, 0, 1000) == 0 &&
    			poliqarp_get_match_buffer_info(&buffer_, &info_) == 0 &&
    			info_.used > 0) {
    		count_so_far_ += info_.used;
    		poliqarp_forget(&buffer_);
    	}
    	return count_so_far_;
    }
    
    /**
     * @return the corpus size cached by the constructor from the
     *         `num_segments` field of the corpus info
     */
    size_t PoliqarpClient::get_corpus_size() const
    {
    	const size_t segments = corpus_size_;
    	return segments;
    }
    
    /**
     * @return `query_.last_context.index` when a query is compiled,
     *         otherwise 0
     */
    size_t PoliqarpClient::get_corpus_pos() const
    {
    	if (!query_compiled_) {
    		return 0;
    	}
    	return query_.last_context.index;
    }
    }