/*
 * This file is part of the Poliqarp suite.
 * 
 * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej
 * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 * Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 * 
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file gpl.txt included in the packaging
 * of this file.  (See http://www.gnu.org/licenses/translations.html for
 * unofficial translations.)
 * 
 * A commercial license is available from IPI PAN (contact
 * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
 * information).  Licensees holding a valid commercial license from IPI
 * PAN may use this file in accordance with that license.
 * 
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

#include <errno.h>

#include <sakura/random.h>
#include <sakura/query.h>
#include <sakura/lexer.h>
#include <sakura/common/bitstream.h>

#define POLIQARP_UNINDEX_THRESHOLD 200

int yyparse(yyscan_t scanner, struct poliqarp_query *query);

struct poliqarp_within *poliqarp_within_create_subdocument(
   struct poliqarp_subdocument_set *set)
{
   struct poliqarp_within *result = malloc(sizeof(*result));
   result->type = WITHIN_SUBDOCUMENT;
   result->as.subdocument = set;
   return result;
}

struct poliqarp_within *poliqarp_within_create_phrase(
   struct poliqarp_expression *phrase)
{
   struct poliqarp_within *result = malloc(sizeof(*result));
   result->type = WITHIN_PHRASE;
   result->as.phrase = phrase;
   return result;
}

int poliqarp_create_query(struct poliqarp_query *this, 
   const char *query_text, struct poliqarp_corpus *corpus, int flags,
   const char *rewrite, struct poliqarp_random_state *random_state,
   struct poliqarp_error *error)
{
   yyscan_t scanner;
   size_t i;
   int rc;

   assert(this != NULL);
   assert(corpus != NULL);
   assert(query_text != NULL);

   this->aliases = &(poliqarp_get_const_backend(corpus, config)->aliases);
   bitset_arena_create_dummy(&this->area.arena);
   graph_create(&this->graph, (set_compare_fn)poliqarp_expression_compare,
      (set_free_fn)poliqarp_expression_destroy);
   this->corpus = corpus;
   this->error = error;
   this->random_state = random_state;

   yylex_init(&scanner);
   this->query_text = strdup(query_text);
   yy_scan_string(this->query_text, scanner);
   this->meta_expression = NULL;
   this->eflags = 0;
   this->flags = flags;
   memset(this->variable_ranges, 0, sizeof this->variable_ranges);
   for (i = 0; i < POLIQARP_MAX_VARIABLES; i++)
      this->variable_types[i] = NULL;
   this->rewrite = poliqarp_get_query_rewrite(
      &corpus->config.query_rewrite_table,
      rewrite, false);
   this->rewrite_in_progress = false;
   switch (yyparse(scanner, this)) {
      case 0:
         break;
      case 2:
         errno = ENOMEM;
         poliqarp_error_from_system(this->error, NULL);
         /* fall through */
      case 1:
         rc = -1;
         poliqarp_destroy_query(this);
         goto cleanup;
      default:
         abort(); /* should not happen */
   }
   this->have_last_context = false;
   this->max_segment = 0;
   this->progress = 0;
   this->progress_helper = 0;
   poliqarp_create_search_area(this);

   /* reset corpus backends */
   corpus->document.current = 0;
   if (this->within && this->within->type == WITHIN_SUBDOCUMENT)
      this->within->as.subdocument->current = 0;
   if (corpus->syntax.syntax)
      poliqarp_backend_syntax_reset(&corpus->syntax);
   rc = 0;
cleanup:
   yylex_destroy(scanner);
   return rc;
}

void poliqarp_query_set_meta_expression(struct poliqarp_query *this, 
   struct poliqarp_expression *exp)
{
   assert(this != NULL);

   if (this->meta_expression)
      poliqarp_expression_destroy(this->meta_expression);
   this->meta_expression = exp;
}

void poliqarp_query_set_within(struct poliqarp_query *this, 
   struct poliqarp_within *within)
{
   assert(this != NULL);

   this->within = within;
}

int poliqarp_destroy_query(struct poliqarp_query *this)
{
   assert(this != NULL);

   if (this->corpus) {
      free(this->query_text);
      graph_destroy(&this->graph);
      if (this->meta_expression)
         poliqarp_expression_destroy(this->meta_expression);
      this->corpus = NULL;
      bitset_arena_destroy(&this->area.arena);
   }
   return 0;
}

enum xmode { cleanup, look, found, phrase_found, not_found, add, boundary, finish };

struct produce_helper {
   size_t last_segment;
   const struct poliqarp_searchable_area *area;
   const struct dfs_node *root;
   void *session;
   int notify_step;
   progress_t *progress;
   size_t progress_helper;
   bool document_fixup;
};

static inline uint64_t update_progress(struct poliqarp_query *this,
   progress_t *progress, size_t pos, uint64_t helper)
{
   if (pos > this->max_segment) {
      uint32_t num_segments =
         poliqarp_backend_corpus_size(&this->corpus->corpus);
      helper += (uint64_t)100 * (pos - this->max_segment);
      this->max_segment = pos;
      while (helper >= num_segments) {
         helper -= num_segments;
         progress_advance(progress, 1);
      }
   }
   return helper;
}

struct return_stack {
   size_t size;
   size_t pointer;
   struct poliqarp_search_context *data;
};

static void return_stack_create(struct return_stack *stack)
{
   stack->size = 16;
   stack->pointer = 0;
   stack->data = malloc(stack->size * sizeof(stack->data[0]));
}

static void return_stack_destroy(struct return_stack *stack)
{
   free(stack->data);
}

static void return_stack_push(struct return_stack *stack, 
   const struct poliqarp_search_context *p)
{
   if (stack->pointer == stack->size) {
      stack->size *= 2;
      stack->data = realloc(stack->data, stack->size * sizeof(stack->data[0]));
   }
   stack->data[stack->pointer++] = *p;
}

static void return_stack_push_link(struct return_stack *stack,
   const struct poliqarp_search_context *p)
{
   struct poliqarp_search_context q = *p;
   q.link = q.link->next;
   return_stack_push(stack, &q);
}

static void return_stack_push_phrase(struct return_stack *stack,
   const struct poliqarp_search_context *p)
{
   struct poliqarp_search_context q = *p;
   q.phrase++;
   return_stack_push(stack, &q);
}

static struct poliqarp_search_context return_stack_pop(struct return_stack *stack)
{
   return stack->data[--stack->pointer];
}

static bool return_stack_empty(const struct return_stack *stack)
{
   return (stack->pointer == 0);
}

static void return_stack_wipe(struct return_stack *stack)
{
   stack->pointer = 0;
}

static inline struct poliqarp_binary_segment get_segment(
   struct poliqarp_backend_corpus *this, size_t index)
{
   if (index != file_reader_current(&this->corpus))
      poliqarp_backend_corpus_seek(this, index);
   return poliqarp_backend_corpus_next(this);
}

static bool phrase_expression_eval_single(const struct poliqarp_expression *expression,
   struct poliqarp_corpus *corpus, const struct poliqarp_syntax_group *phrase)
{
   bool res = true;
   struct poliqarp_binary_segment pos;
   if (expression->as.phrase.synh) {
      if (phrase->u.noncoord.synh != POLIQARP_SYNTAX_GROUP_UNKNOWN) {
         pos = get_segment(&corpus->corpus, phrase->u.noncoord.synh);
         res = poliqarp_expression_eval(expression->as.phrase.synh, corpus, &pos, NULL);
      } else {
         res = false;
      }
   }
   if (!res)
      return false;
   if (expression->as.phrase.same) 
      return phrase->u.noncoord.synh == phrase->u.noncoord.semh;
   if (expression->as.phrase.semh) {
      if (phrase->u.noncoord.semh != POLIQARP_SYNTAX_GROUP_UNKNOWN) {
         pos = get_segment(&corpus->corpus, phrase->u.noncoord.semh);
         return poliqarp_expression_eval(expression->as.phrase.semh, corpus, &pos, NULL);
      } else {
         return false;
      }
   } else
      return true;
}

static bool phrase_expression_eval(const struct poliqarp_expression *expression,
   struct poliqarp_corpus *corpus, size_t i)
{
   bool result = false;
   struct poliqarp_syntax_group *groups = corpus->syntax.groups;
   switch (expression->type) {
      case POLIQARP_EXPRESSION_CONSTANT:
         return expression->as.constant;
      case POLIQARP_EXPRESSION_VALUE:
         return BIT_ARRAY_GET(((struct poliqarp_value *)(expression->as.value.value))->bits,
            groups[i].type);
      case POLIQARP_EXPRESSION_AND:
         return expression->as.expression.negate ^ 
            (phrase_expression_eval(expression->as.expression.left, corpus, i) && 
             phrase_expression_eval(expression->as.expression.right, corpus, i));
      case POLIQARP_EXPRESSION_OR:
         return expression->as.expression.negate ^ 
            (phrase_expression_eval(expression->as.expression.left, corpus, i) ||
             phrase_expression_eval(expression->as.expression.right, corpus, i));         
      case POLIQARP_EXPRESSION_PHRASE:
         if (groups[i].type == POLIQARP_SYNTAX_GROUP_COORD) {
            int len = groups[i].u.coord.length;
            result = expression->as.phrase.negate ^ expression->as.phrase.all;
            while (--len) {
               if (++i == corpus->syntax.size)
                  i = 0;
               if (groups[i].type == POLIQARP_SYNTAX_GROUP_COORD || groups[i].type == POLIQARP_SYNTAX_GROUP_CONJUNCTION)
                  continue;
               if (expression->as.phrase.all) {
                  if (!phrase_expression_eval_single(expression, corpus, groups + i)) {
                     result = !result;
                     break;
                  }
               } else {
                  if (phrase_expression_eval_single(expression, corpus, groups + i)) {
                     result = !result;
                     break;
                  }
               }
            }
         } else {
            result = expression->as.phrase.negate ^ 
               phrase_expression_eval_single(expression, corpus, groups + i);
         }
         return result;
      case POLIQARP_EXPRESSION_VARIABLE:
         return phrase_expression_eval(expression->as.variable.children[0],
            corpus, i);
         /* FIXME */
      case POLIQARP_EXPRESSION_INVALID:
         abort(); /* Should not happen. */
   }
   return false; /* muffle potential warnings */
}

static size_t find_phrase_satisfying_expression(struct poliqarp_corpus *corpus,
   uint32_t offset, size_t phrase, const struct poliqarp_expression *expr)
{
   struct poliqarp_backend_syntax *this = &corpus->syntax;
   if (phrase == (size_t)-1 || offset / 1024 > this->groups[phrase].to / 1024) {
      this->start = this->end = phrase = 0;
      this->pos = en4(*((uint32_t *)tinydb_fetch_item(&this->offsets, offset / 1024)));
   }
   while (phrase == this->end || this->groups[phrase].to < offset ||
      !phrase_expression_eval(expr, corpus, phrase)) 
   {
      if (phrase == this->end) {
         if (poliqarp_backend_syntax_next(this) == -1)
            return -1;
         this->start = phrase;
      } else {
         if (++phrase == this->size)
            phrase = 0;
      }
   }
   return phrase;
}

static int check_boundaries(struct poliqarp_search_context *ctx,
   struct poliqarp_query *this)
{
   int check_index = (ctx->index % this->area.granularity == 0);
   for (;;) {
      if (check_index) {
         size_t i = ctx->index / this->area.granularity;
         while (i < this->area.num_bits && 
            !bitset_arena_get(&this->area.arena, this->area.area, i)) 
         {
            i++;
         }
         if (ctx->index < i * this->area.granularity)
            ctx->index = i * this->area.granularity;
         if (ctx->index >= poliqarp_backend_corpus_size(&this->corpus->corpus))
            return -1;
      }
      if (ctx->index >= ctx->document.corpus_high) {
         /* try to read next document */
         if (poliqarp_backend_document_next(&this->corpus->document, &ctx->document) == -1)
            return -1;
         if (ctx->index < ctx->document.corpus_low || ctx->index >= ctx->document.corpus_high) {
            poliqarp_backend_document_search(&this->corpus->document, ctx->index);
            if (poliqarp_backend_document_next(&this->corpus->document, &ctx->document) == -1)
               return -1;
         }
         while (this->meta_expression && 
            poliqarp_expression_eval(this->meta_expression, this->corpus, &ctx->document, NULL) == false)
         {
            if (poliqarp_backend_document_next(&this->corpus->document, &ctx->document) == -1)
               return -1;
         }
         if (ctx->index < ctx->document.corpus_low)
            ctx->index = ctx->document.corpus_low;
         check_index = 1;
         continue;
      }
      if (this->within && this->within->type == WITHIN_SUBDOCUMENT &&
         ctx->index >= ctx->subdocument.corpus_high) 
      {
         /* try to read next subdocument */
         if (poliqarp_subdocument_next(this->within->as.subdocument, &ctx->subdocument) == -1)
            return -1;
         if (ctx->index >= ctx->subdocument.corpus_high) {
            poliqarp_subdocument_search(this->within->as.subdocument, ctx->index);
            if (poliqarp_subdocument_next(this->within->as.subdocument, &ctx->subdocument) == -1)
               return -1;
         }
         if (ctx->index < ctx->subdocument.corpus_low)
            ctx->index = ctx->subdocument.corpus_low;
         check_index = 1;
         continue;
      }
      if (this->within && this->within->type == WITHIN_PHRASE) {
         /* This function is called in the circumstances where we can safely
            reset the phrase list.  We want to scroll to the next phrase that
            satisfies the current `within phrase' expression. */
         struct poliqarp_backend_syntax *syntax = &this->corpus->syntax;
         ctx->within_phrase = find_phrase_satisfying_expression(this->corpus, ctx->index,
            ctx->within_phrase, this->within->as.phrase);
         ctx->phrase = syntax->start;
         if (ctx->within_phrase == (size_t)-1)
            return -1;
         if (ctx->index < syntax->groups[ctx->within_phrase].from) {
            ctx->index = syntax->groups[ctx->within_phrase].from;
            check_index = 1;
            continue;
         }
      }
      return 0;
   }
}

static bool end_of_within(struct poliqarp_search_context *ctx,
   struct poliqarp_query *this)
{
   return 
      ((ctx->index == ctx->document.corpus_high) ||
       (this->within && this->within->type == WITHIN_SUBDOCUMENT &&
          ctx->index == ctx->subdocument.corpus_high) ||
       (this->within && this->within->type == WITHIN_PHRASE && 
          ctx->index == this->corpus->syntax.groups[ctx->within_phrase].to + 1));
}

static void poliqarp_produce_cleanup(void *data)
{
   struct return_stack *stack = data;
   return_stack_destroy(stack);
}

static bool poliqarp_reset_variable_bindings(struct poliqarp_query *this,
   size_t *variable_bindings)
{
   size_t i;
   for (i = 0; i < POLIQARP_MAX_VARIABLES; i++)
      variable_bindings[i] = 0;
   return true;
}

static bool poliqarp_next_variable_bindings(struct poliqarp_query *this,
   size_t *variable_bindings)
{
   size_t i;
   for (i = 0; i < POLIQARP_MAX_VARIABLES; i++)
   {
      if (variable_bindings[i] > 0)
      {
         variable_bindings[i]--;
         break;
      }
      else
      {
         size_t r = this->variable_ranges[i];
         variable_bindings[i] = (r > 0) ? (r - 1) : 0; 
      }
   }
   return i < POLIQARP_MAX_VARIABLES;
}

static inline void poliqarp_swap_query_result(
   struct poliqarp_match_buffer *this, size_t i)
{
   pthread_mutex_lock(&this->mutex);
   assert(i < this->used);
   struct poliqarp_match temp = this->match[i];
   this->match[i] = this->match[this->used - 1];
   this->match[this->used - 1] = temp;
   pthread_mutex_unlock(&this->mutex);
}

static inline void poliqarp_replace_query_result(
   struct poliqarp_match_buffer *this, size_t i,
   const struct poliqarp_match *match)
{
   pthread_mutex_lock(&this->mutex);
   assert(i < this->used);
   this->match[i] = *match;
   pthread_mutex_unlock(&this->mutex);
}

int poliqarp_produce(struct poliqarp_match_buffer *result, size_t max,
   struct poliqarp_query *this, progress_t *progress, void *session,
   size_t notify_step, size_t max_match_length)
{
   struct poliqarp_corpus *corpus = this->corpus;
   size_t corpus_size;
   enum xmode next_mode, mode;
   struct poliqarp_search_context ctx;
   struct return_stack stack;
   struct poliqarp_binary_segment pos;
   struct poliqarp_match match;
   uint64_t progress_helper = 0;
#ifndef POLIQARP_SINGLE_THREADED
   int cancel_state;
#endif
   size_t variable_bindings[POLIQARP_MAX_VARIABLES];

#ifndef POLIQARP_SINGLE_THREADED
   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state); 
#endif
   corpus_size = poliqarp_backend_corpus_size(&corpus->corpus);
   pthread_mutex_lock(&progress->mutex);
   result->corpus = corpus;
   pthread_mutex_unlock(&progress->mutex);
   return_stack_create(&stack);
   if (this->have_last_context)
      ctx = this->last_context;
   else {
      progress_reset(progress);
      ctx.m_start = (size_t)-1;
      ctx.phrase = 0;
      ctx.document.corpus_high = ctx.subdocument.corpus_high = 0;
      ctx.within_phrase = (size_t)-1;
   }

   if (this->eflags & POLIQARP_QEFLAG_HAS_VARIABLES)
   {
      poliqarp_reset_variable_bindings(this, variable_bindings);
      ctx.index = -1;
   }

#ifndef POLIQARP_SINGLE_THREADED
   pthread_cleanup_push(poliqarp_produce_cleanup, &stack);
#endif
   next_mode = cleanup; /* just to shut up compilers */
   for (mode = cleanup; ; mode = next_mode) {
      switch (mode) {
         case cleanup:
            if (return_stack_empty(&stack)) {
               ctx.node = this->graph.dfs.root;
               ctx.link = NULL;
               if (this->eflags & POLIQARP_QEFLAG_HAS_VARIABLES)
               {
                  if (ctx.m_start != (size_t)-1)
                     ctx.index = ctx.m_start;
                  if (!poliqarp_next_variable_bindings(this, variable_bindings))
                     ctx.index++;
               }
               else
                  ctx.index = ctx.m_start + 1;
               ctx.m_start = ctx.m_end = ctx.m_focus = ctx.c_focus = (size_t)-1;
            } else {
               ctx = return_stack_pop(&stack);
               next_mode = look; /* skip checking boundaries if backtracking */
               break;
            }
            /* fall through */

         case boundary:
            if (check_boundaries(&ctx, this) == -1 || ctx.index >= corpus_size) {
               next_mode = finish;
               break;
            }
            /* fall through */

         case look:
#ifndef POLIQARP_SINGLE_THREADED
            pthread_setcancelstate(cancel_state, NULL);
            pthread_testcancel(); /* cancel opportunity */
            pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
#endif
            next_mode = not_found;
            if (ctx.link == NULL)
               ctx.link = ctx.node->first_link;
            pos = get_segment(&corpus->corpus, ctx.index);
            while (ctx.link) {
               if (ctx.m_start != (size_t)-1 && ctx.index - ctx.m_start >= max_match_length)
               {
                  /* skip */
               } else if (ctx.link->symbol != SYMBOL_DOT && poliqarp_expression_type(
                     (const struct poliqarp_expression *)ctx.link->symbol) == POLIQARP_EXPRESSION_PHRASE)
               {
                  if (!corpus->syntax.syntax)
                     break;
                  for (;;) {
                     if (ctx.phrase == corpus->syntax.end) {
                        if (poliqarp_backend_syntax_next(&corpus->syntax) == -1)
                           goto look_breakout;
                     }
                     if (corpus->syntax.groups[ctx.phrase].from < ctx.index) {
                        ctx.phrase++;
                        continue;
                     } else if (corpus->syntax.groups[ctx.phrase].from == ctx.index) {
                        if (phrase_expression_eval((const struct poliqarp_expression *)ctx.link->symbol,
                               corpus, ctx.phrase))
                        {
                           next_mode = phrase_found;
                           goto look_breakout;
                        } else
                           ctx.phrase++;
                     } else 
                        break; 
                  }
               } else if (ctx.link->symbol == SYMBOL_DOT || 
                  (ctx.index < corpus_size && 
                   poliqarp_expression_eval(ctx.link->symbol, corpus, &pos, variable_bindings)))
               {
                  if (ctx.link->next)
                     return_stack_push_link(&stack, &ctx);
                  next_mode = found;
                  if (ctx.link->flags)
                     ctx.c_focus = ctx.index;
                  break;
               }
               ctx.link = ctx.link->next;
            }
look_breakout:
            if (ctx.node->flags.is_final) {
               ctx.m_focus = ctx.c_focus; /* promote focus candidate */
               ctx.m_end = ctx.index;
            }
            break;

         case found:
            ctx.node = ctx.link->to; /* follow link */
            ctx.link = NULL; 
            if (ctx.m_start == (size_t)-1) /* remember first item */
               ctx.m_start = ctx.index;
            ctx.index++;
            if (end_of_within(&ctx, this)) {
               if (ctx.node->flags.is_final)
                  ctx.m_end = ctx.index;
               next_mode = ctx.m_end != (size_t)-1 ? add : cleanup;
            } else
               next_mode = look;
            progress_helper = update_progress(this, progress, ctx.index,
               progress_helper);
            break;

         case phrase_found:
            /* FIXME: check whether this is not the last phrase */
            return_stack_push_phrase(&stack, &ctx);
            ctx.node = ctx.link->to; /* follow link */
            ctx.link = NULL;
            if (ctx.m_start == (size_t)-1) /* remember first item */
               ctx.m_start = ctx.index;
            ctx.index = corpus->syntax.groups[ctx.phrase].to + 1;
            if (end_of_within(&ctx, this)) {
               if (ctx.node->flags.is_final)
                  ctx.m_end = ctx.index;
               next_mode = ctx.m_end != (size_t)-1 ? add : cleanup;
            } else
               next_mode = look;
            progress_helper = update_progress(this, progress, ctx.index,
               progress_helper);
            break;
            
         case not_found:
            if (ctx.m_start != (size_t)-1) {
               if (ctx.m_end == (size_t)-1 || ctx.m_start == ctx.m_end)
                  next_mode = cleanup;
               else
                  next_mode = add;
               /* note: we don't advance here */
            } else if (this->eflags & POLIQARP_QEFLAG_HAS_VARIABLES) {
               next_mode = cleanup;
            } else {
               ctx.index++;
               if ((ctx.index & 0xffff) == 0) {
                  /* From time to time, update the progress even if there are
                   * no matches. */
                  progress_helper = update_progress(this, progress, ctx.index,
                     progress_helper);
               }
               next_mode = boundary;
            }
            break;

         case add:
            match.start = ctx.m_start;
            assert(ctx.m_end != (size_t)-1);
            match.end = ctx.m_end;
            match.focus = ctx.m_focus == (size_t)-1 ? ctx.m_start : ctx.m_focus;
            match.document = corpus->document.current - 1;
            pthread_mutex_lock(&result->mutex);
            result->num_results++;
            pthread_mutex_unlock(&result->mutex);
            if (result->used < result->capacity) {
               poliqarp_append_query_result(result, &match);
               if (this->random_state) {
                  size_t i = poliqarp_random(this->random_state) % result->num_results;
                  poliqarp_swap_query_result(result, i);
               } else {
                  if (notify_step > 0 && result->num_results % notify_step == 0)
                     async_notify_new_results(session);
                  if (result->num_results == max)
                     goto out;
               }
            } else {
               if (this->random_state) {
                  size_t i = poliqarp_random(this->random_state) % result->num_results;
                  if (i < result->used)
                     poliqarp_replace_query_result(result, i, &match);
               } else {
                  goto out;
               }
            }
               
            return_stack_wipe(&stack);
            /* Comment out the following line for (slower) support of subresults,
               i.e. results that are part of other results. */
            ctx.m_start = ctx.m_end - 1;
            next_mode = cleanup;
            break;
            
         case finish:
            goto out;
      }
   }

out:
   if (result->used > max)
      result->used = max;
   this->have_last_context = true;
   this->last_context = ctx;
#ifndef POLIQARP_SINGLE_THREADED
   pthread_cleanup_pop(1);
   pthread_setcancelstate(cancel_state, NULL);
#else
   poliqarp_produce_cleanup(&stack);
#endif
   return 0;
}

int poliqarp_create_match_buffer(struct poliqarp_match_buffer *this, 
   size_t capacity)
{
   assert(this != NULL);

   this->match = malloc(sizeof *this->match * capacity);
   this->used = 0;
   this->num_results = 0;
   this->capacity = capacity;
   this->sort_arena = NULL;
   pthread_mutex_init(&this->mutex, NULL);
   return 0;
}

int poliqarp_destroy_match_buffer(struct poliqarp_match_buffer *this)
{
   assert(this != NULL);
   free(this->match);
   if (this->sort_arena)
      marena_destroy(this->sort_arena);
   pthread_mutex_destroy(&this->mutex);
   return 0;
}

int poliqarp_get_match_buffer_info(struct poliqarp_match_buffer *buffer,
   struct poliqarp_match_buffer_info *info)
{
   pthread_mutex_lock(&buffer->mutex);
   info->capacity = buffer->capacity;
   info->used = buffer->used;
   info->num_results = buffer->num_results;
   pthread_mutex_unlock(&buffer->mutex);
   return 0;
}

int poliqarp_forget(struct poliqarp_match_buffer *buffer)
{
   buffer->used = 0;
   buffer->num_results = 0;
   return 0;
}

int poliqarp_resize_match_buffer(struct poliqarp_match_buffer *buffer,
   size_t size)
{
   if (size < buffer->used) {
      memmove(buffer->match, buffer->match + buffer->used - size, 
         size * sizeof *buffer->match);
      buffer->used = size;
   }
   buffer->match = realloc(buffer->match, size * sizeof *buffer->match);
   buffer->capacity = size;
   return 0;
}

struct sort_helper_descriptor {
   uint32_t *segments;
   size_t length;
};

static inline int sort_cmp(struct sort_helper_descriptor *hdesc, uint32_t x,
   uint32_t y, const struct poliqarp_sort_info *criteria, 
   struct poliqarp_corpus *corpus)
{
   int res = 0;
   size_t xl = 0, yl = 0;
   if (criteria->atergo) {
      if (hdesc[x].length && hdesc[y].length) { 
         xl = hdesc[x].length - 1;
         yl = hdesc[y].length - 1;
         while (xl && yl) {
            uint32_t xo = hdesc[x].segments[xl], yo = hdesc[y].segments[yl];
            size_t i1, i2;
            i1 = poliqarp_backend_orth_atergo_fetch(poliqarp_get_const_backend(
               corpus, orth), xo);
            i2 = poliqarp_backend_orth_atergo_fetch(poliqarp_get_const_backend(
               corpus, orth), yo); 
            res = i1 == i2 ? 0 : (i1 < i2 ? -1 : +1);
            if (res)
               break;
            xl--; yl--;
         }
      } else {
         xl = hdesc[x].length;
         yl = hdesc[y].length;
      }
   } else {
      while (xl < hdesc[x].length && yl < hdesc[y].length) {
         uint32_t xo = hdesc[x].segments[xl], yo = hdesc[y].segments[yl];
         size_t i1, i2;
         i1 = poliqarp_backend_orth_afronte_fetch(poliqarp_get_const_backend(
            corpus, orth), xo);
         i2 = poliqarp_backend_orth_afronte_fetch(poliqarp_get_const_backend(
            corpus, orth), yo); 
         res = i1 == i2 ? 0 : (i1 < i2 ? -1 : +1);
         if (res)
            break;
         xl++; yl++;
      }
      xl = hdesc[x].length - xl;
      yl = hdesc[y].length - yl;
   }
   res = res ? res : xl ? 1 : yl ? -1 : 0;
   if (!criteria->ascending)
      res = -res;
   return (int) res;
} 

static void sort_helper(struct sort_helper_descriptor *hdesc, uint32_t *arr, 
   size_t start, size_t end, uint32_t *scratch, 
   const struct poliqarp_sort_info *criteria, struct poliqarp_corpus *corpus, 
   progress_t *progress)
{
   size_t i = 0, length = end - start, mid = length / 2, l = start, r = l + mid;
   if (start + 1 >= end)
      return;
   sort_helper(hdesc, arr, start, start + mid, scratch, criteria, corpus, 
      progress);
   sort_helper(hdesc, arr, start + mid, end, scratch, criteria, corpus, 
      progress);
   for (i = 0; i < length; i++) {
      if (l < start + mid && 
         (r == end || sort_cmp(hdesc, arr[l], arr[r], criteria, corpus) <= 0))
      {
         scratch[i] = arr[l++];
      } else {
         scratch[i] = arr[r++];
      }  
   }
   memcpy(arr + start, scratch, length * sizeof(*arr));
#ifndef POLIQARP_SINGLE_THREADED
   pthread_testcancel();
#endif
}

struct read_serializer {
   size_t left;
   size_t right; 
   size_t pos;
};

static int compare_serializers(const void *x, const void *y)
{
   const struct read_serializer *xx = (const struct read_serializer *)x,
                                *yy = (const struct read_serializer *)y;
   return (long)xx->left - (long)yy->left;
}

static struct sort_helper_descriptor *sort_prepare_helper(
   struct poliqarp_match_buffer *buffer, 
   const struct poliqarp_sort_info *criteria)
{
   struct sort_helper_descriptor *result;
   size_t i;
   struct read_serializer *ser;
   
   if (buffer->sort_arena == NULL) {
      buffer->sort_arena = malloc(sizeof(*(buffer->sort_arena)));
      if (buffer->sort_arena == NULL)
         return NULL;
      marena_create(buffer->sort_arena);
   }
   result = malloc(buffer->used * sizeof(*result));
   if (result == NULL)
      return NULL;
   ser = malloc(buffer->used * sizeof(*ser));
   if (ser == NULL) {
      free(result);
      return NULL;
   }
   for (i = 0; i < buffer->used; i++) {
      size_t left = 0, right = 0;
      size_t segnum;
      switch (criteria->column) {
         case POLIQARP_COLUMN_LEFT_CONTEXT:
            left = buffer->match[i].start < criteria->context ? 0 :
               buffer->match[i].start - criteria->context;
            right = buffer->match[i].start;
            break;
         case POLIQARP_COLUMN_LEFT_MATCH:
            left = buffer->match[i].start;
            right = buffer->match[i].focus;
            break;
         case POLIQARP_COLUMN_MATCH:
            left = buffer->match[i].start;
            right = buffer->match[i].end;
            break;
         case POLIQARP_COLUMN_RIGHT_MATCH:
            left = buffer->match[i].focus;
            right = buffer->match[i].end;
            break;
         case POLIQARP_COLUMN_RIGHT_CONTEXT:
            left = buffer->match[i].end;
            right = buffer->match[i].end + criteria->context;
            segnum = poliqarp_backend_corpus_size(&buffer->corpus->corpus);
            if (right > segnum)
               right = segnum;
            break;
         default:
            abort(); /* Should not happen. */
      }
      ser[i].left = left;
      ser[i].right = right;
      ser[i].pos = i;
   }
   qsort(ser, buffer->used, sizeof(*ser), compare_serializers);
   for (i = 0; i < buffer->used; i++) 
   {
      size_t k = ser[i].pos, left = ser[i].left, right = ser[i].right;
      result[k].length = right - left;
      if (result[k].length > 0) {
         size_t j;
         result[k].segments = marena_alloc(buffer->sort_arena, 
            sizeof(uint32_t) * result[k].length);
         if (result[k].segments == NULL) {
            free(result);
            return NULL;
         }
         for (j = 0; j < result[k].length; j++) {
            struct poliqarp_binary_segment s = poliqarp_backend_corpus_get(
               &buffer->corpus->corpus, left + j);
            result[k].segments[j] = s.orth_space_id >> 1;
         }
      } else {
         result[k].segments = NULL;
      }
   }
   free(ser);
   return result;
}

struct poliqarp_sort_sandbox {
   uint32_t *scratch;
   uint32_t *array;
   struct sort_helper_descriptor *helper_descriptor;
   struct poliqarp_match_buffer *buffer;
};

static void poliqarp_sort_match_buffer_cleanup(void *data)
{
   struct poliqarp_sort_sandbox *cleanup_data = data;
   free(cleanup_data->scratch);
   free(cleanup_data->array);
   free(cleanup_data->helper_descriptor);
   if (cleanup_data->buffer->sort_arena != NULL)
      marena_release(cleanup_data->buffer->sort_arena);
}

/* 
 * Main sorting routine -- implements mergesort.
 *
 * Note: DO NOT use qsort(), because:
 * (a) this might not be portable; on some systems we might have to provide
 *     a replacement anyway
 * (b) it imposes a performance impact because of calls to comparator
 * (c) it is not stable by default -- it CAN be made stable, but that means
 *     performance costs
 * (d) it does not support progress reporting
 */
int poliqarp_sort_match_buffer(struct poliqarp_match_buffer *buffer,
   const struct poliqarp_sort_info *criteria, progress_t *progress)
{   
   struct poliqarp_sort_sandbox sandbox = { NULL, NULL, NULL, NULL };
   uint32_t i;
   int res = 0;
#ifndef POLIQARP_SINGLE_THREADED
   int cancel_state;
#endif

   if (buffer->used < 2)
      return 0;
   
#ifndef POLIQARP_SINGLE_THREADED
   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &cancel_state); 
   pthread_cleanup_push(poliqarp_sort_match_buffer_cleanup, &sandbox);
#endif
   sandbox.buffer = buffer;
   if ((sandbox.helper_descriptor = sort_prepare_helper(buffer, criteria)) == NULL) 
      goto err;
   sandbox.scratch = malloc(buffer->used * sizeof(*sandbox.scratch));
   if (sandbox.scratch == NULL)
   {
      res = -1;
      goto err;
   }
   sandbox.array = malloc(buffer->used * sizeof(*sandbox.array));
   if (sandbox.array == NULL)
   {
      res = -1;
      goto err;
   }
   for (i = 0; i < buffer->used; i++)
      sandbox.array[i] = i;
#ifndef POLIQARP_SINGLE_THREADED
   pthread_setcancelstate(cancel_state, NULL);
#endif   
   sort_helper(sandbox.helper_descriptor, sandbox.array, 0, buffer->used,
      sandbox.scratch, criteria, buffer->corpus, progress);
#ifndef POLIQARP_SINGLE_THREADED
   pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
#endif

   /* rearrange buffer */
   for (i = 0; i < buffer->used; i++)
      sandbox.scratch[sandbox.array[i]] = i;
   for (i = 0; i < buffer->used; i++) {
      struct poliqarp_match m = buffer->match[i];
      buffer->match[i] = buffer->match[sandbox.array[i]];
      buffer->match[sandbox.array[i]] = m;
      sandbox.array[sandbox.scratch[i]] = sandbox.array[i];
      sandbox.scratch[sandbox.array[i]] = sandbox.scratch[i];
   };
err:
#ifndef POLIQARP_SINGLE_THREADED
   ; /* just to shut up compilers */
   pthread_cleanup_pop(1);
   pthread_setcancelstate(cancel_state, NULL);
#else
   poliqarp_sort_match_buffer_cleanup(&sandbox);
#endif   
   return res;
}

int poliqarp_get_match(const struct poliqarp_match_buffer *buffer,
   struct poliqarp_match *match, size_t index)
{
#ifndef NDEBUG
   if (index >= buffer->used)
      return -1;
#endif
   *match = buffer->match[index];
   return 0;
}

int poliqarp_get_match_for_document(const struct poliqarp_corpus *corpus,
   size_t id, struct poliqarp_match *match)
{
   struct poliqarp_document document;
   if (poliqarp_backend_document_fetch(&corpus->document, id, &document) == -1)
      return -1;
   match->start = match->focus = document.corpus_low;
   match->end = document.corpus_high;
   match->document = id;
   return 0;
}

void poliqarp_append_query_result(struct poliqarp_match_buffer *this,
   const struct poliqarp_match *match)
{
   pthread_mutex_lock(&this->mutex);
   assert(this->used < this->capacity);
   this->match[this->used++] = *match;
   pthread_mutex_unlock(&this->mutex);
}


static void poliqarp_unindex_item(bitset bs, struct poliqarp_searchable_area *area,
   size_t distance, size_t what, struct poliqarp_rindex *index)
{
   uint32_t freq, i, b, pos = 0;
   size_t dmin = distance / area->granularity, 
          dmax = (distance + area->granularity - 1) / area->granularity;
   poliqarp_rindex_set(index, what);
   freq = decode_gamma(&index->ibs);
   b = get_golomb_parameter(area->num_bits, freq);
   for (i = 0; i < freq; i++) {
      uint32_t x = decode_golomb(&index->ibs, b);
      if (i == 0)
         pos = x - 1;
      else
         pos += x;
      if (pos >= dmin)
         bitset_arena_set(&area->arena, bs, pos - dmin);
      if (pos >= dmax)
         bitset_arena_set(&area->arena, bs, pos - dmax);
   }
}

static bitset poliqarp_unindex_value(struct poliqarp_value *value, 
   struct poliqarp_searchable_area *area, size_t distance, int indices, 
   const struct poliqarp_backend_index *index)
{
   struct poliqarp_rindex *my_index;
   size_t i, pos;
   bitset res;

   if (value->num_hits > POLIQARP_UNINDEX_THRESHOLD)
      return NULL;
   switch (value->domain) {
      case POLIQARP_DOMAIN_ORTH:
         my_index = index->orth_index;
         break;
      case POLIQARP_DOMAIN_SPACE:
         my_index = NULL;
         break;
      case POLIQARP_DOMAIN_INTERP__DISAMB:
         my_index = index->disamb_index;
         break;
      case POLIQARP_DOMAIN_INTERP__AMB:
         my_index = index->amb_index;
         break;
      default:
         abort(); /* Should not happen. */
   }
   if (my_index == NULL)
      return NULL;
   res = bitset_arena_alloc(&area->arena);
   i = value->num_hits;
   pos = 0;
   while (i) {
      if (BIT_ARRAY_GET(value->bits, pos)) {
         poliqarp_unindex_item(res, area, distance, pos, my_index);
         i--;
      }
      pos++;
   }
   return res;
}

static bitset poliqarp_unindex_expression(struct poliqarp_expression *expr, 
   struct poliqarp_searchable_area *area, size_t distance, int indices, 
   const struct poliqarp_backend_index *index)
{
   bitset bs_left, bs_right;
   if (expr == SYMBOL_DOT)
      return NULL;
   switch (expr->type) {
      case POLIQARP_EXPRESSION_CONSTANT:
         return NULL;
      case POLIQARP_EXPRESSION_AND:
         if (expr->as.expression.negate)
            return NULL;
         bs_left = poliqarp_unindex_expression(expr->as.expression.left, area, 
            distance, indices, index);
         bs_right = poliqarp_unindex_expression(expr->as.expression.right, area,
            distance, indices, index);
         if (bs_left == NULL)
            return bs_right;
         if (bs_right == NULL)
            return bs_left;
         bitset_arena_intersect(&area->arena, bs_left, bs_right);
         return bs_left;
      case POLIQARP_EXPRESSION_OR:
         if (expr->as.expression.negate)
            return NULL;
         bs_left = poliqarp_unindex_expression(expr->as.expression.left, area, 
            distance, indices, index);
         bs_right = poliqarp_unindex_expression(expr->as.expression.right, area,
            distance, indices, index);
         if (bs_left == NULL || bs_right == NULL)
            return NULL;
         bitset_arena_union(&area->arena, bs_left, bs_right);
         return bs_left;
      case POLIQARP_EXPRESSION_VALUE:
         if (expr->as.value.negate)
            return NULL;
         return poliqarp_unindex_value(expr->as.value.value, area, distance,
            indices, index);
      default:
         return NULL;
   };
}

void poliqarp_recursive_create_area(struct poliqarp_searchable_area *area, 
   bitset work, struct dfs_node *node, int indices, 
   const struct poliqarp_backend_index *index)
{
   bitset bs;
   struct dfs_link *link;

   if (node->distance == (size_t)-1 || node->flags.is_final) {
      bitset_arena_union(&area->arena, area->area, work);
      return;
   }
   bs = bitset_arena_copy(&area->arena, work);
   for (link = node->first_link; link != NULL; link = link->next) {
      bitset link_bs;

      bitset_arena_copy_to(&area->arena, bs, work);
      link_bs = poliqarp_unindex_expression(link->symbol, area, node->distance,
         indices, index);
      if (link_bs)
         bitset_arena_intersect(&area->arena, work, link_bs);
      poliqarp_recursive_create_area(area, work, link->to, indices, index);
   }
}

struct dfs_queue_item {
   void* symbol;
   const struct dfs_node *node;
   size_t distance;
};

struct dfs_queue {
   struct dfs_queue_item *items;
   size_t start;
   size_t length;
   size_t max_length;
   size_t max_distance;
   bool overflown;
};

static void dfs_queue_create(struct dfs_queue *queue, size_t max_length, size_t max_distance)
{
   queue->items = calloc(max_length, sizeof(struct dfs_queue_item));
   if (queue->items == NULL)
      abort();
   queue->start = 0;
   queue->length = 0;
   queue->max_length = max_length;
   queue->max_distance = max_distance;
   queue->overflown = false;
}

static void dfs_queue_destroy(struct dfs_queue *queue)
{
   free(queue->items);
   queue->items = NULL;
}

static void dfs_queue_push(struct dfs_queue *queue, void *symbol,
   const struct dfs_node *node, size_t distance)
{
   size_t i;
   if (queue->overflown)
      return;
   if (node->flags.is_final && distance < queue->max_distance)
      queue->max_distance = distance;
   if (distance > queue->max_distance)
   {
      queue->overflown = true;
      return;
   }
   if (queue->length >= queue->max_length)
   {
      queue->overflown = true;
      while (queue->length > 0)
      {
         i = (queue->start + queue->length - 1) % queue->max_length;
         if (queue->items[i].distance >= distance)
            queue->length--;
         else
            break;
      }
      return;
   }
   i = (queue->start + queue->length) % queue->max_length;
   queue->items[i].symbol = symbol;
   queue->items[i].node = node;
   queue->items[i].distance = distance;
   queue->length++;
}

static const struct dfs_queue_item* dfs_queue_pop(struct dfs_queue *queue)
{
   size_t i;
   if (queue->length == 0)
      return NULL;
   i = queue->start;
   queue->start = (queue->start + 1) % queue->max_length;
   queue->length--;
   return queue->items + i;
}

static void poliqarp_bfs_create_area(struct poliqarp_searchable_area *area, 
   struct dfs_node *start_node, size_t num_nodes,
   int indices, const struct poliqarp_backend_index *index)
{
   bitset work_bs;
   size_t current_distance = 0;
   const struct dfs_queue_item *queue_item;
   const struct dfs_link *link;
   struct dfs_queue queue;
   dfs_queue_create(&queue, 3 * num_nodes, num_nodes);
   work_bs = bitset_arena_alloc(&area->arena);
   for (link = start_node->first_link; link != NULL; link = link->next)
      dfs_queue_push(&queue, link->symbol, link->to, 0);
   while (1)
   {
      queue_item = dfs_queue_pop(&queue);
      if (queue_item == NULL)
         break;
      if (queue_item->distance > current_distance)
      {
         if (work_bs != NULL)
            bitset_arena_intersect(&area->arena, area->area, work_bs);
         work_bs = poliqarp_unindex_expression(queue_item->symbol, area,
            queue_item->distance, indices, index);
         current_distance++;
      }
      else if (work_bs)
      {
         bitset tmp_bs = poliqarp_unindex_expression(queue_item->symbol, area,
            queue_item->distance, indices, index);
         if (tmp_bs)
            bitset_arena_union(&area->arena, work_bs, tmp_bs);
         else
            work_bs = NULL;
      }
      for (link = queue_item->node->first_link; link != NULL; link = link->next)
         dfs_queue_push(&queue, link->symbol, link->to, current_distance + 1);
   }
   if (work_bs != NULL)
      bitset_arena_intersect(&area->arena, area->area, work_bs);
   dfs_queue_destroy(&queue);
}

void poliqarp_create_search_area(struct poliqarp_query *this)
{
   struct poliqarp_searchable_area *area = &this->area;
   size_t num_segments = poliqarp_backend_corpus_size(&this->corpus->corpus);

   if (this->corpus->config.cdf.indices == 0) {
      area->num_bits = 1;
      area->granularity = num_segments;
   } else {
      area->granularity = this->corpus->config.cdf.granularity;
      area->num_bits = num_segments / area->granularity;
      if (area->num_bits * area->granularity < num_segments)
         area->num_bits++;
   }

   bitset_arena_create(&area->arena, area->num_bits, NULL);
   area->area = bitset_arena_alloc(&area->arena);
   if (this->corpus->config.cdf.indices == 0) {
      bitset_arena_set(&area->arena, area->area, 0);
   } else {
      /* Try depth-first search to restrict area: */
      bitset work = bitset_arena_alloc_ones(&area->arena);
      poliqarp_recursive_create_area(area, work, this->graph.dfs.root,
         this->corpus->config.cdf.indices, poliqarp_get_const_backend(this->corpus,
            index));
      /* Try breadth-first search to restrict area: */
      poliqarp_bfs_create_area(area,
         this->graph.dfs.root, this->graph.dfs.num_nodes,
         this->corpus->config.cdf.indices,
         poliqarp_get_const_backend(this->corpus, index));
   }
   area->num_segments = area->granularity * bitset_count_ones(&area->arena, 
      area->area);
   if (bitset_arena_get(&area->arena, area->area, area->num_bits - 1)) 
      area->num_segments = area->num_segments - area->granularity + 
         (num_segments - 1) % area->granularity + 1;
}