Skip to content
Snippets Groups Projects
value-attr.c 4.58 KiB
/*
 * This file is part of the Poliqarp suite.
 * 
 * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej
 * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 * Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 * 
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file gpl.txt included in the packaging
 * of this file.  (See http://www.gnu.org/licenses/translations.html for
 * unofficial translations.)
 * 
 * A commercial license is available from IPI PAN (contact
 * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
 * information).  Licensees holding a valid commercial license from IPI
 * PAN may use this file in accordance with that license.
 * 
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

#include <errno.h>
#include <stdlib.h>

#include <sakura/value-attr.h>
#include <sakura/regexp.h>

/**
 * Build a tag-domain value selecting every tag whose value for the attribute
 * called `name` matches `pattern`.
 *
 * The attribute is looked up among the corpus configuration's named items.
 * Each of its values whose name makes poliqarp_regexp_match() return non-zero
 * is marked in a temporary bitmap indexed by value id; then every tag of the
 * tag backend is visited and bit `i` of the result is set when tag `i`
 * carries one of the marked values for this attribute.
 *
 * @param corpus   corpus providing the tag backend and the configuration
 * @param name     name of the attribute to look up
 * @param pattern  regular expression matched against the attribute value names
 * @return a newly allocated value (both the structure and its `bits` array
 *         are heap-allocated here and ownership passes to the caller), or
 *         NULL with errno set to ENOENT when `name` does not denote an
 *         attribute, or to ENOMEM when an allocation fails.
 */
struct poliqarp_value *poliqarp_value_create_attr(const struct poliqarp_corpus *corpus,
   const char *name, const struct poliqarp_regexp *pattern)
{
   const struct poliqarp_backend_tag *backend_tag = &corpus->tag;
   const struct poliqarp_backend_config *backend_config = &corpus->config;
   const struct poliqarp_attr_value *value;
   const struct poliqarp_attr_value *attr_value;
   const struct entity *entity;
   struct poliqarp_attr *attr;
   struct poliqarp_value *this = NULL;
   unsigned int *bitmap = NULL;
   size_t bitmap_bytes;
   size_t attr_id;
   size_t i;

   /* find our attribute */
   entity = lookup_const_entity(&backend_config->named_items, name);
   if (entity == NULL ||
      *(enum poliqarp_entity_type *) entity->tag != POLIQARP_ENTITY_ATTR)
   {
      errno = ENOENT;
      return NULL;
   }
   attr = entity->data;
   attr_id = attr->id;

   /* Zeroed bitmap with one bit per value of the attribute.  At least one
    * byte is requested so that a NULL result always means failure, even
    * where a zero-sized allocation may legitimately return NULL. */
   bitmap_bytes = BIT_ARRAY_LENGTH_BYTES(bitmap, attr->num_values);
   bitmap = calloc(1, bitmap_bytes ? bitmap_bytes : 1);
   if (bitmap == NULL)
      goto out_of_memory;

   /* mark the attribute values whose name matches the pattern */
   for (value = attr->first_value; value; value = value->next_value) {
      if (poliqarp_regexp_match(pattern, value->self->name))
         BIT_ARRAY_SET(bitmap, value->id);
   }

   /* now allocate the resulting value */
   this = malloc(sizeof *this);
   if (this == NULL)
      goto out_of_memory;
   this->num_items = poliqarp_backend_tag_num_items(backend_tag);
   this->num_bytes = BIT_ARRAY_LENGTH_BYTES(this->bits, this->num_items);
   this->num_hits = 0;
   this->domain = POLIQARP_DOMAIN_TAG;

   /* the bit field starts out cleared */
   this->bits = calloc(1, this->num_bytes ? this->num_bytes : 1);
   if (this->bits == NULL)
      goto out_of_memory;

   /* iterate over all tags and find those that match */
   for (i = 0; i < this->num_items; ++i) {
      attr_value = poliqarp_backend_parsed_tag_fetch(backend_tag, i)->
         attr_value[attr_id];
      /* a tag may have no value for this attribute */
      if (attr_value && BIT_ARRAY_GET(bitmap, attr_value->id)) {
         BIT_ARRAY_SET(this->bits, i);
         ++this->num_hits;
      }
   }

   free(bitmap);
   return this;

out_of_memory:
   /* this->bits can only be NULL here, so releasing `this` is sufficient */
   free(this);
   free(bitmap);
   errno = ENOMEM;
   return NULL;
}

/**
 * Build one tag-domain value per value of the attribute called `name`.
 *
 * The attribute is looked up among the corpus configuration's named items.
 * For every entry of its value list a value structure is created and stored
 * in the returned array at the index given by that entry's id; bit `i` of
 * its bit field is set when tag `i` of the tag backend carries an attribute
 * value with the same id.  Array slots that no list entry refers to are NULL.
 *
 * @param corpus  corpus providing the tag backend and the configuration
 * @param name    name of the attribute to look up
 * @param n       receives the number of array slots (the attribute's
 *                `num_values`); written only on success
 * @return a newly allocated array of newly allocated values (ownership of
 *         the array, the structures and their `bits` arrays passes to the
 *         caller), or NULL with errno set to ENOENT when `name` does not
 *         denote an attribute, or to ENOMEM when an allocation fails.
 */
struct poliqarp_value **poliqarp_value_create_all_attr(const struct poliqarp_corpus *corpus,
   const char *name, size_t *n)
{
   struct poliqarp_value **values, *this;

   const struct poliqarp_backend_tag *backend_tag = &corpus->tag;
   const struct poliqarp_backend_config *backend_config = &corpus->config;
   const struct poliqarp_attr_value *avalue1, *avalue2;
   const struct entity *entity;
   struct poliqarp_attr *attr;
   size_t n_values, n_tags, i;

   entity = lookup_const_entity(&backend_config->named_items, name);
   if (entity == NULL ||
      *(enum poliqarp_entity_type *) entity->tag != POLIQARP_ENTITY_ATTR)
   {
      errno = ENOENT;
      return NULL;
   }
   attr = entity->data;

   /* Array of pointers, one slot per attribute value.  calloc() checks the
    * size multiplication and leaves unused slots NULL; at least one element
    * is requested so that a NULL result always means failure. */
   n_values = attr->num_values;
   values = calloc(n_values ? n_values : 1, sizeof *values);
   if (values == NULL) {
      errno = ENOMEM;
      return NULL;
   }

   n_tags = poliqarp_backend_tag_num_items(backend_tag);
   for (avalue1 = attr->first_value; avalue1; avalue1 = avalue1->next_value) {
      this = malloc(sizeof *this);
      if (this == NULL)
         goto out_of_memory;
      this->num_items = n_tags;
      this->num_bytes = BIT_ARRAY_LENGTH_BYTES(this->bits, n_tags);
      this->domain = POLIQARP_DOMAIN_TAG;
      this->num_hits = 0;
      /* the bit field starts out cleared */
      this->bits = calloc(1, this->num_bytes ? this->num_bytes : 1);
      if (this->bits == NULL) {
         free(this);
         goto out_of_memory;
      }
      /* publish only fully constructed values so cleanup stays simple */
      values[avalue1->id] = this;

      for (i = 0; i < n_tags; i++) {
         const struct poliqarp_parsed_tag *parsed_tag =
            poliqarp_backend_parsed_tag_fetch(backend_tag, i);
         avalue2 = parsed_tag->attr_value[attr->id];
         /* a tag may have no value for this attribute */
         if (avalue2 && (avalue1->id == avalue2->id)) {
            BIT_ARRAY_SET(this->bits, i);
            this->num_hits++;
         }
      }
   }

   *n = n_values;
   return values;

out_of_memory:
   /* release every value constructed so far, then the array itself */
   for (i = 0; i < n_values; i++) {
      if (values[i] != NULL) {
         free(values[i]->bits);
         free(values[i]);
      }
   }
   free(values);
   errno = ENOMEM;
   return NULL;
}