/*
* This file is part of the Poliqarp suite.
*
* Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej
* Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
* Academy of Sciences; cf. www.ipipan.waw.pl). All rights reserved.
*
* This file may be distributed and/or modified under the terms of the
* GNU General Public License version 2 as published by the Free Software
* Foundation and appearing in the file gpl.txt included in the packaging
* of this file. (See http://www.gnu.org/licenses/translations.html for
* unofficial translations.)
*
* A commercial license is available from IPI PAN (contact
* Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
* information). Licensees holding a valid commercial license from IPI
* PAN may use this file in accordance with that license.
*
* This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
* THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE.
*/
#include <foostring/foostring.h>
#include <sakura/backend-config.h>
#include <sakura/backend-index.h>
#include <sakura/abi.h>
#include <sakura/cdf.h>
#include <sakura/query-rewrite.h>
#include <sakura/exception.h>
#include <sakura/config.h>
/**
 * Identifiers of the sections recognised in a corpus configuration file.
 * The non-negative values are the ones paired with section names in the
 * table handed to the configuration parser.
 */
enum poliqarp_section {
   SECTION_NONE                = -1, /* no section */
   SECTION_STARTUP             = 0,  /* obsolete "/alias name = a b" syntax */
   SECTION_ALIASES             = 1,  /* name = value alias definitions */
   SECTION_ATTR                = 2,  /* attribute = list of values */
   SECTION_POS                 = 3,  /* part of speech = list of attributes */
   SECTION_NAMED_ENTITY        = 4,  /* entity = list of alias names */
   SECTION_QUERY_REWRITE_RULES = 5   /* rule set = pair of strings */
};
/* attribute construction */
/**
 * Create a new attribute named `name`.
 *
 * An entity tagged POLIQARP_ENTITY_ATTR is added to the named-items bag,
 * the attribute record is allocated from the configuration arena, and the
 * two are linked to each other.  The attribute receives the next free
 * attribute id.
 *
 * @param this  configuration that owns the attribute
 * @param name  name under which the entity is registered
 * @return the new attribute, or NULL when either the entity or the
 *         attribute record could not be created
 */
struct poliqarp_attr *poliqarp_new_attr(struct poliqarp_backend_config *this,
   const char *name)
{
   static enum poliqarp_entity_type tag = POLIQARP_ENTITY_ATTR;
   struct poliqarp_attr *attr;
   struct entity *ent = new_entity(&this->named_items, name, &tag, NULL);

   if (!ent)
      return NULL;

   attr = marena_alloc(&this->arena, sizeof *attr);
   if (!attr)
      return NULL;

   /* fill in the record: empty value list, next sequential id */
   attr->id = this->num_attr++;
   attr->self = ent;
   attr->first_value = NULL;
   attr->num_values = 0;

   /* make the entity point back at its attribute */
   ent->data = attr;
   return attr;
}
/**
 * Create a new value named `name` for attribute `attr`.
 *
 * The entity goes into the attribute-values bag (not the named-items bag),
 * tagged POLIQARP_ENTITY_ATTR_VALUE.  The value record is allocated from
 * the configuration arena, numbered with the attribute's current value
 * count and pushed onto the front of the attribute's value list.
 *
 * @param this  configuration that owns the value
 * @param attr  attribute the value belongs to
 * @param name  name under which the entity is registered
 * @return the new value, or NULL when either the entity or the value
 *         record could not be created
 */
struct poliqarp_attr_value *poliqarp_new_attr_value(
   struct poliqarp_backend_config *this, struct poliqarp_attr *attr, const char *name)
{
   static enum poliqarp_entity_type tag = POLIQARP_ENTITY_ATTR_VALUE;
   struct poliqarp_attr_value *value;
   struct entity *ent = new_entity(&this->attr_values, name, &tag, NULL);

   if (!ent)
      return NULL;

   value = marena_alloc(&this->arena, sizeof *value);
   if (!value)
      return NULL;

   value->attr = attr;
   value->self = ent;
   value->id = attr->num_values++;

   /* prepend to the attribute's singly linked list of values */
   value->next_value = attr->first_value;
   attr->first_value = value;

   /* make the entity point back at its value */
   ent->data = value;
   return value;
}
/* part of speech construction */
/**
 * Create a new part of speech named `name`.
 *
 * An entity tagged POLIQARP_ENTITY_POS is added to the named-items bag,
 * the part-of-speech record is allocated from the configuration arena with
 * an empty attribute-instance list, and it receives the next free
 * part-of-speech id.
 *
 * @param this  configuration that owns the part of speech
 * @param name  name under which the entity is registered
 * @return the new part of speech, or NULL when either the entity or the
 *         record could not be created
 */
struct poliqarp_part_of_speech *poliqarp_new_part_of_speech(
   struct poliqarp_backend_config *this, const char *name)
{
   static enum poliqarp_entity_type tag = POLIQARP_ENTITY_POS;
   struct poliqarp_part_of_speech *pos;
   struct entity *ent = new_entity(&this->named_items, name, &tag, NULL);

   if (!ent)
      return NULL;

   pos = marena_alloc(&this->arena, sizeof *pos);
   if (!pos)
      return NULL;

   pos->id = this->num_pos++;
   pos->self = ent;
   pos->first_instance = NULL;
   pos->num_instances = 0;

   /* make the entity point back at its part of speech */
   ent->data = pos;
   return pos;
}
/* attribute instance construction */
/**
 * Attach attribute `attr` to part of speech `pos`.
 *
 * The instance record is allocated from the configuration arena and pushed
 * onto the front of the part of speech's instance list.  The part of
 * speech's instance counter is advanced so that it tracks the length of
 * that list, mirroring how poliqarp_new_attr_value() maintains
 * `num_values` alongside `first_value`.
 *
 * @param this         configuration that owns the instance
 * @param pos          part of speech receiving the attribute
 * @param attr         attribute being attached
 * @param is_optional  whether the attribute is optional for this part of
 *                     speech
 * @return the new instance, or NULL when the arena allocation fails
 */
struct poliqarp_attr_instance *poliqarp_new_attr_instance(
   struct poliqarp_backend_config *this, struct poliqarp_part_of_speech *pos,
   struct poliqarp_attr *attr, bool is_optional)
{
   struct poliqarp_attr_instance *result = marena_alloc(&this->arena,
      sizeof *result);
   if (result == NULL)
      return NULL;
   /* create instance */
   result->next_instance = pos->first_instance;
   result->attr = attr;
   result->pos = pos;
   result->is_optional = is_optional;
   /* update attr instance list in part of speech */
   pos->first_instance = result;
   pos->num_instances++;
   return result;
}
/**
 * Configuration parser handler (defined at the end of this file).
 * Called with the configuration object as `extra`, the identifier of the
 * section the line belongs to, and the text of the line.
 */
static int poliqarp_backend_config_handler(void *extra, int section,
   char *text, struct poliqarp_error *error);
int poliqarp_backend_config_open(struct poliqarp_backend_config *this,
const char *base_name, struct poliqarp_error *error)
{
int rc;
bool have_marena = false, have_named_items = false,
have_attr_values = false, have_aliases = false,
have_query_rewrite_table = false;
static enum poliqarp_entity_type tag_pos = POLIQARP_ENTITY_ITEM_POS;
static enum poliqarp_entity_type tag_orth = POLIQARP_ENTITY_ITEM_ORTH;
static enum poliqarp_entity_type tag_base = POLIQARP_ENTITY_ITEM_BASE;
static enum poliqarp_entity_type tag_tag = POLIQARP_ENTITY_ITEM_TAG;
static enum poliqarp_entity_type tag_space = POLIQARP_ENTITY_ITEM_SPACE;
static enum poliqarp_entity_type tag_type = POLIQARP_ENTITY_ITEM_TYPE;
/* create arena */
marena_create(&this->arena);
have_marena = true;
/* create two entity bags */
create_entity_bag(&this->named_items, &this->arena);
have_named_items = true;
create_entity_bag(&this->attr_values, &this->arena);
have_attr_values = true;
/* create alias manager */
create_hash_table(&this->aliases, 25, HASHTABLE_DUPLICATE_KEYS, &this->arena);
have_aliases = true;
/* create query rewrite table */
poliqarp_create_query_rewrite_table(&this->query_rewrite_table);
have_query_rewrite_table = true;
/* reset counters */
this->num_attr = 0;
this->num_pos = 0;
/* create default entities */
new_entity(&this->named_items, "entity-pos", &tag_pos, NULL);
new_entity(&this->named_items, "entity-orth", &tag_orth, NULL);
new_entity(&this->named_items, "entity-base", &tag_base, NULL);
new_entity(&this->named_items, "entity-tag", &tag_tag, NULL);
new_entity(&this->named_items, "space", &tag_space, NULL);
new_entity(&this->named_items, "type", &tag_type, NULL);
static const struct poliqarp_config_section sections[] = {
{ "STARTUP", SECTION_STARTUP }, /* obsolete */
{ "ALIASES", SECTION_ALIASES },
{ "ATTR", SECTION_ATTR },
{ "POS", SECTION_POS },
{ "NAMED-ENTITY", SECTION_NAMED_ENTITY },
{ "QUERY-REWRITE-RULES", SECTION_QUERY_REWRITE_RULES },
{ NULL, 0 }
};
/* parse the config file */
char *conf_path = string_aformat("%s.cfg", base_name);
if (conf_path == NULL) {
poliqarp_error_from_system(error, "Unable to read corpus configuration file");
goto error;
}
rc = poliqarp_parse_config_file(this, conf_path,
sections, poliqarp_backend_config_handler, error);
if (rc != 0)
goto error;
free(conf_path);
/* read CDF */
rc = poliqarp_cdf_read(base_name, &this->cdf);
if (rc != 0 && errno != ENOENT) {
poliqarp_error_from_system(error, _("Unable to read CDF file"));
goto error;
}
/* validate corpus format */
if (this->cdf.version != POLIQARP_ABI_VERSION) {
poliqarp_error_message_set(error,
_("Version %d of binary format is not supported"), this->cdf.version);
goto error;
}
else if (this->cdf.endianness != ENDIAN_LE) {
poliqarp_error_message_set(error,
_("Big-endian binary format is not supported"), this->cdf.version);
goto error;
}
return 0;
error:
if (have_named_items)
destroy_entity_bag(&this->named_items);
if (have_attr_values)
destroy_entity_bag(&this->attr_values);
if (have_aliases)
destroy_hash_table(&this->aliases, free);
if (have_query_rewrite_table)
poliqarp_destroy_query_rewrite_table(&this->query_rewrite_table);
if (have_marena)
marena_destroy(&this->arena);
return -1;
}
/**
 * Release everything held by a backend configuration: both entity bags,
 * the alias hash table (its values are released with free()), the query
 * rewrite table and, last of all, the arena.
 *
 * @param this  configuration to tear down
 */
void poliqarp_backend_config_close(struct poliqarp_backend_config *this)
{
/* get rid of everything */
destroy_entity_bag(&this->named_items);
destroy_entity_bag(&this->attr_values);
destroy_hash_table(&this->aliases, free);
poliqarp_destroy_query_rewrite_table(&this->query_rewrite_table);
/* free everything we have, quick and painless */
marena_destroy(&this->arena);
}
/* tag parsers */
/**
 * Parse a colon-separated tag ("pos:value:value...") into `result`.
 *
 * The tag is tokenised in place with strtok(), so the buffer is destroyed
 * and the function shares strtok()'s hidden state with any other user of
 * it.  The first field must name a part of speech in the named-items bag;
 * every following field must name an attribute value in the
 * attribute-values bag and is stored in the slot indexed by the id of the
 * attribute it belongs to.  Slots of attributes not mentioned are NULL.
 *
 * @param this    configuration to look names up in
 * @param result  receives the part of speech and attribute values;
 *                `attr_value` must have room for `this->num_attr` entries
 * @param tag     writable tag text; modified by the call
 * @return 0 on success, -1 on failure.  errno is set to EINVAL when the
 *         tag has no fields or a field names an entity of the wrong kind;
 *         when a lookup fails errno is left as the lookup set it.
 */
int poliqarp_backend_config_parse(const struct poliqarp_backend_config *this,
   struct poliqarp_parsed_tag *result, char *tag)
{
   const struct entity *entity;
   char *item;
   size_t i;

   /* reset attributes */
   for (i = 0; i < this->num_attr; ++i)
      result->attr_value[i] = NULL;

   /* parse part of speech */
   item = strtok(tag, ":");
   if (item == NULL) {
      /* empty tag or nothing but separators: there is no name to look up */
      errno = EINVAL;
      return -1;
   }
   entity = lookup_const_entity(&this->named_items, item);
   if (entity == NULL)
      return -1;
   if (entity->tag == NULL ||
      *(enum poliqarp_entity_type *)entity->tag != POLIQARP_ENTITY_POS)
   {
      errno = EINVAL;
      return -1;
   }
   result->pos = entity->data;

   /* parse attributes */
   while ((item = strtok(NULL, ":"))) {
      entity = lookup_const_entity(&this->attr_values, item);
      if (entity == NULL)
         return -1;
      if (entity->tag == NULL ||
         *(enum poliqarp_entity_type *)entity->tag != POLIQARP_ENTITY_ATTR_VALUE)
      {
         errno = EINVAL;
         return -1;
      }
      /* file the value under the id of the attribute that owns it */
      result->attr_value[((struct poliqarp_attr_value *) entity->data)->attr->id] =
         entity->data;
   }
   return 0;
}
/**
 * Non-destructive version of poliqarp_backend_config_parse(): the tag is
 * duplicated, the duplicate is parsed and then released, and the caller's
 * string is left untouched.
 *
 * @return the result of poliqarp_backend_config_parse(), or -1 when the
 *         tag could not be duplicated
 */
int poliqarp_backend_config_parse_copy(const struct poliqarp_backend_config *this,
   struct poliqarp_parsed_tag *result, const char *tag)
{
   int status = -1;
   char *scratch = strdup(tag);

   if (scratch != NULL) {
      status = poliqarp_backend_config_parse(this, result, scratch);
      free(scratch);
   }
   return status;
}
/**
 * Handle one line of the corpus configuration file.
 *
 * Works in two phases.  The first splits the line into a variable
 * (name/value pair) and, for the ATTR, POS, NAMED-ENTITY and
 * QUERY-REWRITE-RULES sections, splits the value further into arguments;
 * STARTUP and ALIASES lines are fully processed in this phase by storing
 * an alias.  The second phase acts on the parsed line according to the
 * section: defining an attribute and its values, defining a part of speech
 * and its attribute instances, aliasing an existing named entity, or
 * adding a query rewrite rule.  Lines of any other section are ignored.
 *
 * @param extra    the struct poliqarp_backend_config being populated
 * @param section  one of the SECTION_* identifiers
 * @param text     text of the line
 * @param error    receives a description of the failure
 * @return 0 on success, -1 on failure
 */
static int poliqarp_backend_config_handler(void *extra, int section,
   char *text, struct poliqarp_error *error)
{
   struct poliqarp_backend_config *this = extra;
   /* Both flags start out false: they guard the cleanup at `done`, and
    * nothing has been parsed yet.  Starting `have_variable` as true would
    * release an uninitialised variable on every early failure and for
    * every section that is ignored. */
   bool have_args = false, have_variable = false;
   struct poliqarp_variable variable;
   struct text_args args;
   int rc;
   bool is_optional = false;
   size_t i;
   char *alias_value;
   struct entity *entity;
   struct entity *result;
   struct poliqarp_part_of_speech *pos;
   struct poliqarp_attr *attr;

   args_init(&args);

   /* phase 1: split the line */
   switch (section) {
      case SECTION_ATTR:
      case SECTION_POS:
      case SECTION_NAMED_ENTITY:
      case SECTION_QUERY_REWRITE_RULES:
         rc = poliqarp_parse_variable(&variable, text);
         if (rc != 0) {
            if (errno == EINVAL) {
               poliqarp_error_message_set(error, _("a line is not a key=value pair"));
               goto error;
            } else
               goto system_error;
         }
         have_variable = true;
         rc = args_parse(&args, variable.value);
         if (rc != 0)
            goto system_error;
         have_args = true;
         break;
      case SECTION_STARTUP: /* support for old syntax: /alias foo = bar baz */
         rc = poliqarp_parse_variable(&variable, text);
         if (rc != 0)
            goto system_error;
         have_variable = true;
         if (strncmp(variable.name, "/alias ", 7) == 0) {
            /* the old syntax separates alternatives with spaces */
            char *tmp = variable.value;
            for (; *tmp; tmp++)
               if (*tmp == ' ')
                  *tmp = '|';
            alias_value = strdup(variable.value);
            if (alias_value == NULL)
               goto system_error;
            /* NOTE(review): the copy is not released when the insertion
             * fails, because who owns it in that case is not visible
             * from here -- confirm against hash_table_set(). */
            rc = hash_table_set(&this->aliases, variable.name + 7, alias_value);
            if (rc != 0)
               goto system_error;
         }
         poliqarp_free_variable(&variable);
         have_variable = false;
         break;
      case SECTION_ALIASES:
         rc = poliqarp_parse_variable(&variable, text);
         if (rc != 0)
            goto system_error;
         have_variable = true;
         alias_value = strdup(variable.value);
         if (alias_value == NULL)
            goto system_error;
         /* NOTE(review): see the ownership remark in SECTION_STARTUP. */
         rc = hash_table_set(&this->aliases, variable.name, alias_value);
         if (rc != 0)
            goto system_error;
         poliqarp_free_variable(&variable);
         have_variable = false;
         break;
      default:
         break;
   }

   /* phase 2: act on the parsed line */
   switch (section) {
      case SECTION_POS:
         /* create new part of speech */
         pos = poliqarp_new_part_of_speech(this, variable.name);
         if (pos == NULL) {
            if (errno == EEXIST) {
               poliqarp_error_message_set(error,
                  _("unable to redefine part-of-speech '%s'"), variable.name);
               goto error;
            }
            else
               goto system_error;
         }
         for (i = 0; i < args.num_items; ++i) {
            char *item = args.item[i].value;
            /* detect optional markers */
            if (strcmp(item, "[") == 0) {
               is_optional = true;
               continue;
            }
            /* lookup name */
            entity = lookup_entity(&this->named_items, item);
            if (entity == NULL) {
               if (errno == ENOENT) {
                  poliqarp_error_message_set(error, _("entity '%s' is undefined"), item);
                  goto error;
               } else
                  goto system_error;
            }
            if (entity->tag == NULL ||
               *(enum poliqarp_entity_type *)entity->tag != POLIQARP_ENTITY_ATTR)
            {
               poliqarp_error_message_set(error, _("entity '%s' is not an attribute"), item);
               goto error;
            }
            attr = entity->data;
            /* create new attr instance */
            if (poliqarp_new_attr_instance(this, pos, attr, is_optional) == NULL)
               goto system_error;
            if (is_optional) {
               /* step over the item that follows the optional attribute */
               ++i;
               is_optional = false;
            }
         }
         break;
      case SECTION_ATTR:
         /* create new attribute */
         attr = poliqarp_new_attr(this, variable.name);
         if (attr == NULL) {
            if (errno == EEXIST) {
               poliqarp_error_message_set(error, _("unable to redefine attribute '%s'"),
                  variable.name);
               goto error;
            }
            else
               goto system_error;
         }
         /* add values */
         for (i = 0; i < args.num_items; ++i)
            if (poliqarp_new_attr_value(this, attr, args.item[i].value) == NULL) {
               if (errno == EEXIST) {
                  poliqarp_error_message_set(error, _("duplicate attribute value '%s'"), args.item[i].value);
                  goto error;
               }
               else
                  goto system_error;
            }
         break;
      case SECTION_NAMED_ENTITY:
         entity = lookup_entity(&this->named_items, variable.name);
         if (entity == NULL) {
            if (errno == ENOENT) {
               poliqarp_error_message_set(error, _("entity '%s' is undefined"), variable.name);
               goto error;
            }
            else
               goto system_error;
         }
         /* make aliases */
         for (i = 0; i < args.num_items; ++i) {
            result = new_entity_alias(&this->named_items, entity,
               args.item[i].value);
            if (result != NULL)
               continue;
            if (errno == EEXIST) {
               poliqarp_error_message_set(error,
                  _("unable to redefine '%s' as '%s'"), entity->name,
                  args.item[i].value);
               goto error;
            }
            else
               goto system_error;
         }
         break;
      case SECTION_QUERY_REWRITE_RULES:
         if (args.num_items != 2) {
            poliqarp_error_message_set(error, _("unable to create rule set %s; "
               "each rewrite rule should consist of exactly 2 strings"), variable.name);
            goto error;
         }
         {
            struct poliqarp_query_rewrite *rewrite =
               poliqarp_get_query_rewrite(&this->query_rewrite_table, variable.name, true);
            if (rewrite == NULL)
               goto system_error;
            rc = poliqarp_add_query_rewrite_rule(rewrite, args.item[0].value, args.item[1].value);
            if (rc != 0)
               goto system_error;
         }
         break;
      default:
         break;
   } /* switch (section) */

   rc = 0;
   goto done;
system_error:
   poliqarp_error_from_system(error, NULL);
error:
   rc = -1;
done:
   if (have_variable)
      poliqarp_free_variable(&variable);
   if (have_args)
      args_free(&args);
   return rc;
}