/*
 * This file is part of the Poliqarp suite.
 * 
 * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej
 * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 * Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 * 
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file gpl.txt included in the packaging
 * of this file.  (See http://www.gnu.org/licenses/translations.html for
 * unofficial translations.)
 * 
 * A commercial license is available from IPI PAN (contact
 * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
 * information).  Licensees holding a valid commercial license from IPI
 * PAN may use this file in accordance with that license.
 * 
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

#include <errno.h>
#include <string.h>

#include <sakura/regexp.h>

#define C1 "a"
#define C2 "\xce\xb1" /* GREEK SMALL LETTER ALPHA */
#define C3 "\xe0\xa4\x85" /* DEVANAGARI LETTER A */
#define C4 "\xf4\x80\x80\x80" /* Plane 15 Private Use, First */

int poliqarp_regexp_validate_utf8()
{
   int rc;
   regex_t preg;
   const char *string = C1 C2 C3 C4;
   const char *regex = "^" C1 "[" C2 "][" C3 "][" C4 "]$";
   rc = regcomp(&preg, regex, REG_EXTENDED | REG_NOSUB);
   /* A match is expected for UTF-8 encoding *only*. */
   if (rc != 0) {
      errno = (rc == REG_ESPACE) ? ENOMEM : EINVAL;
      return -1;
   }
   rc = regexec(&preg, string, 0, NULL, 0);
   regfree(&preg);
   if (rc != 0) {
      errno = EILSEQ;
      return 1;
   }
   return 0;
}

#undef C1
#undef C2
#undef C3
#undef C4

/* create a regular expression */
int poliqarp_regexp_create(struct poliqarp_regexp *this, const char *pattern, 
   unsigned int flags, unsigned int xflags)
{
   char *use_pattern;
   bool remade_pattern = false;

   if (xflags & POLIQARP_REG_NO_ANCHORS) {
      use_pattern = (char *)pattern;
      this->simple = false;
   } else {
      use_pattern = malloc(strlen(pattern) + 5);
      sprintf(use_pattern, "^(%s)$", pattern);
      remade_pattern = true;
      this->simple = (flags & REG_ICASE) ? false :
         (strpbrk(pattern, "|*+?{,}()^$.\\-[]=:\"") == NULL);
   }
   this->pattern = strdup(pattern);
   this->status = regcomp(&this->preg, use_pattern, flags | REG_EXTENDED);
   if (remade_pattern)
      free(use_pattern);
   if (this->status) {
      size_t size;
      size = regerror(this->status, &this->preg, 0, 0);
      this->message = malloc(size);
      if (this->message != NULL)
         regerror(this->status, &this->preg, this->message, size);
   } else
      this->message = NULL;
   return this->status;
}

/* free the regular expression */
void poliqarp_regexp_destroy(struct poliqarp_regexp *this)
{
   if (this->status == 0)
      regfree(&this->preg);
   free(this->pattern);
   free(this->message);
}

void poliqarp_parse_regexp_flags(const char *text, unsigned int *flags, 
   unsigned int *xflags)
{
   unsigned result = *flags;    /* this provides continuity but requires 
                                   initialization by the caller */
   unsigned xresult = *xflags;
   int c;

   while ((c = *text++))
      switch (c) {
         case 'i':
            result |= REG_ICASE;
            break;
         case 'I':
            result &= ~REG_ICASE;
            break;
         case 'x':
            xresult |= POLIQARP_REG_NO_ANCHORS;
            break;
         case 'X':
            xresult &= ~POLIQARP_REG_NO_ANCHORS;
            break;
         default:
            break;
      }
   *flags = result;
   *xflags = xresult;
}