/*
 * This file is part of the Poliqarp suite.
 *
 * Copyright (C) 2004-2009 by Instytut Podstaw Informatyki Polskiej
 * Akademii Nauk (IPI PAN; Institute of Computer Science, Polish
 * Academy of Sciences; cf. www.ipipan.waw.pl).  All rights reserved.
 *
 * This file may be distributed and/or modified under the terms of the
 * GNU General Public License version 2 as published by the Free Software
 * Foundation and appearing in the file gpl.txt included in the packaging
 * of this file.  (See http://www.gnu.org/licenses/translations.html for
 * unofficial translations.)
 *
 * A commercial license is available from IPI PAN (contact
 * Michal.Ciesiolka@ipipan.waw.pl or ipi@ipipan.waw.pl for more
 * information).  Licensees holding a valid commercial license from IPI
 * PAN may use this file in accordance with that license.
 *
 * This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING
 * THE WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

%{
#include <poliqarp-config.h>

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#include <sakura/corpus.h>
#include <sakura/expression.h>
#include <sakura/value.h>
#include <sakura/meta-value.h>
#include <sakura/common/graph.h>
#include <sakura/parser.h>

#include <foostring/foostring.h>
#include <unibits/unibits.h>

/**
 * Convert a decimal string to an int, saturating instead of overflowing.
 *
 * @param s  NUL-terminated text of the number (an optional sign followed
 *           by digits, as matched by the numeric rules below).
 * @return   The parsed value; INT_MIN or INT_MAX when the value does not
 *           fit in an int; INT_MAX when `s' has trailing characters that
 *           are not part of the number.
 */
static int string_to_int(const char *s)
{
    char *end_ptr;
    long result;

    errno = 0;
    result = strtol(s, &end_ptr, 10);

    /* Trailing garbage takes precedence over any range clamping. */
    if (*end_ptr != '\0')
        return INT_MAX;
    /* strtol() itself saturated: the value does not even fit in a long. */
    if (errno == ERANGE)
        return (result < 0) ? INT_MIN : INT_MAX;
    /* The value fits in a long but not in an int. */
    if (result < INT_MIN)
        return INT_MIN;
    if (result > INT_MAX)
        return INT_MAX;
    return (int) result;
}
%}

%x META_MODE
%x STAT_MODE
%x STAT_SORT_MODE
%x STAT_INTERP_MODE
%x DQUOTE
%x SQUOTE

%option noyywrap
%option reentrant bison-bridge
%option 8bit
%option nounput
%option stack

DIGIT   [[:digit:]]
SPACE   [[:space:]]+

/* Note: flex (not to mention the other lexes) is brain damaged. It doesn't
 * use the regex library (or regex facilities of libc), thus the regular
 * expression below does not support multibyte characters. Because we expect
 * UTF-8 input, and because the characters 128-255 can only be parts of
 * multibyte characters above U+0080, we explicitly allow them all to be
 * parts of identifiers.
 */

ID      [:_.a-zA-Z0-9\x80-\xff-]
ID_EXT  [*?+]

%%

    /* Accumulator for the string literal currently being scanned. */
    string_t string_buf;

    string_buf = string_create(); /* just to shut up compilers */
    string_free(string_buf);

    /* Keywords; all of them are matched case-insensitively. */

<INITIAL>(?i:meta) {
    BEGIN(META_MODE);
    return META;
}

<INITIAL,META_MODE>(?i:within) {
    return WITHIN;
}

<INITIAL,META_MODE>(?i:group{SPACE}by) {
    BEGIN(STAT_MODE);
    return GROUP_BY;
}

<INITIAL>(?i:head) {
    return HEAD;
}

<INITIAL>(?i:synh) {
    return SYNH;
}

<INITIAL>(?i:semh) {
    return SEMH;
}

    /* Keywords available after `group by'.  STAT_INTERP_MODE and
     * STAT_SORT_MODE accept exactly one keyword each and then switch
     * back to STAT_MODE. */

<STAT_MODE>(?i:interp) {
    BEGIN(STAT_INTERP_MODE);
    return STAT_INTERP;
}

<STAT_INTERP_MODE>(?i:random) {
    BEGIN(STAT_MODE);
    return STAT_INTERP_RANDOM;
}

<STAT_INTERP_MODE>(?i:combine) {
    BEGIN(STAT_MODE);
    return STAT_INTERP_COMBINE;
}

<STAT_MODE>(?i:sort{SPACE}a{SPACE}fronte) {
    return STAT_SORT_AFRONTE;
}

<STAT_MODE>(?i:sort{SPACE}by) {
    BEGIN(STAT_SORT_MODE);
    return STAT_SORT_BY;
}

<STAT_SORT_MODE>(?i:freq) {
    BEGIN(STAT_MODE);
    return STAT_SORT_BY_FREQ;
}

<STAT_SORT_MODE>(?i:cp) {
    BEGIN(STAT_MODE);
    return STAT_SORT_BY_CP;
}

<STAT_SORT_MODE>(?i:scp) {
    BEGIN(STAT_MODE);
    return STAT_SORT_BY_SCP;
}

<STAT_SORT_MODE>(?i:maxcp) {
    BEGIN(STAT_MODE);
    return STAT_SORT_BY_MAXCP;
}

<STAT_SORT_MODE>(?i:dice) {
    BEGIN(STAT_MODE);
    return STAT_SORT_BY_DICE;
}

<STAT_MODE>(?i:bias) {
    return STAT_BIAS;
}

<STAT_MODE>(?i:min) {
    return STAT_MIN;
}

<STAT_MODE>(?i:count) {
    return STAT_COUNT;
}

    /* Comparison operators of META_MODE; `!=' is EQUAL with negation. */

<META_MODE>"<" {
    yylval->as_mop.strategy = POLIQARP_STRATEGY_SMALLER;
    yylval->as_mop.negate = false;
    return MOP;
}

<META_MODE>"<=" {
    yylval->as_mop.strategy = POLIQARP_STRATEGY_SMALLER_EQUAL;
    yylval->as_mop.negate = false;
    return MOP;
}

<META_MODE>">" {
    yylval->as_mop.strategy = POLIQARP_STRATEGY_GREATER;
    yylval->as_mop.negate = false;
    return MOP;
}

<META_MODE>">=" {
    yylval->as_mop.strategy = POLIQARP_STRATEGY_GREATER_EQUAL;
    yylval->as_mop.negate = false;
    return MOP;
}

<META_MODE>"=" {
    yylval->as_mop.strategy = POLIQARP_STRATEGY_EQUAL;
    yylval->as_mop.negate = false;
    return MOP;
}

<META_MODE>"!=" {
    yylval->as_mop.strategy = POLIQARP_STRATEGY_EQUAL;
    yylval->as_mop.negate = true;
    return MOP;
}

    /* Numbers.  A variable is `$' followed by digits; the `$' is skipped
     * before conversion. */

<INITIAL>[$]{DIGIT}+ {
    yylval->as_int = string_to_int(yytext + 1);
    return VARIABLE;
}

<INITIAL,STAT_MODE>{DIGIT}+ {
    yylval->as_int = string_to_int(yytext);
    return INTEGER;
}

<STAT_MODE>[+-]{DIGIT}+ {
    yylval->as_int = string_to_int(yytext);
    return SIGNED_INTEGER;
}

<STAT_MODE>[+-]?{DIGIT}+[.]{DIGIT}+ {
    char *end_ptr;

    errno = 0;
    yylval->as_double = strtod(yytext, &end_ptr);
    /* Unparsable or out-of-range input is reported as NaN. */
    if (*end_ptr != '\0' || errno == ERANGE)
        yylval->as_double = NAN;
    return REAL_NUMBER;
}

    /* Identifiers: at least one ID character, after which the ID_EXT
     * characters are accepted as well.  The semantic value is a
     * strdup()ed copy of the matched text. */

<INITIAL,META_MODE,STAT_MODE>{ID}+({ID}|{ID_EXT})* {
    yylval->as_text = strdup(yytext);
    return IDENT;
}

    /* Operators of the INITIAL state.  A doubled final character selects
     * the ALL strategy instead of ANY, the `=' family sets use_disamb,
     * and a leading `!' sets negate. */

<INITIAL>~ {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ANY;
    yylval->as_op.use_disamb = false;
    yylval->as_op.negate = false;
    return OP;
}

<INITIAL>!~ {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ANY;
    yylval->as_op.use_disamb = false;
    yylval->as_op.negate = true;
    return OP;
}

<INITIAL>~~ {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ALL;
    yylval->as_op.use_disamb = false;
    yylval->as_op.negate = false;
    return OP;
}

<INITIAL>!~~ {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ALL;
    yylval->as_op.use_disamb = false;
    yylval->as_op.negate = true;
    return OP;
}

<INITIAL>= {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ANY;
    yylval->as_op.use_disamb = true;
    yylval->as_op.negate = false;
    return OP;
}

<INITIAL>!= {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ANY;
    yylval->as_op.use_disamb = true;
    yylval->as_op.negate = true;
    return OP;
}

<INITIAL>== {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ALL;
    yylval->as_op.use_disamb = true;
    yylval->as_op.negate = false;
    return OP;
}

<INITIAL>!== {
    yylval->as_op.strategy = POLIQARP_STRATEGY_ALL;
    yylval->as_op.use_disamb = true;
    yylval->as_op.negate = true;
    return OP;
}

    /* String literals.  An opening quote pushes SQUOTE or DQUOTE and
     * starts a fresh buffer; the matching closing quote pops the state
     * and returns the collected text as STRING. */

<INITIAL,META_MODE,STAT_MODE>"\'" {
    yy_push_state(SQUOTE, yyscanner);
    string_buf = string_create();
}

<INITIAL,META_MODE,STAT_MODE>"\"" {
    yy_push_state(DQUOTE, yyscanner);
    string_buf = string_create();
}

<DQUOTE>\" |
<SQUOTE>\' {
    yy_pop_state(yyscanner);
    yylval->as_text = string_free_and_get_buffer(string_buf);
    return STRING;
}

    /* Escape sequences inside string literals.  Unrecognized escapes are
     * copied to the buffer unchanged, backslash included. */

<SQUOTE,DQUOTE>{
    "\\n"   string_append_char(string_buf, '\n');
    "\\r"   string_append_char(string_buf, '\r');
    "\\t"   string_append_char(string_buf, '\t');
    "\\v"   string_append_char(string_buf, '\v');
    "\\b"   string_append_char(string_buf, '\b');
    "\\f"   string_append_char(string_buf, '\f');
    "\\\\"  string_append_char(string_buf, '\\');
    "\\\""  string_append_char(string_buf, '\"');
    "\\\'"  string_append_char(string_buf, '\'');
    "\\\n"  string_append_char(string_buf, yytext[1]);
    \\(x[0-9a-fA-F]{2}|u[0-9a-fA-F]{4}|U00[0-9a-fA-F]{6}) {
        /* Skip the backslash and the x/u/U marker, then read the hex
         * digits as a code point. */
        unsigned long code = strtoul(yytext + 2, NULL, 16);
        /* NUL and anything above U+10FFFF become U+FFFD. */
        if (code == 0 || code > 0x10ffff)
            code = 0xfffd;
        /* NOTE(review): if Tcl_UniChar is narrower than 21 bits, code
         * points above its range would be truncated by this assignment;
         * confirm against the unibits headers. */
        Tcl_UniChar ch = code; /* no overflow expected */
        Tcl_DString dstring;
        Tcl_DStringInit(&dstring);
        char *utf8 = Tcl_UniCharToUtfDString(&ch, 1, &dstring);
        string_append_str(string_buf, utf8);
        Tcl_DStringFree(&dstring);
    }
    \\.     string_append_strn(string_buf, yytext, yyleng);
}

<DQUOTE>[^\\\n\"]+ |
<SQUOTE>[^\\\n\']+ {
    string_append_strn(string_buf, yytext, yyleng);
}

    /* A newline or end of input before the closing quote ends the
     * literal; what was collected so far is returned as STRING_OPEN. */

<SQUOTE,DQUOTE>{
    \n |
    <<EOF>> {
        yy_pop_state(yyscanner);
        yylval->as_text = string_free_and_get_buffer(string_buf);
        return STRING_OPEN;
    }
}

<INITIAL,META_MODE,STAT_MODE>{SPACE} { /* whitespace is discarded */ }

<<EOF>> {
    BEGIN(INITIAL);
    yyterminate();
}

<INITIAL,META_MODE,STAT_MODE>. {
    /* Any other single character is its own token. */
    return *yytext;
}

%%