#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Convert JOSKIPI rules to WCCL rules using naive textual heuristics.

Usage: ruljos2wccl.py [options] IN OUT
"""
import codecs
import os
import re
import sys
from optparse import OptionParser

try:
    from StringIO import StringIO  # Python 2
except ImportError:
    from io import StringIO  # Python 3

descr = """%prog [options] IN OUT

Attempts to convert JOSKIPI rules to WCCL rules.
NOTE: this is based on very naive heuristics.
"""

# TODO agr bits
# TODO isbig -> regex

# Matches a brace-delimited, comma-separated list of double-quoted strings,
# e.g. {"a", "b"}; group 1 is everything between the braces.
p_brackets = re.compile(u'{(\\s*"[^"]*"(\\s*,\\s*"[^"]*")*\\s*)}', re.U)


def jos2ccl(what):
    """Return *what* with the textual JOSKIPI -> WCCL substitutions applied.

    Two rewrites are performed: every ``flex[`` becomes ``class[``, and every
    brace-delimited list of quoted strings (``{"a", "b"}``) is re-bracketed
    with square brackets (``["a", "b"]``).
    """
    what = what.replace('flex[', 'class[')
    return p_brackets.sub(u'[\\1]', what)


class IndentWriter(object):
    """File-like wrapper that re-indents lines by parenthesis nesting depth.

    Each non-blank line is stripped and written with one tab per currently
    open parenthesis. Text after ``//`` on a line is ignored when counting
    parentheses but is still written out.
    """

    def __init__(self, out, baseindent=0):
        """Wrap *out* (any object with ``write``), starting at *baseindent*."""
        self.out = out
        self.indent = baseindent

    def write(self, text):
        """Write *text* line by line, indenting according to nesting depth.

        NOTE: *text* is split on ``'\\n'``, so text ending in a newline yields
        a trailing empty segment and therefore one extra blank line in the
        output. This is kept as-is so the generated files stay unchanged.
        """
        for line in text.split('\n'):
            line = line.strip()
            if not line:
                self.out.write('\n')
                continue
            # Only the part before a '//' comment takes part in counting.
            meat = line.split('//', 1)[0]
            idelta = meat.count('(') - meat.count(')')
            # A line that starts by closing a paren is dedented immediately;
            # any other line uses the depth in effect before it.
            if meat.startswith(')'):
                nowindent = self.indent + idelta
            else:
                nowindent = self.indent
            self.out.write('\t' * nowindent + line + '\n')
            self.indent += idelta


class Rule(object):
    """One rule parsed from text of the form ``ACTION # NAME :- CONDITION``."""

    def __init__(self, text):
        """Parse *text* into ``act``, ``name`` and ``cond``.

        Raises:
            ValueError: if *text* does not contain exactly one ``:-``, or the
                part before it does not contain exactly one ``#``.
        """
        parts = text.split(':-')
        if len(parts) != 2:
            raise ValueError(
                'expected exactly one ":-" separator in rule: %r' % text)
        pre, cond = parts
        head = pre.split('#')
        if len(head) != 2:
            raise ValueError(
                'expected exactly one "#" before ":-" in rule: %r' % text)
        act, name = head
        self.cond = jos2ccl(cond.strip())
        self.act = jos2ccl(act.strip())
        self.name = name.strip()

    def write(self, out, comma=False):
        """Write the rule as ``rule("NAME", COND, ACT)`` to *out*.

        A trailing comma is appended after the closing parenthesis when
        *comma* is true (i.e. when more rules follow).
        """
        out.write('rule("%s",\n' % self.name)
        out.write('%s,\n' % self.cond)
        out.write('%s\n' % self.act)
        out.write(')%s\n' % (',' if comma else ''))


def rule_texts(infile):
    """Yield chunks of *infile*, starting a new chunk at each 'delete' line.

    The text preceding the first such line (possibly empty) is yielded as the
    first chunk; callers are expected to skip blank chunks.
    """
    buf = StringIO()
    for line in infile:
        if 'delete' in line:  # TODO: regex with other actions
            yield buf.getvalue()
            buf = StringIO()
        buf.write(line)
    yield buf.getvalue()


def rules(infile):
    """Yield a Rule for every non-blank chunk of *infile*.

    ``//`` comments are removed from each line before parsing.
    """
    for rt in rule_texts(infile):
        rt = '\n'.join(line.split('//', 1)[0] for line in rt.split('\n'))
        if rt.strip():
            yield Rule(rt)


def write_rules(allrules, out):
    """Write *allrules* to *out* wrapped in a ``rules( ... )`` block.

    Rules are separated by commas; the last one gets none. An empty sequence
    produces an empty ``rules()`` block instead of failing.
    """
    w = IndentWriter(out)
    w.write('rules(\n')
    last = len(allrules) - 1
    for i, rule in enumerate(allrules):
        rule.write(w, i != last)
    w.write(')\n')


def go():
    """Command-line entry point: convert the IN file and write it to OUT."""
    parser = OptionParser(usage=descr)
    parser.add_option('-v', '--verbose', action='store_true', dest='verbose',
                      default=False, help='set the verbose mode')

    _options, args = parser.parse_args()

    if len(args) != 2:
        print('You need to provide input and output paths')
        print('See %s --help' % os.path.basename(sys.argv[0]))
        sys.exit(1)

    inpath, outpath = args

    # Parse everything before touching the output path, so a malformed input
    # does not leave a truncated or half-written output file behind.
    with codecs.open(inpath, 'rb', 'utf-8') as inf:
        allrules = list(rules(inf))

    with codecs.open(outpath, 'wb', 'utf-8') as outf:
        write_rules(allrules, outf)


if __name__ == '__main__':
    go()