#! /usr/bin/python
# -*- coding: utf-8 -*-
"""SAX handler that turns ``entry`` elements into PhraseTypeEntry objects.

Header elements (``date``, ``title``, ``publisher``, ``licence``) found
outside an entry are echoed to every output stream as ``% ``-prefixed lines.
"""

from xml.sax import handler

from PhraseTypeEntry import PhraseTypeEntry


class TEIStructureError(Exception):
    """Raised when an ``entry`` element closes without being the subtree root.

    This happens when an ``entry`` is nested inside another element that is
    itself part of an entry under construction.
    """


class XMLNode:
    """Minimal in-memory tree node for one XML element."""

    def __init__(self, name, attrs, parent):
        """Create a node.

        name   -- element name
        attrs  -- attribute mapping of the element (must offer ``items()``)
        parent -- enclosing XMLNode, or None for the root of the subtree
        """
        self._name = name
        self._attrs = attrs
        self._children = []
        self._parent = parent
        self._content = ""

    def addChild(self, child):
        """Append *child* to this node's list of children."""
        self._children.append(child)

    def setContent(self, content):
        """Store the text content associated with this node."""
        self._content = content

    def __str__(self):
        """Return ``name[attribute pairs](child;child;...)``."""
        # Materialise the pairs as a list: str() of a lazy zip object on
        # Python 3 would print the object's address instead of the attributes.
        pairs = list(self._attrs.items())
        children = ';'.join([str(child) for child in self._children])
        return self._name + '[' + str(pairs) + '](' + children + ')'


class PhraseTypeTeiHandler(handler.ContentHandler):
    """Content handler that builds one XMLNode tree per ``entry`` element.

    When an entry closes, its tree is wrapped in a PhraseTypeEntry and written
    to the configured outputs.
    """

    # Elements whose text is echoed as a metadata line when they close
    # outside an entry.
    _META_ELEMENTS = ('title', 'publisher', 'licence')

    def __init__(self, out):
        """Create the handler.

        out -- mapping whose values expose ``write()``; metadata lines are
               written to every value, and the mapping itself is passed to
               ``PhraseTypeEntry.write``.
        """
        handler.ContentHandler.__init__(self)
        self._out = out
        self._subtree = None        # root node of the entry being built
        self._current = None        # innermost open node, None outside entries
        self._constructing = False  # True while inside an ``entry`` element
        self._content = ""          # text collected since the last reset

    def printMeta(self, text):
        """Write *text* as a ``% ``-prefixed line to every output."""
        for out in self._out.values():
            out.write('% ' + text + '\n')

    def startElement(self, name, attrs):
        """Open element *name*; inside an entry, add a node to the tree."""
        if name == 'date':
            # Skip a <date> without a ``when`` attribute instead of raising
            # KeyError in the middle of parsing.
            when = attrs.get('when')
            if when is not None:
                self.printMeta(when)
        if name == 'entry':
            self._constructing = True
            self._content = ""
        if self._constructing:
            node = XMLNode(name, attrs, self._current)
            if self._current is not None:
                self._current.addChild(node)
            else:
                # No open node yet: this element is the root of a new subtree.
                self._subtree = node
            self._current = node

    def endElement(self, name):
        """Close element *name*.

        Inside an entry the collected text is stored on the node being closed;
        when the entry itself closes, the finished tree is written out.
        Outside an entry, header elements are echoed as metadata lines.

        Raises TEIStructureError if an ``entry`` closes while an enclosing
        node is still open.
        """
        if self._current is not None:
            self._current.setContent(self._content)
            self._current = self._current._parent
            if name == 'entry':
                if self._current is not None:
                    raise TEIStructureError()
                entry = PhraseTypeEntry(self._subtree)
                entry.write(self._out)
                # The entry is complete: stop building nodes so that later
                # elements outside any entry are handled as metadata again.
                self._constructing = False
            self._content = ''
        elif name in self._META_ELEMENTS:
            self.printMeta(self._content)
            # Always clear the buffer so this element's text does not leak
            # into the next metadata line.
            self._content = ''
        elif name == 'p':
            # Paragraph break inside a metadata element: continue the text
            # on a new commented line.
            self._content += '\n% '

    def characters(self, content):
        """Append the whitespace-stripped text chunk to the buffer."""
        # NOTE(review): each chunk is stripped separately, so if the parser
        # splits text at a space, the words on either side are joined.
        # Confirm this is acceptable for the input data.
        self._content += content.strip()

    def endDocument(self):
        """Nothing to finalise at the end of the document."""
        pass