#!/usr/bin/python
# Copyright 2007 Frederik Gladhorn

#/***************************************************************************
#*                                                                         *
#*   This program is free software; you can redistribute it and/or modify  *
#*   it under the terms of the GNU General Public License as published by  *
#*   the Free Software Foundation; either version 2 of the License, or     *
#*   (at your option) any later version.                                   *
#*                                                                         *
#***************************************************************************/

#convert wordtypes of files that were created during the beta phase
#paste the output at the end of the kvtml file, before the tag
# NOTE(review): the tag name in the sentence above appears to be missing from
# this copy of the file (markup seems to have been stripped) - confirm which
# tag is meant.

#import xml.dom.minidom

#xml - sax parsing
from xml.sax import make_parser
from xml.sax.handler import ContentHandler

#xml - sax - writing the meta files
from xml.sax.saxutils import XMLGenerator
from xml.sax import saxutils

#from textnormalize import text_normalize_filter

#directory stuff
try:
    from dircache import listdir
except ImportError:
    # dircache only exists in Python 2; os.listdir is the replacement.
    from os import listdir
from os.path import isdir
from os.path import join

#for the time - this still has to be improved or done in a different way - file time?
import time

#unicode
import codecs

# Shared output file. It is opened lazily by _output() so that importing this
# module neither creates a file nor starts a directory scan.
wordtypeFile = None


def _output(out=None):
    """Return *out*, or the shared "wordtypes_xml" file when *out* is None.

    The shared file is opened (UTF-8, truncating) on first use and stored in
    the module-level ``wordtypeFile``.
    """
    global wordtypeFile
    if out is not None:
        return out
    if wordtypeFile is None:
        wordtypeFile = codecs.open("wordtypes_xml", "w", "utf-8")
    return wordtypeFile


class KVTML_2_Handler(ContentHandler):
    """SAX handler that writes the word type of every translation.

    For each ``translation`` element that contains a non-empty ``typename``
    the type name, and the ``subtypename`` if there is one, are printed and
    written to the output stream.

    Args:
        out: writable text stream; when None the shared ``wordtypeFile``
            is used.
    """

    #important tags
    CAPTURE_ENTRY = 1
    CAPTURE_WORDTYPE = 2
    CAPTURE_SUBTYPE = 3

    def __init__(self, out=None):
        ContentHandler.__init__(self)
        self._out = out
        # Initialised here so a translation without an enclosing entry does
        # not fail with AttributeError when the ids are printed.
        self.entry_id = u""
        self.trans_id = u""
        self.wordtype = u""
        self.subtype = u""
        self._state = None

    def startElement(self, name, attrs):
        """Remember entry/translation ids and start capturing type names."""
        if name == u"entry":
            self.entry_id = attrs["id"]
        elif name == u"translation":
            self.trans_id = attrs["id"]
        elif name == u"typename":
            # Start from an empty buffer so a later element replaces an
            # earlier one instead of being appended to it.
            self.wordtype = u""
            self._state = self.CAPTURE_WORDTYPE
        elif name == u"subtypename":
            self.subtype = u""
            self._state = self.CAPTURE_SUBTYPE

    def endElement(self, name):
        """Stop capturing, or emit the collected word type of a translation."""
        if name in (u"typename", u"subtypename"):
            # Capturing ends with the element, not with the first text chunk;
            # this also keeps an empty element from capturing later text.
            self._state = None
            return
        if name != u"translation":
            return

        if self.wordtype:
            print(" ".join([self.entry_id, " - ", self.trans_id, " ",
                            self.wordtype, " :: ", self.subtype]))
            write = _output(self._out).write
            # NOTE(review): the u"" literals below are reproduced unchanged;
            # they look like XML markup that was stripped from this copy of
            # the file - confirm against the original script.
            write(u"")
            # The parser hands over unescaped text; escape it again because
            # the output is meant to be pasted into an XML file.
            write(saxutils.escape(self.wordtype))
            write(u"")
            if self.subtype:
                write(u"")
                write(saxutils.escape(self.subtype))
                write(u"")
            write(u"")
            write(u"")
            write(u"")
            if self.subtype:
                write(u"\n")
            write(u"\n")
        self.wordtype = u""
        self.subtype = u""

    def characters(self, text):
        """Append *text* to the name being captured, if any.

        SAX parsers may report one text node in several chunks, so the
        chunks are accumulated rather than assigned.
        """
        if self._state == self.CAPTURE_WORDTYPE:
            self.wordtype += text
        elif self._state == self.CAPTURE_SUBTYPE:
            self.subtype += text


def processFile(voc_file, out=None):
    """Parse one kvtml file and write its word types to the output.

    Args:
        voc_file: path of the kvtml file to read.
        out: writable text stream; when None the shared ``wordtypeFile``
            is used. The stream is left open so that further files can be
            appended to it.
    """
    #kvtml = parse(open(voc_file))
    #root = kvtml.getElementsByTagName("kvtml")[0]
    #readLanguages(root)
    out = _output(out)
    handler = KVTML_2_Handler(out)
    saxparser = make_parser()
    saxparser.setContentHandler(handler)

    out.write(u"\n")
    # Binary mode lets the XML parser detect the document encoding itself;
    # the with-block closes the input even when parsing fails.
    with open(voc_file, "rb") as datasource:
        saxparser.parse(datasource)
    # NOTE(review): empty literal kept as found - see the note in
    # KVTML_2_Handler.endElement.
    out.write(u"")


def readFile(path, out=None):
    """Announce *path* on stdout and process it."""
    print("Reading " + path)
    processFile(path, out)


def readDirectory(path, out=None):
    """Recursively process every file below *path* whose name ends in "kvtml".

    Args:
        path: directory to scan.
        out: writable text stream passed on to processFile; when None the
            shared ``wordtypeFile`` is used.
    """
    # Sorted for a stable order regardless of which listdir is in use.
    for entry in sorted(listdir(path)):
        full_path = join(path, entry)
        if isdir(full_path):
            readDirectory(full_path, out)
        elif entry.endswith("kvtml"):
            readFile(full_path, out)


def main():
    """Convert all kvtml files below the current directory."""
    print("paste the output into the kvtml file")
    rootPath = "."
    out = _output()
    try:
        readDirectory(rootPath, out)
    finally:
        # Closed once after the whole scan; closing after each file made
        # every file but the first fail.
        out.close()


if __name__ == "__main__":
    main()