#!/usr/bin/env python
# -*- coding: UTF-8 -*-
"""Fill a message catalog with text taken from the paragraphs of an HTML file.

Command line: SCRIPT INPUT_CATALOG HTML_FILE OUTPUT_CATALOG

The script pairs the N-th message of INPUT_CATALOG with the N-th ``<p>``
element of HTML_FILE.  The paragraph's inner XML becomes the working
``message``; the catalog text (``msgstr`` when the message is translated,
``msgid`` otherwise) is then fed through an expat parser whose handlers
rewrite ``message`` in place.  The result is stored in ``msg.msgstr[0]`` and
the message is added to OUTPUT_CATALOG, which is synced at the end.

NOTE(review): several string literals in this file are empty or no-ops as
written (``'' % tagstack``, ``replace('&', '&')``, ``p.Parse("")``).  They
look like they lost markup/entity text somewhere upstream; they are kept
verbatim here and flagged where they occur -- confirm against the original
script before relying on the output.
"""

import string
import sys
import os
from pology.file.catalog import Catalog
import xml.parsers.expat
import xml.dom.minidom

if len(sys.argv) != 4:
    # argv[1] is the input catalog, argv[2] the HTML file, argv[3] the
    # catalog to create.  Single-argument print() works on Python 2 and 3.
    print("usage: %s INPUT_CATALOG HTML_FILE OUTPUT_CATALOG"
          % os.path.basename(sys.argv[0]))
    sys.exit(1)

fname = sys.argv[1]
cat_input = Catalog(fname)
cat_output = Catalog(sys.argv[3], create=True)
cat_output.header = cat_input.header

# State shared between the expat handlers and the main loop.
message = None     # paragraph text currently being rewritten
tagstack = 0       # count of elements opened inside the current msgid
hadcdata = False   # set once any character data has been seen
output = ""        # accumulates all character data (not read elsewhere here)


def start_element(name, attrs):
    """Expat start-tag handler: substitute the opening tag into ``message``.

    The wrapper elements ``msgid`` and ``msgids`` are ignored.  For any other
    element the tag is rebuilt from ``name`` and ``attrs`` and substituted
    into ``message``; ``tagstack`` is then incremented.
    """
    global tagstack, message
    if name in ('msgid', 'msgids'):
        return
    replace = '<%s %s>' % (
        name, ' '.join(['%s="%s"' % t for t in attrs.items()]))
    # NOTE(review): both search patterns are empty strings as written, so
    # `'' % tagstack` raises TypeError ("not all arguments converted").
    # They presumably were two distinct numbered placeholders -- TODO restore.
    message = message.replace('' % tagstack, replace)
    message = message.replace('' % tagstack, replace)
    tagstack = tagstack + 1


def end_element(name):
    """Expat end-tag handler: close the element in ``message``.

    Closing ``msgid``/``msgids`` resets ``tagstack``.  Otherwise, if character
    data has been seen, a closing tag is substituted into ``message``; if not,
    the last character of ``message`` is replaced by ``/>`` (self-closing).
    """
    global tagstack, message
    if name in ('msgid', 'msgids'):
        tagstack = 0
        return
    if hadcdata:
        # NOTE(review): both literals are empty as written, so `'' % name`
        # raises TypeError; presumably a closing-tag placeholder and template
        # -- TODO restore.
        message = message.replace('', '' % name, 1)
    else:
        message = message[:-1] + '/>'


def char_data(data):
    """Expat character-data handler: record that text was seen and keep it."""
    global output, hadcdata
    # NOTE(review): hadcdata is never reset to False anywhere in this file;
    # confirm that is intended.
    hadcdata = True
    # Append the data as delivered.  Wrapping it in str() made Python 2 try an
    # ASCII encode of the unicode data and fail on any non-ASCII character.
    output += data


p = xml.parsers.expat.ParserCreate()
p.StartElementHandler = start_element
p.EndElementHandler = end_element
p.CharacterDataHandler = char_data

i = 0
# Read the HTML through a context manager so the file handle is closed.
with open(sys.argv[2], 'r') as html_file:
    html = html_file.read()

# NOTE(review): replace('&', '&') is a no-op as written (here and below);
# presumably an entity escape/unescape pair -- TODO confirm.
dom = xml.dom.minidom.parseString(html.replace('&','&'))
# One entry per <p> element: the concatenated XML of its child nodes.
messages = [
    ''.join([n.toxml().replace('&','&') for n in m.childNodes])
    for m in dom.getElementsByTagName("p")
]

# NOTE(review): an empty chunk is fed here and again with the final flag
# after the loop; presumably these opened/closed a wrapper element.
p.Parse("", False)
for msg in cat_input:
    # Paragraphs are matched to catalog messages purely by position; an
    # IndexError here means the HTML has fewer <p> elements than messages.
    message = messages[i]
    i += 1
    if msg.translated:
        text = msg.msgstr
    else:
        text = msg.msgid
    text = "%s" % text.replace('&', '&')
    # The handlers above rewrite the global `message` while this parses.
    p.Parse(text, False)
    msg.msgstr[0] = message
    cat_output.add(msg)
p.Parse("", True)
cat_output.sync()