# This program extracts sheet translations and puts them in a C file for
# further extraction by xgettext(1).
#
# Copyright (C) 2001, Cyrille Chepelov
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#

import pickle
import string
import sys

from xml.sax import saxexts
from xml.sax import saxlib

# Note: as a list of strings is not a hashable type, whenever the key of a
# dictionary was to be a list of strings, it'll be implicitly replaced by the
# pickled equivalent.

def list_merge(a, b):
    """Append to list `a`, in place, each element of `b` not already in it."""
    for x in b:
        # Test the element itself; testing `b in a` would compare the whole
        # list against the elements of `a` and never de-duplicate.
        if x not in a:
            a.append(x)


def message_merge(msgdict, message, translations):
    """Merge `translations` (lang -> translation) for `message` into `msgdict`.

    `msgdict` is keyed by the pickled form of `message`.  A translation that
    already exists for a language is kept; if the new one differs, a warning
    is written to stdout and the new one is ignored.
    """
    #print "message_merge(%s) trans=%s" % (message,translations)
    msgkey = pickle.dumps(message)
    known = msgdict.setdefault(msgkey, {})

    for k, v in translations.items():
        if k in known:
            mvalue = known[k]
            if v != mvalue:
                sys.stdout.write(
                    """W: translation collision on message '%s' for language %s
W:  (previous translation '%s')
W:  (new(ignored) translation '%s')
""" % (message, k, mvalue, v))
        else:
            known[k] = v


def places_merge(placedict, message, places):
    """Merge the list `places` into the entry of `placedict` for `message`.

    `placedict` is keyed by the pickled form of `message`; each value is a
    list without duplicates.
    """
    msgkey = pickle.dumps(message)
    list_merge(placedict.setdefault(msgkey, []), places)


class SheetTranslationSlurper(saxlib.DocumentHandler):
    """SAX document handler collecting names/descriptions and translations.

    After a document has been parsed, `self.messages` maps pickled
    `[msgid]` lists to dictionaries {language code -> [translation]}, and
    `self.places` maps the same keys to lists of (tagged file name, line).
    """

    def __init__(self):
        self.elemstk = []
        self.langstk = []
        self.namestk = []

    def setDocumentLocator(self, locator):
        """Remember the locator and reset all per-document state."""
        self.locator = locator
        saxlib.DocumentHandler.setDocumentLocator(self, locator)
        self.desctransstk = []  # of dictionaries[lang->translated message]
        self.descidstk = []  # of strings (untranslated messages)
        self.sheetnameidstk = []  # (should have at most one element)
        self.sheetnametransstk = []  # (ditto)
        self.inlangstk = []  # of language codes

        self.issheetnameid = 0
        self.issheetnametrans = ''  # or the language code
        self.isdescid = 0
        self.isdesctrans = ''  # or the language code

        self.placestk = []  # of (filename,line) tuples
        self.datastack = []  # of strings
        self.messages = {}  # of dictionaries[msgid(string)->dictionary[langid(string)->translated(string)]]
        self.places = {}
        self.warning("locator is set now")

    def warning(self, message):
        """Write `message` to stdout, prefixed with the current location."""
        sys.stdout.write("W:%s:L%d:C%d: %s\n" % (self.locator.getSystemId(),
                                                 self.locator.getLineNumber(),
                                                 self.locator.getColumnNumber(),
                                                 message))

    def resetstate(self):
        """Clear the flags describing the element currently being read."""
        self.issheetnameid = 0
        self.issheetnametrans = ''
        self.isdescid = 0
        self.isdesctrans = ''

    def startElement(self, name, attrs):
        """Push per-element state and flag what the element's text means."""
        self.resetstate()
        #print "start of ",name,attrs,attrs.map
        attmap = attrs.map
        self.elemstk.append(name)

        if (name == "sheet") or (name == "object"):
            self.langstk.append({})
            if 'name' in attmap:
                label = 'Object "%s"' % attmap['name']
            else:
                label = 'Sheet "%s"' % self.locator.getSystemId()
                # Only the sheet carries a translatable name of its own;
                # endElement pops these two stacks for "sheet" only.
                self.sheetnametransstk.append({})
                self.sheetnameidstk.append("")
            self.namestk.append(label)
            self.desctransstk.append({})
            self.descidstk.append("")
        elif name == "name":
            if "xml:lang" in attmap:
                self.issheetnametrans = attmap["xml:lang"]
            else:
                self.placestk.append(("N" + self.locator.getSystemId(),
                                      self.locator.getLineNumber()))
                self.issheetnameid = 1
        elif name == "description":
            if "xml:lang" in attmap:
                self.isdesctrans = attmap["xml:lang"]
            else:
                self.placestk.append(("D" + self.locator.getSystemId(),
                                      self.locator.getLineNumber()))
                self.isdescid = 1

        # Every element gets its own text accumulator, popped in endElement.
        self.datastack.append("")
        #print self.placestk

    def characters(self, ch, start, length):
        """Accumulate character data for the innermost open element."""
        s = ch[start:start + length]
        self.datastack[-1] = self.datastack[-1] + s

    def endElement(self, name):
        """Store the text gathered for the element according to the flags."""
        #print "end of ",name,
        #print self.placestk,
        popped = self.elemstk.pop()
        if popped != name:
            raise Exception("stack error somewhere...")
        data = self.datastack.pop()

        if self.issheetnameid:
            self.sheetnameidstk[-1] = self.sheetnameidstk[-1] + data.strip()
            place = self.placestk.pop()
            # Key by [msgid], as message_merge does, and pass the place as a
            # one-element list so the (file, line) tuple stays intact instead
            # of being flattened into separate entries.
            places_merge(self.places, [self.sheetnameidstk[-1]], [place])
        elif self.isdescid:
            self.descidstk[-1] = self.descidstk[-1] + data.strip()
            place = self.placestk.pop()
            places_merge(self.places, [self.descidstk[-1]], [place])
        elif self.issheetnametrans:
            lang = self.issheetnametrans
            trans = self.sheetnametransstk[-1]
            if lang in trans:
                self.warning("possible translation collision for lang='%s'" % lang)
            trans[lang] = [data.strip()]
        elif self.isdesctrans:
            lang = self.isdesctrans
            trans = self.desctransstk[-1]
            if lang in trans:
                self.warning("possible translation collision for lang='%s'" % lang)
            trans[lang] = [data.strip()]
        elif (name == "sheet") or (name == "object"):
            self.langstk.pop()
            self.namestk.pop()

            desctrans = self.desctransstk.pop()
            descid = self.descidstk.pop()
            message_merge(self.messages, [descid], desctrans)

            if name == "sheet":
                sheetnameid = self.sheetnameidstk.pop()
                sheetnametrans = self.sheetnametransstk.pop()
                message_merge(self.messages, [sheetnameid], sheetnametrans)

        self.resetstate()


# Load parser and driver
p = saxexts.make_parser()
#p=saxexts.XMLValParserFactory.make_parser()
sts = SheetTranslationSlurper()
p.setDocumentHandler(sts)
#p.setEntityResolver(BasicEntityResolver())


def extract_sheet_messages(name):
    """Parse the sheet file `name`; return its (messages, places).

    On an I/O or SAX error, a message is written to stderr and
    (None, None) is returned.
    """
    try:
        p.parse(name)
        return sts.messages, sts.places
    except IOError as e:
        sys.stderr.write("E: %s: %s\n" % (name, str(e)))
    except saxlib.SAXException as e:
        sys.stderr.write("E: %s\n" % str(e))
    return None, None


## def load_potfile(potfilename):
##     messages = {} # dictionary[mesgid(list of strings)] of dictionary[langcode(string)] of list of strings
##     comments = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
##     dot = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
##     places = {} # dictionary[mesgid(list of strings)] of list of (filename(string),linenum(integer))
##     attributes = {} # dictionary[mesgid(list of strings)] of list of strings (without #)

##     return messages,comments,dot,places,attributes

## def output_potfile(outfilename,lang,messages,comments,dot,places,attributes):
##     if not outfilename: out = sys.stdout
##     else: out = open(outfilename,"w")

##     for key,msgstrs in messages.items():
##         msgid = pickle.loads(key)


def output_pseudo_pofile(outfilename, lang, messages):
    """Write msgid/msgstr pairs for `lang` to `outfilename` (stdout if empty).

    Only messages having a translation for `lang` are written.
    """
    if not outfilename:
        out = sys.stdout
    else:
        out = open(outfilename, "w")

    try:
        for k, v in messages.items():
            if lang in v:
                msg = pickle.loads(k)
                out.write('msgid %s\n' % "\n".join(['"%s"' % x for x in msg]))
                out.write('msgstr %s\n' % "\n".join(['"%s"' % x for x in v[lang]]))
                # warning: this will probably be an UTF-8 stream. Pipe it through
                # iconv(1) as needed.
                out.write("\n")
    finally:
        # Close only what was opened here; stdout belongs to the caller.
        if out is not sys.stdout:
            out.close()


def make_c_escapes(s):
    """Return `s` escaped for use inside a C string literal.

    Double quotes and backslashes are backslash-escaped; characters with a
    code below 32 become three-digit octal escapes.
    """
    pieces = []
    for c in s:
        if c == '"' or c == '\\':
            pieces.append('\\' + c)
        elif ord(c) < 32:
            # Octal rather than \x: a C hex escape swallows every hex digit
            # that follows, so "\x9abc" would not mean TAB + "abc".
            pieces.append('\\%03o' % ord(c))
        else:
            pieces.append(c)
    return "".join(pieces)


def output_c_file(outfilename, messages):
    """Write a C file listing every message id wrapped in _N("...").

    The file goes to `outfilename`, or to stdout if that is empty.
    """
    # The parts of a multi-part message become adjacent C string literals
    # (one per line); a raw newline inside a single literal is not valid C.
    msgs = ['"\n"'.join([make_c_escapes(part) for part in pickle.loads(m)])
            for m in messages.keys()]
    c_msgs = ['_N("%s")' % x for x in msgs]

    # NOTE(review): the "#include" line below has no header name; it looks
    # like a <...> argument was lost from this template - confirm against
    # the original file.
    c_file = """/* AUTOMATICALLY GENERATED FILE. DO NOT EDIT ! */

/* This file has been automatically generated from the sheet files.
   It's here so that xgettext can extract these strings and merge them
   into dia.pot. It will not actually be compiled. */

#include

static const char *all_sheet_strings[] = {
%s};
""" % ",\n".join(c_msgs)

    if not outfilename:
        out = sys.stdout
    else:
        out = open(outfilename, "w")
    try:
        out.write(c_file)
    finally:
        if out is not sys.stdout:
            out.close()


def merge_messages(messages, newmessages):
    """Merge every entry of `newmessages` into `messages` (see message_merge)."""
    for k, v in newmessages.items():
        msg = pickle.loads(k)
        message_merge(messages, msg, v)


def merge_places(places, newplaces):
    """Merge every entry of `newplaces` into `places` (see places_merge)."""
    for k, v in newplaces.items():
        place = pickle.loads(k)
        places_merge(places, place, v)


if len(sys.argv) < 3:
    # NOTE(review): the usage text seems to have lost its <argument>
    # placeholders; kept as found - confirm against the original file.
    print("Usage: %s " % sys.argv[0])
    print("")
    print(" : file name of the POT file to merge into")
    print(" : file name of the sheet to merge")
    sys.exit(1)

potfilename = sys.argv[1]
fnames = sys.argv[2:]

import pprint

#messages,comments,dot,places,attributes = load_potfile(potfilename)
messages = {}
places = {}

for name in fnames:
    sheet_messages, sheet_places = extract_sheet_messages(name)
    if sheet_messages is None:
        # The error has already been reported on stderr; skip this sheet
        # rather than crash on None.items().
        continue
    merge_messages(messages, sheet_messages)
    merge_places(places, sheet_places)

#pprint.pprint(messages)
#pprint.pprint(places)

lang = "fr"
outfilename = "foo.c"
#output_potfile(outfilename,lang,messages,comments,dot,places,attributes)
# NOTE(review): both outputs go to the same file name, so the C file
# overwrites the pseudo PO file written just before - confirm intent.
output_pseudo_pofile(outfilename, lang, messages)
output_c_file(outfilename, messages)