# This program extracts sheet translations and puts them in a C file for
# further extraction by xgettext(1).
#
# Copyright (C) 2001, Cyrille Chepelov <chepelov calixo net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#


from xml.sax import saxexts
from xml.sax import saxlib
import sys,string,pickle

# Note: a list of strings is not hashable, so wherever a dictionary would be
# keyed by a list of strings, the key used is the pickled form of that list
# instead (pickle.dumps when storing, pickle.loads when reading keys back).

def list_merge(a,b):
    """Append to list `a`, in place, every item of `b` it does not yet hold.

    `a` is modified in place and nothing is returned.  Items of `b` are
    visited in order, so duplicates inside `b` itself are added only once.
    """
    for x in b:
        # The membership test must be on the item `x`, not on the whole
        # sequence `b`; testing `b` made every item look new.
        if x not in a:
            a.append(x)

def message_merge(msgdict,message,translations):
    """Merge the translations of one message into `msgdict`, in place.

    msgdict      -- dictionary keyed by pickle.dumps(message); each value is
                    a dictionary mapping a language code to a translation.
    message      -- the untranslated message (pickled to build the key).
    translations -- dictionary mapping language code to translation.

    A translation already recorded for a language is never overwritten: if
    the incoming one differs, a warning is written to stdout and the
    incoming one is ignored.
    """
    #print "message_merge(%s) trans=%s" % (message,translations)
    msgkey = pickle.dumps(message)

    # setdefault creates the per-message dictionary on first sight of the key.
    known = msgdict.setdefault(msgkey, {})
    for k,v in translations.items():
        if k not in known:
            known[k] = v
            continue

        mvalue = known[k]
        if v != mvalue:
            sys.stdout.write(
"""W: translation collision on message '%s' for language %s
W:   (previous translation     '%s')
W:   (new(ignored) translation '%s')
""" % (message,k,mvalue,v))

def places_merge(placedict,message,places):
    """Merge `places` into the list recorded for `message`, in place.

    placedict -- dictionary keyed by pickle.dumps(message); each value is a
                 list that list_merge() extends.
    message   -- the untranslated message (pickled to build the key).
    places    -- sequence whose items are merged, one by one, into the list.
    """
    msgkey = pickle.dumps(message)
    # setdefault creates the empty list the first time this key is seen.
    list_merge(placedict.setdefault(msgkey, []),places)

class SheetTranslationSlurper(saxlib.DocumentHandler):
    """SAX document handler collecting names and descriptions of sheets.

    While a document is parsed, the text of <name> and <description>
    elements is gathered.  An element without an xml:lang attribute supplies
    the untranslated message; one with xml:lang supplies the translation for
    that language.  When a <sheet> or <object> element closes, what was
    gathered for it is merged into self.messages (via message_merge), and
    the location of each untranslated message is merged into self.places
    (via places_merge).
    """
    def __init__(self):
        """Create the element, language and name stacks."""
        self.elemstk = [] # names of the currently open elements
        self.langstk = [] # one {} per open sheet/object (only pushed/popped here)
        self.namestk = [] # 'Object "..."' / 'Sheet "..."' labels of open sheets/objects
        
    def setDocumentLocator(self,locator):
        """Store the locator and (re)create all per-document state.

        Note that self.messages and self.places are replaced by fresh empty
        dictionaries here, so whatever they held before this call is dropped
        from the handler.
        """
        self.locator = locator
        saxlib.DocumentHandler.setDocumentLocator(self,locator)
        self.desctransstk = [] # of dictionaries[lang->translated message]
        self.descidstk = [] # of strings (untranslated messages)
        self.sheetnameidstk = [] # (should have at most one element)
        self.sheetnametransstk = [] # (ditto)
        self.inlangstk = [] # of language codes

        # "Which kind of element are we inside" flags; see resetstate().
        self.issheetnameid = 0
        self.issheetnametrans = '' # or the language code
        self.isdescid = 0
        self.isdesctrans = '' # or the language code
        self.placestk = [] # of lists of (filename,name)

        self.datastack = [] # of strings
        self.messages = {} # of dictionaries[msgid(string)->dictionary[langid(string)->translated(string)]]
        self.places = {}

        # Written to stdout through warning() every time a locator is set.
        self.warning("locator is set now")
    def warning(self,message):
        """Write `message` to stdout, prefixed with the locator's position.

        The line has the form "W:<system id>:L<line>:C<column>: <message>".
        """
        sys.stdout.write("W:%s:L%d:C%d: %s\n" % (self.locator.getSystemId(),
                                    self.locator.getLineNumber(),
                                    self.locator.getColumnNumber(),
                                    message))

    def resetstate(self):
        """Clear the four flags telling which element's text is being read."""
        self.issheetnameid = 0
        self.issheetnametrans = ''
        self.isdescid = 0
        self.isdesctrans = ''
        
    def startElement(self,name,attrs):
        """Handle an opening tag: push state and set the text-kind flags.

        name  -- element name.
        attrs -- attribute object; its `map` attribute is used as a mapping
                 from attribute name to value.
        """
        # Flags are cleared on every opening tag, so a flag set by an
        # enclosing <name>/<description> does not survive a child element.
        self.resetstate()
        #print "start of ",name,attrs,attrs.map
        attmap = attrs.map
        self.elemstk.append(name)

        if (name == "sheet") or (name == "object"):
            self.langstk.append({})
            # NOTE(review): the branch is chosen by the presence of a `name`
            # attribute, not by the element name, whereas endElement pops the
            # sheet-name stacks only when the element is named "sheet".  An
            # <object> without a `name` attribute, or a <sheet> with one,
            # would leave those stacks unbalanced.
            if attmap.has_key('name'):
                name = 'Object "%s"' % attmap['name']
            else:
                name = 'Sheet "%s"' % self.locator.getSystemId()                
                self.sheetnametransstk.append({})
                self.sheetnameidstk.append("")
            self.namestk.append(name)
            self.desctransstk.append({})
            self.descidstk.append("")
                        
        elif (name == "name"):
            if attmap.has_key("xml:lang"):
                # Translated name: remember the language code.
                self.issheetnametrans = attmap["xml:lang"]
            else:
                # Untranslated name: record where it was found ("N" prefix).
                self.placestk.append( ("N"+self.locator.getSystemId(),
                                        self.locator.getLineNumber()) )
                self.issheetnameid = 1

        elif (name == "description"):
            if attmap.has_key("xml:lang"):
                # Translated description: remember the language code.
                self.isdesctrans = attmap["xml:lang"]
            else:
                # Untranslated description: record where it was found ("D" prefix).
                self.placestk.append( ("D"+self.locator.getSystemId(),
                                        self.locator.getLineNumber()) )
                self.isdescid = 1
        # Every element gets its own text accumulator, filled by characters().
        self.datastack.append("")
        #print self.placestk
        
    def characters(self,ch, start, length):
        """Append ch[start:start+length] to the innermost text accumulator."""
        s = ch[start:start+length]        
        self.datastack[-1] = self.datastack[-1] + s
        
    def endElement(self,name):
        """Handle a closing tag: store the gathered text or merge the results.

        Raises Exception when `name` is not the element most recently opened.
        """
        #print "end of ",name,
        #print self.placestk,
        popped = self.elemstk.pop()
        if popped != name:
            raise Exception("stack error somewhere...")
        # Text accumulated for the element being closed.
        data = self.datastack.pop()
        if self.issheetnameid:
            # Untranslated sheet name: append the stripped text and record
            # the place pushed by startElement under the resulting message.
            self.sheetnameidstk[-1] = self.sheetnameidstk[-1] + string.strip(data)            
            places = self.placestk.pop()
            places_merge(self.places,self.sheetnameidstk[-1],places)
        elif self.isdescid:
            # Untranslated description: same treatment as above.
            self.descidstk[-1] = self.descidstk[-1] + string.strip(data)
            places = self.placestk.pop()
            places_merge(self.places,self.descidstk[-1],places)
        elif self.issheetnametrans:
            lang = self.issheetnametrans
            dict = self.sheetnametransstk[-1]

            # A second translation for the same language replaces the first,
            # after a warning.
            if dict.has_key(lang):
                self.warning("possible translation collision for lang='%s'" % lang)
            dict[lang] = [string.strip(data)]
        elif self.isdesctrans:
            lang = self.isdesctrans
            dict = self.desctransstk[-1]

            if dict.has_key(lang):
                self.warning("possible translation collision for lang='%s'" % lang)
            dict[lang] = [string.strip(data)]
        elif (name == "sheet") or (name == "object"):
            # `res` is only referenced by the commented-out print below.
            res = self.langstk.pop()
            self.namestk.pop()
            desctrans = self.desctransstk.pop()
            descid = self.descidstk.pop()
            # Messages are merged as one-element lists of strings.
            message_merge(self.messages,[descid],desctrans)
            if name == "sheet":
                sheetnameid = self.sheetnameidstk.pop()
                sheetnametrans = self.sheetnametransstk.pop()
                message_merge(self.messages,[sheetnameid],sheetnametrans)
                
            #print res
        self.resetstate()

# Load parser and driver

# A single parser/handler pair is created at module level and shared by every
# call to extract_sheet_messages(), which parses through `p` and reads the
# results off `sts`.
p=saxexts.make_parser()
#p=saxexts.XMLValParserFactory.make_parser()
sts=SheetTranslationSlurper()
p.setDocumentHandler(sts)
#p.setEntityResolver(BasicEntityResolver())

def extract_sheet_messages(name):
    """Parse the sheet file `name` with the module-level parser `p`.

    Returns the pair (sts.messages, sts.places) of the module-level handler
    `sts` when parsing succeeds.  When parsing raises IOError or
    saxlib.SAXException, an "E: ..." line is written to stderr and
    (None, None) is returned instead.
    """
    try:
        p.parse(name)
    # "except X as e" is accepted by Python 2.6+ and Python 3; the older
    # "except X, e" spelling is a syntax error on Python 3.
    except IOError as e:
        sys.stderr.write("E: %s: %s\n" % (name,str(e)))
    except saxlib.SAXException as e:
        sys.stderr.write("E: %s\n" % str(e))
    else:
        return sts.messages,sts.places

    return None,None
    
##  def load_potfile(potfilename):
##      messages = {} # dictionary[mesgid(list of strings)] of dictionary[langcode(string)] of list of strings
##      comments = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
##      dot = {} # dictionary[mesgid(list of strings)] of list of strings (without #)
##      places = {} # dictionary[mesgid(list of strings)] of list of (filename(string),linenum(integer))
##      attributes = {} # dictionary[mesgid(list of strings)] of list of strings (without #)

##      return messages,comments,dot,places,attributes

##  def output_potfile(outfilename,lang,messages,comments,dot,places,attributes):
##      if not outfilename: out = sys.stdout
##      else: out = open(outfilename,"w")

##      for key,msgstrs in messages.items():
##          msgid = pickle.loads(key)


def output_pseudo_pofile(outfilename,lang,messages):
    """Write the `lang` translations in `messages` as msgid/msgstr pairs.

    outfilename -- file to (over)write; a false value selects sys.stdout.
    lang        -- language code; messages without a translation for it are
                   skipped.
    messages    -- dictionary keyed by pickled message (a list of strings),
                   each value mapping a language code to a list of strings.

    Every string is written between double quotes on its own line; no
    escaping is applied.  A file opened here is closed before returning;
    sys.stdout is left open.
    """
    if outfilename:
        out = open(outfilename,"w")
    else:
        out = sys.stdout

    def quoted_lines(parts):
        # One double-quoted string per line.
        return "\n".join(['"%s"' % part for part in parts])

    try:
        for k,v in messages.items():
            if lang not in v:
                continue
            # Keys are pickles made by this module's own pickle.dumps calls;
            # never pass dictionaries whose keys come from untrusted data.
            msg = pickle.loads(k)

            out.write('msgid %s\n' % quoted_lines(msg))
            out.write('msgstr %s\n' % quoted_lines(v[lang]))
            # warning: this will probably be an UTF-8 stream. Pipe it through
            # iconv(1) as needed.

            out.write("\n")
    finally:
        if out is not sys.stdout:
            out.close()

def make_c_escapes(s):
    """Return `s` made safe for inclusion inside a C string literal.

    Control characters (code below 32), the double quote and the backslash
    are replaced by a three-digit octal escape such as \\042; every other
    character is copied unchanged.

    Octal is used rather than \\x because a C hexadecimal escape has no
    length limit and would swallow any hex digits following it, whereas an
    octal escape stops after three digits.
    """
    pieces = []
    for c in s:
        # Compare character codes: a character never equals an integer, so
        # testing `c` itself against range(32) or ord('"') never matches.
        if ord(c) < 32 or c == '"' or c == '\\':
            pieces.append("\\%03o" % ord(c))
        else:
            pieces.append(c)
    return "".join(pieces)

def output_c_file(outfilename,messages):
    """Write a C source file listing every message wrapped in _N("...").

    outfilename -- file to (over)write; a false value selects sys.stdout.
    messages    -- dictionary keyed by pickled message (a list of strings);
                   only the keys are used.

    Each string of a message goes through make_c_escapes(); the strings of
    one message are joined with a newline character.  A file opened here is
    closed before returning; sys.stdout is left open.
    """
    # Keys are pickles made by this module's own pickle.dumps calls; never
    # pass dictionaries whose keys come from untrusted data.
    # NOTE(review): a message made of several strings would put a raw newline
    # inside the C literal; the messages built in this file hold one string.
    msgs = ["\n".join([make_c_escapes(part) for part in pickle.loads(m)])
            for m in messages.keys()]
    c_msgs = ['_N("%s")' % x for x in msgs]
    c_file = """/* AUTOMATICALLY GENERATED FILE. DO NOT EDIT ! */
/* This file has been automatically generated from the sheet files. It's here
so that xgettext can extract these strings and merge them into dia.pot.

It will not actually be compiled. */

#include <intl.h>

static const char *all_sheet_strings[] = {
%s};

""" % ",\n".join(c_msgs)

    # The text is built before the file is opened, so a failure above does
    # not leave a truncated output file behind.
    if outfilename:
        out = open(outfilename,"w")
    else:
        out = sys.stdout
    try:
        out.write(c_file)
    finally:
        if out is not sys.stdout:
            out.close()
        
def merge_messages(messages,newmessages):
    """Merge every entry of `newmessages` into `messages`, in place.

    Both dictionaries are keyed by pickled message, each value mapping a
    language code to a translation; merging is done by message_merge().

    `newmessages` may be None or empty, in which case nothing is done:
    extract_sheet_messages() returns None when a sheet cannot be parsed.
    """
    if not newmessages:
        return
    for k,v in newmessages.items():
        # Keys are pickles made by this module's own pickle.dumps calls.
        msg = pickle.loads(k)
        message_merge(messages,msg,v)

def merge_places(places,newplaces):
    """Merge every entry of `newplaces` into `places`, in place.

    Both dictionaries are keyed by pickled message, each value being a list
    of places; merging is done by places_merge().

    `newplaces` may be None or empty, in which case nothing is done:
    extract_sheet_messages() returns None when a sheet cannot be parsed.
    """
    if not newplaces:
        return
    for k,v in newplaces.items():
        # Keys are pickles made by this module's own pickle.dumps calls.
        place = pickle.loads(k)
        places_merge(places,place,v)
        
# Script body: merge the messages of every sheet named on the command line
# and write them out.
if len(sys.argv)<3:
    # sys.stdout.write produces the same text as the print statement did and
    # is valid on both Python 2 and Python 3.
    sys.stdout.write("Usage: %s <dia.pot> <sheet.sheet>\n" % sys.argv[0])
    sys.stdout.write("\n")
    sys.stdout.write(" <dia.pot>: file name of the POT file to merge into\n")
    sys.stdout.write(" <sheet.sheet>: file name of the sheet to merge\n")
    sys.exit(1)


# potfilename is only used by the commented-out load_potfile() call below.
potfilename = sys.argv[1]
fnames = sys.argv[2:]

import pprint

#messages,comments,dot,places,attributes = load_potfile(potfilename)
messages = {}
places = {}

for name in fnames:
    sheet_messages,sheet_places = extract_sheet_messages(name)
    if sheet_messages is None:
        # The sheet could not be read or parsed; extract_sheet_messages()
        # has already reported it on stderr.  Skip it instead of failing on
        # None while merging.
        continue

    merge_messages(messages,sheet_messages)
    merge_places(places,sheet_places)

#pprint.pprint(messages)
#pprint.pprint(places)

lang="fr"

outfilename = "foo.c"
#output_potfile(outfilename,lang,messages,comments,dot,places,attributes)
# NOTE(review): both calls open the same outfilename for writing, so the C
# file written second replaces the pseudo PO output written first.
output_pseudo_pofile(outfilename,lang,messages)
output_c_file(outfilename,messages)