ooo-build r14995 - in trunk: . scratch/sd-pptutil scratch/sd-pptutil/src



Author: thorstenb
Date: Sun Jan  4 21:38:58 2009
New Revision: 14995
URL: http://svn.gnome.org/viewvc/ooo-build?rev=14995&view=rev

Log:
    * scratch/sd-pptutil/*: added license headers; made gentle changes
    towards py3 conformance; input bytes are now kept as real strings
    throughout the code (faster and much less error-prone); added
    unpacking and pretty-printing of the embedded OOXML payload



Added:
   trunk/scratch/sd-pptutil/src/xmlpp.py
Modified:
   trunk/ChangeLog
   trunk/scratch/sd-pptutil/ppt-dump.py
   trunk/scratch/sd-pptutil/src/globals.py
   trunk/scratch/sd-pptutil/src/ole.py
   trunk/scratch/sd-pptutil/src/record.py
   trunk/scratch/sd-pptutil/src/stream.py

Modified: trunk/scratch/sd-pptutil/ppt-dump.py
==============================================================================
--- trunk/scratch/sd-pptutil/ppt-dump.py	(original)
+++ trunk/scratch/sd-pptutil/ppt-dump.py	Sun Jan  4 21:38:58 2009
@@ -1,5 +1,18 @@
 #!/usr/bin/env python
 
+########################################################################
+#
+#    OpenOffice.org - a multi-platform office productivity suite
+#
+#    Author:
+#      Kohei Yoshida  <kyoshida novell com>
+#      Thorsten Behrens <tbehrens novell com>	   	
+#
+#   The Contents of this file are made available subject to
+#   the terms of GNU Lesser General Public License Version 2.1.
+#
+########################################################################
+
 import sys, os.path, getopt
 sys.path.append(sys.path[0]+"/src")
 import ole, stream, globals

Modified: trunk/scratch/sd-pptutil/src/globals.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/globals.py	(original)
+++ trunk/scratch/sd-pptutil/src/globals.py	Sun Jan  4 21:38:58 2009
@@ -1,3 +1,14 @@
+########################################################################
+#
+#    OpenOffice.org - a multi-platform office productivity suite
+#
+#    Author:
+#      Kohei Yoshida  <kyoshida novell com>
+#
+#   The Contents of this file are made available subject to
+#   the terms of GNU Lesser General Public License Version 2.1.
+#
+########################################################################
 
 import sys, struct, math
 

Modified: trunk/scratch/sd-pptutil/src/ole.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/ole.py	(original)
+++ trunk/scratch/sd-pptutil/src/ole.py	Sun Jan  4 21:38:58 2009
@@ -1,3 +1,14 @@
+########################################################################
+#
+#    OpenOffice.org - a multi-platform office productivity suite
+#
+#    Author:
+#      Kohei Yoshida  <kyoshida novell com>
+#
+#   The Contents of this file are made available subject to
+#   the terms of GNU Lesser General Public License Version 2.1.
+#
+########################################################################
 
 import sys
 import stream, globals
@@ -314,7 +325,6 @@
         self.sectorIDs = []
         self.bytes = bytes
         self.array = []
-
         self.params = params
 
 
@@ -342,10 +352,10 @@
 
 
     def outputRawBytes (self):
-        bytes = []
+        bytes = ""
         for secID in self.sectorIDs:
             pos = 512 + secID*self.sectorSize
-            bytes.extend(self.bytes[pos:pos+self.sectorSize])
+            bytes += self.bytes[pos:pos+self.sectorSize]
         globals.dumpBytes(bytes, 512)
 
 
@@ -485,7 +495,7 @@
         self.SSAT = header.getSSAT()
         self.header = header
         self.RootStorage = None
-        self.RootStorageBytes = []
+        self.RootStorageBytes = ""
         self.params = params
 
 
@@ -498,7 +508,7 @@
         chain = self.header.getSAT().getSectorIDChain(firstSecID)
         for secID in chain:
             pos = 512 + secID*self.sectorSize
-            self.RootStorageBytes.extend(self.header.bytes[pos:pos+self.sectorSize])
+            self.RootStorageBytes += self.header.bytes[pos:pos+self.sectorSize]
 
 
     def __getRawStream (self, entry):
@@ -514,20 +524,20 @@
             if self.RootStorage == None:
                 raise NoRootStorage
 
-            bytes = []
+            bytes = ""
             self.__buildRootStorageBytes()
             size = self.header.getShortSectorSize()
             for id in chain:
                 pos = id*size
-                bytes.extend(self.RootStorageBytes[pos:pos+size])
+                bytes += self.RootStorageBytes[pos:pos+size]
             return bytes
 
         offset = 512
         size = self.header.getSectorSize()
-        bytes = []
+        bytes = ""
         for id in chain:
             pos = offset + id*size
-            bytes.extend(self.header.bytes[pos:pos+size])
+            bytes += self.header.bytes[pos:pos+size]
 
         return bytes
 
@@ -683,10 +693,10 @@
             return
 
         # combine all sectors first.
-        bytes = []
+        bytes = ""
         for secID in self.sectorIDs:
             pos = globals.getSectorPos(secID, self.sectorSize)
-            bytes.extend(self.bytes[pos:pos+self.sectorSize])
+            bytes += self.bytes[pos:pos+self.sectorSize]
 
         self.entries = []
 

Modified: trunk/scratch/sd-pptutil/src/record.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/record.py	(original)
+++ trunk/scratch/sd-pptutil/src/record.py	Sun Jan  4 21:38:58 2009
@@ -1,6 +1,20 @@
+########################################################################
+#
+#    OpenOffice.org - a multi-platform office productivity suite
+#
+#    Author:
+#      Kohei Yoshida  <kyoshida novell com>
+#      Thorsten Behrens <tbehrens novell com>	   	
+#
+#   The Contents of this file are made available subject to
+#   the terms of GNU Lesser General Public License Version 2.1.
+#
+########################################################################
 
-import struct
+import StringIO
 import globals
+import zipfile
+import xmlpp
 
 # -------------------------------------------------------------------
 # record handler classes
@@ -142,7 +156,7 @@
                 complexBytes = allComplexBytes[:propValue]
                 allComplexBytes = allComplexBytes[propValue:]
                 
-            if propData.has_key(propType):
+            if propType in propData:
                 handler = propData[propType][1](propType, propValue, isComplex, isBlip, complexBytes, self.appendLine)
                 handler.output()
             else:
@@ -189,7 +203,7 @@
 
     def parseBytes (self):
         # any shape text set? if not, no chance to calc run lengths
-        if not self.streamProperties.has_key("ShapeText"):
+        if not "ShapeText" in self.streamProperties:
             self.appendLine("no shape text given, skipping props")
             return
         
@@ -296,7 +310,7 @@
         if styleMask & 0xE0000:
             paraAsianLinebreaking = self.readUnsignedInt(2)
             # filter bits not in flag field
-            paraAsianLinebreaking = paraAsianLinebreaking & ((styleMask & 0xE0000) / 0x20000)
+            paraAsianLinebreaking = paraAsianLinebreaking & ((styleMask & 0xE0000) // 0x20000)
             self.appendParaProp("para asian line breaking flags %4.4Xh"%paraAsianLinebreaking)
 
         if styleMask & 0x200000:
@@ -356,11 +370,11 @@
         self.bytes = complexBytes
         self.pos = 0
         self.printer = printer
-        if propData.has_key(self.propType):
+        if self.propType in propData:
             self.propEntry = propData[self.propType]
     
     def output (self):
-        if propData.has_key(self.propType):
+        if self.propType in propData:
             self.printer("%4.4Xh: %s = %8.8Xh [\"%s\" - default handler]"%(self.propType, self.propEntry[0],
                                                                            self.propValue, self.propEntry[2]))
 
@@ -370,7 +384,7 @@
     def output (self):
         bitMask = 1
         for i in xrange(self.propType, self.propType-32):
-            if propData.has_key(i):
+            if i in propData:
                 propEntry = propData[i]
                 if type(propEntry[1]) == type(BoolPropertyHandler):
                     flagValue = self.getTrueFalse(self.propValue & bitMask)
@@ -422,11 +436,11 @@
     """Color property."""   
 
     def split (self, packedColor):
-        return (packedColor & 0xFF0000) / 0x10000, (packedColor & 0xFF00) / 0x100, (packedColor & 0xFF)
+        return (packedColor & 0xFF0000) // 0x10000, (packedColor & 0xFF00) / 0x100, (packedColor & 0xFF)
     
     def output (self):
         propEntry = ["<color atom>", None, "undocumented color property"]
-        if propData.has_key(self.propType):
+        if self.propType in propData:
             propEntry = propData[self.propType]
         colorValue = self.propValue & 0xFFFFFF
         if self.propValue & 0xFE000000 == 0xFE000000:
@@ -454,7 +468,43 @@
     """zip storage."""  
 
     def output (self):
-        self.printer("zipped stuff")
+        class StreamWrap(object):
+            def __init__ (self,printer):
+                self.printer = printer
+                self.buffer = ""
+            def write (self,string):
+                self.buffer += string
+            def flush (self):
+                for line in self.buffer.splitlines():
+                    self.printer(line)
+            
+        self.printer("Zipped content:")
+        self.printer('='*61)
+        rawFile = StringIO.StringIO(self.bytes)
+        zipFile = zipfile.ZipFile(rawFile)
+        i = 0
+        # TODO: when 2.6/3.0 is in widespread use, change to infolist here
+        for filename in zipFile.namelist():
+            if i > 0:
+                self.printer('-'*61)
+            i += 1
+            self.printer(filename + ":")
+            self.printer('-'*61)
+
+            contents = zipFile.read(filename)
+            if filename.endswith(".xml") or contents.startswith("<?xml"):
+                wrapper = StreamWrap(self.printer)
+                xmlpp.pprint(contents,wrapper,1,80)
+                wrapper.flush()
+            else:
+                while len(contents):
+                    self.printer(contents[:60].replace('\n','').replace('\r',''))
+                    contents = contents[60:]
+#            content = zipFile.open(zipInfo)
+#            for line in content.readlines():
+#                self.printer(line)
+        zipFile.close()        
+
 
 # -------------------------------------------------------------------
 # special record handler: properties

Modified: trunk/scratch/sd-pptutil/src/stream.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/stream.py	(original)
+++ trunk/scratch/sd-pptutil/src/stream.py	Sun Jan  4 21:38:58 2009
@@ -1,3 +1,15 @@
+########################################################################
+#
+#    OpenOffice.org - a multi-platform office productivity suite
+#
+#    Author:
+#      Kohei Yoshida  <kyoshida novell com>
+#      Thorsten Behrens <tbehrens novell com>	   	
+#
+#   The Contents of this file are made available subject to
+#   the terms of GNU Lesser General Public License Version 2.1.
+#
+########################################################################
 
 import sys
 import ole, globals, record
@@ -317,7 +329,7 @@
 
     def printRecordHeader (self, startPos, recordInstance, recordVersion, recordType, size):
         self.__printSep('=')
-        if recData.has_key(recordType):
+        if recordType in recData:
             self.__print("[%s]"%recData[recordType][0])
         else:
             self.__print("[anon record]")
@@ -344,7 +356,7 @@
         startPos = self.pos
         recordInstance = self.readUnsignedInt(2)
         recordVersion = (recordInstance & 0x000F)
-        recordInstance = recordInstance / 16
+        recordInstance = recordInstance // 16
         recordType = self.readUnsignedInt(2)
         size = self.readUnsignedInt(4)
 
@@ -352,7 +364,7 @@
         bytes = self.readBytes(size)
 
         recordInfo = None
-        if recData.has_key(recordType) and len(recData[recordType]) >= 2:
+        if recordType in recData and len(recData[recordType]) >= 2:
             recordInfo = recData[recordType]
 
         if recordVersion == 0x0F:

Added: trunk/scratch/sd-pptutil/src/xmlpp.py
==============================================================================
--- (empty file)
+++ trunk/scratch/sd-pptutil/src/xmlpp.py	Sun Jan  4 21:38:58 2009
@@ -0,0 +1,148 @@
+"""Pretty print an XML document.
+
+LICENCE:
+Copyright (c) 2008, Fredrik Ekholdt
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+* Neither the name of None nor the names of its contributors may be used to 
+endorse or promote products derived from this software without specific prior 
+written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
+POSSIBILITY OF SUCH DAMAGE."""
+
+import sys as _sys
+import re as _re
+
+def _usage(this_file):
+    return """SYNOPSIS: pretty print an XML document
+USAGE: python %s <filename> \n""" % this_file
+
+def _pprint_line(indent_level, line, width=100, output=_sys.stdout):
+    if line.strip():
+        start = ""
+        number_chars = 0
+        for l in range(indent_level):
+            start = start + " "
+            number_chars = number_chars + 1
+        try:
+            elem_start = _re.findall("(\<\W{0,1}\w+) ?", line)[0]
+            elem_finished = _re.findall("([?|\]\]]*\>)", line)[0] 
+            #should not have *
+            attrs = _re.findall("(\S*?\=\".*?\")", line)
+            output.write(start + elem_start)
+            number_chars = len(start + elem_start)
+            for attr in attrs:
+                if (attrs.index(attr) + 1) == len(attrs):
+                    number_chars = number_chars + len(elem_finished)
+                if (number_chars + len(attr) + 1) > width:
+                    output.write("\n")
+                    for i in range(len(start + elem_start) + 1):
+                        output.write(" ")
+                    number_chars = len(start + elem_start) + 1 
+                else:
+                    output.write(" ")
+                    number_chars = number_chars + 1
+                output.write(attr)
+                number_chars = number_chars + len(attr)
+            output.write(elem_finished + "\n")
+        except IndexError:
+            #give up pretty print this line
+            output.write(start + line + "\n")
+                
+
+def _pprint_elem_content(indent_level, line, output=_sys.stdout):
+    if line.strip():
+        for l in range(indent_level):
+            output.write(" ")
+        output.write(line + "\n")
+
+def _get_next_elem(data):
+    start_pos = data.find("<")
+    end_pos = data.find(">") + 1
+    retval = data[start_pos:end_pos]
+    stopper = retval.rfind("/") 
+    if stopper < retval.rfind("\""):
+        stopper = -1
+    single = (stopper > -1 and ((retval.find(">") - stopper) < (stopper - retval.find("<"))))
+
+    ignore_excl = retval.find("<!") > -1
+    ignore_question =  retval.find("<?") > -1
+
+    if ignore_excl:
+        cdata = retval.find("<![CDATA[") > -1
+        if cdata:
+            end_pos = data.find("]]>")
+            if end_pos > -1:
+                end_pos = end_pos + len("]]>")
+
+    elif ignore_question:
+        end_pos = data.find("?>") + len("?>")
+    ignore = ignore_excl or ignore_question
+    
+    no_indent = ignore or single
+
+    #print retval, end_pos, start_pos, stopper > -1, no_indent
+    return start_pos, \
+           end_pos, \
+           stopper > -1, \
+           no_indent
+
+def pprint(xml, output=_sys.stdout, indent=4, width=80):
+    """Pretty print xml. 
+    Use output to select output stream. Default is sys.stdout
+    Use indent to select indentation level. Default is 4   """
+    data = xml
+    indent_level = 0
+    start_pos, end_pos, is_stop, no_indent  = _get_next_elem(data)
+    while ((start_pos > -1 and end_pos > -1)):
+        _pprint_elem_content(indent_level, data[:start_pos].strip(), 
+                             output=output)
+        data = data[start_pos:]
+        if is_stop and not no_indent:
+            indent_level = indent_level - indent
+        _pprint_line(indent_level, 
+                     data[:end_pos - start_pos], 
+                     width=width,
+                     output=output)
+        data = data[end_pos - start_pos:]
+        if not is_stop and not no_indent :
+            indent_level = indent_level + indent
+
+        if not data:
+            break
+        else:
+            start_pos, end_pos, is_stop, no_indent  = _get_next_elem(data)
+    
+
+if __name__ == "__main__":
+    if "-h" in _sys.argv or "--help" in _sys.argv:
+        _sys.stderr.write(_usage(_sys.argv[0]))
+        _sys.exit(1)
+    if len(_sys.argv) < 2:
+        _sys.stderr.write(_usage(_sys.argv[0]))
+        _sys.exit(1)
+    else:
+        filename = _sys.argv[1]
+        fh = open(filename)
+
+    pprint(fh.read(), output=_sys.stdout, indent=4, width=80)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]