ooo-build r14990 - in trunk: . scratch/sd-pptutil/src



Author: thorstenb
Date: Sat Jan  3 01:28:16 2009
New Revision: 14990
URL: http://svn.gnome.org/viewvc/ooo-build?rev=14990&view=rev

Log:
    * scratch/sd-pptutil/*: more work on text style parsers, lots of
    small improvements across the board



Modified:
   trunk/ChangeLog
   trunk/scratch/sd-pptutil/src/record.py
   trunk/scratch/sd-pptutil/src/stream.py

Modified: trunk/scratch/sd-pptutil/src/record.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/record.py	(original)
+++ trunk/scratch/sd-pptutil/src/record.py	Sat Jan  3 01:28:16 2009
@@ -7,12 +7,14 @@
 
 class BaseRecordHandler(object):
 
-    def __init__ (self, recordType, recordInstance, size, bytes, prefix=''):
+    def __init__ (self, recordType, recordInstance, size, bytes, streamProperties, prefix='', propertyName=None):
         self.recordType = recordType
         self.recordInstance = recordInstance
         self.size = size
         self.bytes = bytes
         self.lines = []
+        self.streamProperties = streamProperties
+        self.propertyName = propertyName
         self.prefix = prefix
         self.pos = 0       # current byte position
 
@@ -41,6 +43,13 @@
         text = "%s: %s"%(name, self.getYesNo(value))
         self.appendLine(text)
 
+    def appendProperty (self, value):
+        if self.propertyName is not None:
+            self.streamProperties[self.propertyName] = value
+
+    def isEmpty (self):
+        return len(self.bytes) <= self.pos
+    
     def readBytes (self, length):
         r = self.bytes[self.pos:self.pos+length]
         self.pos += length
@@ -88,15 +97,25 @@
 
     def parseBytes (self):
         name = globals.getTextBytes(self.readRemainingBytes())
+        self.appendProperty(name)
         self.appendLine("text: '%s'"%name)
 
+def ShapeString (*args):
+    args += "ShapeText",
+    return String(*args)
+
 class UniString(BaseRecordHandler):
     """Textual content."""
 
     def parseBytes (self):
         name = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes()))
+        self.appendProperty(name)
         self.appendLine("text: '%s'"%name)
 
+def ShapeUniString (*args):
+    args += "ShapeText",
+    return UniString(*args)
+
 # -------------------------------------------------------------------
 # special record handler: properties
 
@@ -106,7 +125,7 @@
     def parseBytes (self):
         # each prop entry takes 6 bytes; complex stuff comes after
         # prop entries and fills remaining record space
-        complexBytes = self.bytes[self.pos+self.recordInstance*6:]
+        allComplexBytes = self.bytes[self.pos+self.recordInstance*6:]
 
         # recordInstance gives number of properties
         for i in xrange(0, self.recordInstance):
@@ -116,47 +135,97 @@
             isComplex = (propType & 0x8000) != 0
             isBlip = ((propType & 0x4000) != 0) and not isComplex
             propType = (propType & 0x3FFF)
+            complexBytes = []
 
+            if isComplex:
+                # consume this property's propValue bytes from the front of allComplexBytes
+                complexBytes = allComplexBytes[:propValue]
+                allComplexBytes = allComplexBytes[propValue:]
+                
             if propData.has_key(propType):
-                handler = propData[propType][1](propType, propValue, isComplex, isBlip, self.appendLine)
+                handler = propData[propType][1](propType, propValue, isComplex, isBlip, complexBytes, self.appendLine)
                 handler.output()
             else:
                 self.appendLine("%4.4Xh: [unknown property type: %4.4Xh, value: %8.8Xh, complex: %d, blip: %d]"%(propType, propValue, isComplex, isBlip))
 
 # -------------------------------------------------------------------
+# special record handler: document atom
+
+class DocAtom(BaseRecordHandler):
+    """Document atom."""
+
+    def parseBytes (self):
+        slideWidth = self.readSignedInt(4)
+        slideHeight = self.readSignedInt(4)
+        notesWidth = self.readSignedInt(4)
+        notesHeight = self.readSignedInt(4)
+        oleWidth = self.readSignedInt(4)
+        oleHeight = self.readSignedInt(4)
+        notesMasterPersist = self.readUnsignedInt(4)
+        handoutMasterPersist = self.readUnsignedInt(4)
+        firstSlideNum = self.readUnsignedInt(2)
+        slideSizeType = self.readSignedInt(2)
+        savedWithFont = self.readUnsignedInt(1)
+        omitTitlePlace = self.readUnsignedInt(1)
+        right2Left = self.readUnsignedInt(1)
+        showComments = self.readUnsignedInt(1)
+
+        self.appendLine("Slide: (%d,%d), notes: (%d,%d), ole zoom: (%d,%d)"%(slideWidth, slideHeight,
+                                                                             notesWidth, notesHeight,
+                                                                             oleWidth, oleHeight))
+        self.appendLine("Notes master persist offset: %8.8Xh"%notesMasterPersist)
+        self.appendLine("Handout master persist offset: %8.8Xh"%handoutMasterPersist)
+        self.appendLine("1st slide num: %d, slide size type: %4.4Xh"%(firstSlideNum, slideSizeType))
+        self.appendLine("embedded fonts: %s, no placeholders on title slide: %s"%(savedWithFont,
+                                                                                  omitTitlePlace))
+        self.appendLine("RTL doc: %s, show comment shapes: %s"%(right2Left, showComments))
+
+
+# -------------------------------------------------------------------
 # special record handlers: text style properties
 
 class TextStyles(BaseRecordHandler):
     """Text style properties."""
 
     def parseBytes (self):
-        # 4 bytes: total len of para attribs
+        # any shape text set? if not, no chance to calc run lengths
+        if not self.streamProperties.has_key("ShapeText"):
+            self.appendLine("no shape text given, skipping props")
+            return
+        
+        textLen = len(self.streamProperties["ShapeText"])
+
+        # 4 bytes: <count> characters of shape text this para run is meant for
         # <para attribs>
-        # 4 bytes: total len of char attribs
-        # <char attribs>
-        paraAttribLen = self.readUnsignedInt(4)
-        paraAttribEndPos = self.pos + paraAttribLen
-        while self.pos < paraAttribEndPos:
-            self.parseParaStyle()
+        # repeat until all shape text is consumed
+        charPos = 0
+        while not self.isEmpty() and charPos < textLen:
+            runLen = self.readUnsignedInt(4)
+            charPos += runLen
+            self.parseParaStyle(runLen)
             self.appendLine("-"*61)
-
-        charAttribLen = self.readUnsignedInt(4)
-        charAttribEndPos = self.pos + charAttribLen
-        while self.pos < charAttribEndPos:
-            self.parseCharStyle()
+            
+        # 4 bytes: <count> characters of shape text this char run is meant for
+        # <char attribs>
+        # repeat until all shape text is consumed
+        charPos = 0
+        while not self.isEmpty() and charPos < textLen:
+            runLen = self.readUnsignedInt(4)
+            charPos += runLen
+            self.parseCharStyle(runLen)
             self.appendLine("-"*61)
-
+            
     def appendParaProp (self, text):
         self.appendLine("para prop given: "+text)
 
     def appendCharProp (self, text):
         self.appendLine("char prop given: "+text)
 
-    def parseParaStyle (self):
+    def parseParaStyle (self, runLen):
         indentLevel = self.readUnsignedInt(2)
         styleMask = self.readUnsignedInt(4)
 
-        self.appendLine("para props for indent: %d"%indentLevel)
+        self.appendLine("para props for %d chars, indent: %d"%(runLen,indentLevel))
 
         if styleMask & 0x000F:
             bulletFlags = self.readUnsignedInt(2)
@@ -177,7 +246,7 @@
             self.appendParaProp("bullet size %d"%bulletSize)
 
         if styleMask & 0x0020:
-            bulletColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, self.appendParaProp)
+            bulletColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, [], self.appendParaProp)
             bulletColorAtom.output()
             self.appendParaProp("bullet color atom")
 
@@ -234,9 +303,11 @@
             paraTextDirection = self.readUnsignedInt(2)
             self.appendParaProp("para text direction %4.4Xh"%paraTextDirection)
 
-    def parseCharStyle (self):
+    def parseCharStyle (self, runLen):
         styleMask = self.readUnsignedInt(4)
 
+        self.appendLine("char props for %d chars"%runLen)
+
         if styleMask & 0xFFFF:
             charFlags = self.readUnsignedInt(2)
             self.appendCharProp("char flags %4.4Xh"%charFlags)
@@ -262,7 +333,7 @@
             self.appendCharProp("char font size %d"%fontSize)
 
         if styleMask & 0x40000:
-            charColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, self.appendCharProp)
+            charColorAtom = ColorPropertyHandler(self.readUnsignedInt(2), self.readUnsignedInt(4), False, False, [], self.appendCharProp)
             charColorAtom.output()
             self.appendCharProp("char color atom")
 
@@ -277,17 +348,21 @@
 class BasePropertyHandler():
     """Base property handler."""
 
-    def __init__ (self, propType, propValue, isComplex, isBlip, printer):
+    def __init__ (self, propType, propValue, isComplex, isBlip, complexBytes, printer):
         self.propType = propType
         self.propValue = propValue
         self.isComplex = isComplex
         self.isBlip = isBlip
+        self.bytes = complexBytes
+        self.pos = 0
         self.printer = printer
+        if propData.has_key(self.propType):
+            self.propEntry = propData[self.propType]
     
     def output (self):
         if propData.has_key(self.propType):
-            propEntry = propData[self.propType]
-            self.printer("%4.4Xh: %s = %8.8Xh [\"%s\" - default handler]"%(self.propType, propEntry[0], self.propValue, propEntry[2]))
+            self.printer("%4.4Xh: %s = %8.8Xh [\"%s\" - default handler]"%(self.propType, self.propEntry[0],
+                                                                           self.propValue, self.propEntry[2]))
 
 class BoolPropertyHandler(BasePropertyHandler):
     """Bool properties."""
@@ -309,11 +384,39 @@
 class MsoArrayPropertyHandler(BasePropertyHandler):
     """MsoArray property."""
 
+    def readBytes (self, length):
+        r = self.bytes[self.pos:self.pos+length]
+        self.pos += length
+        return r
+
+    def readUnsignedInt (self, length):
+        bytes = self.readBytes(length)
+        return globals.getUnsignedInt(bytes)
+
+    def output (self):
+        if self.isComplex:
+            numElements = self.readUnsignedInt(2)
+            dummy = self.readUnsignedInt(2)
+            elementSize = self.readUnsignedInt(2)
+            self.printer("%4.4Xh: %s: [\"%s\"]"%(self.propType, self.propEntry[0], self.propEntry[2]))
+            for i in xrange(0, numElements):
+                currElem = self.readUnsignedInt(elementSize)
+                self.printer("%4.4Xh: %d = %Xh"%(i,currElem))
+
 class UniCharPropertyHandler(BasePropertyHandler):
     """unicode string property."""  
 
+    def output (self):
+        if self.isComplex:
+            name = globals.getUTF8FromUTF16(globals.getTextBytes(self.bytes))
+            self.printer("%4.4Xh: %s = %s: [\"%s\"]"%(self.propType, self.propEntry[0], name, self.propEntry[2]))
+
 class FixedPointHandler(BasePropertyHandler):
     """FixedPoint property."""
+
+    def output (self):
+        value = self.propValue / 65536.0
+        self.printer("%4.4Xh: %s = %f [\"%s\"]"%(self.propType, self.propEntry[0], value, self.propEntry[2]))
     
 class ColorPropertyHandler(BasePropertyHandler):
     """Color property."""   
@@ -339,8 +442,19 @@
 class CharPropertyHandler(BasePropertyHandler):
     """string property."""  
 
+    def output (self):
+        if self.isComplex:
+            name = globals.getTextBytes(self.bytes)
+            self.printer("%4.4Xh: %s = %s: [\"%s\"]"%(self.propType, self.propEntry[0], name, self.propEntry[2]))
+
 class HandlesPropertyHandler(BasePropertyHandler):
-    """string property."""  
+    """handles property."""  
+
+class ZipStoragePropertyHandler(BasePropertyHandler):
+    """zip storage."""  
+
+    def output (self):
+        self.printer("zipped stuff")
 
 # -------------------------------------------------------------------
 # special record handler: properties
@@ -638,7 +752,7 @@
  904:  ["DFF_Prop_lidRegroup",                   LongPropertyHandler,              "Regroup ID"],
  927:  ["DFF_Prop_tableProperties",             LongPropertyHandler, ""],
  928:  ["DFF_Prop_tableRowProperties",          LongPropertyHandler, ""],
- 937:  ["DFF_Prop_xmlstuff",                     LongPropertyHandler, "Embedded ooxml"],
+ 937:  ["DFF_Prop_xmlstuff",                     ZipStoragePropertyHandler, "Embedded ooxml"],
  953:  ["DFF_Prop_fEditedWrap",                  BoolPropertyHandler,              "Has the wrap polygon been edited?"],
  954:  ["DFF_Prop_fBehindDocument",              BoolPropertyHandler,              "Word-only (shape is behind text)"],
  955:  ["DFF_Prop_fOnDblClickNotify",            BoolPropertyHandler,              "Notify client on a double click"],

Modified: trunk/scratch/sd-pptutil/src/stream.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/stream.py	(original)
+++ trunk/scratch/sd-pptutil/src/stream.py	Sat Jan  3 01:28:16 2009
@@ -18,7 +18,7 @@
     7:  ["DFF_PST_ClientSignal2"],              
    10:  ["DFF_PST_PowerPointStateInfoAtom"],    
  1000:  ["DFF_PST_Document"],                   
- 1001:  ["DFF_PST_DocumentAtom"],               
+ 1001:  ["DFF_PST_DocumentAtom", record.DocAtom],               
  1002:  ["DFF_PST_EndDocument"],                
  1003:  ["DFF_PST_SlidePersist"],               
  1004:  ["DFF_PST_SlideBase"],                  
@@ -84,22 +84,22 @@
  3035:  ["DFF_PST_OEShapeAtom"],                
  3998:  ["DFF_PST_OutlineTextRefAtom"],         
  3999:  ["DFF_PST_TextHeaderAtom"],             
- 4000:  ["DFF_PST_TextCharsAtom", record.UniString],
+ 4000:  ["DFF_PST_TextCharsAtom", record.ShapeUniString],
  4001:  ["DFF_PST_StyleTextPropAtom", record.TextStyles],          
- 4002:  ["DFF_PST_BaseTextPropAtom"],           
+ 4002:  ["DFF_PST_BaseTextPropAtom", record.TextStyles],           
  4003:  ["DFF_PST_TxMasterStyleAtom"],          
  4004:  ["DFF_PST_TxCFStyleAtom"],              
  4005:  ["DFF_PST_TxPFStyleAtom"],              
  4006:  ["DFF_PST_TextRulerAtom"],              
  4007:  ["DFF_PST_TextBookmarkAtom"],           
- 4008:  ["DFF_PST_TextBytesAtom", record.String],
+ 4008:  ["DFF_PST_TextBytesAtom", record.ShapeString],
  4009:  ["DFF_PST_TxSIStyleAtom"],              
  4010:  ["DFF_PST_TextSpecInfoAtom"],           
  4011:  ["DFF_PST_DefaultRulerAtom"],           
  4023:  ["DFF_PST_FontEntityAtom"],             
  4024:  ["DFF_PST_FontEmbedData"],              
  4025:  ["DFF_PST_TypeFace"],                   
- 4026:  ["DFF_PST_CString"],                    
+ 4026:  ["DFF_PST_CString", record.UniString],                    
  4027:  ["DFF_PST_ExternalObject"],             
  4033:  ["DFF_PST_MetaFile"],                   
  4034:  ["DFF_PST_ExOleObj"],                   
@@ -202,7 +202,7 @@
 0xF011: ["DFF_msofbtClientData"],      
 0xF11F: ["DFF_msofbtOleObject"],       
 0xF11D: ["DFF_msofbtDeletedPspl"],     
-0xF122: ["DFF_msofbtUDefProp"],        
+0xF122: ["DFF_msofbtUDefProp", record.Property],        
 0xF005: ["DFF_msofbtSolverContainer"], 
 0xF012: ["DFF_msofbtConnectorRule"],   
 0xF013: ["DFF_msofbtAlignRule"],       
@@ -244,7 +244,7 @@
 
     def __getDirectoryObj (self):
         obj = self.header.getDirectory()
-        if obj == None:
+        if obj is None:
             return None
         obj.parseDirEntries()
         return obj
@@ -252,14 +252,14 @@
 
     def printDirectory (self):
         obj = self.__getDirectoryObj()
-        if obj == None:
+        if obj is None:
             return
         obj.output()
 
 
     def getDirectoryNames (self):
         obj = self.__getDirectoryObj()
-        if obj == None:
+        if obj is None:
             return
         return obj.getDirectoryNames()
 
@@ -267,7 +267,7 @@
     def getDirectoryStreamByName (self, name):
         obj = self.__getDirectoryObj()
         bytes = []
-        if obj != None:
+        if obj is not None:
             bytes = obj.getRawStreamByName(name)
         strm = PPTDirStream(bytes, self.params)
         return strm
@@ -275,12 +275,13 @@
 
 class PPTDirStream(object):
     """Represents one single powerpoint file subdirectory, like e.g. \"PowerPoint Document\"."""
-    def __init__ (self, bytes, params, prefix=''):
+    def __init__ (self, bytes, params, prefix='', recordInfo=None):
         self.bytes = bytes
         self.size = len(self.bytes)
         self.pos = 0
         self.prefix = prefix
         self.params = params
+        self.properties = {"recordInfo": recordInfo}
 
 
     def readBytes (self, size=1):
@@ -305,8 +306,8 @@
 
     def readRecords (self):
         try:
-            # read until data is exhausted
-            while self.pos < self.size:
+            # read until data is exhausted (min record size: 8 bytes)
+            while self.pos+8 < self.size:
                 print("")
                 self.readRecord()
             return True 
@@ -336,6 +337,7 @@
                 print("")
         if size > 0:
             print("")
+            self.__printSep('-', 61, "%4.4Xh: "%recordType)
 
     
     def readRecord (self):
@@ -348,19 +350,22 @@
 
         self.printRecordHeader(startPos, recordInstance, recordVersion, recordType, size)
         bytes = self.readBytes(size)
-        
+
+        recordInfo = None
         if recData.has_key(recordType) and len(recData[recordType]) >= 2:
-            assert(recordVersion != 0x0F)
-            # call special record handler, if any
-            handler = recData[recordType][1](recordType, recordInstance, size, bytes, self.prefix)
+            recordInfo = recData[recordType]
+
+        if recordVersion == 0x0F:
+            # substream? recurse into that
+            subSubStrm = PPTDirStream(bytes, self.params, self.prefix+" ", recordInfo)
+            subSubStrm.readRecords()
+        elif recordInfo is not None:
+            handler = recordInfo[1](recordType, recordInstance, size, bytes, self.properties, self.prefix)
             print("")
-            if handler != None:
+            # call special record handler, if any
+            if handler is not None:
                 handler.output()
             self.printRecordDump(bytes, recordType)
-        elif recordVersion == 0x0F:
-            # substream? recurse into that
-            subSubStrm = PPTDirStream(bytes, self.params, self.prefix+" ")
-            subSubStrm.readRecords()
         elif size > 0:
             print("")
             self.printRecordDump(bytes, recordType)



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]