ooo-build r14878 - in trunk: . scratch/sd-pptutil scratch/sd-pptutil/src



Author: thorstenb
Date: Wed Dec 17 00:31:58 2008
New Revision: 14878
URL: http://svn.gnome.org/viewvc/ooo-build?rev=14878&view=rev

Log:
    * scratch/sd-pptutil/*: more tweaking to make this actually work
    with PPT. Added initial handler for text content.



Modified:
   trunk/ChangeLog
   trunk/scratch/sd-pptutil/ppt-dump.py
   trunk/scratch/sd-pptutil/src/globals.py
   trunk/scratch/sd-pptutil/src/record.py
   trunk/scratch/sd-pptutil/src/stream.py

Modified: trunk/scratch/sd-pptutil/ppt-dump.py
==============================================================================
--- trunk/scratch/sd-pptutil/ppt-dump.py	(original)
+++ trunk/scratch/sd-pptutil/ppt-dump.py	Wed Dec 17 00:31:58 2008
@@ -43,7 +43,9 @@
 
             dirstrm = strm.getDirectoryStreamByName(dirname)
             self.__printDirHeader(dirname, len(dirstrm.bytes))
-            if  dirname == "Revision Log":
+            if  dirname == "PowerPoint Document":
+                self.__readSubStream(dirstrm)
+            elif  dirname == "Current User":
                 self.__readSubStream(dirstrm)
             else:
                 globals.dumpBytes(dirstrm.bytes, 512)

Modified: trunk/scratch/sd-pptutil/src/globals.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/globals.py	(original)
+++ trunk/scratch/sd-pptutil/src/globals.py	Wed Dec 17 00:31:58 2008
@@ -35,40 +35,6 @@
     return newname
 
 
-def getRichText (bytes, textLen=None):
-    """parse a string of the rich-text format that Excel uses."""
-
-    flags = bytes[0]
-    if type(flags) == type('c'):
-        flags = ord(flags)
-    is16Bit   = (flags & 0x01)
-    isFarEast = (flags & 0x04)
-    isRich    = (flags & 0x08)
-
-    i = 1
-    formatRuns = 0
-    if isRich:
-        formatRuns = getSignedInt(bytes[i:i+2])
-        i += 2
-
-    extInfo = 0
-    if isFarEast:
-        extInfo = getSignedInt(bytes[i:i+4])
-        i += 4
-
-    extraBytes = 0
-    if textLen == None:
-        extraBytes = formatRuns*4 + extInfo
-        textLen = len(bytes) - extraBytes - i
-
-    totalByteLen = i + textLen + extraBytes
-    if is16Bit:
-        return ("<16-bit strings not supported yet>", totalByteLen)
-
-    text = toTextBytes(bytes[i:i+textLen])
-    return (text, totalByteLen)
-
-
 def dumpBytes (chars, subDivide=None):
     line = 0
     subDivideLine = None

Modified: trunk/scratch/sd-pptutil/src/record.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/record.py	(original)
+++ trunk/scratch/sd-pptutil/src/record.py	Wed Dec 17 00:31:58 2008
@@ -7,15 +7,13 @@
 
 class BaseRecordHandler(object):
 
-    def __init__ (self, header, size, bytes, strmData):
+    def __init__ (self, header, size, bytes):
         self.header = header
         self.size = size
         self.bytes = bytes
         self.lines = []
         self.pos = 0       # current byte position
 
-        self.strmData = strmData
-
     def parseBytes (self):
         """Parse the original bytes and generate human readable output.
 
@@ -79,3 +77,17 @@
         bytes = self.readBytes(8)
         return globals.getDouble(bytes)
 
+
+class String(BaseRecordHandler):
+    """Textual content."""
+
+    def parseBytes (self):
+        name = globals.getTextBytes(self.readRemainingBytes())
+        self.appendLine("text: '%s'"%name)
+
+class UniString(BaseRecordHandler):
+    """Textual content."""
+
+    def parseBytes (self):
+        name = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes()))
+        self.appendLine("text: '%s'"%name)

Modified: trunk/scratch/sd-pptutil/src/stream.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/stream.py	(original)
+++ trunk/scratch/sd-pptutil/src/stream.py	Wed Dec 17 00:31:58 2008
@@ -9,6 +9,7 @@
 
 recData = {
 
+	0:  ["DFF_PST_Unknown"],      
 	1:  ["DFF_PST_SubContainerCompleted"],      
 	2:	["DFF_PST_IRRAtom"],                    
 	3:	["DFF_PST_PSS"],                        
@@ -83,7 +84,7 @@
  3035:	["DFF_PST_OEShapeAtom"],                
  3998:	["DFF_PST_OutlineTextRefAtom"],         
  3999:	["DFF_PST_TextHeaderAtom"],             
- 4000:	["DFF_PST_TextCharsAtom"],              
+ 4000:	["DFF_PST_TextCharsAtom", record.UniString],
  4001:	["DFF_PST_StyleTextPropAtom"],          
  4002:	["DFF_PST_BaseTextPropAtom"],           
  4003:	["DFF_PST_TxMasterStyleAtom"],          
@@ -91,7 +92,7 @@
  4005:	["DFF_PST_TxPFStyleAtom"],              
  4006:	["DFF_PST_TextRulerAtom"],              
  4007:	["DFF_PST_TextBookmarkAtom"],           
- 4008:	["DFF_PST_TextBytesAtom"],              
+ 4008:	["DFF_PST_TextBytesAtom", record.String],
  4009:	["DFF_PST_TxSIStyleAtom"],              
  4010:	["DFF_PST_TextSpecInfoAtom"],           
  4011:	["DFF_PST_DefaultRulerAtom"],           
@@ -293,7 +294,7 @@
 
         return bytes
 
-    def readByteArray (self, size=1):
+    def readRawByteArray (self, size=1):
         bytes = []
         for i in xrange(0, size):
             if self.pos >= self.size:
@@ -302,6 +303,15 @@
             self.pos += 1
         return bytes
 
+    def readByteArray (self, size=1):
+        bytes = []
+        for i in xrange(0, size):
+            if self.pos >= self.size:
+                raise EndOfStream
+            bytes.append(self.bytes[self.pos])
+            self.pos += 1
+        return bytes
+
     def __printSep (self, c='-', w=68, prefix=''):
         print(prefix + c*w)
 
@@ -310,30 +320,45 @@
             raise EndOfStream
 
         pos = self.pos
-        header = self.readRaw(2)
-        if header == 0x0000:
-            raise EndOfStream
-        size = self.readRaw(2)
-        bytes = self.readByteArray(size)
+        recordInstance = self.readRaw(2)
+        recordVersion = (recordInstance & 0x000F)
+        recordInstance = recordInstance / 16
+        recordType = self.readRaw(2)
+        size = self.readRaw(4)
+
+        # substream? recurse into that
+        if recordVersion == 0x0F:
+            substrm = PPTDirStream(self.readByteArray(size), self.params )
+            
+            try:
+                # read bytes until DFF_PST_SubContainerCompleted
+                header = 0x0000
+                while header != 0x0001:
+                    header = substrm.readRecord()
+                return recordInstance
+            except EndOfStream:
+                return recordInstance
+
+        bytes = self.readRawByteArray(size)
 
         # record handler that parses the raw bytes and displays more 
         # meaningful information.
         handler = None 
 
         print("")
-        self.__printSep('=', 61, "%4.4Xh: "%header)
-        if recData.has_key(header):
+        self.__printSep('=', 61, "%4.4Xh: "%recordType)
+        if recData.has_key(recordType):
             print("%4.4Xh: %s (%4.4Xh)"%
-                  (header, recData[header][0], header))
-            if len(recData[header]) >= 2:
-                handler = recData[header][1](header, size, bytes)
+                  (recordType, recData[recordType][0], recordType))
+            if len(recData[recordType]) >= 2:
+                handler = recData[recordType][1](recordType, size, bytes)
         else:
-            print("%4.4Xh: [unknown record name] (%4.4Xh)"%(header, header))
-        print("%4.4Xh:   size = %d; pos = %d"%(header, size, pos))
-        self.__printSep('-', 61, "%4.4Xh: "%header)
+            print("%4.4Xh: [unknown record name] (%4.4Xh)"%(recordType, recordInstance))
+        print("%4.4Xh:   size = %d; pos = %d"%(recordType, size, pos))
+        self.__printSep('-', 61, "%4.4Xh: "%recordType)
         for i in xrange(0, size):
             if (i+1) % 16 == 1:
-                output("%4.4Xh: "%header)
+                output("%4.4Xh: "%recordType)
             output("%2.2X "%bytes[i])
             if (i+1) % 16 == 0 and i != size-1:
                 print("")
@@ -344,4 +369,4 @@
             # record handler exists.  Parse the record and display more info.
             handler.output()
 
-        return header
+        return recordType



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]