ooo-build r14878 - in trunk: . scratch/sd-pptutil scratch/sd-pptutil/src
- From: thorstenb svn gnome org
- To: svn-commits-list gnome org
- Subject: ooo-build r14878 - in trunk: . scratch/sd-pptutil scratch/sd-pptutil/src
- Date: Wed, 17 Dec 2008 00:31:58 +0000 (UTC)
Author: thorstenb
Date: Wed Dec 17 00:31:58 2008
New Revision: 14878
URL: http://svn.gnome.org/viewvc/ooo-build?rev=14878&view=rev
Log:
* scratch/sd-pptutil/*: more tweaking to make this actually work
with PPT. Added initial handler for text content.
Modified:
trunk/ChangeLog
trunk/scratch/sd-pptutil/ppt-dump.py
trunk/scratch/sd-pptutil/src/globals.py
trunk/scratch/sd-pptutil/src/record.py
trunk/scratch/sd-pptutil/src/stream.py
Modified: trunk/scratch/sd-pptutil/ppt-dump.py
==============================================================================
--- trunk/scratch/sd-pptutil/ppt-dump.py (original)
+++ trunk/scratch/sd-pptutil/ppt-dump.py Wed Dec 17 00:31:58 2008
@@ -43,7 +43,9 @@
dirstrm = strm.getDirectoryStreamByName(dirname)
self.__printDirHeader(dirname, len(dirstrm.bytes))
- if dirname == "Revision Log":
+ if dirname == "PowerPoint Document":
+ self.__readSubStream(dirstrm)
+ elif dirname == "Current User":
self.__readSubStream(dirstrm)
else:
globals.dumpBytes(dirstrm.bytes, 512)
Modified: trunk/scratch/sd-pptutil/src/globals.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/globals.py (original)
+++ trunk/scratch/sd-pptutil/src/globals.py Wed Dec 17 00:31:58 2008
@@ -35,40 +35,6 @@
return newname
-def getRichText (bytes, textLen=None):
- """parse a string of the rich-text format that Excel uses."""
-
- flags = bytes[0]
- if type(flags) == type('c'):
- flags = ord(flags)
- is16Bit = (flags & 0x01)
- isFarEast = (flags & 0x04)
- isRich = (flags & 0x08)
-
- i = 1
- formatRuns = 0
- if isRich:
- formatRuns = getSignedInt(bytes[i:i+2])
- i += 2
-
- extInfo = 0
- if isFarEast:
- extInfo = getSignedInt(bytes[i:i+4])
- i += 4
-
- extraBytes = 0
- if textLen == None:
- extraBytes = formatRuns*4 + extInfo
- textLen = len(bytes) - extraBytes - i
-
- totalByteLen = i + textLen + extraBytes
- if is16Bit:
- return ("<16-bit strings not supported yet>", totalByteLen)
-
- text = toTextBytes(bytes[i:i+textLen])
- return (text, totalByteLen)
-
-
def dumpBytes (chars, subDivide=None):
line = 0
subDivideLine = None
Modified: trunk/scratch/sd-pptutil/src/record.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/record.py (original)
+++ trunk/scratch/sd-pptutil/src/record.py Wed Dec 17 00:31:58 2008
@@ -7,15 +7,13 @@
class BaseRecordHandler(object):
- def __init__ (self, header, size, bytes, strmData):
+ def __init__ (self, header, size, bytes):
self.header = header
self.size = size
self.bytes = bytes
self.lines = []
self.pos = 0 # current byte position
- self.strmData = strmData
-
def parseBytes (self):
"""Parse the original bytes and generate human readable output.
@@ -79,3 +77,17 @@
bytes = self.readBytes(8)
return globals.getDouble(bytes)
+
+class String(BaseRecordHandler):
+ """Textual content."""
+
+ def parseBytes (self):
+ name = globals.getTextBytes(self.readRemainingBytes())
+ self.appendLine("text: '%s'"%name)
+
+class UniString(BaseRecordHandler):
+ """Textual content."""
+
+ def parseBytes (self):
+ name = globals.getUTF8FromUTF16(globals.getTextBytes(self.readRemainingBytes()))
+ self.appendLine("text: '%s'"%name)
Modified: trunk/scratch/sd-pptutil/src/stream.py
==============================================================================
--- trunk/scratch/sd-pptutil/src/stream.py (original)
+++ trunk/scratch/sd-pptutil/src/stream.py Wed Dec 17 00:31:58 2008
@@ -9,6 +9,7 @@
recData = {
+ 0: ["DFF_PST_Unknown"],
1: ["DFF_PST_SubContainerCompleted"],
2: ["DFF_PST_IRRAtom"],
3: ["DFF_PST_PSS"],
@@ -83,7 +84,7 @@
3035: ["DFF_PST_OEShapeAtom"],
3998: ["DFF_PST_OutlineTextRefAtom"],
3999: ["DFF_PST_TextHeaderAtom"],
- 4000: ["DFF_PST_TextCharsAtom"],
+ 4000: ["DFF_PST_TextCharsAtom", record.UniString],
4001: ["DFF_PST_StyleTextPropAtom"],
4002: ["DFF_PST_BaseTextPropAtom"],
4003: ["DFF_PST_TxMasterStyleAtom"],
@@ -91,7 +92,7 @@
4005: ["DFF_PST_TxPFStyleAtom"],
4006: ["DFF_PST_TextRulerAtom"],
4007: ["DFF_PST_TextBookmarkAtom"],
- 4008: ["DFF_PST_TextBytesAtom"],
+ 4008: ["DFF_PST_TextBytesAtom", record.String],
4009: ["DFF_PST_TxSIStyleAtom"],
4010: ["DFF_PST_TextSpecInfoAtom"],
4011: ["DFF_PST_DefaultRulerAtom"],
@@ -293,7 +294,7 @@
return bytes
- def readByteArray (self, size=1):
+ def readRawByteArray (self, size=1):
bytes = []
for i in xrange(0, size):
if self.pos >= self.size:
@@ -302,6 +303,15 @@
self.pos += 1
return bytes
+ def readByteArray (self, size=1):
+ bytes = []
+ for i in xrange(0, size):
+ if self.pos >= self.size:
+ raise EndOfStream
+ bytes.append(self.bytes[self.pos])
+ self.pos += 1
+ return bytes
+
def __printSep (self, c='-', w=68, prefix=''):
print(prefix + c*w)
@@ -310,30 +320,45 @@
raise EndOfStream
pos = self.pos
- header = self.readRaw(2)
- if header == 0x0000:
- raise EndOfStream
- size = self.readRaw(2)
- bytes = self.readByteArray(size)
+ recordInstance = self.readRaw(2)
+ recordVersion = (recordInstance & 0x000F)
+ recordInstance = recordInstance / 16
+ recordType = self.readRaw(2)
+ size = self.readRaw(4)
+
+ # substream? recurse into that
+ if recordVersion == 0x0F:
+ substrm = PPTDirStream(self.readByteArray(size), self.params )
+
+ try:
+ # read bytes until DFF_PST_SubContainerCompleted
+ header = 0x0000
+ while header != 0x0001:
+ header = substrm.readRecord()
+ return recordInstance
+ except EndOfStream:
+ return recordInstance
+
+ bytes = self.readRawByteArray(size)
# record handler that parses the raw bytes and displays more
# meaningful information.
handler = None
print("")
- self.__printSep('=', 61, "%4.4Xh: "%header)
- if recData.has_key(header):
+ self.__printSep('=', 61, "%4.4Xh: "%recordType)
+ if recData.has_key(recordType):
print("%4.4Xh: %s (%4.4Xh)"%
- (header, recData[header][0], header))
- if len(recData[header]) >= 2:
- handler = recData[header][1](header, size, bytes)
+ (recordType, recData[recordType][0], recordType))
+ if len(recData[recordType]) >= 2:
+ handler = recData[recordType][1](recordType, size, bytes)
else:
- print("%4.4Xh: [unknown record name] (%4.4Xh)"%(header, header))
- print("%4.4Xh: size = %d; pos = %d"%(header, size, pos))
- self.__printSep('-', 61, "%4.4Xh: "%header)
+ print("%4.4Xh: [unknown record name] (%4.4Xh)"%(recordType, recordInstance))
+ print("%4.4Xh: size = %d; pos = %d"%(recordType, size, pos))
+ self.__printSep('-', 61, "%4.4Xh: "%recordType)
for i in xrange(0, size):
if (i+1) % 16 == 1:
- output("%4.4Xh: "%header)
+ output("%4.4Xh: "%recordType)
output("%2.2X "%bytes[i])
if (i+1) % 16 == 0 and i != size-1:
print("")
@@ -344,4 +369,4 @@
# record handler exists. Parse the record and display more info.
handler.output()
- return header
+ return recordType
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]