ooo-build r11354 - in trunk: . scratch/sc-xlsutil scratch/sc-xlsutil/src
- From: kyoshida svn gnome org
- To: svn-commits-list gnome org
- Subject: ooo-build r11354 - in trunk: . scratch/sc-xlsutil scratch/sc-xlsutil/src
- Date: Tue, 22 Jan 2008 06:53:30 +0000 (GMT)
Author: kyoshida
Date: Tue Jan 22 06:53:29 2008
New Revision: 11354
URL: http://svn.gnome.org/viewvc/ooo-build?rev=11354&view=rev
Log:
2008-01-22 Kohei Yoshida <kyoshida novell com>
* scratch/sc-xlsutil/src/ole.py:
* scratch/sc-xlsutil/src/stream.py:
* scratch/sc-xlsutil/xls_dump.py: more OLE dumping.
Modified:
trunk/ChangeLog
trunk/scratch/sc-xlsutil/src/ole.py
trunk/scratch/sc-xlsutil/src/stream.py
trunk/scratch/sc-xlsutil/xls_dump.py
Modified: trunk/scratch/sc-xlsutil/src/ole.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/ole.py (original)
+++ trunk/scratch/sc-xlsutil/src/ole.py Tue Jan 22 06:53:29 2008
@@ -2,8 +2,12 @@
import sys
import stream
-def output (msg):
- sys.stdout.write(msg)
+# ----------------------------------------------------------------------------
+# Reference: The Microsoft Compound Document File Format by Daniel Rentz
+# http://sc.openoffice.org/compdocfileformat.pdf
+# ----------------------------------------------------------------------------
+
+from globals import output
def printSep (c='-', w=68, prefix=''):
print(prefix + c*w)
@@ -42,6 +46,12 @@
BigEndian = 1
Unknown = 2
+class SectorType:
+ MSAT = 0
+ SAT = 1
+ SSAT = 2
+ Directory = 3
+
class Header(object):
@staticmethod
@@ -54,8 +64,9 @@
else:
return ByteOrder.Unknown
- def __init__ (self, chars):
- self.chars = chars
+
+ def __init__ (self, bytes):
+ self.bytes = bytes
self.MSAT = None
self.docId = None
@@ -64,19 +75,35 @@
self.version = 0
self.byteOrder = ByteOrder.Unknown
self.minStrmSize = 0
+
self.numSecMSAT = 0
self.numSecSSAT = 0
self.numSecSAT = 0
- self.secIDFirstMSAT = -2
+
+ self.__secIDFirstMSAT = -2
+ self.__secIDFirstDirStrm = -2
+ self.__secIDFirstSSAT = -2
+
self.secSize = 512
self.secSizeShort = 64
+
+ def getFirstSectorID (self, sectorType):
+ if sectorType == SectorType.MSAT:
+ return self.__secIDFirstMSAT
+ elif sectorType == SectorType.SSAT:
+ return self.__secIDFirstSSAT
+ elif sectorType == SectorType.Directory:
+ return self.__secIDFirstDirStrm
+ return -2
+
+
def dumpBytes (self, initPos=0, quitAtBOF=True):
# dump OLE header until it reaches BOF.
i, bprev = initPos, 0
labelPrinted = False
while True:
- b = ord(self.chars[i])
+ b = ord(self.bytes[i])
if quitAtBOF and bprev == 0x09 and b == 0x08:
# BOF reached
return i-1
@@ -95,13 +122,6 @@
return i
- def dumpSectorBytes (self, initPos, secSize):
- print('foo')
- for i in xrange(0, secSize):
- b = ord(self.chars[i+initPos])
- output("%2.2X "%b)
- if (i+1)%16 == 0:
- output("\n")
def output (self):
@@ -121,10 +141,10 @@
printRawBytes(self.uId)
# revision and version
- print("revision: %d version: %d"%(self.revision, self.version))
+ print("Revision: %d Version: %d"%(self.revision, self.version))
# byte order
- output("byte order: ")
+ output("Byte order: ")
if self.byteOrder == ByteOrder.LittleEndian:
print("little endian")
elif self.byteOrder == ByteOrder.BigEndian:
@@ -133,99 +153,119 @@
print("unknown")
# sector size (usually 512 bytes)
- print("Sector size: %d"%self.secSize)
+ print("Sector size: %d (%d)"%(2**self.secSize, self.secSize))
# short sector size (usually 64 bytes)
- print("Short sector size: %d"%self.secSizeShort)
+ print("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
# total number of sectors in SAT (equals the number of sector IDs
# stored in the MSAT).
print("Total number of sectors used in SAT: %d"%self.numSecSAT)
- # ???
- print("Section ID of the first sector of the directory stream: %d"%self.secIDFirstDirStrm)
+ print("Sector ID of the first sector of the directory stream: %d"%
+ self.__secIDFirstDirStrm)
print("Minimum stream size: %d"%self.minStrmSize)
- print("SecID of first SSAT sector: %d"%(512+self.secIDFirstSSAT*self.secSizeShort))
+ if self.__secIDFirstSSAT == -2:
+ print("Sector ID of the first SSAT sector: [none]")
+ else:
+ print("Sector ID of the first SSAT sector: %d"%self.__secIDFirstSSAT)
print("Total number of sectors used in SSAT: %d"%self.numSecSSAT)
- if self.secIDFirstMSAT == -2:
+ if self.__secIDFirstMSAT == -2:
# There is no more sector ID stored outside the header.
- print("SecID of first MSAT sector: [end of chain]")
+ print("Sector ID of the first MSAT sector: [end of chain]")
else:
# There is more sector IDs than 109 IDs stored in the header.
- print("SecID of first MSAT sector: %d"%(512+self.secIDFirstMSAT*self.secSize))
+ print("Sector ID of the first MSAT sector: %d"%(512+self.__secIDFirstMSAT*(2**self.secSize)))
print("Total number of sectors used to store additional MSAT: %d"%self.numSecMSAT)
+
def parse (self):
# document ID and unique ID
- self.docId = self.chars[0:8]
- self.uId = self.chars[8:24]
+ self.docId = self.bytes[0:8]
+ self.uId = self.bytes[8:24]
# revision and version
- self.revision = getSignedInt(self.chars[24:26])
- self.version = getSignedInt(self.chars[26:28])
+ self.revision = getSignedInt(self.bytes[24:26])
+ self.version = getSignedInt(self.bytes[26:28])
# byte order
- self.byteOrder = Header.byteOrder(self.chars[28:30])
+ self.byteOrder = Header.byteOrder(self.bytes[28:30])
# sector size (usually 512 bytes)
- self.secSize = 2**getSignedInt(self.chars[30:32])
+ self.secSize = getSignedInt(self.bytes[30:32])
# short sector size (usually 64 bytes)
- self.secSizeShort = 2**getSignedInt(self.chars[32:34])
+ self.secSizeShort = getSignedInt(self.bytes[32:34])
# total number of sectors in SAT (equals the number of sector IDs
# stored in the MSAT).
- self.numSecSAT = getSignedInt(self.chars[44:48])
+ self.numSecSAT = getSignedInt(self.bytes[44:48])
- self.secIDFirstDirStrm = getSignedInt(self.chars[48:52])
- self.minStrmSize = getSignedInt(self.chars[56:60])
- self.secIDFirstSSAT = getSignedInt(self.chars[60:64])
- self.numSecSSAT = getSignedInt(self.chars[64:68])
- self.secIDFirstMSAT = getSignedInt(self.chars[68:72])
- self.numSecMSAT = getSignedInt(self.chars[72:76])
+ self.__secIDFirstDirStrm = getSignedInt(self.bytes[48:52])
+ self.minStrmSize = getSignedInt(self.bytes[56:60])
+ self.__secIDFirstSSAT = getSignedInt(self.bytes[60:64])
+ self.numSecSSAT = getSignedInt(self.bytes[64:68])
+ self.__secIDFirstMSAT = getSignedInt(self.bytes[68:72])
+ self.numSecMSAT = getSignedInt(self.bytes[72:76])
- self.MSAT = MSAT(self.secSize)
+ # master sector allocation table
+ self.MSAT = MSAT(2**self.secSize, self.bytes)
# First part of MSAT consisting of an array of up to 109 sector IDs.
# Each sector ID is 4 bytes in length.
for i in xrange(0, 109):
pos = 76 + i*4
- id = getSignedInt(self.chars[pos:pos+4])
+ id = getSignedInt(self.bytes[pos:pos+4])
if id == -1:
break
self.MSAT.appendSectorID(id)
return 512
-# return self.dumpBytes(512)
- def dumpSAT (self):
- for pos in self.secPosList:
- self.dumpSectorBytes(pos, 512)
- return
def getMSAT (self):
return self.MSAT
+ def getSAT (self):
+ return self.MSAT.getSAT()
+
+
+ def getSSAT (self):
+ ssatID = self.getFirstSectorID(SectorType.SSAT)
+ if ssatID < 0:
+ return None
+ chain = self.MSAT.getSAT().getSectorIDChain(ssatID)
+ if len(chain) == 0:
+ return None
+ obj = SSAT(2**self.secSize, self.bytes)
+ for secID in chain:
+ obj.addSector(secID)
+ obj.buildArray()
+ return obj
+
+
+
class MSAT(object):
- """Master Sector Allocation Table
+ """Master Sector Allocation Table (MSAT)
-This class represents the master sector allocation table (MSAT) that
-stores sector IDs that point to the sectors that are used by the sector
-allocation table (SAT). The actual SAT are to be constructed by
-combining all the sectors pointed by the sector IDs in order of
-occurrence.
+This class represents the master sector allocation table (MSAT) that stores
+sector IDs that point to all the sectors that are used by the sector
+allocation table (SAT). The actual SAT is to be constructed by combining
+all the sectors pointed to by the sector IDs in order of occurrence.
"""
- def __init__ (self, sectorSize):
+ def __init__ (self, sectorSize, bytes):
self.sectorSize = sectorSize
self.secIDs = []
+ self.bytes = bytes
+ self.__SAT = None
def appendSectorID (self, id):
self.secIDs.append(id)
@@ -233,12 +273,101 @@
def output (self):
print('')
print("="*68)
- print("Master Sector Allocation Table")
+ print("Master Sector Allocation Table (MSAT)")
print("-"*68)
for id in self.secIDs:
print("sector ID: %5d (pos: %7d)"%(id, 512+id*self.sectorSize))
+ def getSATSectorPosList (self):
+ list = []
+ for id in self.secIDs:
+ pos = 512 + id*self.sectorSize
+ list.append([id, pos])
+ return list
+
+ def getSAT (self):
+ if self.__SAT != None:
+ return self.__SAT
+
+ obj = SAT(self.sectorSize, self.bytes)
+ for id in self.secIDs:
+ obj.addSector(id)
+ obj.buildArray()
+ self.__SAT = obj
+ return self.__SAT
+
+
+class SAT(object):
+ """Sector Allocation Table (SAT)
+"""
+ def __init__ (self, sectorSize, bytes):
+ self.sectorSize = sectorSize
+ self.sectorIDs = []
+ self.bytes = bytes
+ self.array = []
+
+
+ def addSector (self, pos):
+ self.sectorIDs.append(pos)
+
+
+ def buildArray (self):
+ numItems = int(self.sectorSize/4)
+ self.array = []
+ for secID in self.sectorIDs:
+ pos = 512 + secID*self.sectorSize
+ for i in xrange(0, numItems):
+ beginPos = pos + i*4
+ id = getSignedInt(self.bytes[beginPos:beginPos+4])
+ self.array.append(id)
+
+
+ def output (self):
+ print('')
+ print("="*68)
+ print("Sector Allocation Table (SAT)")
+ print("-"*68)
+ for i in xrange(0, len(self.array)):
+ item = self.array[i]
+ output("%3d : %3d\n"%(i, item))
+
+
+ def getSectorIDChain (self, initID):
+ chain = [initID]
+ nextID = self.array[initID]
+ while nextID != -2:
+ chain.append(nextID)
+ nextID = self.array[nextID]
+ return chain
+
+
+class SSAT(SAT):
+ """Short Sector Allocation Table (SSAT)
+
+SSAT contains an array of sector ID chains of all short streams, as opposed
+to SAT which contains an array of sector ID chains of all standard streams.
+The sector IDs included in the SSAT point to the short sectors in the short
+stream container stream.
+
+The first sector ID of SSAT is in the header, and the IDs of the remaining
+sectors are contained in the SAT as a sector ID chain.
+"""
+
+ def __init__ (self, sectorSize, bytes):
+ SAT.__init__(self, sectorSize, bytes)
+ return
+
+ def output (self):
+ print('')
+ print("="*68)
+ print("Short Sector Allocation Table (SSAT)")
+ print("-"*68)
+ for i in xrange(0, len(self.array)):
+ item = self.array[i]
+ output("%3d : %3d\n"%(i, item))
+
+
Modified: trunk/scratch/sc-xlsutil/src/stream.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/stream.py (original)
+++ trunk/scratch/sc-xlsutil/src/stream.py Tue Jan 22 06:53:29 2008
@@ -1,6 +1,6 @@
import sys
-import ole
+import ole, globals
recData = {
0x000A: ["EOF", "End of File"],
@@ -179,14 +179,15 @@
class XLStream(object):
- def __init__ (self, file):
- self.chars = file.read()
+ def __init__ (self, chars):
+ self.chars = chars
self.size = len(self.chars)
self.pos = 0
self.version = None
self.header = None
self.MSAT = None
+ self.SAT = None
def __printSep (self, c='-', w=68, prefix=''):
print(prefix + c*w)
@@ -206,6 +207,25 @@
def printMSAT (self):
self.MSAT.output()
+# secPosList = self.MSAT.getSATSectorPosList()
+# secSize = self.MSAT.sectorSize
+# for sec in secPosList:
+# id, pos = sec[0], sec[1]
+# print("")
+# print("-"*68)
+# print("Sector ID: %d (pos = %d)"%(id, pos))
+# print("-"*68)
+# globals.dumpBytes(self.chars[pos:pos+secSize])
+
+ def printSAT (self):
+ sat = self.MSAT.getSAT()
+ sat.output()
+
+ def printSSAT (self):
+ obj = self.header.getSSAT()
+ if obj == None:
+ return
+ obj.output()
def dumpHeader (self):
oleobj = ole.Header(self.chars)
Modified: trunk/scratch/sc-xlsutil/xls_dump.py
==============================================================================
--- trunk/scratch/sc-xlsutil/xls_dump.py (original)
+++ trunk/scratch/sc-xlsutil/xls_dump.py Tue Jan 22 06:53:29 2008
@@ -11,14 +11,16 @@
def dump (self):
file = open(self.filepath, 'r')
- strm = stream.XLStream(file)
+ strm = stream.XLStream(file.read())
+ file.close()
strm.printStreamInfo()
strm.printHeader()
strm.printMSAT()
+ strm.printSAT()
+ strm.printSSAT()
success = True
while success:
success = self.__read(strm)
- file.close()
def __read (self, strm):
# read bytes from BOF to EOF.
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]