ooo-build r11354 - in trunk: . scratch/sc-xlsutil scratch/sc-xlsutil/src



Author: kyoshida
Date: Tue Jan 22 06:53:29 2008
New Revision: 11354
URL: http://svn.gnome.org/viewvc/ooo-build?rev=11354&view=rev

Log:
2008-01-22  Kohei Yoshida  <kyoshida novell com>

	* scratch/sc-xlsutil/src/ole.py:
	* scratch/sc-xlsutil/src/stream.py:
	* scratch/sc-xlsutil/xls_dump.py: more OLE dumping.


Modified:
   trunk/ChangeLog
   trunk/scratch/sc-xlsutil/src/ole.py
   trunk/scratch/sc-xlsutil/src/stream.py
   trunk/scratch/sc-xlsutil/xls_dump.py

Modified: trunk/scratch/sc-xlsutil/src/ole.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/ole.py	(original)
+++ trunk/scratch/sc-xlsutil/src/ole.py	Tue Jan 22 06:53:29 2008
@@ -2,8 +2,12 @@
 import sys
 import stream
 
-def output (msg):
-    sys.stdout.write(msg)
+# ----------------------------------------------------------------------------
+# Reference: The Microsoft Compound Document File Format by Daniel Rentz
+# http://sc.openoffice.org/compdocfileformat.pdf
+# ----------------------------------------------------------------------------
+
+from globals import output
 
 def printSep (c='-', w=68, prefix=''):
     print(prefix + c*w)
@@ -42,6 +46,12 @@
     BigEndian = 1
     Unknown = 2
 
+class SectorType:
+    MSAT      = 0
+    SAT       = 1
+    SSAT      = 2
+    Directory = 3
+
 class Header(object):
 
     @staticmethod
@@ -54,8 +64,9 @@
         else:
             return ByteOrder.Unknown
 
-    def __init__ (self, chars):
-        self.chars = chars
+
+    def __init__ (self, bytes):
+        self.bytes = bytes
         self.MSAT = None
 
         self.docId = None
@@ -64,19 +75,35 @@
         self.version = 0
         self.byteOrder = ByteOrder.Unknown
         self.minStrmSize = 0
+
         self.numSecMSAT = 0
         self.numSecSSAT = 0
         self.numSecSAT = 0
-        self.secIDFirstMSAT = -2
+
+        self.__secIDFirstMSAT = -2
+        self.__secIDFirstDirStrm = -2
+        self.__secIDFirstSSAT = -2
+
         self.secSize = 512
         self.secSizeShort = 64
 
+
+    def getFirstSectorID (self, sectorType):
+        if sectorType == SectorType.MSAT:
+            return self.__secIDFirstMSAT
+        elif sectorType == SectorType.SSAT:
+            return self.__secIDFirstSSAT
+        elif sectorType == SectorType.Directory:
+            return self.__secIDFirstDirStrm
+        return -2
+
+
     def dumpBytes (self, initPos=0, quitAtBOF=True):
         # dump OLE header until it reaches BOF.
         i, bprev = initPos, 0
         labelPrinted = False
         while True:
-            b = ord(self.chars[i])
+            b = ord(self.bytes[i])
             if quitAtBOF and bprev == 0x09 and b == 0x08:
                 # BOF reached
                 return i-1
@@ -95,13 +122,6 @@
 
         return i
 
-    def dumpSectorBytes (self, initPos, secSize):
-        print('foo')
-        for i in xrange(0, secSize):
-            b = ord(self.chars[i+initPos])
-            output("%2.2X "%b)
-            if (i+1)%16 == 0:
-                output("\n")
 
     def output (self):
 
@@ -121,10 +141,10 @@
         printRawBytes(self.uId)
 
         # revision and version
-        print("revision: %d  version: %d"%(self.revision, self.version))
+        print("Revision: %d  Version: %d"%(self.revision, self.version))
 
         # byte order
-        output("byte order: ")
+        output("Byte order: ")
         if self.byteOrder == ByteOrder.LittleEndian:
             print("little endian")
         elif self.byteOrder == ByteOrder.BigEndian:
@@ -133,99 +153,119 @@
             print("unknown")
 
         # sector size (usually 512 bytes)
-        print("Sector size: %d"%self.secSize)
+        print("Sector size: %d (%d)"%(2**self.secSize, self.secSize))
 
         # short sector size (usually 64 bytes)
-        print("Short sector size: %d"%self.secSizeShort)
+        print("Short sector size: %d (%d)"%(2**self.secSizeShort, self.secSizeShort))
 
         # total number of sectors in SAT (equals the number of sector IDs 
         # stored in the MSAT).
         print("Total number of sectors used in SAT: %d"%self.numSecSAT)
 
-        # ???
-        print("Section ID of the first sector of the directory stream: %d"%self.secIDFirstDirStrm)
+        print("Sector ID of the first sector of the directory stream: %d"%
+              self.__secIDFirstDirStrm)
 
         print("Minimum stream size: %d"%self.minStrmSize)
 
-        print("SecID of first SSAT sector: %d"%(512+self.secIDFirstSSAT*self.secSizeShort))
+        if self.__secIDFirstSSAT == -2:
+            print("Sector ID of the first SSAT sector: [none]")
+        else:
+            print("Sector ID of the first SSAT sector: %d"%self.__secIDFirstSSAT)
 
         print("Total number of sectors used in SSAT: %d"%self.numSecSSAT)
 
-        if self.secIDFirstMSAT == -2:
+        if self.__secIDFirstMSAT == -2:
             # There is no more sector ID stored outside the header.
-            print("SecID of first MSAT sector: [end of chain]")
+            print("Sector ID of the first MSAT sector: [end of chain]")
         else:
             # There is more sector IDs than 109 IDs stored in the header.
-            print("SecID of first MSAT sector: %d"%(512+self.secIDFirstMSAT*self.secSize))
+            print("Sector ID of the first MSAT sector: %d"%(512+self.__secIDFirstMSAT*(2**self.secSize)))
 
         print("Total number of sectors used to store additional MSAT: %d"%self.numSecMSAT)
 
+
     def parse (self):
 
         # document ID and unique ID
-        self.docId = self.chars[0:8]
-        self.uId = self.chars[8:24]
+        self.docId = self.bytes[0:8]
+        self.uId = self.bytes[8:24]
 
         # revision and version
-        self.revision = getSignedInt(self.chars[24:26])
-        self.version = getSignedInt(self.chars[26:28])
+        self.revision = getSignedInt(self.bytes[24:26])
+        self.version = getSignedInt(self.bytes[26:28])
 
         # byte order
-        self.byteOrder = Header.byteOrder(self.chars[28:30])
+        self.byteOrder = Header.byteOrder(self.bytes[28:30])
 
         # sector size (usually 512 bytes)
-        self.secSize = 2**getSignedInt(self.chars[30:32])
+        self.secSize = getSignedInt(self.bytes[30:32])
 
         # short sector size (usually 64 bytes)
-        self.secSizeShort = 2**getSignedInt(self.chars[32:34])
+        self.secSizeShort = getSignedInt(self.bytes[32:34])
 
         # total number of sectors in SAT (equals the number of sector IDs 
         # stored in the MSAT).
-        self.numSecSAT = getSignedInt(self.chars[44:48])
+        self.numSecSAT = getSignedInt(self.bytes[44:48])
 
-        self.secIDFirstDirStrm = getSignedInt(self.chars[48:52])
-        self.minStrmSize = getSignedInt(self.chars[56:60])
-        self.secIDFirstSSAT = getSignedInt(self.chars[60:64])
-        self.numSecSSAT = getSignedInt(self.chars[64:68])
-        self.secIDFirstMSAT = getSignedInt(self.chars[68:72])
-        self.numSecMSAT = getSignedInt(self.chars[72:76])
+        self.__secIDFirstDirStrm = getSignedInt(self.bytes[48:52])
+        self.minStrmSize = getSignedInt(self.bytes[56:60])
+        self.__secIDFirstSSAT = getSignedInt(self.bytes[60:64])
+        self.numSecSSAT = getSignedInt(self.bytes[64:68])
+        self.__secIDFirstMSAT = getSignedInt(self.bytes[68:72])
+        self.numSecMSAT = getSignedInt(self.bytes[72:76])
 
-        self.MSAT = MSAT(self.secSize)
+        # master sector allocation table
+        self.MSAT = MSAT(2**self.secSize, self.bytes)
 
         # First part of MSAT consisting of an array of up to 109 sector IDs.
         # Each sector ID is 4 bytes in length.
         for i in xrange(0, 109):
             pos = 76 + i*4
-            id = getSignedInt(self.chars[pos:pos+4])
+            id = getSignedInt(self.bytes[pos:pos+4])
             if id == -1:
                 break
 
             self.MSAT.appendSectorID(id)
 
         return 512
-#       return self.dumpBytes(512)
 
-    def dumpSAT (self):
-        for pos in self.secPosList:
-            self.dumpSectorBytes(pos, 512)
-        return
 
     def getMSAT (self):
         return self.MSAT
 
 
+    def getSAT (self):
+        return self.MSAT.getSAT()
+
+
+    def getSSAT (self):
+        ssatID = self.getFirstSectorID(SectorType.SSAT)
+        if ssatID < 0:
+            return None
+        chain = self.MSAT.getSAT().getSectorIDChain(ssatID)
+        if len(chain) == 0:
+            return None
+        obj = SSAT(2**self.secSize, self.bytes)
+        for secID in chain:
+            obj.addSector(secID)
+        obj.buildArray()
+        return obj
+
+
+
 class MSAT(object):
-    """Master Sector Allocation Table
+    """Master Sector Allocation Table (MSAT)
 
-This class represents the master sector allocation table (MSAT) that
-stores sector IDs that point to the sectors that are used by the sector
-allocation table (SAT).  The actual SAT are to be constructed by 
-combining all the sectors pointed by the sector IDs in order of 
-occurrence.
+This class represents the master sector allocation table (MSAT) that stores 
+sector IDs that point to all the sectors that are used by the sector 
+allocation table (SAT).  The actual SAT is to be constructed by combining 
+all the sectors pointed by the sector IDs in order of occurrence.
 """
-    def __init__ (self, sectorSize):
+    def __init__ (self, sectorSize, bytes):
         self.sectorSize = sectorSize
         self.secIDs = []
+        self.bytes = bytes
+        self.__SAT = None
 
     def appendSectorID (self, id):
         self.secIDs.append(id)
@@ -233,12 +273,101 @@
     def output (self):
         print('')
         print("="*68)
-        print("Master Sector Allocation Table")
+        print("Master Sector Allocation Table (MSAT)")
         print("-"*68)
 
         for id in self.secIDs:
             print("sector ID: %5d   (pos: %7d)"%(id, 512+id*self.sectorSize))
 
+    def getSATSectorPosList (self):
+        list = []
+        for id in self.secIDs:
+            pos = 512 + id*self.sectorSize
+            list.append([id, pos])
+        return list
+
+    def getSAT (self):
+        if self.__SAT != None:
+            return self.__SAT
+
+        obj = SAT(self.sectorSize, self.bytes)
+        for id in self.secIDs:
+            obj.addSector(id)
+        obj.buildArray()
+        self.__SAT = obj
+        return self.__SAT
+
+
+class SAT(object):
+    """Sector Allocation Table (SAT)
+"""
+    def __init__ (self, sectorSize, bytes):
+        self.sectorSize = sectorSize
+        self.sectorIDs = []
+        self.bytes = bytes
+        self.array = []
+
+
+    def addSector (self, pos):
+        self.sectorIDs.append(pos)
+
+
+    def buildArray (self):
+        numItems = int(self.sectorSize/4)
+        self.array = []
+        for secID in self.sectorIDs:
+            pos = 512 + secID*self.sectorSize
+            for i in xrange(0, numItems):
+                beginPos = pos + i*4
+                id = getSignedInt(self.bytes[beginPos:beginPos+4])
+                self.array.append(id)
+
+
+    def output (self):
+        print('')
+        print("="*68)
+        print("Sector Allocation Table (SAT)")
+        print("-"*68)
+        for i in xrange(0, len(self.array)):
+            item = self.array[i]
+            output("%3d : %3d\n"%(i, item))
+
+
+    def getSectorIDChain (self, initID):
+        chain = [initID]
+        nextID = self.array[initID]
+        while nextID != -2:
+            chain.append(nextID)
+            nextID = self.array[nextID]
+        return chain
+
+
+class SSAT(SAT):
+    """Short Sector Allocation Table (SSAT)
+
+SSAT contains an array of sector ID chains of all short streams, as opposed 
+to SAT which contains an array of sector ID chains of all standard streams.
+The sector IDs included in the SSAT point to the short sectors in the short
+stream container stream.
+
+The first sector ID of SSAT is in the header, and the IDs of the remaining 
+sectors are contained in the SAT as a sector ID chain.
+"""
+
+    def __init__ (self, sectorSize, bytes):
+        SAT.__init__(self, sectorSize, bytes)
+        return
+
+    def output (self):
+        print('')
+        print("="*68)
+        print("Short Sector Allocation Table (SSAT)")
+        print("-"*68)
+        for i in xrange(0, len(self.array)):
+            item = self.array[i]
+            output("%3d : %3d\n"%(i, item))
+
+
 
 
 

Modified: trunk/scratch/sc-xlsutil/src/stream.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/stream.py	(original)
+++ trunk/scratch/sc-xlsutil/src/stream.py	Tue Jan 22 06:53:29 2008
@@ -1,6 +1,6 @@
 
 import sys
-import ole
+import ole, globals
 
 recData = {
     0x000A: ["EOF", "End of File"],
@@ -179,14 +179,15 @@
 
 class XLStream(object):
 
-    def __init__ (self, file):
-        self.chars = file.read()
+    def __init__ (self, chars):
+        self.chars = chars
         self.size = len(self.chars)
         self.pos = 0
         self.version = None
 
         self.header = None
         self.MSAT = None
+        self.SAT = None
 
     def __printSep (self, c='-', w=68, prefix=''):
         print(prefix + c*w)
@@ -206,6 +207,25 @@
 
     def printMSAT (self):
         self.MSAT.output()
+#       secPosList = self.MSAT.getSATSectorPosList()
+#       secSize = self.MSAT.sectorSize
+#       for sec in secPosList:
+#           id, pos = sec[0], sec[1]
+#           print("")
+#           print("-"*68)
+#           print("Sector ID: %d  (pos = %d)"%(id, pos))
+#           print("-"*68)
+#           globals.dumpBytes(self.chars[pos:pos+secSize])
+
+    def printSAT (self):
+        sat = self.MSAT.getSAT()
+        sat.output()
+
+    def printSSAT (self):
+        obj = self.header.getSSAT()
+        if obj == None:
+            return
+        obj.output()
 
     def dumpHeader (self):
         oleobj = ole.Header(self.chars)

Modified: trunk/scratch/sc-xlsutil/xls_dump.py
==============================================================================
--- trunk/scratch/sc-xlsutil/xls_dump.py	(original)
+++ trunk/scratch/sc-xlsutil/xls_dump.py	Tue Jan 22 06:53:29 2008
@@ -11,14 +11,16 @@
 
     def dump (self):
         file = open(self.filepath, 'r')
-        strm = stream.XLStream(file)
+        strm = stream.XLStream(file.read())
+        file.close()
         strm.printStreamInfo()
         strm.printHeader()
         strm.printMSAT()
+        strm.printSAT()
+        strm.printSSAT()
         success = True
         while success: 
             success = self.__read(strm)
-        file.close()
 
     def __read (self, strm):
         # read bytes from BOF to EOF.



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]