ooo-build r11474 - in trunk: . scratch/sc-xlsutil scratch/sc-xlsutil/src



Author: kyoshida
Date: Thu Jan 31 00:02:17 2008
New Revision: 11474
URL: http://svn.gnome.org/viewvc/ooo-build?rev=11474&view=rev

Log:
2008-01-30  Kohei Yoshida  <kyoshida novell com>

	* scratch/sc-xlsutil/src/globals.py: better raw byte dumping output.
	
	* scratch/sc-xlsutil/src/ole.py:
	* scratch/sc-xlsutil/src/stream.py:
	* scratch/sc-xlsutil/xls_dump.py: added a mechanism to store the
	command-line options and pass them across class instances, and fixed
	a bug that constructed a stream incorrectly when it is a short stream
	located in the root storage.


Modified:
   trunk/ChangeLog
   trunk/scratch/sc-xlsutil/src/globals.py
   trunk/scratch/sc-xlsutil/src/ole.py
   trunk/scratch/sc-xlsutil/src/stream.py
   trunk/scratch/sc-xlsutil/xls_dump.py

Modified: trunk/scratch/sc-xlsutil/src/globals.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/globals.py	(original)
+++ trunk/scratch/sc-xlsutil/src/globals.py	Thu Jan 31 00:02:17 2008
@@ -1,8 +1,14 @@
 
-import sys, struct
+import sys, struct, math
 
 class ByteConvertError(Exception): pass
 
+
+class Params(object):
+    def __init__ (self):
+        self.Debug = False
+
+
 def output (msg):
     sys.stdout.write(msg)
 
@@ -61,16 +67,33 @@
     subDivideLine = None
     if subDivide != None:
         subDivideLine = subDivide/16
-    for i in xrange(0, len(chars)):
+
+    flushBytes = True
+    charLen = len(chars)
+    labelWidth = int(math.ceil(math.log(charLen, 10)))
+    for i in xrange(0, charLen):
+        if (i+1)%16 == 1:
+            # print line header with seek position
+            fmt = "%%%d.%dd: "%(labelWidth, labelWidth)
+            output(fmt%i)
+
         byte = ord(chars[i])
         output("%2.2X "%byte)
+        flushBytes = True
+
+        if (i+1)%4 == 0:
+            # put extra space at every 4 bytes.
+            output(" ")
+
         if (i+1)%16 == 0:
             output("\n")
+            flushBytes = False
             if subDivideLine != None and (line+1)%subDivideLine == 0:
                 output("\n")
             line += 1
 
-    output("\n")
+    if flushBytes:
+        output("\n")
 
 
 def getSectorPos (secID, secSize):

Modified: trunk/scratch/sc-xlsutil/src/ole.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/ole.py	(original)
+++ trunk/scratch/sc-xlsutil/src/ole.py	Thu Jan 31 00:02:17 2008
@@ -9,9 +9,8 @@
 
 from globals import output
 
-def printSep (c='-', w=68, prefix=''):
-    print(prefix + c*w)
 
+class NoRootStorage(Exception): pass
 
 class ByteOrder:
     LittleEndian = 0
@@ -41,7 +40,7 @@
             return ByteOrder.Unknown
 
 
-    def __init__ (self, bytes):
+    def __init__ (self, bytes, params):
         self.bytes = bytes
         self.MSAT = None
 
@@ -63,6 +62,7 @@
         self.secSize = 512
         self.secSizeShort = 64
 
+        self.params = params
 
     def getSectorSize (self):
         return 2**self.secSize
@@ -89,10 +89,17 @@
                 output("%2.2X "%ord(b))
             output("\n")
 
+        def printSep (c='-', w=68, prefix=''):
+            print(prefix + c*w)
+
         printSep('=', 68)
         print("Compound Document Header")
         printSep('-', 68)
 
+        if self.params.Debug:
+            globals.dumpBytes(self.bytes[0:512])
+            printSep('-', 68)
+
         # document ID and unique ID
         output("Document ID: ")
         printRawBytes(self.docId)
@@ -174,7 +181,7 @@
         self.numSecMSAT = getSignedInt(self.bytes[72:76])
 
         # master sector allocation table
-        self.MSAT = MSAT(2**self.secSize, self.bytes)
+        self.MSAT = MSAT(2**self.secSize, self.bytes, self.params)
 
         # First part of MSAT consisting of an array of up to 109 sector IDs.
         # Each sector ID is 4 bytes in length.
@@ -204,7 +211,7 @@
         chain = self.getSAT().getSectorIDChain(ssatID)
         if len(chain) == 0:
             return None
-        obj = SSAT(2**self.secSize, self.bytes)
+        obj = SSAT(2**self.secSize, self.bytes, self.params)
         for secID in chain:
             obj.addSector(secID)
         obj.buildArray()
@@ -218,7 +225,7 @@
         chain = self.getSAT().getSectorIDChain(dirID)
         if len(chain) == 0:
             return None
-        obj = Directory(self)
+        obj = Directory(self, self.params)
         for secID in chain:
             obj.addSector(secID)
         return obj
@@ -238,12 +245,14 @@
 allocation table (SAT).  The actual SAT are to be constructed by combining 
 all the sectors pointed by the sector IDs in order of occurrence.
 """
-    def __init__ (self, sectorSize, bytes):
+    def __init__ (self, sectorSize, bytes, params):
         self.sectorSize = sectorSize
         self.secIDs = []
         self.bytes = bytes
         self.__SAT = None
 
+        self.params = params
+
     def appendSectorID (self, id):
         self.secIDs.append(id)
 
@@ -267,7 +276,7 @@
         if self.__SAT != None:
             return self.__SAT
 
-        obj = SAT(self.sectorSize, self.bytes)
+        obj = SAT(self.sectorSize, self.bytes, self.params)
         for id in self.secIDs:
             obj.addSector(id)
         obj.buildArray()
@@ -278,12 +287,14 @@
 class SAT(object):
     """Sector Allocation Table (SAT)
 """
-    def __init__ (self, sectorSize, bytes):
+    def __init__ (self, sectorSize, bytes, params):
         self.sectorSize = sectorSize
         self.sectorIDs = []
         self.bytes = bytes
         self.array = []
 
+        self.params = params
+
 
     def getSectorSize (self):
         return self.sectorSize
@@ -308,11 +319,15 @@
                 self.array.append(id)
 
 
-    def output (self):
-        print('')
-        print("="*68)
-        print("Sector Allocation Table (SAT)")
-        print("-"*68)
+    def outputRawBytes (self):
+        bytes = []
+        for secID in self.sectorIDs:
+            pos = 512 + secID*self.sectorSize
+            bytes.extend(self.bytes[pos:pos+self.sectorSize])
+        globals.dumpBytes(bytes, 512)
+
+
+    def outputArrayStats (self):
         sectorTotal = len(self.array)
         sectorP  = 0       # >= 0
         sectorM1 = 0       # -1
@@ -347,6 +362,21 @@
         print("* other sector count:        %4d"%sectorMElse)
 
 
+    def output (self):
+        print('')
+        print("="*68)
+        print("Sector Allocation Table (SAT)")
+        print("-"*68)
+        if self.params.Debug:
+            self.outputRawBytes()
+            print("-"*68)
+            for i in xrange(0, len(self.array)):
+                print("%5d: %5d"%(i, self.array[i]))
+            print("-"*68)
+
+        self.outputArrayStats()
+
+
     def getSectorIDChain (self, initID):
         if initID < 0:
             return []
@@ -370,21 +400,27 @@
 sectors are contained in the SAT as a sector ID chain.
 """
 
-    def __init__ (self, sectorSize, bytes):
-        SAT.__init__(self, sectorSize, bytes)
-        return
-
     def output (self):
         print('')
         print("="*68)
         print("Short Sector Allocation Table (SSAT)")
         print("-"*68)
-        for i in xrange(0, len(self.array)):
-            item = self.array[i]
-            output("%3d : %3d\n"%(i, item))
+        if self.params.Debug:
+            self.outputRawBytes()
+            print("-"*68)
+            for i in xrange(0, len(self.array)):
+                item = self.array[i]
+                output("%3d : %3d\n"%(i, item))
+
+        self.outputArrayStats()
 
 
 class Directory(object):
+    """Directory Entries
+
+This stream contains a list of directory entries that are stored within the
+entire file stream.
+"""
 
     class Type:
         Empty = 0
@@ -414,9 +450,10 @@
             self.TimeModified = None
             self.StreamSectorID = -2
             self.StreamSize = 0
+            self.bytes = []
 
 
-    def __init__ (self, header):
+    def __init__ (self, header, params):
         self.sectorSize = header.getSectorSize()
         self.bytes = header.bytes
         self.minStreamSize = header.minStreamSize
@@ -425,7 +462,21 @@
         self.SAT = header.getSAT()
         self.SSAT = header.getSSAT()
         self.header = header
-        self.posRootStorage = None
+        self.RootStorage = None
+        self.RootStorageBytes = []
+        self.params = params
+
+
+    def __buildRootStorageBytes (self):
+        if self.RootStorage == None:
+            # no root storage exists.
+            return
+
+        firstSecID = self.RootStorage.StreamSectorID
+        chain = self.header.getSAT().getSectorIDChain(firstSecID)
+        for secID in chain:
+            pos = 512 + secID*self.sectorSize
+            self.RootStorageBytes.extend(self.header.bytes[pos:pos+self.sectorSize])
 
 
     def __getRawStream (self, entry):
@@ -435,14 +486,23 @@
         elif entry.StreamLocation == StreamLocation.SSAT:
             chain = self.header.getSSAT().getSectorIDChain(entry.StreamSectorID)
 
-        offset = 512
-        size = self.header.getSectorSize()
-        bytes = []
+
         if entry.StreamLocation == StreamLocation.SSAT:
-            # get root storage position
-            offset = self.posRootStorage
+            # Get the root storage stream.
+            if self.RootStorage == None:
+                raise NoRootStorage
+
+            bytes = []
+            self.__buildRootStorageBytes()
             size = self.header.getShortSectorSize()
+            for id in chain:
+                pos = id*size
+                bytes.extend(self.RootStorageBytes[pos:pos+size])
+            return bytes
 
+        offset = 512
+        size = self.header.getSectorSize()
+        bytes = []
         for id in chain:
             pos = offset + id*size
             bytes.extend(self.header.bytes[pos:pos+size])
@@ -495,6 +555,11 @@
         else:
             print("name: [empty]   (name buffer size: %d bytes)"%entry.CharBufferSize)
 
+        if self.params.Debug:
+            print("-"*68)
+            globals.dumpBytes(entry.bytes)
+            print("-"*68)
+
         output("type: ")
         if entry.Type == Directory.Type.Empty:
             print("empty")
@@ -614,6 +679,7 @@
 
     def parseDirEntry (self, bytes):
         entry = Directory.Entry()
+        entry.bytes = bytes
         name = globals.getUTF8FromUTF16(bytes[0:64])
         entry.Name = name
         entry.CharBufferSize = getSignedInt(bytes[64:66])
@@ -637,7 +703,8 @@
             entry.StreamLocation = StreamLocation.SSAT
 
         if entry.Type == Directory.Type.RootStorage and entry.StreamSectorID >= 0:
-            self.posRootStorage = 512 + entry.StreamSectorID*self.header.getSectorSize()
+            # This is an existing root storage.
+            self.RootStorage = entry
 
         return entry
 

Modified: trunk/scratch/sc-xlsutil/src/stream.py
==============================================================================
--- trunk/scratch/sc-xlsutil/src/stream.py	(original)
+++ trunk/scratch/sc-xlsutil/src/stream.py	Thu Jan 31 00:02:17 2008
@@ -255,7 +255,7 @@
 
 class XLStream(object):
 
-    def __init__ (self, chars):
+    def __init__ (self, chars, params):
         self.chars = chars
         self.size = len(self.chars)
         self.pos = 0
@@ -265,6 +265,8 @@
         self.MSAT = None
         self.SAT = None
 
+        self.params = params
+
     def __printSep (self, c='-', w=68, prefix=''):
         print(prefix + c*w)
 
@@ -276,7 +278,7 @@
         print('')
 
     def printHeader (self):
-        self.header = ole.Header(self.chars)
+        self.header = ole.Header(self.chars, self.params)
         self.pos = self.header.parse()
         self.header.output()
         self.MSAT = self.header.getMSAT()
@@ -321,7 +323,7 @@
         bytes = []
         if obj != None:
             bytes = obj.getRawStreamByName(name)
-        strm = XLDirStream(bytes)
+        strm = XLDirStream(bytes, self.params)
         return strm
 
 class DirType:
@@ -330,12 +332,14 @@
 
 class XLDirStream(object):
 
-    def __init__ (self, bytes):
+    def __init__ (self, bytes, params):
         self.bytes = bytes
         self.size = len(self.bytes)
         self.pos = 0
         self.type = DirType.Workbook
 
+        self.params = params
+
     def seekBOF (self):
         while self.pos < self.size-1:
             b1, b2 = ord(self.bytes[self.pos]), ord(self.bytes[self.pos+1])

Modified: trunk/scratch/sc-xlsutil/xls_dump.py
==============================================================================
--- trunk/scratch/sc-xlsutil/xls_dump.py	(original)
+++ trunk/scratch/sc-xlsutil/xls_dump.py	Thu Jan 31 00:02:17 2008
@@ -18,8 +18,9 @@
 
 class XLDumper(object):
 
-    def __init__ (self, filepath):
+    def __init__ (self, filepath, params):
         self.filepath = filepath
+        self.params = params
 
     def __printDirHeader (self, dirname, byteLen):
         dirname = globals.decodeName(dirname)
@@ -30,13 +31,13 @@
 
     def dump (self):
         file = open(self.filepath, 'rb')
-        strm = stream.XLStream(file.read())
+        strm = stream.XLStream(file.read(), self.params)
         file.close()
         strm.printStreamInfo()
         strm.printHeader()
         strm.printMSAT()
         strm.printSAT()
-#       strm.printSSAT()
+        strm.printSSAT()
         strm.printDirectory()
         dirnames = strm.getDirectoryNames()
         for dirname in dirnames:
@@ -81,12 +82,15 @@
         usage(exname)
         return
 
+    params = globals.Params()
     try:
-        opts, args = getopt.getopt(args, "h", ["help"])
+        opts, args = getopt.getopt(args, "h", ["help", "debug"])
         for opt, arg in opts:
             if opt in ['-h', '--help']:
                 usage(exname)
                 return
+            elif opt in ['--debug']:
+                params.Debug = True
             else:
                 error("unknown option %s\n"%opt)
                 usage()
@@ -96,7 +100,7 @@
         usage(exname)
         return
 
-    dumper = XLDumper(args[0])
+    dumper = XLDumper(args[0], params)
     dumper.dump()
 
 if __name__ == '__main__':



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]