beagle r4761 - trunk/beagle/Util
- From: dbera svn gnome org
- To: svn-commits-list gnome org
- Subject: beagle r4761 - trunk/beagle/Util
- Date: Sun, 18 May 2008 17:12:18 +0000 (UTC)
Author: dbera
Date: Sun May 18 17:12:18 2008
New Revision: 4761
URL: http://svn.gnome.org/viewvc/beagle?rev=4761&view=rev
Log:
Fix #533529. Fix parsing of the Opera cache file according to the format documented at http://www.opera.com/docs/fileformats/. Two major fixes: most integers are uint32 values stored in big-endian format, and when a parsing error occurs the reader is now conservative and skips records to avoid any kind of looping.
Modified:
trunk/beagle/Util/OperaHistory.cs
Modified: trunk/beagle/Util/OperaHistory.cs
==============================================================================
--- trunk/beagle/Util/OperaHistory.cs (original)
+++ trunk/beagle/Util/OperaHistory.cs Sun May 18 17:12:18 2008
@@ -35,6 +35,8 @@
public class OperaHistory {
private ArrayList rows;
private DateTime lastRead;
+ // Details of the cache file format from
+ // http://www.opera.com/docs/fileformats/
public enum Directives : byte {
RowStart = 0x01, // Row start (new entry)
Address = 0x03, // Web address
@@ -119,9 +121,9 @@
}
}
- public long Length {
+ public uint Length {
get {
- return OperaHistory.GetLength (GetContent (Directives.Length));
+ return OperaHistory.GetUInt32 (GetContent (Directives.Length));
}
}
@@ -139,7 +141,7 @@
get {
try {
byte[] content = GetContent (Directives.LastVisited);
- return Beagle.Util.DateTimeUtil.UnixToDateTimeUtc (FSpot.BitConverter.ToUInt32 (content, 0, false));
+ return Beagle.Util.DateTimeUtil.UnixToDateTimeUtc (GetUInt32 (content));
} catch {
return DateTime.MinValue;
}
@@ -150,7 +152,7 @@
get {
try {
byte[] content = GetContent (Directives.LocalSaveTime);
- return Beagle.Util.DateTimeUtil.UnixToDateTimeUtc (FSpot.BitConverter.ToUInt32 (content, 0, false));
+ return Beagle.Util.DateTimeUtil.UnixToDateTimeUtc (GetUInt32 (content));
} catch {
return DateTime.MinValue;
}
@@ -161,7 +163,7 @@
get {
try {
byte[] content = GetContent (Directives.LastChanged);
- return Beagle.Util.DateTimeUtil.UnixToDateTimeUtc (FSpot.BitConverter.ToUInt32 (content, 0, false));
+ return Beagle.Util.DateTimeUtil.UnixToDateTimeUtc (GetUInt32 (content));
} catch {
return DateTime.MinValue;
}
@@ -208,6 +210,7 @@
Read (filename);
}
+ static uint filepos = 12;
private void Read (string filename)
{
using (StreamReader stream = new StreamReader (filename)) {
@@ -216,8 +219,23 @@
// Skip first 12 bytes since their purpose is yet unknown
binary.BaseStream.Seek (12, SeekOrigin.Begin);
while (binary.ReadByte () == 1) {
- int length = Convert.ToInt32 (GetLength (binary.ReadByte (), binary.ReadByte ()));
- ReadLine (binary.ReadBytes (length));
+ uint length = GetUInt32 (binary.ReadByte (), binary.ReadByte ());
+ filepos += 3;
+
+ byte[] line = binary.ReadBytes ((int)length);
+ if (line.Length < length)
+ break; // EOF
+
+ try {
+ ReadLine (line);
+ } catch (EndOfStreamException) {
+ break;
+ } catch (IOException) {
+ break;
+ } catch(Exception e) {
+ Beagle.Util.Logger.Log.Error(e);
+ }
+ filepos += length;
}
}
}
@@ -236,14 +254,10 @@
Row row = new Row ();
while (position <= line.Length) {
- try {
- Property prop = NewProperty (line, ref position);
+ Property prop = NewProperty (line, ref position);
- if (prop != null)
- row.AddProperty (prop);
- } catch(Exception e) {
- Beagle.Util.Logger.Log.Error(e);
- }
+ if (prop != null)
+ row.AddProperty (prop);
}
return row;
@@ -254,21 +268,32 @@
if (position+3 > line.Length) {
position++;
return null;
- } else if (line [position] == (byte) 0x8F) {
- // It seems to be something magic with 0x8F because it appears when you least
- // expect it and doesn't seem to belong anywhere. Just ignore it.
- position++;
- return NewProperty (line, ref position);
}
+ // Tag_id values in which the MSB (Most Significant Bit) is set to 1
+ // are reserved for records that implicitly have no length:
+ // the tag_id field is followed by neither a length field nor a payload buffer.
+ // Such records are used as Boolean flags: True if present, False if not present.
+ if ((line [position] & (byte) 0x80) == (byte) 0x80) {
+#if OPERA_DEBUG
+ Console.WriteLine ("Ignoring flag record 0x{0:x} at {1} ({2})", line [position], position, filepos + position);
+#endif
+ position++;
+ return null;
+ }
+
int start = position+1, length = 0, directive = position;
-
+
// Read the two bytes that follows the directive byte and parse them as an integer.
// This will be how far we will be reading in the stream
byte[] length_bytes = new byte [2];
Array.Copy (line, start, length_bytes, 0, 2);
- length = Convert.ToInt32 (GetLength (length_bytes));
-
+ length = (int) GetUInt32 (length_bytes);
+
+#if OPERA_DEBUG
+ Console.WriteLine ("Adding record 0x{0:x2} at {1} of length {2} ({3})", line [position], position, length, filepos + position);
+#endif
+
// The content is what we really is after. This can be an address, object size or
// something else valuable.
byte[] content = new byte [length];
@@ -278,18 +303,20 @@
return new Property (line [directive], content);
}
-
- public static long GetLength (params byte[] bytes)
+
+ public static uint GetUInt32 (params byte[] bytes)
{
- byte[] t = new byte [8] { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
-
- if (bytes == null || bytes.Length > 8 || bytes.Length == 0)
+ if (bytes == null || bytes.Length > 4 || bytes.Length == 0)
return 0;
-
- for (int i = 0; i < bytes.Length; i++)
- t [i] = bytes [bytes.Length-i-1];
-
- return BitConverter.ToInt64 (t, 0);
+
+ byte[] t = bytes;
+
+ if (bytes.Length < 4) {
+ t = new byte [4] {0x00, 0x00, 0x00, 0x00};
+ Array.Copy (bytes, 0, t, 4 - bytes.Length, bytes.Length);
+ }
+
+ return FSpot.BitConverter.ToUInt32 (t, 0, false);
}
public IEnumerator GetEnumerator ()
@@ -301,5 +328,20 @@
{
return this.lastRead;
}
+#if OPERA_DEBUG
+ public static void Main (string[] args)
+ {
+ if (args.Length != 1)
+ return;
+ OperaHistory op = new OperaHistory (args [0]);
+ IEnumerator iter = op.GetEnumerator ();
+ while (iter.MoveNext ()) {
+ Row row = (Row) iter.Current;
+ Uri uri = row.Address;
+ Console.WriteLine (uri);
+ }
+ }
+#endif
}
}
+
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]