[banshee] [extras/metrics] Support incremental download/load



commit 04f1df53a00ddf89b20f516fa1eb2dba131af133
Author: Gabriel Burt <gabriel burt gmail com>
Date:   Sat Mar 13 13:11:36 2010 -0800

    [extras/metrics] Support incremental download/load
    
    The database now records the number of the last report it imported and,
    on each run, imports only reports newer than that one. Reports are now
    downloaded via rsync.

 extras/metrics/Database.cs        |  181 ++++++++++++++++++++++++++++---------
 extras/metrics/Main.cs            |   18 +++--
 extras/metrics/Makefile.am        |    6 +-
 extras/metrics/MetaMetrics.cs     |   29 ++++---
 extras/metrics/Metric.cs          |   16 +++-
 extras/metrics/MultiUserSample.cs |   11 ++-
 extras/metrics/User.cs            |   45 +++++++++
 extras/metrics/fetch-metrics      |   23 +++--
 extras/metrics/metrics.csproj     |    6 ++
 9 files changed, 257 insertions(+), 78 deletions(-)
---
diff --git a/extras/metrics/Database.cs b/extras/metrics/Database.cs
index 8bad4ad..37e17b4 100644
--- a/extras/metrics/Database.cs
+++ b/extras/metrics/Database.cs
@@ -32,64 +32,163 @@ using Hyena.Data.Sqlite;
 using Hyena.Json;
 using Mono.Data.Sqlite;
 using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using ICSharpCode.SharpZipLib.GZip;
 
 namespace metrics
 {
-    public class Database
+    public class Config
     {
-        const string db_path = "metrics.db";
+        [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)]
+        public long Id;
 
-        public static HyenaSqliteConnection Open ()
+        [DatabaseColumn]
+        public string Key;
+
+        [DatabaseColumn]
+        public string Value;
+    }
+
+    public class Database : HyenaSqliteConnection
+    {
+        public Database (string db_path) : base (db_path)
         {
             HyenaSqliteCommand.LogAll = ApplicationContext.CommandLine.Contains ("debug-sql");
-            var db =  new HyenaSqliteConnection (db_path);
-            db.Execute ("PRAGMA cache_size = ?", 32768 * 2);
-            db.Execute ("PRAGMA synchronous = OFF");
-            db.Execute ("PRAGMA temp_store = MEMORY");
-            db.Execute ("PRAGMA count_changes = OFF");
-            SampleProvider = new SqliteModelProvider<MultiUserSample> (db, "Samples", true);
-            return db;
+            Execute ("PRAGMA cache_size = ?", 32768 * 4);
+            Execute ("PRAGMA synchronous = OFF");
+            Execute ("PRAGMA temp_store = MEMORY");
+            Execute ("PRAGMA count_changes = OFF");
+
+            Config = new SqliteModelProvider<Config> (this, "Config", true);
+            SampleProvider = new SqliteModelProvider<MultiUserSample> (this, "Samples", true);
+            MetricProvider = new SqliteModelProvider<Metric> (this, "Metrics", true);
+            Users = new SqliteModelProvider<User> (this, "Users", true);
+
+            Execute ("CREATE INDEX IF NOT EXISTS SampleUserMetricIndex ON Samples (UserID, MetricID)");
+        }
+
+        public SqliteModelProvider<Config> Config { get; private set; }
+        public SqliteModelProvider<MultiUserSample> SampleProvider { get; private set; }
+        public SqliteModelProvider<Metric> MetricProvider { get; private set; }
+        public SqliteModelProvider<User> Users { get; private set; }
+
+        private const string collapse_source_metric = "Banshee/Configuration/sources.";
+        private static char [] collapse_source_chars = new char [] {'-', '/', '.', '_'};
+        private Dictionary<string, Metric> metrics = new Dictionary<string, Metric> ();
+        public Metric GetMetric (string name)
+        {
+            Metric metric;
+            if (metrics.TryGetValue (name, out metric))
+                return metric;
+
+            metric = MetricProvider.FetchFirstMatching ("Name = ?", name);
+            if (metric == null) {
+                metric = new Metric () { Name = name };
+                MetricProvider.Save (metric);
+            }
+
+            metrics[name] = metric;
+            return metric;
         }
 
-        public static bool Exists { get { return System.IO.File.Exists (db_path); } }
+        private Dictionary<string, User> users = new Dictionary<string, User> ();
+        public User GetUser (string guid)
+        {
+            User user;
+            if (users.TryGetValue (guid, out user))
+                return user;
+
+            user = Users.FetchFirstMatching ("Guid = ?", guid);
+            if (user == null) {
+                user = new User () { Guid = guid };
+                Users.Save (user);
+            }
 
-        public static SqliteModelProvider<MultiUserSample> SampleProvider { get; private set; }
+            users[guid] = user;
+            return user;
+        }
 
-        public static void Import ()
+        public static bool Exists (string db_path)
         {
-            using (var db = Open ()) {
-                var sample_provider = SampleProvider;
-                db.BeginTransaction ();
-                foreach (var file in System.IO.Directory.GetFiles ("data")) {
-                    Log.InformationFormat ("Importing {0}", file);
-
-                    try {
-                        var o = new Deserializer (System.IO.File.ReadAllText (file)).Deserialize () as JsonObject;
-
-                        string user_id = (string) o["ID"];
-                        int format_version = (int) o["FormatVersion"];
-                        if (format_version != MetricsCollection.FormatVersion) {
-                            Log.WarningFormat ("Ignoring user report with old FormatVersion: {0}", format_version);
-                            continue;
-                        }
+            return System.IO.File.Exists (db_path);
+        }
+
+        private Config LastReportNumber {
+            get {
+                return Config.FetchFirstMatching ("Key = 'LastReportNumber'") ?? new Config () { Key = "LastReportNumber", Value = "0" };
+            }
+        }
 
-                        var metrics = o["Metrics"] as JsonObject;
-                        try {
-                            foreach (string metric_name in metrics.Keys) {
-                                var samples = metrics[metric_name] as JsonArray;
-                                foreach (JsonArray sample in samples) {
-                                    sample_provider.Save (MultiUserSample.Import (user_id, metric_name, (string)sample[0], (object)sample[1]));
-                                }
+        private Regex report_number_regex = new Regex ("data/(.{24}).json.gz", RegexOptions.Compiled);
+
+        public void Import ()
+        {
+            var db = this;
+            var sample_provider = SampleProvider;
+
+            var last_config = LastReportNumber;
+            long last_report_number = Int64.Parse (last_config.Value);
+
+            db.BeginTransaction ();
+            foreach (var file in System.IO.Directory.GetFiles ("data")) {
+                var match = report_number_regex.Match (file);
+                if (!match.Success) {
+                    continue;
+                }
+
+                long num = Int64.Parse (match.Groups[1].Captures[0].Value);
+                if (num <= last_report_number) {
+                    continue;
+                }
+
+                last_report_number = num;
+                Log.DebugFormat ("Importing {0}", file);
+
+                try {
+                    JsonObject o = null;
+                    using (var stream = System.IO.File.OpenRead (file)) {
+                        using (var gzip_stream = new GZipInputStream (stream)) {
+                            using (var txt_stream = new System.IO.StreamReader (gzip_stream)) {
+                                o = new Deserializer (txt_stream.ReadToEnd ()).Deserialize () as JsonObject;
                             }
-                        } catch {
-                            throw;
                         }
-                    } catch (Exception e) {
-                        Log.Exception (String.Format ("Failed to read {0}", file), e);
                     }
+
+                    if (o == null)
+                        throw new Exception ("Unable to parse JSON; empty file, maybe?");
+
+                    string user_id = (string) o["ID"];
+                    int format_version = (int) o["FormatVersion"];
+                    if (format_version != MetricsCollection.FormatVersion) {
+                        Log.WarningFormat ("Ignoring user report with old FormatVersion: {0}", format_version);
+                        continue;
+                    }
+
+                    var metrics = o["Metrics"] as JsonObject;
+                    foreach (string metric_name in metrics.Keys) {
+                        var samples = metrics[metric_name] as JsonArray;
+
+                        string name = metric_name;
+                        if (name.StartsWith (collapse_source_metric)) {
+                            string [] pieces = name.Split ('/');
+                            var reduced_name = pieces[2].Substring (8, pieces[2].IndexOfAny (collapse_source_chars, 8) - 8);
+                            name = String.Format ("{0}{1}/{2}", collapse_source_metric, reduced_name, pieces[pieces.Length - 1]);
+                        }
+
+                        foreach (JsonArray sample in samples) {
+                            sample_provider.Save (MultiUserSample.Import (db, user_id, name, (string)sample[0], (object)sample[1]));
+                        }
+                    }
+                } catch (Exception e) {
+                    Log.Exception (String.Format ("Failed to read {0}", file), e);
                 }
-                db.CommitTransaction ();
             }
+            db.CommitTransaction ();
+
+            last_config.Value = last_report_number.ToString ();
+            Config.Save (last_config);
+
+            Log.InformationFormat ("Done importing - last report # = {0}", last_report_number);
         }
     }
 
@@ -133,4 +232,4 @@ namespace metrics
             return list[list.Count / 2];
         }
     }
-}
\ No newline at end of file
+}
diff --git a/extras/metrics/Main.cs b/extras/metrics/Main.cs
index c762d88..b6edb41 100644
--- a/extras/metrics/Main.cs
+++ b/extras/metrics/Main.cs
@@ -33,15 +33,19 @@ namespace metrics
 {
     public class MainEntry
     {
+        const string db_path = "metrics.db";
+
         public static void Main (string [] args)
         {
-            if (!Database.Exists) {
-                Database.Import ();
-            } else {
-                using (var db = Database.Open ()) {
-                    new MetaMetrics (db);
-                }
+            try {
+            using (var db = new Database (db_path)) {
+                db.Import ();
+                new MetaMetrics (db);
+            }
+            } catch (Exception e) {
+                Console.WriteLine ("Going down, got exception {0}", e);
+                throw;
             }
         }
     }
-}
+}
\ No newline at end of file
diff --git a/extras/metrics/Makefile.am b/extras/metrics/Makefile.am
index 0372766..1dc56d6 100644
--- a/extras/metrics/Makefile.am
+++ b/extras/metrics/Makefile.am
@@ -1,13 +1,17 @@
 ASSEMBLY = Metrics
 TARGET = exe
-LINK = $(LINK_HYENA_DEPS)
+LINK = $(LINK_HYENA_DEPS) $(LINK_ICSHARP_ZIP_LIB)
 SOURCES =  \
 	Database.cs \
 	Main.cs \
 	Metric.cs \
 	MetaMetrics.cs \
+	User.cs \
 	MultiUserSample.cs
 
 RESOURCES =
 
+copy:
+	cp $(top_srcdir)/bin/Metrics.exe* .
+
 include $(top_srcdir)/build/build.mk
diff --git a/extras/metrics/MetaMetrics.cs b/extras/metrics/MetaMetrics.cs
index 41bc060..67d50b8 100644
--- a/extras/metrics/MetaMetrics.cs
+++ b/extras/metrics/MetaMetrics.cs
@@ -50,12 +50,12 @@ namespace metrics
 
         private static int id;
 
-        public SampleModel (string condition, HyenaSqliteConnection db, string aggregates)
+        public SampleModel (string condition, Database db, string aggregates)
         {
             Selection = new Hyena.Collections.Selection ();
             ReloadFragment = String.Format ("FROM Samples {0}", condition);
             SelectAggregates = aggregates;
-            Cache = new SqliteModelCache<MultiUserSample> (db, (id++).ToString (), this, Database.SampleProvider);
+            Cache = new SqliteModelCache<MultiUserSample> (db, (id++).ToString (), this, db.SampleProvider);
         }
 
         public void Reload ()
@@ -67,21 +67,24 @@ namespace metrics
 
     public class MetricSampleModel : SampleModel
     {
+        private Metric metric;
         public string MetricName { get; private set; }
+        public long MetricId { get { return metric.Id; } }
 
         private string condition;
-        public MetricSampleModel (SqliteModelCache<MultiUserSample> limiter, HyenaSqliteConnection db, string aggregates) : base (null, db, aggregates)
+        public MetricSampleModel (SqliteModelCache<MultiUserSample> limiter, Database db, string aggregates) : base (null, db, aggregates)
         {
             condition = String.Format (
-                "FROM Samples, HyenaCache WHERE Samples.MetricName = '{0}' AND HyenaCache.ModelID = {1} AND Samples.ID = HyenaCache.ItemID",
+                "FROM Samples, HyenaCache WHERE Samples.MetricID = {0} AND HyenaCache.ModelID = {1} AND Samples.ID = HyenaCache.ItemID",
                 "{0}", limiter.CacheId
             );
         }
 
-        public void ChangeMetric (string metricName)
+        public void ChangeMetric (Database db, string metricName)
         {
             MetricName = metricName;
-            ReloadFragment = String.Format (condition, metricName);
+            metric = db.GetMetric (metricName);
+            ReloadFragment = String.Format (condition, metric.Id);
             Reload ();
         }
     }
@@ -90,9 +93,9 @@ namespace metrics
     {
         string fmt = "{0,20}";
 
-        public MetaMetrics (HyenaSqliteConnection db)
+        public MetaMetrics (Database db)
         {
-            var latest_samples = new SampleModel ("GROUP BY UserID, MetricName ORDER BY stamp desc", db, "COUNT(DISTINCT(UserID)), MIN(Stamp), MAX(Stamp)");
+            var latest_samples = new SampleModel ("GROUP BY UserID, MetricID ORDER BY stamp desc", db, "COUNT(DISTINCT(UserID)), MIN(Stamp), MAX(Stamp)");
             latest_samples.Cache.AggregatesUpdated += (reader) => {
                 Console.WriteLine ("Total unique users for this time slice: {0}", reader[1]);
                 Console.WriteLine ("First report was on {0}", SqliteUtils.FromDbFormat (typeof(DateTime), reader[2]));
@@ -108,8 +111,8 @@ namespace metrics
                 Console.WriteLine (String.Format ("   Users:  {0}", fmt), agg_reader[1]);
                 using (var reader = new HyenaDataReader (db.Query (
                     @"SELECT COUNT(DISTINCT(UserId)) as users, Value FROM Samples, HyenaCache
-                        WHERE MetricName = ? AND HyenaCache.ModelID = ? AND HyenaCache.ItemID = Samples.ID
-                        GROUP BY Value ORDER BY users DESC", string_summary.MetricName, string_summary.Cache.CacheId))) {
+                        WHERE MetricId = ? AND HyenaCache.ModelID = ? AND HyenaCache.ItemID = Samples.ID
+                        GROUP BY Value ORDER BY users DESC", string_summary.MetricId, string_summary.Cache.CacheId))) {
                     while (reader.Read ()) {
                         Console.WriteLine ("   {0,-5}: {1,-20}", reader.Get<long> (0), reader.Get<string> (1));
                     }
@@ -131,19 +134,19 @@ namespace metrics
                 Console.WriteLine ();
             };
             
-            var metrics = db.QueryEnumerable<string> ("SELECT DISTINCT(MetricName) as name FROM Samples ORDER BY name ASC");
+            var metrics = db.QueryEnumerable<string> ("SELECT Name FROM Metrics ORDER BY Name ASC");
             foreach (var metric in metrics) {
                 switch (GetMetricType (metric)) {
                 case "string":
                     Console.WriteLine ("{0}:", metric);
-                    string_summary.ChangeMetric (metric);
+                    string_summary.ChangeMetric (db, metric);
                     break;
                 //case "timespan" : SummarizeNumeric<TimeSpan> (metric); break;
                 //case "datetime" : SummarizeNumeric<DateTime> (metric); break;
                 case "float":
                     Console.WriteLine ("{0}:", metric);
                     //SummarizeNumeric<long> (metric_cache);
-                    numeric_slice.ChangeMetric (metric);
+                    numeric_slice.ChangeMetric (db, metric);
                     break;
                 //case "float":
                     //SummarizeNumeric<double> (metric_cache);
diff --git a/extras/metrics/Metric.cs b/extras/metrics/Metric.cs
index 105cc2f..7514e1c 100644
--- a/extras/metrics/Metric.cs
+++ b/extras/metrics/Metric.cs
@@ -39,6 +39,14 @@ namespace metrics
 {
     public class Metric
     {
+        [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)]
+        public long Id { get; private set; }
+
+        [DatabaseColumn (Index = "MetricNameIndex")]
+        public string Name { get; set; }
+
+        public Metric () {}
+
         static Metric ()
         {
             var time = new Func<double, string> (d => String.Format ("{0:N0}", SqliteUtils.FromDbFormat (typeof(DateTime), d)));
@@ -126,13 +134,13 @@ namespace metrics
             }
         }
 
-        private string key;
+        //private string key;
         private bool ends_with;
         private Func<double, string> func;
 
         public Metric (string key, Func<double, string> func)
         {
-            this.key = key;
+            Name = key;
             this.func = func;
             this.ends_with = key[0] == '/';
         }
@@ -149,9 +157,9 @@ namespace metrics
         public bool Matching (string key)
         {
             if (ends_with) {
-                return key.EndsWith (this.key);
+                return key.EndsWith (Name);
             } else {
-                return key == this.key;
+                return key == Name;
             }
         }
     }
diff --git a/extras/metrics/MultiUserSample.cs b/extras/metrics/MultiUserSample.cs
index 8fd2f0d..1b01558 100644
--- a/extras/metrics/MultiUserSample.cs
+++ b/extras/metrics/MultiUserSample.cs
@@ -36,7 +36,10 @@ namespace metrics
     public class MultiUserSample : Sample, Hyena.Data.ICacheableItem
     {
         [DatabaseColumn (Index = "SampleUserIdIndex")]
-        public string UserId;
+        public long UserId;
+
+        [DatabaseColumn (Index = "SampleMetricIdIndex")]
+        public long MetricId;
 
         // ICacheableItem
         public object CacheEntryId { get; set; }
@@ -48,13 +51,13 @@ namespace metrics
 
         static DateTime value_dt;
         static TimeSpan value_span;
-        public static MultiUserSample Import (string user_id, string metric_name, string stamp, object val)
+        public static MultiUserSample Import (Database db, string user_id, string metric_name, string stamp, object val)
         {
             var sample = new MultiUserSample ();
-            sample.UserId = user_id;
+            sample.UserId = db.GetUser (user_id).Id;
 
             // TODO collapse various DAP and DAAP library stats?
-            sample.MetricName = metric_name;
+            sample.MetricId = db.GetMetric (metric_name).Id;
 
             DateTime stamp_dt;
             if (!DateTimeUtil.TryParseInvariant (stamp, out stamp_dt)) {
diff --git a/extras/metrics/User.cs b/extras/metrics/User.cs
new file mode 100644
index 0000000..ae7d51c
--- /dev/null
+++ b/extras/metrics/User.cs
@@ -0,0 +1,45 @@
+// 
+// User.cs
+// 
+// Author:
+//   Gabriel Burt <gabriel burt gmail com>
+// 
+// Copyright (c) 2010 Novell, Inc.
+// 
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// 
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using System;
+
+using Hyena.Data.Sqlite;
+
+namespace metrics
+{
+    public class User
+    {
+        [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)]
+        public long Id;
+
+        [DatabaseColumn]
+        public string Guid;
+
+        public User ()
+        {
+        }
+    }
+}
\ No newline at end of file
diff --git a/extras/metrics/fetch-metrics b/extras/metrics/fetch-metrics
index a1998aa..3c8344c 100755
--- a/extras/metrics/fetch-metrics
+++ b/extras/metrics/fetch-metrics
@@ -1,11 +1,18 @@
 #!/bin/bash
 
-rm -fr data
-mkdir data/
-ssh bansheeweb banshee-project org "cd download.banshee-project.org/metrics/data; tar -cf metrics.tar *.gz"
-scp bansheeweb banshee-project org:~/download.banshee-project.org/metrics/data/metrics.tar data/
-ssh bansheeweb banshee-project org "cd download.banshee-project.org/metrics/data; rm metrics.tar"
+mkdir -p data/
 cd data
-tar -xvf metrics.tar
-rm metrics.tar
-gunzip *.gz
+
+echo "Updating remote tar file"
+ssh bansheeweb banshee-project org "cd download.banshee-project.org/metrics/data; tar --append --remove-files -f metrics.tar *.gz 2>&1 | grep -v 'No such file or directory' | grep -v 'exit delayed from previous'"
+
+echo "Downloading changes with rsync"
+rsync --progress bansheeweb banshee-project org:~/download.banshee-project.org/metrics/data/metrics.tar .
+
+echo "Untarring new records"
+tar --keep-old-files -xf metrics.tar 2>&1 | grep -v "Cannot open: File exists" | grep -v "exit delayed from previous"
+
+#echo "Unzipping $(ls -l *.gz 2>/dev/null | wc -l) new records"
+#gunzip -q *.gz
+
+echo "Done!"
diff --git a/extras/metrics/metrics.csproj b/extras/metrics/metrics.csproj
index d4893e6..5191fd8 100644
--- a/extras/metrics/metrics.csproj
+++ b/extras/metrics/metrics.csproj
@@ -32,6 +32,7 @@
     <Reference Include="System" />
     <Reference Include="System.Core">
     </Reference>
+    <Reference Include="ICSharpCode.SharpZipLib" />
   </ItemGroup>
   <ItemGroup>
     <Compile Include="MultiUserSample.cs" />
@@ -39,6 +40,7 @@
     <Compile Include="Main.cs" />
     <Compile Include="Database.cs" />
     <Compile Include="Metric.cs" />
+    <Compile Include="User.cs" />
   </ItemGroup>
   <ItemGroup>
     <ProjectReference Include="..\..\src\Libraries\Hyena\Hyena.csproj">
@@ -49,6 +51,10 @@
       <Project>{BB1D1D81-7A74-4183-B7B1-3E78B32D42F1}</Project>
       <Name>Mono.Data.Sqlite</Name>
     </ProjectReference>
+    <ProjectReference Include="..\..\src\Core\Banshee.Services\Banshee.Services.csproj">
+      <Project>{B28354F0-BA87-44E8-989F-B864A3C7C09F}</Project>
+      <Name>Banshee.Services</Name>
+    </ProjectReference>
   </ItemGroup>
   <Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
 </Project>
\ No newline at end of file



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]