[banshee] [extras/metrics] Support incremental download/load
- From: Gabriel Burt <gburt@src.gnome.org>
- To: commits-list@gnome.org
- Cc:
- Subject: [banshee] [extras/metrics] Support incremental download/load
- Date: Sat, 13 Mar 2010 21:13:58 +0000 (UTC)
commit 04f1df53a00ddf89b20f516fa1eb2dba131af133
Author: Gabriel Burt <gabriel.burt@gmail.com>
Date: Sat Mar 13 13:11:36 2010 -0800
[extras/metrics] Support incremental download/load
The database keeps track of the last report it imported and only imports
new ones; the download now happens incrementally via rsync.
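
For context, here is a minimal stand-alone sketch of the incremental-load idea
that Database.Import() below implements: remember the highest report number
already imported, skip anything at or below it, and persist the new high-water
mark when the pass finishes. The names are illustrative only; the committed code
stores the marker in a Config table and matches data/(.{24}).json.gz, while this
sketch uses a plain state file and a digits-only pattern.

    // Illustrative sketch, not the committed code: incremental import driven
    // by a persisted high-water mark.
    using System;
    using System.IO;
    using System.Linq;
    using System.Text.RegularExpressions;

    class IncrementalImportSketch
    {
        // The committed regex is data/(.{24}).json.gz; digits-only keeps the sketch simple.
        static readonly Regex report_number = new Regex (@"(\d+)\.json\.gz$", RegexOptions.Compiled);

        static void Main ()
        {
            const string state_path = "last-report-number"; // stand-in for the Config table row
            long last = File.Exists (state_path) ? Int64.Parse (File.ReadAllText (state_path)) : 0;

            if (!Directory.Exists ("data"))
                return;

            foreach (var file in Directory.GetFiles ("data").OrderBy (f => f)) {
                var match = report_number.Match (file);
                if (!match.Success)
                    continue;

                long num = Int64.Parse (match.Groups[1].Value);
                if (num <= last)
                    continue; // already imported on a previous run

                Console.WriteLine ("Would import {0}", file); // the real code gunzips and parses JSON here
                last = num;
            }

            File.WriteAllText (state_path, last.ToString ()); // persist the new high-water mark
        }
    }
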
 extras/metrics/Database.cs        |  181 ++++++++++++++++++++++++++++---------
 extras/metrics/Main.cs            |   18 +++--
 extras/metrics/Makefile.am        |    6 +-
 extras/metrics/MetaMetrics.cs     |   29 ++++---
 extras/metrics/Metric.cs          |   16 +++-
 extras/metrics/MultiUserSample.cs |   11 ++-
 extras/metrics/User.cs            |   45 +++++++++
 extras/metrics/fetch-metrics      |   23 +++--
 extras/metrics/metrics.csproj     |    6 ++
 9 files changed, 257 insertions(+), 78 deletions(-)
---
diff --git a/extras/metrics/Database.cs b/extras/metrics/Database.cs
index 8bad4ad..37e17b4 100644
--- a/extras/metrics/Database.cs
+++ b/extras/metrics/Database.cs
@@ -32,64 +32,163 @@ using Hyena.Data.Sqlite;
using Hyena.Json;
using Mono.Data.Sqlite;
using System.Collections.Generic;
+using System.Text.RegularExpressions;
+using ICSharpCode.SharpZipLib.GZip;
namespace metrics
{
- public class Database
+ public class Config
{
- const string db_path = "metrics.db";
+ [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)]
+ public long Id;
- public static HyenaSqliteConnection Open ()
+ [DatabaseColumn]
+ public string Key;
+
+ [DatabaseColumn]
+ public string Value;
+ }
+
+ public class Database : HyenaSqliteConnection
+ {
+ public Database (string db_path) : base (db_path)
{
HyenaSqliteCommand.LogAll = ApplicationContext.CommandLine.Contains ("debug-sql");
- var db = new HyenaSqliteConnection (db_path);
- db.Execute ("PRAGMA cache_size = ?", 32768 * 2);
- db.Execute ("PRAGMA synchronous = OFF");
- db.Execute ("PRAGMA temp_store = MEMORY");
- db.Execute ("PRAGMA count_changes = OFF");
- SampleProvider = new SqliteModelProvider<MultiUserSample> (db, "Samples", true);
- return db;
+ Execute ("PRAGMA cache_size = ?", 32768 * 4);
+ Execute ("PRAGMA synchronous = OFF");
+ Execute ("PRAGMA temp_store = MEMORY");
+ Execute ("PRAGMA count_changes = OFF");
+
+ Config = new SqliteModelProvider<Config> (this, "Config", true);
+ SampleProvider = new SqliteModelProvider<MultiUserSample> (this, "Samples", true);
+ MetricProvider = new SqliteModelProvider<Metric> (this, "Metrics", true);
+ Users = new SqliteModelProvider<User> (this, "Users", true);
+
+ Execute ("CREATE INDEX IF NOT EXISTS SampleUserMetricIndex ON Samples (UserID, MetricID)");
+ }
+
+ public SqliteModelProvider<Config> Config { get; private set; }
+ public SqliteModelProvider<MultiUserSample> SampleProvider { get; private set; }
+ public SqliteModelProvider<Metric> MetricProvider { get; private set; }
+ public SqliteModelProvider<User> Users { get; private set; }
+
+ private const string collapse_source_metric = "Banshee/Configuration/sources.";
+ private static char [] collapse_source_chars = new char [] {'-', '/', '.', '_'};
+ private Dictionary<string, Metric> metrics = new Dictionary<string, Metric> ();
+ public Metric GetMetric (string name)
+ {
+ Metric metric;
+ if (metrics.TryGetValue (name, out metric))
+ return metric;
+
+ metric = MetricProvider.FetchFirstMatching ("Name = ?", name);
+ if (metric == null) {
+ metric = new Metric () { Name = name };
+ MetricProvider.Save (metric);
+ }
+
+ metrics[name] = metric;
+ return metric;
}
- public static bool Exists { get { return System.IO.File.Exists (db_path); } }
+ private Dictionary<string, User> users = new Dictionary<string, User> ();
+ public User GetUser (string guid)
+ {
+ User user;
+ if (users.TryGetValue (guid, out user))
+ return user;
+
+ user = Users.FetchFirstMatching ("Guid = ?", guid);
+ if (user == null) {
+ user = new User () { Guid = guid };
+ Users.Save (user);
+ }
- public static SqliteModelProvider<MultiUserSample> SampleProvider { get; private set; }
+ users[guid] = user;
+ return user;
+ }
- public static void Import ()
+ public static bool Exists (string db_path)
{
- using (var db = Open ()) {
- var sample_provider = SampleProvider;
- db.BeginTransaction ();
- foreach (var file in System.IO.Directory.GetFiles ("data")) {
- Log.InformationFormat ("Importing {0}", file);
-
- try {
- var o = new Deserializer (System.IO.File.ReadAllText (file)).Deserialize () as JsonObject;
-
- string user_id = (string) o["ID"];
- int format_version = (int) o["FormatVersion"];
- if (format_version != MetricsCollection.FormatVersion) {
- Log.WarningFormat ("Ignoring user report with old FormatVersion: {0}", format_version);
- continue;
- }
+ return System.IO.File.Exists (db_path);
+ }
+
+ private Config LastReportNumber {
+ get {
+ return Config.FetchFirstMatching ("Key = 'LastReportNumber'") ?? new Config () { Key = "LastReportNumber", Value = "0" };
+ }
+ }
- var metrics = o["Metrics"] as JsonObject;
- try {
- foreach (string metric_name in metrics.Keys) {
- var samples = metrics[metric_name] as JsonArray;
- foreach (JsonArray sample in samples) {
- sample_provider.Save (MultiUserSample.Import (user_id, metric_name, (string)sample[0], (object)sample[1]));
- }
+ private Regex report_number_regex = new Regex ("data/(.{24}).json.gz", RegexOptions.Compiled);
+
+ public void Import ()
+ {
+ var db = this;
+ var sample_provider = SampleProvider;
+
+ var last_config = LastReportNumber;
+ long last_report_number = Int64.Parse (last_config.Value);
+
+ db.BeginTransaction ();
+ foreach (var file in System.IO.Directory.GetFiles ("data")) {
+ var match = report_number_regex.Match (file);
+ if (!match.Success) {
+ continue;
+ }
+
+ long num = Int64.Parse (match.Groups[1].Captures[0].Value);
+ if (num <= last_report_number) {
+ continue;
+ }
+
+ last_report_number = num;
+ Log.DebugFormat ("Importing {0}", file);
+
+ try {
+ JsonObject o = null;
+ using (var stream = System.IO.File.OpenRead (file)) {
+ using (var gzip_stream = new GZipInputStream (stream)) {
+ using (var txt_stream = new System.IO.StreamReader (gzip_stream)) {
+ o = new Deserializer (txt_stream.ReadToEnd ()).Deserialize () as JsonObject;
}
- } catch {
- throw;
}
- } catch (Exception e) {
- Log.Exception (String.Format ("Failed to read {0}", file), e);
}
+
+ if (o == null)
+ throw new Exception ("Unable to parse JSON; empty file, maybe?");
+
+ string user_id = (string) o["ID"];
+ int format_version = (int) o["FormatVersion"];
+ if (format_version != MetricsCollection.FormatVersion) {
+ Log.WarningFormat ("Ignoring user report with old FormatVersion: {0}", format_version);
+ continue;
+ }
+
+ var metrics = o["Metrics"] as JsonObject;
+ foreach (string metric_name in metrics.Keys) {
+ var samples = metrics[metric_name] as JsonArray;
+
+ string name = metric_name;
+ if (name.StartsWith (collapse_source_metric)) {
+ string [] pieces = name.Split ('/');
+ var reduced_name = pieces[2].Substring (8, pieces[2].IndexOfAny (collapse_source_chars, 8) - 8);
+ name = String.Format ("{0}{1}/{2}", collapse_source_metric, reduced_name, pieces[pieces.Length - 1]);
+ }
+
+ foreach (JsonArray sample in samples) {
+ sample_provider.Save (MultiUserSample.Import (db, user_id, name, (string)sample[0], (object)sample[1]));
+ }
+ }
+ } catch (Exception e) {
+ Log.Exception (String.Format ("Failed to read {0}", file), e);
}
- db.CommitTransaction ();
}
+ db.CommitTransaction ();
+
+ last_config.Value = last_report_number.ToString ();
+ Config.Save (last_config);
+
+ Log.InformationFormat ("Done importing - last report # = {0}", last_report_number);
}
}
@@ -133,4 +232,4 @@ namespace metrics
return list[list.Count / 2];
}
}
-}
\ No newline at end of file
+}
diff --git a/extras/metrics/Main.cs b/extras/metrics/Main.cs
index c762d88..b6edb41 100644
--- a/extras/metrics/Main.cs
+++ b/extras/metrics/Main.cs
@@ -33,15 +33,19 @@ namespace metrics
{
public class MainEntry
{
+ const string db_path = "metrics.db";
+
public static void Main (string [] args)
{
- if (!Database.Exists) {
- Database.Import ();
- } else {
- using (var db = Database.Open ()) {
- new MetaMetrics (db);
- }
+ try {
+ using (var db = new Database (db_path)) {
+ db.Import ();
+ new MetaMetrics (db);
+ }
+ } catch (Exception e) {
+ Console.WriteLine ("Going down, got exception {0}", e);
+ throw;
}
}
}
-}
+}
\ No newline at end of file
diff --git a/extras/metrics/Makefile.am b/extras/metrics/Makefile.am
index 0372766..1dc56d6 100644
--- a/extras/metrics/Makefile.am
+++ b/extras/metrics/Makefile.am
@@ -1,13 +1,17 @@
ASSEMBLY = Metrics
TARGET = exe
-LINK = $(LINK_HYENA_DEPS)
+LINK = $(LINK_HYENA_DEPS) $(LINK_ICSHARP_ZIP_LIB)
SOURCES = \
Database.cs \
Main.cs \
Metric.cs \
MetaMetrics.cs \
+ User.cs \
MultiUserSample.cs
RESOURCES =
+copy:
+ cp $(top_srcdir)/bin/Metrics.exe* .
+
include $(top_srcdir)/build/build.mk
diff --git a/extras/metrics/MetaMetrics.cs b/extras/metrics/MetaMetrics.cs
index 41bc060..67d50b8 100644
--- a/extras/metrics/MetaMetrics.cs
+++ b/extras/metrics/MetaMetrics.cs
@@ -50,12 +50,12 @@ namespace metrics
private static int id;
- public SampleModel (string condition, HyenaSqliteConnection db, string aggregates)
+ public SampleModel (string condition, Database db, string aggregates)
{
Selection = new Hyena.Collections.Selection ();
ReloadFragment = String.Format ("FROM Samples {0}", condition);
SelectAggregates = aggregates;
- Cache = new SqliteModelCache<MultiUserSample> (db, (id++).ToString (), this, Database.SampleProvider);
+ Cache = new SqliteModelCache<MultiUserSample> (db, (id++).ToString (), this, db.SampleProvider);
}
public void Reload ()
@@ -67,21 +67,24 @@ namespace metrics
public class MetricSampleModel : SampleModel
{
+ private Metric metric;
public string MetricName { get; private set; }
+ public long MetricId { get { return metric.Id; } }
private string condition;
- public MetricSampleModel (SqliteModelCache<MultiUserSample> limiter, HyenaSqliteConnection db, string aggregates) : base (null, db, aggregates)
+ public MetricSampleModel (SqliteModelCache<MultiUserSample> limiter, Database db, string aggregates) : base (null, db, aggregates)
{
condition = String.Format (
- "FROM Samples, HyenaCache WHERE Samples.MetricName = '{0}' AND HyenaCache.ModelID = {1} AND Samples.ID = HyenaCache.ItemID",
+ "FROM Samples, HyenaCache WHERE Samples.MetricID = {0} AND HyenaCache.ModelID = {1} AND Samples.ID = HyenaCache.ItemID",
"{0}", limiter.CacheId
);
}
- public void ChangeMetric (string metricName)
+ public void ChangeMetric (Database db, string metricName)
{
MetricName = metricName;
- ReloadFragment = String.Format (condition, metricName);
+ metric = db.GetMetric (metricName);
+ ReloadFragment = String.Format (condition, metric.Id);
Reload ();
}
}
@@ -90,9 +93,9 @@ namespace metrics
{
string fmt = "{0,20}";
- public MetaMetrics (HyenaSqliteConnection db)
+ public MetaMetrics (Database db)
{
- var latest_samples = new SampleModel ("GROUP BY UserID, MetricName ORDER BY stamp desc", db, "COUNT(DISTINCT(UserID)), MIN(Stamp), MAX(Stamp)");
+ var latest_samples = new SampleModel ("GROUP BY UserID, MetricID ORDER BY stamp desc", db, "COUNT(DISTINCT(UserID)), MIN(Stamp), MAX(Stamp)");
latest_samples.Cache.AggregatesUpdated += (reader) => {
Console.WriteLine ("Total unique users for this time slice: {0}", reader[1]);
Console.WriteLine ("First report was on {0}", SqliteUtils.FromDbFormat (typeof(DateTime), reader[2]));
@@ -108,8 +111,8 @@ namespace metrics
Console.WriteLine (String.Format (" Users: {0}", fmt), agg_reader[1]);
using (var reader = new HyenaDataReader (db.Query (
@"SELECT COUNT(DISTINCT(UserId)) as users, Value FROM Samples, HyenaCache
- WHERE MetricName = ? AND HyenaCache.ModelID = ? AND HyenaCache.ItemID = Samples.ID
- GROUP BY Value ORDER BY users DESC", string_summary.MetricName, string_summary.Cache.CacheId))) {
+ WHERE MetricId = ? AND HyenaCache.ModelID = ? AND HyenaCache.ItemID = Samples.ID
+ GROUP BY Value ORDER BY users DESC", string_summary.MetricId, string_summary.Cache.CacheId))) {
while (reader.Read ()) {
Console.WriteLine (" {0,-5}: {1,-20}", reader.Get<long> (0), reader.Get<string> (1));
}
@@ -131,19 +134,19 @@ namespace metrics
Console.WriteLine ();
};
- var metrics = db.QueryEnumerable<string> ("SELECT DISTINCT(MetricName) as name FROM Samples ORDER BY name ASC");
+ var metrics = db.QueryEnumerable<string> ("SELECT Name FROM Metrics ORDER BY Name ASC");
foreach (var metric in metrics) {
switch (GetMetricType (metric)) {
case "string":
Console.WriteLine ("{0}:", metric);
- string_summary.ChangeMetric (metric);
+ string_summary.ChangeMetric (db, metric);
break;
//case "timespan" : SummarizeNumeric<TimeSpan> (metric); break;
//case "datetime" : SummarizeNumeric<DateTime> (metric); break;
case "float":
Console.WriteLine ("{0}:", metric);
//SummarizeNumeric<long> (metric_cache);
- numeric_slice.ChangeMetric (metric);
+ numeric_slice.ChangeMetric (db, metric);
break;
//case "float":
//SummarizeNumeric<double> (metric_cache);
diff --git a/extras/metrics/Metric.cs b/extras/metrics/Metric.cs
index 105cc2f..7514e1c 100644
--- a/extras/metrics/Metric.cs
+++ b/extras/metrics/Metric.cs
@@ -39,6 +39,14 @@ namespace metrics
{
public class Metric
{
+ [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)]
+ public long Id { get; private set; }
+
+ [DatabaseColumn (Index = "MetricNameIndex")]
+ public string Name { get; set; }
+
+ public Metric () {}
+
static Metric ()
{
var time = new Func<double, string> (d => String.Format ("{0:N0}", SqliteUtils.FromDbFormat (typeof(DateTime), d)));
@@ -126,13 +134,13 @@ namespace metrics
}
}
- private string key;
+ //private string key;
private bool ends_with;
private Func<double, string> func;
public Metric (string key, Func<double, string> func)
{
- this.key = key;
+ Name = key;
this.func = func;
this.ends_with = key[0] == '/';
}
@@ -149,9 +157,9 @@ namespace metrics
public bool Matching (string key)
{
if (ends_with) {
- return key.EndsWith (this.key);
+ return key.EndsWith (Name);
} else {
- return key == this.key;
+ return key == Name;
}
}
}
diff --git a/extras/metrics/MultiUserSample.cs b/extras/metrics/MultiUserSample.cs
index 8fd2f0d..1b01558 100644
--- a/extras/metrics/MultiUserSample.cs
+++ b/extras/metrics/MultiUserSample.cs
@@ -36,7 +36,10 @@ namespace metrics
public class MultiUserSample : Sample, Hyena.Data.ICacheableItem
{
[DatabaseColumn (Index = "SampleUserIdIndex")]
- public string UserId;
+ public long UserId;
+
+ [DatabaseColumn (Index = "SampleMetricIdIndex")]
+ public long MetricId;
// ICacheableItem
public object CacheEntryId { get; set; }
@@ -48,13 +51,13 @@ namespace metrics
static DateTime value_dt;
static TimeSpan value_span;
- public static MultiUserSample Import (string user_id, string metric_name, string stamp, object val)
+ public static MultiUserSample Import (Database db, string user_id, string metric_name, string stamp, object val)
{
var sample = new MultiUserSample ();
- sample.UserId = user_id;
+ sample.UserId = db.GetUser (user_id).Id;
// TODO collapse various DAP and DAAP library stats?
- sample.MetricName = metric_name;
+ sample.MetricId = db.GetMetric (metric_name).Id;
DateTime stamp_dt;
if (!DateTimeUtil.TryParseInvariant (stamp, out stamp_dt)) {
diff --git a/extras/metrics/User.cs b/extras/metrics/User.cs
new file mode 100644
index 0000000..ae7d51c
--- /dev/null
+++ b/extras/metrics/User.cs
@@ -0,0 +1,45 @@
+//
+// User.cs
+//
+// Author:
+// Gabriel Burt <gabriel.burt@gmail.com>
+//
+// Copyright (c) 2010 Novell, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+using System;
+
+using Hyena.Data.Sqlite;
+
+namespace metrics
+{
+ public class User
+ {
+ [DatabaseColumn (Constraints = DatabaseColumnConstraints.PrimaryKey)]
+ public long Id;
+
+ [DatabaseColumn]
+ public string Guid;
+
+ public User ()
+ {
+ }
+ }
+}
\ No newline at end of file
diff --git a/extras/metrics/fetch-metrics b/extras/metrics/fetch-metrics
index a1998aa..3c8344c 100755
--- a/extras/metrics/fetch-metrics
+++ b/extras/metrics/fetch-metrics
@@ -1,11 +1,18 @@
#!/bin/bash
-rm -fr data
-mkdir data/
-ssh bansheeweb@banshee-project.org "cd download.banshee-project.org/metrics/data; tar -cf metrics.tar *.gz"
-scp bansheeweb@banshee-project.org:~/download.banshee-project.org/metrics/data/metrics.tar data/
-ssh bansheeweb@banshee-project.org "cd download.banshee-project.org/metrics/data; rm metrics.tar"
+mkdir -p data/
cd data
-tar -xvf metrics.tar
-rm metrics.tar
-gunzip *.gz
+
+echo "Updating remote tar file"
+ssh bansheeweb@banshee-project.org "cd download.banshee-project.org/metrics/data; tar --append --remove-files -f metrics.tar *.gz 2>&1 | grep -v 'No such file or directory' | grep -v 'exit delayed from previous'"
+
+echo "Downloading changes with rsync"
+rsync --progress bansheeweb@banshee-project.org:~/download.banshee-project.org/metrics/data/metrics.tar .
+
+echo "Untarring new records"
+tar --keep-old-files -xf metrics.tar 2>&1 | grep -v "Cannot open: File exists" | grep -v "exit delayed from previous"
+
+#echo "Unzipping $(ls -l *.gz 2>/dev/null | wc -l) new records"
+#gunzip -q *.gz
+
+echo "Done!"
diff --git a/extras/metrics/metrics.csproj b/extras/metrics/metrics.csproj
index d4893e6..5191fd8 100644
--- a/extras/metrics/metrics.csproj
+++ b/extras/metrics/metrics.csproj
@@ -32,6 +32,7 @@
<Reference Include="System" />
<Reference Include="System.Core">
</Reference>
+ <Reference Include="ICSharpCode.SharpZipLib" />
</ItemGroup>
<ItemGroup>
<Compile Include="MultiUserSample.cs" />
@@ -39,6 +40,7 @@
<Compile Include="Main.cs" />
<Compile Include="Database.cs" />
<Compile Include="Metric.cs" />
+ <Compile Include="User.cs" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Libraries\Hyena\Hyena.csproj">
@@ -49,6 +51,10 @@
<Project>{BB1D1D81-7A74-4183-B7B1-3E78B32D42F1}</Project>
<Name>Mono.Data.Sqlite</Name>
</ProjectReference>
+ <ProjectReference Include="..\..\src\Core\Banshee.Services\Banshee.Services.csproj">
+ <Project>{B28354F0-BA87-44E8-989F-B864A3C7C09F}</Project>
+ <Name>Banshee.Services</Name>
+ </ProjectReference>
</ItemGroup>
<Import Project="$(MSBuildBinPath)\Microsoft.CSharp.targets" />
</Project>
\ No newline at end of file