Re: Idea: daily packs
- From: Colin Walters <walters@verbum.org>
- To: Owen Taylor <otaylor@redhat.com>
- Cc: ostree-list@gnome.org
- Subject: Re: Idea: daily packs
- Date: Tue, 21 Aug 2012 23:15:07 -0400
On Mon, 2012-08-20 at 11:18 -0400, Colin Walters wrote:
> So...I'll experiment with trying the 50% heuristic for content by
> tonight - I suspect doing that and dropping --related from the default
> command will give us a lot of the win of just wget on a .tar.gz.
I experimented with this a bit locally; I ended up choosing 66% just on
gut instinct. I haven't tried doing a full download, but thinking about
this more, I suspect the problem we're going to hit is a "sudden cliff"
where most of the pack files no longer contain 66% of the objects we want.
If you think about it, short of some sort of "important tree"-aware
clustering (e.g. pack all objects from
gnomeos-3.6-{x86_64,i686}-{runtime,devel} together), what's going to
happen as the repository grows a longer history is that (due to the
effectively random distribution of SHA256 checksums) the objects for the
latest tree will get spread out over more packfiles. Whether or not we
periodically regenerate the packfiles is a factor here too.
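
To make the heuristic concrete, here's a minimal standalone sketch of the
decision (the names are illustrative, not the actual symbols in
ostree-pull.c); it just checks whether the fraction of needed objects
exceeds the threshold:

#include <glib.h>

/* Fetch a pack only when we would use more than threshold_percent
 * of the objects it contains; otherwise fall back to loose fetches. */
static gboolean
should_fetch_pack (guint n_objects_needed,
                   guint n_objects_in_pack,
                   gint  threshold_percent)
{
  if (n_objects_in_pack == 0)
    return FALSE;
  return ((double) n_objects_needed / n_objects_in_pack)
           > ((double) threshold_percent / 100.0);
}

int
main (void)
{
  /* With the 66% default: needing 40 of 100 objects skips the pack,
   * needing 70 of 100 fetches it. */
  g_print ("40/100 -> %d\n", should_fetch_pack (40, 100, 66));
  g_print ("70/100 -> %d\n", should_fetch_pack (70, 100, 66));
  return 0;
}
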
This patch may still be worth it - however, note that we're not fetching
data pack files asynchronously, and objects from them are added
serially, without checksumming in separate threads, etc. On my laptop
ostree processes a 25MiB pack file in about two seconds, but still.
I may still apply this, but I think I'd like to try just gzipping all of
the loose objects and eating the ~40% increase in disk space on the
server. Note that if we do this, we also *halve* our HTTP request count,
which is a really big deal.
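
As a rough illustration of the gzip-loose-objects idea (a sketch only, not
actual ostree repository code; gzip_loose_object is a hypothetical helper),
something like this with GLib's GZlibCompressor would do it on the server
side:

#include <gio/gio.h>

/* Sketch: compress one loose object file with gzip so a client can fetch
 * the compressed payload in a single HTTP request.  Level 8 just mirrors
 * OT_GZIP_COMPRESSION_LEVEL from ot-builtin-pack.c. */
static gboolean
gzip_loose_object (const char *src_path,
                   const char *dest_path,
                   GError    **error)
{
  gboolean ret = FALSE;
  GFile *src = g_file_new_for_path (src_path);
  GFile *dest = g_file_new_for_path (dest_path);
  GFileInputStream *in = NULL;
  GFileOutputStream *out = NULL;
  GZlibCompressor *compressor = NULL;
  GOutputStream *gzout = NULL;

  in = g_file_read (src, NULL, error);
  if (!in)
    goto out;
  out = g_file_replace (dest, NULL, FALSE,
                        G_FILE_CREATE_REPLACE_DESTINATION, NULL, error);
  if (!out)
    goto out;

  compressor = g_zlib_compressor_new (G_ZLIB_COMPRESSOR_FORMAT_GZIP, 8);
  gzout = g_converter_output_stream_new (G_OUTPUT_STREAM (out),
                                         G_CONVERTER (compressor));

  if (g_output_stream_splice (gzout, G_INPUT_STREAM (in),
                              G_OUTPUT_STREAM_SPLICE_CLOSE_SOURCE |
                              G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET,
                              NULL, error) < 0)
    goto out;

  ret = TRUE;
 out:
  g_clear_object (&gzout);
  g_clear_object (&compressor);
  g_clear_object (&out);
  g_clear_object (&in);
  g_clear_object (&dest);
  g_clear_object (&src);
  return ret;
}

int
main (int argc, char **argv)
{
  GError *error = NULL;
  /* argv[1]/argv[2]: source loose object and gzipped destination. */
  if (argc != 3 || !gzip_loose_object (argv[1], argv[2], &error))
    {
      g_printerr ("error: %s\n",
                  error ? error->message : "usage: gzip-loose SRC DEST");
      return 1;
    }
  return 0;
}
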
Another approach to making packfiles better: since they get worse as
history grows longer, we could just trim history aggressively. These are
binaries, not source code, so we can in theory regenerate builds
whenever we want.
Say we have multiple repositories: one that contains just the last 20
builds, and another that keeps, say, one build a month. And if we wanted
to put everything in Amazon Glacier, we could have the "all builds since
the beginning of time" repository.
From 158d7353b6ac8b93957ad0bb80b17db0741a6d54 Mon Sep 17 00:00:00 2001
From: Colin Walters <walters@verbum.org>
Date: Tue, 21 Aug 2012 22:59:27 -0400
Subject: [PATCH] Add a packfile-threshold, default to 66%
Per discussion with Owen, add a system which dynamically decides
whether or not to fetch pack files. By default, we only fetch them if
we'd use more than 66% of the objects they contain. This should
hopefully speed up the initial download case a bit.
---
src/ostree/ostree-fetcher.c | 11 ++++++
src/ostree/ostree-pull.c | 43 ++++++++++++++++++++--
src/ostree/ot-builtin-pack.c | 86 ++++++++++++++++++++++----------------------
3 files changed, 94 insertions(+), 46 deletions(-)
diff --git a/src/ostree/ostree-fetcher.c b/src/ostree/ostree-fetcher.c
index dcabf6e..3ef6e7e 100644
--- a/src/ostree/ostree-fetcher.c
+++ b/src/ostree/ostree-fetcher.c
@@ -187,15 +187,26 @@ on_request_sent (GObject *object,
{
OstreeFetcherPendingURI *pending = user_data;
GError *local_error = NULL;
+ ot_lobj SoupMessage *msg = NULL;
pending->request_body = soup_request_send_finish ((SoupRequest*) object,
result, &local_error);
+ msg = soup_request_http_get_message ((SoupRequestHTTP*) object);
+
if (!pending->request_body)
{
pending->state = OSTREE_FETCHER_STATE_COMPLETE;
g_simple_async_result_take_error (pending->result, local_error);
g_simple_async_result_complete (pending->result);
}
+ else if (!SOUP_STATUS_IS_SUCCESSFUL (msg->status_code))
+ {
+ g_set_error (&local_error, G_IO_ERROR, G_IO_ERROR_FAILED,
+ "Server returned status %u: %s",
+ msg->status_code, soup_status_get_phrase (msg->status_code));
+ g_simple_async_result_take_error (pending->result, local_error);
+ g_simple_async_result_complete (pending->result);
+ }
else
{
GOutputStreamSpliceFlags flags = G_OUTPUT_STREAM_SPLICE_CLOSE_TARGET;
diff --git a/src/ostree/ostree-pull.c b/src/ostree/ostree-pull.c
index fa5d55c..6d73484 100644
--- a/src/ostree/ostree-pull.c
+++ b/src/ostree/ostree-pull.c
@@ -21,6 +21,9 @@
*/
/**
+ * See:
+ * https://mail.gnome.org/archives/ostree-list/2012-August/msg00021.html
+ *
* DESIGN:
*
* Pull refs
@@ -64,13 +67,13 @@
#include "ostree-fetcher.h"
gboolean verbose;
-gboolean opt_prefer_loose;
+gint opt_packfile_threshold = 66;
gboolean opt_related;
gint opt_depth;
static GOptionEntry options[] = {
{ "verbose", 'v', 0, G_OPTION_ARG_NONE, &verbose, "Show more information", NULL },
- { "prefer-loose", 0, 0, G_OPTION_ARG_NONE, &opt_prefer_loose, "Download loose objects by default", NULL },
+ { "packfile-threshold", 't', 0, G_OPTION_ARG_INT, &opt_packfile_threshold, "Only download packfiles if more than PERCENT objects are needed (default: 66)", "PERCENT" },
{ "related", 0, 0, G_OPTION_ARG_NONE, &opt_related, "Download related commits", NULL },
{ "depth", 0, 0, G_OPTION_ARG_INT, &opt_depth, "Download parent commits up to this depth (default: 0)", NULL },
{ NULL },
@@ -201,6 +204,7 @@ check_outstanding_requests_handle_error (OtPullData *pull_data,
{
pull_data->caught_error = TRUE;
g_propagate_error (pull_data->async_error, error);
+ g_main_loop_quit (pull_data->loop);
}
else
{
@@ -1249,7 +1253,7 @@ fetch_content (OtPullData *pull_data,
cancellable, error))
goto out;
- if (remote_pack_checksum && !opt_prefer_loose)
+ if (remote_pack_checksum)
{
files_to_fetch = g_hash_table_lookup (data_packs_to_fetch, remote_pack_checksum);
if (files_to_fetch == NULL)
@@ -1270,6 +1274,39 @@ fetch_content (OtPullData *pull_data,
}
}
+ g_hash_table_iter_init (&hash_iter, data_packs_to_fetch);
+ while (g_hash_table_iter_next (&hash_iter, &key, &value))
+ {
+ const char *pack_checksum = key;
+ GPtrArray *files = value;
+ ot_lvariant GVariant *mapped_pack = NULL;
+ ot_lvariant GVariant *content_list = NULL;
+ gboolean fetch;
+
+ if (!ostree_repo_map_cached_remote_pack_index (pull_data->repo, pull_data->remote_name,
+ pack_checksum, FALSE,
+ &mapped_pack,
+ cancellable, error))
+ goto out;
+
+ content_list = g_variant_get_child_value (mapped_pack, 2);
+
+ fetch = (((double)files->len) / g_variant_n_children (content_list)) > ((double)opt_packfile_threshold / 100);
+ g_print ("pack %s files: %lu total: %lu fetch:%d\n", pack_checksum, (gulong) files->len,
+ g_variant_n_children (content_list),
+ fetch);
+ if (!fetch)
+ {
+ guint i;
+ for (i = 0; i < files->len; i++)
+ {
+ g_hash_table_insert (loose_files, files->pdata[i], files->pdata[i]);
+ files->pdata[i] = NULL; /* steal data */
+ }
+ g_hash_table_iter_remove (&hash_iter);
+ }
+ }
+
if (n_objects_to_fetch > 0)
g_print ("%u content objects to fetch\n", n_objects_to_fetch);
diff --git a/src/ostree/ot-builtin-pack.c b/src/ostree/ot-builtin-pack.c
index 2fc6def..7456435 100644
--- a/src/ostree/ot-builtin-pack.c
+++ b/src/ostree/ot-builtin-pack.c
@@ -31,15 +31,15 @@
#include <gio/gunixinputstream.h>
#include <gio/gunixoutputstream.h>
-#define OT_DEFAULT_PACK_SIZE_BYTES (50*1024*1024)
#define OT_GZIP_COMPRESSION_LEVEL (8)
static gboolean opt_analyze_only;
static gboolean opt_metadata_only;
+static gboolean opt_content_only;
static gboolean opt_reindex_only;
static gboolean opt_delete_all_loose;
static gboolean opt_keep_all_loose;
-static char* opt_pack_size;
+static char* opt_pack_size = "50m";
static char* opt_int_compression;
static char* opt_ext_compression;
@@ -50,10 +50,11 @@ typedef enum {
} OtCompressionType;
static GOptionEntry options[] = {
- { "pack-size", 0, 0, G_OPTION_ARG_STRING, &opt_pack_size, "Maximum uncompressed size of packfiles in bytes; may be suffixed with k, m, or g", "BYTES" },
+ { "pack-size", 0, 0, G_OPTION_ARG_STRING, &opt_pack_size, "Maximum uncompressed size of packfiles in bytes; may be suffixed with k, m, or g (default: 50m)", "BYTES" },
{ "internal-compression", 0, 0, G_OPTION_ARG_STRING, &opt_int_compression, "Compress objects using COMPRESSION", "COMPRESSION" },
{ "external-compression", 0, 0, G_OPTION_ARG_STRING, &opt_ext_compression, "Compress entire packfiles using COMPRESSION", "COMPRESSION" },
{ "metadata-only", 0, 0, G_OPTION_ARG_NONE, &opt_metadata_only, "Only pack metadata objects", NULL },
+ { "content-only", 0, 0, G_OPTION_ARG_NONE, &opt_content_only, "Only pack content objects", NULL },
{ "analyze-only", 0, 0, G_OPTION_ARG_NONE, &opt_analyze_only, "Just analyze current state", NULL },
{ "reindex-only", 0, 0, G_OPTION_ARG_NONE, &opt_reindex_only, "Regenerate pack index", NULL },
{ "delete-all-loose", 0, 0, G_OPTION_ARG_NONE, &opt_delete_all_loose, "Delete all loose objects (default: delete unreferenced loose)", NULL },
@@ -517,7 +518,8 @@ create_pack_file (OtRepackData *data,
if (!ostree_repo_regenerate_pack_index (data->repo, cancellable, error))
goto out;
- g_print ("Created pack file '%s' with %u objects\n", g_checksum_get_string (pack_checksum), objects->len);
+ g_print ("Created %s pack file '%s' with %u objects\n", is_meta ? "metadata" : "content",
+ g_checksum_get_string (pack_checksum), objects->len);
if (!opt_keep_all_loose)
{
@@ -680,7 +682,6 @@ cluster_objects_stupidly (OtRepackData *data,
static gboolean
parse_size_spec_with_suffix (const char *spec,
- guint64 default_value,
guint64 *out_size,
GError **error)
{
@@ -688,44 +689,36 @@ parse_size_spec_with_suffix (const char *spec,
char *endptr = NULL;
guint64 ret_size;
- if (spec == NULL)
- {
- ret_size = default_value;
- endptr = NULL;
- }
- else
- {
- ret_size = g_ascii_strtoull (spec, &endptr, 10);
+ ret_size = g_ascii_strtoull (spec, &endptr, 10);
- if (endptr && *endptr)
- {
- char suffix = *endptr;
+ if (endptr && *endptr)
+ {
+ char suffix = *endptr;
- switch (suffix)
- {
- case 'k':
- case 'K':
- {
- ret_size *= 1024;
- break;
- }
- case 'm':
- case 'M':
- {
- ret_size *= (1024 * 1024);
- break;
- }
- case 'g':
- case 'G':
- {
- ret_size *= (1024 * 1024 * 1024);
- break;
- }
- default:
- g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
- "Invalid size suffix '%c'", suffix);
- goto out;
- }
+ switch (suffix)
+ {
+ case 'k':
+ case 'K':
+ {
+ ret_size *= 1024;
+ break;
+ }
+ case 'm':
+ case 'M':
+ {
+ ret_size *= (1024 * 1024);
+ break;
+ }
+ case 'g':
+ case 'G':
+ {
+ ret_size *= (1024 * 1024 * 1024);
+ break;
+ }
+ default:
+ g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+ "Invalid size suffix '%c'", suffix);
+ goto out;
}
}
@@ -810,7 +803,7 @@ do_stats_gather_loose (OtRepackData *data,
else if (is_loose)
{
if (!(opt_metadata_only && !OSTREE_OBJECT_TYPE_IS_META(objtype))
- || OSTREE_OBJECT_TYPE_IS_META (objtype))
+ && !(opt_content_only && OSTREE_OBJECT_TYPE_IS_META(objtype)))
{
GVariant *copy = g_variant_ref (serialized_key);
g_hash_table_replace (ret_loose, copy, copy);
@@ -933,6 +926,13 @@ ostree_builtin_pack (int argc, char **argv, GFile *repo_path, GError **error)
if (!g_option_context_parse (context, &argc, &argv, error))
goto out;
+ if (opt_metadata_only && opt_content_only)
+ {
+ g_set_error (error, G_IO_ERROR, G_IO_ERROR_FAILED,
+ "--content-only cannot be specified with --metadata-only");
+ goto out;
+ }
+
repo = ostree_repo_new (repo_path);
if (!ostree_repo_check (repo, error))
goto out;
@@ -940,7 +940,7 @@ ostree_builtin_pack (int argc, char **argv, GFile *repo_path, GError **error)
data.repo = repo;
data.error = error;
- if (!parse_size_spec_with_suffix (opt_pack_size, OT_DEFAULT_PACK_SIZE_BYTES, &data.pack_size, error))
+ if (!parse_size_spec_with_suffix (opt_pack_size, &data.pack_size, error))
goto out;
/* Default internal compression to gzip */
if (!parse_compression_string (opt_int_compression ? opt_int_compression : "gzip", &data.int_compression, error))
--
1.7.11.4