[gnome-software: 1/2] gs-download-utils: Support If-Modified-Since caching




commit f09af1a19a00320eb6dbd5d9b2066344fc3501b0
Author: Philip Withnall <pwithnall endlessos org>
Date:   Fri Mar 18 13:38:21 2022 +0000

    gs-download-utils: Support If-Modified-Since caching
    
    The download code already has support for ETags, but in case the server
    doesn’t support them, or the client doesn’t support the xattrs required
    to save an ETag alongside its file, it’s useful to support cache queries
    based on the modification time of a file.
    
    Add support for `Last-Modified` and `If-Modified-Since`. This changes
    the download APIs, so bumps the API revision.
    
    Signed-off-by: Philip Withnall <pwithnall endlessos org>

 lib/gs-download-utils.c | 137 +++++++++++++++++++++++++++++++++++++++++-------
 lib/gs-download-utils.h |   2 +
 lib/gs-utils.c          |  25 +++++++--
 lib/gs-utils.h          |   1 +
 meson.build             |   2 +-
 5 files changed, 143 insertions(+), 24 deletions(-)
---
diff --git a/lib/gs-download-utils.c b/lib/gs-download-utils.c
index 60bdaf1d4..0a0a316a1 100644
--- a/lib/gs-download-utils.c
+++ b/lib/gs-download-utils.c
@@ -49,6 +49,56 @@ gs_build_soup_session (void)
                                              NULL);
 }
 
+/* Format @date_time as an IMF-fixdate, the preferred HTTP date form.
+ * See https://httpwg.org/specs/rfc7231.html#http.date
+ * For example: Sun, 06 Nov 1994 08:49:37 GMT */
+static gchar *
+date_time_to_rfc7231 (GDateTime *date_time)
+{
+#if SOUP_CHECK_VERSION(3, 0, 0)
+       return soup_date_time_to_string (date_time, SOUP_DATE_HTTP);
+#else
+       const gchar *day_names[] = { "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" };
+       const gchar *month_names[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec" };
+
+       /* HTTP dates must be expressed in UTC and end in the literal ‘GMT’, so
+        * convert first rather than printing the input’s own zone with %Z.
+        * g_date_time_format() is avoided for the names as it is locale-dependent. */
+       g_autoptr(GDateTime) utc = g_date_time_to_utc (date_time);
+
+       return g_strdup_printf ("%s, %02d %s %04d %02d:%02d:%02d GMT",
+                               day_names[g_date_time_get_day_of_week (utc) - 1],
+                               g_date_time_get_day_of_month (utc),
+                               month_names[g_date_time_get_month (utc) - 1],
+                               g_date_time_get_year (utc),
+                               g_date_time_get_hour (utc), g_date_time_get_minute (utc), g_date_time_get_second (utc));
+#endif
+}
+
+/* Parse an HTTP date header value (such as Last-Modified) into a #GDateTime.
+ * On the libsoup-2.4 path, %NULL is returned if the string cannot be parsed. */
+static GDateTime *
+date_time_from_rfc7231 (const gchar *rfc7231_str)
+{
+#if SOUP_CHECK_VERSION(3, 0, 0)
+       return soup_date_time_new_from_http_string (rfc7231_str);
+#else
+       g_autoptr(SoupDate) soup_date = soup_date_new_from_string (rfc7231_str);
+       g_autoptr(GTimeZone) tz = NULL;
+
+       if (soup_date == NULL)
+               return NULL;
+
+       /* SoupDate.offset is the minutes to add to reach UTC; GTimeZone wants
+        * the seconds to add to UTC to reach local time, hence the negation. */
+       tz = soup_date->utc ? g_time_zone_new_utc ()
+                           : g_time_zone_new_offset (-soup_date->offset * 60);
+
+       return g_date_time_new (tz, soup_date->year, soup_date->month, soup_date->day,
+                               soup_date->hour, soup_date->minute, soup_date->second);
+#endif
+}
+
 typedef struct {
        /* Input data. */
        gchar *uri;  /* (not nullable) (owned) */
@@ -56,6 +106,7 @@ typedef struct {
        GOutputStream *output_stream;  /* (nullable) (owned) */
        gsize buffer_size_bytes;
        gchar *last_etag;  /* (nullable) (owned) */
+       GDateTime *last_modified_date;  /* (nullable) (owned) */
        int io_priority;
        GsDownloadProgressCallback progress_callback;  /* (nullable) */
        gpointer progress_user_data;
@@ -72,6 +123,7 @@ typedef struct {
 
        /* Output data. */
        gchar *new_etag;  /* (nullable) (owned) */
+       GDateTime *new_last_modified_date;  /* (nullable) (owned) */
        GError *error;  /* (nullable) (owned) */
 } DownloadData;
 
@@ -87,9 +139,11 @@ download_data_free (DownloadData *data)
        g_clear_object (&data->output_stream);
 
        g_clear_pointer (&data->last_etag, g_free);
+       g_clear_pointer (&data->last_modified_date, g_date_time_unref);
        g_clear_object (&data->message);
        g_clear_pointer (&data->uri, g_free);
        g_clear_pointer (&data->new_etag, g_free);
+       g_clear_pointer (&data->new_last_modified_date, g_date_time_unref);
        g_clear_pointer (&data->currently_unwritten_chunk, g_bytes_unref);
        g_clear_error (&data->error);
 
@@ -120,6 +174,8 @@ static void download_progress (GTask *task);
  * @uri: (not nullable): the URI to download
  * @output_stream: (not nullable): an output stream to write the download to
  * @last_etag: (nullable): the last-known ETag of the URI, or %NULL if unknown
+ * @last_modified_date: (nullable): the last-known Last-Modified date of the
+ *   URI, or %NULL if unknown
  * @io_priority: I/O priority to download and write at
  * @progress_callback: (nullable): callback to call with progress information
  * @progress_user_data: (nullable) (closure progress_callback): data to pass
@@ -130,11 +186,11 @@ static void download_progress (GTask *task);
  *
  * Download @uri and write it to @output_stream asynchronously.
  *
- * If @last_etag is non-%NULL, it will be sent to the server, which may return
- * a ‘not modified’ response. If so, @output_stream will not be written to, and
- * will be closed with a cancelled close operation. This will ensure that the
- * existing content of the output stream (if it’s a file, for example) will not
- * be overwritten.
+ * If @last_etag is non-%NULL or @last_modified_date is non-%NULL, they will be
+ * sent to the server, which may return a ‘not modified’ response. If so,
+ * @output_stream will not be written to, and will be closed with a cancelled
+ * close operation. This will ensure that the existing content of the output
+ * stream (if it’s a file, for example) will not be overwritten.
  *
  * Note that @last_etag must be the ETag value returned by the server last time
  * the file was downloaded, not the local file ETag generated by GLib.
@@ -142,13 +198,14 @@ static void download_progress (GTask *task);
  * If specified, @progress_callback will be called zero or more times until
  * @callback is called, providing progress updates on the download.
  *
- * Since: 42
+ * Since: 43
  */
 void
 gs_download_stream_async (SoupSession                *soup_session,
                           const gchar                *uri,
                           GOutputStream              *output_stream,
                           const gchar                *last_etag,
+                          GDateTime                  *last_modified_date,
                           int                         io_priority,
                           GsDownloadProgressCallback  progress_callback,
                           gpointer                    progress_user_data,
@@ -201,15 +258,27 @@ gs_download_stream_async (SoupSession                *soup_session,
 
        data->message = g_object_ref (msg);
 
+       /* Caching support. Prefer ETags to modification dates, as the latter
+        * have problems with rapid updates and clock drift. */
        if (last_etag != NULL && *last_etag == '\0')
                last_etag = NULL;
        data->last_etag = g_strdup (last_etag);
 
+       if (last_modified_date != NULL)
+               data->last_modified_date = g_date_time_ref (last_modified_date);
+
        if (last_etag != NULL) {
 #if SOUP_CHECK_VERSION(3, 0, 0)
                soup_message_headers_append (soup_message_get_request_headers (msg), "If-None-Match", last_etag);
 #else
                soup_message_headers_append (msg->request_headers, "If-None-Match", last_etag);
+#endif
+       } else if (last_modified_date != NULL) {
+               g_autofree gchar *last_modified_date_str = date_time_to_rfc7231 (last_modified_date);
+#if SOUP_CHECK_VERSION(3, 0, 0)
+               soup_message_headers_append (soup_message_get_request_headers (msg), "If-Modified-Since", last_modified_date_str);
+#else
+               soup_message_headers_append (msg->request_headers, "If-Modified-Since", last_modified_date_str);
 #endif
        }
 
@@ -252,7 +321,7 @@ open_input_stream_cb (GObject      *source_object,
        } else if (SOUP_IS_SESSION (source_object)) {
                SoupSession *soup_session = SOUP_SESSION (source_object);
                guint status_code;
-               const gchar *new_etag;
+               const gchar *new_etag, *new_last_modified_str;
 
                /* HTTP request. */
 #if SOUP_CHECK_VERSION(3, 0, 0)
@@ -270,14 +339,15 @@ open_input_stream_cb (GObject      *source_object,
                }
 
                if (status_code == SOUP_STATUS_NOT_MODIFIED) {
-                       /* If the file has not been modified from the ETag we
-                        * have, finish the download early. Ensure to close the
-                        * output stream so that its existing content is *not*
-                        * overwritten.
+                       /* If the file has not been modified from the ETag or
+                        * Last-Modified date we have, finish the download
+                        * early. Ensure to close the output stream so that its
+                        * existing content is *not* overwritten.
                         *
                         * Preserve the existing ETag. */
                        data->discard_output_stream = TRUE;
                        data->new_etag = g_strdup (data->last_etag);
+                       data->new_last_modified_date = (data->last_modified_date != NULL) ? g_date_time_ref (data->last_modified_date) : NULL;
                        finish_download (task, NULL);
                        return;
                } else if (status_code != SOUP_STATUS_OK) {
@@ -315,6 +385,17 @@ open_input_stream_cb (GObject      *source_object,
                if (new_etag != NULL && *new_etag == '\0')
                        new_etag = NULL;
                data->new_etag = g_strdup (new_etag);
+
+               /* Store the Last-Modified date for later use. */
+#if SOUP_CHECK_VERSION(3, 0, 0)
+               new_last_modified_str = soup_message_headers_get_one (soup_message_get_response_headers (data->message), "Last-Modified");
+#else
+               new_last_modified_str = soup_message_headers_get_one (data->message->response_headers, "Last-Modified");
+#endif
+               if (new_last_modified_str != NULL && *new_last_modified_str == '\0')
+                       new_last_modified_str = NULL;
+               if (new_last_modified_str != NULL)
+                       data->new_last_modified_date = date_time_from_rfc7231 (new_last_modified_str);
        } else {
                g_assert_not_reached ();
        }
@@ -530,18 +611,22 @@ download_progress (GTask *task)
  * @new_etag_out: (out callee-allocates) (transfer full) (optional) (nullable):
  *   return location for the ETag of the downloaded file (which may be %NULL),
  *   or %NULL to ignore it
+ * @new_last_modified_date_out: (out callee-allocates) (transfer full) (optional) (nullable):
+ *   return location for the new Last-Modified date of the downloaded file
+ *   (which may be %NULL), or %NULL to ignore it
  * @error: return location for a #GError
  *
  * Finish an asynchronous download operation started with
  * gs_download_stream_async().
  *
  * Returns: %TRUE on success, %FALSE otherwise
- * Since: 42
+ * Since: 43
  */
 gboolean
 gs_download_stream_finish (SoupSession   *soup_session,
                            GAsyncResult  *result,
                            gchar        **new_etag_out,
+                           GDateTime    **new_last_modified_date_out,
                            GError       **error)
 {
        DownloadData *data;
@@ -554,6 +639,8 @@ gs_download_stream_finish (SoupSession   *soup_session,
 
        if (new_etag_out != NULL)
                *new_etag_out = g_strdup (data->new_etag);
+       if (new_last_modified_date_out != NULL)
+               *new_last_modified_date_out = (data->new_last_modified_date != NULL) ? g_date_time_ref (data->new_last_modified_date) : NULL;
 
        return g_task_propagate_boolean (G_TASK (result), error);
 }
@@ -568,6 +655,7 @@ typedef struct {
 
        /* In-progress data. */
        gchar *last_etag;  /* (nullable) (owned) */
+       GDateTime *last_modified_date;  /* (nullable) (owned) */
 } DownloadFileData;
 
 static void
@@ -576,6 +664,7 @@ download_file_data_free (DownloadFileData *data)
        g_free (data->uri);
        g_clear_object (&data->output_file);
        g_free (data->last_etag);
+       g_clear_pointer (&data->last_modified_date, g_date_time_unref);
        g_free (data);
 }
 
@@ -604,8 +693,8 @@ static void download_file_cb (GObject      *source_object,
  * Download @uri and write it to @output_file asynchronously, overwriting the
  * existing content of @output_file.
  *
- * The ETag of @output_file will be queried and, if known, used to skip the
- * download if @output_file is already up to date.
+ * The ETag and modification time of @output_file will be queried and, if known,
+ * used to skip the download if @output_file is already up to date.
  *
  * If specified, @progress_callback will be called zero or more times until
  * @callback is called, providing progress updates on the download.
@@ -659,8 +748,8 @@ gs_download_file_async (SoupSession                *soup_session,
 
        g_clear_error (&local_error);
 
-       /* Query the old ETag if the file already exists. */
-       data->last_etag = gs_utils_get_file_etag (output_file, cancellable);
+       /* Query the old ETag and modification date if the file already exists. */
+       data->last_etag = gs_utils_get_file_etag (output_file, &data->last_modified_date, cancellable);
 
        /* Create the output file.
         *
@@ -706,7 +795,7 @@ download_replace_file_cb (GObject      *source_object,
 
        /* Do the download. */
        gs_download_stream_async (soup_session, data->uri, G_OUTPUT_STREAM (output_stream),
-                                 data->last_etag, data->io_priority,
+                                 data->last_etag, data->last_modified_date, data->io_priority,
                                  data->progress_callback, data->progress_user_data,
                                  cancellable, download_file_cb, g_steal_pointer (&task));
 }
@@ -723,12 +812,22 @@ download_file_cb (GObject      *source_object,
        g_autofree gchar *new_etag = NULL;
        g_autoptr(GError) local_error = NULL;
 
-       if (!gs_download_stream_finish (soup_session, result, &new_etag, &local_error)) {
+       if (!gs_download_stream_finish (soup_session, result, &new_etag, NULL, &local_error)) {
                g_task_return_error (task, g_steal_pointer (&local_error));
                return;
        }
 
-       /* Update the stored HTTP ETag. */
+       /* Update the stored HTTP ETag.
+        *
+        * Under the assumption that this code is only ever used for locally
+        * cached copies of remote files (i.e. the local copies are never
+        * modified except by downloading an updated version from the server),
+        * it’s safe to use the local file modification date for Last-Modified,
+        * and save having to update that explicitly. This is because the
+        * modification time of the local file equals when gnome-software last
+        * checked for updates to it — which is correct to send as the
+        * If-Modified-Since the next time gnome-software checks for updates to
+        * the file. */
        gs_utils_set_file_etag (data->output_file, new_etag, cancellable);
 
        g_task_return_boolean (task, TRUE);
diff --git a/lib/gs-download-utils.h b/lib/gs-download-utils.h
index 9d3bdea63..6ab568579 100644
--- a/lib/gs-download-utils.h
+++ b/lib/gs-download-utils.h
@@ -44,6 +44,7 @@ void          gs_download_stream_async        (SoupSession                *soup_session,
                                                 const gchar                *uri,
                                                 GOutputStream              *output_stream,
                                                 const gchar                *last_etag,
+                                                GDateTime                  *last_modified_date,
                                                 int                         io_priority,
                                                 GsDownloadProgressCallback  progress_callback,
                                                 gpointer                    progress_user_data,
@@ -53,6 +54,7 @@ void          gs_download_stream_async        (SoupSession                *soup_session,
 gboolean       gs_download_stream_finish       (SoupSession   *soup_session,
                                                 GAsyncResult  *result,
                                                 gchar        **new_etag_out,
+                                                GDateTime    **new_last_modified_date_out,
                                                 GError       **error);
 
 void           gs_download_file_async          (SoupSession                *soup_session,
diff --git a/lib/gs-utils.c b/lib/gs-utils.c
index b8eefe36e..b14e24de7 100644
--- a/lib/gs-utils.c
+++ b/lib/gs-utils.c
@@ -1493,6 +1493,9 @@ gs_utils_get_file_size (const gchar *filename,
 /**
  * gs_utils_get_file_etag:
  * @file: a file to get the ETag for
+ * @last_modified_date_out: (out callee-allocates) (transfer full) (optional) (nullable):
+ *   return location for the last modified date of the file (%NULL to ignore),
+ *   or %NULL if unknown
  * @cancellable: (nullable): an optional #GCancellable or %NULL
  *
  * Gets the ETag for the @file, previously stored by
@@ -1502,26 +1505,40 @@ gs_utils_get_file_size (const gchar *filename,
  *    or %NULL, when the file does not exist, no ETag is stored for it
  *    or other error occurs.
  *
- * Since: 42
+ * Since: 43
  **/
 gchar *
-gs_utils_get_file_etag (GFile        *file,
-                        GCancellable *cancellable)
+gs_utils_get_file_etag (GFile         *file,
+                        GDateTime    **last_modified_date_out,
+                        GCancellable  *cancellable)
 {
        g_autoptr(GFileInfo) info = NULL;
+       const gchar *attributes;
        g_autoptr(GError) local_error = NULL;
 
        g_return_val_if_fail (G_IS_FILE (file), NULL);
        g_return_val_if_fail (cancellable == NULL || G_IS_CANCELLABLE (cancellable), NULL);
 
-       info = g_file_query_info (file, METADATA_ETAG_ATTRIBUTE, G_FILE_QUERY_INFO_NONE, cancellable, &local_error);
+       if (last_modified_date_out == NULL)
+               attributes = METADATA_ETAG_ATTRIBUTE;
+       else
+               attributes = METADATA_ETAG_ATTRIBUTE "," G_FILE_ATTRIBUTE_TIME_MODIFIED;
+
+       info = g_file_query_info (file, attributes, G_FILE_QUERY_INFO_NONE, cancellable, &local_error);
 
        if (info == NULL) {
                g_debug ("Error getting attribute ‘%s’ for file ‘%s’: %s",
                         METADATA_ETAG_ATTRIBUTE, g_file_peek_path (file), local_error->message);
+
+               if (last_modified_date_out != NULL)
+                       *last_modified_date_out = NULL;
+
                return NULL;
        }
 
+       if (last_modified_date_out != NULL)
+               *last_modified_date_out = g_file_info_get_modification_date_time (info);
+
        return g_strdup (g_file_info_get_attribute_string (info, METADATA_ETAG_ATTRIBUTE));
 }
 
diff --git a/lib/gs-utils.h b/lib/gs-utils.h
index 50135ce77..7d1ce10ab 100644
--- a/lib/gs-utils.h
+++ b/lib/gs-utils.h
@@ -134,6 +134,7 @@ guint64              gs_utils_get_file_size         (const gchar            *filename,
                                                 gpointer                user_data,
                                                 GCancellable           *cancellable);
 gchar *                 gs_utils_get_file_etag         (GFile                  *file,
+                                                GDateTime              **last_modified_date_out,
                                                 GCancellable           *cancellable);
 gboolean        gs_utils_set_file_etag         (GFile                  *file,
                                                 const gchar            *etag,
diff --git a/meson.build b/meson.build
index 4e8b596f6..aeeb72a06 100644
--- a/meson.build
+++ b/meson.build
@@ -23,7 +23,7 @@ conf.set_quoted('APPLICATION_ID', application_id)
 
 # this refers to the gnome-software plugin API version
 # this is not in any way related to a package or soname version
-gs_plugin_api_version = '17'
+gs_plugin_api_version = '18'
 conf.set_quoted('GS_PLUGIN_API_VERSION', gs_plugin_api_version)
 
 # private subdirectory of libdir for the private shared libgnomesoftware to live in


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]