[libmediaart/sam/valid-utf8: 3/3] Mandate that inputs are valid UTF8




commit 054477587484dc31f23555db37c316f8ccba1ba4
Author: Sam Thursfield <sam afuera me uk>
Date:   Sun Dec 27 00:48:49 2020 +0100

    Mandate that inputs are valid UTF8
    
    This restriction was already in place, since we passed the inputs
    directly to GLib functions that expect valid UTF8.
    
    If this library were widely used then we might proactively validate
    inputs to protect against exploits. At time of writing, this module is
    only used in 3 places (grilo-plugins, gnome-music and rygel), so just
    check they are doing the right thing when calling the API.
    
    See https://gitlab.gnome.org/GNOME/libmediaart/-/merge_requests/5 for
    background.

 libmediaart/cache.c  | 26 ++++++++++++++++++++++++++
 tests/mediaarttest.c |  2 ++
 2 files changed, 28 insertions(+)
---
diff --git a/libmediaart/cache.c b/libmediaart/cache.c
index ecbc7a1..7d35401 100644
--- a/libmediaart/cache.c
+++ b/libmediaart/cache.c
@@ -115,6 +115,9 @@ media_art_strip_find_next_block (const gchar    *original,
  * 2. Text inside brackets of (), {}, [] and <> pairs are removed.
  * 3. Multiples of space characters are removed.
  *
+ * This function expects that the input is valid UTF-8. Use g_utf8_validate()
+ * if the input has not already been validated.
+ *
  * Returns: @original stripped of invalid characters which must be
  * freed. On error or if @original is empty, %NULL is returned.
  *
@@ -141,6 +144,7 @@ media_art_strip_invalid_entities (const gchar *original)
        };
 
        g_return_val_if_fail (original != NULL, NULL);
+       g_return_val_if_fail (g_utf8_validate (original, -1, NULL), NULL);
 
        str_no_blocks = g_string_new ("");
 
@@ -262,6 +266,9 @@ media_art_checksum_for_data (GChecksumType  checksum_type,
  * This operation should not use i/o, but it depends on the backend
  * GFile implementation.
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Returns: %TRUE if @cache_file was returned, otherwise %FALSE.
  *
  * Since: 0.2.0
@@ -272,6 +279,10 @@ media_art_get_file (const gchar  *artist,
                     const gchar  *prefix,
                     GFile       **cache_file)
 {
+       g_return_val_if_fail (g_utf8_validate (artist, -1, NULL), FALSE);
+       g_return_val_if_fail (g_utf8_validate (title, -1, NULL), FALSE);
+       g_return_val_if_fail (g_utf8_validate (prefix, -1, NULL), FALSE);
+
        const gchar *space_checksum = "7215ee9c7d9dc229d2921a40e899ec5f";
        const gchar *a, *b;
 
@@ -369,6 +380,9 @@ media_art_get_file (const gchar  *artist,
  * Get the path to media art for a given resource. Newly allocated
  * data returned in @cache_path must be freed with g_free().
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Returns: %TRUE if @cache_path was returned, otherwise %FALSE.
  *
  * Since: 0.2.0
@@ -381,6 +395,10 @@ media_art_get_path (const gchar  *artist,
 {
        GFile *cache_file = NULL;
 
+       g_return_val_if_fail (g_utf8_validate (artist, -1, NULL), FALSE);
+       g_return_val_if_fail (g_utf8_validate (title, -1, NULL), FALSE);
+       g_return_val_if_fail (g_utf8_validate (prefix, -1, NULL), FALSE);
+
        /* Rules:
         * 1. artist OR title must be non-NULL.
         * 2. cache_file must be non-NULL
@@ -407,6 +425,9 @@ media_art_get_path (const gchar  *artist,
  *
  * If @artist and @album are %NULL, ALL media art cache is removed.
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Returns: #TRUE on success, otherwise #FALSE where @error will be set.
  *
  * Since: 0.2.0
@@ -424,6 +445,8 @@ media_art_remove (const gchar   *artist,
        gboolean success = TRUE;
 
        g_return_val_if_fail (artist != NULL && artist[0] != '\0', FALSE);
+       g_return_val_if_fail (g_utf8_validate (artist, -1, NULL), FALSE);
+       g_return_val_if_fail (g_utf8_validate (album, -1, NULL), FALSE);
 
        dirname = g_build_filename (g_get_user_cache_dir (), "media-art", NULL);
 
@@ -604,6 +627,9 @@ remove_thread (GTask        *task,
  * value) will be executed before an outstanding request with lower
  * priority. Default priority is %G_PRIORITY_DEFAULT.
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Since: 0.7.0
  */
 void
diff --git a/tests/mediaarttest.c b/tests/mediaarttest.c
index cef36c2..1ddb2d7 100644
--- a/tests/mediaarttest.c
+++ b/tests/mediaarttest.c
@@ -54,6 +54,8 @@ static TestInfo strip_test_cases [] = {
        { "unbalanced-brackets-gt-lt-end", "Unbalanced brackets>", NULL, "unbalanced brackets" },
        { "messy-title-punctuation", "Live at *WEMBLEY* dude!", NULL, "live at wembley dude" },
        { "crap-brackets-everywhere", "met[xX[x]alli]ca", NULL, "metallica" },
+       /* This value found by fuzz testing, see 
https://gitlab.gnome.org/GNOME/libmediaart/-/merge_requests/5 */
+       { "invalid-utf8-1", 
"\x0a\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1",
 NULL, "" },
        { NULL, NULL, NULL, NULL }
 };
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]