[libmediaart/sam/valid-utf8-2: 2/2] Mandate that inputs are valid UTF8




commit 18fb1f141311c50967654acd8ab25d2929ca75e5
Author: Sam Thursfield <sam afuera me uk>
Date:   Sun Dec 27 00:48:49 2020 +0100

    Mandate that inputs are valid UTF8
    
    This restriction was already in place, since we passed the inputs
    directly to GLib functions that expect valid UTF8.
    
    If this library were widely used then we might proactively validate
    inputs to protect against exploits. At time of writing, this module is
    only used in 3 places (grilo-plugins, gnome-music and rygel), so just
    check they are doing the right thing when calling the API.
    
    See https://gitlab.gnome.org/GNOME/libmediaart/-/merge_requests/5 for
    background.

 libmediaart/cache.c  | 17 +++++++++++++++++
 tests/mediaarttest.c |  2 ++
 2 files changed, 19 insertions(+)
---
diff --git a/libmediaart/cache.c b/libmediaart/cache.c
index b5b023b..f91eda8 100644
--- a/libmediaart/cache.c
+++ b/libmediaart/cache.c
@@ -115,6 +115,9 @@ media_art_strip_find_next_block (const gchar    *original,
  * 2. Text inside brackets of (), {}, [] and <> pairs are removed.
  * 3. Multiples of space characters are removed.
  *
+ * This function expects that the input is valid UTF-8. Use g_utf8_validate()
+ * if the input has not already been validated.
+ *
  * Returns: @original stripped of invalid characters which must be
  * freed. On error or if @original is NULL, %NULL is returned.
  *
@@ -143,6 +146,8 @@ media_art_strip_invalid_entities (const gchar *original)
        if (original == NULL)
                return NULL;
 
+       g_return_val_if_fail (g_utf8_validate (original, -1, NULL), NULL);
+
        str_no_blocks = g_string_new ("");
 
        p = original;
@@ -263,6 +268,9 @@ media_art_checksum_for_data (GChecksumType  checksum_type,
  * This operation should not use i/o, but it depends on the backend
  * GFile implementation.
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Returns: %TRUE if @cache_file was returned, otherwise %FALSE.
  *
  * Since: 0.2.0
@@ -374,6 +382,9 @@ media_art_get_file (const gchar  *artist,
  * Get the path to media art for a given resource. Newly allocated
  * data returned in @cache_path must be freed with g_free().
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Returns: %TRUE if @cache_path was returned, otherwise %FALSE.
  *
  * Since: 0.2.0
@@ -416,6 +427,9 @@ media_art_get_path (const gchar  *artist,
  *
  * If @artist and @album are %NULL, ALL media art cache is removed.
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Returns: #TRUE on success, otherwise #FALSE where @error will be set.
  *
  * Since: 0.2.0
@@ -615,6 +629,9 @@ remove_thread (GTask        *task,
  * value) will be executed before an outstanding request with lower
  * priority. Default priority is %G_PRIORITY_DEFAULT.
  *
+ * All string inputs must be valid UTF8. Use g_utf8_validate() if the
+ * input has not already been validated.
+ *
  * Since: 0.7.0
  */
 void
diff --git a/tests/mediaarttest.c b/tests/mediaarttest.c
index 93ac684..65e300c 100644
--- a/tests/mediaarttest.c
+++ b/tests/mediaarttest.c
@@ -54,6 +54,8 @@ static TestInfo strip_test_cases [] = {
        { "unbalanced-brackets-gt-lt-end", "Unbalanced brackets>", NULL, "unbalanced brackets" },
        { "messy-title-punctuation", "Live at *WEMBLEY* dude!", NULL, "live at wembley dude" },
        { "crap-brackets-everywhere", "met[xX[x]alli]ca", NULL, "metallica" },
+       /* This value found by fuzz testing, see https://gitlab.gnome.org/GNOME/libmediaart/-/merge_requests/5 */
+       { "invalid-utf8-1", 
"\x0a\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1\xc1",
 NULL, "" },
        { NULL, NULL, NULL, NULL }
 };
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]