[tracker/mp3-id3v2-fixes: 2/2] tracker-extract: MP3 id3v23 and id3v24 functions documented and v23 fixed



commit 032dd1b591f07a06c74f9cc24209ca81d7654700
Author: Martyn Russell <martyn lanedo com>
Date:   Mon Jun 9 12:38:36 2014 +0100

    tracker-extract: MP3 id3v23 and id3v24 functions documented and v23 fixed
    
    - The padding logic in v23 was incorrect.
    - Also the frame reading would continue while < total file size, which lead to
      infinite loops and issues for some files, now we go only while < tsize (tag
      size).
    - Instead of +10 here and there, now we use a defined integer to know why
      we're increasing the iteration through the file for maintainability

 src/tracker-extract/tracker-extract-mp3.c |  189 ++++++++++++++++++++++++-----
 1 files changed, 159 insertions(+), 30 deletions(-)
---
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 7dad217..7e3adda 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -1735,6 +1735,8 @@ parse_id3v24 (const gchar           *data,
               MP3Data               *filedata,
               size_t                *offset_delta)
 {
+       const gint header_size = 10;
+       const gint frame_size = 10;
        gint unsync;
        gint ext_header;
        gint experimental;
@@ -1742,40 +1744,87 @@ parse_id3v24 (const gchar           *data,
        guint pos;
        guint ext_header_size;
 
+       /* Check header, expecting (in hex), 10 bytes long:
+        *
+        *   $ 49 44 33 yy yy xx zz zz zz zz
+        *
+        * Where yy is less than $FF, xx is the 'flags' byte and zz is
+        * less than $80.
+        *
+        * Here yy is the version, so v24 == 04.
+        *
+        * MP3's look like this:
+        *
+        *   [Header][?External Header?][Tags][Content]
+        */
        if ((size < 16) ||
            (data[0] != 0x49) ||
            (data[1] != 0x44) ||
            (data[2] != 0x33) ||
            (data[3] != 0x04) ||
-           (data[4] != 0x00) ) {
+           (data[4] != 0x00)) {
+               /* It's not an error, we might try another function
+                * if we have the wrong version header here.
+                */
                return;
        }
 
+       /* Get the flags (xx) in the header */
        unsync = (data[5] & 0x80) > 0;
        ext_header = (data[5] & 0x40) > 0;
        experimental = (data[5] & 0x20) > 0;
+
+       /* Get the complete tag size (zz) in the header:
+        * Tag size is size of the complete tag after
+        * unsychronisation, including padding, excluding the header
+        * but not excluding the extended header (total tag size - 10)
+        */
        tsize = (((data[6] & 0x7F) << 21) |
                 ((data[7] & 0x7F) << 14) |
                 ((data[8] & 0x7F) << 7) |
                 ((data[9] & 0x7F) << 0));
 
-       if ((tsize + 10 > size) || (experimental)) {
+       /* We don't handle experimental cases */
+       if (experimental) {
+               g_warning ("Experimental MP3s are not extracted, doing nothing");
                return;
        }
 
-       pos = 10;
+       /* Check if we can read even the first frame, The complete
+        * tag size (tsize) does not include the header which is 10
+        * bytes, so we check that there is some content AFTER the
+        * headers. */
+       if (tsize + header_size > size) {
+               g_warning ("Expected MP3 tag size and header size to be within file size boundaries");
+               return;
+       }
+
+       /* Start after the header (10 bytes long) */
+       pos = header_size;
 
+       /* Completely optional */
        if (ext_header) {
+               /* Extended header is expected to be:
+                *   Extended header size   $xx xx xx xx (4 chars)
+                *   Extended Flags         $xx xx
+                *   Size of padding        $xx xx xx xx
+                */
                ext_header_size = (((data[10] & 0x7F) << 21) |
                                   ((data[11] & 0x7F) << 14) |
                                   ((data[12] & 0x7F) << 7) |
                                   ((data[13] & 0x7F) << 0));
-               pos += ext_header_size;
 
-               if (pos + tsize > size) {
-                       /* invalid size: extended header longer than tag */
+               /* Where the 'Extended header size', currently 6 or 10
+                * bytes, excludes itself. The 'Size of padding' is
+                * simply the total tag size excluding the frames and
+                * the headers, in other words the padding.
+                */
+               if (tsize + ext_header_size > size) {
+                       g_warning ("Expected MP3 tag size and extended header size to be within file size 
boundaries");
                        return;
                }
+
+               pos += ext_header_size;
        }
 
        while (pos < size) {
@@ -1783,7 +1832,13 @@ parse_id3v24 (const gchar           *data,
                size_t csize;
                unsigned short flags;
 
-               if (pos + 10 > size) {
+               /* Frames are 10 bytes each and made up of:
+                *   Frame ID       $xx xx xx xx (4 chars)
+                *   Size           $xx xx xx xx
+                *   Flags          $xx xx
+                */
+               if (pos + frame_size > tsize) {
+                       g_warning ("Expected MP3 frame size to be within tag size boundaries");
                        return;
                }
 
@@ -1797,20 +1852,33 @@ parse_id3v24 (const gchar           *data,
                flags = (((unsigned char) (data[pos + 8]) << 8) +
                         ((unsigned char) (data[pos + 9])));
 
-               pos += 10;
+               pos += frame_size;
 
                if (frame == ID3V24_UNKNOWN) {
-                       /* ignore unknown frames */
+                       /* Ignore unknown frames */
                        pos += csize;
                        continue;
                }
 
-               if (pos + csize > size) {
+               /* If content size is more than size of file, stop. If
+                * If content size is 0 then continue to next frame. */
+               if (pos + csize > tsize) {
                        break;
                } else if (csize == 0) {
                        continue;
                }
 
+               /* Frame flags expected are in format of:
+                *
+                *   %abc00000 %ijk00000
+                *
+                * a - Tag alter preservation
+                * b - File alter preservation
+                * c - Read only
+                * i - Compression
+                * j - Encryption
+                * k - Grouping identity
+                */
                if (((flags & 0x80) > 0) ||
                    ((flags & 0x40) > 0)) {
                        pos += csize;
@@ -1837,7 +1905,7 @@ parse_id3v24 (const gchar           *data,
                pos += csize;
        }
 
-       *offset_delta = tsize + 10;
+       *offset_delta = tsize + header_size;
 }
 
 static void
@@ -1849,61 +1917,103 @@ parse_id3v23 (const gchar          *data,
               MP3Data              *filedata,
               size_t               *offset_delta)
 {
+       const gint header_size = 10;
+       const gint frame_size = 10;
        gint unsync;
        gint ext_header;
        gint experimental;
        guint tsize;
        guint pos;
        guint ext_header_size;
-       guint padding;
 
+       /* Check header, expecting (in hex), 10 bytes long:
+        *
+        *   $ 49 44 33 yy yy xx zz zz zz zz
+        *
+        * Where yy is less than $FF, xx is the 'flags' byte and zz is
+        * less than $80.
+        *
+        * Here yy is the version, so v23 == 03.
+        *
+        * MP3's look like this:
+        *
+        *   [Header][?External Header?][Tags][Content]
+        */
        if ((size < 16) ||
            (data[0] != 0x49) ||
            (data[1] != 0x44) ||
            (data[2] != 0x33) ||
            (data[3] != 0x03) ||
            (data[4] != 0x00)) {
+               /* It's not an error, we might try another function
+                * if we have the wrong version header here.
+                */
                return;
        }
 
+       /* Get the flags (xx) in the header */
        unsync = (data[5] & 0x80) > 0;
        ext_header = (data[5] & 0x40) > 0;
        experimental = (data[5] & 0x20) > 0;
+
+       /* Get the complete tag size (zz) in the header:
+        * Tag size is size of the complete tag after
+        * unsychronisation, including padding, excluding the header
+        * but not excluding the extended header (total tag size - 10)
+        */
        tsize = (((data[6] & 0x7F) << 21) |
                 ((data[7] & 0x7F) << 14) |
                 ((data[8] & 0x7F) << 7) |
                 ((data[9] & 0x7F) << 0));
 
-       if ((tsize + 10 > size) || (experimental)) {
+       /* We don't handle experimental cases */
+       if (experimental) {
+               g_warning ("Experimental MP3s are not extracted, doing nothing");
                return;
        }
 
-       pos = 10;
-       padding = 0;
+       /* Check if we can read even the first frame, The complete
+        * tag size (tsize) does not include the header which is 10
+        * bytes, so we check that there is some content AFTER the
+        * headers. */
+       if (tsize + header_size > size) {
+               g_warning ("Expected MP3 tag size and header size to be within file size boundaries");
+               return;
+       }
 
+       /* Start after the header (10 bytes long) */
+       pos = header_size;
+
+       /* Completely optional */
        if (ext_header) {
+               guint padding;
+
+               /* Extended header is expected to be:
+                *   Extended header size   $xx xx xx xx (4 chars)
+                *   Extended Flags         $xx xx
+                *   Size of padding        $xx xx xx xx
+                */
                ext_header_size = (((unsigned char)(data[10]) << 24) |
                                   ((unsigned char)(data[11]) << 16) |
                                   ((unsigned char)(data[12]) << 8) |
-                                  ((unsigned char)(data[12]) << 0));
+                                  ((unsigned char)(data[13]) << 0));
 
                padding = (((unsigned char)(data[15]) << 24) |
                           ((unsigned char)(data[16]) << 16) |
                           ((unsigned char)(data[17]) << 8) |
                           ((unsigned char)(data[18]) << 0));
 
-               pos += 4 + ext_header_size;
-
-               if (padding < tsize)
-                       tsize -= padding;
-               else {
+               /* Where the 'Extended header size', currently 6 or 10
+                * bytes, excludes itself. The 'Size of padding' is
+                * simply the total tag size excluding the frames and
+                * the headers, in other words the padding.
+                */
+               if (tsize + ext_header_size > size) {
+                       g_warning ("Expected MP3 tag size and extended header size to be within file size 
boundaries");
                        return;
                }
 
-               if (pos + tsize > size) {
-                       /* invalid size: extended header longer than tag */
-                       return;
-               }
+               pos += ext_header_size;
        }
 
        while (pos < size) {
@@ -1911,7 +2021,12 @@ parse_id3v23 (const gchar          *data,
                size_t csize;
                unsigned short flags;
 
-               if (pos + 10 > size) {
+               /* Frames are 10 bytes each and made up of:
+                *   Frame ID       $xx xx xx xx (4 chars)
+                *   Size           $xx xx xx xx
+                *   Flags          $xx xx
+                */
+               if (pos + frame_size > tsize) {
                        return;
                }
 
@@ -1925,21 +2040,35 @@ parse_id3v23 (const gchar          *data,
                flags = (((unsigned char)(data[pos + 8]) << 8) +
                         ((unsigned char)(data[pos + 9])));
 
-               pos += 10;
+               pos += frame_size;
 
                if (frame == ID3V24_UNKNOWN) {
-                       /* ignore unknown frames */
+                       /* Ignore unknown frames */
                        pos += csize;
                        continue;
                }
 
-               if (pos + csize > size) {
+               /* If content size is more than size of file, stop. If
+                * If content size is 0 then continue to next frame. */
+               if (pos + csize > tsize) {
                        break;
                } else if (csize == 0) {
                        continue;
                }
 
-               if (((flags & 0x80) > 0) || ((flags & 0x40) > 0)) {
+               /* Frame flags expected are in format of:
+                *
+                *   %abc00000 %ijk00000
+                *
+                * a - Tag alter preservation
+                * b - File alter preservation
+                * c - Read only
+                * i - Compression
+                * j - Encryption
+                * k - Grouping identity
+                */
+               if (((flags & 0x80) > 0) ||
+                   ((flags & 0x40) > 0)) {
                        pos += csize;
                        continue;
                }


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]