[tracker-miners: 2/4] tracker-extract-mp3.c: Extract MusicBrainz IDs for mp3 files



commit 165d8fa71b7dfe5bcc452e03772a7dd823791d9d
Author: Sumaid Syed <sumaidsyed gmail com>
Date:   Wed Jun 12 15:34:21 2019 +0530

    tracker-extract-mp3.c: Extract MusicBrainz IDs for mp3 files
    
    Tracker needs to store MusicBrainz IDs to allow retrieval of tags and cover-art in GNOME Music.
    If a file already has MusicBrainz IDs, then extractor should detect and index them.
    Extraction of MusicBrainz IDs is done based on tags used by Picard.
    Tags Mapping : https://picard.musicbrainz.org/docs/mappings/
    Recording ID(UFID Frame) is handled seperately from other MBIDs(TXXX Frames).
    
    https://gitlab.gnome.org/GNOME/tracker-miners/issues/68

 src/tracker-extract/tracker-extract-mp3.c | 214 ++++++++++++++++++++++++++++++
 1 file changed, 214 insertions(+)
---
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 56706b2b2..bf2a0ed75 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -95,6 +95,11 @@ typedef struct {
        gint track_count;
        gint set_number;
        gint set_count;
+       gchar *mb_recording_id;
+       gchar *mb_track_id;
+       gchar *mb_release_id;
+       gchar *mb_artist_id;
+       gchar *mb_release_group_id;
 } id3v2tag;
 
 typedef enum {
@@ -139,9 +144,24 @@ typedef enum {
        ID3V24_TPUB,
        ID3V24_TRCK,
        ID3V24_TPOS,
+       ID3V24_TXXX,
        ID3V24_TYER,
+       ID3V24_UFID,
 } id3v24frame;
 
+typedef enum {
+       MB_TRACK_ID,
+       MB_RELEASE_ID,
+       MB_ARTIST_ID,
+       MB_RELEASE_GROUP_ID,
+       TXXX_UNKNOWN,
+} id3txxxtype;
+
+typedef enum {
+       MB_RECORDING_ID,
+       UFID_UNKNOWN,
+} id3ufidtype;
+
 typedef struct {
        size_t size;
        size_t id3v2_size;
@@ -167,6 +187,11 @@ typedef struct {
        gint track_count;
        gint set_number;
        gint set_count;
+       const gchar *mb_recording_id;
+       const gchar *mb_track_id;
+       const gchar *mb_release_id;
+       const gchar *mb_artist_id;
+       const gchar *mb_release_group_id;
 
        const unsigned char *media_art_data;
        size_t media_art_size;
@@ -217,7 +242,9 @@ static const struct {
        { "TPOS", ID3V24_TPOS },
        { "TPUB", ID3V24_TPUB },
        { "TRCK", ID3V24_TRCK },
+       { "TXXX", ID3V24_TXXX },
        { "TYER", ID3V24_TYER },
+       { "UFID", ID3V24_UFID },
 };
 
 /* sorted array */
@@ -243,6 +270,23 @@ static const struct {
        { "TYE", ID3V2_TYE },
 };
 
+static const struct {
+       const char *name;
+       id3txxxtype txxxtype;
+} id3_txxxtypes[] = {
+       { "MusicBrainz Release Track Id", MB_TRACK_ID },
+       { "MusicBrainz Album Id", MB_RELEASE_ID },
+       { "MusicBrainz Artist Id", MB_ARTIST_ID },
+       { "MusicBrainz Release Group Id", MB_RELEASE_GROUP_ID },
+};
+
+static const struct {
+       const char *name;
+       id3ufidtype ufidtype;
+} id3_ufidtypes[] = {
+       { "http://musicbrainz.org";, MB_RECORDING_ID},
+};
+
 static const char *const genre_names[] = {
        "Blues",
        "Classic Rock",
@@ -528,6 +572,11 @@ id3v2tag_free (id3v2tag *tags)
        g_free (tags->title1);
        g_free (tags->title2);
        g_free (tags->title3);
+       g_free (tags->mb_recording_id);
+       g_free (tags->mb_track_id);
+       g_free (tags->mb_release_id);
+       g_free (tags->mb_artist_id);
+       g_free (tags->mb_release_group_id);
 }
 
 static gboolean
@@ -1281,6 +1330,116 @@ id3v2_get_frame (const gchar *name)
        }
 }
 
+static id3txxxtype
+id3_get_txxx_type (const gchar *name)
+{
+       gint i;
+       for (i = 0; i < G_N_ELEMENTS (id3_txxxtypes); i++) {
+               if (strcmp (id3_txxxtypes[i].name, name) == 0) {
+                       return id3_txxxtypes[i].txxxtype;
+               }
+       }
+       return TXXX_UNKNOWN;
+}
+
+static id3ufidtype
+id3_get_ufid_type (const gchar *name)
+{
+       gint i;
+       for (i = 0; i < G_N_ELEMENTS (id3_ufidtypes); i++) {
+               if (strcmp (id3_ufidtypes[i].name, name) == 0) {
+                       return id3_ufidtypes[i].ufidtype;
+               }
+       }
+       return UFID_UNKNOWN;
+}
+
+static void
+extract_txxx_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize, id3tag *info, gfloat version)
+{
+       gchar *description;
+       gchar *value;
+       gchar text_encode;
+       const gchar *text_desc;
+       const gchar *text;
+       guint offset;
+       gint text_desc_len;
+       id3txxxtype txxxtype;
+
+       text_encode   =  data[pos + 0]; /* $xx */
+       text_desc     = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
+       text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4);
+
+       offset        = 4 + text_desc_len + id3v2_nul_size (text_encode);
+       text          = &data[pos + offset]; /* <full text string according to encoding> */
+
+       if (version == 2.3f) {
+               description = id3v2_text_to_utf8 (data[pos], &data[pos + 1], csize - 1, info);
+               value = id3v2_text_to_utf8 (text_encode, text, csize - offset, info);
+       }
+       else if (version == 2.4f) {
+               description = id3v24_text_to_utf8 (data[pos], &data[pos + 1], csize - 1, info);
+               value = id3v24_text_to_utf8 (text_encode, text, csize - offset, info);
+       }
+       if (!tracker_is_empty_string (description)) {
+               g_strstrip (description);
+               txxxtype = id3_get_txxx_type (description);
+       } else {
+               /* Can't do anything without mb tag. */
+               g_free (description);
+               return;
+       }
+
+       if (!tracker_is_empty_string (value)) {
+               g_strstrip (value);
+       } else {
+               /* Can't do anything without value. */
+               g_free (value);
+               return;
+       }
+       switch (txxxtype) {
+       case MB_TRACK_ID:
+               tag->mb_track_id = value;
+               break;
+       case MB_RELEASE_ID:
+               tag->mb_release_id = value;
+               break;
+       case MB_ARTIST_ID:
+               tag->mb_artist_id = value;
+               break;
+       case MB_RELEASE_GROUP_ID:
+               tag->mb_release_group_id = value;
+               break;
+       default:
+               g_free (value);
+               break;
+       }
+}
+
+static void
+extract_ufid_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize)
+{
+       id3ufidtype ufid_type;
+       const gchar *owner;
+       gint owner_len;
+       gchar *identifier;
+
+       owner      = &data[pos + 0];
+       owner_len  = strnlen (owner, csize);
+       if (tracker_is_empty_string (owner) ||
+               (ufid_type = id3_get_ufid_type (owner)) == UFID_UNKNOWN) {
+               return;
+       }
+
+       identifier = g_strndup (&data[pos + owner_len + 1], csize - owner_len - 1);
+       if (tracker_is_empty_string (identifier)) {
+               g_free (identifier);
+               return;
+       }
+
+       tag->mb_recording_id = identifier;
+}
+
 static void
 get_id3v24_tags (id3v24frame           frame,
                  const gchar          *data,
@@ -1350,6 +1509,16 @@ get_id3v24_tags (id3v24frame           frame,
                break;
        }
 
+       case ID3V24_TXXX: {
+               extract_txxx_tags (tag, data, pos, csize, info, 2.4f);
+               break;
+       }
+
+       case ID3V24_UFID: {
+               extract_ufid_tags (tag, data, pos, csize);
+               break;
+       }
+
        default: {
                gchar *word;
 
@@ -1541,6 +1710,16 @@ get_id3v23_tags (id3v24frame           frame,
                break;
        }
 
+       case ID3V24_TXXX: {
+               extract_txxx_tags (tag, data, pos, csize, info, 2.3f);
+               break;
+       }
+
+       case ID3V24_UFID: {
+               extract_ufid_tags (tag, data, pos, csize);
+               break;
+       }
+
        default: {
                gchar *word;
 
@@ -2461,6 +2640,21 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
                                                md.id3v23.encoded_by,
                                                md.id3v22.encoded_by);
 
+       md.mb_recording_id = tracker_coalesce_strip (2, md.id3v24.mb_recording_id,
+                                                    md.id3v23.mb_recording_id);
+
+       md.mb_track_id = tracker_coalesce_strip (2, md.id3v24.mb_track_id,
+                                                md.id3v23.mb_track_id);
+
+       md.mb_release_id = tracker_coalesce_strip (2, md.id3v24.mb_release_id,
+                                                  md.id3v23.mb_release_id);
+
+       md.mb_artist_id = tracker_coalesce_strip (2, md.id3v24.mb_artist_id,
+                                                 md.id3v23.mb_artist_id);
+
+       md.mb_release_group_id = tracker_coalesce_strip (2, md.id3v24.mb_release_group_id,
+                                                        md.id3v23.mb_release_group_id);
+
        if (md.id3v24.track_number != 0) {
                md.track_number = md.id3v24.track_number;
        } else if (md.id3v23.track_number != 0) {
@@ -2523,6 +2717,15 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
                tracker_resource_set_relation (main_resource, "nmm:musicAlbum", md.album);
                tracker_resource_set_relation (main_resource, "nmm:musicAlbumDisc", album_disc);
 
+               if (md.mb_release_id) {
+                       tracker_resource_set_string (md.album, "nmm:mbReleaseID", md.mb_release_id);
+               }
+
+               if (md.mb_release_group_id) {
+                       tracker_resource_set_string (md.album, "nmm:mbReleaseGroupID",
+                                                                                md.mb_release_group_id);
+               }
+
                if (md.track_count > 0) {
                        tracker_resource_set_int (md.album, "nmm:albumTrackCount", md.track_count);
                }
@@ -2547,6 +2750,9 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
 
        if (md.performer) {
                tracker_resource_set_relation (main_resource, "nmm:performer", md.performer);
+               if (md.mb_artist_id) {
+                       tracker_resource_set_string (md.performer, "nmm:mbArtistID", md.mb_artist_id);
+               }
        }
 
        if (md.composer) {
@@ -2588,6 +2794,14 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
                tracker_resource_set_int (main_resource, "nmm:trackNumber", md.track_number);
        }
 
+       if (md.mb_recording_id) {
+               tracker_resource_set_string (main_resource, "nmm:mbRecordingID", md.mb_recording_id);
+       }
+
+       if (md.mb_track_id) {
+               tracker_resource_set_string (main_resource, "nmm:mbTrackID", md.mb_track_id);
+       }
+
        /* Get mp3 stream info */
        parsed = mp3_parse (buffer, buffer_size, audio_offset, uri, main_resource, &md);
        g_clear_object (&md.performer);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]