[tracker-miners: 2/4] tracker-extract-mp3.c: Extract MusicBrainz IDs for mp3 files
- From: Sam Thursfield <sthursfield src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [tracker-miners: 2/4] tracker-extract-mp3.c: Extract MusicBrainz IDs for mp3 files
- Date: Wed, 12 Jun 2019 11:36:14 +0000 (UTC)
commit 165d8fa71b7dfe5bcc452e03772a7dd823791d9d
Author: Sumaid Syed <sumaidsyed gmail com>
Date: Wed Jun 12 15:34:21 2019 +0530
tracker-extract-mp3.c: Extract MusicBrainz IDs for mp3 files
Tracker needs to store MusicBrainz IDs to allow retrieval of tags and cover-art in GNOME Music.
If a file already has MusicBrainz IDs, then the extractor should detect and index them.
Extraction of MusicBrainz IDs is done based on tags used by Picard.
Tags Mapping : https://picard.musicbrainz.org/docs/mappings/
Recording ID (UFID frame) is handled separately from the other MBIDs (TXXX frames).
https://gitlab.gnome.org/GNOME/tracker-miners/issues/68
src/tracker-extract/tracker-extract-mp3.c | 214 ++++++++++++++++++++++++++++++
1 file changed, 214 insertions(+)
---
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 56706b2b2..bf2a0ed75 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -95,6 +95,11 @@ typedef struct {
gint track_count;
gint set_number;
gint set_count;
+ gchar *mb_recording_id;
+ gchar *mb_track_id;
+ gchar *mb_release_id;
+ gchar *mb_artist_id;
+ gchar *mb_release_group_id;
} id3v2tag;
typedef enum {
@@ -139,9 +144,24 @@ typedef enum {
ID3V24_TPUB,
ID3V24_TRCK,
ID3V24_TPOS,
+ ID3V24_TXXX,
ID3V24_TYER,
+ ID3V24_UFID,
} id3v24frame;
+typedef enum { /* what a recognised TXXX frame description identifies */
+ MB_TRACK_ID, /* "MusicBrainz Release Track Id" */
+ MB_RELEASE_ID, /* "MusicBrainz Album Id" */
+ MB_ARTIST_ID, /* "MusicBrainz Artist Id" */
+ MB_RELEASE_GROUP_ID, /* "MusicBrainz Release Group Id" */
+ TXXX_UNKNOWN, /* returned by id3_get_txxx_type() when no table entry matches */
+} id3txxxtype;
+
+typedef enum { /* what a recognised UFID frame owner identifies */
+ MB_RECORDING_ID, /* owner "http://musicbrainz.org" */
+ UFID_UNKNOWN, /* returned by id3_get_ufid_type() when no table entry matches */
+} id3ufidtype;
+
typedef struct {
size_t size;
size_t id3v2_size;
@@ -167,6 +187,11 @@ typedef struct {
gint track_count;
gint set_number;
gint set_count;
+ const gchar *mb_recording_id;
+ const gchar *mb_track_id;
+ const gchar *mb_release_id;
+ const gchar *mb_artist_id;
+ const gchar *mb_release_group_id;
const unsigned char *media_art_data;
size_t media_art_size;
@@ -217,7 +242,9 @@ static const struct {
{ "TPOS", ID3V24_TPOS },
{ "TPUB", ID3V24_TPUB },
{ "TRCK", ID3V24_TRCK },
+ { "TXXX", ID3V24_TXXX },
{ "TYER", ID3V24_TYER },
+ { "UFID", ID3V24_UFID },
};
/* sorted array */
@@ -243,6 +270,23 @@ static const struct {
{ "TYE", ID3V2_TYE },
};
+static const struct { /* TXXX description -> id kind; searched linearly by id3_get_txxx_type() */
+ const char *name; /* description text, compared with strcmp (exact, case-sensitive) */
+ id3txxxtype txxxtype; /* kind reported when the description matches */
+} id3_txxxtypes[] = { /* per the commit message, names follow the tags written by Picard */
+ { "MusicBrainz Release Track Id", MB_TRACK_ID },
+ { "MusicBrainz Album Id", MB_RELEASE_ID },
+ { "MusicBrainz Artist Id", MB_ARTIST_ID },
+ { "MusicBrainz Release Group Id", MB_RELEASE_GROUP_ID },
+};
+
+static const struct { /* UFID owner -> id kind; searched linearly by id3_get_ufid_type() */
+ const char *name; /* owner identifier, compared with strcmp (exact, case-sensitive) */
+ id3ufidtype ufidtype; /* kind reported when the owner matches */
+} id3_ufidtypes[] = {
+ { "http://musicbrainz.org", MB_RECORDING_ID},
+};
+
static const char *const genre_names[] = {
"Blues",
"Classic Rock",
@@ -528,6 +572,11 @@ id3v2tag_free (id3v2tag *tags)
g_free (tags->title1);
g_free (tags->title2);
g_free (tags->title3);
+ g_free (tags->mb_recording_id);
+ g_free (tags->mb_track_id);
+ g_free (tags->mb_release_id);
+ g_free (tags->mb_artist_id);
+ g_free (tags->mb_release_group_id);
}
static gboolean
@@ -1281,6 +1330,116 @@ id3v2_get_frame (const gchar *name)
}
}
+/* Return the id3txxxtype whose table name equals NAME exactly, or TXXX_UNKNOWN if none does. */
+static id3txxxtype
+id3_get_txxx_type (const gchar *name)
+{
+	guint idx = 0;
+	while (idx < G_N_ELEMENTS (id3_txxxtypes) &&
+	       strcmp (id3_txxxtypes[idx].name, name) != 0) {
+		idx++;
+	}
+	return (idx < G_N_ELEMENTS (id3_txxxtypes)) ? id3_txxxtypes[idx].txxxtype : TXXX_UNKNOWN;
+}
+
+/* Return the id3ufidtype whose table name equals NAME exactly, or UFID_UNKNOWN if none does. */
+static id3ufidtype
+id3_get_ufid_type (const gchar *name)
+{
+	guint idx = 0;
+	while (idx < G_N_ELEMENTS (id3_ufidtypes) &&
+	       strcmp (id3_ufidtypes[idx].name, name) != 0) {
+		idx++;
+	}
+	return (idx < G_N_ELEMENTS (id3_ufidtypes)) ? id3_ufidtypes[idx].ufidtype : UFID_UNKNOWN;
+}
+
+/* Parse one TXXX (user-defined text) frame: <encoding> <description> $00 (00) <value>.
+ * When the description matches an entry of id3_txxxtypes, TAG takes ownership of the
+ * decoded, stripped value (replacing any earlier one); otherwise nothing is stored.
+ * VERSION picks the decoder: 2.3f -> id3v2_text_to_utf8, 2.4f -> id3v24_text_to_utf8. */
+static void
+extract_txxx_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize, id3tag *info, gfloat version)
+{
+	gchar *description = NULL; /* stays NULL when VERSION is neither 2.3f nor 2.4f */
+	gchar *value = NULL;
+	gchar **slot = NULL; /* field of TAG that will receive VALUE */
+	gchar text_encode;
+	const gchar *text_desc;
+	guint offset;
+
+	if (csize < 2) {
+		return; /* need the encoding byte plus at least one text byte */
+	}
+
+	text_encode = data[pos + 0]; /* $xx */
+	text_desc = &data[pos + 1]; /* starts right after the encoding byte: TXXX has no language field */
+	offset = 1 + id3v2_strlen (text_encode, text_desc, csize - 1) + id3v2_nul_size (text_encode);
+	if (offset >= csize) {
+		return; /* frame ends before any value; csize - offset would underflow */
+	}
+
+	if (version == 2.3f) {
+		description = id3v2_text_to_utf8 (text_encode, text_desc, csize - 1, info);
+		value = id3v2_text_to_utf8 (text_encode, &data[pos + offset], csize - offset, info);
+	} else if (version == 2.4f) {
+		description = id3v24_text_to_utf8 (text_encode, text_desc, csize - 1, info);
+		value = id3v24_text_to_utf8 (text_encode, &data[pos + offset], csize - offset, info);
+	}
+	/* Can't do anything without both the mb tag name and its value. */
+	if (!tracker_is_empty_string (description) && !tracker_is_empty_string (value)) {
+		g_strstrip (description);
+		g_strstrip (value);
+		switch (id3_get_txxx_type (description)) {
+		case MB_TRACK_ID:
+			slot = &tag->mb_track_id;
+			break;
+		case MB_RELEASE_ID:
+			slot = &tag->mb_release_id;
+			break;
+		case MB_ARTIST_ID:
+			slot = &tag->mb_artist_id;
+			break;
+		case MB_RELEASE_GROUP_ID:
+			slot = &tag->mb_release_group_id;
+			break;
+		default:
+			break;
+		}
+	}
+	if (slot != NULL) {
+		g_free (*slot); /* drop the value of an earlier duplicate frame */
+		*slot = value;
+	} else {
+		g_free (value);
+	}
+	g_free (description); /* only needed for the lookup; never stored */
+}
+
+/* Parse a UFID frame (<owner> $00 <identifier bytes>) and, when the owner is listed in
+ * id3_ufidtypes, store a copy of the identifier in TAG as the MusicBrainz recording id.
+ * Frames without a NUL-terminated owner inside CSIZE bytes are ignored. */
+static void
+extract_ufid_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize)
+{
+	const gchar *owner = &data[pos + 0];
+	size_t owner_len = strnlen (owner, csize);
+	gchar *identifier;
+
+	/* owner_len == csize means no terminator: comparing owner would read past the frame. */
+	if (owner_len == 0 || owner_len >= csize || id3_get_ufid_type (owner) == UFID_UNKNOWN) {
+		return;
+	}
+
+	identifier = g_strndup (&data[pos + owner_len + 1], csize - owner_len - 1);
+	if (tracker_is_empty_string (identifier)) {
+		g_free (identifier);
+		return;
+	}
+	g_free (tag->mb_recording_id); /* replace the id from an earlier UFID frame */
+	tag->mb_recording_id = identifier;
+}
+
static void
get_id3v24_tags (id3v24frame frame,
const gchar *data,
@@ -1350,6 +1509,16 @@ get_id3v24_tags (id3v24frame frame,
break;
}
+ case ID3V24_TXXX: {
+ extract_txxx_tags (tag, data, pos, csize, info, 2.4f);
+ break;
+ }
+
+ case ID3V24_UFID: {
+ extract_ufid_tags (tag, data, pos, csize);
+ break;
+ }
+
default: {
gchar *word;
@@ -1541,6 +1710,16 @@ get_id3v23_tags (id3v24frame frame,
break;
}
+ case ID3V24_TXXX: {
+ extract_txxx_tags (tag, data, pos, csize, info, 2.3f);
+ break;
+ }
+
+ case ID3V24_UFID: {
+ extract_ufid_tags (tag, data, pos, csize);
+ break;
+ }
+
default: {
gchar *word;
@@ -2461,6 +2640,21 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
md.id3v23.encoded_by,
md.id3v22.encoded_by);
+ md.mb_recording_id = tracker_coalesce_strip (2, md.id3v24.mb_recording_id,
+ md.id3v23.mb_recording_id);
+
+ md.mb_track_id = tracker_coalesce_strip (2, md.id3v24.mb_track_id,
+ md.id3v23.mb_track_id);
+
+ md.mb_release_id = tracker_coalesce_strip (2, md.id3v24.mb_release_id,
+ md.id3v23.mb_release_id);
+
+ md.mb_artist_id = tracker_coalesce_strip (2, md.id3v24.mb_artist_id,
+ md.id3v23.mb_artist_id);
+
+ md.mb_release_group_id = tracker_coalesce_strip (2, md.id3v24.mb_release_group_id,
+ md.id3v23.mb_release_group_id);
+
if (md.id3v24.track_number != 0) {
md.track_number = md.id3v24.track_number;
} else if (md.id3v23.track_number != 0) {
@@ -2523,6 +2717,15 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
tracker_resource_set_relation (main_resource, "nmm:musicAlbum", md.album);
tracker_resource_set_relation (main_resource, "nmm:musicAlbumDisc", album_disc);
+ if (md.mb_release_id) {
+ tracker_resource_set_string (md.album, "nmm:mbReleaseID", md.mb_release_id);
+ }
+
+ if (md.mb_release_group_id) {
+ tracker_resource_set_string (md.album, "nmm:mbReleaseGroupID",
+ md.mb_release_group_id);
+ }
+
if (md.track_count > 0) {
tracker_resource_set_int (md.album, "nmm:albumTrackCount", md.track_count);
}
@@ -2547,6 +2750,9 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
if (md.performer) {
tracker_resource_set_relation (main_resource, "nmm:performer", md.performer);
+ if (md.mb_artist_id) {
+ tracker_resource_set_string (md.performer, "nmm:mbArtistID", md.mb_artist_id);
+ }
}
if (md.composer) {
@@ -2588,6 +2794,14 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
tracker_resource_set_int (main_resource, "nmm:trackNumber", md.track_number);
}
+ if (md.mb_recording_id) {
+ tracker_resource_set_string (main_resource, "nmm:mbRecordingID", md.mb_recording_id);
+ }
+
+ if (md.mb_track_id) {
+ tracker_resource_set_string (main_resource, "nmm:mbTrackID", md.mb_track_id);
+ }
+
/* Get mp3 stream info */
parsed = mp3_parse (buffer, buffer_size, audio_offset, uri, main_resource, &md);
g_clear_object (&md.performer);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]