[tracker] tracker-extract-mp3: Fix use of single-valued properties
- From: Jürg Billeter <juergbi src gnome org>
- To: svn-commits-list gnome org
- Cc:
- Subject: [tracker] tracker-extract-mp3: Fix use of single-valued properties
- Date: Mon, 31 Aug 2009 11:11:16 +0000 (UTC)
commit 823e91026609343f97ca3786cd2ff69806a526d8
Author: Jürg Billeter <j bitron ch>
Date: Thu Aug 20 10:47:22 2009 +0200
tracker-extract-mp3: Fix use of single-valued properties
Fixes NB#123379 and NB#125733.
src/libtracker-common/tracker-statement-list.c | 20 +-
src/libtracker-common/tracker-statement-list.h | 3 +
src/tracker-extract/tracker-extract-mp3.c | 1496 +++++++++++++-----------
3 files changed, 803 insertions(+), 716 deletions(-)
---
diff --git a/src/libtracker-common/tracker-statement-list.c b/src/libtracker-common/tracker-statement-list.c
index 124810e..ccc3c94 100644
--- a/src/libtracker-common/tracker-statement-list.c
+++ b/src/libtracker-common/tracker-statement-list.c
@@ -36,8 +36,6 @@ tracker_statement_list_insert (TrackerSparqlBuilder *statements,
const gchar *predicate,
const gchar *value)
{
- const gchar *end;
-
g_return_if_fail (TRACKER_IS_SPARQL_BUILDER (statements));
g_return_if_fail (subject != NULL);
g_return_if_fail (predicate != NULL);
@@ -45,6 +43,18 @@ tracker_statement_list_insert (TrackerSparqlBuilder *statements,
tracker_sparql_builder_subject_iri (statements, subject);
tracker_sparql_builder_predicate_iri (statements, predicate);
+ tracker_sparql_builder_object_unvalidated (statements, value);
+}
+
+
+void
+tracker_sparql_builder_object_unvalidated (TrackerSparqlBuilder *sparql,
+ const gchar *value)
+{
+ const gchar *end;
+
+ g_return_if_fail (TRACKER_IS_SPARQL_BUILDER (sparql));
+ g_return_if_fail (value != NULL);
if (!g_utf8_validate (value, -1, &end)) {
gchar *valid;
@@ -55,16 +65,16 @@ tracker_statement_list_insert (TrackerSparqlBuilder *statements,
if (value != end) {
valid = g_strndup (value, end - value);
- tracker_sparql_builder_object_string (statements, valid);
+ tracker_sparql_builder_object_string (sparql, valid);
g_free (valid);
} else {
- tracker_sparql_builder_object_string (statements, "(invalid data)");
+ tracker_sparql_builder_object_string (sparql, "(invalid data)");
}
return;
}
- tracker_sparql_builder_object_string (statements, value);
+ tracker_sparql_builder_object_string (sparql, value);
}
void
diff --git a/src/libtracker-common/tracker-statement-list.h b/src/libtracker-common/tracker-statement-list.h
index 0551bbf..c9a1e20 100644
--- a/src/libtracker-common/tracker-statement-list.h
+++ b/src/libtracker-common/tracker-statement-list.h
@@ -28,6 +28,9 @@
G_BEGIN_DECLS
+void tracker_sparql_builder_object_unvalidated (TrackerSparqlBuilder *sparql,
+ const gchar *value);
+
void tracker_statement_list_insert (TrackerSparqlBuilder *statements,
const gchar *subject,
const gchar *predicate,
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index 5e81dcd..2d7fc35 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -81,11 +81,7 @@
typedef struct {
const gchar *text;
- const gchar *type;
- const gchar *urn;
- const gchar *rdf_type;
- const gchar *predicate;
- gchar **nullify;
+ gchar **field;
} Matches;
typedef struct {
@@ -100,18 +96,49 @@ typedef struct {
} id3tag;
typedef struct {
+ gchar *album;
+ gchar *comment;
+ gchar *content_type;
+ gchar *copyright;
+ guint32 length;
+ gchar *performer1;
+ gchar *performer2;
+ gchar *publisher;
+ gchar *recording_time;
+ gchar *release_time;
+ gchar *text;
+ gchar *title1;
+ gchar *title2;
+ gchar *title3;
+ gint track_number;
+} id3v2tag;
+
+typedef struct {
size_t size;
size_t id3v2_size;
guint32 duration;
gchar *title;
+ gchar *performer;
+ gchar *performer_uri;
+ gchar *album;
+ gchar *album_uri;
+ gchar *genre;
+ gchar *text;
+ gchar *recording_time;
+ gchar *copyright;
+ gchar *publisher;
+ gchar *comment;
unsigned char *albumartdata;
size_t albumartsize;
gchar *albumartmime;
- id3tag *id3v1_info;
+ id3tag id3v1_info;
+ id3v2tag id3v22_info;
+ id3v2tag id3v23_info;
+ id3v2tag id3v24_info;
} file_data;
enum {
@@ -128,6 +155,84 @@ enum {
LAYER_3
};
+typedef enum {
+ ID3V24_UNKNOWN,
+ ID3V24_APIC,
+ ID3V24_COMM,
+ ID3V24_TALB,
+ ID3V24_TCON,
+ ID3V24_TCOP,
+ ID3V24_TDRC,
+ ID3V24_TDRL,
+ ID3V24_TEXT,
+ ID3V24_TIT1,
+ ID3V24_TIT2,
+ ID3V24_TIT3,
+ ID3V24_TLEN,
+ ID3V24_TPE1,
+ ID3V24_TPE2,
+ ID3V24_TPUB,
+ ID3V24_TRCK,
+ ID3V24_TYER,
+} id3v24frame;
+
+/* sorted array */
+static const struct { const char *name; id3v24frame frame; } id3v24_frames[] = {
+ { "APIC", ID3V24_APIC },
+ { "COMM", ID3V24_COMM },
+ { "TALB", ID3V24_TALB },
+ { "TCON", ID3V24_TCON },
+ { "TCOP", ID3V24_TCOP },
+ { "TDRC", ID3V24_TDRC },
+ { "TDRL", ID3V24_TDRL },
+ { "TEXT", ID3V24_TEXT },
+ { "TIT1", ID3V24_TIT1 },
+ { "TIT2", ID3V24_TIT2 },
+ { "TIT3", ID3V24_TIT3 },
+ { "TLEN", ID3V24_TLEN },
+ { "TPE1", ID3V24_TPE1 },
+ { "TPE2", ID3V24_TPE2 },
+ { "TPUB", ID3V24_TPUB },
+ { "TRCK", ID3V24_TRCK },
+ { "TYER", ID3V24_TYER },
+};
+
+typedef enum {
+ ID3V2_UNKNOWN,
+ ID3V2_COM,
+ ID3V2_PIC,
+ ID3V2_TAL,
+ ID3V2_TCO,
+ ID3V2_TCR,
+ ID3V2_TLE,
+ ID3V2_TPB,
+ ID3V2_TP1,
+ ID3V2_TP2,
+ ID3V2_TT1,
+ ID3V2_TT2,
+ ID3V2_TT3,
+ ID3V2_TXT,
+ ID3V2_TYE,
+} id3v2frame;
+
+/* ID3v2.2 frame table; MUST stay sorted by name in strncmp() byte order (digits sort before letters, so TP1 < TP2 < TPB) because id3v2_get_frame() binary-searches it */
+static const struct { const char *name; id3v2frame frame; } id3v2_frames[] = {
+ { "COM", ID3V2_COM },
+ { "PIC", ID3V2_PIC },
+ { "TAL", ID3V2_TAL },
+ { "TCO", ID3V2_TCO },
+ { "TCR", ID3V2_TCR },
+ { "TLE", ID3V2_TLE },
+ { "TP1", ID3V2_TP1 },
+ { "TP2", ID3V2_TP2 },
+ { "TPB", ID3V2_TPB },
+ { "TT1", ID3V2_TT1 },
+ { "TT2", ID3V2_TT2 },
+ { "TT3", ID3V2_TT3 },
+ { "TXT", ID3V2_TXT },
+ { "TYE", ID3V2_TYE },
+};
+
static void extract_mp3 (const gchar *filename,
TrackerSparqlBuilder *metadata);
@@ -675,7 +780,6 @@ mp3_parse_header (const gchar *data,
gint vbr_flag = 0;
guint length = 0;
guint sample_rate = 0;
- gint ch = 0;
guint frame_size;
guint frames = 0;
size_t pos = 0;
@@ -685,85 +789,59 @@ mp3_parse_header (const gchar *data,
memcpy (&header, &data[pos], sizeof (header));
switch (header & mpeg_ver_mask) {
- case 0x800:
- mpeg_ver = MPEG_ERR;
- break;
- case 0x1000:
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "codec",
- "MPEG");
-/* tracker_statement_list_insert (metadata, uri,
- "Audio:CodecVersion",
- "2");*/
- mpeg_ver = MPEG_V2;
- break;
- case 0x1800:
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "codec",
- "MPEG");
-/* tracker_statement_list_insert (metadata, uri,
- "Audio:CodecVersion",
- "1");*/
- mpeg_ver = MPEG_V1;
- break;
- case 0:
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "codec",
- "MPEG");
-/* tracker_statement_list_insert (metadata, uri,
- "Audio:CodecVersion",
- "2.5");*/
- mpeg_ver = MPEG_V25;
- break;
- default:
- break;
+ case 0x1000:
+ tracker_sparql_builder_predicate (metadata, "nfo:codec");
+ tracker_sparql_builder_object_string (metadata, "MPEG");
+ mpeg_ver = MPEG_V2;
+ break;
+ case 0x1800:
+ tracker_sparql_builder_predicate (metadata, "nfo:codec");
+ tracker_sparql_builder_object_string (metadata, "MPEG");
+ mpeg_ver = MPEG_V1;
+ break;
+ case 0:
+ tracker_sparql_builder_predicate (metadata, "nfo:codec");
+ tracker_sparql_builder_object_string (metadata, "MPEG");
+ mpeg_ver = MPEG_V25;
+ break;
+ default:
+ /* unknown version */
+ return FALSE;
}
switch (header & mpeg_layer_mask) {
- case 0x400:
- layer_ver = LAYER_2;
- padsize = 1;
- break;
- case 0x200:
- layer_ver = LAYER_3;
- padsize = 1;
- break;
- case 0x600:
- layer_ver = LAYER_1;
- padsize = 4;
- break;
- case 0:
- layer_ver = LAYER_ERR;
- default:
- break;
- }
-
- if (!layer_ver || !mpeg_ver) {
- /* g_debug ("Unknown mpeg type: %d, %d", mpeg_ver, layer_ver); */
- /* Unknown mpeg type */
+ case 0x400:
+ layer_ver = LAYER_2;
+ padsize = 1;
+ break;
+ case 0x200:
+ layer_ver = LAYER_3;
+ padsize = 1;
+ break;
+ case 0x600:
+ layer_ver = LAYER_1;
+ padsize = 4;
+ break;
+ default:
+ /* unknown layer */
return FALSE;
}
- if (mpeg_ver<3) {
+ if (mpeg_ver < 3) {
idx_num = (mpeg_ver - 1) * 3 + layer_ver - 1;
} else {
idx_num = 2 + layer_ver;
}
spfp8 = spf_table[idx_num];
-
+
+ tracker_sparql_builder_predicate (metadata, "nfo:channels");
if ((header & ch_mask) == ch_mask) {
- ch = 1;
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "channels",
- "1");
+ tracker_sparql_builder_object_int64 (metadata, 1);
} else {
- ch=2; /*stereo non stereo select*/
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "channels",
- "2");
+ tracker_sparql_builder_object_int64 (metadata, 2);
}
-
+
/* We assume mpeg version, layer and channels are constant in frames */
do {
frames++;
@@ -775,13 +853,6 @@ mp3_parse_header (const gchar *data,
}
sample_rate = freq_table[(header & freq_mask) >> 18][mpeg_ver - 1];
- /* Whoever wrote this check: it's pointless, sample_rate is a uint,
- so it can't ever be < 0. Hence commenting it out (pvanhoof)
- if (sample_rate < 0) {
- * Error in header *
- frames--;
- return FALSE;
- }*/
frame_size = spfp8 * bitrate / (sample_rate ? sample_rate : 1) + padsize*((header & pad_mask) >> 17);
avg_bps += bitrate / 1000;
@@ -816,7 +887,7 @@ mp3_parse_header (const gchar *data,
avg_bps /= frames;
- if (filedata->duration==0) {
+ if (filedata->duration == 0) {
if ((!vbr_flag && frames > VBR_THRESHOLD) || (frames > MAX_FRAMES_SCAN)) {
/* If not all frames scanned */
length = (filedata->size - filedata->id3v2_size) / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
@@ -824,17 +895,14 @@ mp3_parse_header (const gchar *data,
length = spfp8 * 8 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
}
- tracker_statement_list_insert_with_int (metadata, uri,
- NMM_PREFIX "length",
- length);
+ tracker_sparql_builder_predicate (metadata, "nmm:length");
+ tracker_sparql_builder_object_int64 (metadata, length);
}
- tracker_statement_list_insert_with_int (metadata, uri,
- NFO_PREFIX "sampleRate",
- sample_rate);
- tracker_statement_list_insert_with_int (metadata, uri,
- NFO_PREFIX "averageBitrate",
- avg_bps*1000);
+ tracker_sparql_builder_predicate (metadata, "nfo:sampleRate");
+ tracker_sparql_builder_object_int64 (metadata, sample_rate);
+ tracker_sparql_builder_predicate (metadata, "nfo:averageBitrate");
+ tracker_sparql_builder_object_int64 (metadata, avg_bps*1000);
return TRUE;
}
@@ -871,6 +939,142 @@ mp3_parse (const gchar *data,
} while (counter < MAX_MP3_SCAN_DEEP);
}
+static gchar *
+id3v24_text_to_utf8 (gchar encoding, const gchar *text, gssize len)
+{
+ /* This byte describes the encoding
+ * try to convert strings to UTF-8
+ * if it fails, then forget it
+ */
+
+ switch (encoding) {
+ case 0x00:
+ return t_convert (text,
+ len,
+ "UTF-8",
+ "ISO-8859-1",
+ NULL, NULL, NULL);
+ case 0x01 :
+ return t_convert (text,
+ len,
+ "UTF-8",
+ "UTF-16",
+ NULL, NULL, NULL);
+ case 0x02 :
+ return t_convert (text,
+ len,
+ "UTF-8",
+ "UTF-16BE",
+ NULL, NULL, NULL);
+ case 0x03 :
+ return strndup (text, len);
+
+ default:
+ /* Bad encoding byte,
+ * try to convert from
+ * iso-8859-1
+ */
+ return t_convert (text,
+ len,
+ "UTF-8",
+ "ISO-8859-1",
+ NULL, NULL, NULL);
+ }
+}
+
+static gchar *
+id3v2_text_to_utf8 (gchar encoding, const gchar *text, gssize len)
+{
+ /* This byte describes the encoding
+ * try to convert strings to UTF-8
+ * if it fails, then forget it
+ */
+
+ switch (encoding) {
+ case 0x00:
+ return t_convert (text,
+ len,
+ "UTF-8",
+ "ISO-8859-1",
+ NULL, NULL, NULL);
+ case 0x01 :
+/* return g_convert (text, */
+/* len, */
+/* "UTF-8", */
+/* "UCS-2", */
+/* NULL, NULL, NULL); */
+ return ucs2_to_utf8 (text,
+ len);
+
+ default:
+ /* Bad encoding byte,
+ * try to convert from
+ * iso-8859-1
+ */
+ return t_convert (text,
+ len,
+ "UTF-8",
+ "ISO-8859-1",
+ NULL, NULL, NULL);
+ }
+}
+
+static id3v24frame
+id3v24_get_frame (const gchar *name)
+{
+ int l, r, m;
+
+ /* use binary search */
+
+ l = 0;
+ r = G_N_ELEMENTS (id3v24_frames) - 1;
+ m = 0;
+
+ do {
+ m = (l + r) / 2;
+ if (strncmp (name, id3v24_frames[m].name, 4) < 0) {
+ // left half
+ r = m - 1;
+ } else {
+ // right half
+ l = m + 1;
+ }
+ } while (l <= r && strncmp (id3v24_frames[m].name, name, 4) != 0);
+ if (strncmp (id3v24_frames[m].name, name, 4) == 0) {
+ return id3v24_frames[m].frame;
+ } else {
+ return ID3V24_UNKNOWN;
+ }
+}
+
+static id3v2frame
+id3v2_get_frame (const gchar *name)
+{
+ int l, r, m;
+ /* Binary-search id3v2_frames for the frame ID at name; returns ID3V2_UNKNOWN if absent.
+  * Only 3 bytes are compared: every table name is 3 characters long, and a 4-byte
+  * compare would also test whatever byte follows the ID in name against the NUL. */
+ l = 0;
+ r = G_N_ELEMENTS (id3v2_frames) - 1;
+ m = 0;
+
+ do {
+ m = (l + r) / 2;
+ if (strncmp (name, id3v2_frames[m].name, 3) < 0) {
+ // left half
+ r = m - 1;
+ } else {
+ // right half
+ l = m + 1;
+ }
+ } while (l <= r && strncmp (id3v2_frames[m].name, name, 3) != 0);
+ if (strncmp (id3v2_frames[m].name, name, 3) == 0) {
+ return id3v2_frames[m].frame;
+ } else {
+ return ID3V2_UNKNOWN;
+ }
+}
+
static void
get_id3v24_tags (const gchar *data,
size_t size,
@@ -879,179 +1083,83 @@ get_id3v24_tags (const gchar *data,
TrackerSparqlBuilder *metadata,
file_data *filedata)
{
+ id3v2tag *tag = &filedata->id3v24_info;
guint pos = 0;
- Matches tmap[] = {
- {"TCOP", NIE_PREFIX "copyright", NULL, NULL, NULL, NULL},
- {"TDRC", NIE_PREFIX "contentCreated", NULL, NULL, NULL, &filedata->id3v1_info->year},
- {"TCON", NFO_PREFIX "genre", NULL, NULL, NULL, &filedata->id3v1_info->genre},
- {"TIT1", NFO_PREFIX "genre", NULL, NULL, NULL, &filedata->id3v1_info->genre},
- {"TENC", NCO_PREFIX "publisher", "publisher", NMM_PREFIX "Artist", NMM_PREFIX "artistName", NULL},
- {"TEXT", NIE_PREFIX "plainTextContent", NULL, NULL, NULL, NULL},
- {"TPE1", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TPE2", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TPE3", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- /* {"TOPE", NID3_LEAD_ARTIST}, We dont' want the original artist for now */
- {"TPUB", NCO_PREFIX "publisher", "publisher", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TOAL", NMM_PREFIX "musicAlbum", "album", NMM_PREFIX "MusicAlbum", NMM_PREFIX "albumTitle", &filedata->id3v1_info->album},
- {"TALB", NMM_PREFIX "musicAlbum", "album", NMM_PREFIX "MusicAlbum", NMM_PREFIX "albumTitle", &filedata->id3v1_info->album},
- {"TLAN", NIE_PREFIX "language", NULL, NULL, NULL, NULL},
- {"TIT2", NIE_PREFIX "title", NULL, NULL, NULL, &filedata->id3v1_info->title},
- {"TIT3", NIE_PREFIX "comment", NULL, NULL, NULL, &filedata->id3v1_info->comment},
- {"TDRL", NIE_PREFIX "contentCreated", NULL, NULL, NULL, &filedata->id3v1_info->year},
- {"TRCK", NMM_PREFIX "trackNumber", NULL, NULL, NULL, &filedata->id3v1_info->trackno},
- {"PCNT", NIE_PREFIX "usageCounter", NULL, NULL, NULL, NULL},
- {"TLEN", NMM_PREFIX "length", NULL, NULL, NULL, NULL},
- {NULL, 0, NULL, NULL, NULL},
- };
while (pos < size) {
+ id3v24frame frame;
size_t csize;
- gint i;
unsigned short flags;
if (pos + 10 > size) {
return;
}
+ frame = id3v24_get_frame (&data[pos]);
+
csize = (((data[pos+4] & 0x7F) << 21) |
((data[pos+5] & 0x7F) << 14) |
((data[pos+6] & 0x7F) << 7) |
((data[pos+7] & 0x7F) << 0));
- if ((pos + 10 + csize > size) ||
- (csize > size) ||
- (csize == 0)) {
+ flags = (((unsigned char) (data[pos + 8]) << 8) +
+ ((unsigned char) (data[pos + 9])));
+
+ pos += 10;
+
+ if (frame == ID3V24_UNKNOWN) {
+ /* ignore unknown frames */
+ pos += csize;
+ continue;
+ }
+
+ if (pos + csize > size) {
break;
+ } else if (csize == 0) {
+ continue;
}
- flags = (((unsigned char) (data[pos + 8]) << 8) +
- ((unsigned char) (data[pos + 9])));
if (((flags & 0x80) > 0) ||
((flags & 0x40) > 0)) {
- pos += 10 + csize;
+ pos += csize;
continue;
}
- i = 0;
- while (tmap[i].text != NULL) {
- if (strncmp (tmap[i].text, (const char*) &data[pos], 4) == 0) {
- gchar * word;
-
- if ((flags & 0x20) > 0) {
- /* The "group" identifier, skip a byte */
- pos++;
- csize--;
- }
+ if ((flags & 0x20) > 0) {
+ /* The "group" identifier, skip a byte */
+ pos++;
+ csize--;
+ }
- /* This byte describes the encoding
- * try to convert strings to UTF-8
- * if it fails, then forget it
- */
-
- switch (data[pos + 10]) {
- case 0x00:
- word = t_convert (&data[pos + 11],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- case 0x01 :
- word = t_convert (&data[pos + 11],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "UTF-16",
- NULL, NULL, NULL);
- break;
- case 0x02 :
- word = t_convert (&data[pos + 11],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "UTF-16BE",
- NULL, NULL, NULL);
- break;
- case 0x03 :
- word = strndup (&data[pos + 11], csize - 1);
- break;
-
- default:
- /* Bad encoding byte,
- * try to convert from
- * iso-8859-1
- */
- word = t_convert (&data[pos + 11],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- }
+ switch (frame) {
+ case ID3V24_APIC:
+ {
+ /* embedded image */
+ gchar text_type;
+ const gchar *mime;
+ gchar pic_type;
+ const gchar *desc;
+ guint offset;
+ gint mime_len;
- pos++;
- csize--;
-
- if (!tracker_is_empty_string (word)) {
- g_strstrip (word);
-
- if (strcmp (tmap[i].text, "TRCK") == 0) {
- gchar **parts;
-
- parts = g_strsplit (word, "/", 2);
- g_free (word);
- word = g_strdup (parts[0]);
- g_strfreev (parts);
- } else if (strcmp (tmap[i].text, "TCON") == 0) {
- gint genre;
-
- if (get_genre_number (word, &genre)) {
- g_free (word);
- word = g_strdup (get_genre_name (genre));
- }
-
- if (!word || strcasecmp (word, "unknown") == 0) {
- break;
- }
- } else if (strcmp (tmap[i].text, "TLEN") == 0) {
- guint32 duration;
-
- duration = atoi (word);
- g_free (word);
- word = g_strdup_printf ("%d", duration/1000);
- filedata->duration = duration/1000;
- }
-
- if (tmap[i].nullify) {
- /* prefer ID3v2 tag over ID3v1 tag */
- g_free (*tmap[i].nullify);
- *tmap[i].nullify = NULL;
-
- /* keep title around for albumart */
- if (tmap[i].nullify == &filedata->id3v1_info->title) {
- filedata->title = g_strdup (word);
- }
- }
- if (tmap[i].urn) {
- gchar *canonical_uri = tmap[i].urn[0]!=':'?tracker_uri_printf_escaped ("urn:%s:%s", tmap[i].urn, word):g_strdup(tmap[i].urn);
- tracker_statement_list_insert (metadata, canonical_uri, RDF_TYPE, tmap[i].rdf_type);
- tracker_statement_list_insert (metadata, canonical_uri, tmap[i].predicate, word);
- tracker_statement_list_insert (metadata, uri, tmap[i].type, canonical_uri);
- g_free (canonical_uri);
- } else {
- tracker_statement_list_insert (metadata, uri,
- tmap[i].type,
- word);
- }
- }
+ text_type = data[pos + 0];
+ mime = &data[pos + 1];
+ mime_len = strlen (mime);
+ pic_type = data[pos + 1 + mime_len + 1];
+ desc = &data[pos + 1 + mime_len + 1 + 1];
- g_free (word);
+ if (pic_type == 3 || (pic_type == 0 && filedata->albumartsize == 0)) {
+ offset = pos + 1 + mime_len + 2 + strlen (desc) + 1;
- break;
+ filedata->albumartdata = g_malloc0 (csize);
+ filedata->albumartmime = g_strdup (mime);
+ memcpy (filedata->albumartdata, &data[offset], csize);
+ filedata->albumartsize = csize;
}
-
- i++;
+ break;
}
-
- if (strncmp (&data[pos], "COMM", 4) == 0) {
+ case ID3V24_COMM:
+ {
gchar *word;
gchar text_encode;
const gchar *text_language;
@@ -1060,91 +1168,106 @@ get_id3v24_tags (const gchar *data,
guint offset;
gint text_desc_len;
- text_encode = data[pos + 10]; /* $xx */
- text_language = &data[pos + 11]; /* $xx xx xx */
- text_desc = &data[pos + 14]; /* <text string according to encoding> $00 (00) */
+ text_encode = data[pos + 0]; /* $xx */
+ text_language = &data[pos + 1]; /* $xx xx xx */
+ text_desc = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
text_desc_len = strlen (text_desc);
- text = &data[pos + 14 + text_desc_len + 1]; /* <full text string according to encoding> */
-
+ text = &data[pos + 4 + text_desc_len + 1]; /* <full text string according to encoding> */
+
offset = 4 + text_desc_len + 1;
- switch (text_encode) {
- case 0x00:
- word = t_convert (text,
- csize - offset,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- case 0x01:
- word = t_convert (text,
- csize - offset,
- "UTF-8",
- info->encoding ? info->encoding : "UTF-16",
- NULL, NULL, NULL);
- break;
- case 0x02:
- word = t_convert (text,
- csize-offset,
- "UTF-8",
- info->encoding ? info->encoding : "UTF-16BE",
- NULL, NULL, NULL);
- break;
- case 0x03:
- word = g_strndup (text, csize - offset);
- break;
-
- default:
- /* Bad encoding byte,
- * try to convert from
- * iso-8859-1
- */
- word = t_convert (text,
- csize - offset,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- }
+ word = id3v24_text_to_utf8 (text_encode, text, csize - offset);
if (!tracker_is_empty_string (word)) {
g_strstrip (word);
-
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "comment",
- word);
+ tag->comment = word;
+ } else {
+ g_free (word);
}
-
- g_free (word);
+ break;
}
+ default:
+ {
+ gchar *word;
+ /* text frames */
+ word = id3v24_text_to_utf8 (data[pos], &data[pos + 1], csize - 1);
+ if (!tracker_is_empty_string (word)) {
+ g_strstrip (word);
+ }
- /* Check for embedded images */
- if (strncmp (&data[pos], "APIC", 4) == 0) {
- gchar text_type;
- const gchar *mime;
- gchar pic_type;
- const gchar *desc;
- guint offset;
- gint mime_len;
+ switch (frame) {
+ case ID3V24_TALB:
+ tag->album = word;
+ break;
+ case ID3V24_TCON:
+ {
+ gint genre;
- text_type = data[pos + 10];
- mime = &data[pos + 11];
- mime_len = strlen (mime);
- pic_type = data[pos + 11 + mime_len + 1];
- desc = &data[pos + 11 + mime_len + 1 + 1];
+ if (get_genre_number (word, &genre)) {
+ g_free (word);
+ word = g_strdup (get_genre_name (genre));
+ }
+ if (word && strcasecmp (word, "unknown") != 0) {
+ tag->content_type = word;
+ } else {
+ g_free (word);
+ }
+ break;
+ }
+ case ID3V24_TCOP:
+ tag->copyright = word;
+ break;
+ case ID3V24_TDRC:
+ tag->recording_time = word;
+ break;
+ case ID3V24_TDRL:
+ tag->release_time = word;
+ break;
+ case ID3V24_TEXT:
+ tag->text = word;
+ break;
+ case ID3V24_TIT1:
+ tag->title1 = word;
+ break;
+ case ID3V24_TIT2:
+ tag->title2 = word;
+ break;
+ case ID3V24_TIT3:
+ tag->title3 = word;
+ break;
+ case ID3V24_TLEN:
+ tag->length = atoi (word) / 1000;
+ break;
+ case ID3V24_TPE1:
+ tag->performer1 = word;
+ break;
+ case ID3V24_TPE2:
+ tag->performer2 = word;
+ break;
+ case ID3V24_TPUB:
+ tag->publisher = word;
+ break;
+ case ID3V24_TRCK:
+ {
+ gchar **parts;
- if (pic_type == 3 || (pic_type == 0 && filedata->albumartsize == 0)) {
- offset = pos + 11 + mime_len + 2 + strlen (desc) + 1;
+ parts = g_strsplit (word, "/", 2);
+ if (parts[0]) {
+ tag->track_number = atoi (parts[0]);
+ }
+ g_strfreev (parts);
+ g_free (word);
- filedata->albumartdata = g_malloc0 (csize);
- filedata->albumartmime = g_strdup (mime);
- memcpy (filedata->albumartdata, &data[offset], csize);
- filedata->albumartsize = csize;
+ break;
+ }
+ default:
+ g_assert_not_reached ();
}
}
+ }
- pos += 10 + csize;
+ pos += csize;
}
}
@@ -1156,169 +1279,82 @@ get_id3v23_tags (const gchar *data,
TrackerSparqlBuilder *metadata,
file_data *filedata)
{
+ id3v2tag *tag = &filedata->id3v23_info;
guint pos = 0;
- Matches tmap[] = {
- {"TCOP", NIE_PREFIX "copyright", NULL, NULL, NULL, NULL},
- {"TDAT", NIE_PREFIX "contentCreated", NULL, NULL, NULL, &filedata->id3v1_info->year},
- {"TCON", NFO_PREFIX "genre", NULL, NULL, NULL, &filedata->id3v1_info->genre},
- {"TIT1", NFO_PREFIX "genre", NULL, NULL, NULL, &filedata->id3v1_info->genre},
- {"TENC", NCO_PREFIX "publisher", "publisher", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TEXT", NIE_PREFIX "plainTextContent", NULL, NULL, NULL, NULL},
- {"TPE1", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TPE2", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TPE3", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- /* {"TOPE", NID3_LEAD_ARTIST}, We don't want the original artist for now */
- {"TPUB", NCO_PREFIX "publisher", "publisher", NMM_PREFIX "Artist", NMM_PREFIX "artistName", NULL},
- {"TOAL", NMM_PREFIX "musicAlbum", "album", NMM_PREFIX "MusicAlbum", NMM_PREFIX "albumTitle", &filedata->id3v1_info->album},
- {"TALB", NMM_PREFIX "musicAlbum", "album", NMM_PREFIX "MusicAlbum", NMM_PREFIX "albumTitle", &filedata->id3v1_info->album},
- {"TLAN", NIE_PREFIX "language", NULL, NULL, NULL, NULL},
- {"TIT2", NIE_PREFIX "title", NULL, NULL, NULL, &filedata->id3v1_info->title},
- {"TYER", NIE_PREFIX "contentCreated", NULL, NULL, NULL, &filedata->id3v1_info->year},
- {"TRCK", NMM_PREFIX "trackNumber", NULL, NULL, NULL, &filedata->id3v1_info->trackno},
- {"PCNT", NIE_PREFIX "usageCounter", NULL, NULL, NULL, NULL},
- {"TLEN", NMM_PREFIX "length", NULL, NULL, NULL, NULL},
- {NULL, 0, NULL, NULL, NULL},
- };
while (pos < size) {
+ id3v24frame frame;
size_t csize;
- gint i;
unsigned short flags;
if (pos + 10 > size) {
return;
}
+ frame = id3v24_get_frame (&data[pos]);
+
csize = (((unsigned char)(data[pos + 4]) << 24) |
((unsigned char)(data[pos + 5]) << 16) |
((unsigned char)(data[pos + 6]) << 8) |
((unsigned char)(data[pos + 7]) << 0) );
- if ((pos + 10 + csize > size) ||
- (csize > size) ||
- (csize == 0)) {
- break;
- }
-
flags = (((unsigned char)(data[pos + 8]) << 8) +
((unsigned char)(data[pos + 9])));
- if (((flags & 0x80) > 0) || ((flags & 0x40) > 0)) {
- pos += 10 + csize;
+ pos += 10;
+
+ if (frame == ID3V24_UNKNOWN) {
+ /* ignore unknown frames */
+ pos += csize;
continue;
}
- i = 0;
- while (tmap[i].text != NULL) {
- if (strncmp (tmap[i].text, (const gchar*) &data[pos], 4) == 0) {
- gchar * word;
+ if (pos + csize > size) {
+ break;
+ } else if (csize == 0) {
+ continue;
+ }
- if ((flags & 0x20) > 0) {
- /* The "group" identifier, skip a byte */
- pos++;
- csize--;
- }
+ if (((flags & 0x80) > 0) || ((flags & 0x40) > 0)) {
+ pos += csize;
+ continue;
+ }
- /* This byte describes the encoding
- * try to convert strings to UTF-8 if
- * it fails, then forget it./
- */
-
- switch (data[pos + 10]) {
- case 0x00:
- word = t_convert (&data[pos + 11],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- case 0x01 :
-/* word = g_convert (&data[pos + 11], */
-/* csize - 1, */
-/* "UTF-8", */
-/* "UCS-2", */
-/* NULL, NULL, NULL); */
- word = ucs2_to_utf8 (&data[pos + 11],
- csize - 1);
- break;
- default:
- /* Bad encoding byte,
- * try to convert from
- * iso-8859-1
- */
- word = t_convert (&data[pos + 11],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- }
+ if ((flags & 0x20) > 0) {
+ /* The "group" identifier, skip a byte */
+ pos++;
+ csize--;
+ }
- pos++;
- csize--;
-
- if (!tracker_is_empty_string (word)) {
- g_strstrip (word);
-
- if (strcmp (tmap[i].text, "TRCK") == 0) {
- gchar **parts;
-
- parts = g_strsplit (word, "/", 2);
- g_free (word);
- word = g_strdup (parts[0]);
- g_strfreev (parts);
- } else if (strcmp (tmap[i].text, "TCON") == 0) {
- gint genre;
-
- if (get_genre_number (word, &genre)) {
- g_free (word);
- word = g_strdup (get_genre_name (genre));
- }
-
- if (!word || strcasecmp (word, "unknown") == 0) {
- break;
- }
- } else if (strcmp (tmap[i].text, "TLEN") == 0) {
- guint32 duration;
-
- duration = atoi (word);
- g_free (word);
- word = g_strdup_printf ("%d", duration/1000);
- filedata->duration = duration/1000;
- }
-
- if (tmap[i].nullify) {
- /* prefer ID3v2 tag over ID3v1 tag */
- g_free (*tmap[i].nullify);
- *tmap[i].nullify = NULL;
-
- /* keep title around for albumart */
- if (tmap[i].nullify == &filedata->id3v1_info->title) {
- filedata->title = g_strdup (word);
- }
- }
- if (tmap[i].urn) {
- gchar *canonical_uri = tmap[i].urn[0]!=':'?tracker_uri_printf_escaped ("urn:%s:%s", tmap[i].urn, word):g_strdup(tmap[i].urn);
- tracker_statement_list_insert (metadata, canonical_uri, RDF_TYPE, tmap[i].rdf_type);
- tracker_statement_list_insert (metadata, canonical_uri, tmap[i].predicate, word);
- tracker_statement_list_insert (metadata, uri, tmap[i].type, canonical_uri);
- g_free (canonical_uri);
- } else {
- tracker_statement_list_insert (metadata, uri,
- tmap[i].type,
- word);
- }
- }
+ switch (frame) {
+ case ID3V24_APIC:
+ {
+ /* embedded image */
+ gchar text_type;
+ const gchar *mime;
+ gchar pic_type;
+ const gchar *desc;
+ guint offset;
+ gint mime_len;
- g_free (word);
+ text_type = data[pos + 0];
+ mime = &data[pos + 1];
+ mime_len = strlen (mime);
+ pic_type = data[pos + 1 + mime_len + 1];
+ desc = &data[pos + 1 + mime_len + 1 + 1];
- break;
- }
+ if (pic_type == 3 || (pic_type == 0 && filedata->albumartsize == 0)) {
+ offset = pos + 1 + mime_len + 2 + strlen (desc) + 1;
- i++;
+ filedata->albumartdata = g_malloc0 (csize);
+ filedata->albumartmime = g_strdup (mime);
+ memcpy (filedata->albumartdata, &data[offset], csize);
+ filedata->albumartsize = csize;
+ }
+ break;
}
-
- if (strncmp (&data[pos], "COMM", 4) == 0) {
+ case ID3V24_COMM:
+ {
gchar *word;
gchar text_encode;
const gchar *text_language;
@@ -1326,82 +1362,104 @@ get_id3v23_tags (const gchar *data,
const gchar *text;
guint offset;
gint text_desc_len;
-
- text_encode = data[pos + 10]; /* $xx */
- text_language = &data[pos + 11]; /* $xx xx xx */
- text_desc = &data[pos + 14]; /* <text string according to encoding> $00 (00) */
+
+ text_encode = data[pos + 0]; /* $xx */
+ text_language = &data[pos + 1]; /* $xx xx xx */
+ text_desc = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
text_desc_len = strlen (text_desc);
- text = &data[pos + 14 + text_desc_len + 1]; /* <full text string according to encoding> */
+ text = &data[pos + 4 + text_desc_len + 1]; /* <full text string according to encoding> */
offset = 4 + text_desc_len + 1;
- switch (text_encode) {
- case 0x00:
- word = t_convert (text,
- csize - offset,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- case 0x01 :
-/* word = g_convert (text, */
-/* csize-offset, */
-/* "UTF-8", */
-/* "UCS-2", */
-/* NULL, NULL, NULL); */
- word = ucs2_to_utf8 (&data[pos + 11],
- csize - offset);
- break;
- default:
- /* Bad encoding byte,
- * try to convert from
- * iso-8859-1
- */
- word = t_convert (text,
- csize - offset,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- }
+ word = id3v2_text_to_utf8 (text_encode, text, csize - offset);
if (!tracker_is_empty_string (word)) {
g_strstrip (word);
+ tag->comment = word;
+ } else {
+ g_free (word);
+ }
+ break;
+ }
+ default:
+ {
+ gchar *word;
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "comment",
- word);
+ /* text frames */
+ word = id3v2_text_to_utf8 (data[pos], &data[pos + 1], csize - 1);
+ if (!tracker_is_empty_string (word)) {
+ g_strstrip (word);
}
- g_free (word);
- }
+ switch (frame) {
+ case ID3V24_TALB:
+ tag->album = word;
+ break;
+ case ID3V24_TCON:
+ {
+ gint genre;
- /* Check for embedded images */
- if (strncmp (&data[pos], "APIC", 4) == 0) {
- gchar text_type;
- const gchar *mime;
- gchar pic_type;
- const gchar *desc;
- guint offset;
- gint mime_len;
+ if (get_genre_number (word, &genre)) {
+ g_free (word);
+ word = g_strdup (get_genre_name (genre));
+ }
+ if (word && strcasecmp (word, "unknown") != 0) {
+ tag->content_type = word;
+ } else {
+ g_free (word);
+ }
+ break;
+ }
+ case ID3V24_TCOP:
+ tag->copyright = word;
+ break;
+ case ID3V24_TEXT:
+ tag->text = word;
+ break;
+ case ID3V24_TIT1:
+ tag->title1 = word;
+ break;
+ case ID3V24_TIT2:
+ tag->title2 = word;
+ break;
+ case ID3V24_TIT3:
+ tag->title3 = word;
+ break;
+ case ID3V24_TLEN:
+ tag->length = atoi (word) / 1000;
+ break;
+ case ID3V24_TPE1:
+ tag->performer1 = word;
+ break;
+ case ID3V24_TPE2:
+ tag->performer2 = word;
+ break;
+ case ID3V24_TPUB:
+ tag->publisher = word;
+ break;
+ case ID3V24_TRCK:
+ {
+ gchar **parts;
- text_type = data[pos + 10];
- mime = &data[pos + 11];
- mime_len = strlen (mime);
- pic_type = data[pos + 11 + mime_len + 1];
- desc = &data[pos + 11 + mime_len + 1 + 1];
-
- if (pic_type == 3 || (pic_type == 0 && filedata->albumartsize == 0)) {
- offset = pos + 11 + mime_len + 2 + strlen (desc) + 1;
-
- filedata->albumartdata = g_malloc0 (csize);
- filedata->albumartmime = g_strdup (mime);
- memcpy (filedata->albumartdata, &data[offset], csize);
- filedata->albumartsize = csize;
+ parts = g_strsplit (word, "/", 2);
+ if (parts[0]) {
+ tag->track_number = atoi (parts[0]);
+ }
+ g_strfreev (parts);
+ g_free (word);
+
+ break;
+ }
+ case ID3V24_TYER:
+ tag->recording_time = word;
+ break;
+ default:
+ g_assert_not_reached ();
}
}
+ }
- pos += 10 + csize;
+ pos += csize;
}
}
@@ -1413,180 +1471,124 @@ get_id3v20_tags (const gchar *data,
TrackerSparqlBuilder *metadata,
file_data *filedata)
{
+ id3v2tag *tag = &filedata->id3v22_info;
guint pos = 0;
- Matches tmap[] = {
- {"TAL", NMM_PREFIX "musicAlbum", "album", NMM_PREFIX "MusicAlbum", NMM_PREFIX "albumTitle", &filedata->id3v1_info->album},
- {"TT1", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TT2", NIE_PREFIX "title", NULL, NULL, NULL, &filedata->id3v1_info->title},
- {"TT3", NIE_PREFIX "title", NULL, NULL, NULL, &filedata->id3v1_info->title},
- {"TXT", NIE_PREFIX "comment", NULL, NULL, NULL, &filedata->id3v1_info->comment},
- {"TPB", NCO_PREFIX "publisher", "publisher", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- /* TODO {"WAF", "DC:Location", NULL, NULL, NULL},
- TODO {"WAR", "DC:Location", NULL, NULL, NULL},
- TODO {"WAS", "DC:Location", NULL, NULL, NULL},
- TODO {"WAF", "DC:Location", NULL, NULL, NULL}, */
- {"WCM", NIE_PREFIX "license", NULL, NULL, NULL, NULL},
- {"TYE", NIE_PREFIX "contentCreated", NULL, NULL, NULL, &filedata->id3v1_info->year},
- {"TLA", NIE_PREFIX "language", NULL, NULL, NULL, NULL},
- {"TP1", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TP2", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TP3", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TEN", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TCO", NMM_PREFIX "genre", NULL, NULL, NULL, &filedata->id3v1_info->genre},
- {"TCR", NIE_PREFIX "copyright", NULL, NULL, NULL, NULL},
- {"SLT", NIE_PREFIX "plainTextContent", NULL, NULL, NULL, NULL}, /* Lyrics */
- {"TOA", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"TOT", NMM_PREFIX "musicAlbum", "album", NMM_PREFIX "MusicAlbum", NMM_PREFIX "albumTitle", &filedata->id3v1_info->album},
- {"TOL", NMM_PREFIX "performer", "artist", NMM_PREFIX "Artist", NMM_PREFIX "artistName", &filedata->id3v1_info->artist},
- {"COM", NIE_PREFIX "comment", NULL, NULL, NULL, &filedata->id3v1_info->comment},
- {"TLE", NMM_PREFIX "length", NULL, NULL, NULL, NULL},
- { NULL, 0, NULL, NULL, NULL},
- };
while (pos < size) {
+ id3v2frame frame;
size_t csize;
- gint i;
if (pos + 6 > size) {
return;
}
+ frame = id3v2_get_frame (&data[pos]);
+
csize = (((unsigned char)(data[pos + 3]) << 16) +
((unsigned char)(data[pos + 4]) << 8) +
((unsigned char)(data[pos + 5]) ) );
- if ((pos + 6 + csize > size) ||
- (csize > size) ||
- (csize == 0)) {
- break;
- }
-
- i = 0;
-
- while (tmap[i].text != NULL) {
- if (strncmp(tmap[i].text, (const char*) &data[pos], 3) == 0) {
- gchar * word;
-
- /* This byte describes the encoding
- * try to convert strings to UTF-8 if
- * it fails, then forget it./
- */
- switch (data[pos + 6]) {
- case 0x00:
- word = t_convert (&data[pos + 7],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- case 0x01 :
-/* word = g_convert (&data[pos+7], */
-/* csize, */
-/* "UTF-8", */
-/* "UCS-2", */
-/* NULL, NULL, NULL); */
- word = ucs2_to_utf8 (&data[pos + 7],
- csize - 1);
- break;
- default:
- /* Bad encoding byte,
- * try to convert from
- * iso-8859-1
- */
- word = t_convert (&data[pos + 7],
- csize - 1,
- "UTF-8",
- info->encoding ? info->encoding : "ISO-8859-1",
- NULL, NULL, NULL);
- break;
- }
-
- pos++;
- csize--;
-
- if (!tracker_is_empty_string (word)) {
- g_strstrip (word);
-
- if (strcmp (tmap[i].text, "COM") == 0) {
- gchar *s;
-
- s = g_strdup (word + strlen (word) + 1);
- g_free (word);
- word = s;
- }
-
- if (strcmp (tmap[i].text, "TCO") == 0) {
- gint genre;
- if (get_genre_number (word, &genre)) {
- g_free (word);
- word = g_strdup (get_genre_name (genre));
- }
-
- if (!word || strcasecmp (word, "unknown") == 0) {
- g_free (word);
- break;
- }
- } else if (strcmp (tmap[i].text, "TLE") == 0) {
- guint32 duration;
-
- duration = atoi (word);
- g_free (word);
- word = g_strdup_printf ("%d", duration/1000);
- filedata->duration = duration/1000;
- }
-
- if (tmap[i].nullify) {
- /* prefer ID3v2 tag over ID3v1 tag */
- g_free (*tmap[i].nullify);
- *tmap[i].nullify = NULL;
-
- /* keep title around for albumart */
- if (tmap[i].nullify == &filedata->id3v1_info->title) {
- filedata->title = g_strdup (word);
- }
- }
- if (tmap[i].urn) {
- gchar *canonical_uri = tmap[i].urn[0]!=':'?tracker_uri_printf_escaped ("urn:%s:%s", tmap[i].urn, word):g_strdup(tmap[i].urn);
- tracker_statement_list_insert (metadata, canonical_uri, RDF_TYPE, tmap[i].rdf_type);
- tracker_statement_list_insert (metadata, canonical_uri, tmap[i].predicate, word);
- tracker_statement_list_insert (metadata, uri, tmap[i].type, canonical_uri);
- g_free (canonical_uri);
- } else {
- tracker_statement_list_insert (metadata, uri,
- tmap[i].type,
- word);
- }
- }
- g_free (word);
+ pos += 6;
- break;
- }
+ if (frame == ID3V2_UNKNOWN) {
+ /* ignore unknown frames */
+ pos += csize;
+ continue;
+ }
- i++;
+ if (pos + csize > size) {
+ break;
+ } else if (csize == 0) {
+ continue;
}
- /* Check for embedded images */
- if (strncmp (&data[pos], "PIC", 3) == 0) {
+ if (frame == ID3V2_PIC) {
+ /* embedded image */
gchar pic_type;
const gchar *desc;
guint offset;
const gchar *mime;
- mime = &data[pos + 6 + 3 + 1];
- pic_type = data[pos + 6 + 3 + 1 + 3];
- desc = &data[pos + 6 + 3 + 1 + 3 + 1];
+ mime = &data[pos + 3 + 1];
+ pic_type = data[pos + 3 + 1 + 3];
+ desc = &data[pos + 3 + 1 + 3 + 1];
if (pic_type == 3 || (pic_type == 0 && filedata->albumartsize == 0)) {
- offset = pos + 6 + 3 + 1 + 3 + 1 + strlen (desc) + 1;
+ offset = pos + 3 + 1 + 3 + 1 + strlen (desc) + 1;
filedata->albumartmime = g_strdup (mime);
filedata->albumartdata = g_malloc0 (csize);
memcpy (filedata->albumartdata, &data[offset], csize);
filedata->albumartsize = csize;
}
+ } else {
+ /* text frames */
+ gchar * word;
+
+ word = id3v2_text_to_utf8 (data[pos], &data[pos + 1], csize - 1);
+ if (!tracker_is_empty_string (word)) {
+ g_strstrip (word);
+ }
+
+ /* Dispatch on the frame id. A case that stores `word` in a tag
+ * field hands ownership to the tag; a case that does not store
+ * it must free it. Every case ends in `break`, so a pointer that
+ * was freed or already stored is never reused by the next case.
+ */
+ switch (frame) {
+ case ID3V2_COM:
+ /* The comment text is taken from just past the first NUL
+ * of the converted string; only that copy is kept.
+ */
+ if (word) {
+ tag->comment = g_strdup (word + strlen (word) + 1);
+ }
+ g_free (word);
+ break;
+ case ID3V2_TAL:
+ tag->album = word;
+ break;
+ case ID3V2_TCO:
+ {
+ gint genre;
+
+ /* Replace a numeric genre reference by its name */
+ if (get_genre_number (word, &genre)) {
+ g_free (word);
+ word = g_strdup (get_genre_name (genre));
+ }
+
+ /* Drop missing and "unknown" genres */
+ if (word && strcasecmp (word, "unknown") != 0) {
+ tag->content_type = word;
+ } else {
+ g_free (word);
+ }
+ break;
+ }
+ case ID3V2_TCR:
+ tag->copyright = word;
+ break;
+ case ID3V2_TLE:
+ /* Only the numeric value is kept (divided by 1000,
+ * presumably milliseconds to seconds), so the string
+ * itself is released here.
+ */
+ if (word) {
+ tag->length = atoi (word) / 1000;
+ }
+ g_free (word);
+ break;
+ case ID3V2_TPB:
+ tag->publisher = word;
+ break;
+ case ID3V2_TP1:
+ tag->performer1 = word;
+ break;
+ case ID3V2_TP2:
+ tag->performer2 = word;
+ break;
+ case ID3V2_TT1:
+ tag->title1 = word;
+ break;
+ case ID3V2_TT2:
+ tag->title2 = word;
+ break;
+ case ID3V2_TT3:
+ tag->title3 = word;
+ break;
+ case ID3V2_TXT:
+ tag->text = word;
+ break;
+ case ID3V2_TYE:
+ tag->recording_time = word;
+ break;
+ default:
+ g_assert_not_reached ();
+ }
}
- pos += 6 + csize;
+ pos += csize;
}
}
@@ -1806,6 +1808,32 @@ parse_id3v2 (const gchar *data,
return offset;
}
+/**
+ * coalesce:
+ * @n_values: number of gchar * arguments that follow
+ * @...: exactly @n_values string pointers, any of which may be NULL
+ *
+ * Selects the first non-NULL argument and releases every other
+ * non-NULL argument with g_free(). The arguments themselves are
+ * passed by value, so the caller's variables are left untouched and
+ * must not be used again for the strings that were freed here.
+ *
+ * Returns: the first non-NULL argument (the same pointer, not a
+ * copy), or NULL if all arguments are NULL.
+ */
+static gchar *
+coalesce (gint n_values,
+ ...)
+{
+ va_list args;
+ gint i;
+ gchar *result = NULL;
+
+ va_start (args, n_values);
+
+ for (i = 0; i < n_values; i++) {
+ gchar *value;
+
+ value = va_arg (args, gchar *);
+ if (value) {
+ if (!result) {
+ result = value;
+ } else {
+ g_free (value);
+ }
+ }
+ }
+
+ /* Each va_start() must be matched by va_end() before returning */
+ va_end (args);
+
+ return result;
+}
+
static void
extract_mp3 (const gchar *uri,
TrackerSparqlBuilder *metadata)
@@ -1816,27 +1844,10 @@ extract_mp3 (const gchar *uri,
void *id3v1_buffer;
goffset size;
goffset buffer_size;
- id3tag info;
goffset audio_offset;
file_data filedata;
- info.title = NULL;
- info.artist = NULL;
- info.album = NULL;
- info.year = NULL;
- info.comment = NULL;
- info.genre = NULL;
- info.trackno = NULL;
- info.encoding = NULL;
-
- filedata.size = 0;
- filedata.id3v2_size = 0;
- filedata.duration = 0;
- filedata.title = NULL;
- filedata.albumartdata = NULL;
- filedata.albumartmime = NULL;
- filedata.albumartsize = 0;
- filedata.id3v1_info = &info;
+ memset (&filedata, 0, sizeof (file_data));
filename = g_filename_from_uri (uri, NULL, NULL);
@@ -1889,95 +1900,158 @@ extract_mp3 (const gchar *uri,
return;
}
- if (!get_id3 (id3v1_buffer, ID3V1_SIZE, &info)) {
+ if (!get_id3 (id3v1_buffer, ID3V1_SIZE, &filedata.id3v1_info)) {
/* Do nothing? */
}
g_free (id3v1_buffer);
- tracker_statement_list_insert (metadata, uri,
- RDF_TYPE,
- NMM_PREFIX "MusicPiece");
+ /* Get other embedded tags */
+ audio_offset = parse_id3v2 (buffer, buffer_size, &filedata.id3v1_info, uri, metadata, &filedata);
+
+ filedata.title = coalesce (4, filedata.id3v24_info.title2,
+ filedata.id3v23_info.title2,
+ filedata.id3v22_info.title2,
+ filedata.id3v1_info.title);
+ filedata.performer = coalesce (7, filedata.id3v24_info.performer1,
+ filedata.id3v24_info.performer2,
+ filedata.id3v23_info.performer1,
+ filedata.id3v23_info.performer2,
+ filedata.id3v22_info.performer1,
+ filedata.id3v22_info.performer2,
+ filedata.id3v1_info.artist);
+ filedata.album = coalesce (4, filedata.id3v24_info.album,
+ filedata.id3v23_info.album,
+ filedata.id3v22_info.album,
+ filedata.id3v1_info.album);
+ filedata.genre = coalesce (7, filedata.id3v24_info.content_type,
+ filedata.id3v24_info.title1,
+ filedata.id3v23_info.content_type,
+ filedata.id3v23_info.title1,
+ filedata.id3v22_info.content_type,
+ filedata.id3v22_info.title1,
+ filedata.id3v1_info.genre);
+ filedata.recording_time = coalesce (5, filedata.id3v24_info.recording_time,
+ filedata.id3v24_info.release_time,
+ filedata.id3v23_info.recording_time,
+ filedata.id3v22_info.recording_time,
+ filedata.id3v1_info.year);
+ filedata.publisher = coalesce (3, filedata.id3v24_info.publisher,
+ filedata.id3v23_info.publisher,
+ filedata.id3v22_info.publisher);
+ filedata.text = coalesce (3, filedata.id3v24_info.text,
+ filedata.id3v23_info.text,
+ filedata.id3v22_info.text);
+ filedata.copyright = coalesce (3, filedata.id3v24_info.copyright,
+ filedata.id3v23_info.copyright,
+ filedata.id3v22_info.copyright);
+ filedata.comment = coalesce (7, filedata.id3v24_info.title3,
+ filedata.id3v24_info.comment,
+ filedata.id3v23_info.title3,
+ filedata.id3v23_info.comment,
+ filedata.id3v22_info.title3,
+ filedata.id3v22_info.comment,
+ filedata.id3v1_info.comment);
+
+ if (filedata.performer) {
+ filedata.performer_uri = tracker_uri_printf_escaped ("urn:artist:%s", filedata.performer);
+ tracker_sparql_builder_subject_iri (metadata, filedata.performer_uri);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nmm:Artist");
+ tracker_sparql_builder_predicate (metadata, "nmm:artistName");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.performer);
+ g_free (filedata.performer);
+ }
+
+ if (filedata.album) {
+ filedata.album_uri = tracker_uri_printf_escaped ("urn:album:%s", filedata.album);
+ tracker_sparql_builder_subject_iri (metadata, filedata.album_uri);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nmm:MusicAlbum");
+ tracker_sparql_builder_predicate (metadata, "nmm:albumTitle");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.album);
+ g_free (filedata.album);
+ }
- tracker_statement_list_insert (metadata, uri,
- RDF_TYPE,
- NFO_PREFIX "Audio");
+ tracker_sparql_builder_subject_iri (metadata, uri);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nmm:MusicPiece");
+ tracker_sparql_builder_object (metadata, "nfo:Audio");
- if (!tracker_is_empty_string (info.title)) {
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "title",
- info.title);
+ if (filedata.title) {
+ tracker_sparql_builder_predicate (metadata, "nie:title");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.title);
+ /* do not delete title, needed by albumart */
+ }
- /* keep title around for albumart */
- filedata.title = g_strdup (info.title);
+ if (filedata.performer_uri) {
+ tracker_sparql_builder_predicate (metadata, "nmm:performer");
+ tracker_sparql_builder_object_iri (metadata, filedata.performer_uri);
+ g_free (filedata.performer_uri);
}
- if (!tracker_is_empty_string (info.artist)) {
- gchar *canonical_uri = tracker_uri_printf_escaped ("urn:artist:%s", info.artist);
- tracker_statement_list_insert (metadata, canonical_uri, RDF_TYPE, NMM_PREFIX "Artist");
- tracker_statement_list_insert (metadata, canonical_uri, NMM_PREFIX "artistName", info.artist);
- tracker_statement_list_insert (metadata, uri, NMM_PREFIX "performer", canonical_uri);
- g_free (canonical_uri);
+ if (filedata.album_uri) {
+ tracker_sparql_builder_predicate (metadata, "nmm:musicAlbum");
+ tracker_sparql_builder_object_iri (metadata, filedata.album_uri);
+ g_free (filedata.album_uri);
}
- if (!tracker_is_empty_string (info.album)) {
- gchar *canonical_uri = tracker_uri_printf_escaped ("urn:album:%s", info.album);
- tracker_statement_list_insert (metadata, canonical_uri, RDF_TYPE, NMM_PREFIX "MusicAlbum");
- tracker_statement_list_insert (metadata, canonical_uri, NMM_PREFIX "albumTitle", info.album);
- tracker_statement_list_insert (metadata, uri, NMM_PREFIX "musicAlbum", canonical_uri);
- g_free (canonical_uri);
+ if (filedata.recording_time) {
+ tracker_sparql_builder_predicate (metadata, "nie:contentCreated");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.recording_time);
+ g_free (filedata.recording_time);
}
- if (!tracker_is_empty_string (info.year)) {
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "contentCreated",
- info.year);
+ if (filedata.text) {
+ tracker_sparql_builder_predicate (metadata, "nie:plainTextContent");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.text);
+ g_free (filedata.text);
}
- if (!tracker_is_empty_string (info.genre)) {
- tracker_statement_list_insert (metadata, uri,
- NFO_PREFIX "genre",
- info.genre);
+ if (filedata.genre) {
+ tracker_sparql_builder_predicate (metadata, "nfo:genre");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.genre);
+ g_free (filedata.genre);
}
- if (!tracker_is_empty_string (info.comment)) {
- tracker_statement_list_insert (metadata, uri,
- NIE_PREFIX "comment",
- info.comment);
+ if (filedata.copyright) {
+ tracker_sparql_builder_predicate (metadata, "nie:copyright");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.copyright);
+ g_free (filedata.copyright);
}
- if (!tracker_is_empty_string (info.trackno)) {
- tracker_statement_list_insert (metadata, uri,
- NMM_PREFIX "trackNumber",
- info.trackno);
+ if (filedata.comment) {
+ tracker_sparql_builder_predicate (metadata, "nie:comment");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.comment);
+ g_free (filedata.comment);
}
- /* Get other embedded tags */
- audio_offset = parse_id3v2 (buffer, buffer_size, &info, uri, metadata, &filedata);
+ if (filedata.publisher) {
+ tracker_sparql_builder_predicate (metadata, "nco:publisher");
+ tracker_sparql_builder_object_blank_open (metadata);
+ tracker_sparql_builder_predicate (metadata, "a");
+ tracker_sparql_builder_object (metadata, "nco:Contact");
+ tracker_sparql_builder_predicate (metadata, "nco:fullname");
+ tracker_sparql_builder_object_unvalidated (metadata, filedata.publisher);
+ tracker_sparql_builder_object_blank_close (metadata);
+ g_free (filedata.publisher);
+ }
/* Get mp3 stream info */
mp3_parse (buffer, buffer_size, audio_offset, uri, metadata, &filedata);
- g_free (info.title);
- g_free (info.year);
- g_free (info.album);
- g_free (info.artist);
- g_free (info.comment);
- g_free (info.trackno);
- g_free (info.genre);
-
#ifdef HAVE_GDKPIXBUF
tracker_albumart_process (filedata.albumartdata,
filedata.albumartsize,
filedata.albumartmime,
- /* tracker_statement_list_find (metadata, NMM_PREFIX "performer") */ NULL,
+ NULL,
filedata.title,
filename);
#else
tracker_albumart_process (NULL,
0,
NULL,
- /* tracker_statement_list_find (metadata, NMM_PREFIX "performer") */ NULL,
+ NULL,
filedata.title,
filename);
@@ -1987,7 +2061,7 @@ extract_mp3 (const gchar *uri,
g_free (filedata.albumartdata);
g_free (filedata.albumartmime);
- g_free (info.encoding);
+ g_free (filedata.id3v1_info.encoding);
#ifndef G_OS_WIN32
munmap (buffer, buffer_size);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]