[tracker/tracker-0.6] Read mp3 files in parts in extraction
- From: Mikael Ottela <mottela src gnome org>
- To: svn-commits-list gnome org
- Subject: [tracker/tracker-0.6] Read mp3 files in parts in extraction
- Date: Mon, 20 Apr 2009 16:21:09 -0400 (EDT)
commit 1e479746a6fc6062a0c0e2d37b74e19ec7f9c95a
Author: Mikael Ottela <mikael ottela ixonos com>
Date: Mon Apr 20 23:05:02 2009 +0300
Read mp3 files in parts in extraction
We now read the last 128 bytes (the id3v1 metadata) separately and mmap only
the beginning of the mp3 file during mp3 extraction, for better performance.
Large mp3 files are now handled without problems.
Fixes NB#111560
---
src/tracker-extract/tracker-extract-mp3.c | 156 ++++++++++++++++++++++------
1 files changed, 122 insertions(+), 34 deletions(-)
diff --git a/src/tracker-extract/tracker-extract-mp3.c b/src/tracker-extract/tracker-extract-mp3.c
index fa6425e..8cf2549 100644
--- a/src/tracker-extract/tracker-extract-mp3.c
+++ b/src/tracker-extract/tracker-extract-mp3.c
@@ -31,6 +31,7 @@
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <errno.h>
#include <glib.h>
#include <glib/gstdio.h>
@@ -46,15 +47,22 @@
#include "tracker-extract-albumart.h"
#include "tracker-escape.h"
-/* FIXME The max file read is not a good idea as basic
- * id3 are the _last_ 128 bits of the file. We should
- * probably read 2 buffers (beginning, end) instead.
+/* We mmap the beginning of the file and separately read the last 128 bytes
+ for id3v1 tags. While these are probably corner cases, the rationale is that
+ we don't want to fault a whole page for the last 128 bytes, and on the other
+ hand we don't want to mmap the whole file with unlimited size (might need to
+ create a private copy in some special cases, find contiguous space, etc). We
+ now take the first 5 MB of the file and assume that this is enough. In theory
+ there is no maximum size, as someone could embed 50 gigabytes of albumart there.
*/
-#define MAX_FILE_READ 1024 * 1024 * 20
+
+/* Maximum number of bytes mmapped from the start of the file (5 MB).
+ * Parenthesized so the expansion stays intact inside larger expressions. */
+#define MAX_FILE_READ (1024 * 1024 * 5)
#define MAX_MP3_SCAN_DEEP 16768
-#define MAX_FRAMES_SCAN 1024 * 3
-#define VBR_THRESHOLD 64
+#define MAX_FRAMES_SCAN 512
+#define VBR_THRESHOLD 16
+
+#define ID3V1_SIZE 128
typedef struct {
const gchar *text;
@@ -72,7 +80,10 @@ typedef struct {
} id3tag;
typedef struct {
- size_t audio_offset;
+ size_t size;
+ size_t id3v2_size;
+
+ guint32 duration;
unsigned char *albumartdata;
size_t albumartsize;
@@ -285,6 +296,45 @@ static TrackerExtractData extract_data[] = {
{ NULL, NULL }
};
+/* Read the trailing ID3V1_SIZE bytes of the file open on fd, where size is
+ * the total file size in bytes. Returns a newly allocated ID3V1_SIZE-byte
+ * buffer that the caller must g_free(), or NULL if the file is smaller than
+ * ID3V1_SIZE, the seek fails, or read() fails with an error other than EINTR.
+ * Bytes not filled because of an early end-of-file are left zeroed.
+ */
+static char *
+read_id3v1_buffer (int fd, goffset size)
+{
+	char *buffer;
+	gsize bytes_read;
+	gssize rc;
+
+	/* Files shorter than the tag cannot contain one; avoid a negative seek. */
+	if (size < ID3V1_SIZE) {
+		return NULL;
+	}
+
+	if (lseek (fd, size - ID3V1_SIZE, SEEK_SET) < 0) {
+		return NULL;
+	}
+
+	/* g_malloc0() aborts on allocation failure, so no NULL check is needed;
+	 * zero-filling keeps the buffer defined if read() hits end-of-file early.
+	 */
+	buffer = g_malloc0 (ID3V1_SIZE);
+	bytes_read = 0;
+
+	while (bytes_read < ID3V1_SIZE) {
+		/* read() returns a signed count; a negative value signals an error. */
+		rc = read (fd, buffer + bytes_read, ID3V1_SIZE - bytes_read);
+
+		if (rc < 0) {
+			/* Retry when interrupted by a signal, give up otherwise. */
+			if (errno != EINTR) {
+				g_free (buffer);
+				return NULL;
+			}
+		} else if (rc == 0) {
+			/* End of file before the whole tag was read. */
+			break;
+		} else {
+			bytes_read += rc;
+		}
+	}
+
+	return buffer;
+}
+
/* Convert from UCS-2 to UTF-8 checking the BOM.*/
static gchar *
ucs2_to_utf8 (const gchar *data, guint len)
@@ -460,7 +510,8 @@ static gboolean
mp3_parse_header (const gchar *data,
size_t size,
size_t seek_pos,
- GHashTable *metadata)
+ GHashTable *metadata,
+ file_data *filedata)
{
guint header;
gchar mpeg_ver = 0;
@@ -613,17 +664,20 @@ mp3_parse_header (const gchar *data,
avg_bps /= frames;
- if ((!vbr_flag || frames > VBR_THRESHOLD) || (frames > MAX_FRAMES_SCAN)) {
- /* If not all frames scanned */
- length = size / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
- } else{
- length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
+ if (filedata->duration==0) {
+ if ((!vbr_flag || frames > VBR_THRESHOLD) || (frames > MAX_FRAMES_SCAN)) {
+ /* If not all frames scanned */
+ length = (filedata->size - filedata->id3v2_size) / (avg_bps ? avg_bps : bitrate ? bitrate : 0xFFFFFFFF) / 125;
+ } else{
+ length = 1152 * frames / (sample_rate ? sample_rate : 0xFFFFFFFF);
+ }
+
+ g_hash_table_insert (metadata,
+ g_strdup ("Audio:Duration"),
+ tracker_escape_metadata_printf ("%d", length));
}
g_hash_table_insert (metadata,
- g_strdup ("Audio:Duration"),
- tracker_escape_metadata_printf ("%d", length));
- g_hash_table_insert (metadata,
g_strdup ("Audio:Samplerate"),
tracker_escape_metadata_printf ("%d", sample_rate));
g_hash_table_insert (metadata,
@@ -636,12 +690,13 @@ mp3_parse_header (const gchar *data,
static void
mp3_parse (const gchar *data,
size_t size,
+ size_t offset,
GHashTable *metadata,
file_data *filedata)
{
guint header;
guint counter = 0;
- guint pos = filedata->audio_offset;
+ guint pos = offset;
do {
/* Seek for frame start */
@@ -653,7 +708,7 @@ mp3_parse (const gchar *data,
if ((header & sync_mask) == sync_mask) {
/* Found header sync */
- if (mp3_parse_header (data, size, pos, metadata)) {
+ if (mp3_parse_header (data, size, pos, metadata, filedata)) {
return;
}
}
@@ -690,6 +745,7 @@ get_id3v24_tags (const gchar *data,
{"TDRL", "Audio:ReleaseDate"},
{"TRCK", "Audio:TrackNo"},
{"PCNT", "Audio:PlayCount"},
+ {"TLEN", "Audio:Duration"},
{NULL, 0},
};
@@ -787,9 +843,7 @@ get_id3v24_tags (const gchar *data,
g_free (word);
word = g_strdup (parts[0]);
g_strfreev (parts);
- }
-
- if (strcmp (tmap[i].text, "TCON") == 0) {
+ } else if (strcmp (tmap[i].text, "TCON") == 0) {
gint genre;
if (get_genre_number (word, &genre)) {
@@ -800,7 +854,14 @@ get_id3v24_tags (const gchar *data,
if (strcasecmp (word, "unknown") == 0) {
break;
}
- }
+ } else if (strcmp (tmap[i].text, "TLEN") == 0) {
+ guint32 duration;
+
+ duration = atoi (word);
+ g_free (word);
+ word = g_strdup_printf ("%d", duration/1000);
+ filedata->duration = duration/1000;
+ }
g_hash_table_insert (metadata,
g_strdup (tmap[i].type),
@@ -935,6 +996,7 @@ get_id3v23_tags (const gchar *data,
{"TYER", "Audio:ReleaseDate"},
{"TRCK", "Audio:TrackNo"},
{"PCNT", "Audio:PlayCount"},
+ {"TLEN", "Audio:Duration"},
{NULL, 0},
};
@@ -1023,9 +1085,7 @@ get_id3v23_tags (const gchar *data,
g_free (word);
word = g_strdup (parts[0]);
g_strfreev (parts);
- }
-
- if (strcmp (tmap[i].text, "TCON") == 0) {
+ } else if (strcmp (tmap[i].text, "TCON") == 0) {
gint genre;
if (get_genre_number (word, &genre)) {
@@ -1036,6 +1096,13 @@ get_id3v23_tags (const gchar *data,
if (strcasecmp (word, "unknown") == 0) {
break;
}
+ } else if (strcmp (tmap[i].text, "TLEN") == 0) {
+ guint32 duration;
+
+ duration = atoi (word);
+ g_free (word);
+ word = g_strdup_printf ("%d", duration/1000);
+ filedata->duration = duration/1000;
}
g_hash_table_insert (metadata,
@@ -1167,6 +1234,7 @@ get_id3v20_tags (const gchar *data,
{"TOT", "Audio:Album"},
{"TOL", "Audio:Artist"},
{"COM", "Audio:Comment"},
+ {"TLE", "Audio:Duration"},
{ NULL, 0},
};
@@ -1247,8 +1315,16 @@ get_id3v20_tags (const gchar *data,
}
if (strcasecmp (word, "unknown") == 0) {
+ g_free (word);
break;
}
+ } else if (strcmp (tmap[i].text, "TLE") == 0) {
+ guint32 duration;
+
+ duration = atoi (word);
+ g_free (word);
+ word = g_strdup_printf ("%d", duration/1000);
+ filedata->duration = duration/1000;
}
g_hash_table_insert (metadata,
@@ -1467,9 +1543,9 @@ parse_id3v20 (const gchar *data,
*offset_delta = tsize + 10;
}
-static void
+static goffset
parse_id3v2 (const gchar *data,
- size_t size,
+ size_t size,
GHashTable *metadata,
file_data *filedata)
{
@@ -1484,12 +1560,14 @@ parse_id3v2 (const gchar *data,
if (offset_delta == 0) {
done = TRUE;
- filedata->audio_offset = offset;
+ filedata->id3v2_size = offset;
} else {
offset += offset_delta;
}
} while (!done);
+
+ return offset;
}
static void
@@ -1498,8 +1576,10 @@ extract_mp3 (const gchar *filename,
{
int fd;
void *buffer;
+ void *id3v1_buffer;
goffset size;
id3tag info;
+ goffset audio_offset;
file_data filedata;
info.title = NULL;
@@ -1510,16 +1590,20 @@ extract_mp3 (const gchar *filename,
info.genre = NULL;
info.trackno = NULL;
- filedata.audio_offset = 0;
+ filedata.size = 0;
+ filedata.id3v2_size = 0;
+ filedata.duration = 0;
filedata.albumartdata = NULL;
filedata.albumartsize = 0;
size = tracker_file_get_size (filename);
- if (size == 0 || size > MAX_FILE_READ) {
+ if (size == 0) {
return;
}
+ filedata.size = size;
+
#if defined(__linux__)
/* Can return -1 because of O_NOATIME, so we try again after
* without as a last resort. This can happen due to
@@ -1550,13 +1634,15 @@ extract_mp3 (const gchar *filename,
0);
#endif
+ id3v1_buffer = read_id3v1_buffer (fd, size);
+
close (fd);
if (buffer == NULL || buffer == (void*) -1) {
return;
}
- if (!get_id3 (buffer, size, &info)) {
+ if (!get_id3 (id3v1_buffer, ID3V1_SIZE, &info)) {
/* Do nothing? */
}
@@ -1611,10 +1697,10 @@ extract_mp3 (const gchar *filename,
g_free (info.genre);
/* Get other embedded tags */
- parse_id3v2 (buffer, size, metadata, &filedata);
+ audio_offset = parse_id3v2 (buffer, MIN(size, MAX_FILE_READ), metadata, &filedata);
/* Get mp3 stream info */
- mp3_parse (buffer, size, metadata, &filedata);
+ mp3_parse (buffer, MIN(size, MAX_FILE_READ), audio_offset, metadata, &filedata);
#ifdef HAVE_GDKPIXBUF
tracker_process_albumart (filedata.albumartdata, filedata.albumartsize,
@@ -1654,8 +1740,10 @@ extract_mp3 (const gchar *filename,
}
#ifndef G_OS_WIN32
- munmap (buffer, size);
+ munmap (buffer, MIN(size, MAX_FILE_READ));
#endif
+
+ g_free (id3v1_buffer);
}
TrackerExtractData *
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]