Re: [Tracker] Request for review: Upstreaming Jolla patches adding a libav based audio and video extractor



On 02/01/14 14:09, Philip Van Hoof wrote:
From e14fbe8bea83a5131520d8925792b899032fcc47 Mon Sep 17 00:00:00 2001
From: Andrew den Exter<andrew den exter jollamobile com>
Date: Tue, 3 Sep 2013 04:23:12 +0000
Subject: [PATCH 1/4] Add a libav based generic media extractor.

Hello Philip,

Thanks for the patch. Review below:

---
  configure.ac                                |  45 +++-
  src/tracker-extract/Makefile.am             |  19 ++
  src/tracker-extract/tracker-extract-libav.c | 369 ++++++++++++++++++++++++++++
  3 files changed, 432 insertions(+), 1 deletion(-)
  create mode 100644 src/tracker-extract/tracker-extract-libav.c

diff --git a/configure.ac b/configure.ac
index 090ebd2..91cb91a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1646,7 +1646,7 @@ AM_CONDITIONAL(HAVE_GDKPIXBUF, test "x$have_gdkpixbuf" = "xyes")

  AC_ARG_ENABLE(generic-media-extractor,
                AS_HELP_STRING([--enable-generic-media-extractor=ARG],
-                             [enables one of the (gstreamer, xine, external, auto) generic media extractor 
backends [[default=auto]]]),,
+                             [enables one of the (gstreamer, xine, libav, external, auto) generic media 
extractor backends [[default=auto]]]),,

I would personally put libav before xine - does anyone use xine any more? :)

+# libav
+libextract_libav_la_SOURCES = tracker-extract-libav.c
+libextract_libav_la_CFLAGS = $(TRACKER_EXTRACT_MODULES_CFLAGS)

Aren't you missing AV{FORMAT|CODECS|UTILS}_CFLAGS here?

+libextract_libav_la_LDFLAGS = $(module_flags)
+libextract_libav_la_LIBADD = \
+       $(top_builddir)/src/libtracker-extract/libtracker-extract- TRACKER_API_VERSION@.la  \
+       $(top_builddir)/src/libtracker-common/libtracker-common.la \
+       $(BUILD_LIBS) \
+       $(TRACKER_EXTRACT_MODULES_LIBS) \
+        $(AVFORMAT_LIBS) \
+        $(AVUTIL_LIBS)

Aren't you missing AVCODEC_LIBS here?

diff --git a/src/tracker-extract/tracker-extract-libav.c b/src/tracker-extract/tracker-extract-libav.c
new file mode 100644
index 0000000..04ed3a0
--- /dev/null
+++ b/src/tracker-extract/tracker-extract-libav.c
@@ -0,0 +1,369 @@
+/*
+ * Copyright (C) 2013 Jolla Ltd.
+ * Contact: Andrew den Exter<andrew den exter jollamobile com>

This is not the correct way to put the copyright/author details...
It should be:

  Copyright (C) 2013 Jolla Ltd. <andrew den exter jollamobile com>

then:

  Author: Andrew den Exter <andrew den exter jollamobile com>

where the copyright address is the formal address for anyone that should be contacted about legal issues from Jolla. For Nokia (for example), we used <ivan frade nokia com> even if the author was you Philip.

Also, might want to make it 2013-2014 ;)

+#include <glib.h>
+
+#include <libtracker-common/tracker-ontologies.h>
+#include <libtracker-common/tracker-utils.h>
+
+#include <libtracker-extract/tracker-extract.h>
+
+#include <tracker-media-art.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/mathematics.h>
+
+static void
+open_insert (TrackerSparqlBuilder *preupdate, const char *graph, const gchar *iri, const gchar *type)

Coding style :)

  https://wiki.gnome.org/Projects/Tracker/Documentation/CodingStyle

Basically, each parameter should be on it's own line and aligned plz :)

+{
+       tracker_sparql_builder_insert_open (preupdate, NULL);
+       if (graph) {
+               tracker_sparql_builder_graph_open(preupdate, graph);

Missing space before "("

+       }
+
+       tracker_sparql_builder_subject_iri (preupdate, iri);
+       tracker_sparql_builder_predicate (preupdate, "a");
+       tracker_sparql_builder_object (preupdate, type);
+}
+
+static void
+close_insert (TrackerSparqlBuilder *preupdate, const char *graph)
+{
+       if (graph) {
+               tracker_sparql_builder_graph_close (preupdate);
+       }
+       tracker_sparql_builder_insert_close (preupdate);
+}

Shouldn't we have a utility function for this sort of thing? I see this code reproduced a lot. Really libtracker-sparql or libtracker-extract should have some function to avoid us doing this all the time (the open, close, etc). All we need is to provide the IRI and TYPE and the rest is quite similar in a lot of cases.

+static void
+delete_value (TrackerSparqlBuilder *preupdate, const gchar *iri, const gchar *predicate,
+                         const char *value)

Coding style again above ^^
Same below, won't mention it again ;)

+{
+       tracker_sparql_builder_delete_open (preupdate, NULL);
+       tracker_sparql_builder_subject_iri (preupdate, iri);
+       tracker_sparql_builder_predicate (preupdate, predicate);
+       tracker_sparql_builder_object_variable (preupdate, value);
+       tracker_sparql_builder_delete_close (preupdate);
+
+       tracker_sparql_builder_where_open (preupdate);
+       tracker_sparql_builder_subject_iri (preupdate, iri);
+       tracker_sparql_builder_predicate (preupdate, predicate);
+       tracker_sparql_builder_object_variable (preupdate, value);
+       tracker_sparql_builder_where_close (preupdate);
+}
+
+static void
+set_value_iri (TrackerSparqlBuilder *metadata, const gchar *predicate, const gchar *iri)
+{
+       tracker_sparql_builder_predicate (metadata, predicate);
+       tracker_sparql_builder_object_iri (metadata, iri);
+}
+
+static void
+set_value_double (TrackerSparqlBuilder *metadata, const gchar *predicate, gdouble value)
+{
+       tracker_sparql_builder_predicate (metadata, predicate);
+       tracker_sparql_builder_object_double (metadata, value);
+}
+
+static void
+set_value_int64 (TrackerSparqlBuilder *metadata, const gchar *predicate, gint64 value)
+{
+       tracker_sparql_builder_predicate (metadata, predicate);
+       tracker_sparql_builder_object_int64 (metadata, value);
+}
+
+static void
+set_value_string (TrackerSparqlBuilder *metadata, const gchar *predicate, const gchar *value)
+{
+       tracker_sparql_builder_predicate (metadata, predicate);
+       tracker_sparql_builder_object_unvalidated (metadata, value);
+}

We do these sort of things a lot in other extractors too. Starting to think a higher level API in libtracker-sparql could be useful :)

+G_MODULE_EXPORT gboolean
+tracker_extract_get_metadata (TrackerExtractInfo *info)
+{
+       GFile *file;
+       TrackerSparqlBuilder *metadata;
+       TrackerSparqlBuilder *preupdate;
+       const gchar *graph;
+       gchar *absoluteFilePath;

Coding style on variable names. No camel case plz.

+       gchar *uri;
+       AVFormatContext *format = NULL;
+       AVStream *audio_stream = NULL;
+       AVStream *video_stream = NULL;
+       int streamIndex;
+       AVDictionaryEntry *tag = NULL;
+       const char *title;
+
+       av_register_all();

Space missing above before "(" and others below again.

+
+       file = tracker_extract_info_get_file (info);
+       metadata = tracker_extract_info_get_metadata_builder (info);
+       preupdate = tracker_extract_info_get_preupdate_builder (info);
+       graph = tracker_extract_info_get_graph (info);
+
+       uri = g_file_get_uri (file);
+
+       absoluteFilePath = g_file_get_path (file);
+       if (avformat_open_input(&format, absoluteFilePath, NULL, NULL)) {
+               g_free (absoluteFilePath);

No free for the uri above, that's a memory leak.

+               return FALSE;
+       }
+       g_free (absoluteFilePath);
+
+       streamIndex = av_find_best_stream (format, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);
+       if (streamIndex >= 0) {
+               audio_stream = format->streams[streamIndex];
+       }
+
+       streamIndex = av_find_best_stream (format, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
+       if (streamIndex >= 0) {
+               video_stream = format->streams[streamIndex];
+       }
+
+       if (!audio_stream && !video_stream) {

What is one exists and not the other, is that still unacceptable for extraction? It's possible that videos have no audio stream and of course you can have a file with no video stream (just audio).

+               avformat_free_context(format);

Missing clean up for uri again.

+               return FALSE;
+       }
+
+       if (video_stream) {
+               tracker_sparql_builder_predicate (metadata, "a");
+               tracker_sparql_builder_object (metadata, "nmm:Video");
+
+               if (video_stream->codec->width > 0 && video_stream->codec->height > 0) {
+                       set_value_int64 (metadata, "nfo:width", video_stream->codec->width);
+                       set_value_int64 (metadata, "nfo:height", video_stream->codec->height);
+               }

Does it ever make sense to have just width or just height? Wondering if we shouldn't be checking these things separately in cases where we know one and not both?

+
+               if (video_stream->avg_frame_rate.num > 0) {
+                       gdouble frame_rate
+                                       = (gdouble) video_stream->avg_frame_rate.num
+                                       / video_stream->avg_frame_rate.den;

Coding style with the variable setting above (and below).

+                       set_value_double (metadata, "nfo:frameRate", frame_rate);
+               }
+
+               if (video_stream->duration > 0) {
+                       gint64 duration = av_rescale(video_stream->duration, video_stream->time_base.num,
+                                                    video_stream->time_base.den);

Usually we have parameters one per line to avoid long lines.


+       } else if (audio_stream) {
+               const char *album_title = NULL;
+               const char *album_artist = NULL;
+               gchar *album_artist_uri = NULL;
+               gchar *performer_uri = NULL;
+
+               tracker_sparql_builder_predicate (metadata, "a");
+               tracker_sparql_builder_object (metadata, "nmm:MusicPiece");
+               tracker_sparql_builder_object (metadata, "nfo:Audio");
+

[snip]

+               if (performer_uri) {
+                       set_value_iri (metadata, "nmm:performer", performer_uri);
+
+                       if (album_title && album_artist_uri) {
+                               g_free(performer_uri);
+                       }

OK, so you conditionally free performer_uri here and I can see why, but I would make this much easier to follow by cleaning it up at the end of the scope where it was originally declared. That way, you don't need any conditionals to know if you can free the variable and it's a lot easier to figure out what's happening when people review the code.

+               } else if (album_artist_uri) {
+                       set_value_iri (metadata, "nmm:performer", album_artist_uri);
+               }
+
+               if ((tag = av_dict_get (format->metadata, "composer", tag, 0))) {
+                       gchar *composer_uri = create_artist (preupdate, graph, tag->value);
+                       set_value_iri (metadata, "nmm:composer", composer_uri);
+                       g_free(composer_uri);
+               }
+
+
+               if (album_title) {
+                       int disc = 1;
+                       gchar *disc_uri;
+                       gchar *album_uri = tracker_sparql_escape_uri_printf ("urn:album:%s", album_title);
+
+                       open_insert (preupdate, graph, album_uri, "nmm:MusicAlbum");
+                       set_value_string (preupdate, "nmm:albumTitle", album_title);
+                       if (album_artist_uri) {
+                               set_value_iri (preupdate, "nmm:albumArtist", album_artist_uri);
+                       } else if (performer_uri) {
+                               set_value_iri (preupdate, "nmm:albumArtist", performer_uri);
+                       }
+                       close_insert (preupdate, graph);
+
+
+                       if ((tag = av_dict_get (format->metadata, "disc", NULL, 0))) {
+                               disc = atoi (tag->value);
+                       }
+
+                       disc_uri = tracker_sparql_escape_uri_printf ("urn:album-disc:%s:Disc%d",
+                                                                    album_title,
+                                                                    disc);

Passing comment ...

Starting to think we really should have some libtracker-extract APIs for creating consistent looking urns - all these string defined albums, discs, and artists concerns me that we fail with consistency at some point down the line.

+                       delete_value (preupdate, disc_uri, "nmm:setNumber", "unknown");
+                       delete_value (preupdate, disc_uri, "nmm:albumDiscAlbum", "unknown");

Hmm, do we need to delete first? I thought we used update_or_replace for extracted data? I could be wrong.

0003-Ensure-video-size-is-extracted-by-libav-extractor.patch


From 6572856a17c77738a52ce14d0cd65f32b2e1883d Mon Sep 17 00:00:00 2001
From: Andrew den Exter<andrew den exter jollamobile com>
Date: Tue, 22 Oct 2013 03:28:08 +0000
Subject: [PATCH 3/4] Ensure video size is extracted by libav extractor.

It may be necessary to parse the MPEG bitstream to get width and height
from some videos so if the resolution isn't immediately available decode
enough data to determine it.
---
  src/tracker-extract/Makefile.am             |  5 +-
  src/tracker-extract/tracker-extract-libav.c | 96 ++++++++++++++++++++++++-----
  2 files changed, 83 insertions(+), 18 deletions(-)

diff --git a/src/tracker-extract/Makefile.am b/src/tracker-extract/Makefile.am
index bb8296e..5ec6b66 100644
--- a/src/tracker-extract/Makefile.am
+++ b/src/tracker-extract/Makefile.am
@@ -506,8 +506,9 @@ libextract_libav_la_LIBADD = \
        $(top_builddir)/src/libtracker-common/libtracker-common.la \
        $(BUILD_LIBS) \
        $(TRACKER_EXTRACT_MODULES_LIBS) \
-        $(AVFORMAT_LIBS) \
-        $(AVUTIL_LIBS)
+       $(AVFORMAT_LIBS) \
+       $(AVUTIL_LIBS) \
+       $(AVCODEC_LIBS)

Ah I see you fixed this then :) possibly missing the CFLAGS though.

Anyway, the rest looks fine. Thanks for the patch! :)

--
Regards,
Martyn

Founder & Director @ Lanedo GmbH.
http://www.linkedin.com/in/martynrussell


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]