diff --git a/src/tracker-extract/tracker-extract-gstreamer.c b/src/tracker-extract/tracker-extract-gstreamer.c index 2a2fb7b..15da85a 100644 --- a/src/tracker-extract/tracker-extract-gstreamer.c +++ b/src/tracker-extract/tracker-extract-gstreamer.c @@ -89,6 +89,29 @@ #define GST_TAG_FRAMERATE "framerate" #endif +/* These are in newer GStreamer releases, but we define them here + so we don't need to depend on a new release */ +#ifndef GST_TAG_SHOW_NAME +#define GST_TAG_SHOW_NAME "show-name" +#endif + +#ifndef GST_TAG_SHOW_EPISODE_NUMBER +#define GST_TAG_SHOW_EPISODE_NUMBER "show-episode-number" +#endif + +#ifndef GST_TAG_SHOW_SEASON_NUMBER +#define GST_TAG_SHOW_SEASON_NUMBER "show-season-number" +#endif + +#define TV_REGEX "(?.*)\\.(?(?:\\d{1,2})|(?:[sS]\\K\\d{1,2}))(?(?:\\d{2})|(?:[eE]\\K\\d{1,2}))\\.?(?.*)?" +#define MOVIE_REGEX "(?.*)\\.?[\\(\\[](?[12][90]\\d{2})[\\)\\]]" + +typedef enum { + VIDEO_TYPE_UNKNOWN, + VIDEO_TYPE_MOVIE, + VIDEO_TYPE_SERIES +} VideoType; + typedef enum { EXTRACT_MIME_AUDIO, EXTRACT_MIME_VIDEO, @@ -1013,6 +1036,295 @@ delete_existing_tracks (TrackerSparqlBuilder *postupdate, g_free (sparql); } +const gchar *blacklisted_prefix[] = { + "tpz-", NULL +}; + +/* Blacklisted are words that we ignore everything after */ +const char *blacklist[] = { + "720p", "1080p", + "ws", "WS", "proper", "PROPER", + "repack", "real.repack", + "hdtv", "HDTV", "pdtv", "PDTV", "notv", "NOTV", + "dsr", "DSR", "DVDRip", "divx", "DIVX", "xvid", "Xvid", + NULL +}; + +static gchar * +sanitise_string (const gchar *str) +{ + int i; + gchar *line; + + line = (gchar *) str; + for (i = 0; blacklisted_prefix[i]; i++) { + if (g_str_has_prefix (str, blacklisted_prefix[i])) { + int len = strlen (blacklisted_prefix[i]); + + line = (gchar *) str + len; + } + } + + for (i = 0; blacklist[i]; i++) { + gchar *end; + + end = strstr (line, blacklist[i]); + if (end) { + return g_strndup (line, end - line); + } + } + + return g_strdup (line); +} + +/* tidies strings before we run them through the regexes */ +static gchar * +uri_to_metadata (const gchar *uri) +{ + gchar *ext, *basename, *name, *whitelisted; + + basename = g_path_get_basename (uri); + ext = strrchr (basename, '.'); + if (ext) { + name = g_strndup (basename, ext - basename); + g_free (basename); + } else { + name = basename; + } + + /* Replace _ with . */ + g_strdelimit (name, "_ ", '.'); + whitelisted = sanitise_string (name); + g_free (name); + + return whitelisted; +} + +static VideoType +parse_uri (const gchar *uri, + gchar **title, + gchar **showname, + GDate **date, + gint *season, + gint *episode) +{ + gchar *metadata; + GRegex *regex; + GMatchInfo *info; + + metadata = uri_to_metadata (uri); + + regex = g_regex_new (MOVIE_REGEX, 0, 0, NULL); + g_regex_match (regex, metadata, 0, &info); + + if (g_match_info_matches (info)) { + if (title) { + *title= g_match_info_fetch_named (info, "name"); + /* Replace "." with */ + g_strdelimit (*title, ".", ' '); + } + + if (date) { + gchar *year = g_match_info_fetch_named (info, "year"); + + *date = g_date_new (); + g_date_set_year (*date, atoi (year)); + g_free (year); + } + + if (showname) { + *showname = NULL; + } + + if (season) { + *season = 0; + } + + if (episode) { + *episode = 0; + } + + g_regex_unref (regex); + g_match_info_free (info); + g_free (metadata); + + return VIDEO_TYPE_MOVIE; + } + + g_regex_unref (regex); + g_match_info_free (info); + + regex = g_regex_new (TV_REGEX, 0, 0, NULL); + g_regex_match (regex, metadata, 0, &info); + + if (g_match_info_matches (info)) { + if (title) { + *title = g_match_info_fetch_named (info, "name"); + g_strdelimit (*title, ".", ' '); + } + + if (showname) { + *showname = g_match_info_fetch_named (info, "showname"); + g_strdelimit (*showname, ".", ' '); + } + + if (season) { + gchar *s = g_match_info_fetch_named (info, "season"); + if (s) { + if (*s == 's' || *s == 'S') { + *season = atoi (s + 1); + } else { + *season = atoi (s); + } + } else { + *season = 0; + } + + g_free (s); + } + + if (episode) { + gchar *e = g_match_info_fetch_named (info, "episode"); + if (e) { + if (*e == 'e' || *e == 'E') { + *episode = atoi (e + 1); + } else { + *episode = atoi (e); + } + } else { + *episode = 0; + } + + g_free (e); + } + + if (date) { + *date = NULL; + } + + g_regex_unref (regex); + g_match_info_free (info); + g_free (metadata); + + return VIDEO_TYPE_SERIES; + } + + g_regex_unref (regex); + g_match_info_free (info); + + /* The filename doesn't look like a movie or a TV show, just use the + filename without extension as the title */ + if (title) { + *title = g_strdelimit (metadata, ".", ' '); + } + + if (showname) { + *showname = NULL; + } + + if (date) { + *date = NULL; + } + + if (season) { + *season = 0; + } + + if (episode) { + *episode = 0; + } + + return VIDEO_TYPE_UNKNOWN; +} + +static void +sanity_check_video_metadata (MetadataExtractor *extractor, + const gchar *uri, + TrackerSparqlBuilder *metadata) +{ + VideoType type; + gchar *title, *showname; + gboolean ret; + GDate *date; + gint season, episode; + + ret = gst_tag_list_get_string (extractor->tagcache, + GST_TAG_TITLE, &title); + if (ret == FALSE) { + title = NULL; + } + + ret = gst_tag_list_get_string (extractor->tagcache, + GST_TAG_SHOW_NAME, &showname); + if (ret == FALSE) { + showname = NULL; + } + + ret = gst_tag_list_get_int (extractor->tagcache, + GST_TAG_SHOW_EPISODE_NUMBER, &episode); + if (ret == FALSE) { + episode = 0; + } + + ret = gst_tag_list_get_int (extractor->tagcache, + GST_TAG_SHOW_SEASON_NUMBER, &season); + if (ret == FALSE) { + season = 0; + } + + ret = gst_tag_list_get_date (extractor->tagcache, GST_TAG_DATE, &date); + if (ret == FALSE) { + date = NULL; + } + + type = parse_uri (uri, title ? NULL : &title, + showname ? NULL : &showname, + date ? NULL : &date, + season != 0 ? NULL : &season, + episode != 0 ? NULL : &episode); + + if (title && title[0] != '\0') { + tracker_sparql_builder_predicate (metadata, "nie:title"); + tracker_sparql_builder_object_unvalidated (metadata, title); + } + + /* FIXME: Tracker doesn't have (as far as I can see) anyway to store + showname, so if title hasn't been set, then we set it to showname */ + if (showname && showname != '\0') { + if (title == NULL || title[0] == '\0') { + tracker_sparql_builder_predicate (metadata, "nie:title"); + tracker_sparql_builder_object_unvalidated (metadata, showname); + } + } + g_free (showname); + g_free (title); + + if (date) { + gchar buf[10]; + + if (g_date_strftime (buf, 10, "%Y", date)) { + tracker_sparql_builder_predicate (metadata, "nie:contentCreated"); + tracker_sparql_builder_object_unvalidated (metadata, buf); + } + + g_date_free (date); + } + + if (episode > 0) { + tracker_sparql_builder_predicate (metadata, "nmm:episodeNumber"); + tracker_sparql_builder_object_int64 (metadata, episode); + } + + if (season > 0) { + tracker_sparql_builder_predicate (metadata, "nmm:season"); + tracker_sparql_builder_object_int64 (metadata, season); + } + + if (type == VIDEO_TYPE_SERIES) { + /* FIXME: Set isSeries */ + } +} + static void extract_metadata (MetadataExtractor *extractor, const gchar *file_url,