[grilo-plugins] local-metadata: Improve string sanitise
- From: Victor Toso de Carvalho <victortoso src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [grilo-plugins] local-metadata: Improve string sanitise
- Date: Fri, 19 Dec 2014 20:02:29 +0000 (UTC)
commit 92141ba9dcecacdca3a8469b4edeaed10237d8ac
Author: Victor Toso <me victortoso com>
Date: Thu Dec 18 21:26:15 2014 +0100
local-metadata: Improve string sanitise
* Cut the string at the earliest blacklisted word, so the result never contains one.
* Only allow alphanumeric characters, '!', '?' and '.' at the end of the substring.
* Include new blacklist word "x264".
https://bugzilla.gnome.org/show_bug.cgi?id=741562
src/local-metadata/grl-local-metadata.c | 26 ++++++++++++++++++++++----
1 files changed, 22 insertions(+), 4 deletions(-)
---
diff --git a/src/local-metadata/grl-local-metadata.c b/src/local-metadata/grl-local-metadata.c
index 2678c99..b4b88ab 100644
--- a/src/local-metadata/grl-local-metadata.c
+++ b/src/local-metadata/grl-local-metadata.c
@@ -90,7 +90,7 @@ const gchar *video_blacklisted_prefix[] = {
};
const char *video_blacklisted_words[] = {
- "720p", "1080p",
+ "720p", "1080p", "x264",
"ws", "WS", "proper", "PROPER",
"repack", "real.repack",
"hdtv", "HDTV", "pdtv", "PDTV", "notv", "NOTV",
@@ -246,7 +246,7 @@ static gchar *
video_sanitise_string (const gchar *str)
{
int i;
- gchar *line;
+ gchar *line, *line_end;
GRegex *regex;
line = (gchar *) str;
@@ -258,14 +258,32 @@ video_sanitise_string (const gchar *str)
}
}
+ /* Get the substring limited by the first blacklisted word */
+ line_end = line + strlen (line);
for (i = 0; video_blacklisted_words[i]; i++) {
gchar *end;
end = strcasestr (line, video_blacklisted_words[i]);
- if (end) {
- return g_strndup (line, end - line);
+ if (end && end < line_end) {
+ line_end = end;
}
}
+
+ if (*line_end != '\0') {
+ line_end = g_utf8_find_prev_char (line, line_end);
+
+ /* After removing substring with blacklisted word, ignore non alpha-numeric
+ * char in the end of the sanitised string */
+ while (g_unichar_isalnum (*line_end) == FALSE &&
+ *line_end != '!' &&
+ *line_end != '?' &&
+ *line_end != '.') {
+ line_end = g_utf8_find_prev_char (line, line_end);
+ }
+
+ return g_strndup (line, line_end - line);
+ }
+
regex = g_regex_new ("\\.-\\.", 0, 0, NULL);
line = g_regex_replace_literal(regex, line, -1, 0, ".", 0, NULL);
g_regex_unref(regex);
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]