[totem-pl-parser] podcast: Always prefer recent date as publish date for feeds
- From: Bastien Nocera <hadess src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [totem-pl-parser] podcast: Always prefer recent date as publish date for feeds
- Date: Tue, 23 Feb 2021 17:07:29 +0000 (UTC)
commit d8021d71d32764e7d67c5b3650953707619de392
Author: crvi <crvisqr gmail com>
Date: Thu Oct 1 00:44:26 2020 +0530
podcast: Always prefer recent date as publish date for feeds
For RSS feed channels, <lastBuildDate> and <pubDate> do have
different meanings. Please refer to [1] for more details. But podcast
providers use them in their own ways. Some use only <pubDate>
(e.g. podbean), some only <lastBuildDate> (e.g. anchor.fm, buzzsprout),
some use both (e.g. podigee, soundcloud). Podigee seems to use
<lastBuildDate> for any modifications to feed contents (including
episode additions), which in fact should be covered by <pubDate>.
In short, we are only interested in the more recent timestamp of these
two tags, when both are available.
[1] https://www.rssboard.org/rss-profile#element-channel-lastbuilddate
plparse/tests/parser.c | 33 +++++++++++++++++++++++++++++++++
plparse/tests/podcast-image-url.1.rss | 4 ++--
plparse/totem-pl-parser-podcast.c | 25 +++++++++++++++++++++++--
3 files changed, 58 insertions(+), 4 deletions(-)
---
diff --git a/plparse/tests/parser.c b/plparse/tests/parser.c
index ca97945..f71091d 100644
--- a/plparse/tests/parser.c
+++ b/plparse/tests/parser.c
@@ -883,6 +883,38 @@ test_parsing_item_image (void)
g_free (uri);
}
+static void
+test_parsing_feed_pubdate (void)
+{
+ char *uri;
+
+ /* no <lastBuildDate> or <pubDate> */
+ uri = get_relative_uri (TEST_SRCDIR "585407.rss");
+ g_assert_cmpstr (parser_test_get_playlist_field (uri, TOTEM_PL_PARSER_FIELD_PUB_DATE), ==, NULL);
+ g_free (uri);
+
+ /* only <lastBuildDate> */
+ uri = get_relative_uri (TEST_SRCDIR "791154-kqed.rss");
+ g_assert_cmpstr (parser_test_get_playlist_field (uri, TOTEM_PL_PARSER_FIELD_PUB_DATE), ==, "Mon, 04 Dec 2017 08:01:09 +0000");
+ g_free (uri);
+
+ /* same <lastBuildDate> and <pubDate> */
+ uri = get_relative_uri (TEST_SRCDIR "560051.xml");
+ g_assert_cmpstr (parser_test_get_playlist_field (uri, TOTEM_PL_PARSER_FIELD_PUB_DATE), ==, "Mon, 8 Dec 2008 13:20:00 CST");
+ g_free (uri);
+
+ /* <pubDate> followed by <lastBuildDate> */
+ uri = get_relative_uri (TEST_SRCDIR "podcast-empty-description.rss");
+ g_assert_cmpstr (parser_test_get_playlist_field (uri, TOTEM_PL_PARSER_FIELD_PUB_DATE), ==, "Sun, 26 Jul 2020 20:07:40 +0000");
+ g_free (uri);
+
+ /* <lastBuildDate> followed by <pubDate> */
+ uri = get_relative_uri (TEST_SRCDIR "podcast-image-url.1.rss");
+ g_assert_cmpstr (parser_test_get_playlist_field (uri, TOTEM_PL_PARSER_FIELD_PUB_DATE), ==, "Wed, 23 Aug 2017 01:55:17 +0000");
+ g_free (uri);
+
+}
+
static void
test_parsing_hadess (void)
{
@@ -1568,6 +1600,7 @@ main (int argc, char *argv[])
g_test_add_func ("/parser/parsing/podcast_item_description", test_parsing_item_description);
g_test_add_func ("/parser/parsing/podcast_feed_image", test_parsing_feed_image);
g_test_add_func ("/parser/parsing/podcast_item_image", test_parsing_item_image);
+ g_test_add_func ("/parser/parsing/podcast_feed_pubdate", test_parsing_feed_pubdate);
g_test_add_func ("/parser/parsing/live_streaming", test_parsing_live_streaming);
g_test_add_func ("/parser/parsing/xml_mixed_cdata", test_parsing_xml_mixed_cdata);
g_test_add_func ("/parser/parsing/m3u_streaming", test_parsing_m3u_streaming);
diff --git a/plparse/tests/podcast-image-url.1.rss b/plparse/tests/podcast-image-url.1.rss
index a28eb88..b331bb9 100644
--- a/plparse/tests/podcast-image-url.1.rss
+++ b/plparse/tests/podcast-image-url.1.rss
@@ -5,8 +5,8 @@
<atom:link
href="http://feeds.soundcloud.com/users/soundcloud:users:320899690/sounds.rss?before=336780890" rel="next"
type="application/rss+xml"/>
<title>Exit Poll New England</title>
<link>http://soundcloud.com/exitpollnewengland</link>
- <pubDate>Wed, 23 Aug 2017 01:55:17 +0000</pubDate>
<lastBuildDate>Wed, 23 Aug 2017 01:55:17 +0000</lastBuildDate>
+ <pubDate>Mon, 07 Aug 2017 02:08:50 +0000</pubDate>
<ttl>60</ttl>
<language>en</language>
<copyright>All rights reserved</copyright>
@@ -54,4 +54,4 @@
<itunes:image href="http://i1.sndcdn.com/artworks-000237209681-dqpcbk-t3000x3000.jpg"/>
</item>
</channel>
- </rss>
\ No newline at end of file
+ </rss>
diff --git a/plparse/totem-pl-parser-podcast.c b/plparse/totem-pl-parser-podcast.c
index 237795e..3c445eb 100644
--- a/plparse/totem-pl-parser-podcast.c
+++ b/plparse/totem-pl-parser-podcast.c
@@ -142,6 +142,26 @@ set_longer_description (xml_node_t *node, const char **description)
}
}
+static void
+set_recent_date (xml_node_t *node, const char **date)
+{
+ if (node->data == NULL)
+ return;
+
+ if (*date) {
+ guint64 old, new;
+
+ old = totem_pl_parser_parse_date (*date, FALSE);
+ new = totem_pl_parser_parse_date (node->data, FALSE);
+
+ /* prefer recent date */
+ if (new <= old)
+ return;
+ }
+
+ *date = node->data;
+}
+
static TotemPlParserResult
parse_rss_item (TotemPlParser *parser, xml_node_t *parent)
{
@@ -312,8 +332,9 @@ parse_rss_items (TotemPlParser *parser, const char *uri, xml_node_t *parent)
if (href != NULL)
img = href;
} else if (g_ascii_strcasecmp (node->name, "lastBuildDate") == 0
- || g_ascii_strcasecmp (node->name, "pubDate") == 0) {
- pub_date = node->data;
+ || (g_ascii_strcasecmp (node->name, "pubDate") == 0)) {
+ /* prefer recent of <lastBuildDate> and <pubDate> date */
+ set_recent_date (node, &pub_date);
} else if (g_ascii_strcasecmp (node->name, "copyright") == 0) {
copyright = node->data;
}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]