[totem-pl-parser] podcast: Always prefer item tags with longer descriptions



commit 973bd03a8cc3fccf90a399236366a01feadfd80a
Author: crvi <crvisqr gmail com>
Date:   Wed Jul 29 00:24:56 2020 +0530

    podcast: Always prefer item tags with longer descriptions
    
    This is to handle rss items with description tags with different
    descriptions. We now look for the longer description of <description>
    and <itunes:summary> for items. We avoid looking for item description
    in <itunes:subtitle> as it could contain the full text subtitle of the
    episode.

 plparse/tests/parser.c                      | 24 ++++++++++++++++++++++++
 plparse/tests/podcast-description.rss       |  6 +++---
 plparse/tests/podcast-empty-description.rss |  2 +-
 plparse/totem-pl-parser-podcast.c           |  3 ++-
 4 files changed, 30 insertions(+), 5 deletions(-)
---
diff --git a/plparse/tests/parser.c b/plparse/tests/parser.c
index da3b0dc..71e2227 100644
--- a/plparse/tests/parser.c
+++ b/plparse/tests/parser.c
@@ -796,6 +796,29 @@ test_parsing_feed_description (void)
        g_free (uri);
 }
 
+static void
+test_parsing_item_description (void)
+{
+       char *uri;
+       const char *description1;
+       const char *description2;
+
+       description1 =
+               "H&M ad and hair standards in the black community";
+       description2 =
+               "Wie versprochen die zweite Hälfte unseres kleinen Auftritts in Frankfurt. Ist ab sofort auch in voller Länge auf YouTube zu finden: https://youtu.be/GAQakfNHGj8";
+
+       /* test for longer item description */
+       uri = get_relative_uri (TEST_SRCDIR "podcast-description.rss");
+       g_assert_cmpstr (parser_test_get_entry_field (uri, TOTEM_PL_PARSER_FIELD_DESCRIPTION), ==, description1);
+       g_free (uri);
+
+       /* test for empty item description */
+       uri = get_relative_uri (TEST_SRCDIR "podcast-empty-description.rss");
+       g_assert_cmpstr (parser_test_get_entry_field (uri, TOTEM_PL_PARSER_FIELD_DESCRIPTION), ==, description2);
+       g_free (uri);
+}
+
 static void
 test_parsing_hadess (void)
 {
@@ -1478,6 +1501,7 @@ main (int argc, char *argv[])
                g_test_add_func ("/parser/parsing/podcast_content_type", test_parsing_content_type);
                g_test_add_func ("/parser/parsing/podcast_medium", test_parsing_medium);
                g_test_add_func ("/parser/parsing/podcast_feed_description", test_parsing_feed_description);
+               g_test_add_func ("/parser/parsing/podcast_item_description", test_parsing_item_description);
                g_test_add_func ("/parser/parsing/live_streaming", test_parsing_live_streaming);
                g_test_add_func ("/parser/parsing/xml_mixed_cdata", test_parsing_xml_mixed_cdata);
                g_test_add_func ("/parser/parsing/m3u_streaming", test_parsing_m3u_streaming);
diff --git a/plparse/tests/podcast-description.rss b/plparse/tests/podcast-description.rss
index 6712e42..98a11d1 100644
--- a/plparse/tests/podcast-description.rss
+++ b/plparse/tests/podcast-description.rss
@@ -35,8 +35,8 @@
       <itunes:author>Busola Saka</itunes:author>
       <itunes:explicit>no</itunes:explicit>
       <itunes:summary>H&amp;M ad and hair standards in the black community</itunes:summary>
-      <itunes:subtitle>H&amp;M ad and hair standards in the black community</itunes:subtitle>
-      <description>H&amp;M ad and hair standards in the black community</description>
+      <itunes:subtitle>H&amp;M ad and hair standards in the black</itunes:subtitle>
+      <description>H&amp;M ad and hair standards in </description>
       <enclosure type="audio/mpeg" url="http://feeds.soundcloud.com/stream/684738454-akataisabadword-tougher-than-nigerian-hair.mp3" length="4186042"/>
       <itunes:image href="http://i1.sndcdn.com/artworks-000601159948-xg47rg-t3000x3000.jpg"/>
     </item><item>
@@ -80,4 +80,4 @@
       <itunes:image href="http://i1.sndcdn.com/artworks-000575458196-rpbwsb-t3000x3000.jpg"/>
     </item>
       </channel>
-    </rss>
\ No newline at end of file
+    </rss>
diff --git a/plparse/tests/podcast-empty-description.rss b/plparse/tests/podcast-empty-description.rss
index 9aff0a4..5f4e69b 100644
--- a/plparse/tests/podcast-empty-description.rss
+++ b/plparse/tests/podcast-empty-description.rss
@@ -60,7 +60,7 @@
       <itunes:episode>125</itunes:episode>
       <itunes:episodeType>full</itunes:episodeType>
       <itunes:subtitle>...mit den beiden betrunkenen Autodirigenten.</itunes:subtitle>
-      <itunes:summary>Wie versprochen die zweite Hälfte unseres kleinen Auftritts in Frankfurt. Ist ab sofort auch in voller Länge auf YouTube zu finden: https://youtu.be/GAQakfNHGj8</itunes:summary>
+      <itunes:summary/>
       <itunes:explicit>yes</itunes:explicit>
       <itunes:keywords>Jahrhunderthalle,Autokino,live</itunes:keywords>
       <itunes:author>Bastian Bielendorfer und Reinhard Remfort</itunes:author>
diff --git a/plparse/totem-pl-parser-podcast.c b/plparse/totem-pl-parser-podcast.c
index 0bad26f..6e69e0e 100644
--- a/plparse/totem-pl-parser-podcast.c
+++ b/plparse/totem-pl-parser-podcast.c
@@ -151,7 +151,8 @@ parse_rss_item (TotemPlParser *parser, xml_node_t *parent)
                        id = node->data;
                } else if (g_ascii_strcasecmp (node->name, "description") == 0
                           || g_ascii_strcasecmp (node->name, "itunes:summary") == 0) {
-                       description = node->data;
+                       /* prefer longer item descriptions */
+                       set_longer_description (node, &description);
                } else if (g_ascii_strcasecmp (node->name, "author") == 0
                           || g_ascii_strcasecmp (node->name, "itunes:author") == 0) {
                        author = node->data;


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]