[gnome-maps/wip/mlundblad/wikidata: 1/3] WIP: wikipedia: Add function to fetch article from Wikidata




commit a4f56e2261e55affce8e3b30446b781783a15034
Author: Marcus Lundblad <ml dfupdate se>
Date:   Tue Oct 4 23:23:49 2022 +0200

    WIP: wikipedia: Add function to fetch article from Wikidata

 src/wikipedia.js | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 150 insertions(+), 1 deletion(-)
---
diff --git a/src/wikipedia.js b/src/wikipedia.js
index 80ad3a7d..89889890 100644
--- a/src/wikipedia.js
+++ b/src/wikipedia.js
@@ -32,6 +32,11 @@ import * as Utils from './utils.js';
  */
 const WP_REGEX = /^[a-z][a-z][a-z]?(\-[a-z]+)?$|^simple$/;
 
+/**
+ * Wikidata properties (P18 is looked up in an entity's claims for its image)
+ */
+const WIKIDATA_PROPERTY_IMAGE = 'P18';
+
 let _soupSession = null;
 function _getSoupSession() {
     if (_soupSession === null) {
@@ -43,6 +48,8 @@ function _getSoupSession() {
 
 let _thumbnailCache = {};
 let _metadataCache = {};
+let _wikidataCache = {}; // Wikidata ID -> parsed wbgetentities response
+let _wikidataImageSourceCache = {}; // "imageName/size" -> thumbnail URL
 
 export function getLanguage(wiki) {
     return wiki.split(':')[0];
@@ -155,6 +162,144 @@ export function fetchArticleInfo(wiki, size, metadataCb, thumbnailCb) {
     });
 }
 
+/*
+ * Fetch various metadata about a Wikidata reference.
+ *
+ * @wikidata is the Wikidata entity ID, sent as "ids" to wbgetentities.
+ * @defaultArticle is the native Wikipedia article, if set, for the object.
+ *                 When present, it is used as a fallback if none of the
+ *                 sitelinks of the Wikidata entity matches a user's language.
+ * @size is the maximum width of the thumbnail.
+ *
+ * Calls @metadataCb with the lang:title pair for the article and an object
+ * containing information about the article (see the relevant MediaWiki API
+ * documentation for its keys/values), or with null and {} if the lookup fails.
+ *
+ * Calls @thumbnailCb with the Gdk.Pixbuf of the icon, or null on failure.
+ */
+export function fetchArticleInfoForWikidata(wikidata, defaultArticle,
+                                            size, metadataCb, thumbnailCb) {
+    let cachedWikidata = _wikidataCache[wikidata];
+
+    if (cachedWikidata) {
+        _onWikidataFetched(wikidata, defaultArticle, cachedWikidata, size,
+                           metadataCb, thumbnailCb);
+        return;
+    }
+
+    let uri = 'https://www.wikidata.org/w/api.php';
+    let encodedForm = Soup.form_encode_hash({ action: 'wbgetentities',
+                                              ids:    wikidata,
+                                              format: 'json' });
+    let msg = Soup.Message.new_from_encoded_form('GET', uri, encodedForm);
+    let session = _getSoupSession();
+
+    session.send_and_read_async(msg, GLib.PRIORITY_DEFAULT, null,
+                                     (source, res) => {
+        if (msg.get_status() !== Soup.Status.OK) {
+            log("Failed to request Wikidata entities: " + msg.reason_phrase);
+            metadataCb(null, {});
+            thumbnailCb?.(null); // the thumbnail callback is optional
+            return;
+        }
+
+        let buffer = session.send_and_read_finish(res).get_data();
+        let response = JSON.parse(Utils.getBufferText(buffer));
+
+        Utils.debug('entities: ' + JSON.stringify(response, '', 2));
+        _wikidataCache[wikidata] = response;
+        _onWikidataFetched(wikidata, defaultArticle, response, size,
+                           metadataCb, thumbnailCb);
+    });
+}
+
+/* Handle a fetched Wikidata entity: start the thumbnail fetch for its image
+ * claim, then fetch the article for the first preferred language having a
+ * sitelink, else @defaultArticle, else report empty results. */
+function _onWikidataFetched(wikidata, defaultArticle, response, size,
+                            metadataCb, thumbnailCb) {
+    let entity = response?.entities?.[wikidata];
+    let sitelinks = entity?.sitelinks;
+
+    Utils.debug('sitelinks: ' + JSON.stringify(sitelinks, '', 2));
+
+    if (!sitelinks) {
+        Utils.debug('No sitelinks element in response');
+        metadataCb(null, {});
+        thumbnailCb?.(null);
+        return;
+    }
+
+    let imageSnak = entity.claims?.[WIKIDATA_PROPERTY_IMAGE]?.[0]?.mainsnak;
+    let imageName = imageSnak?.datavalue?.value;
+
+    // without a thumbnail callback there is no one to receive the image
+    if (imageName && thumbnailCb) {
+        _fetchWikidataThumbnail(imageName, size, thumbnailCb);
+        // handed off, so the article fetch must not report a thumbnail too
+        thumbnailCb = null;
+    }
+
+    for (let language of _getLanguages()) {
+        // sitelinks are keyed in the form langwiki, e.g. "enwiki"
+        let sitelink = sitelinks[language + 'wiki'];
+        if (sitelink) {
+            let article = `${language}:${sitelink.title}`;
+            Utils.debug('fetching article info: ' + article);
+            fetchArticleInfo(article, size, metadataCb, thumbnailCb);
+            return;
+        }
+    }
+
+    if (defaultArticle) {
+        // no sitelink matches a preferred language: use the "wikipedia" tag
+        fetchArticleInfo(defaultArticle, size, metadataCb, thumbnailCb);
+    } else {
+        metadataCb(null, {});
+        thumbnailCb?.(null);
+    }
+}
+
+/* Resolve the thumbnail URL of the Wikidata image @imageName (the single page
+ * of the imageinfo query) and fetch it; calls @thumbnailCb(null) if none. */
+function _fetchWikidataThumbnail(imageName, size, thumbnailCb) {
+    let cachedImageUrl = _wikidataImageSourceCache[imageName + '/' + size];
+    if (cachedImageUrl) {
+        _fetchThumbnailImage(imageName, size, cachedImageUrl, thumbnailCb);
+        return;
+    }
+
+    let uri = 'https://wikipedia.org/w/api.php';
+    let encodedForm = Soup.form_encode_hash({
+        action: 'query', prop: 'imageinfo', iiprop: 'url', format: 'json',
+        iiurlwidth: size + '', titles: 'Image:' + imageName });
+    let msg = Soup.Message.new_from_encoded_form('GET', uri, encodedForm);
+    let session = _getSoupSession();
+
+    Utils.debug('uri: ' + msg.get_uri().to_string());
+    session.send_and_read_async(msg, GLib.PRIORITY_DEFAULT, null,
+                                     (source, res) => {
+        if (msg.get_status() !== Soup.Status.OK) {
+            log("Failed to request Wikidata image thumbnail URL: " +
+                msg.reason_phrase);
+            thumbnailCb(null);
+            return;
+        }
+
+        let buffer = session.send_and_read_finish(res).get_data();
+        let response = JSON.parse(Utils.getBufferText(buffer));
+        let [page] = Object.values(response?.query?.pages ?? {});
+        let thumburl = page?.imageinfo?.[0]?.thumburl;
+
+        if (thumburl) {
+            _fetchThumbnailImage(imageName, size, thumburl, thumbnailCb);
+            _wikidataImageSourceCache[imageName + '/' + size] = thumburl;
+        } else {
+            thumbnailCb(null);
+        }
+    });
+}
+
 function _onMetadataFetched(wiki, page, size, metadataCb, thumbnailCb) {
     /* Try to get a thumbnail *before* following language links--the primary
        article probably has the best thumbnail image */
@@ -218,7 +363,7 @@ function _fetchThumbnailImage(wiki, size, source, callback) {
    the original article should be used. */
 function _findLanguageLink(wiki, page) {
     let originalLang = getLanguage(wiki);
-    let languages = GLib.get_language_names().map((lang) => lang.split(/[\._\-]/)[0]);
+    let languages = _getLanguages();
 
     if (!languages.includes(originalLang)) {
         let langlinks = {};
@@ -233,3 +378,7 @@ function _findLanguageLink(wiki, page) {
         }
     }
 }
+
+function _getLanguages() { // locale names up to the first '.', '_' or '-'
+    return Array.from(GLib.get_language_names(), (l) => l.split(/[\._\-]/)[0]);
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]