[gnome-maps/wip/mlundblad/localized-names-from-overpass: 4/5] WIP: Add module with utility functions for localized OSM names




commit 07502427cbcb03ac811a7d916a8c69b2a7c3ae42
Author: Marcus Lundblad <ml update uu se>
Date:   Fri Oct 9 22:39:26 2020 +0200

    WIP: Add module with utility functions for localized OSM names

 src/org.gnome.Maps.src.gresource.xml |   1 +
 src/osmNames.js                      | 166 +++++++++++++++++++++++++++++++++++
 2 files changed, 167 insertions(+)
---
diff --git a/src/org.gnome.Maps.src.gresource.xml b/src/org.gnome.Maps.src.gresource.xml
index ab2e12f0..8c455163 100644
--- a/src/org.gnome.Maps.src.gresource.xml
+++ b/src/org.gnome.Maps.src.gresource.xml
@@ -45,6 +45,7 @@
     <file>osmConnection.js</file>
     <file>osmEdit.js</file>
     <file>osmEditDialog.js</file>
+    <file>osmNames.js</file>
     <file>osmTypeSearchEntry.js</file>
     <file>osmTypeListRow.js</file>
     <file>osmTypePopover.js</file>
diff --git a/src/osmNames.js b/src/osmNames.js
new file mode 100644
index 00000000..7f1968bf
--- /dev/null
+++ b/src/osmNames.js
@@ -0,0 +1,166 @@
+/* -*- Mode: JS2; indent-tabs-mode: nil; js2-basic-offset: 4 -*- */
+/* vim: set et ts=4 sw=4: */
+/*
+ * Copyright (c) 2020 Marcus Lundblad
+ *
+ * GNOME Maps is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * GNOME Maps is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with GNOME Maps; if not, see <http://www.gnu.org/licenses/>.
+ *
+ * Author: Marcus Lundblad <ml update uu se>
+ */
+
+/**
+ * Utility functions for getting localized names of OSM objects.
+ * See https://wiki.openstreetmap.org/wiki/Multilingual_names
+ */
+
+const Utils = imports.utils
+
+/*
+ * Writing systems (scripts). The values are consecutive integers starting
+ * at 0, so that code can iterate from 0 through LAST_WRITING_SYSTEM, and
+ * they are the keys of the two lookup tables below.
+ */
+const ARABIC     = 0;
+const ARMENIAN   = 1;
+const BENGALI    = 2;
+const CHINESE    = 3;
+const CYRILLIC   = 4;
+const DEVANAGARI = 5;
+const GEORGIAN   = 6;
+// former, misspelled identifier; kept as an alias for existing references
+const GEORIGIAN  = GEORGIAN;
+const GEEZ       = 7;
+const GREEK      = 8;
+const HEBREW     = 9;
+const JAPANESE   = 10;
+const KOREAN     = 11;
+const LATIN      = 12;
+const LAST_WRITING_SYSTEM = LATIN;
+
+/**
+ * Lookup table from writing system (script) to the codes of the languages
+ * that most commonly use that script.
+ *
+ * The keys are computed from the writing system constants above, so the
+ * table is indexed as languageWritingSystemMap[ARABIC] and so on (not by the
+ * constant's name as a string). There is no entry for LATIN; a language that
+ * is not listed under any other script is not found in this table.
+ *
+ * See:
+ * https://en.wikipedia.org/wiki/Arabic_script
+ * https://en.wikipedia.org/wiki/Cyrillic_alphabets
+ *
+ */
+const languageWritingSystemMap = {
+    [ARABIC]:     new Set(['ar', 'bal', 'bft', 'bhd', 'brh', 'bsk', 'fa', 'khv',
+                           'ks', 'ur', 'pa', 'ps', 'sd', 'skr', 'ug', 'ckb']),
+    // 'hy' is Armenian ('am' is Amharic, which is listed under GEEZ)
+    [ARMENIAN]:   new Set(['hy']),
+    [BENGALI]:    new Set(['as', 'bn', 'bpy', 'ctg', 'mni', 'rkt', 'syl']),
+    [CHINESE]:    new Set(['zh']),
+    [CYRILLIC]:   new Set(['ab', 'abq', 'ady', 'alt', 'alr', 'aqc', 'av', 'ba',
+                           'be', 'bg', 'bgx', 'ce', 'chm', 'ckt', 'cnr', 'crh',
+                           'cv', 'dar', 'dlg', 'dng', 'eve', 'evn', 'gld', 'inh',
+                           'itl', 'jdt', 'kbd', 'kjh', 'koi', 'kpy', 'krc', 'kum',
+                           'ky', 'lbe', 'lez', 'mn', 'mk', 'nog', 'oaa', 'os',
+                           'ru', 'rue', 'sah', 'sgh', 'sjd', 'sr', 'sty', 'tab',
+                           'tg', 'tt', 'ttt', 'tyv', 'uby', 'uk', 'ulc', 'uum',
+                           'yah', 'yai']),
+    [DEVANAGARI]: new Set(['awa', 'anp', 'bgc', 'bhb', 'bho', 'brx', 'doi', 'hi',
+                           'hne', 'kok', 'mag', 'mai', 'mr', 'ne', 'new', 'raj',
+                           'sa', 'sgj']),
+    [GEORGIAN]:   new Set(['ka']),
+    [GEEZ]:       new Set(['am', 'gez', 'har', 'ti', 'tig']),
+    // the language code for Greek is 'el' ('gr' is not a language code)
+    [GREEK]:      new Set(['el']),
+    [HEBREW]:     new Set(['he', 'yi']),
+    [JAPANESE]:   new Set(['ja']),
+    [KOREAN]:     new Set(['ko'])
+};
+
+/**
+ * Lookup table from writing system (script) to the two-letter codes of the
+ * countries where that script is the predominant one.
+ *
+ * Keyed by the writing system constants, in the same way as
+ * languageWritingSystemMap. There is no entry for LATIN; a country that is
+ * not listed under any other script is not found in this table.
+ */
+const countryWritingSystemMap = {
+    [ARABIC]:   new Set(['AE', 'AF', 'BH', 'DZ', 'EG', 'IQ', 'IR', 'JO', 'KW',
+                         'LB', 'LY', 'MA', 'OM', 'PK', 'PS', 'QA', 'SA', 'SD',
+                         'SY', 'TN', 'YE']),
+    // Bangladesh is 'BD' ('BG' is Bulgaria, listed under CYRILLIC)
+    [BENGALI]:  new Set(['BD']),
+    [CHINESE]:  new Set(['CN', 'HK', 'MO', 'TW']),
+    [CYRILLIC]: new Set(['BG', 'BY', 'KG', 'MN', 'TJ', 'RS', 'RU', 'UA']),
+    [GEORGIAN]: new Set(['GE']),
+    [GEEZ]:     new Set(['ET', 'ER']),
+    [GREEK]:    new Set(['CY', 'GR']),
+    [HEBREW]:   new Set(['IL']),
+    [JAPANESE]: new Set(['JP']),
+    // South Korea is 'KR' and North Korea is 'KP'
+    [KOREAN]:   new Set(['KR', 'KP'])
+};
+
+/**
+ * Look up the name of an OSM object in a given language.
+ *
+ * @param {Object} tags - tags of the object, keyed by tag name
+ * @param {string} language - language code, used as the suffix of the
+ *     'name:<language>' tag
+ * @returns {string|undefined} the value of the matching tag, or a falsy
+ *     value when the object has no name in that language
+ */
+function getNameForLanguage(tags, language) {
+    const nameInLanguage = tags['name:' + language];
+
+    if (nameInLanguage)
+        return nameInLanguage;
+
+    /* Norwegian names are by convention tagged in OSM with the general code
+     * 'no', and with the specific codes for Bokmål (nb) and Nynorsk (nn)
+     * only when the two differ. So when there is no name under the specific
+     * code, look under 'no' instead.
+     */
+    const isNorwegianVariant = language === 'nb' || language === 'nn';
+
+    return isNorwegianVariant ? tags['name:no'] : nameInLanguage;
+}
+
+/**
+ * Pick the name to use for an OSM object when it has no name tagged in the
+ * requested language.
+ *
+ * The candidates are tried in this order:
+ *  1. the 'name' tag, when the requested language uses the writing system
+ *     that is predominant in the country
+ *  2. a romanized name, for the countries handled in the switch below
+ *  3. the 'int_name' tag
+ *  4. the 'name:en' tag
+ *  5. the 'name' tag
+ *
+ * @param {Object} tags - tags of the object, keyed by tag name
+ * @param {string} country - two-letter country code of where the object is
+ *     located
+ * @param {string} language - language code of the requested language
+ * @returns {string|undefined} the selected name, or a falsy value when none
+ *     of the tags above is set
+ */
+function getFallbackNameForLanguage(tags, country, language) {
+    /* only take this shortcut when there actually is a 'name' tag, otherwise
+     * go on and try the other candidates
+     */
+    if (tags.name && _predominantWritingSystemMatchesLanguage(country, language))
+        return tags.name;
+
+    // TODO: country/territory-specific heuristics for fallback, romanization etc.
+    let intName;
+
+    switch (country) {
+        case 'JP':
+            intName = tags['name:ja-Latn'] || tags['name:ja-rm'];
+            break;
+        // South Korea and North Korea
+        case 'KR':
+        case 'KP':
+            intName = tags['name:ko-Latn'];
+            break;
+        case 'RS':
+            intName = tags['name:sr-Latn'];
+            break;
+    }
+
+    return intName || tags.int_name || tags['name:en'] || tags.name;
+}
+
+/**
+ * Find the writing system that a language is written in.
+ *
+ * @param {string} language - language code
+ * @returns {number} the first writing system constant under which the
+ *     language is listed in languageWritingSystemMap, or LATIN when it is
+ *     not listed at all
+ */
+function _getWritingSystemOfLanguage(language) {
+    for (let writingSystem = 0; writingSystem <= LAST_WRITING_SYSTEM;
+         writingSystem++) {
+        let languages = languageWritingSystemMap[writingSystem];
+
+        if (languages && languages.has(language))
+            return writingSystem;
+    }
+
+    // fallback on Latin
+    return LATIN;
+}
+
+/**
+ * Check whether a language is written in the writing system that is
+ * predominant in a country.
+ *
+ * Both sides fall back on Latin when they are not listed in the lookup
+ * tables, so an unlisted language matches an unlisted country.
+ *
+ * @param {string} country - two-letter country code
+ * @param {string} language - language code
+ * @returns {boolean} true when the two writing systems are the same
+ */
+function _predominantWritingSystemMatchesLanguage(country, language) {
+    return _getWritingSystemOfCountry(country) ===
+           _getWritingSystemOfLanguage(language);
+}
+
+/**
+ * Find the writing system that is predominant in a country.
+ *
+ * @param {string} country - two-letter country code
+ * @returns {number} the first writing system constant under which the
+ *     country is listed in countryWritingSystemMap, or LATIN when it is not
+ *     listed at all
+ */
+function _getWritingSystemOfCountry(country) {
+    for (let writingSystem = 0; writingSystem <= LAST_WRITING_SYSTEM;
+         writingSystem++) {
+        let mapping = countryWritingSystemMap[writingSystem];
+
+        if (mapping && mapping.has(country))
+            return writingSystem;
+    }
+
+    // fallback on Latin
+    return LATIN;
+}


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]