[evolution-data-server/gnome-41] evo-I#1621 - Prevent IDN homograph attacks
- From: Milan Crha <mcrha src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evolution-data-server/gnome-41] evo-I#1621 - Prevent IDN homograph attacks
- Date: Thu, 30 Sep 2021 12:44:46 +0000 (UTC)
commit 5fc534c14b1eeef723c0234048c5a1fde1e13c65
Author: Milan Crha <mcrha redhat com>
Date: Thu Sep 30 14:39:01 2021 +0200
evo-I#1621 - Prevent IDN homograph attacks
Add methods that help recognize when a domain should be shown in ASCII,
and use them in the appropriate places.
Related to https://gitlab.gnome.org/GNOME/evolution/-/issues/1621
src/addressbook/libebook/e-destination.c | 42 +-
src/camel/CMakeLists.txt | 2 +
src/camel/camel-hostname-utils.c | 681 +++++++++++++++++++++++++++++++
src/camel/camel-hostname-utils.h | 33 ++
src/camel/camel-internet-address.c | 54 ++-
src/camel/camel-internet-address.h | 2 +
src/camel/camel-message-info-base.c | 53 +--
src/camel/camel-mime-filter-tohtml.c | 32 +-
src/camel/camel-net-utils.c | 143 +++++++
src/camel/camel-net-utils.h | 11 +
src/camel/camel-string-utils.c | 25 ++
src/camel/camel-string-utils.h | 2 +
src/camel/camel.h | 1 +
src/camel/tests/misc/CMakeLists.txt | 1 +
src/camel/tests/misc/test3.c | 132 ++++++
15 files changed, 1165 insertions(+), 49 deletions(-)
---
diff --git a/src/addressbook/libebook/e-destination.c b/src/addressbook/libebook/e-destination.c
index 475bbe45f..cf841004f 100644
--- a/src/addressbook/libebook/e-destination.c
+++ b/src/addressbook/libebook/e-destination.c
@@ -487,12 +487,14 @@ e_destination_set_contact (EDestination *dest,
raw = e_vcard_attribute_get_value (attr->data);
addr = camel_internet_address_new ();
- if (camel_address_unformat (CAMEL_ADDRESS (addr), raw) > 0 &&
- camel_internet_address_get (addr, 0, &name, &email)) {
- e_destination_set_name (s_dest, name);
- e_destination_set_email (s_dest, email);
-
- dest->priv->list_alldests = g_list_append
(dest->priv->list_alldests, s_dest);
+ if (camel_address_unformat (CAMEL_ADDRESS (addr), raw) > 0) {
+ camel_internet_address_sanitize_ascii_domain (addr);
+ if (camel_internet_address_get (addr, 0, &name,
&email)) {
+ e_destination_set_name (s_dest, name);
+ e_destination_set_email (s_dest, email);
+
+ dest->priv->list_alldests = g_list_append
(dest->priv->list_alldests, s_dest);
+ }
}
g_object_unref (addr);
@@ -743,13 +745,15 @@ e_destination_set_email (EDestination *dest,
if (email == NULL) {
if (dest->priv->email != NULL) {
- g_free (dest->priv->addr);
- dest->priv->addr = NULL;
+ g_free (dest->priv->email);
+ dest->priv->email = NULL;
changed = TRUE;
}
} else if (dest->priv->email == NULL || strcmp (dest->priv->email, email)) {
g_free (dest->priv->email);
- dest->priv->email = g_strdup (email);
+ dest->priv->email = camel_utils_sanitize_ascii_domain_in_address (email, TRUE);
+ if (!dest->priv->email)
+ dest->priv->email = g_strdup (email);
changed = TRUE;
}
@@ -995,6 +999,7 @@ e_destination_get_email (const EDestination *dest)
if (camel_address_unformat (CAMEL_ADDRESS (addr), priv->raw)) {
const gchar *camel_email = NULL;
+ camel_internet_address_sanitize_ascii_domain (addr);
if (camel_internet_address_get (addr, 0, NULL, &camel_email))
priv->email = g_strdup (camel_email);
}
@@ -1066,12 +1071,16 @@ e_destination_get_address (const EDestination *dest)
if (e_destination_is_evolution_list (dest)) {
destination_get_address (dest, addr);
+ camel_internet_address_sanitize_ascii_domain (addr);
priv->addr = camel_address_encode (CAMEL_ADDRESS (addr));
} else if (priv->raw) {
- if (camel_address_unformat (CAMEL_ADDRESS (addr), priv->raw))
+ if (camel_address_unformat (CAMEL_ADDRESS (addr), priv->raw)) {
+ camel_internet_address_sanitize_ascii_domain (addr);
priv->addr = camel_address_encode (CAMEL_ADDRESS (addr));
+ }
} else {
destination_get_address (dest, addr);
+ camel_internet_address_sanitize_ascii_domain (addr);
priv->addr = camel_address_encode (CAMEL_ADDRESS (addr));
}
@@ -1096,9 +1105,17 @@ e_destination_set_raw (EDestination *dest,
g_return_if_fail (raw != NULL);
if (dest->priv->raw == NULL || strcmp (dest->priv->raw, raw)) {
+ CamelInternetAddress *addr = camel_internet_address_new ();
e_destination_clear (dest);
- dest->priv->raw = g_strdup (raw);
+
+ if (camel_address_unformat (CAMEL_ADDRESS (addr), raw) > 0 &&
+ camel_internet_address_sanitize_ascii_domain (addr))
+ dest->priv->raw = camel_address_format (CAMEL_ADDRESS (addr));
+ else
+ dest->priv->raw = g_strdup (raw);
+
+ g_object_unref (addr);
g_signal_emit (dest, signals[CHANGED], 0);
}
@@ -1133,11 +1150,12 @@ e_destination_get_textrep (const EDestination *dest,
return name;
/* Make sure that our address gets quoted properly */
- if (name && email && dest->priv->textrep == NULL) {
+ if (email && dest->priv->textrep == NULL) {
CamelInternetAddress *addr = camel_internet_address_new ();
camel_internet_address_add (addr, name, email);
g_free (dest->priv->textrep);
+ camel_internet_address_sanitize_ascii_domain (addr);
dest->priv->textrep = camel_address_format (CAMEL_ADDRESS (addr));
g_object_unref (addr);
}
diff --git a/src/camel/CMakeLists.txt b/src/camel/CMakeLists.txt
index 7058b9c08..df537f992 100644
--- a/src/camel/CMakeLists.txt
+++ b/src/camel/CMakeLists.txt
@@ -46,6 +46,7 @@ set(SOURCES
camel-folder-thread.c
camel-folder.c
camel-gpg-context.c
+ camel-hostname-utils.c
camel-html-parser.c
camel-iconv.c
camel-index.c
@@ -185,6 +186,7 @@ set(HEADERS
camel-folder-thread.h
camel-folder.h
camel-gpg-context.h
+ camel-hostname-utils.h
camel-html-parser.h
camel-iconv.h
camel-index.h
diff --git a/src/camel/camel-hostname-utils.c b/src/camel/camel-hostname-utils.c
new file mode 100644
index 000000000..956f6c1cf
--- /dev/null
+++ b/src/camel/camel-hostname-utils.c
@@ -0,0 +1,681 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2021 Red Hat (www.redhat.com)
+ *
+ * This library is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This code is based on WebKit's URL Helpers:
+ * https://trac.webkit.org/browser/webkit/trunk/Source/WTF/wtf/URLHelpers.cpp?rev=278879
+ */
+
+#include "evolution-data-server-config.h"
+
+#include <unicode/uchar.h>
+#include <unicode/uscript.h>
+
+#include "camel-string-utils.h"
+#include "camel-hostname-utils.h"
+
+/* This needs to be higher than the UScriptCode for any of the scripts on the IDN allowed list.
+ * At one point we used USCRIPT_CODE_LIMIT from ICU, but there are two reasons not to use it.
+ * 1) ICU considers it deprecated, so by setting U_HIDE_DEPRECATED we can't see it.
+ * 2) No good reason to limit ourselves to scripts that existed in the ICU headers when
+ * WebKit was compiled.
+ * This is only really important for platforms that load an external IDN allowed script list.
+ * Not important for the compiled-in one.
+ */
+#define SCRIPT_CODE_LIMIT 256
+
+static guint32 allowed_idn_script_bits[(SCRIPT_CODE_LIMIT + 31) / 32];
+
+/* One-time initializer, suitable for g_once(): sets one bit in
+ * allowed_idn_script_bits for every script whose characters are allowed
+ * to be shown in their Unicode form. The @user_data is not used.
+ * Always returns NULL. */
+static gpointer
+camel_hostname_utils_init_global_memory (gpointer user_data)
+{
+	const UScriptCode scripts[] = {
+		USCRIPT_COMMON,
+		USCRIPT_INHERITED,
+		USCRIPT_ARABIC,
+		USCRIPT_ARMENIAN,
+		USCRIPT_BOPOMOFO,
+		USCRIPT_CANADIAN_ABORIGINAL,
+		USCRIPT_DEVANAGARI,
+		USCRIPT_DESERET,
+		USCRIPT_GUJARATI,
+		USCRIPT_GURMUKHI,
+		USCRIPT_HANGUL,
+		USCRIPT_HAN,
+		USCRIPT_HEBREW,
+		USCRIPT_HIRAGANA,
+		USCRIPT_KATAKANA_OR_HIRAGANA,
+		USCRIPT_KATAKANA,
+		USCRIPT_LATIN,
+		USCRIPT_TAMIL,
+		USCRIPT_THAI,
+		USCRIPT_YI
+	};
+	guint ii;
+
+	for (ii = 0; ii < G_N_ELEMENTS (scripts); ii++) {
+		gint32 script = (gint32) scripts[ii];
+
+		/* Ignore script codes which do not fit into the bit array. */
+		if (script >= 0 && script < SCRIPT_CODE_LIMIT) {
+			guint32 index = ((guint32) script) / 32;
+			/* Shift an unsigned one: a signed "1 << 31" is undefined behaviour. */
+			guint32 mask = ((guint32) 1) << (((guint32) script) % 32);
+
+			allowed_idn_script_bits[index] |= mask;
+		}
+	}
+
+	return NULL;
+}
+
+/* Returns TRUE when @code_point is one of the known lookalike characters
+ * and the script it belongs to is @expected_script; FALSE otherwise. */
+static gboolean
+is_lookalike_character_for_script (UScriptCode expected_script,
+				   UChar32 code_point)
+{
+	UScriptCode owning_script;
+
+	switch (code_point) {
+	case 0x0BE6: /* TAMIL DIGIT ZERO */
+		owning_script = USCRIPT_TAMIL;
+		break;
+	case 0x0548: /* ARMENIAN CAPITAL LETTER VO */
+	case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */
+	case 0x0551: /* ARMENIAN CAPITAL LETTER CO */
+	case 0x0555: /* ARMENIAN CAPITAL LETTER OH */
+	case 0x0578: /* ARMENIAN SMALL LETTER VO */
+	case 0x057D: /* ARMENIAN SMALL LETTER SEH */
+	case 0x0581: /* ARMENIAN SMALL LETTER CO */
+	case 0x0585: /* ARMENIAN SMALL LETTER OH */
+		owning_script = USCRIPT_ARMENIAN;
+		break;
+	default:
+		/* Not a tracked lookalike character at all. */
+		return FALSE;
+	}
+
+	return expected_script == owning_script;
+}
+
+/* Returns TRUE when ICU reports @expected_script as the script of @code_point.
+ * Any ICU error while looking the script up is treated as "no match". */
+static gboolean
+is_of_script_type (UScriptCode expected_script,
+		   UChar32 code_point)
+{
+	UErrorCode status = U_ZERO_ERROR;
+	UScriptCode detected_script;
+
+	detected_script = uscript_getScript (code_point, &status);
+
+	return status == U_ZERO_ERROR && detected_script == expected_script;
+}
+
+/* Returns TRUE when @character is an ASCII digit or an ASCII punctuation
+ * character, that is when it falls into one of the ranges '!'..'@',
+ * '['..'`' or '{'..'~'. */
+static gboolean
+is_ascii_digit_or_punctuation (UChar32 character)
+{
+	return (character >= '!' && character <= '@') ||
+	       (character >= '[' && character <= '`') ||
+	       (character >= '{' && character <= '~');
+}
+
+/* Returns TRUE when @character is an ASCII digit or punctuation character
+ * which the URL parser accepts inside of a host name. */
+static gboolean
+is_ascii_digit_or_valid_host_character (UChar32 character)
+{
+	gboolean rejected_by_url_parser;
+
+	if (!is_ascii_digit_or_punctuation (character))
+		return FALSE;
+
+	/* Things the URL Parser rejects: */
+	rejected_by_url_parser =
+		character == '#' ||
+		character == '%' ||
+		character == '/' ||
+		character == ':' ||
+		character == '?' ||
+		character == '@' ||
+		character == '[' ||
+		character == '\\' ||
+		character == ']';
+
+	return !rejected_by_url_parser;
+}
+
+/* Checks the pair of adjacent code points: the pair is a lookalike sequence
+ * when one of the two is a lookalike character of @expected_script while its
+ * neighbour is neither of that script nor an ASCII digit/valid host character.
+ * Returns FALSE when there is no previous code point or when it is a '/'. */
+static gboolean
+is_lookalike_sequence (UScriptCode expected_script,
+		       UChar32 previous_code_point,
+		       UChar32 code_point)
+{
+	if (!previous_code_point || previous_code_point == '/')
+		return FALSE;
+
+	/* The current character is the lookalike one, the previous does not fit. */
+	if (is_lookalike_character_for_script (expected_script, code_point) &&
+	    !is_of_script_type (expected_script, previous_code_point) &&
+	    !is_ascii_digit_or_valid_host_character (previous_code_point))
+		return TRUE;
+
+	/* The previous character is the lookalike one, the current does not fit. */
+	return is_lookalike_character_for_script (expected_script, previous_code_point) &&
+	       !is_of_script_type (expected_script, code_point) &&
+	       !is_ascii_digit_or_valid_host_character (code_point);
+}
+
+static gboolean
+is_lookalike_character (UChar32 previous_code_point,
+ UChar32 code_point)
+{
+ /* Decides whether @code_point, which follows @previous_code_point
+ * (0 when there is none), is an unsafe, lookalike character.
+ * Returns TRUE when it is, FALSE otherwise.
+ *
+ * This function treats the following as unsafe, lookalike characters:
+ * any non-printable character, any character considered as whitespace,
+ * any ignorable character, and emoji characters related to locks.
+ *
+ * We also considered the characters in Mozilla's list of characters
<http://kb.mozillazine.org/Network.IDN.blacklist_chars>.
+ *
+ * Some of the characters here will never appear once ICU has encoded.
+ * For example, ICU transforms most spaces into an ASCII space and most
+ * slashes into an ASCII solidus. But one of the two callers uses this
+ * on characters that have not been processed by ICU, so they are needed here.
+ */
+
+ if (!u_isprint (code_point) || u_isUWhiteSpace (code_point) || u_hasBinaryProperty (code_point,
UCHAR_DEFAULT_IGNORABLE_CODE_POINT))
+ return TRUE;
+
+ switch (code_point) {
+ case 0x00BC: /* VULGAR FRACTION ONE QUARTER */
+ case 0x00BD: /* VULGAR FRACTION ONE HALF */
+ case 0x00BE: /* VULGAR FRACTION THREE QUARTERS */
+ case 0x00ED: /* LATIN SMALL LETTER I WITH ACUTE */
+ /* 0x0131 LATIN SMALL LETTER DOTLESS I is intentionally not considered a lookalike character because
it is visually distinguishable from i and it has legitimate use in the Turkish language. */
+ case 0x01C0: /* LATIN LETTER DENTAL CLICK */
+ case 0x01C3: /* LATIN LETTER RETROFLEX CLICK */
+ case 0x0237: /* LATIN SMALL LETTER DOTLESS J */
+ case 0x0251: /* LATIN SMALL LETTER ALPHA */
+ case 0x0261: /* LATIN SMALL LETTER SCRIPT G */
+ case 0x0274: /* LATIN LETTER SMALL CAPITAL N */
+ case 0x027E: /* LATIN SMALL LETTER R WITH FISHHOOK */
+ case 0x02D0: /* MODIFIER LETTER TRIANGULAR COLON */
+ case 0x0335: /* COMBINING SHORT STROKE OVERLAY */
+ case 0x0337: /* COMBINING SHORT SOLIDUS OVERLAY */
+ case 0x0338: /* COMBINING LONG SOLIDUS OVERLAY */
+ case 0x0589: /* ARMENIAN FULL STOP */
+ case 0x05B4: /* HEBREW POINT HIRIQ */
+ case 0x05BC: /* HEBREW POINT DAGESH OR MAPIQ */
+ case 0x05C3: /* HEBREW PUNCTUATION SOF PASUQ */
+ case 0x05F4: /* HEBREW PUNCTUATION GERSHAYIM */
+ case 0x0609: /* ARABIC-INDIC PER MILLE SIGN */
+ case 0x060A: /* ARABIC-INDIC PER TEN THOUSAND SIGN */
+ case 0x0650: /* ARABIC KASRA */
+ case 0x0660: /* ARABIC-INDIC DIGIT ZERO */
+ case 0x066A: /* ARABIC PERCENT SIGN */
+ case 0x06D4: /* ARABIC FULL STOP */
+ case 0x06F0: /* EXTENDED ARABIC-INDIC DIGIT ZERO */
+ case 0x0701: /* SYRIAC SUPRALINEAR FULL STOP */
+ case 0x0702: /* SYRIAC SUBLINEAR FULL STOP */
+ case 0x0703: /* SYRIAC SUPRALINEAR COLON */
+ case 0x0704: /* SYRIAC SUBLINEAR COLON */
+ case 0x1735: /* PHILIPPINE SINGLE PUNCTUATION */
+ case 0x1D04: /* LATIN LETTER SMALL CAPITAL C */
+ case 0x1D0F: /* LATIN LETTER SMALL CAPITAL O */
+ case 0x1D1C: /* LATIN LETTER SMALL CAPITAL U */
+ case 0x1D20: /* LATIN LETTER SMALL CAPITAL V */
+ case 0x1D21: /* LATIN LETTER SMALL CAPITAL W */
+ case 0x1D22: /* LATIN LETTER SMALL CAPITAL Z */
+ case 0x1ECD: /* LATIN SMALL LETTER O WITH DOT BELOW */
+ case 0x2010: /* HYPHEN */
+ case 0x2011: /* NON-BREAKING HYPHEN */
+ case 0x2024: /* ONE DOT LEADER */
+ case 0x2027: /* HYPHENATION POINT */
+ case 0x2039: /* SINGLE LEFT-POINTING ANGLE QUOTATION MARK */
+ case 0x203A: /* SINGLE RIGHT-POINTING ANGLE QUOTATION MARK */
+ case 0x2041: /* CARET INSERTION POINT */
+ case 0x2044: /* FRACTION SLASH */
+ case 0x2052: /* COMMERCIAL MINUS SIGN */
+ case 0x2153: /* VULGAR FRACTION ONE THIRD */
+ case 0x2154: /* VULGAR FRACTION TWO THIRDS */
+ case 0x2155: /* VULGAR FRACTION ONE FIFTH */
+ case 0x2156: /* VULGAR FRACTION TWO FIFTHS */
+ case 0x2157: /* VULGAR FRACTION THREE FIFTHS */
+ case 0x2158: /* VULGAR FRACTION FOUR FIFTHS */
+ case 0x2159: /* VULGAR FRACTION ONE SIXTH */
+ case 0x215A: /* VULGAR FRACTION FIVE SIXTHS */
+ case 0x215B: /* VULGAR FRACTION ONE EIGHTH */
+ case 0x215C: /* VULGAR FRACTION THREE EIGHTHS */
+ case 0x215D: /* VULGAR FRACTION FIVE EIGHTHS */
+ case 0x215E: /* VULGAR FRACTION SEVEN EIGHTHS */
+ case 0x215F: /* FRACTION NUMERATOR ONE */
+ case 0x2212: /* MINUS SIGN */
+ case 0x2215: /* DIVISION SLASH */
+ case 0x2216: /* SET MINUS */
+ case 0x2236: /* RATIO */
+ case 0x233F: /* APL FUNCTIONAL SYMBOL SLASH BAR */
+ case 0x23AE: /* INTEGRAL EXTENSION */
+ case 0x244A: /* OCR DOUBLE BACKSLASH */
+ case 0x2571: /* BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT */
+ case 0x2572: /* BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT */
+ case 0x29F6: /* SOLIDUS WITH OVERBAR */
+ case 0x29F8: /* BIG SOLIDUS */
+ case 0x2AFB: /* TRIPLE SOLIDUS BINARY RELATION */
+ case 0x2AFD: /* DOUBLE SOLIDUS OPERATOR */
+ case 0x2FF0: /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT */
+ case 0x2FF1: /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO BELOW */
+ case 0x2FF2: /* IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT */
+ case 0x2FF3: /* IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW */
+ case 0x2FF4: /* IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND */
+ case 0x2FF5: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE */
+ case 0x2FF6: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM BELOW */
+ case 0x2FF7: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LEFT */
+ case 0x2FF8: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER LEFT */
+ case 0x2FF9: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM UPPER RIGHT */
+ case 0x2FFA: /* IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM LOWER LEFT */
+ case 0x2FFB: /* IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID */
+ case 0x3002: /* IDEOGRAPHIC FULL STOP */
+ case 0x3008: /* LEFT ANGLE BRACKET */
+ case 0x3014: /* LEFT TORTOISE SHELL BRACKET */
+ case 0x3015: /* RIGHT TORTOISE SHELL BRACKET */
+ case 0x3033: /* VERTICAL KANA REPEAT MARK UPPER HALF */
+ case 0x3035: /* VERTICAL KANA REPEAT MARK LOWER HALF */
+ case 0x321D: /* PARENTHESIZED KOREAN CHARACTER OJEON */
+ case 0x321E: /* PARENTHESIZED KOREAN CHARACTER O HU */
+ case 0x33AE: /* SQUARE RAD OVER S */
+ case 0x33AF: /* SQUARE RAD OVER S SQUARED */
+ case 0x33C6: /* SQUARE C OVER KG */
+ case 0x33DF: /* SQUARE A OVER M */
+ case 0x05B9: /* HEBREW POINT HOLAM */
+ case 0x05BA: /* HEBREW POINT HOLAM HASER FOR VAV */
+ case 0x05C1: /* HEBREW POINT SHIN DOT */
+ case 0x05C2: /* HEBREW POINT SIN DOT */
+ case 0x05C4: /* HEBREW MARK UPPER DOT */
+ case 0xA731: /* LATIN LETTER SMALL CAPITAL S */
+ case 0xA771: /* LATIN SMALL LETTER DUM */
+ case 0xA789: /* MODIFIER LETTER COLON */
+ case 0xFE14: /* PRESENTATION FORM FOR VERTICAL SEMICOLON */
+ case 0xFE15: /* PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK */
+ case 0xFE3F: /* PRESENTATION FORM FOR VERTICAL LEFT ANGLE BRACKET */
+ case 0xFE5D: /* SMALL LEFT TORTOISE SHELL BRACKET */
+ case 0xFE5E: /* SMALL RIGHT TORTOISE SHELL BRACKET */
+ case 0xFF0E: /* FULLWIDTH FULL STOP */
+ case 0xFF0F: /* FULLWIDTH SOLIDUS */
+ case 0xFF61: /* HALFWIDTH IDEOGRAPHIC FULL STOP */
+ case 0xFFFC: /* OBJECT REPLACEMENT CHARACTER */
+ case 0xFFFD: /* REPLACEMENT CHARACTER */
+ case 0x1F50F: /* LOCK WITH INK PEN */
+ case 0x1F510: /* CLOSED LOCK WITH KEY */
+ case 0x1F511: /* KEY */
+ case 0x1F512: /* LOCK */
+ case 0x1F513: /* OPEN LOCK */
+ return TRUE;
+ case 0x0307: /* COMBINING DOT ABOVE; unsafe only right after one of the base characters below */
+ return previous_code_point == 0x0237 || /* LATIN SMALL LETTER DOTLESS J */
+ previous_code_point == 0x0131 || /* LATIN SMALL LETTER DOTLESS I */
+ previous_code_point == 0x05D5; /* HEBREW LETTER VAV */
+ case '.': /* the label separator is never a lookalike */
+ return FALSE;
+ default: /* anything else is unsafe only as part of an Armenian or Tamil lookalike sequence */
+ return is_lookalike_sequence (USCRIPT_ARMENIAN, previous_code_point, code_point) ||
+ is_lookalike_sequence (USCRIPT_TAMIL, previous_code_point, code_point);
+ }
+}
+
+/* Walks the UTF-16 @buffer of @length code units and returns TRUE only when
+ * every code point belongs to a script set in allowed_idn_script_bits and
+ * none of them is a lookalike character with respect to its predecessor.
+ * Returns FALSE also when ICU fails to provide the script of a code point. */
+static gboolean
+all_characters_in_allowed_idn_script_list (const UChar *buffer,
+					   gint32 length)
+{
+	gint32 ii = 0;
+	UChar32 previous_code_point = 0;
+
+	while (ii < length) {
+		UChar32 cc;
+		UErrorCode error = U_ZERO_ERROR;
+		UScriptCode script;
+		guint32 index, mask;
+
+		/* Reads one code point (joining surrogate pairs) and advances ii. */
+		U16_NEXT (buffer, ii, length, cc);
+
+		script = uscript_getScript (cc, &error);
+		if (error != U_ZERO_ERROR)
+			return FALSE;
+
+		/* Script codes outside of the bit array cannot be allowed. */
+		if (script < 0 || script >= SCRIPT_CODE_LIMIT)
+			return FALSE;
+
+		index = ((guint32) script) / 32;
+		/* Shift an unsigned one: a signed "1 << 31" is undefined behaviour
+		 * and is reachable here for script codes equal to 31 modulo 32. */
+		mask = ((guint32) 1) << (((guint32) script) % 32);
+
+		if (!(allowed_idn_script_bits[index] & mask))
+			return FALSE;
+
+		if (is_lookalike_character (previous_code_point, cc))
+			return FALSE;
+
+		previous_code_point = cc;
+	}
+
+	return TRUE;
+}
+
+/* Scans the @buffer backwards from its last code unit and verifies each
+ * of them with @character_is_allowed. The scan ends successfully at the first
+ * not-allowed '.' or at the beginning of the @buffer; any other not-allowed
+ * code unit makes the function return FALSE. The @length should be positive. */
+static gboolean
+is_second_level_domain_name_allowed_by_tld_rules (const UChar *buffer,
+						   gint32 length,
+						   gboolean (* character_is_allowed) (UChar ch))
+{
+	gint32 pos;
+
+	g_return_val_if_fail (length > 0, FALSE);
+
+	pos = length;
+	while (pos-- > 0) {
+		UChar current = buffer[pos];
+
+		if (character_is_allowed (current))
+			continue;
+
+		/* Only check the second level domain. Lower level registrars may have different rules. */
+		return current == '.';
+	}
+
+	return TRUE;
+}
+
+/* When the @buffer of @length code units ends with @suffix (which has
+ * @n_suffix elements occupying @sizeof_suffix bytes) and has at least one
+ * code unit before it, verifies the part before the suffix with @func,
+ * stores the verdict into @out_result and returns TRUE. Returns FALSE,
+ * leaving @out_result untouched, when the suffix does not match. */
+static gboolean
+check_rules_if_suffix_matches (const UChar *buffer,
+			       gint length,
+			       const UChar suffix[],
+			       guint n_suffix,
+			       guint sizeof_suffix,
+			       gboolean (* func) (const UChar ch),
+			       gboolean *out_result)
+{
+	/* Check the sign first: comparing a negative gint with a guint would
+	 * convert it into a huge unsigned value and pass the length test. */
+	if (length <= 0 || (guint) length <= n_suffix)
+		return FALSE;
+
+	if (memcmp (buffer + length - n_suffix, suffix, sizeof_suffix) != 0)
+		return FALSE;
+
+	*out_result = is_second_level_domain_name_allowed_by_tld_rules (buffer, length - n_suffix, func);
+
+	return TRUE;
+}
+
+/* Returns TRUE when @ch is allowed in a Russian domain name. */
+static gboolean
+is_russian_domain_name_character (const UChar ch)
+{
+	/* Only modern Russian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Russian or Byelorussian domain name. */
+static gboolean
+is_russian_and_byelorussian_domain_name_character (const UChar ch)
+{
+	/* Russian and Byelorussian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x0456 ||
+	       ch == 0x045E || ch == 0x2019 ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Kazakh domain name. */
+static gboolean
+is_kazakh_domain_name_character (const UChar ch)
+{
+	/* Kazakh letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x04D9 ||
+	       ch == 0x0493 || ch == 0x049B || ch == 0x04A3 || ch == 0x04E9 ||
+	       ch == 0x04B1 || ch == 0x04AF || ch == 0x04BB || ch == 0x0456 ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Russian or Ukrainian domain name. */
+static gboolean
+is_russian_and_ukrainian_domain_name_character (const UChar ch)
+{
+	/* Russian and Ukrainian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x0491 ||
+	       ch == 0x0404 || ch == 0x0456 || ch == 0x0457 ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Serbian domain name. */
+static gboolean
+is_serbian_domain_name_character (const UChar ch)
+{
+	/* Serbian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x0438) || (ch >= 0x043A && ch <= 0x0448) ||
+	       ch == 0x0452 || ch == 0x0458 || ch == 0x0459 || ch == 0x045A ||
+	       ch == 0x045B || ch == 0x045F ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Macedonian domain name. */
+static gboolean
+is_macedonian_domain_name_character (const UChar ch)
+{
+	/* Macedonian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x0438) || (ch >= 0x043A && ch <= 0x0448) ||
+	       ch == 0x0453 || ch == 0x0455 || ch == 0x0458 || ch == 0x0459 ||
+	       ch == 0x045A || ch == 0x045C || ch == 0x045F ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Mongolian domain name. */
+static gboolean
+is_mongolian_domain_name_character (const UChar ch)
+{
+	/* Mongolian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x044f) || ch == 0x0451 || ch == 0x04E9 ||
+	       ch == 0x04AF ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Returns TRUE when @ch is allowed in a Bulgarian domain name. */
+static gboolean
+is_bulgarian_domain_name_character (const UChar ch)
+{
+	/* Bulgarian letters, digits and dashes are allowed.
+	 * The digits are tested with a range check, because g_ascii_isdigit()
+	 * casts its argument to guchar, thus a 16-bit value like 0x0E30 would
+	 * be truncated to '0' and accepted. */
+	return (ch >= 0x0430 && ch <= 0x044A) || ch == 0x044C ||
+	       (ch >= 0x044E && ch <= 0x0450) || ch == 0x045D ||
+	       (ch >= '0' && ch <= '9') || ch == '-';
+}
+
+/* Compares the end of the UTF-16 host name in @buffer (of @length code units,
+ * optionally ending with the root-domain dot) with a list of Cyrillic top
+ * level domains. When one of them matches, returns whether the label in front
+ * of it consists only of characters allowed by the rules of that domain.
+ * Returns FALSE when no listed top level domain matches or when the @buffer
+ * is empty. */
+static gboolean
+all_characters_allowed_by_tld_rules (const UChar *buffer,
+				     gint32 length)
+{
+	/* Nothing to check; also avoids reading buffer[-1] below. */
+	if (!buffer || length <= 0)
+		return FALSE;
+
+	/* Skip trailing dot for root domain. */
+	if (buffer[length - 1] == '.')
+		length--;
+
+	#define CHECK_RULES_IF_SUFFIX_MATCHES(suffix, func) G_STMT_START { \
+		gboolean result = FALSE; \
+		if (check_rules_if_suffix_matches (buffer, length, suffix, G_N_ELEMENTS (suffix), sizeof (suffix), func, &result)) \
+			return result; \
+		} G_STMT_END
+
+	{
+		/* http://cctld.ru/files/pdf/docs/rules_ru-rf.pdf */
+		static const UChar cyrillic_RF[] = {
+			'.',
+			0x0440, /* CYRILLIC SMALL LETTER ER */
+			0x0444, /* CYRILLIC SMALL LETTER EF */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_RF, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://rusnames.ru/rules.pl */
+		static const UChar cyrillic_RUS[] = {
+			'.',
+			0x0440, /* CYRILLIC SMALL LETTER ER */
+			0x0443, /* CYRILLIC SMALL LETTER U */
+			0x0441, /* CYRILLIC SMALL LETTER ES */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_RUS, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://ru.faitid.org/projects/moscow/documents/moskva/idn */
+		static const UChar cyrillic_MOSKVA[] = {
+			'.',
+			0x043C, /* CYRILLIC SMALL LETTER EM */
+			0x043E, /* CYRILLIC SMALL LETTER O */
+			0x0441, /* CYRILLIC SMALL LETTER ES */
+			0x043A, /* CYRILLIC SMALL LETTER KA */
+			0x0432, /* CYRILLIC SMALL LETTER VE */
+			0x0430, /* CYRILLIC SMALL LETTER A */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_MOSKVA, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://www.dotdeti.ru/foruser/docs/regrules.php */
+		static const UChar cyrillic_DETI[] = {
+			'.',
+			0x0434, /* CYRILLIC SMALL LETTER DE */
+			0x0435, /* CYRILLIC SMALL LETTER IE */
+			0x0442, /* CYRILLIC SMALL LETTER TE */
+			0x0438, /* CYRILLIC SMALL LETTER I */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_DETI, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://corenic.org - rules not published. The word is Russian, so only allowing Russian at this time,
+		   although we may need to revise the checks if this ends up being used with other languages spoken in Russia. */
+		static const UChar cyrillic_ONLAYN[] = {
+			'.',
+			0x043E, /* CYRILLIC SMALL LETTER O */
+			0x043D, /* CYRILLIC SMALL LETTER EN */
+			0x043B, /* CYRILLIC SMALL LETTER EL */
+			0x0430, /* CYRILLIC SMALL LETTER A */
+			0x0439, /* CYRILLIC SMALL LETTER SHORT I */
+			0x043D, /* CYRILLIC SMALL LETTER EN */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_ONLAYN, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://corenic.org - same as above. */
+		static const UChar cyrillic_SAYT[] = {
+			'.',
+			0x0441, /* CYRILLIC SMALL LETTER ES */
+			0x0430, /* CYRILLIC SMALL LETTER A */
+			0x0439, /* CYRILLIC SMALL LETTER SHORT I */
+			0x0442, /* CYRILLIC SMALL LETTER TE */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_SAYT, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://pir.org/products/opr-domain/ - rules not published. According to the registry site,
+		   the intended audience is "Russian and other Slavic-speaking markets".
+		   Chrome appears to only allow Russian, so sticking with that for now. */
+		static const UChar cyrillic_ORG[] = {
+			'.',
+			0x043E, /* CYRILLIC SMALL LETTER O */
+			0x0440, /* CYRILLIC SMALL LETTER ER */
+			0x0433, /* CYRILLIC SMALL LETTER GHE */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_ORG, is_russian_domain_name_character);
+	}
+
+	{
+		/* http://cctld.by/rules.html */
+		static const UChar cyrillic_BEL[] = {
+			'.',
+			0x0431, /* CYRILLIC SMALL LETTER BE */
+			0x0435, /* CYRILLIC SMALL LETTER IE */
+			0x043B, /* CYRILLIC SMALL LETTER EL */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_BEL, is_russian_and_byelorussian_domain_name_character);
+	}
+
+	{
+		/* http://www.nic.kz/docs/poryadok_vnedreniya_kaz_ru.pdf */
+		static const UChar cyrillic_KAZ[] = {
+			'.',
+			0x049B, /* CYRILLIC SMALL LETTER KA WITH DESCENDER */
+			0x0430, /* CYRILLIC SMALL LETTER A */
+			0x0437, /* CYRILLIC SMALL LETTER ZE */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_KAZ, is_kazakh_domain_name_character);
+	}
+
+	{
+		/* http://uanic.net/docs/documents-ukr/Rules%20of%20UKR_v4.0.pdf */
+		static const UChar cyrillic_UKR[] = {
+			'.',
+			0x0443, /* CYRILLIC SMALL LETTER U */
+			0x043A, /* CYRILLIC SMALL LETTER KA */
+			0x0440, /* CYRILLIC SMALL LETTER ER */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_UKR, is_russian_and_ukrainian_domain_name_character);
+	}
+
+	{
+		/* http://www.rnids.rs/data/DOKUMENTI/idn-srb-policy-termsofuse-v1.4-eng.pdf */
+		static const UChar cyrillic_SRB[] = {
+			'.',
+			0x0441, /* CYRILLIC SMALL LETTER ES */
+			0x0440, /* CYRILLIC SMALL LETTER ER */
+			0x0431, /* CYRILLIC SMALL LETTER BE */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_SRB, is_serbian_domain_name_character);
+	}
+
+	{
+		/* http://marnet.mk/doc/pravilnik-mk-mkd.pdf */
+		static const UChar cyrillic_MKD[] = {
+			'.',
+			0x043C, /* CYRILLIC SMALL LETTER EM */
+			0x043A, /* CYRILLIC SMALL LETTER KA */
+			0x0434, /* CYRILLIC SMALL LETTER DE */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_MKD, is_macedonian_domain_name_character);
+	}
+
+	{
+		/* https://www.mon.mn/cs/ */
+		static const UChar cyrillic_MON[] = {
+			'.',
+			0x043C, /* CYRILLIC SMALL LETTER EM */
+			0x043E, /* CYRILLIC SMALL LETTER O */
+			0x043D, /* CYRILLIC SMALL LETTER EN */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_MON, is_mongolian_domain_name_character);
+	}
+
+	{
+		/* https://www.icann.org/sites/default/files/packages/lgr/lgr-second-level-bulgarian-30aug16-en.html */
+		static const UChar cyrillic_BG[] = {
+			'.',
+			0x0431, /* CYRILLIC SMALL LETTER BE */
+			0x0433 /* CYRILLIC SMALL LETTER GHE */
+		};
+		CHECK_RULES_IF_SUFFIX_MATCHES (cyrillic_BG, is_bulgarian_domain_name_character);
+	}
+
+	/* The helper macro is local to this function. */
+	#undef CHECK_RULES_IF_SUFFIX_MATCHES
+
+	/* Not a known top level domain with special rules. */
+	return FALSE;
+}
+
+/**
+ * camel_hostname_utils_requires_ascii:
+ * @hostname: a host name
+ *
+ * Checks whether the @hostname requires conversion to ASCII. That can
+ * be the case when a character in it can look like an ASCII character,
+ * even though it is a different Unicode letter. This can be used to display
+ * host names in a way which is not vulnerable to IDN homograph attacks.
+ *
+ * A @hostname consisting of ASCII characters only never requires
+ * the conversion.
+ *
+ * Returns: %TRUE, when the @hostname should be converted to an ASCII equivalent,
+ * %FALSE, when it can be shown as is.
+ *
+ * Since: 3.44
+ **/
+gboolean
+camel_hostname_utils_requires_ascii (const gchar *hostname)
+{
+ static GOnce initialized = G_ONCE_INIT;
+ UErrorCode uerror = U_ZERO_ERROR;
+ int32_t uhost_len = 0;
+ gboolean needs_conversion = FALSE;
+
+ if (camel_string_is_all_ascii (hostname))
+ return FALSE;
+
+ g_once (&initialized, camel_hostname_utils_init_global_memory, NULL); /* fills the allowed-script bit array exactly once */
+
+ u_strFromUTF8 (NULL, 0, &uhost_len, hostname, -1, &uerror); /* no destination buffer: used only to learn the UTF-16 length */
+ if (uhost_len > 0) {
+ UChar *uhost = g_new0 (UChar, uhost_len + 2);
+
+ uerror = U_ZERO_ERROR; /* reset the status left by the length-only call above */
+ u_strFromUTF8 (uhost, uhost_len + 1, &uhost_len, hostname, -1, &uerror);
+ if (uerror == U_ZERO_ERROR && uhost_len > 0) {
+ needs_conversion = !all_characters_in_allowed_idn_script_list (uhost, uhost_len) ||
+ !all_characters_allowed_by_tld_rules (uhost, uhost_len); /* NOTE(review): the TLD check
+ * returns FALSE for every top level domain it does not list and the listed
+ * ones are Cyrillic, which is not among the allowed scripts, thus this
+ * expression looks to be TRUE for any non-ASCII host name. The WebKit code
+ * this is based on appears to accept a host name when either check passes,
+ * which would be "&&" here - confirm the intent before changing it. */
+ } else {
+ needs_conversion = uerror != U_ZERO_ERROR; /* a failed conversion requires ASCII; an empty result does not */
+ }
+
+ g_free (uhost);
+ } else {
+ needs_conversion = TRUE; /* could not get a usable length, stay on the safe side */
+ }
+
+ return needs_conversion;
+}
diff --git a/src/camel/camel-hostname-utils.h b/src/camel/camel-hostname-utils.h
new file mode 100644
index 000000000..dff7eee45
--- /dev/null
+++ b/src/camel/camel-hostname-utils.h
@@ -0,0 +1,33 @@
+/* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
+/*
+ * Copyright (C) 2021 Red Hat (www.redhat.com)
+ *
+ * This library is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if !defined (__CAMEL_H_INSIDE__) && !defined (CAMEL_COMPILATION)
+#error "Only <camel/camel.h> can be included directly."
+#endif
+
+#ifndef CAMEL_HOSTNAME_UTILS_H
+#define CAMEL_HOSTNAME_UTILS_H
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+gboolean camel_hostname_utils_requires_ascii (const gchar *hostname);
+
+G_END_DECLS
+
+#endif /* CAMEL_HOSTNAME_UTILS_H */
diff --git a/src/camel/camel-internet-address.c b/src/camel/camel-internet-address.c
index 790730738..bfb7e4ab6 100644
--- a/src/camel/camel-internet-address.c
+++ b/src/camel/camel-internet-address.c
@@ -20,6 +20,7 @@
#include <string.h>
#include "camel-internet-address.h"
+#include "camel-hostname-utils.h"
#include "camel-mime-utils.h"
#include "camel-net-utils.h"
@@ -452,8 +453,7 @@ camel_internet_address_ensure_ascii_domains (CamelInternetAddress *addr)
domain = camel_host_idna_to_ascii (a->address + at_pos + 1);
if (at_pos >= 0) {
- gchar *name = g_strndup (a->address, at_pos);
- address = g_strconcat (name, "@", domain, NULL);
+ address = g_strdup_printf ("%.*s@%s", at_pos, a->address, domain);
} else {
address = domain;
domain = NULL;
@@ -466,6 +466,56 @@ camel_internet_address_ensure_ascii_domains (CamelInternetAddress *addr)
}
}
+/**
+ * camel_internet_address_sanitize_ascii_domain:
+ * @addr: a #CamelInternetAddress
+ *
+ * Checks the addresses in @addr for any suspicious characters in the domain
+ * name and converts those domains into their ASCII representation. In contrast
+ * to camel_internet_address_ensure_ascii_domains(), this converts the domains
+ * into ASCII only when needed, as returned by camel_hostname_utils_requires_ascii().
+ *
+ * Returns: %TRUE, when converted at least one address
+ *
+ * Since: 3.44
+ **/
+gboolean
+camel_internet_address_sanitize_ascii_domain (CamelInternetAddress *addr)
+{
+	gboolean did_convert = FALSE;
+	guint ii;
+
+	g_return_val_if_fail (CAMEL_IS_INTERNET_ADDRESS (addr), FALSE);
+
+	for (ii = 0; ii < addr->priv->addresses->len; ii++) {
+		struct _address *a = g_ptr_array_index (addr->priv->addresses, ii);
+		gchar *address, *domain;
+		gint at_pos = -1;
+
+		/* skip entries without an address, with a domain reported as ASCII-only,
+		 * with no usable at_pos, with nothing following at_pos, or whose domain
+		 * does not require the conversion */
+		if (!a->address || domain_contains_only_ascii (a->address, &at_pos) ||
+		    at_pos < 0 || (gsize) at_pos + 1 >= strlen (a->address) ||
+		    !camel_hostname_utils_requires_ascii (a->address + at_pos + 1))
+			continue;
+
+		/* at_pos >= 0 is guaranteed here, thus the part before it is always kept as is */
+		domain = camel_host_idna_to_ascii (a->address + at_pos + 1);
+		address = g_strdup_printf ("%.*s@%s", at_pos, a->address, domain);
+
+		g_free (domain);
+		g_free (a->address);
+		a->address = address;
+
+		did_convert = TRUE;
+	}
+
+	return did_convert;
+}
+
/**
* camel_internet_address_find_address:
* @addr: a #CamelInternetAddress object
diff --git a/src/camel/camel-internet-address.h b/src/camel/camel-internet-address.h
index 956246025..93996d2b8 100644
--- a/src/camel/camel-internet-address.h
+++ b/src/camel/camel-internet-address.h
@@ -82,6 +82,8 @@ gint camel_internet_address_find_address
const gchar **namep);
void camel_internet_address_ensure_ascii_domains
(CamelInternetAddress *addr);
+gboolean camel_internet_address_sanitize_ascii_domain
+ (CamelInternetAddress *addr);
/* utility functions, for network/display formatting */
gchar * camel_internet_address_encode_address
diff --git a/src/camel/camel-message-info-base.c b/src/camel/camel-message-info-base.c
index 432655b84..76ebefe3b 100644
--- a/src/camel/camel-message-info-base.c
+++ b/src/camel/camel-message-info-base.c
@@ -21,6 +21,7 @@
#include "camel-folder-summary.h"
#include "camel-message-info.h"
+#include "camel-net-utils.h"
#include "camel-string-utils.h"
#include "camel-message-info-base.h"
@@ -364,6 +365,28 @@ message_info_base_get_from (const CamelMessageInfo *mi)
return result;
}
+/* Assigns @_value to bmi->_member and stores into `changed` whether the member
+ * was modified. When the sanitize function returns a converted address, that
+ * display-formatted string is stored instead of @_value. Expects `bmi` and
+ * `changed` to be declared in the scope where the macro is used. */
+#define SET_ADDRESS(_member, _value) G_STMT_START { \
+	changed = g_strcmp0 (bmi->_member, _value) != 0; \
+	\
+	if (changed) { \
+		gchar *in_ascii = NULL; \
+		\
+		/* a NULL value clears the member; the sanitize function asserts on a non-NULL argument */ \
+		if ((_value) != NULL) \
+			in_ascii = camel_utils_sanitize_ascii_domain_in_address (_value, TRUE); \
+		\
+		if (in_ascii) { \
+			if (g_strcmp0 (in_ascii, bmi->_member) == 0) { \
+				/* the sanitized form is what is already stored */ \
+				changed = FALSE; \
+				g_free (in_ascii); \
+			} else { \
+				camel_pstring_free (bmi->_member); \
+				bmi->_member = camel_pstring_add (in_ascii, TRUE); \
+			} \
+		} else { \
+			camel_pstring_free (bmi->_member); \
+			bmi->_member = camel_pstring_strdup (_value); \
+		} \
+	} \
+	} G_STMT_END
+
static gboolean
message_info_base_set_from (CamelMessageInfo *mi,
const gchar *from)
@@ -377,12 +400,7 @@ message_info_base_set_from (CamelMessageInfo *mi,
camel_message_info_property_lock (mi);
- changed = g_strcmp0 (bmi->priv->from, from) != 0;
-
- if (changed) {
- camel_pstring_free (bmi->priv->from);
- bmi->priv->from = camel_pstring_strdup (from);
- }
+ SET_ADDRESS (priv->from, from);
camel_message_info_property_unlock (mi);
@@ -419,12 +437,7 @@ message_info_base_set_to (CamelMessageInfo *mi,
camel_message_info_property_lock (mi);
- changed = g_strcmp0 (bmi->priv->to, to) != 0;
-
- if (changed) {
- camel_pstring_free (bmi->priv->to);
- bmi->priv->to = camel_pstring_strdup (to);
- }
+ SET_ADDRESS (priv->to, to);
camel_message_info_property_unlock (mi);
@@ -461,12 +474,7 @@ message_info_base_set_cc (CamelMessageInfo *mi,
camel_message_info_property_lock (mi);
- changed = g_strcmp0 (bmi->priv->cc, cc) != 0;
-
- if (changed) {
- camel_pstring_free (bmi->priv->cc);
- bmi->priv->cc = camel_pstring_strdup (cc);
- }
+ SET_ADDRESS (priv->cc, cc);
camel_message_info_property_unlock (mi);
@@ -503,18 +511,15 @@ message_info_base_set_mlist (CamelMessageInfo *mi,
camel_message_info_property_lock (mi);
- changed = g_strcmp0 (bmi->priv->mlist, mlist) != 0;
-
- if (changed) {
- camel_pstring_free (bmi->priv->mlist);
- bmi->priv->mlist = camel_pstring_strdup (mlist);
- }
+ SET_ADDRESS (priv->mlist, mlist);
camel_message_info_property_unlock (mi);
return changed;
}
+#undef SET_ADDRESS
+
static guint32
message_info_base_get_size (const CamelMessageInfo *mi)
{
diff --git a/src/camel/camel-mime-filter-tohtml.c b/src/camel/camel-mime-filter-tohtml.c
index 6731635b7..25f216fc7 100644
--- a/src/camel/camel-mime-filter-tohtml.c
+++ b/src/camel/camel-mime-filter-tohtml.c
@@ -23,6 +23,7 @@
#include <string.h>
#include "camel-mime-filter-tohtml.h"
+#include "camel-net-utils.h"
#include "camel-url-scanner.h"
#include "camel-utf8.h"
@@ -459,6 +460,9 @@ html_convert (CamelMimeFilter *mime_filter,
do {
 if (camel_url_scanner_scan (priv->scanner, start, len - (len > 0 && start[len - 1] == 0 ? 1 : 0), &match)) {
+ gchar *url_str, *sanitized_url;
+ gint prefix_len = strlen (match.prefix), url_len;
+
/* write out anything before the first regex match */
outptr = writeln (
mime_filter,
@@ -471,18 +475,22 @@ html_convert (CamelMimeFilter *mime_filter,
matchlen = match.um_eo - match.um_so;
+ url_str = g_strdup_printf ("%s%.*s", match.prefix, (gint) matchlen, start);
+ sanitized_url = camel_utils_sanitize_ascii_domain_in_url_str (url_str);
+ if (sanitized_url) {
+ g_free (url_str);
+ url_str = sanitized_url;
+ sanitized_url = NULL;
+ }
+
+ url_len = strlen (url_str);
+
/* write out the href tag */
 outptr = append_string_verbatim (mime_filter, "<a href=\"", outptr, &outend);
- /* prefix shouldn't need escaping, but let's be safe */
- outptr = writeln (
- mime_filter,
- match.prefix,
- match.prefix + strlen (match.prefix),
- outptr, &outend);
outptr = writeln (
mime_filter,
- start,
- start + matchlen,
+ url_str,
+ url_str + url_len,
outptr, &outend);
outptr = append_string_verbatim (
mime_filter, "\">",
@@ -491,10 +499,10 @@ html_convert (CamelMimeFilter *mime_filter,
/* now write the matched string */
outptr = writeln (
mime_filter,
- start,
- start + matchlen,
+ url_str + prefix_len,
+ url_str + url_len,
outptr, &outend);
- priv->column += matchlen;
+ priv->column += url_len - prefix_len;
start += matchlen;
len -= matchlen;
@@ -502,6 +510,8 @@ html_convert (CamelMimeFilter *mime_filter,
outptr = append_string_verbatim (
mime_filter, "</a>",
outptr, &outend);
+
+ g_free (url_str);
} else {
/* nothing matched so write out the remainder of this line buffer */
outptr = writeln (
diff --git a/src/camel/camel-net-utils.c b/src/camel/camel-net-utils.c
index 93e1409c4..ef37877f1 100644
--- a/src/camel/camel-net-utils.c
+++ b/src/camel/camel-net-utils.c
@@ -40,6 +40,8 @@
#include "camel-object.h"
#include "camel-operation.h"
#include "camel-service.h"
+#include "camel-hostname-utils.h"
+#include "camel-string-utils.h"
#define d(x)
@@ -861,3 +863,144 @@ camel_host_idna_to_ascii (const gchar *host)
return ascii;
}
+
+/**
+ * camel_utils_sanitize_ascii_domain_in_address:
+ * @email_address: an email address as string
+ * @do_format: what format will be returned
+ *
+ * Verifies whether the domain part of the @email_address needs to be
+ * converted to ASCII and, if so, does the conversion. With @do_format
+ * set to %TRUE the result is formatted for display, with %FALSE it is
+ * encoded for use in the message headers. When no conversion was
+ * needed, %NULL is returned.
+ *
+ * Returns: (nullable): the @email_address with only ASCII letters,
+ *    if such conversion is needed or %NULL, when no conversion was
+ *    required.
+ *
+ * See: camel_hostname_utils_requires_ascii(), camel_internet_address_sanitize_ascii_domain(),
+ *    camel_utils_sanitize_ascii_domain_in_url_str()
+ *
+ * Since: 3.44
+ **/
+gchar *
+camel_utils_sanitize_ascii_domain_in_address (const gchar *email_address,
+					      gboolean do_format)
+{
+	CamelInternetAddress *addr;
+	CamelAddress *generic;
+	gchar *sanitized = NULL;
+
+	g_return_val_if_fail (email_address != NULL, NULL);
+
+	/* nothing to convert in a string made of ASCII only */
+	if (camel_string_is_all_ascii (email_address))
+		return NULL;
+
+	addr = camel_internet_address_new ();
+	generic = CAMEL_ADDRESS (addr);
+
+	/* try the encoded (header) form first, then the display form */
+	if (camel_address_decode (generic, email_address) == -1)
+		camel_address_unformat (generic, email_address);
+
+	if (camel_internet_address_sanitize_ascii_domain (addr))
+		sanitized = do_format ? camel_address_format (generic) : camel_address_encode (generic);
+
+	g_clear_object (&addr);
+
+	return sanitized;
+}
+
+/**
+ * camel_utils_sanitize_ascii_domain_in_url_str:
+ * @url_str: a URL as string
+ *
+ * Verifies whether the host name in the @url_str needs to be converted
+ * to ASCII and does the conversion when it does. When there was nothing
+ * to convert, %NULL is returned.
+ *
+ * Returns: (nullable): converted @url_str to ASCII host name, or %NULL, when
+ *    no conversion was needed.
+ *
+ * See: camel_hostname_utils_requires_ascii(), camel_utils_sanitize_ascii_domain_in_url()
+ *
+ * Since: 3.44
+ **/
+gchar *
+camel_utils_sanitize_ascii_domain_in_url_str (const gchar *url_str)
+{
+	CamelURL *parsed;
+	gchar *converted = NULL;
+
+	g_return_val_if_fail (url_str != NULL, NULL);
+
+	/* nothing to convert in a string made of ASCII only */
+	if (camel_string_is_all_ascii (url_str))
+		return NULL;
+
+	parsed = camel_url_new (url_str, NULL);
+	if (parsed == NULL)
+		return NULL;
+
+	/* serialize the URL again only when its host name was changed */
+	if (camel_utils_sanitize_ascii_domain_in_url (parsed))
+		converted = camel_url_to_string (parsed, 0);
+
+	camel_url_free (parsed);
+
+	return converted;
+}
+
+/**
+ * camel_utils_sanitize_ascii_domain_in_url:
+ * @url: a #CamelURL
+ *
+ * Checks whether the host name of the @url requires conversion
+ * to ASCII and converts it, if needed. For a "mailto" URL without
+ * a host the path is examined instead: when it contains an '@', only
+ * the part after the first '@' is treated as the host name, otherwise
+ * the whole path is.
+ *
+ * Returns: %TRUE, when the conversion was required.
+ *
+ * See: camel_hostname_utils_requires_ascii(), camel_utils_sanitize_ascii_domain_in_url_str()
+ *
+ * Since: 3.44
+ **/
+gboolean
+camel_utils_sanitize_ascii_domain_in_url (CamelURL *url)
+{
+	g_return_val_if_fail (url != NULL, FALSE);
+
+	if (!url->host && url->path && url->protocol && g_ascii_strcasecmp (url->protocol, "mailto") == 0) {
+		const gchar *at_pos = strchr (url->path, '@');
+		/* never feed the part before the '@' into the host name conversion */
+		const gchar *domain = at_pos ? at_pos + 1 : url->path;
+		gchar *ascii_domain, *tmp;
+
+		if (!camel_hostname_utils_requires_ascii (domain))
+			return FALSE;
+
+		ascii_domain = camel_host_idna_to_ascii (domain);
+
+		if (at_pos) {
+			tmp = g_strdup_printf ("%.*s@%s", (gint) (at_pos - url->path), url->path, ascii_domain);
+			g_free (ascii_domain);
+		} else {
+			/* no '@' in the path, the converted string replaces it as a whole */
+			tmp = ascii_domain;
+		}
+
+		g_free (url->path);
+		url->path = tmp;
+
+		return TRUE;
+	}
+
+	/* a URL without any host has nothing to convert */
+	if (url->host && camel_hostname_utils_requires_ascii (url->host)) {
+		gchar *ascii_host = camel_host_idna_to_ascii (url->host);
+		g_free (url->host);
+		url->host = ascii_host;
+
+		return TRUE;
+	}
+
+	return FALSE;
+}
diff --git a/src/camel/camel-net-utils.h b/src/camel/camel-net-utils.h
index 8090263f2..0432c1096 100644
--- a/src/camel/camel-net-utils.h
+++ b/src/camel/camel-net-utils.h
@@ -37,6 +37,9 @@ struct sockaddr;
struct addrinfo;
#endif
+#include <camel/camel-internet-address.h>
+#include <camel/camel-url.h>
+
G_BEGIN_DECLS
#ifndef _WIN32
@@ -96,6 +99,14 @@ void camel_freeaddrinfo (struct addrinfo *host);
gchar * camel_host_idna_to_ascii (const gchar *host);
+gchar * camel_utils_sanitize_ascii_domain_in_address
+ (const gchar *email_address,
+ gboolean do_format);
+gchar * camel_utils_sanitize_ascii_domain_in_url_str
+ (const gchar *url_str);
+gboolean camel_utils_sanitize_ascii_domain_in_url
+ (CamelURL *url);
+
G_END_DECLS
#ifdef _WIN32
diff --git a/src/camel/camel-string-utils.c b/src/camel/camel-string-utils.c
index f362c853d..2dfcdf23d 100644
--- a/src/camel/camel-string-utils.c
+++ b/src/camel/camel-string-utils.c
@@ -374,3 +374,28 @@ camel_pstring_dump_stat (void)
g_mutex_unlock (&string_pool_lock);
}
+
+/**
+ * camel_string_is_all_ascii:
+ * @str: (nullable): a string to check, or %NULL
+ *
+ * Returns: %TRUE, when the @str is %NULL, an empty string or when
+ *    it contains only ASCII characters, that is bytes below 0x80.
+ *
+ * Since: 3.44
+ **/
+gboolean
+camel_string_is_all_ascii (const gchar *str)
+{
+	const guchar *ptr;
+
+	if (!str)
+		return TRUE;
+
+	/* Compare as unsigned bytes: the signedness of plain char is
+	 * implementation-defined, thus a `< 0` test never matches where
+	 * char is unsigned and every string would be claimed as ASCII. */
+	for (ptr = (const guchar *) str; *ptr; ptr++) {
+		if (*ptr >= 0x80)
+			return FALSE;
+	}
+
+	return TRUE;
+}
diff --git a/src/camel/camel-string-utils.h b/src/camel/camel-string-utils.h
index a66fcdf36..33306e833 100644
--- a/src/camel/camel-string-utils.h
+++ b/src/camel/camel-string-utils.h
@@ -42,6 +42,8 @@ const gchar * camel_pstring_peek (const gchar *string);
gboolean camel_pstring_contains (const gchar *string);
void camel_pstring_dump_stat (void);
+gboolean camel_string_is_all_ascii (const gchar *str);
+
G_END_DECLS
#endif /* CAMEL_STRING_UTILS_H */
diff --git a/src/camel/camel.h b/src/camel/camel.h
index 0df1baf22..f5685f653 100644
--- a/src/camel/camel.h
+++ b/src/camel/camel.h
@@ -44,6 +44,7 @@
#include <camel/camel-folder-summary.h>
#include <camel/camel-folder-thread.h>
#include <camel/camel-gpg-context.h>
+#include <camel/camel-hostname-utils.h>
#include <camel/camel-html-parser.h>
#include <camel/camel-iconv.h>
#include <camel/camel-index.h>
diff --git a/src/camel/tests/misc/CMakeLists.txt b/src/camel/tests/misc/CMakeLists.txt
index 272925cc3..7aae19694 100644
--- a/src/camel/tests/misc/CMakeLists.txt
+++ b/src/camel/tests/misc/CMakeLists.txt
@@ -1,6 +1,7 @@
set(TESTS
test1
test2
+ test3
utf7
split
rfc2047
diff --git a/src/camel/tests/misc/test3.c b/src/camel/tests/misc/test3.c
new file mode 100644
index 000000000..42eb57743
--- /dev/null
+++ b/src/camel/tests/misc/test3.c
@@ -0,0 +1,132 @@
+/*
+ * This library is free software: you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "evolution-data-server-config.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "camel-test.h"
+
+/* Verifies camel_hostname_utils_requires_ascii() against host names
+ * with a known expected result. */
+static void
+detect_hostname_bad_chars (void)
+{
+	struct _data {
+		const gchar *hostname;
+		gboolean needs_convert;
+	} data[] = {
+		{ "example.com", FALSE },
+		/* "\xd0\xb0" is the UTF-8 encoding of U+0430, used in place of the Latin 'a' */
+		{ "ex\xd0\xb0" "mple.com", TRUE }
+	};
+	gint ii;
+
+	camel_test_start ("Detect hostname bad chars");
+
+	for (ii = 0; ii < G_N_ELEMENTS (data); ii++) {
+		gboolean needs_convert = camel_hostname_utils_requires_ascii (data[ii].hostname);
+		check_msg (needs_convert == data[ii].needs_convert,
+			"Failed on [%d] (%s): returns %d, expected %d", ii, data[ii].hostname, needs_convert, data[ii].needs_convert);
+	}
+
+	camel_test_end ();
+}
+
+/* Verifies camel_utils_sanitize_ascii_domain_in_address() in both the
+ * display-formatted (do_format = TRUE) and the header-encoded
+ * (do_format = FALSE) variant. A NULL expectation means that no
+ * conversion is expected. */
+static void
+convert_hostname_bad_chars_email (void)
+{
+	struct _data {
+		const gchar *value;
+		const gchar *fmt_expected;
+		const gchar *enc_expected;
+	} data[] = {
+		{ "user@example.com", NULL, NULL },
+		{ "user@ex\xd0\xb0" "mple.com",
+		  "user@xn--exmple-4nf.com",
+		  "user@xn--exmple-4nf.com" },
+		{ "Žába1 <1st@žába.no.where>",
+		  "Žába1 <1st@xn--ba-lia14d.no.where>",
+		  "=?iso-8859-2?Q?=AE=E1ba1?= <1st@xn--ba-lia14d.no.where>" },
+		{ "Zaba2 <2nd@zab\xd0\xb0" ".no.where>",
+		  "Zaba2 <2nd@xn--zab-8cd.no.where>",
+		  "Zaba2 <2nd@xn--zab-8cd.no.where>" },
+		{ "Žába1 <1st@žába.no.where>, Zaba2 <2nd@zab\xd0\xb0" ".no.where>",
+		  "Žába1 <1st@xn--ba-lia14d.no.where>, Zaba2 <2nd@xn--zab-8cd.no.where>",
+		  "=?iso-8859-2?Q?=AE=E1ba1?= <1st@xn--ba-lia14d.no.where>, Zaba2\n\t <2nd@xn--zab-8cd.no.where>" }
+	};
+	gint ii;
+
+	camel_test_start ("Convert hostname bad chars in email");
+
+	for (ii = 0; ii < G_N_ELEMENTS (data); ii++) {
+		gchar *converted = camel_utils_sanitize_ascii_domain_in_address (data[ii].value, TRUE);
+		check_msg (g_strcmp0 (converted, data[ii].fmt_expected) == 0,
+			"Failed on [%d] (%s): returns '%s', expected formatted '%s'", ii, data[ii].value, converted, data[ii].fmt_expected);
+		g_free (converted);
+
+		converted = camel_utils_sanitize_ascii_domain_in_address (data[ii].value, FALSE);
+		check_msg (g_strcmp0 (converted, data[ii].enc_expected) == 0,
+			"Failed on [%d] (%s): returns '%s', expected encoded '%s'", ii, data[ii].value, converted, data[ii].enc_expected);
+		g_free (converted);
+	}
+
+	camel_test_end ();
+}
+
+/* Verifies camel_utils_sanitize_ascii_domain_in_url_str() on mailto:,
+ * http:, https: and ftp: URLs. A NULL expectation means that no
+ * conversion is expected. */
+static void
+convert_hostname_bad_chars_url (void)
+{
+	struct _data {
+		const gchar *value;
+		const gchar *expected;
+	} data[] = {
+		{ "mailto:user@example.com", NULL },
+		{ "mailto:user@ex\xd0\xb0" "mple.com?subject=Tést",
+		  "mailto:user@xn--exmple-4nf.com?subject=T%c3%a9st" },
+		{ "http://žába.no.where/index.html?param1=a&param2=b#fragment",
+		  "http://xn--ba-lia14d.no.where/index.html?param1=a&param2=b#fragment" },
+		{ "https://1st@žába.no.where/",
+		  "https://1st@xn--ba-lia14d.no.where/" },
+		{ "ftp://2nd@zab\xd0\xb0" ".no.where/index.html",
+		  "ftp://2nd@xn--zab-8cd.no.where/index.html" }
+	};
+	gint ii;
+
+	camel_test_start ("Convert hostname bad chars in URL");
+
+	for (ii = 0; ii < G_N_ELEMENTS (data); ii++) {
+		gchar *converted = camel_utils_sanitize_ascii_domain_in_url_str (data[ii].value);
+		check_msg (g_strcmp0 (converted, data[ii].expected) == 0,
+			"Failed on [%d] (%s): returns '%s', expected '%s'", ii, data[ii].value, converted, data[ii].expected);
+		g_free (converted);
+	}
+
+	camel_test_end ();
+}
+
+/* Test entry point: calls camel_test_init() and then runs each test
+ * case of this file in turn. */
+gint
+main (gint argc, gchar **argv)
+{
+	camel_test_init (argc, argv);
+
+	detect_hostname_bad_chars ();
+	convert_hostname_bad_chars_email ();
+	convert_hostname_bad_chars_url ();
+
+	return 0;
+}
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]