[gnome-keysign: 30/75] gpgkey: Make the UID "safe" for direct consumption
- From: Gitlab System User <gitlab src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gnome-keysign: 30/75] gpgkey: Make the UID "safe" for direct consumption
- Date: Fri, 29 Sep 2017 11:24:51 +0000 (UTC)
commit 4feef12c8cac584eeb70bbbd35c263807fb1980c
Author: Tobias Mueller <muelli cryptobitch de>
Date: Sun Jul 23 10:29:50 2017 +0200
gpgkey: Make the UID "safe" for direct consumption
If we have a key with a non-UTF-8 UID, gpgme will return a proper string
with surrogates. These are the actual undecodable bytes, encoded in a
way that allows us to recover the original bytes later, should
we need them. We don't, though, because we don't have to call gpg with
the exact byte sequence again. Hence we can make the UID safe for
display centrally, i.e. in the gpgkey.UID class.
keysign/KeyPresent.py | 5 ++---
keysign/gpgkey.py | 39 ++++++++++++++++++++++++++++++++-------
keysign/keyconfirm.py | 5 ++---
keysign/keylistwidget.py | 3 +--
keysign/util.py | 22 ----------------------
tests/test_gpgmeh.py | 12 ++++++++----
tests/test_uids.py | 13 +++++++------
7 files changed, 52 insertions(+), 47 deletions(-)
---
diff --git a/keysign/KeyPresent.py b/keysign/KeyPresent.py
index 9ec7717..d5177d0 100644
--- a/keysign/KeyPresent.py
+++ b/keysign/KeyPresent.py
@@ -40,7 +40,7 @@ if __name__ == "__main__" and __package__ is None:
from .__init__ import __version__
from .gpgmh import get_usable_keys
from .QRCode import QRImage
-from .util import format_fingerprint, glib_markup_escape_rencoded_text
+from .util import format_fingerprint
@@ -96,8 +96,7 @@ class KeyPresentWidget(Gtk.Widget):
self.key_id_label.set_markup(
format_fingerprint(key.fingerprint).replace('\n', ' '))
self.uids_label.set_markup("\n".join(
- [glib_markup_escape_rencoded_text(
- uid.uid.decode('utf-8', 'replace'))
+ [GLib.markup_escape_text(uid.uid)
for uid
in key.uidslist]))
self.fingerprint_label.set_markup(format_fingerprint(key.fingerprint))
diff --git a/keysign/gpgkey.py b/keysign/gpgkey.py
index 95034c6..9b872dd 100644
--- a/keysign/gpgkey.py
+++ b/keysign/gpgkey.py
@@ -23,6 +23,22 @@ import warnings
log = logging.getLogger(__name__)
+
+def to_valid_utf8_string(s, errors='replace', replacement='?'):
+ """Takes a string and returns a valid utf8 encodable string
+
+ Not every Python string is utf-8 encodable.
+ Take 'fo\udcf6e\udce9ba <foo@bma.d>' for example.
+ This function replaces undecodable characters with a '?'
+ """
+ try:
+ safe = s.encode('utf-8', errors=errors).decode('utf-8', errors=errors)
+ except UnicodeDecodeError:
+ # This is the Python 2 way...
+ safe = s.decode('utf-8', errors=errors).replace(u"\uFFFD", replacement)
+ return safe
+
+
def parse_uid(uid, errors='replace'):
"""Parses a GnuPG UID into it's name, comment, and email component
@@ -113,7 +129,7 @@ class Key(namedtuple("Key", ["expiry", "fingerprint", "uidslist"])):
@classmethod
def from_gpgme(cls, key):
- "Creates a new Key from an existing monkeysign key"
+ "Creates a new Key from an existing gpgme key"
uids = [UID.from_gpgme(uid) for uid in key.uids]
expiry = parse_expiry(key.subkeys[0].expires)
fingerprint = key.fpr
@@ -130,23 +146,32 @@ class UID(namedtuple("UID", "expiry uid name comment email")):
# We expect to get raw bytes.
# While RFC4880 demands UTF-8 encoded data,
# real-life has produced non UTF-8 keys...
- rawuid = uid.uid
+ rawuid = to_valid_utf8_string(uid.uid).encode('utf-8')
log.debug("UidStr (%d): %r", len(rawuid), rawuid)
name, comment, email = parse_uid(rawuid)
expiry = parse_expiry(uid.expire)
- return cls(expiry, rawuid, name, comment, email)
+ return cls(expiry, rawuid.decode('utf-8'),
+ name, comment, email)
@classmethod
def from_gpgme(cls, uid):
- "Creates a new UID from a monkeysign key"
+ "Creates a new UID from a gpgme UID"
# Weird. I would expect the uid to be raw bytes,
# because how would gpgme know what encoding to apply?
# Also, you can have invalid encodings.
- rawuid = uid.uid.encode('utf-8', 'replace')
- name = uid.name
+ # Turns out, that Python strings can be encoded according to PEP 383
+ # which basically encodes invalid bytes as 0xDC80 + byte.
+ # That's the "surrogateescape" error handler available in Python 3.
+ # Here, we don't care about that, though. We are in the user facing
+ # abstraction for a UID. As such, we ensure that it can be rendered.
+ # So we take the string we get from gpgme and try to convert it to
+ # to utf-8 bytes.
+ log.debug("UID from gpgme: %r", uid.uid)
+ rawuid = to_valid_utf8_string(uid.uid)
+ name = to_valid_utf8_string(uid.name)
comment = '' # FIXME: uid.comment
- email = uid.email
+ email = to_valid_utf8_string(uid.email)
expiry = None # FIXME: Maybe UIDs don't expire themselves but via the binding signature
return cls(expiry, rawuid, name, comment, email)
diff --git a/keysign/keyconfirm.py b/keysign/keyconfirm.py
index 3283ba6..b5c6d0e 100644
--- a/keysign/keyconfirm.py
+++ b/keysign/keyconfirm.py
@@ -47,7 +47,7 @@ if __name__ == "__main__" and __package__ is None:
from .gpgmh import get_usable_keys
from .scan_barcode import ScalingImage
-from .util import format_fingerprint, glib_markup_escape_rencoded_text
+from .util import format_fingerprint
log = logging.getLogger(__name__)
@@ -69,8 +69,7 @@ def format_key_header(fpr, length='2048', creation_time=None):
def format_uidslist(uidslist):
result = ""
for uid in uidslist:
- uidstr = glib_markup_escape_rencoded_text(
- uid.uid.decode('utf-8', 'replace'))
+ uidstr = GLib.markup_escape_text(uid.uid)
result += ("{}\n".format(uidstr))
return result
diff --git a/keysign/keylistwidget.py b/keysign/keylistwidget.py
index fe85d38..e23a3e6 100644
--- a/keysign/keylistwidget.py
+++ b/keysign/keylistwidget.py
@@ -22,7 +22,6 @@ if __name__ == "__main__" and __package__ is None:
__package__ = str('keysign')
from .gpgmh import get_usable_keys
-from .util import glib_markup_escape_rencoded_text
log = logging.getLogger(__name__)
@@ -48,7 +47,7 @@ class ListBoxRowWithKey(Gtk.ListBoxRow):
for k in items}
log.info("format dicT: %r", format_dict)
d = {k: (log.debug("handling kv: %r %r", k, v),
- glib_markup_escape_rencoded_text(
+ GLib.markup_escape_text(
"{}".format(v)))[1]
for k, v in format_dict.items()}
log.info("Formatting UID %r", d)
diff --git a/keysign/util.py b/keysign/util.py
index 89ac2e6..565ff6b 100644
--- a/keysign/util.py
+++ b/keysign/util.py
@@ -31,8 +31,6 @@ except ImportError:
import requests
-from gi.repository import GLib
-
from .gpgmh import fingerprint_from_keydata
from .gpgmh import sign_keydata_and_encrypt
@@ -222,23 +220,3 @@ def download_key_http(address, port):
data = requests.get(url.geturl(), timeout=5).content
log.debug("finished downloading %d bytes", len(data))
return data
-
-
-def glib_markup_escape_rencoded_text(s, errors='replace'):
- """Calls GLib.markup_escape and the re-encoded text.
- The re-encoding is for getting rid of surrogates in unicode strings.
- Those surrogates appear when the UID contains non UTF-8 bytes, e.g.
- latin1. gpgme will return a unicode string with those surrogates.
- Because surrogates cannot be encoded as utf-8, we replace the
- errornous bytes (with '?'). You can control that behaviour via the
- errors parameter.
- You better pass a string here that we can `encode` in first place.
- """
- log.debug('markup rencode escape %s %r (%r)', type(s), s, errors)
- encoded = s.encode('utf-8', errors)
- decoded = encoded.decode('utf-8')
- log.debug('Decoded: %r', decoded)
- replaced = decoded.replace('\ufffd', '?')
- escaped = GLib.markup_escape_text(replaced)
- log.debug('escaped: %r', escaped)
- return escaped
diff --git a/tests/test_gpgmeh.py b/tests/test_gpgmeh.py
index c05a811..01ed433 100644
--- a/tests/test_gpgmeh.py
+++ b/tests/test_gpgmeh.py
@@ -437,12 +437,15 @@ class TestSignAndEncrypt:
sigs_before = [s for l in get_signatures_for_uids_on_key(sender,
key).values() for s in l]
+ # FIXME: Refactor this a little bit.
+ # We have duplication of code with the other test below.
for uid, uid_enc in zip(uids_before, uid_encrypted):
+ uid_enc_str = uid_enc[0].uid
# The test doesn't work so well, because comments
# are not rendered :-/
# assert_equals(uid, uid_enc[0])
- assert_in(uid.name, uid_enc[0].uid)
- assert_in(uid.email, uid_enc[0].uid)
+ assert_in(uid.name, uid_enc_str)
+ assert_in(uid.email, uid_enc_str)
ciphertext = uid_enc[1]
log.debug("Decrypting %r", ciphertext)
plaintext, result, vrfy = sender.decrypt(ciphertext)
@@ -497,9 +500,10 @@ class TestSignAndEncrypt:
sigs_before = [s for l in get_signatures_for_uids_on_key(sender,
sender_key).values() for s in l]
for uid, uid_enc in zip(uids_before, uid_encrypted):
+ uid_enc_str = uid_enc[0].uid
# FIXME: assert_equals(uid, uid_enc[0])
- assert_in(uid.name, uid_enc[0].uid)
- assert_in(uid.email, uid_enc[0].uid)
+ assert_in(uid.name, uid_enc_str)
+ assert_in(uid.email, uid_enc_str)
ciphertext = uid_enc[1]
log.debug("Decrypting %r", ciphertext)
plaintext, result, vrfy = sender.decrypt(ciphertext)
diff --git a/tests/test_uids.py b/tests/test_uids.py
index 7c6c961..506f188 100644
--- a/tests/test_uids.py
+++ b/tests/test_uids.py
@@ -1,6 +1,7 @@
#!/usr/bin/env python
-"""We want our customs UID wrapper to return raw bytes for the raw UID
-but decoded strings for email, name, and comment component.
+"""We want our custom UID wrapper to return encodable and displayable
+strings, rather than raw bytes, for the raw UID, email, name,
+and comment component.
"""
from __future__ import unicode_literals
@@ -25,18 +26,18 @@ class FakeMKSUID:
def test_mks_utf8_uid():
"The normal case"
uid = FakeMKSUID()
- uid.uid = b'foo bar <foo bar com>'
+ uid.uid = 'foo bar <foo bar com>'
u = gpgkey.UID.from_monkeysign(uid)
assert_string(u.name)
assert_string(u.comment)
assert_string(u.email)
- assert_bytes(u.uid)
+ assert_string(u.uid)
def test_mks_latin_uid():
uid = FakeMKSUID()
- uid.uid = b"fo\xf6\x65\xe9\x62a"
+ uid.uid = 'fo\udcf6e\udce9ba <foo@bma.d>'
u = gpgkey.UID.from_monkeysign(uid)
assert_string(u.name)
assert_string(u.comment)
assert_string(u.email)
- assert_bytes(u.uid)
+ assert_string(u.uid)
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]