[gedit-latex] fixurl does not work properly in python3 (hence incorrect tab handling), it does not seem to be needed
- From: Ignacio Casal Quinteiro <icq src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gedit-latex] fixurl does not work properly in python3 (hence incorrect tab handling), it does not seem to be needed
- Date: Mon, 29 Apr 2013 14:42:36 +0000 (UTC)
commit 41d15fc3879b610ca09c90fcb1b5245fd5563b78
Author: Aleksei Lissitsin <aldgracil gmail com>
Date: Sat Apr 27 20:09:09 2013 +0300
fixurl does not work properly in python3 (hence incorrect tab handling), it does not seem to be needed
anymore either, so remove it
latex/file.py | 105 +--------------------------------------------------------
1 files changed, 1 insertions(+), 104 deletions(-)
---
diff --git a/latex/file.py b/latex/file.py
index d579bd6..e603693 100644
--- a/latex/file.py
+++ b/latex/file.py
@@ -26,7 +26,6 @@ import os.path
import re
import urllib.request, urllib.parse, urllib.error
-import urllib.parse
class File(object):
"""
@@ -122,8 +121,7 @@ class File(object):
@property
def uri(self):
- # TODO: urllib.quote doesn't support utf-8
- return fixurl(self._uri.geturl())
+ return self._uri.geturl()
@property
def exists(self):
@@ -296,105 +294,4 @@ class Folder(File):
return []
-def fixurl(url):
- r"""From http://stackoverflow.com/questions/804336/best-way-to-convert-a-unicode-url-to-ascii-utf-8-percent-escaped-in-python/805166#805166 .
- Was named canonurl(). Comments added to the original are prefixed with ##.
-
- Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
- if the URL looks invalid.
-
- >>> canonurl(' ')
- ''
- >>> canonurl('www.google.com')
- 'http://www.google.com/'
- >>> canonurl('bad-utf8.com/path\xff/file')
- ''
- >>> canonurl('svn://blah.com/path/file')
- 'svn://blah.com/path/file'
- >>> canonurl('1234://badscheme.com')
- ''
- >>> canonurl('bad$scheme://google.com')
- ''
- >>> canonurl('site.badtopleveldomain')
- ''
- >>> canonurl('site.com:badport')
- ''
- >>> canonurl('http://123.24.8.240/blah')
- 'http://123.24.8.240/blah'
- >>> canonurl('http://123.24.8.240:1234/blah?q#f')
- 'http://123.24.8.240:1234/blah?q#f'
- >>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws
- 'http://xn--hgi.ws/'
- >>> canonurl(' http://www.google.com:80/path/file;params?query#fragment ')
- 'http://www.google.com:80/path/file;params?query#fragment'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5')
- 'http://xn--hgi.ws/%E2%99%A5'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth')
- 'http://xn--hgi.ws/%E2%99%A5/pa/th'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth;par%2Fams?que%2Fry=a&b=c')
- 'http://xn--hgi.ws/%E2%99%A5/pa/th;par/ams?que/ry=a&b=c'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5?\xe2\x99\xa5#\xe2\x99\xa5')
- 'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
- >>> canonurl('http://\xe2\x9e\xa1.ws/%e2%99%a5?%E2%99%A5#%E2%99%A5')
- 'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
- >>> canonurl('http://badutf8pcokay.com/%FF?%FE#%FF')
- 'http://badutf8pcokay.com/%FF?%FE#%FF'
- >>> len(canonurl('google.com/' + 'a' * 16384))
- 4096
- """
- # strip spaces at the ends and ensure it's prefixed with 'scheme://'
- url = url.strip()
- if not url:
- return ''
- if not urllib.parse.urlsplit(url).scheme:
- ## We usually deal with local files here
- url = 'file://' + url
- ## url = 'http://' + url
-
- # turn it into Unicode
- try:
- url = str(url, 'utf-8')
- except Exception as exc: # UnicodeDecodeError, exc:
- ## It often happens that the url is already "python unicode" encoded
- if not str(exc) == "decoding Unicode is not supported":
- return '' # bad UTF-8 chars in URL
- ## If the exception is indeed "decoding Unicode is not supported"
- ## this generally means that url is already unicode encoded,
- ## so we can just continue (see http://www.red-mercury.com/blog/eclectic-tech/python-mystery-of-the-day/ )
-
- # parse the URL into its components
- parsed = urllib.parse.urlsplit(url)
- scheme, netloc, path, query, fragment = parsed
-
- # ensure scheme is a letter followed by letters, digits, and '+-.' chars
- if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
- return ''
- scheme = str(scheme)
-
- ## We mostly deal with local files here, and the following check
- ## would exclude all local files, so we drop it.
- # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
- #~ match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
- #~ if not match:
- #~ print "return 4"
- #~ return ''
- #~ domain, port = match.groups()
- #~ netloc = domain + (port if port else '')
- netloc = netloc.encode('idna')
-
- # ensure path is valid and convert Unicode chars to %-encoded
- if not path:
- path = '/' # eg: 'http://google.com' -> 'http://google.com/'
- path = urllib.parse.quote(urllib.parse.unquote(path.encode('utf-8')), safe='/;')
-
- # ensure query is valid
- query = urllib.parse.quote(urllib.parse.unquote(query.encode('utf-8')), safe='=&?/')
-
- # ensure fragment is valid
- fragment = urllib.parse.quote(urllib.parse.unquote(fragment.encode('utf-8')))
-
- # piece it all back together, truncating it to a maximum of 4KB
- url = urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
- return url[:4096]
-
# ex:ts=4:et:
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]