[gedit-latex] fixurl does not work properly in python3 (hence incorrect tab handling), it does not seem to be need

From: Ignacio Casal Quinteiro <icq src gnome org>
To: commits-list gnome org
Cc:
Subject: [gedit-latex] fixurl does not work properly in python3 (hence incorrect tab handling), it does not seem to be need
Date: Mon, 29 Apr 2013 14:42:36 +0000 (UTC)
commit 41d15fc3879b610ca09c90fcb1b5245fd5563b78
Author: Aleksei Lissitsin <aldgracil gmail com>
Date:   Sat Apr 27 20:09:09 2013 +0300

    fixurl does not work properly in python3 (hence incorrect tab handling), it does not seem to be needed anymore either, so remove it

 latex/file.py |  105 +--------------------------------------------------------
 1 files changed, 1 insertions(+), 104 deletions(-)
---
diff --git a/latex/file.py b/latex/file.py
index d579bd6..e603693 100644
--- a/latex/file.py
+++ b/latex/file.py
@@ -26,7 +26,6 @@ import os.path
 
 import re
 import urllib.request, urllib.parse, urllib.error
-import urllib.parse
 
 class File(object):
     """
@@ -122,8 +121,7 @@ class File(object):
 
     @property
     def uri(self):
-        # TODO: urllib.quote doesn't support utf-8
-        return fixurl(self._uri.geturl())
+        return self._uri.geturl()
 
     @property
     def exists(self):
@@ -296,105 +294,4 @@ class Folder(File):
 
             return []
 
-def fixurl(url):
-    r"""From http://stackoverflow.com/questions/804336/best-way-to-convert-a-unicode-url-to-ascii-utf-8-percent-escaped-in-python/805166#805166 .
-    Was named canonurl(). Comments added to the original are prefixed with ##.
-
-    Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
-    if the URL looks invalid.
-
-    >>> canonurl('    ')
-    ''
-    >>> canonurl('www.google.com')
-    'http://www.google.com/'
-    >>> canonurl('bad-utf8.com/path\xff/file')
-    ''
-    >>> canonurl('svn://blah.com/path/file')
-    'svn://blah.com/path/file'
-    >>> canonurl('1234://badscheme.com')
-    ''
-    >>> canonurl('bad$scheme://google.com')
-    ''
-    >>> canonurl('site.badtopleveldomain')
-    ''
-    >>> canonurl('site.com:badport')
-    ''
-    >>> canonurl('http://123.24.8.240/blah')
-    'http://123.24.8.240/blah'
-    >>> canonurl('http://123.24.8.240:1234/blah?q#f')
-    'http://123.24.8.240:1234/blah?q#f'
-    >>> canonurl('\xe2\x9e\xa1.ws')  # tinyarro.ws
-    'http://xn--hgi.ws/'
-    >>> canonurl('  http://www.google.com:80/path/file;params?query#fragment  ')
-    'http://www.google.com:80/path/file;params?query#fragment'
-    >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5')
-    'http://xn--hgi.ws/%E2%99%A5'
-    >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth')
-    'http://xn--hgi.ws/%E2%99%A5/pa/th'
-    >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth;par%2Fams?que%2Fry=a&b=c')
-    'http://xn--hgi.ws/%E2%99%A5/pa/th;par/ams?que/ry=a&b=c'
-    >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5?\xe2\x99\xa5#\xe2\x99\xa5')
-    'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
-    >>> canonurl('http://\xe2\x9e\xa1.ws/%e2%99%a5?%E2%99%A5#%E2%99%A5')
-    'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
-    >>> canonurl('http://badutf8pcokay.com/%FF?%FE#%FF')
-    'http://badutf8pcokay.com/%FF?%FE#%FF'
-    >>> len(canonurl('google.com/' + 'a' * 16384))
-    4096
-    """
-    # strip spaces at the ends and ensure it's prefixed with 'scheme://'
-    url = url.strip()
-    if not url:
-        return ''
-    if not urllib.parse.urlsplit(url).scheme:
-        ## We usually deal with local files here
-        url = 'file://' + url
-        ## url = 'http://' + url
-
-    # turn it into Unicode
-    try:
-        url = str(url, 'utf-8')
-    except Exception as exc:   # UnicodeDecodeError, exc:
-        ## It often happens that the url is already "python unicode" encoded
-        if not str(exc) == "decoding Unicode is not supported":
-            return ''  # bad UTF-8 chars in URL
-        ## If the exception is indeed "decoding Unicode is not supported"
-        ## this generally means that url is already unicode encoded,
-        ## so we can just continue (see http://www.red-mercury.com/blog/eclectic-tech/python-mystery-of-the-day/ )
-
-    # parse the URL into its components
-    parsed = urllib.parse.urlsplit(url)
-    scheme, netloc, path, query, fragment = parsed
-
-    # ensure scheme is a letter followed by letters, digits, and '+-.' chars
-    if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
-        return ''
-    scheme = str(scheme)
-
-    ## We mostly deal with local files here, and the following check
-    ## would exclude all local files, so we drop it.
-    # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
-    #~ match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
-    #~ if not match:
-        #~ print "return 4"
-        #~ return ''
-    #~ domain, port = match.groups()
-    #~ netloc = domain + (port if port else '')
-    netloc = netloc.encode('idna')
-
-    # ensure path is valid and convert Unicode chars to %-encoded
-    if not path:
-        path = '/'  # eg: 'http://google.com' -> 'http://google.com/'
-    path = urllib.parse.quote(urllib.parse.unquote(path.encode('utf-8')), safe='/;')
-
-    # ensure query is valid
-    query = urllib.parse.quote(urllib.parse.unquote(query.encode('utf-8')), safe='=&?/')
-
-    # ensure fragment is valid
-    fragment = urllib.parse.quote(urllib.parse.unquote(fragment.encode('utf-8')))
-
-    # piece it all back together, truncating it to a maximum of 4KB
-    url = urllib.parse.urlunsplit((scheme, netloc, path, query, fragment))
-    return url[:4096]
-
 # ex:ts=4:et:
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]