[gedit-latex] Move file and folder abstraction to file.py
- From: Ignacio Casal Quinteiro <icq src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [gedit-latex] Move file and folder abstraction to file.py
- Date: Mon, 4 Jul 2011 23:04:16 +0000 (UTC)
commit 4a6617541a160ac782106af22d7fe05bcc357ed5
Author: Ignacio Casal Quinteiro <icq gnome org>
Date: Tue Jul 5 01:04:04 2011 +0200
Move file and folder abstraction to file.py
latex/base/Makefile.am | 1 +
latex/base/__init__.py | 372 +-------------------------------------
latex/base/decorators.py | 2 +-
latex/base/file.py | 389 +++++++++++++++++++++++++++++++++++++++
latex/base/windowactivatable.py | 3 +-
latex/bibtex/parser.py | 2 +-
latex/latex/__init__.py | 2 +-
latex/latex/environment.py | 2 +-
latex/latex/expander.py | 2 +-
latex/latex/inversesearch.py | 2 +-
latex/latex/model.py | 2 +-
latex/latex/outline.py | 2 +-
latex/latex/preview.py | 2 +-
latex/latex/validator.py | 2 +-
latex/latex/views.py | 2 +-
latex/tools/postprocess.py | 2 +-
16 files changed, 405 insertions(+), 384 deletions(-)
---
diff --git a/latex/base/Makefile.am b/latex/base/Makefile.am
index b724bea..0d2d640 100644
--- a/latex/base/Makefile.am
+++ b/latex/base/Makefile.am
@@ -5,6 +5,7 @@ plugin_PYTHON = \
completion.py \
config.py \
decorators.py \
+ file.py \
__init__.py \
job.py \
resources.py \
diff --git a/latex/base/__init__.py b/latex/base/__init__.py
index ec840ab..26adec5 100644
--- a/latex/base/__init__.py
+++ b/latex/base/__init__.py
@@ -26,6 +26,7 @@ These classes form the interface exposed by the plugin base layer.
from logging import getLogger
from gi.repository import Gtk, Gdk
+from .file import File
#FIXME: this should probably be just a Gtk.Orientable iface
# HORIZONTAL: means Bottom Panel
@@ -897,375 +898,4 @@ class WindowContext(object):
def __del__(self):
self._log.debug("Properly destroyed %s" % self)
-
-from os import remove
-import os.path
-from glob import glob
-
-import re
-import urllib
-import urlparse
-
-
-def fixurl(url):
- r"""From http://stackoverflow.com/questions/804336/best-way-to-convert-a-unicode-url-to-ascii-utf-8-percent-escaped-in-python/805166#805166 .
- Was named canonurl(). Comments added to the original are prefixed with ##.
-
- Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
- if the URL looks invalid.
-
- >>> canonurl(' ')
- ''
- >>> canonurl('www.google.com')
- 'http://www.google.com/'
- >>> canonurl('bad-utf8.com/path\xff/file')
- ''
- >>> canonurl('svn://blah.com/path/file')
- 'svn://blah.com/path/file'
- >>> canonurl('1234://badscheme.com')
- ''
- >>> canonurl('bad$scheme://google.com')
- ''
- >>> canonurl('site.badtopleveldomain')
- ''
- >>> canonurl('site.com:badport')
- ''
- >>> canonurl('http://123.24.8.240/blah')
- 'http://123.24.8.240/blah'
- >>> canonurl('http://123.24.8.240:1234/blah?q#f')
- 'http://123.24.8.240:1234/blah?q#f'
- >>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws
- 'http://xn--hgi.ws/'
- >>> canonurl(' http://www.google.com:80/path/file;params?query#fragment ')
- 'http://www.google.com:80/path/file;params?query#fragment'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5')
- 'http://xn--hgi.ws/%E2%99%A5'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth')
- 'http://xn--hgi.ws/%E2%99%A5/pa/th'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth;par%2Fams?que%2Fry=a&b=c')
- 'http://xn--hgi.ws/%E2%99%A5/pa/th;par/ams?que/ry=a&b=c'
- >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5?\xe2\x99\xa5#\xe2\x99\xa5')
- 'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
- >>> canonurl('http://\xe2\x9e\xa1.ws/%e2%99%a5?%E2%99%A5#%E2%99%A5')
- 'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
- >>> canonurl('http://badutf8pcokay.com/%FF?%FE#%FF')
- 'http://badutf8pcokay.com/%FF?%FE#%FF'
- >>> len(canonurl('google.com/' + 'a' * 16384))
- 4096
- """
- # strip spaces at the ends and ensure it's prefixed with 'scheme://'
- url = url.strip()
- if not url:
- return ''
- if not urlparse.urlsplit(url).scheme:
- ## We usually deal with local files here
- url = 'file://' + url
- ## url = 'http://' + url
-
- # turn it into Unicode
- try:
- url = unicode(url, 'utf-8')
- except Exception, exc: # UnicodeDecodeError, exc:
- ## It often happens that the url is already "python unicode" encoded
- if not str(exc) == "decoding Unicode is not supported":
- return '' # bad UTF-8 chars in URL
- ## If the exception is indeed "decoding Unicode is not supported"
- ## this generally means that url is already unicode encoded,
- ## so we can just continue (see http://www.red-mercury.com/blog/eclectic-tech/python-mystery-of-the-day/ )
-
- # parse the URL into its components
- parsed = urlparse.urlsplit(url)
- scheme, netloc, path, query, fragment = parsed
-
- # ensure scheme is a letter followed by letters, digits, and '+-.' chars
- if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
- return ''
- scheme = str(scheme)
-
- ## We mostly deal with local files here, and the following check
- ## would exclude all local files, so we drop it.
- # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
- #~ match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
- #~ if not match:
- #~ print "return 4"
- #~ return ''
- #~ domain, port = match.groups()
- #~ netloc = domain + (port if port else '')
- netloc = netloc.encode('idna')
-
- # ensure path is valid and convert Unicode chars to %-encoded
- if not path:
- path = '/' # eg: 'http://google.com' -> 'http://google.com/'
- path = urllib.quote(urllib.unquote(path.encode('utf-8')), safe='/;')
-
- # ensure query is valid
- query = urllib.quote(urllib.unquote(query.encode('utf-8')), safe='=&?/')
-
- # ensure fragment is valid
- fragment = urllib.quote(urllib.unquote(fragment.encode('utf-8')))
-
- # piece it all back together, truncating it to a maximum of 4KB
- url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
- return url[:4096]
-
-
-class File(object):
- """
- This is an object-oriented wrapper for all the os.* stuff. A File object
- represents the reference to a file.
- """
-
- # TODO: use Gio.File as underlying implementation
-
- @staticmethod
- def create_from_relative_path(relative_path, working_directory):
- """
- Create a File from a path relative to some working directory.
-
- File.create_from_relative_path('../sub/file.txt', '/home/michael/base') == File('/home/michael/sub/file.txt')
-
- @param relative_path: a relative path, e.g. '../../dir/myfile.txt'
- @param working_directory: an absolute directory to be used as the starting point for the relative path
- """
- absolute_path = os.path.abspath(os.path.join(working_directory, relative_path))
- return File(absolute_path)
-
- @staticmethod
- def is_absolute(path):
- return os.path.isabs(path)
-
- __log = getLogger("File")
-
- _DEFAULT_SCHEME = "file://"
-
- def __init__(self, uri):
- """
- @param uri: any URI, URL or local filename
- """
- if uri is None:
- raise ValueError("URI must not be None")
-
- self._uri = urlparse.urlparse(uri)
- if len(self._uri.scheme) == 0:
- # prepend default scheme if missing
- self._uri = urlparse.urlparse("%s%s" % (self._DEFAULT_SCHEME, uri))
-
- def create(self, content=None):
- """
- Create a the File in the file system
- """
- f = open(self.path, "w")
- if content is not None:
- f.write(content)
- f.close()
-
- @property
- def path(self):
- """
- Returns '/home/user/image.jpg' for 'file:///home/user/image.jpg'
- """
- return urllib.url2pathname(self._uri.path)
-
- @property
- def extension(self):
- """
- Returns '.jpg' for 'file:///home/user/image.jpg'
- """
- return os.path.splitext(self.path)[1]
-
- @property
- def shortname(self):
- """
- Returns '/home/user/image' for 'file:///home/user/image.jpg'
- """
- return os.path.splitext(self.path)[0]
-
- @property
- def basename(self):
- """
- Returns 'image.jpg' for 'file:///home/user/image.jpg'
- """
- return os.path.basename(self.path)
-
- @property
- def shortbasename(self):
- """
- Returns 'image' for 'file:///home/user/image.jpg'
- """
- return os.path.splitext(os.path.basename(self.path))[0]
-
- @property
- def dirname(self):
- """
- Returns '/home/user' for 'file:///home/user/image.jpg'
- """
- return os.path.dirname(self.path)
-
- @property
- def uri(self):
- # TODO: urllib.quote doesn't support utf-8
- return fixurl(self._uri.geturl())
-
- @property
- def exists(self):
- return os.path.exists(self.path)
-
- @property
- def mtime(self):
- if self.exists:
- return os.path.getmtime(self.path)
- else:
- raise IOError("File not found")
-
- def find_neighbors(self, extension):
- """
- Find other files in the directory of this one having
- a certain extension
-
- @param extension: a file extension pattern like '.tex' or '.*'
- """
-
- # TODO: glob is quite expensive, find a simpler way for this
-
- try:
- filenames = glob("%s/*%s" % (self.dirname, extension))
- neighbors = [File(filename) for filename in filenames]
- return neighbors
-
- except Exception, e:
- # as seen in Bug #2002630 the glob() call compiles a regex and so we must be prepared
- # for an exception from that because the shortname may contain regex characters
-
- # TODO: a more robust solution would be an escape() method for re
-
- self.__log.debug("find_neighbors: %s" % e)
-
- return []
-
- @property
- def siblings(self):
- """
- Find other files in the directory of this one having the same
- basename. This means for a file '/dir/a.doc' this method returns
- [ '/dir/a.tmp', '/dir/a.sh' ]
- """
- siblings = []
- try:
- filenames = glob("%s.*" % self.shortname)
- siblings = [File(filename) for filename in filenames]
- except Exception, e:
- # as seen in Bug #2002630 the glob() call compiles a regex and so we must be prepared
- # for an exception from that because the shortname may contain regex characters
-
- # TODO: a more robust solution would be an escape() method for re
-
- self.__log.debug("find_siblings: %s" % e)
- return siblings
-
- def relativize(self, base, allow_up_level=False):
- """
- Relativize the path of this File against a base directory. That means that e.g.
- File("/home/user/doc.tex").relativize("/home") == "user/doc.tex"
-
- If up-level references are NOT allowed but necessary (e.g. base='/a/b/c', path='/a/b/d')
- then the absolute path is returned.
-
- @param base: the base directory to relativize against
- @param allow_up_level: allow up-level references (../../) or not
- """
- if allow_up_level:
- return os.path.relpath(self.path, base)
- else:
- # TODO: why do we need this?
-
- # relative path must be 'below' base path
- if len(base) >= len(self.path):
- return self.path
- if self.path[:len(base)] == base:
- # bases match, return relative part
- return self.path[len(base) + 1:]
- return self.path
-
- def relativize_shortname(self, base):
- """
- Relativize the path of this File and return only the shortname of the resulting
- relative path. That means that e.g.
- File("/home/user/doc.tex").relativize_shortname("/home") == "user/doc"
-
- This is just a convenience method.
-
- @param base: the base directory to relativize against
- """
- relative_path = self.relativize(base)
- return os.path.splitext(relative_path)[0]
-
- def delete(self):
- """
- Delete the File from the file system
-
- @raise OSError:
- """
- if self.exists:
- remove(self.path)
- else:
- raise IOError("File not found")
-
- def __eq__(self, other):
- """
- Override == operator
- """
- try:
- return self.uri == other.uri
- except AttributeError: # no File object passed or None
- # returning NotImplemented is bad because we have to
- # compare None with File
- return False
-
- def __ne__(self, other):
- """
- Override != operator
- """
- return not self.__eq__(other)
-
- def __str__(self):
- return self.uri
-
- def __cmp__(self, other):
- try:
- return self.basename.__cmp__(other.basename)
- except AttributeError: # no File object passed or None
- # returning NotImplemented is bad because we have to
- # compare None with File
- return False
-
-
-class Folder(File):
-
- # FIXME: a Folder is NOT a subclass of a File, both are a subclass of some AbstractFileSystemObject,
- # this is just a quick hack
- #
- # FIXME: but basically a Folder is a File so this class should not be needed
-
- __log = getLogger("Folder")
-
- @property
- def files(self):
- """
- Return File objects for all files in this Folder
- """
- try:
- filenames = glob("%s/*" % (self.path))
- files = [File(filename) for filename in filenames]
- return files
-
- except Exception, e:
- # as seen in Bug #2002630 the glob() call compiles a regex and so we must be prepared
- # for an exception from that because the shortname may contain regex characters
-
- # TODO: a more robust solution would be an escape() method for re
-
- self.__log.debug("files: %s" % e)
-
- return []
-
# ex:ts=4:et:
diff --git a/latex/base/decorators.py b/latex/base/decorators.py
index 8c617a5..96795c1 100644
--- a/latex/base/decorators.py
+++ b/latex/base/decorators.py
@@ -29,7 +29,7 @@ from logging import getLogger
from gi.repository import Gedit, Gtk, Gio
from config import EDITORS
-from . import File
+from .file import File
# TODO: maybe create ActionDelegate for GeditWindowDecorator
diff --git a/latex/base/file.py b/latex/base/file.py
new file mode 100644
index 0000000..195c406
--- /dev/null
+++ b/latex/base/file.py
@@ -0,0 +1,389 @@
+# -*- coding: utf-8 -*-
+
+# This file is part of the Gedit LaTeX Plugin
+#
+# Copyright (C) 2010 Michael Zeising
+#
+# This program is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public Licence as published by the Free Software
+# Foundation; either version 2 of the Licence, or (at your option) any later
+# version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+# FOR A PARTICULAR PURPOSE. See the GNU General Public Licence for more
+# details.
+#
+# You should have received a copy of the GNU General Public Licence along with
+# this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
+# Street, Fifth Floor, Boston, MA 02110-1301, USA
+
+from os import remove
+import os.path
+from glob import glob
+
+import re
+import urllib
+import urlparse
+
+class File(object):
+ """
+ This is an object-oriented wrapper for all the os.* stuff. A File object
+ represents the reference to a file.
+ """
+
+ # TODO: use Gio.File as underlying implementation
+
+ @staticmethod
+ def create_from_relative_path(relative_path, working_directory):
+ """
+ Create a File from a path relative to some working directory.
+
+ File.create_from_relative_path('../sub/file.txt', '/home/michael/base') == File('/home/michael/sub/file.txt')
+
+ @param relative_path: a relative path, e.g. '../../dir/myfile.txt'
+ @param working_directory: an absolute directory to be used as the starting point for the relative path
+ """
+ absolute_path = os.path.abspath(os.path.join(working_directory, relative_path))
+ return File(absolute_path)
+
+ @staticmethod
+ def is_absolute(path):
+ return os.path.isabs(path)
+
+ __log = getLogger("File")
+
+ _DEFAULT_SCHEME = "file://"
+
+ def __init__(self, uri):
+ """
+ @param uri: any URI, URL or local filename
+ """
+ if uri is None:
+ raise ValueError("URI must not be None")
+
+ self._uri = urlparse.urlparse(uri)
+ if len(self._uri.scheme) == 0:
+ # prepend default scheme if missing
+ self._uri = urlparse.urlparse("%s%s" % (self._DEFAULT_SCHEME, uri))
+
+ def create(self, content=None):
+ """
+ Create a the File in the file system
+ """
+ f = open(self.path, "w")
+ if content is not None:
+ f.write(content)
+ f.close()
+
+ @property
+ def path(self):
+ """
+ Returns '/home/user/image.jpg' for 'file:///home/user/image.jpg'
+ """
+ return urllib.url2pathname(self._uri.path)
+
+ @property
+ def extension(self):
+ """
+ Returns '.jpg' for 'file:///home/user/image.jpg'
+ """
+ return os.path.splitext(self.path)[1]
+
+ @property
+ def shortname(self):
+ """
+ Returns '/home/user/image' for 'file:///home/user/image.jpg'
+ """
+ return os.path.splitext(self.path)[0]
+
+ @property
+ def basename(self):
+ """
+ Returns 'image.jpg' for 'file:///home/user/image.jpg'
+ """
+ return os.path.basename(self.path)
+
+ @property
+ def shortbasename(self):
+ """
+ Returns 'image' for 'file:///home/user/image.jpg'
+ """
+ return os.path.splitext(os.path.basename(self.path))[0]
+
+ @property
+ def dirname(self):
+ """
+ Returns '/home/user' for 'file:///home/user/image.jpg'
+ """
+ return os.path.dirname(self.path)
+
+ @property
+ def uri(self):
+ # TODO: urllib.quote doesn't support utf-8
+ return fixurl(self._uri.geturl())
+
+ @property
+ def exists(self):
+ return os.path.exists(self.path)
+
+ @property
+ def mtime(self):
+ if self.exists:
+ return os.path.getmtime(self.path)
+ else:
+ raise IOError("File not found")
+
+ def find_neighbors(self, extension):
+ """
+ Find other files in the directory of this one having
+ a certain extension
+
+ @param extension: a file extension pattern like '.tex' or '.*'
+ """
+
+ # TODO: glob is quite expensive, find a simpler way for this
+
+ try:
+ filenames = glob("%s/*%s" % (self.dirname, extension))
+ neighbors = [File(filename) for filename in filenames]
+ return neighbors
+
+ except Exception, e:
+ # as seen in Bug #2002630 the glob() call compiles a regex and so we must be prepared
+ # for an exception from that because the shortname may contain regex characters
+
+ # TODO: a more robust solution would be an escape() method for re
+
+ self.__log.debug("find_neighbors: %s" % e)
+
+ return []
+
+ @property
+ def siblings(self):
+ """
+ Find other files in the directory of this one having the same
+ basename. This means for a file '/dir/a.doc' this method returns
+ [ '/dir/a.tmp', '/dir/a.sh' ]
+ """
+ siblings = []
+ try:
+ filenames = glob("%s.*" % self.shortname)
+ siblings = [File(filename) for filename in filenames]
+ except Exception, e:
+ # as seen in Bug #2002630 the glob() call compiles a regex and so we must be prepared
+ # for an exception from that because the shortname may contain regex characters
+
+ # TODO: a more robust solution would be an escape() method for re
+
+ self.__log.debug("find_siblings: %s" % e)
+ return siblings
+
+ def relativize(self, base, allow_up_level=False):
+ """
+ Relativize the path of this File against a base directory. That means that e.g.
+ File("/home/user/doc.tex").relativize("/home") == "user/doc.tex"
+
+ If up-level references are NOT allowed but necessary (e.g. base='/a/b/c', path='/a/b/d')
+ then the absolute path is returned.
+
+ @param base: the base directory to relativize against
+ @param allow_up_level: allow up-level references (../../) or not
+ """
+ if allow_up_level:
+ return os.path.relpath(self.path, base)
+ else:
+ # TODO: why do we need this?
+
+ # relative path must be 'below' base path
+ if len(base) >= len(self.path):
+ return self.path
+ if self.path[:len(base)] == base:
+ # bases match, return relative part
+ return self.path[len(base) + 1:]
+ return self.path
+
+ def relativize_shortname(self, base):
+ """
+ Relativize the path of this File and return only the shortname of the resulting
+ relative path. That means that e.g.
+ File("/home/user/doc.tex").relativize_shortname("/home") == "user/doc"
+
+ This is just a convenience method.
+
+ @param base: the base directory to relativize against
+ """
+ relative_path = self.relativize(base)
+ return os.path.splitext(relative_path)[0]
+
+ def delete(self):
+ """
+ Delete the File from the file system
+
+ @raise OSError:
+ """
+ if self.exists:
+ remove(self.path)
+ else:
+ raise IOError("File not found")
+
+ def __eq__(self, other):
+ """
+ Override == operator
+ """
+ try:
+ return self.uri == other.uri
+ except AttributeError: # no File object passed or None
+ # returning NotImplemented is bad because we have to
+ # compare None with File
+ return False
+
+ def __ne__(self, other):
+ """
+ Override != operator
+ """
+ return not self.__eq__(other)
+
+ def __str__(self):
+ return self.uri
+
+ def __cmp__(self, other):
+ try:
+ return self.basename.__cmp__(other.basename)
+ except AttributeError: # no File object passed or None
+ # returning NotImplemented is bad because we have to
+ # compare None with File
+ return False
+
+
+class Folder(File):
+
+ # FIXME: a Folder is NOT a subclass of a File, both are a subclass of some AbstractFileSystemObject,
+ # this is just a quick hack
+ #
+ # FIXME: but basically a Folder is a File so this class should not be needed
+
+ __log = getLogger("Folder")
+
+ @property
+ def files(self):
+ """
+ Return File objects for all files in this Folder
+ """
+ try:
+ filenames = glob("%s/*" % (self.path))
+ files = [File(filename) for filename in filenames]
+ return files
+
+ except Exception, e:
+ # as seen in Bug #2002630 the glob() call compiles a regex and so we must be prepared
+ # for an exception from that because the shortname may contain regex characters
+
+ # TODO: a more robust solution would be an escape() method for re
+
+ self.__log.debug("files: %s" % e)
+
+ return []
+
+def fixurl(url):
+ r"""From http://stackoverflow.com/questions/804336/best-way-to-convert-a-unicode-url-to-ascii-utf-8-percent-escaped-in-python/805166#805166 .
+ Was named canonurl(). Comments added to the original are prefixed with ##.
+
+ Return the canonical, ASCII-encoded form of a UTF-8 encoded URL, or ''
+ if the URL looks invalid.
+
+ >>> canonurl(' ')
+ ''
+ >>> canonurl('www.google.com')
+ 'http://www.google.com/'
+ >>> canonurl('bad-utf8.com/path\xff/file')
+ ''
+ >>> canonurl('svn://blah.com/path/file')
+ 'svn://blah.com/path/file'
+ >>> canonurl('1234://badscheme.com')
+ ''
+ >>> canonurl('bad$scheme://google.com')
+ ''
+ >>> canonurl('site.badtopleveldomain')
+ ''
+ >>> canonurl('site.com:badport')
+ ''
+ >>> canonurl('http://123.24.8.240/blah')
+ 'http://123.24.8.240/blah'
+ >>> canonurl('http://123.24.8.240:1234/blah?q#f')
+ 'http://123.24.8.240:1234/blah?q#f'
+ >>> canonurl('\xe2\x9e\xa1.ws') # tinyarro.ws
+ 'http://xn--hgi.ws/'
+ >>> canonurl(' http://www.google.com:80/path/file;params?query#fragment ')
+ 'http://www.google.com:80/path/file;params?query#fragment'
+ >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5')
+ 'http://xn--hgi.ws/%E2%99%A5'
+ >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth')
+ 'http://xn--hgi.ws/%E2%99%A5/pa/th'
+ >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5/pa%2Fth;par%2Fams?que%2Fry=a&b=c')
+ 'http://xn--hgi.ws/%E2%99%A5/pa/th;par/ams?que/ry=a&b=c'
+ >>> canonurl('http://\xe2\x9e\xa1.ws/\xe2\x99\xa5?\xe2\x99\xa5#\xe2\x99\xa5')
+ 'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
+ >>> canonurl('http://\xe2\x9e\xa1.ws/%e2%99%a5?%E2%99%A5#%E2%99%A5')
+ 'http://xn--hgi.ws/%E2%99%A5?%E2%99%A5#%E2%99%A5'
+ >>> canonurl('http://badutf8pcokay.com/%FF?%FE#%FF')
+ 'http://badutf8pcokay.com/%FF?%FE#%FF'
+ >>> len(canonurl('google.com/' + 'a' * 16384))
+ 4096
+ """
+ # strip spaces at the ends and ensure it's prefixed with 'scheme://'
+ url = url.strip()
+ if not url:
+ return ''
+ if not urlparse.urlsplit(url).scheme:
+ ## We usually deal with local files here
+ url = 'file://' + url
+ ## url = 'http://' + url
+
+ # turn it into Unicode
+ try:
+ url = unicode(url, 'utf-8')
+ except Exception, exc: # UnicodeDecodeError, exc:
+ ## It often happens that the url is already "python unicode" encoded
+ if not str(exc) == "decoding Unicode is not supported":
+ return '' # bad UTF-8 chars in URL
+ ## If the exception is indeed "decoding Unicode is not supported"
+ ## this generally means that url is already unicode encoded,
+ ## so we can just continue (see http://www.red-mercury.com/blog/eclectic-tech/python-mystery-of-the-day/ )
+
+ # parse the URL into its components
+ parsed = urlparse.urlsplit(url)
+ scheme, netloc, path, query, fragment = parsed
+
+ # ensure scheme is a letter followed by letters, digits, and '+-.' chars
+ if not re.match(r'[a-z][-+.a-z0-9]*$', scheme, flags=re.I):
+ return ''
+ scheme = str(scheme)
+
+ ## We mostly deal with local files here, and the following check
+ ## would exclude all local files, so we drop it.
+ # ensure domain and port are valid, eg: sub.domain.<1-to-6-TLD-chars>[:port]
+ #~ match = re.match(r'(.+\.[a-z0-9]{1,6})(:\d{1,5})?$', netloc, flags=re.I)
+ #~ if not match:
+ #~ print "return 4"
+ #~ return ''
+ #~ domain, port = match.groups()
+ #~ netloc = domain + (port if port else '')
+ netloc = netloc.encode('idna')
+
+ # ensure path is valid and convert Unicode chars to %-encoded
+ if not path:
+ path = '/' # eg: 'http://google.com' -> 'http://google.com/'
+ path = urllib.quote(urllib.unquote(path.encode('utf-8')), safe='/;')
+
+ # ensure query is valid
+ query = urllib.quote(urllib.unquote(query.encode('utf-8')), safe='=&?/')
+
+ # ensure fragment is valid
+ fragment = urllib.quote(urllib.unquote(fragment.encode('utf-8')))
+
+ # piece it all back together, truncating it to a maximum of 4KB
+ url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
+ return url[:4096]
+
+# ex:ts=4:et:
diff --git a/latex/base/windowactivatable.py b/latex/base/windowactivatable.py
index 3caa7ee..d7379c6 100644
--- a/latex/base/windowactivatable.py
+++ b/latex/base/windowactivatable.py
@@ -36,7 +36,8 @@ from ..tools.views import ToolView
from .config import WINDOW_SCOPE_VIEWS, EDITOR_SCOPE_VIEWS, ACTIONS
from .decorators import GeditTabDecorator
from .resources import Resources
-from . import File, PanelView, WindowContext
+from . import PanelView, WindowContext
+from .file import File
class LaTeXWindowActivatable(GObject.Object, Gedit.WindowActivatable, PeasGtk.Configurable):
__gtype_name__ = "LaTeXWindowActivatable"
diff --git a/latex/bibtex/parser.py b/latex/bibtex/parser.py
index 061c3b0..96d00c0 100644
--- a/latex/bibtex/parser.py
+++ b/latex/bibtex/parser.py
@@ -47,7 +47,7 @@ BibTeX parser and object model
# sys.path.append("/home/michael/.gnome2/gedit/plugins")
#
# from issues import MockIssueHandler
-# from base import File
+# from base.file import File
#
# model = BibTeXParser().parse_async(open(filename).read(), filename)
#else:
diff --git a/latex/latex/__init__.py b/latex/latex/__init__.py
index 98fb8c8..906cf92 100644
--- a/latex/latex/__init__.py
+++ b/latex/latex/__init__.py
@@ -37,7 +37,7 @@ from logging import getLogger
from xml.dom import minidom
from xml.parsers.expat import ExpatError
-from ..base import File
+from ..base.file import File
class PropertyFile(dict):
diff --git a/latex/latex/environment.py b/latex/latex/environment.py
index a882d77..4983047 100644
--- a/latex/latex/environment.py
+++ b/latex/latex/environment.py
@@ -179,7 +179,7 @@ class TeXResource(object):
from os.path import expanduser
-from ..base import File
+from ..base.file import File
class Environment(object):
diff --git a/latex/latex/expander.py b/latex/latex/expander.py
index 7048a87..71d415a 100644
--- a/latex/latex/expander.py
+++ b/latex/latex/expander.py
@@ -24,7 +24,7 @@ latex.expander
from logging import getLogger
-from ..base import File
+from ..base.file import File
from cache import LaTeXDocumentCache
from parser import Node
diff --git a/latex/latex/inversesearch.py b/latex/latex/inversesearch.py
index 3aef4c7..2dbde7f 100644
--- a/latex/latex/inversesearch.py
+++ b/latex/latex/inversesearch.py
@@ -34,7 +34,7 @@ from logging import getLogger
_log = getLogger("latex.inversesearch")
-from ..base import File
+from ..base.file import File
from editor import LaTeXEditor
diff --git a/latex/latex/model.py b/latex/latex/model.py
index 72762fe..eff1931 100644
--- a/latex/latex/model.py
+++ b/latex/latex/model.py
@@ -243,7 +243,7 @@ class LanguageModelParser(sax.ContentHandler):
from copy import deepcopy
import pickle
-from ..base import File
+from ..base.file import File
class LanguageModelFactory(object):
diff --git a/latex/latex/outline.py b/latex/latex/outline.py
index cd22760..473fa1b 100644
--- a/latex/latex/outline.py
+++ b/latex/latex/outline.py
@@ -69,7 +69,7 @@ class Outline(object):
self.newenvironments = [] # OutlineNode objects
-from ..base import File
+from ..base.file import File
from ..preferences import Preferences
diff --git a/latex/latex/preview.py b/latex/latex/preview.py
index 0ab90b3..cc66fce 100644
--- a/latex/latex/preview.py
+++ b/latex/latex/preview.py
@@ -22,7 +22,7 @@
latex.preview
"""
-from ..base import File
+from ..base.file import File
from ..tools import Tool, Job, ToolRunner
from ..tools.postprocess import RubberPostProcessor, GenericPostProcessor
from ..issues import MockStructuredIssueHandler
diff --git a/latex/latex/validator.py b/latex/latex/validator.py
index 319c90d..db07007 100644
--- a/latex/latex/validator.py
+++ b/latex/latex/validator.py
@@ -25,7 +25,7 @@ latex.validator
from logging import getLogger
from os.path import exists
-from ..base import File
+from ..base.file import File
from ..issues import Issue
from ..util import escape
from parser import Node
diff --git a/latex/latex/views.py b/latex/latex/views.py
index 8cf45a5..0b53798 100644
--- a/latex/latex/views.py
+++ b/latex/latex/views.py
@@ -179,7 +179,7 @@ class LaTeXSymbolMapView(PanelView):
from os import system
-from ..base import File
+from ..base.file import File
from ..outline import OutlineOffsetMap, BaseOutlineView
from outline import OutlineNode
diff --git a/latex/tools/postprocess.py b/latex/tools/postprocess.py
index c7126fe..56fa031 100644
--- a/latex/tools/postprocess.py
+++ b/latex/tools/postprocess.py
@@ -187,7 +187,7 @@ class RubberPostProcessor(PostProcessor):
return self._summary
def process(self, file, stdout, stderr, condition):
- from ..base import File # FIXME: this produces a circ dep on toplevel
+ from ..base.file import File # FIXME: this produces a circ dep on toplevel
self._issues = []
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]