Inconsistent behaviour of BufferLines
- From: Piotr Piastucki <leech miranda gmail com>
- To: meld-list <meld-list gnome org>
- Subject: Inconsistent behaviour of BufferLines
- Date: Tue, 27 Mar 2012 19:25:34 +0200
Hi,
I have notices some inconsistencies in the way BufferLines class behaves. Please see the attached test case for an example of a buggy behaviour. I expect len(x), iter(x) and iter(x[0:len(x)] to return the same length, however, due to the current implementation of the __getitem__ method iter(x) returns 1 element more than expected.
This bug affects comparison results when one of the files is empty for instance.
There are 2 possible fixes (both marked with FIXME comment and commented out in the attached file):
1) change > operator in __getitem__ to >=, >= makes more sense here anyway, but I guess > is used for some reason
2) implement __iter__ method
Regards,
Piotr
import gtk
import sys
def get_iter_at_line_or_eof(buf, line):
if line >= buf.get_line_count():
return buf.get_end_iter()
return buf.get_iter_at_line(line)
class BufferLines(object):
"""gtk.TextBuffer shim with line-based access and optional filtering
This class allows a gtk.TextBuffer to be treated as a list of lines of
possibly-filtered text. If no filter is given, the raw output from the
gtk.TextBuffer is used.
The logic here (and in places in FileDiff) requires that Python's
unicode splitlines() implementation and gtk.TextBuffer agree on where
linebreaks occur. Happily, this is usually the case.
"""
def __init__(self, buf, textfilter=None):
self.buf = buf
if textfilter is not None:
self.textfilter = textfilter
else:
self.textfilter = lambda x: x
def __getslice__(self, lo, hi):
# FIXME: If we ask for arbitrary slices past the end of the buffer,
# this will return the last line.
start = get_iter_at_line_or_eof(self.buf, lo)
end = get_iter_at_line_or_eof(self.buf, hi)
txt = unicode(self.buf.get_text(start, end, False), 'utf8')
filter_txt = self.textfilter(txt)
lines = filter_txt.splitlines()
ends = filter_txt.splitlines(True)
# The last line in a gtk.TextBuffer is guaranteed never to end in a
# newline. As splitlines() discards an empty line at the end, we need
# to artificially add a line if the requested slice is past the end of
# the buffer, and the last line in the slice ended in a newline.
if hi >= self.buf.get_line_count() and \
(len(lines) == 0 or len(lines[-1]) != len(ends[-1])):
lines.append(u"")
ends.append(u"")
hi = self.buf.get_line_count() if hi == sys.maxint else hi
if hi - lo != len(lines):
# These codepoints are considered line breaks by Python, but not
# by GtkTextStore.
additional_breaks = set((u'\x0c', u'\x85'))
i = 0
while i < len(ends):
line, end = lines[i], ends[i]
# It's possible that the last line in a file would end in a
# line break character, which requires no joining.
if end and end[-1] in additional_breaks and \
(not line or line[-1] not in additional_breaks):
assert len(ends) >= i + 1
lines[i:i + 2] = [line + end[-1] + lines[i + 1]]
ends[i:i + 2] = [end + ends[i + 1]]
i += 1
return lines
def __getitem__(self, i):
# FIXME
# Either change > to >=
# if i >= len(self):
if i > len(self):
raise IndexError
line_start = get_iter_at_line_or_eof(self.buf, i)
line_end = line_start.copy()
if not line_end.ends_line():
line_end.forward_to_line_end()
txt = self.buf.get_text(line_start, line_end, False)
return unicode(self.textfilter(txt), 'utf8')
# FIXME
# or add __iter__ method
#def __iter__(self):
# for l in self[0:len(self)]:
# yield l
def __len__(self):
return self.buf.get_line_count()
def main():
textbuffer = gtk.TextBuffer()
bl = BufferLines(textbuffer)
print "len(bl) =", len(bl)
l = 0
for it in iter(bl):
l += 1
print "iter(bl) =", l
l = 0
for it in iter(bl[0:len(bl)]):
l += 1
print "iter(bl[0:len(bl)]) =", l
if __name__ == "__main__":
main()
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]