[meld] Correctly handle unicode linebreaks (closes bgo#602933, bgo#627940)
- From: Kai Willadsen <kaiw src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [meld] Correctly handle unicode linebreaks (closes bgo#602933, bgo#627940)
- Date: Tue, 23 Nov 2010 09:22:36 +0000 (UTC)
commit d6bb4e6ec4bb9cbcf7a1c8e56a0ea79dd6599c9b
Author: Kai Willadsen <kai willadsen gmail com>
Date: Tue Oct 26 19:05:27 2010 +1000
Correctly handle unicode linebreaks (closes bgo#602933, bgo#627940)
As it stands, Meld relies on Python's universal newline support to
translate various traditional newline sequences into '\n', which we
then split on. This approach fails for unicode linebreaks, and when
Python's universal newline support isn't available.
To handle unicode linebreaks, this patch instead uses a combination of
Python's unicode splitlines() and asking our gtk.TextBuffer where it
thinks linebreaks are.
meld/filediff.py | 62 ++++++++++++++++++++++++++++++++++--------------------
1 files changed, 39 insertions(+), 23 deletions(-)
---
diff --git a/meld/filediff.py b/meld/filediff.py
index 21c86e4..caf6f78 100644
--- a/meld/filediff.py
+++ b/meld/filediff.py
@@ -81,7 +81,12 @@ class BufferLines(object):
This class allows a gtk.TextBuffer to be treated as a list of lines of
possibly-filtered text. If no filter is given, the raw output from the
gtk.TextBuffer is used.
+
+ The logic here (and in places in FileDiff) requires that Python's
+ unicode splitlines() implementation and gtk.TextBuffer agree on where
+ linebreaks occur. Happily, this is usually the case.
"""
+
def __init__(self, buf, textfilter=None):
self.buf = buf
if textfilter is not None:
@@ -90,13 +95,24 @@ class BufferLines(object):
self.textfilter = lambda x: x
def __getslice__(self, lo, hi):
+ # FIXME: If we ask for arbitrary slices past the end of the buffer,
+ # this will return the last line.
start = get_iter_at_line_or_eof(self.buf, lo)
end = get_iter_at_line_or_eof(self.buf, hi)
txt = unicode(self.buf.get_text(start, end, False), 'utf8')
- if hi >= self.buf.get_line_count():
- return self.textfilter(txt).split("\n")
- else:
- return self.textfilter(txt).split("\n")[:-1]
+
+ filter_txt = self.textfilter(txt)
+ lines = filter_txt.splitlines()
+ ends = filter_txt.splitlines(True)
+ # The last line in a gtk.TextBuffer is guaranteed never to end in a
+ # newline. As splitlines() discards an empty line at the end, we need
+ # to artificially add a line if the requested slice is past the end of
+ # the buffer, and the last line in the slice ended in a newline.
+ if hi >= self.buf.get_line_count() and \
+ (len(lines) == 0 or len(lines[-1]) != len(ends[-1])):
+ lines.append(u"")
+
+ return lines
def __getitem__(self, i):
line_start = get_iter_at_line_or_eof(self.buf, i)
@@ -125,6 +141,8 @@ def get_iter_at_line_or_eof(buffer, line):
def insert_with_tags_by_name(buffer, line, text, tag):
if line >= buffer.get_line_count():
+ # TODO: We need to insert a linebreak here, but there is no
+ # way to be certain what kind of linebreak to use.
text = "\n" + text
buffer.insert_with_tags_by_name(get_iter_at_line_or_eof(buffer, line), text, tag)
@@ -495,11 +513,12 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
self.warned_bad_comparison = True
return txt
- def after_text_insert_text(self, buffer, it, newtext, textlen):
- newtext = unicode(newtext, 'utf8')
- lines_added = newtext.count("\n")
- starting_at = it.get_line() - lines_added
- self._after_text_modified(buffer, starting_at, lines_added)
+ def after_text_insert_text(self, buf, it, newtext, textlen):
+ start_mark = buf.get_mark("insertion-start")
+ starting_at = buf.get_iter_at_mark(start_mark).get_line()
+ buf.delete_mark(start_mark)
+ lines_added = it.get_line() - starting_at
+ self._after_text_modified(buf, starting_at, lines_added)
def after_text_delete_range(self, buffer, it0, it1):
starting_at = it0.get_line()
@@ -633,18 +652,18 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
def on_textbuffer__end_user_action(self, *buffer):
self.undosequence.end_group()
- def on_text_insert_text(self, buffer, it, text, textlen):
+ def on_text_insert_text(self, buf, it, text, textlen):
text = unicode(text, 'utf8')
self.undosequence.add_action(
- BufferInsertionAction(buffer, it.get_offset(), text))
+ BufferInsertionAction(buf, it.get_offset(), text))
+ buf.create_mark("insertion-start", it, True)
- def on_text_delete_range(self, buffer, it0, it1):
- text = unicode(buffer.get_text(it0, it1, False), 'utf8')
- pane = self.textbuffer.index(buffer)
+ def on_text_delete_range(self, buf, it0, it1):
+ text = unicode(buf.get_text(it0, it1, False), 'utf8')
assert self.deleted_lines_pending == -1
- self.deleted_lines_pending = text.count("\n")
+ self.deleted_lines_pending = it1.get_line() - it0.get_line()
self.undosequence.add_action(
- BufferDeletionAction(buffer, it0.get_offset(), text))
+ BufferDeletionAction(buf, it0.get_offset(), text))
def on_undo_checkpointed(self, undosequence, buf, checkpointed):
self.set_buffer_modified(buf, not checkpointed)
@@ -857,13 +876,8 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
def _diff_files(self, files):
yield _("[%s] Computing differences") % self.label_text
- panetext = []
- for b in self.textbuffer[:self.num_panes]:
- start, end = b.get_bounds()
- text = b.get_text(start, end, False)
- panetext.append(self._filter_text(text))
- lines = map(lambda x: x.split("\n"), panetext)
- step = self.linediffer.set_sequences_iter(lines)
+ texts = self.buffer_texts[:self.num_panes]
+ step = self.linediffer.set_sequences_iter(texts)
while step.next() is None:
yield 1
@@ -1470,6 +1484,8 @@ class FileDiff(melddoc.MeldDoc, gnomeglade.Component):
if copy_up:
if chunk[2] >= b0.get_line_count() and \
chunk[3] < b1.get_line_count():
+ # TODO: We need to insert a linebreak here, but there is no
+ # way to be certain what kind of linebreak to use.
t0 = t0 + "\n"
insert_with_tags_by_name(b1, chunk[3], t0, "edited line")
else: # copy down
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]