[evince] libview: Fix some broken aspects of EvViewAccessible text support
- From: Joanmarie Diggs <joanied src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [evince] libview: Fix some broken aspects of EvViewAccessible text support
- Date: Mon, 17 Mar 2014 12:27:07 +0000 (UTC)
commit 03afe275fabbb235f9697ad3274cde0c9e2b077b
Author: Joanmarie Diggs <jdiggs igalia com>
Date: Mon Mar 17 08:17:15 2014 -0400
libview: Fix some broken aspects of EvViewAccessible text support
* Stop using GtkTextBuffer: It was a hack more than a real solution
* Fix setting and clearing of selection via AtkText for the current page
* Strip newline chars out of the sentence strings: Newlines break TTS prosody
* Add some logic to heuristically distinguish soft and hard returns
https://bugzilla.gnome.org/show_bug.cgi?id=725003
libview/ev-view-accessible.c | 384 +++++++++++++++++++++++-------------------
libview/ev-view-private.h | 5 +
libview/ev-view.c | 14 ++
3 files changed, 230 insertions(+), 173 deletions(-)
---
diff --git a/libview/ev-view-accessible.c b/libview/ev-view-accessible.c
index 60155c3..d79d0d3 100644
--- a/libview/ev-view-accessible.c
+++ b/libview/ev-view-accessible.c
@@ -22,7 +22,6 @@
#include <config.h>
#include <glib/gi18n-lib.h>
#include <gtk/gtk.h>
-#include <libgail-util/gail-util.h>
#include "ev-selection.h"
#include "ev-page-cache.h"
@@ -63,9 +62,6 @@ struct _EvViewAccessiblePrivate {
guint action_idle_handler;
GtkScrollType idle_scroll;
- /* AtkText */
- GtkTextBuffer *buffer;
-
/* AtkHypertext */
GHashTable *links;
@@ -90,7 +86,6 @@ clear_cache (EvViewAccessible *accessible)
{
EvViewAccessiblePrivate* priv = accessible->priv;
- g_clear_object (&priv->buffer);
g_clear_pointer (&priv->links, (GDestroyNotify)g_hash_table_destroy);
}
@@ -145,82 +140,190 @@ ev_view_accessible_init (EvViewAccessible *accessible)
accessible->priv = G_TYPE_INSTANCE_GET_PRIVATE (accessible, EV_TYPE_VIEW_ACCESSIBLE,
EvViewAccessiblePrivate);
}
-static GtkTextBuffer *
-ev_view_accessible_get_text_buffer (EvViewAccessible *accessible, EvView *view)
+/* ATs expect to be able to identify sentence boundaries based on content. Valid,
+ * content-based boundaries may be present at the end of a line, for instance
+ * at the end of a heading within a document. Thus being able to distinguish hard
+ * returns from soft returns is necessary. However, the text we get from Poppler
+ * for non-tagged PDFs has "\n" inserted at the end of each line resulting in a
+ * broken accessibility implementation w.r.t. sentences.
+ */
+static gboolean
+treat_as_soft_return (EvView *view,
+ PangoLogAttr *log_attrs,
+ gint offset)
{
- EvPageCache *page_cache;
- const gchar *retval = NULL;
- EvViewAccessiblePrivate* priv = accessible->priv;
+ EvRectangle *areas = NULL;
+ guint n_areas = 0;
+ gdouble line_spacing, this_line_height, next_word_width;
+ EvRectangle *this_line_start;
+ EvRectangle *this_line_end;
+ EvRectangle *next_line_start;
+ EvRectangle *next_line_end;
+ EvRectangle *next_word_end;
+ gint prev_offset, next_offset;
- if (priv->buffer) {
- return priv->buffer;
- }
- page_cache = view->page_cache;
- if (!page_cache) {
- return NULL;
- }
+ if (!log_attrs[offset].is_white)
+ return FALSE;
+
+ ev_page_cache_get_text_layout (view->page_cache, get_relevant_page (view), &areas, &n_areas);
+ if (n_areas <= offset + 1)
+ return FALSE;
+
+ prev_offset = offset - 1;
+ next_offset = offset + 1;
+
+ /* In wrapped text, the character at the start of the next line starts a word.
+ * Examples where this condition might fail include bullets and images. But it
+ * also includes things like "(", so also check the next character.
+ */
+ if (!log_attrs[next_offset].is_word_start &&
+ (next_offset + 1 >= n_areas || !log_attrs[next_offset + 1].is_word_start))
+ return FALSE;
+
+ /* In wrapped text, the chars on either side of the newline have very similar heights.
+ * Examples where this condition might fail include a newline at the end of a heading,
+ * and a newline at the end of a paragraph that is followed by a heading.
+ */
+ this_line_end = areas + prev_offset;
+ next_line_start = areas + next_offset;;
+
+ this_line_height = this_line_end->y2 - this_line_end->y1;
+ if (ABS (this_line_height - (next_line_start->y2 - next_line_start->y1)) > 0.25)
+ return FALSE;
+
+ /* If there is significant white space between this line and the next, odds are this
+ * is not a soft return in wrapped text. Lines within a typical paragraph are at most
+ * double-spaced. If the spacing is more than that, assume a hard return is present.
+ */
+ line_spacing = next_line_start->y1 - this_line_end->y2;
+ if (line_spacing - this_line_height > 1)
+ return FALSE;
+
+ /* Lines within a typical paragraph have *reasonably* similar x1 coordinates. But
+ * we cannot count on them being nearly identical. Examples where indentation can
+ * be present in wrapped text include indenting the first line of the paragraph,
+ * and hanging indents (e.g. in the works cited within an academic paper). So we'll
+ * be somewhat tolerant here.
+ */
+ for ( ; prev_offset > 0 && !log_attrs[prev_offset].is_mandatory_break; prev_offset--);
+ this_line_start = areas + prev_offset;
+ if (ABS (this_line_start->x1 - next_line_start->x1) > 20)
+ return FALSE;
+
+ /* Ditto for x2, but this line might be short due to a wide word on the next line. */
+ for ( ; next_offset < n_areas && !log_attrs[next_offset].is_word_end; next_offset++);
+ next_word_end = areas + next_offset;
+ next_word_width = next_word_end->x2 - next_line_start->x1;
+
+ for ( ; next_offset < n_areas && !log_attrs[next_offset + 1].is_mandatory_break; next_offset++);
+ next_line_end = areas + next_offset;
+ if (next_line_end->x2 - (this_line_end->x2 + next_word_width) > 20)
+ return FALSE;
+
+ return TRUE;
+}
+
+static void
+ev_view_accessible_get_range_for_boundary (AtkText *text,
+ AtkTextBoundary boundary_type,
+ gint offset,
+ gint *start_offset,
+ gint *end_offset)
+{
+ GtkWidget *widget;
+ EvView *view;
+ gint start = 0;
+ gint end = 0;
+ PangoLogAttr *log_attrs = NULL;
+ gulong n_attrs;
- priv->buffer = gtk_text_buffer_new (NULL);
- retval = ev_page_cache_get_text (page_cache, get_relevant_page (view));
- if (retval)
- gtk_text_buffer_set_text (priv->buffer, retval, -1);
+ widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
+ if (widget == NULL)
+ return;
+
+ view = EV_VIEW (widget);
+ if (!view->page_cache)
+ return;
+
+ ev_page_cache_get_text_log_attrs (view->page_cache, get_relevant_page (view), &log_attrs, &n_attrs);
+ if (!log_attrs)
+ return;
+
+ switch (boundary_type) {
+ case ATK_TEXT_BOUNDARY_CHAR:
+ start = offset;
+ end = offset + 1;
+ break;
+ case ATK_TEXT_BOUNDARY_WORD_START:
+ for (start = offset; start >= 0 && !log_attrs[start].is_word_start; start--);
+ for (end = offset + 1; end <= n_attrs && !log_attrs[end].is_word_start; end++);
+ break;
+ case ATK_TEXT_BOUNDARY_SENTENCE_START:
+ for (start = offset; start >= 0; start--) {
+ if (log_attrs[start].is_mandatory_break && treat_as_soft_return (view, log_attrs,
start - 1))
+ continue;
+ if (log_attrs[start].is_sentence_start)
+ break;
+ }
+ for (end = offset + 1; end <= n_attrs; end++) {
+ if (log_attrs[end].is_mandatory_break && treat_as_soft_return (view, log_attrs, end -
1))
+ continue;
+ if (log_attrs[end].is_sentence_start)
+ break;
+ }
+ break;
+ case ATK_TEXT_BOUNDARY_LINE_START:
+ for (start = offset; start >= 0 && !log_attrs[start].is_mandatory_break; start--);
+ for (end = offset + 1; end <= n_attrs && !log_attrs[end].is_mandatory_break; end++);
+ break;
+ default:
+ /* The "END" boundary types are deprecated */
+ break;
+ }
- return priv->buffer;
+ *start_offset = start;
+ *end_offset = end;
}
static gchar *
-ev_view_accessible_get_text (AtkText *text,
- gint start_pos,
- gint end_pos)
+ev_view_accessible_get_substring (AtkText *text,
+ gint start_offset,
+ gint end_offset)
{
GtkWidget *widget;
- GtkTextIter start, end;
- GtkTextBuffer *buffer;
- gchar *retval;
+ EvView *view;
+ gchar *substring, *normalized;
+ const gchar* page_text;
widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
if (widget == NULL)
- /* State is defunct */
return NULL;
- buffer = ev_view_accessible_get_text_buffer (EV_VIEW_ACCESSIBLE (text), EV_VIEW (widget));
- if (!buffer)
+ view = EV_VIEW (widget);
+ if (!view->page_cache)
return NULL;
- gtk_text_buffer_get_iter_at_offset (buffer, &start, start_pos);
- gtk_text_buffer_get_iter_at_offset (buffer, &end, end_pos);
- retval = gtk_text_buffer_get_text (buffer, &start, &end, FALSE);
+ page_text = ev_page_cache_get_text (view->page_cache, get_relevant_page (view));
+ start_offset = MAX (0, start_offset);
+ if (end_offset < 0 || end_offset > g_utf8_strlen (page_text, -1))
+ end_offset = strlen (page_text);
- return retval;
+ substring = g_utf8_substring (page_text, start_offset, end_offset);
+ normalized = g_utf8_normalize (substring, -1, G_NORMALIZE_NFKC);
+ g_free (substring);
+
+ return normalized;
}
static gunichar
ev_view_accessible_get_character_at_offset (AtkText *text,
gint offset)
{
- GtkWidget *widget;
- GtkTextIter start, end;
- GtkTextBuffer *buffer;
gchar *string;
gunichar unichar;
- widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
- if (widget == NULL)
- /* State is defunct */
- return '\0';
-
- buffer = ev_view_accessible_get_text_buffer (EV_VIEW_ACCESSIBLE (text), EV_VIEW (widget));
- if (!buffer)
- return '\0';
-
- if (offset >= gtk_text_buffer_get_char_count (buffer))
- return '\0';
-
- gtk_text_buffer_get_iter_at_offset (buffer, &start, offset);
- end = start;
- gtk_text_iter_forward_char (&end);
- string = gtk_text_buffer_get_slice (buffer, &start, &end, FALSE);
+ string = ev_view_accessible_get_substring (text, offset, offset + 1);
unichar = g_utf8_get_char (string);
g_free(string);
@@ -228,34 +331,11 @@ ev_view_accessible_get_character_at_offset (AtkText *text,
}
static gchar *
-ev_view_accessible_get_text_for_offset (EvViewAccessible *view_accessible,
- gint offset,
- GailOffsetType offset_type,
- AtkTextBoundary boundary_type,
- gint *start_offset,
- gint *end_offset)
+ev_view_accessible_get_text (AtkText *text,
+ gint start_pos,
+ gint end_pos)
{
- GtkWidget *widget;
- GtkTextBuffer *buffer;
- GailTextUtil *gail_text;
- gchar *retval;
-
- widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (view_accessible));
- if (widget == NULL)
- /* State is defunct */
- return NULL;
-
- buffer = ev_view_accessible_get_text_buffer (view_accessible, EV_VIEW (widget));
- if (!buffer)
- return NULL;
-
- gail_text = gail_text_util_new ();
- gail_text_util_buffer_setup (gail_text, buffer);
- retval = gail_text_util_get_text (gail_text, NULL, offset_type, boundary_type,
- offset, start_offset, end_offset);
- g_object_unref (gail_text);
-
- return retval;
+ return ev_view_accessible_get_substring (text, start_pos, end_pos);
}
static gchar *
@@ -265,17 +345,28 @@ ev_view_accessible_get_text_at_offset (AtkText *text,
gint *start_offset,
gint *end_offset)
{
- return ev_view_accessible_get_text_for_offset (EV_VIEW_ACCESSIBLE (text),
- offset, GAIL_AT_OFFSET,
- boundary_type,
- start_offset, end_offset);
+ gchar *retval;
+
+ ev_view_accessible_get_range_for_boundary (text, boundary_type, offset, start_offset, end_offset);
+ retval = ev_view_accessible_get_substring (text, *start_offset, *end_offset);
+
+ /* If newlines appear inside the text of a sentence (i.e. between the start and
+ * end offsets returned by ev_view_accessible_get_range_for_boundary), they interfere
+ * with the prosody of text-to-speech-based solutions such as a screen reader because
+ * speech synthesizers tend to pause after the newline char as if it were the end
+ * of the sentence.
+ */
+ if (boundary_type == ATK_TEXT_BOUNDARY_SENTENCE_START)
+ g_strdelimit (retval, "\n", ' ');
+
+ return retval;
}
static gint
ev_view_accessible_get_character_count (AtkText *text)
{
GtkWidget *widget;
- GtkTextBuffer *buffer;
+ EvView *view;
gint retval;
widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
@@ -283,11 +374,8 @@ ev_view_accessible_get_character_count (AtkText *text)
/* State is defunct */
return 0;
- buffer = ev_view_accessible_get_text_buffer (EV_VIEW_ACCESSIBLE (text), EV_VIEW (widget));
- if (!buffer)
- return 0;
-
- retval = gtk_text_buffer_get_char_count (buffer);
+ view = EV_VIEW (widget);
+ retval = g_utf8_strlen (ev_page_cache_get_text (view->page_cache, get_relevant_page (view)), -1);
return retval;
}
@@ -697,83 +785,28 @@ ev_view_accessible_get_selection (AtkText *text,
return normalized_text;
}
-static gboolean
-ev_view_accessible_add_selection (AtkText *text,
- gint start_pos,
- gint end_pos)
-{
- GtkWidget *widget;
- GtkTextBuffer *buffer;
- GtkTextIter pos_itr;
- GtkTextIter start, end;
- gint select_start, select_end;
- gboolean retval = FALSE;
-
- widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
- if (widget == NULL)
- /* State is defunct */
- return FALSE;
-
- buffer = ev_view_accessible_get_text_buffer (EV_VIEW_ACCESSIBLE (text), EV_VIEW (widget));
- if (!buffer)
- return FALSE;
-
- gtk_text_buffer_get_selection_bounds (buffer, &start, &end);
- select_start = gtk_text_iter_get_offset (&start);
- select_end = gtk_text_iter_get_offset (&end);
-
- /* If there is already a selection, then don't allow
- * another to be added
- */
- if (select_start == select_end) {
- gtk_text_buffer_get_iter_at_offset (buffer, &pos_itr, start_pos);
- gtk_text_buffer_move_mark_by_name (buffer, "selection_bound", &pos_itr);
- gtk_text_buffer_get_iter_at_offset (buffer, &pos_itr, end_pos);
- gtk_text_buffer_move_mark_by_name (buffer, "insert", &pos_itr);
-
- retval = TRUE;
- }
-
- return retval;
-}
-
+/* ATK allows for multiple, non-contiguous selections within a single AtkText
+ * object. Unless and until Evince supports this, selection numbers are ignored.
+ */
static gboolean
ev_view_accessible_remove_selection (AtkText *text,
gint selection_num)
{
GtkWidget *widget;
- GtkTextBuffer *buffer;
- GtkTextMark *cursor_mark;
- GtkTextIter cursor_itr;
- GtkTextIter start, end;
- gint select_start, select_end;
- gboolean retval = FALSE;
+ EvView *view;
widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
if (widget == NULL)
/* State is defunct */
return FALSE;
- buffer = ev_view_accessible_get_text_buffer (EV_VIEW_ACCESSIBLE (text), EV_VIEW (widget));
- if (!buffer)
- return FALSE;
-
- gtk_text_buffer_get_selection_bounds(buffer, &start, &end);
- select_start = gtk_text_iter_get_offset(&start);
- select_end = gtk_text_iter_get_offset(&end);
-
- if (select_start != select_end) {
- /* Setting the start & end of the selected region
- * to the caret position turns off the selection.
- */
- cursor_mark = gtk_text_buffer_get_insert (buffer);
- gtk_text_buffer_get_iter_at_mark (buffer, &cursor_itr, cursor_mark);
- gtk_text_buffer_move_mark_by_name (buffer, "selection_bound", &cursor_itr);
-
- retval = TRUE;
+ view = EV_VIEW (widget);
+ if (ev_view_get_has_selection (view)) {
+ _ev_view_clear_selection (view);
+ return TRUE;
}
- return retval;
+ return FALSE;
}
static gboolean
@@ -783,35 +816,40 @@ ev_view_accessible_set_selection (AtkText *text,
gint end_pos)
{
GtkWidget *widget;
- GtkTextBuffer *buffer;
- GtkTextIter pos_itr;
- GtkTextIter start, end;
- gint select_start, select_end;
- gboolean retval = FALSE;
+ EvView *view;
+ EvRectangle *areas = NULL;
+ guint n_areas = 0;
+ GdkRectangle start_rect, end_rect;
+ GdkPoint start_point, end_point;
widget = gtk_accessible_get_widget (GTK_ACCESSIBLE (text));
if (widget == NULL)
/* State is defunct */
return FALSE;
- buffer = ev_view_accessible_get_text_buffer (EV_VIEW_ACCESSIBLE (text), EV_VIEW (widget));
- if (!buffer)
+ view = EV_VIEW (widget);
+ ev_page_cache_get_text_layout (view->page_cache, get_relevant_page (view), &areas, &n_areas);
+ if (start_pos < 0 || end_pos >= n_areas)
return FALSE;
- gtk_text_buffer_get_selection_bounds(buffer, &start, &end);
- select_start = gtk_text_iter_get_offset(&start);
- select_end = gtk_text_iter_get_offset(&end);
+ _ev_view_transform_doc_rect_to_view_rect (view, get_relevant_page (view), areas + start_pos,
&start_rect);
+ _ev_view_transform_doc_rect_to_view_rect (view, get_relevant_page (view), areas + end_pos - 1,
&end_rect);
+ start_point.x = start_rect.x;
+ start_point.y = start_rect.y;
+ end_point.x = end_rect.x + end_rect.width;
+ end_point.y = end_rect.y + end_rect.height;
+ _ev_view_set_selection (view, &start_point, &end_point);
- if (select_start != select_end) {
- gtk_text_buffer_get_iter_at_offset (buffer, &pos_itr, start_pos);
- gtk_text_buffer_move_mark_by_name (buffer, "selection_bound", &pos_itr);
- gtk_text_buffer_get_iter_at_offset (buffer, &pos_itr, end_pos);
- gtk_text_buffer_move_mark_by_name (buffer, "insert", &pos_itr);
+ return TRUE;
+}
- retval = TRUE;
- }
+static gboolean
+ev_view_accessible_add_selection (AtkText *text,
+ gint start_pos,
+ gint end_pos)
+{
+ return ev_view_accessible_set_selection (text, 0, start_pos, end_pos);
- return retval;
}
#if ATK_CHECK_VERSION (2, 11, 3)
diff --git a/libview/ev-view-private.h b/libview/ev-view-private.h
index 23fde54..4f1721a 100644
--- a/libview/ev-view-private.h
+++ b/libview/ev-view-private.h
@@ -281,5 +281,10 @@ gint _ev_view_get_caret_cursor_offset_at_doc_point (EvView *view,
gdouble doc_x,
gdouble doc_y);
+void _ev_view_clear_selection (EvView *view);
+void _ev_view_set_selection (EvView *view,
+ GdkPoint *start_point,
+ GdkPoint *end_point);
+
#endif /* __EV_VIEW_PRIVATE_H__ */
diff --git a/libview/ev-view.c b/libview/ev-view.c
index 9813cbf..7560084 100644
--- a/libview/ev-view.c
+++ b/libview/ev-view.c
@@ -8183,6 +8183,20 @@ ev_view_get_has_selection (EvView *view)
return view->selection_info.selections != NULL;
}
+void
+_ev_view_clear_selection (EvView *view)
+{
+ clear_selection (view);
+}
+
+void
+_ev_view_set_selection (EvView *view,
+ GdkPoint *start_point,
+ GdkPoint *end_point)
+{
+ compute_selections (view, EV_SELECTION_STYLE_GLYPH, start_point, end_point);
+}
+
static char *
get_selected_text (EvView *view)
{
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]