[evince/333-handle-spaces-and-hyphenation-when-search-pdf: 91/91] Add support for text search across lines



commit d2d6415c8388f60fd38a828ae41d2331bed10a0a
Author: Nelson Benítez León <nbenitezl gmail com>
Date:   Thu Mar 21 22:03:33 2019 -0400

    Add support for text search across lines
    
    Implemented in poppler MR:
    https://gitlab.freedesktop.org/poppler/poppler/merge_requests/267
    
    As the new poppler result data type is PopplerFindRectangle, which
    incorporates more fields than just coordinates, we create a
    corresponding type in Evince, called EvFindRectangle, and use it
    all across Evince, including the pdf backend (ev-poppler.cc) and
    the djvu backend (djvu-document.c), which are the only backends
    that implement the text search interface.
    
    This new feature has the following aspects:
    
     - Ignores a hyphen character while matching when 1) it's the
       last character of the line and 2) its corresponding matching
       character in the search term is not also a hyphen.
    
     - Any whitespace character in the search term will be allowed
       to match at the logical position where the line breaks (i.e. what
       would normally be the newline character in a text file, but
       PDF text does not include newline characters between lines).
    
     - It won't match on text spanning more than two lines, i.e. it
       only matches text spanning from the end of one line to the
       start of the next line.
    
    Part of issue #333

 backend/djvu/djvu-document.c   |  9 +++++
 backend/pdf/ev-poppler.cc      | 49 ++++++++++++++++++------
 libdocument/ev-document-find.c | 26 ++++++++++++-
 libdocument/ev-document-find.h | 17 +++++++++
 libview/ev-jobs.c              | 32 ++++++++++++++--
 libview/ev-jobs.h              |  2 +
 libview/ev-view-private.h      |  8 ++--
 libview/ev-view.c              | 84 ++++++++++++++++++++++++++++++++++++------
 shell/ev-find-sidebar.c        | 30 ++++++++++++---
 9 files changed, 219 insertions(+), 38 deletions(-)
---
diff --git a/backend/djvu/djvu-document.c b/backend/djvu/djvu-document.c
index 45cf33d1..ba9c1eff 100644
--- a/backend/djvu/djvu-document.c
+++ b/backend/djvu/djvu-document.c
@@ -898,6 +898,15 @@ djvu_document_find_find_text (EvDocumentFind   *document,
 
                r->y1 = height - r->y2 * 72.0 / dpi;
                r->y2 = height - tmp * 72.0 / dpi;
+
+               EvFindRectangle *ev_rect = ev_find_rectangle_new ();
+               ev_rect->x1 = r->x1;
+               ev_rect->x2 = r->x2;
+               ev_rect->y1 = r->y1;
+               ev_rect->y2 = r->y2;
+
+               ev_rectangle_free (r);
+               l->data = ev_rect;
        }
        
 
diff --git a/backend/pdf/ev-poppler.cc b/backend/pdf/ev-poppler.cc
index 8f2e9804..935bc04b 100644
--- a/backend/pdf/ev-poppler.cc
+++ b/backend/pdf/ev-poppler.cc
@@ -157,6 +157,11 @@ static gboolean    attachment_save_to_buffer (PopplerAttachment *attachment,
                                              gchar            **buffer,
                                              gsize             *buffer_size,
                                              GError           **error);
+static GList *pdf_document_find_find_text_with_options_real (EvDocumentFind *document_find,
+                                                            EvPage         *page,
+                                                            const gchar    *text,
+                                                            EvFindOptions   options,
+                                                            gboolean        returnFindRects);
 
 EV_BACKEND_REGISTER_WITH_CODE (PdfDocument, pdf_document,
                         {
@@ -1950,6 +1955,7 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
        GList *matches, *l;
        PopplerPage *poppler_page;
        gdouble height;
+       gboolean uses_new_api;
        GList *retval = NULL;
        guint find_flags = 0;
 
@@ -1965,29 +1971,48 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
                to broaden our search in order to match on more expected results */
                find_flags |= POPPLER_FIND_IGNORE_DIACRITICS;
 #endif
+
        if (options & EV_FIND_WHOLE_WORDS_ONLY)
                find_flags |= POPPLER_FIND_WHOLE_WORDS_ONLY;
+
+#if POPPLER_CHECK_VERSION(0, 77, 0)
+       /* Allow to match on text across lines */
+       find_flags |= POPPLER_FIND_ACROSS_LINES;
+       matches = poppler_page_find_text_with_options2 (poppler_page, text, (PopplerFindFlags)find_flags);
+       uses_new_api = TRUE;
+#else
        matches = poppler_page_find_text_with_options (poppler_page, text, (PopplerFindFlags)find_flags);
+       uses_new_api = FALSE;
+#endif
        if (!matches)
                return NULL;
 
        poppler_page_get_size (poppler_page, NULL, &height);
        for (l = matches; l && l->data; l = g_list_next (l)) {
-               PopplerRectangle *rect = (PopplerRectangle *)l->data;
-               EvRectangle      *ev_rect;
-
-               ev_rect = ev_rectangle_new ();
-               ev_rect->x1 = rect->x1;
-               ev_rect->x2 = rect->x2;
-               /* Invert this for X-style coordinates */
-               ev_rect->y1 = height - rect->y2;
-               ev_rect->y2 = height - rect->y1;
-
+               EvFindRectangle *ev_rect = ev_find_rectangle_new ();
+               if (uses_new_api) {
+                       PopplerFindRectangle *rect = (PopplerFindRectangle *)l->data;
+                       ev_rect->x1 = rect->x1;
+                       ev_rect->x2 = rect->x2;
+                       ev_rect->y1 = height - rect->y2;
+                       ev_rect->y2 = height - rect->y1;
+                       ev_rect->next_line = rect->next_line;
+                       ev_rect->after_hyphen = rect->after_hyphen;
+               } else {
+                       PopplerRectangle *rect = (PopplerRectangle *)l->data;
+                       ev_rect->x1 = rect->x1;
+                       ev_rect->x2 = rect->x2;
+                       /* Invert this for X-style coordinates */
+                       ev_rect->y1 = height - rect->y2;
+                       ev_rect->y2 = height - rect->y1;
+                       ev_rect->next_line = FALSE;
+                       ev_rect->after_hyphen = FALSE;
+               }
                retval = g_list_prepend (retval, ev_rect);
        }
 
-       g_list_foreach (matches, (GFunc)poppler_rectangle_free, NULL);
-       g_list_free (matches);
+       g_list_free_full (matches, (uses_new_api ? (GDestroyNotify) poppler_find_rectangle_free
+                                                : (GDestroyNotify) poppler_rectangle_free));
 
        return g_list_reverse (retval);
 }
diff --git a/libdocument/ev-document-find.c b/libdocument/ev-document-find.c
index 607a4957..1f750c38 100644
--- a/libdocument/ev-document-find.c
+++ b/libdocument/ev-document-find.c
@@ -36,7 +36,7 @@ ev_document_find_default_init (EvDocumentFindInterface *klass)
  * @text: text to find
  * @case_sensitive: whether to match the string case
  *
- * Returns: (transfer full) (element-type EvRectangle): a list of results
+ * Returns: (transfer full) (element-type EvFindRectangle): a list of results
  */
 GList *
 ev_document_find_find_text (EvDocumentFind *document_find,
@@ -56,7 +56,7 @@ ev_document_find_find_text (EvDocumentFind *document_find,
  * @text: text to find
  * @options: a set of #EvFindOptions
  *
- * Returns: (transfer full) (element-type EvRectangle): a list of results
+ * Returns: (transfer full) (element-type EvFindRectangle): a list of results
  */
 GList *
 ev_document_find_find_text_with_options (EvDocumentFind *document_find,
@@ -72,6 +72,28 @@ ev_document_find_find_text_with_options (EvDocumentFind *document_find,
        return ev_document_find_find_text (document_find, page, text, options & EV_FIND_CASE_SENSITIVE);
 }
 
+/* EvFindRectangle */
+G_DEFINE_BOXED_TYPE (EvFindRectangle, ev_find_rectangle, ev_find_rectangle_copy, ev_find_rectangle_free)
+
+EvFindRectangle *
+ev_find_rectangle_new (void)
+{
+       return g_slice_new0 (EvFindRectangle);
+}
+
+EvFindRectangle *
+ev_find_rectangle_copy (EvFindRectangle *rectangle)
+{
+       g_return_val_if_fail (rectangle != NULL, NULL);
+       return g_slice_dup (EvFindRectangle, rectangle);
+}
+
+void
+ev_find_rectangle_free (EvFindRectangle *rectangle)
+{
+       g_slice_free (EvFindRectangle, rectangle);
+}
+
 EvFindOptions
 ev_document_find_get_supported_options (EvDocumentFind *document_find)
 {
diff --git a/libdocument/ev-document-find.h b/libdocument/ev-document-find.h
index f50ef0a2..3c882f7f 100644
--- a/libdocument/ev-document-find.h
+++ b/libdocument/ev-document-find.h
@@ -42,6 +42,23 @@ G_BEGIN_DECLS
 
 typedef struct _EvDocumentFind         EvDocumentFind;
 typedef struct _EvDocumentFindInterface EvDocumentFindInterface;
+typedef struct _EvFindRectangle         EvFindRectangle;
+
+#define EV_TYPE_FIND_RECTANGLE (ev_find_rectangle_get_type ())
+struct _EvFindRectangle
+{
+       gdouble x1;
+       gdouble y1;
+       gdouble x2;
+       gdouble y2;
+       gboolean next_line;
+       gboolean after_hyphen;
+};
+
+GType            ev_find_rectangle_get_type (void) G_GNUC_CONST;
+EvFindRectangle *ev_find_rectangle_new      (void);
+EvFindRectangle *ev_find_rectangle_copy     (EvFindRectangle *ev_find_rect);
+void             ev_find_rectangle_free     (EvFindRectangle *ev_find_rect);
 
 typedef enum {
        EV_FIND_DEFAULT          = 0,
diff --git a/libview/ev-jobs.c b/libview/ev-jobs.c
index f24808b0..3a8c3b25 100644
--- a/libview/ev-jobs.c
+++ b/libview/ev-jobs.c
@@ -1640,8 +1640,7 @@ ev_job_find_dispose (GObject *object)
                gint i;
 
                for (i = 0; i < job->n_pages; i++) {
-                       g_list_foreach (job->pages[i], (GFunc)ev_rectangle_free, NULL);
-                       g_list_free (job->pages[i]);
+                       g_list_free_full (job->pages[i], (GDestroyNotify)ev_find_rectangle_free);
                }
 
                g_free (job->pages);
@@ -1779,6 +1778,33 @@ ev_job_find_get_n_results (EvJobFind *job,
        return g_list_length (job->pages[page]);
 }
 
+/**
+ * ev_job_find_get_n_main_results:
+ * @job: an #EvJobFind job
+ * @page: number of the page whose match results we want to count.
+ *
+ * This is similar to ev_job_find_get_n_results() but it does not
+ * count the results where <next_line> field is TRUE, i.e. the
+ * results that mark the next-line part of an across-line match.
+ *
+ * Returns: total number of match results
+ *          (i.e. results which are not a next-line part) in @page
+ */
+gint
+ev_job_find_get_n_main_results (EvJobFind *job,
+                               gint       page)
+{
+       GList *l;
+       int n = 0;
+
+       for (l = job->pages[page]; l; l = l->next) {
+               if ( !((EvFindRectangle *) l->data)->next_line )
+                       n++;
+       }
+
+       return n;
+}
+
 gdouble
 ev_job_find_get_progress (EvJobFind *job)
 {
@@ -1808,7 +1834,7 @@ ev_job_find_has_results (EvJobFind *job)
  * ev_job_find_get_results: (skip)
  * @job: an #EvJobFind
  *
- * Returns: a #GList of #GList<!-- -->s containing #EvRectangle<!-- -->s
+ * Returns: a #GList of #GList<!-- -->s containing #EvFindRectangle<!-- -->s
  */
 GList **
 ev_job_find_get_results (EvJobFind *job)
diff --git a/libview/ev-jobs.h b/libview/ev-jobs.h
index 9a197c96..41dd2f02 100644
--- a/libview/ev-jobs.h
+++ b/libview/ev-jobs.h
@@ -604,6 +604,8 @@ EvJob          *ev_job_find_new           (EvDocument      *document,
 void            ev_job_find_set_options   (EvJobFind       *job,
                                            EvFindOptions    options);
 EvFindOptions   ev_job_find_get_options   (EvJobFind       *job);
+gint       ev_job_find_get_n_main_results (EvJobFind       *job,
+                                          gint             pages);
 gint            ev_job_find_get_n_results (EvJobFind       *job,
                                           gint             pages);
 gdouble         ev_job_find_get_progress  (EvJobFind       *job);
diff --git a/libview/ev-view-private.h b/libview/ev-view-private.h
index 02562ddd..a04c35f3 100644
--- a/libview/ev-view-private.h
+++ b/libview/ev-view-private.h
@@ -141,9 +141,11 @@ struct _EvView {
 
        /* Find */
        EvJobFind *find_job;
-       GList **find_pages; /* Backwards compatibility */
-       gint find_page;
-       gint find_result;
+       GList **find_pages; /* Backwards compatibility. Contains EvFindRectangles's elements per page */
+       gint find_page;     /* Page of current find result */
+       gint find_result;   /* Index of current find result on find_pages[find_page]. For matches across
+                            * two lines (which comprise two EvFindRectangle's), this will always point
+                            * to the first one, i.e. the one where rect->next_line is FALSE */
        gboolean jump_to_find_result;
        gboolean highlight_find_results;
 
diff --git a/libview/ev-view.c b/libview/ev-view.c
index 31d7caef..ef8a75de 100644
--- a/libview/ev-view.c
+++ b/libview/ev-view.c
@@ -288,7 +288,7 @@ static void       ev_view_handle_cursor_over_xy              (EvView *view,
 /*** Find ***/
 static gint         ev_view_find_get_n_results               (EvView             *view,
                                                              gint                page);
-static EvRectangle *ev_view_find_get_result                  (EvView             *view,
+static EvFindRectangle *ev_view_find_get_result              (EvView             *view,
                                                              gint                page,
                                                              gint                result);
 static void       jump_to_find_result                        (EvView             *view);
@@ -6936,25 +6936,33 @@ highlight_find_results (EvView *view,
                         cairo_t *cr,
                         int page)
 {
+       EvRectangle *rectangle;
        gint i, n_results = 0;
 
        n_results = ev_view_find_get_n_results (view, page);
+       rectangle = ev_rectangle_new ();
 
        for (i = 0; i < n_results; i++) {
-               EvRectangle *rectangle;
+               EvFindRectangle *find_rect;
                GdkRectangle view_rectangle;
                gdouble      alpha;
 
-               if (i == view->find_result && page == view->find_page) {
+               find_rect = ev_view_find_get_result (view, page, i);
+               if (page == view->find_page && (i == view->find_result ||
+                   (find_rect->next_line && i == view->find_result + 1))) {
                        alpha = 0.6;
                } else {
                        alpha = 0.3;
                }
-
-               rectangle = ev_view_find_get_result (view, page, i);
+               rectangle->x1 = find_rect->x1;
+               rectangle->x2 = find_rect->x2;
+               rectangle->y1 = find_rect->y1;
+               rectangle->y2 = find_rect->y2;
                _ev_view_transform_doc_rect_to_view_rect (view, page, rectangle, &view_rectangle);
                draw_rubberband (view, cr, &view_rectangle, alpha);
         }
+
+       ev_rectangle_free (rectangle);
 }
 
 static void
@@ -9082,32 +9090,80 @@ ev_view_find_get_n_results (EvView *view, gint page)
        return view->find_pages ? g_list_length (view->find_pages[page]) : 0;
 }
 
-static EvRectangle *
+static EvFindRectangle *
 ev_view_find_get_result (EvView *view, gint page, gint result)
 {
-       return view->find_pages ? (EvRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
+       return view->find_pages ? (EvFindRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
+}
+
+static EvFindRectangle *
+ev_view_find_get_result_and_next (EvView *view, gint page, gint result, EvFindRectangle **next_rect)
+{
+       GList *elem;
+
+       if (!view->find_pages)
+               return NULL;
+
+       elem = g_list_nth (view->find_pages[page], result);
+       if (!elem)
+               return NULL;
+
+       if (elem->next)
+               *next_rect = (EvFindRectangle *) elem->next->data;
+
+       return (EvFindRectangle *) elem->data;
+}
+
+static gboolean
+ev_view_find_is_next_line (EvView *view, gint page, gint result)
+{
+       if (!view->find_pages)
+               return FALSE;
+
+       GList *elem = g_list_nth (view->find_pages[page], result);
+       if (!elem)
+               return FALSE;
+
+       return ((EvFindRectangle *) elem->data)->next_line;
 }
 
 static void
 jump_to_find_result (EvView *view)
 {
+       EvRectangle *rect;
        gint n_results;
        gint page = view->find_page;
 
        n_results = ev_view_find_get_n_results (view, page);
+       rect = ev_rectangle_new ();
 
        if (n_results > 0 && view->find_result < n_results) {
-               EvRectangle *rect;
+               EvFindRectangle *find_rect, *rect_next;
                GdkRectangle view_rect;
 
-               rect = ev_view_find_get_result (view, page, view->find_result);
+               rect_next = NULL;
+               find_rect = ev_view_find_get_result_and_next (view, page, view->find_result, &rect_next);
+               if (rect_next && rect_next->next_line) {
+                       /* For an across-lines match, make sure both rectangles are visible */
+                       rect->x1 = MIN (find_rect->x1, rect_next->x1);
+                       rect->y1 = MIN (find_rect->y1, rect_next->y1);
+                       rect->x2 = MAX (find_rect->x2, rect_next->x2);
+                       rect->y2 = MAX (find_rect->y2, rect_next->y2);
+               } else {
+                       rect->x1 = find_rect->x1;
+                       rect->y1 = find_rect->y1;
+                       rect->x2 = find_rect->x2;
+                       rect->y2 = find_rect->y2;
+               }
                _ev_view_transform_doc_rect_to_view_rect (view, page, rect, &view_rect);
                _ev_view_ensure_rectangle_is_visible (view, &view_rect);
                if (view->caret_enabled && view->rotation == 0)
-                       position_caret_cursor_at_doc_point (view, page, rect->x1, rect->y1);
+                       position_caret_cursor_at_doc_point (view, page, find_rect->x1, find_rect->y1);
 
                view->jump_to_find_result = FALSE;
        }
+
+       ev_rectangle_free (rect);
 }
 
 /**
@@ -9233,7 +9289,8 @@ ev_view_find_next (EvView *view)
        gint n_results;
 
        n_results = ev_view_find_get_n_results (view, view->find_page);
-       view->find_result++;
+       view->find_result += ev_view_find_is_next_line (view, view->find_page, view->find_result + 1)
+                            ? 2 : 1;
 
        if (view->find_result >= n_results) {
                view->find_result = 0;
@@ -9249,11 +9306,14 @@ ev_view_find_next (EvView *view)
 void
 ev_view_find_previous (EvView *view)
 {
-       view->find_result--;
+       view->find_result -= ev_view_find_is_next_line (view, view->find_page, view->find_result - 1)
+                            ? 2 : 1;
 
        if (view->find_result < 0) {
                jump_to_find_page (view, EV_VIEW_FIND_PREV, -1);
                view->find_result = MAX (0, ev_view_find_get_n_results (view, view->find_page) - 1);
+               if (view->find_result && ev_view_find_is_next_line (view, view->find_page, view->find_result))
+                       view->find_result--; /* set to last "non-nextline" result */
        } else if (view->find_page != view->current_page) {
                jump_to_find_page (view, EV_VIEW_FIND_PREV, 0);
        }
diff --git a/shell/ev-find-sidebar.c b/shell/ev-find-sidebar.c
index 25b1d842..5a046ce8 100644
--- a/shell/ev-find-sidebar.c
+++ b/shell/ev-find-sidebar.c
@@ -268,7 +268,7 @@ ev_find_sidebar_highlight_first_match_of_page (EvFindSidebar *sidebar,
                 return;
 
         for (i = 0; i < page; i++)
-                index += ev_job_find_get_n_results (priv->job, i);
+                index += ev_job_find_get_n_main_results (priv->job, i);
 
         if (priv->highlighted_result)
                 gtk_tree_path_free (priv->highlighted_result);
@@ -339,7 +339,9 @@ get_surrounding_text_markup (const gchar  *text,
                              gboolean      case_sensitive,
                              PangoLogAttr *log_attrs,
                              gint          log_attrs_length,
-                             gint          offset)
+                             gint          offset,
+                             gboolean      has_nextline,
+                             gboolean      hyphen_was_ignored)
 {
         gint   iter;
         gchar *prec = NULL;
@@ -356,7 +358,15 @@ get_surrounding_text_markup (const gchar  *text,
 
         iter = offset;
         offset += g_utf8_strlen (find_text, -1);
-        if (!case_sensitive)
+
+       if (has_nextline || g_utf8_offset_to_pointer (text, offset-1)[0] == '\n') {
+                if (has_nextline) {
+                  offset += 1; /* for newline */
+                  if (hyphen_was_ignored)
+                    offset += 1; /* for hyphen */
+                }
+                match = sanitized_substring (text, iter, offset);
+       } else if (!case_sensitive)
                 match = g_utf8_substring (text, iter, offset);
 
         iter = MIN (log_attrs_length, offset + 1);
@@ -409,7 +419,7 @@ get_page_text (EvDocument   *document,
 static gint
 get_match_offset (EvRectangle *areas,
                   guint        n_areas,
-                  EvRectangle *match,
+                  EvFindRectangle *match,
                   gint         offset)
 {
         gdouble x, y;
@@ -489,9 +499,13 @@ process_matches_idle (EvFindSidebar *sidebar)
                 offset = 0;
 
                 for (l = matches, result = 0; l; l = g_list_next (l), result++) {
-                        EvRectangle *match = (EvRectangle *)l->data;
+                        EvFindRectangle *match = (EvFindRectangle *)l->data;
                         gchar       *markup;
                         GtkTreeIter  iter;
+                        gboolean has_nextline, hyphen_ignored;
+
+                       if (match->next_line)
+                               continue;
 
                         offset = get_match_offset (areas, n_areas, match, offset);
                         if (offset == -1) {
@@ -508,12 +522,16 @@ process_matches_idle (EvFindSidebar *sidebar)
                                 priv->insert_position++;
                         }
 
+                       has_nextline = l->next && ((EvFindRectangle *)l->next->data)->next_line;
+                       hyphen_ignored = l->next && ((EvFindRectangle *)l->next->data)->after_hyphen;
                         markup = get_surrounding_text_markup (page_text,
                                                               priv->job->text,
                                                               priv->job->case_sensitive,
                                                               text_log_attrs,
                                                               text_log_attrs_length,
-                                                              offset);
+                                                              offset,
+                                                              has_nextline,
+                                                              hyphen_ignored);
 
                         gtk_list_store_set (GTK_LIST_STORE (model), &iter,
                                             TEXT_COLUMN, markup,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]