[evince/333-handle-spaces-and-hyphenation-when-search-pdf: 437/437] Add support for multi-line text search




commit 2b64a2acb4155e36e6f6adb79a7aa0ab7c509b31
Author: Nelson Benítez León <nbenitezl gmail com>
Date:   Sun Apr 4 15:47:37 2021 -0400

    Add support for multi-line text search
    
    Implemented in poppler MR:
    https://gitlab.freedesktop.org/poppler/poppler/merge_requests/267
    
    Creates a new EvFindRectangle type to hold more match information
    than just coordinates, and uses it all across Evince, including
    the PDF backend (ev-poppler.cc) and the DjVu backend (djvu-document.c),
    which are the only backends implementing the text search interface.
    
    This new feature has the following aspects:
    
     - Ignores a hyphen character while matching when 1) it's the
       last character of the line and 2) its corresponding matching
       character in the search term is not also a hyphen.
    
     - Any whitespace character in the search term will be allowed
       to match at the logical position where the line breaks (i.e. what
       would normally be the newline character in a text file, but
       PDF text does not include newline characters between lines).
    
     - It won't match on text spanning more than two lines, i.e. it
       only matches text spanning from end of one line to start of
       next line.
    
    Part of issue #333

 backend/djvu/djvu-document.c   |  9 +++++
 backend/pdf/ev-poppler.cc      | 21 ++++++++----
 libdocument/ev-document-find.c | 26 ++++++++++++--
 libdocument/ev-document-find.h | 17 +++++++++
 libview/ev-jobs.c              | 30 ++++++++++++++--
 libview/ev-jobs.h              |  2 ++
 libview/ev-view-private.h      |  8 +++--
 libview/ev-view.c              | 78 +++++++++++++++++++++++++++++++++++-------
 shell/ev-find-sidebar.c        | 27 +++++++++++----
 9 files changed, 185 insertions(+), 33 deletions(-)
---
diff --git a/backend/djvu/djvu-document.c b/backend/djvu/djvu-document.c
index 45cf33d1..ba9c1eff 100644
--- a/backend/djvu/djvu-document.c
+++ b/backend/djvu/djvu-document.c
@@ -898,6 +898,15 @@ djvu_document_find_find_text (EvDocumentFind   *document,
 
                r->y1 = height - r->y2 * 72.0 / dpi;
                r->y2 = height - tmp * 72.0 / dpi;
+
+               EvFindRectangle *ev_rect = ev_find_rectangle_new ();
+               ev_rect->x1 = r->x1;
+               ev_rect->x2 = r->x2;
+               ev_rect->y1 = r->y1;
+               ev_rect->y2 = r->y2;
+
+               ev_rectangle_free (r);
+               l->data = ev_rect;
        }
        
 
diff --git a/backend/pdf/ev-poppler.cc b/backend/pdf/ev-poppler.cc
index 50a87342..8793a414 100644
--- a/backend/pdf/ev-poppler.cc
+++ b/backend/pdf/ev-poppler.cc
@@ -2018,27 +2018,36 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
 #endif
        if (options & EV_FIND_WHOLE_WORDS_ONLY)
                find_flags |= POPPLER_FIND_WHOLE_WORDS_ONLY;
+
+#if POPPLER_CHECK_VERSION(21, 03, 0)
+       /* Allow to match on text spanning from one line to the next */
+       find_flags |= POPPLER_FIND_MULTILINE;
+#endif
        matches = poppler_page_find_text_with_options (poppler_page, text, (PopplerFindFlags)find_flags);
        if (!matches)
                return NULL;
 
        poppler_page_get_size (poppler_page, NULL, &height);
        for (l = matches; l && l->data; l = g_list_next (l)) {
-               PopplerRectangle *rect = (PopplerRectangle *)l->data;
-               EvRectangle      *ev_rect;
+               EvFindRectangle *ev_rect = ev_find_rectangle_new ();
 
-               ev_rect = ev_rectangle_new ();
+               PopplerRectangle *rect = (PopplerRectangle *)l->data;
                ev_rect->x1 = rect->x1;
                ev_rect->x2 = rect->x2;
                /* Invert this for X-style coordinates */
                ev_rect->y1 = height - rect->y2;
                ev_rect->y2 = height - rect->y1;
-
+#if POPPLER_CHECK_VERSION(21, 03, 0)
+               ev_rect->next_line = poppler_rectangle_find_get_match_continued (rect);
+               ev_rect->after_hyphen = ev_rect->next_line && poppler_rectangle_find_get_ignored_hyphen (rect);
+#else
+               ev_rect->next_line = FALSE;
+               ev_rect->after_hyphen = FALSE;
+#endif
                retval = g_list_prepend (retval, ev_rect);
        }
 
-       g_list_foreach (matches, (GFunc)poppler_rectangle_free, NULL);
-       g_list_free (matches);
+       g_list_free_full (matches, (GDestroyNotify) poppler_rectangle_free);
 
        return g_list_reverse (retval);
 }
diff --git a/libdocument/ev-document-find.c b/libdocument/ev-document-find.c
index 607a4957..1f750c38 100644
--- a/libdocument/ev-document-find.c
+++ b/libdocument/ev-document-find.c
@@ -36,7 +36,7 @@ ev_document_find_default_init (EvDocumentFindInterface *klass)
  * @text: text to find
  * @case_sensitive: whether to match the string case
  *
- * Returns: (transfer full) (element-type EvRectangle): a list of results
+ * Returns: (transfer full) (element-type EvFindRectangle): a list of results
  */
 GList *
 ev_document_find_find_text (EvDocumentFind *document_find,
@@ -56,7 +56,7 @@ ev_document_find_find_text (EvDocumentFind *document_find,
  * @text: text to find
  * @options: a set of #EvFindOptions
  *
- * Returns: (transfer full) (element-type EvRectangle): a list of results
+ * Returns: (transfer full) (element-type EvFindRectangle): a list of results
  */
 GList *
 ev_document_find_find_text_with_options (EvDocumentFind *document_find,
@@ -72,6 +72,28 @@ ev_document_find_find_text_with_options (EvDocumentFind *document_find,
        return ev_document_find_find_text (document_find, page, text, options & EV_FIND_CASE_SENSITIVE);
 }
 
+/* EvFindRectangle */
+G_DEFINE_BOXED_TYPE (EvFindRectangle, ev_find_rectangle, ev_find_rectangle_copy, ev_find_rectangle_free)
+
+EvFindRectangle *
+ev_find_rectangle_new (void)
+{
+       return g_slice_new0 (EvFindRectangle);
+}
+
+EvFindRectangle *
+ev_find_rectangle_copy (EvFindRectangle *rectangle)
+{
+       g_return_val_if_fail (rectangle != NULL, NULL);
+       return g_slice_dup (EvFindRectangle, rectangle);
+}
+
+void
+ev_find_rectangle_free (EvFindRectangle *rectangle)
+{
+       g_slice_free (EvFindRectangle, rectangle);
+}
+
 EvFindOptions
 ev_document_find_get_supported_options (EvDocumentFind *document_find)
 {
diff --git a/libdocument/ev-document-find.h b/libdocument/ev-document-find.h
index f50ef0a2..fcf2bae3 100644
--- a/libdocument/ev-document-find.h
+++ b/libdocument/ev-document-find.h
@@ -42,6 +42,23 @@ G_BEGIN_DECLS
 
 typedef struct _EvDocumentFind         EvDocumentFind;
 typedef struct _EvDocumentFindInterface EvDocumentFindInterface;
+typedef struct _EvFindRectangle         EvFindRectangle;
+
+#define EV_TYPE_FIND_RECTANGLE (ev_find_rectangle_get_type ())
+struct _EvFindRectangle
+{
+       gdouble x1;
+       gdouble y1;
+       gdouble x2;
+       gdouble y2;
+       gboolean next_line; /* the boolean from poppler_rectangle_find_get_match_continued() */
+       gboolean after_hyphen; /* the boolean from poppler_rectangle_find_get_ignored_hyphen() */
+};
+
+GType            ev_find_rectangle_get_type (void) G_GNUC_CONST;
+EvFindRectangle *ev_find_rectangle_new      (void);
+EvFindRectangle *ev_find_rectangle_copy     (EvFindRectangle *ev_find_rect);
+void             ev_find_rectangle_free     (EvFindRectangle *ev_find_rect);
 
 typedef enum {
        EV_FIND_DEFAULT          = 0,
diff --git a/libview/ev-jobs.c b/libview/ev-jobs.c
index c96e92cb..a3c3540e 100644
--- a/libview/ev-jobs.c
+++ b/libview/ev-jobs.c
@@ -1640,8 +1640,7 @@ ev_job_find_dispose (GObject *object)
                gint i;
 
                for (i = 0; i < job->n_pages; i++) {
-                       g_list_foreach (job->pages[i], (GFunc)ev_rectangle_free, NULL);
-                       g_list_free (job->pages[i]);
+                       g_list_free_full (job->pages[i], (GDestroyNotify)ev_find_rectangle_free);
                }
 
                g_free (job->pages);
@@ -1779,6 +1778,31 @@ ev_job_find_get_n_results (EvJobFind *job,
        return g_list_length (job->pages[page]);
 }
 
+/**
+ * ev_job_find_get_n_main_results:
+ * @job: an #EvJobFind job
+ * @page: number of the page we want to count its match results.
+ *
+ * This is similar to ev_job_find_get_n_results() but it takes
+ * care to treat any multi-line matches as being only one result.
+ *
+ * Returns: total number of match results in @page
+ */
+gint
+ev_job_find_get_n_main_results (EvJobFind *job,
+                               gint       page)
+{
+       GList *l;
+       int n = 0;
+
+       for (l = job->pages[page]; l; l = l->next) {
+               if ( !((EvFindRectangle *) l->data)->next_line )
+                       n++;
+       }
+
+       return n;
+}
+
 gdouble
 ev_job_find_get_progress (EvJobFind *job)
 {
@@ -1808,7 +1832,7 @@ ev_job_find_has_results (EvJobFind *job)
  * ev_job_find_get_results: (skip)
  * @job: an #EvJobFind
  *
- * Returns: a #GList of #GList<!-- -->s containing #EvRectangle<!-- -->s
+ * Returns: a #GList of #GList<!-- -->s containing #EvFindRectangle<!-- -->s
  */
 GList **
 ev_job_find_get_results (EvJobFind *job)
diff --git a/libview/ev-jobs.h b/libview/ev-jobs.h
index 9a197c96..41dd2f02 100644
--- a/libview/ev-jobs.h
+++ b/libview/ev-jobs.h
@@ -604,6 +604,8 @@ EvJob          *ev_job_find_new           (EvDocument      *document,
 void            ev_job_find_set_options   (EvJobFind       *job,
                                            EvFindOptions    options);
 EvFindOptions   ev_job_find_get_options   (EvJobFind       *job);
+gint       ev_job_find_get_n_main_results (EvJobFind       *job,
+                                          gint             pages);
 gint            ev_job_find_get_n_results (EvJobFind       *job,
                                           gint             pages);
 gdouble         ev_job_find_get_progress  (EvJobFind       *job);
diff --git a/libview/ev-view-private.h b/libview/ev-view-private.h
index bcf66cee..3b83bb8e 100644
--- a/libview/ev-view-private.h
+++ b/libview/ev-view-private.h
@@ -151,9 +151,11 @@ struct _EvView {
 
        /* Find */
        EvJobFind *find_job;
-       GList **find_pages; /* Backwards compatibility */
-       gint find_page;
-       gint find_result;
+       GList **find_pages; /* Backwards compatibility. Contains EvFindRectangles's elements per page */
+       gint find_page;     /* Page of active find result */
+       gint find_result;   /* Index of active find result on find_pages[find_page]. For matches across
+                            * two lines (which comprise two EvFindRectangle's), this will always point
+                            * to the first one, i.e. the one where rect->next_line is TRUE */
        gboolean jump_to_find_result;
        gboolean highlight_find_results;
 
diff --git a/libview/ev-view.c b/libview/ev-view.c
index dab6a7ae..26095116 100644
--- a/libview/ev-view.c
+++ b/libview/ev-view.c
@@ -308,7 +308,7 @@ static void       ev_view_handle_cursor_over_xy              (EvView *view,
 /*** Find ***/
 static gint         ev_view_find_get_n_results               (EvView             *view,
                                                              gint                page);
-static EvRectangle *ev_view_find_get_result                  (EvView             *view,
+static EvFindRectangle *ev_view_find_get_result              (EvView             *view,
                                                              gint                page,
                                                              gint                result);
 static void       jump_to_find_result                        (EvView             *view);
@@ -7326,21 +7326,41 @@ highlight_find_results (EvView *view,
                         cairo_t *cr,
                         int page)
 {
+       EvRectangle *ev_rect;
        gint i, n_results = 0;
 
        n_results = ev_view_find_get_n_results (view, page);
+       ev_rect = ev_rectangle_new ();
 
        for (i = 0; i < n_results; i++) {
-               EvRectangle *rectangle;
+               EvFindRectangle *find_rect;
                GdkRectangle view_rectangle;
-               gboolean     active;
+               gboolean active;
 
-               active = i == view->find_result && page == view->find_page;
+               find_rect = ev_view_find_get_result (view, page, i);
+               ev_rect->x1 = find_rect->x1;
+               ev_rect->x2 = find_rect->x2;
+               ev_rect->y1 = find_rect->y1;
+               ev_rect->y2 = find_rect->y2;
 
-               rectangle = ev_view_find_get_result (view, page, i);
-               _ev_view_transform_doc_rect_to_view_rect (view, page, rectangle, &view_rectangle);
+               active = page == view->find_page && i == view->find_result;
+               _ev_view_transform_doc_rect_to_view_rect (view, page, ev_rect, &view_rectangle);
                draw_rubberband (view, cr, &view_rectangle, active);
+
+               if (active && find_rect->next_line) {
+                       /* Draw now next result (which is second part of multi-line match) */
+                       i++;
+                       find_rect = ev_view_find_get_result (view, page, i);
+                       ev_rect->x1 = find_rect->x1;
+                       ev_rect->x2 = find_rect->x2;
+                       ev_rect->y1 = find_rect->y1;
+                       ev_rect->y2 = find_rect->y2;
+                       _ev_view_transform_doc_rect_to_view_rect (view, page, ev_rect, &view_rectangle);
+                       draw_rubberband (view, cr, &view_rectangle, TRUE);
+               }
         }
+
+       ev_rectangle_free (ev_rect);
 }
 
 static void
@@ -9477,32 +9497,60 @@ ev_view_find_get_n_results (EvView *view, gint page)
        return view->find_pages ? g_list_length (view->find_pages[page]) : 0;
 }
 
-static EvRectangle *
+static EvFindRectangle *
 ev_view_find_get_result (EvView *view, gint page, gint result)
 {
-       return view->find_pages ? (EvRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
+       return view->find_pages ? (EvFindRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
+}
+
+static gboolean
+ev_view_find_is_next_line (EvView *view, gint page, gint result)
+{
+       if (!view->find_pages)
+               return FALSE;
+
+       GList *elem = g_list_nth (view->find_pages[page], result);
+       return elem && ((EvFindRectangle *) elem->data)->next_line;
 }
 
 static void
 jump_to_find_result (EvView *view)
 {
+       EvRectangle *rect;
        gint n_results;
        gint page = view->find_page;
 
        n_results = ev_view_find_get_n_results (view, page);
+       rect = ev_rectangle_new ();
 
        if (n_results > 0 && view->find_result < n_results) {
-               EvRectangle *rect;
+               EvFindRectangle *find_rect, *rect_next;
                GdkRectangle view_rect;
 
-               rect = ev_view_find_get_result (view, page, view->find_result);
+               rect_next = NULL;
+               find_rect = ev_view_find_get_result (view, page, view->find_result);
+               if (find_rect->next_line) {
+                       /* For an across-lines match, make sure both rectangles are visible */
+                       rect_next = ev_view_find_get_result (view, page, view->find_result + 1);
+                       rect->x1 = MIN (find_rect->x1, rect_next->x1);
+                       rect->y1 = MIN (find_rect->y1, rect_next->y1);
+                       rect->x2 = MAX (find_rect->x2, rect_next->x2);
+                       rect->y2 = MAX (find_rect->y2, rect_next->y2);
+               } else {
+                       rect->x1 = find_rect->x1;
+                       rect->y1 = find_rect->y1;
+                       rect->x2 = find_rect->x2;
+                       rect->y2 = find_rect->y2;
+               }
                _ev_view_transform_doc_rect_to_view_rect (view, page, rect, &view_rect);
                _ev_view_ensure_rectangle_is_visible (view, &view_rect);
                if (view->caret_enabled && view->rotation == 0)
-                       position_caret_cursor_at_doc_point (view, page, rect->x1, rect->y1);
+                       position_caret_cursor_at_doc_point (view, page, find_rect->x1, find_rect->y1);
 
                view->jump_to_find_result = FALSE;
        }
+
+       ev_rectangle_free (rect);
 }
 
 /**
@@ -9628,7 +9676,8 @@ ev_view_find_next (EvView *view)
        gint n_results;
 
        n_results = ev_view_find_get_n_results (view, view->find_page);
-       view->find_result++;
+       view->find_result += ev_view_find_is_next_line (view, view->find_page, view->find_result)
+                            ? 2 : 1;
 
        if (view->find_result >= n_results) {
                view->find_result = 0;
@@ -9644,11 +9693,14 @@ ev_view_find_next (EvView *view)
 void
 ev_view_find_previous (EvView *view)
 {
-       view->find_result--;
+       view->find_result -= ev_view_find_is_next_line (view, view->find_page, view->find_result - 2)
+                            ? 2 : 1;
 
        if (view->find_result < 0) {
                jump_to_find_page (view, EV_VIEW_FIND_PREV, -1);
                view->find_result = MAX (0, ev_view_find_get_n_results (view, view->find_page) - 1);
+               if (view->find_result && ev_view_find_is_next_line (view, view->find_page, view->find_result))
+                       view->find_result--; /* set to last "non-nextline" result */
        } else if (view->find_page != view->current_page) {
                jump_to_find_page (view, EV_VIEW_FIND_PREV, 0);
        }
diff --git a/shell/ev-find-sidebar.c b/shell/ev-find-sidebar.c
index 04a63586..9b8da175 100644
--- a/shell/ev-find-sidebar.c
+++ b/shell/ev-find-sidebar.c
@@ -268,7 +268,7 @@ ev_find_sidebar_highlight_first_match_of_page (EvFindSidebar *sidebar,
                 return;
 
         for (i = 0; i < page; i++)
-                index += ev_job_find_get_n_results (priv->job, i);
+                index += ev_job_find_get_n_main_results (priv->job, i);
 
         if (priv->highlighted_result)
                 gtk_tree_path_free (priv->highlighted_result);
@@ -339,7 +339,9 @@ get_surrounding_text_markup (const gchar  *text,
                              gboolean      case_sensitive,
                              PangoLogAttr *log_attrs,
                              gint          log_attrs_length,
-                             gint          offset)
+                             gint          offset,
+                             gboolean      has_nextline,
+                             gboolean      hyphen_was_ignored)
 {
         gint   iter;
         gchar *prec = NULL;
@@ -356,7 +358,15 @@ get_surrounding_text_markup (const gchar  *text,
 
         iter = offset;
         offset += g_utf8_strlen (find_text, -1);
-        if (!case_sensitive)
+
+        if (has_nextline || g_utf8_offset_to_pointer (text, offset-1)[0] == '\n') {
+                if (has_nextline) {
+                        offset += 1; /* for newline */
+                        if (hyphen_was_ignored)
+                                offset += 1; /* for hyphen */
+                }
+                match = sanitized_substring (text, iter, offset);
+        } else if (!case_sensitive)
                 match = g_utf8_substring (text, iter, offset);
 
         iter = MIN (log_attrs_length, offset + 1);
@@ -409,7 +419,7 @@ get_page_text (EvDocument   *document,
 static gint
 get_match_offset (EvRectangle *areas,
                   guint        n_areas,
-                  EvRectangle *match,
+                  EvFindRectangle *match,
                   gint         offset)
 {
         gdouble x, y;
@@ -489,11 +499,14 @@ process_matches_idle (EvFindSidebar *sidebar)
                 offset = 0;
 
                 for (l = matches, result = 0; l; l = g_list_next (l), result++) {
-                        EvRectangle *match = (EvRectangle *)l->data;
+                        EvFindRectangle *match = (EvFindRectangle *)l->data;
                         gchar       *markup;
                         GtkTreeIter  iter;
                         gint         new_offset;
 
+                        if (l->prev && ((EvFindRectangle *)l->prev->data)->next_line)
+                                continue; /* Skip as this is second part of a multi-line match */
+
                         new_offset = get_match_offset (areas, n_areas, match, offset);
                         if (new_offset == -1) {
                                 g_warning ("No offset found for match \"%s\" at page %d after processing %d results\n",
@@ -517,7 +530,9 @@ process_matches_idle (EvFindSidebar *sidebar)
                                                               priv->job->case_sensitive,
                                                               text_log_attrs,
                                                               text_log_attrs_length,
-                                                              offset);
+                                                              offset,
+                                                              match->next_line,
+                                                              match->after_hyphen);
 
                         gtk_list_store_set (GTK_LIST_STORE (model), &iter,
                                             TEXT_COLUMN, markup,


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]