[pango/fix-sentence-breaks] break: Fix an inconsistency with sentence breaks




commit aa606673d3ae4dae54cef5d7d2de031b8f0b73a1
Author: Matthias Clasen <mclasen redhat com>
Date:   Sun May 9 23:26:32 2021 -0400

    break: Fix an inconsistency with sentence breaks
    
    When we retroactively remove a sentence boundary
    because of rule SB8, we were not cleaning up the
    sentence_start/end markers that have already been
    derived from it. This can be seen in URLs like
      http://www.unicode.org/reports/tr29,
    where we don't have any sentence boundaries, but
    we leave a stray sentence_start/end at the first
    / after ".org".

 pango/break.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 6b8e5c01..c34d0512 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -306,7 +306,6 @@ pango_default_break (const gchar   *text,
       /* Emoji extended pictographics */
       gboolean is_Extended_Pictographic;
 
-
       wc = next_wc;
       break_type = next_break_type;
 
@@ -932,7 +931,20 @@ pango_default_break (const gchar   *text,
                      prev_prev_SB_type == SB_ATerm_Close_Sp) &&
                     IS_OTHER_TERM(prev_SB_type) &&
                     SB_type == SB_Lower)
-             attrs[prev_SB_i].is_sentence_boundary = FALSE;
+              {
+               attrs[prev_SB_i].is_sentence_boundary = FALSE;
+               attrs[prev_SB_i].is_sentence_start = FALSE;
+               attrs[prev_SB_i].is_sentence_end = FALSE;
+                last_sentence_start = -1;
+                for (int j = prev_SB_i - 1; j >= 0; j--)
+                  {
+                    if (attrs[j].is_sentence_boundary)
+                      {
+                        last_sentence_start = j;
+                        break;
+                      }
+                  }
+              }
            else if ((prev_SB_type == SB_ATerm ||
                      prev_SB_type == SB_ATerm_Close_Sp ||
                      prev_SB_type == SB_STerm ||
@@ -1537,8 +1549,9 @@ pango_default_break (const gchar   *text,
        /* meets space character, move sentence start */
        if (last_sentence_start != -1 &&
            last_sentence_start == i - 1 &&
-           attrs[i - 1].is_white)
+           attrs[i - 1].is_white) {
            last_sentence_start++;
+          }
 
       }
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]