[pango/more-log-attr-things: 4/6] break: Fix up word fixups

From: Matthias Clasen <matthiasc src gnome org>
To: commits-list gnome org
Cc:
Subject: [pango/more-log-attr-things: 4/6] break: Fix up word fixups
Date: Thu, 26 Aug 2021 12:02:52 +0000 (UTC)

commit 653e27a8dc6cb25042d811be7f6d962d50233dbe
Author: Matthias Clasen <mclasen redhat com>
Date:   Thu Aug 26 01:14:07 2021 -0400

    break: Fix up word fixups
    
    We were not correctly handling overlapping segments
    in all cases when using <span segment='word'>. Improve this.
    
    Update affected tests.

 pango/break.c                | 21 ++++++++++++++-------
 tests/breaks/twelve.expected | 14 +++++++-------
 2 files changed, 21 insertions(+), 14 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 369a2cb4..0cb6666d 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1867,11 +1867,14 @@ handle_words (const char    *text,
       if (start >= offset)
         {
           gboolean in_word = FALSE;
-          for (pos = start_pos - 1; pos >= 0; pos--)
+          for (pos = start_pos; pos >= 0; pos--)
             {
               if (log_attrs[pos].is_word_end)
-                break;
-              if (log_attrs[pos].is_word_start)
+                {
+                  in_word = pos == start_pos;
+                  break;
+                }
+              if (pos < start_pos && log_attrs[pos].is_word_start)
                 {
                   in_word = TRUE;
                   break;
@@ -1882,7 +1885,8 @@ handle_words (const char    *text,
           log_attrs[start_pos].is_word_boundary = TRUE;
 
           /* Allow line breaks before words */
-          log_attrs[start_pos].is_line_break = TRUE;
+          if (start_pos > 0)
+            log_attrs[start_pos].is_line_break = TRUE;
 
           tailored = TRUE;
         }
@@ -1890,11 +1894,14 @@ handle_words (const char    *text,
       if (end < offset + length)
         {
           gboolean in_word = FALSE;
-          for (pos = end_pos + 1; pos < log_attrs_len; pos++)
+          for (pos = end_pos; pos < log_attrs_len; pos++)
             {
               if (log_attrs[pos].is_word_start)
-                break;
-              if (log_attrs[pos].is_word_end)
+                {
+                  in_word = pos == end_pos;
+                  break;
+                }
+              if (pos > end_pos && log_attrs[pos].is_word_end)
                 {
                   in_word = TRUE;
                   break;
diff --git a/tests/breaks/twelve.expected b/tests/breaks/twelve.expected
index 0fb7655e..cfb8e625 100644
--- a/tests/breaks/twelve.expected
+++ b/tests/breaks/twelve.expected
@@ -1,7 +1,7 @@
-Text:         ⁦t⁩ ⁦h⁩ ⁦e⁩  [ ]  ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩  [ ]  ⁦/⁩ ⁦p⁩ ⁦a⁩ ⁦t⁩ ⁦h⁩ ⁦/⁩   ⁦t⁩ ⁦o⁩ ⁦/⁩  ⁦m⁩ ⁦y⁩ ⁦/⁩   
⁦h⁩ ⁦o⁩ ⁦m⁩ ⁦e⁩  [ ]  ⁦i⁩ ⁦s⁩  [ ]  ⁦c⁩ ⁦u⁩ ⁦r⁩ ⁦s⁩ ⁦e⁩ ⁦d⁩  ⁦.⁩ [0x0a]  
-Breaks:     c  c c c    lc c c c c    lc c c c c c lc  c c lc c c lc  c c c c    lc c c    lc c c c c c c  c 
     lc
-Whitespace:        x             x                                          x         x                    w 
     w 
-Sentences:  bs                                                                                             e 
     b 
-Words:      bs     be   bs       be   bs           bse     bs     bse       be   bs   be   bs           be b 
     b 
-Graphemes:  b  b b b    b  b b b b    b  b b b b b b   b b b  b b b   b b b b    b  b b    b  b b b b b b  b 
     b 
-Hyphens:       i i         i i i           i i i       i      i       i i i         i         i i i i i      
       
+Text:         ⁦t⁩ ⁦h⁩ ⁦e⁩  [ ]  ⁦f⁩ ⁦i⁩ ⁦l⁩ ⁦e⁩  [ ]  ⁦/⁩ ⁦p⁩ ⁦a⁩ ⁦t⁩ ⁦h⁩ ⁦/⁩   ⁦t⁩ ⁦o⁩ ⁦/⁩   ⁦m⁩ ⁦y⁩ ⁦/⁩   
⁦h⁩ ⁦o⁩ ⁦m⁩ ⁦e⁩  [ ]  ⁦i⁩ ⁦s⁩  [ ]  ⁦c⁩ ⁦u⁩ ⁦r⁩ ⁦s⁩ ⁦e⁩ ⁦d⁩  ⁦.⁩ [0x0a]  
+Breaks:     c  c c c    lc c c c c    lc c c c c c lc  c c lc  c c lc  c c c c    lc c c    lc c c c c c c  
c      Lc
+Whitespace:        x             x                                           x         x                    
w      w 
+Sentences:  bs                                                                                              
e      b 
+Words:      bs     be   bs       be   bs           bse     bse     bse       be   bs   be   bs           be 
b      b 
+Graphemes:  b  b b b    b  b b b b    b  b b b b b b   b b b   b b b   b b b b    b  b b    b  b b b b b b  
b      b 
+Hyphens:       i i         i i i           i i i       i       i       i i i         i         i i i i i
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]