[pango/line-breaking-fixes4: 1/2] break: Fix hyphen condition




commit 6655ceabff26f7830335833ace8e59e78251c296
Author: Matthias Clasen <mclasen redhat com>
Date:   Sun Nov 21 16:17:37 2021 -0500

    break: Fix hyphen condition
    
    When deciding whether a break inserts a hyphen,
    we want to look at the script of the *previous*
    character, not the current one. And then we need
    to special-case the soft hyphen (SHY, U+00AD),
    which is in the Common script.

 pango/break.c                   |  9 +++++++--
 tests/breaks/eight.expected     |  2 +-
 tests/breaks/eleven.expected    | 14 +++++++-------
 tests/breaks/fifteen.expected   |  4 ++--
 tests/breaks/one.expected       |  4 ++--
 tests/breaks/seventeen.expected |  2 +-
 tests/breaks/sixteen.expected   |  4 ++--
 tests/breaks/thirteen.expected  |  4 ++--
 tests/breaks/two.break          |  2 +-
 tests/breaks/two.expected       | 14 +++++++-------
 10 files changed, 32 insertions(+), 27 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 3af083ce..5622ca21 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -170,6 +170,8 @@ default_break (const char    *text,
   GUnicodeBreakType prev_break_type;
   GUnicodeBreakType prev_prev_break_type;
 
+  PangoScript prev_script;
+
   /* See Grapheme_Cluster_Break Property Values table of UAX#29 */
   typedef enum
   {
@@ -262,6 +264,7 @@ default_break (const char    *text,
   prev_break_type = G_UNICODE_BREAK_UNKNOWN;
   prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
   prev_wc = 0;
+  prev_script = PANGO_SCRIPT_COMMON;
   prev_jamo = NO_JAMO;
   prev_space_or_hyphen = FALSE;
 
@@ -539,7 +542,6 @@ default_break (const char    *text,
       }
 
       script = (PangoScript)g_unichar_get_script (wc);
-
       /* ---- UAX#29 Word Boundaries ---- */
       {
        is_word_boundary = FALSE;
@@ -1571,9 +1573,11 @@ default_break (const char    *text,
         attrs[i].break_inserts_hyphen = FALSE;
         attrs[i].break_removes_preceding = FALSE;
 
-        switch ((int)script)
+        switch ((int)prev_script)
           {
           case PANGO_SCRIPT_COMMON:
+            insert_hyphens = prev_wc == 0x00ad;
+            break;
           case PANGO_SCRIPT_HAN:
           case PANGO_SCRIPT_HANGUL:
           case PANGO_SCRIPT_HIRAGANA:
@@ -1634,6 +1638,7 @@ default_break (const char    *text,
       }
 
       prev_wc = wc;
+      prev_script = script;
 
       /* wc might not be a valid Unicode base character, but really all we
        * need to know is the last non-combining character */
diff --git a/tests/breaks/eight.expected b/tests/breaks/eight.expected
index 39794d22..d71bb02f 100644
--- a/tests/breaks/eight.expected
+++ b/tests/breaks/eight.expected
@@ -4,4 +4,4 @@ Whitespace:        x               x            x                          x
 Sentences:  bs                                                                                               
                e       b 
 Words:      bs     be   bs   e s   be   bs   be b   bs                     be   bs     be bs   be   bs       
  be bs         be      b 
 Graphemes:  b  b b b    b  b b b b b    b  b b  b   b  b b b b b b b b b b b    b  b b b  b  b b    b  b b b 
b b  b  b b b b b       b 
-Hyphens:                                               i i i i     i i i i                             i i i 
i       i i i i           
+Hyphens:                                               i i i i i     i i i                             i i i 
i       i i i i           
diff --git a/tests/breaks/eleven.expected b/tests/breaks/eleven.expected
index 8df89869..b4a33897 100644
--- a/tests/breaks/eleven.expected
+++ b/tests/breaks/eleven.expected
@@ -1,7 +1,7 @@
-Text:         ⁦❤⁩ ⁦️⁩ ⁦︎⁩ ⁦︎⁩  ⁦👨⁩ [0x200d]⁦🦰⁩  ⁦👨⁩⁦🏿⁩ [0x200d]⁦🦱⁩  ⁦0⁩ ⁦️⁩ ⁦⃣⁩  
⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f]  ⁦🇩⁩⁦🇪⁩ ⁦️⁩ [0x0a]  
-Breaks:     c        lc           lc            lc     lc                                              lc    
c      lc
-Whitespace:                                                                                                  
w      w 
-Sentences:  bs                                                                                               
e      b 
-Words:      b        b            b             bs     be                                              b     
b      b 
-Graphemes:  b        b            b             b      b                                               b     
b      b 
-Hyphens:       i i i    i             i            i i                                                     i 
         
+Text:         ⁦❤⁩⁦️⁩ ⁦︎⁩ ⁦︎⁩  ⁦👨⁩[0x200d] ⁦🦰⁩  ⁦👨⁩⁦🏿⁩[0x200d] ⁦🦱⁩  ⁦0⁩⁦️⁩ ⁦⃣⁩  
⁦🏴⁩[0xe0075][0xe0073][0xe0063][0xe0061][0xe007f]  ⁦🇩⁩⁦🇪⁩⁦️⁩ [0x0a]  
+Breaks:     c       lc           lc            lc    lc                                              lc   c  
    Lc
+Whitespace:                                                                                               w  
    w 
+Sentences:  bs                                                                                            e  
    b 
+Words:      b       b            b             bs    be                                              b    b  
    b 
+Graphemes:  b       b            b             b     b                                               b    b  
    b 
+Hyphens:        i i            i             i     i                                                         
      
diff --git a/tests/breaks/fifteen.expected b/tests/breaks/fifteen.expected
index 93b37c39..3521a70b 100644
--- a/tests/breaks/fifteen.expected
+++ b/tests/breaks/fifteen.expected
@@ -1,7 +1,7 @@
 Text:         ⁦o⁩ ⁦n⁩ ⁦e⁩  [ ]  ⁦t⁩ ⁦w⁩ ⁦o⁩  ⁦-⁩  ⁦t⁩ ⁦h⁩ ⁦r⁩ ⁦e⁩ ⁦e⁩  [ ]  ⁦f⁩ ⁦o⁩ [0xad]  ⁦u⁩ ⁦r⁩  [0x0a]  
-Breaks:     c  c c c    lc c c c  lc c c c c c    lc c c      lc c c       lc
+Breaks:     c  c c c    lc c c c  lc c c c c c    lc c c      lc c c       Lc
 Whitespace:        x                         x                     w       w 
 Sentences:  bs                                                     e       b 
 Words:      bs     be   bs     be bs         be   bs               be      b 
 Graphemes:  b  b b b    b  b b b  b  b b b b b    b  b b      b  b b       b 
-Hyphens:                   i i       i i i i         i        i  i           
+Hyphens:                   i i       i i i i         i i      i  i           
diff --git a/tests/breaks/one.expected b/tests/breaks/one.expected
index 44fee3ef..6c811256 100644
--- a/tests/breaks/one.expected
+++ b/tests/breaks/one.expected
@@ -1,7 +1,7 @@
 Text:         ⁦a⁩ ⁦b⁩ ⁦c⁩  ⁦/⁩  ⁦d⁩ ⁦e⁩ ⁦f⁩  [ ]  ⁦g⁩ ⁦h⁩ ⁦i⁩ [0xad]  ⁦j⁩ ⁦k⁩ ⁦l⁩  ⁦.⁩ [ ]  ⁦B⁩ ⁦l⁩ ⁦a⁩  
[0x0a]  
-Breaks:     c  c c c  lc c c c    lc c c c      lc c c c  c   lc c c c       lc
+Breaks:     c  c c c  lc c c c    lc c c c      lc c c c  c   lc c c c       Lc
 Whitespace:                  x                            x          w       w 
 Sentences:  bs                                            e   bs     e       b 
 Words:      bs     be bs     be   bs                   be b   bs     be      b 
 Graphemes:  b  b b b  b  b b b    b  b b b      b  b b b  b   b  b b b       b 
-Hyphens:       i i       i i         i i        i  i i           i i           
+Hyphens:       i i       i i         i i i      i  i i           i i           
diff --git a/tests/breaks/seventeen.expected b/tests/breaks/seventeen.expected
index 8f5f2749..35fb5120 100644
--- a/tests/breaks/seventeen.expected
+++ b/tests/breaks/seventeen.expected
@@ -4,4 +4,4 @@ Whitespace:      x           x               x                                 w
 Sentences:  bs                                                                 e       bs                    
                                             e       bs                                                       
          e       b 
 Words:      bs   be   bs     be   bs         be   bs e s                       be      bs   be   bs     be   
bs         be   bs e s                       be      bs   be   bs     be   bs         be   bs e s           e 
s         be      b 
 Graphemes:  b  b b    b  b b b    b  b b b b b    b  b b b b b b b b b b b b b b       b  b b    b  b b b    
b  b b b b b    b  b b b b b b b b b b b b b b       b  b b    b  b b b    b  b b b b b    b  b b b b b b b b 
b b b b b b       b 
-Hyphens:       i         i i         i i i i           i i i i i i   i i i i i            i         i i      
   i i i i           i i i i i i   i i i i i            i         i i         i i i i           i i i i i i   
i i i i i           
+Hyphens:       i         i i         i i i i             i i i i i     i i i i            i         i i      
   i i i i             i i i i i     i i i i            i         i i         i i i i             i i i i i   
  i i i i           
diff --git a/tests/breaks/sixteen.expected b/tests/breaks/sixteen.expected
index 0fd06fb2..2868b8e4 100644
--- a/tests/breaks/sixteen.expected
+++ b/tests/breaks/sixteen.expected
@@ -1,7 +1,7 @@
 Text:         ⁦h⁩ ⁦y⁩ ⁦‧⁩  ⁦p⁩ ⁦h⁩ ⁦e⁩ ⁦n⁩  ⁦|⁩  ⁦a⁩ ⁦t⁩ ⁦i⁩ ⁦o⁩ ⁦n⁩  [ ]  ⁦o⁩ ⁦v⁩ ⁦e⁩ ⁦r⁩ [0xad]  ⁦l⁩ ⁦o⁩ 
⁦a⁩ ⁦d⁩  [0x0a]  
-Breaks:     c  c c lc c c c c  lc c c c c c    lc c c c c      lc c c c c       lc
+Breaks:     c  c c lc c c c c  lc c c c c c    lc c c c c      lc c c c c       Lc
 Whitespace:                               x                             w       w 
 Sentences:  bs                                                          e       b 
 Words:      bs   e s        be bs         be   bs                       be      b 
 Graphemes:  b  b b b  b b b b  b  b b b b b    b  b b b b      b  b b b b       b 
-Hyphens:       i   ri i i i       i i i i         i i i        i  i i i           
+Hyphens:       i   ri i i i       i i i i         i i i i      i  i i i           
diff --git a/tests/breaks/thirteen.expected b/tests/breaks/thirteen.expected
index e6b1787d..25a38150 100644
--- a/tests/breaks/thirteen.expected
+++ b/tests/breaks/thirteen.expected
@@ -1,7 +1,7 @@
 Text:         ⁦a⁩  [ ]  ⁦a⁩ ⁦b⁩ [0x200b]  ⁦s⁩ ⁦p⁩ [0x200b] [ ] [ ]  ⁦d⁩ ⁦e⁩ [0xad]  ⁦f⁩ ⁦g⁩  [ ]  ⁦b⁩  
[0x0a]  
-Breaks:     c  c    lc c c        lc c c        c   c   lc c c      lc c c    lc c       lc
+Breaks:     c  c    lc c c        lc c c        c   c   lc c c      lc c c    lc c       Lc
 Whitespace:    x                                x   x                    x       w       w 
 Sentences:  bs                                                                   e       b 
 Words:      bs be   bs                                                   be   bs be      b 
 Graphemes:  b  b    b  b b        b  b b        b   b   b  b b      b  b b    b  b       b 
-Hyphens:               i          i  i                     i        i  i                   
+Hyphens:               i i           i i                   i i      i  i                   
diff --git a/tests/breaks/two.break b/tests/breaks/two.break
index 6ff0a36e..53c39c5c 100644
--- a/tests/breaks/two.break
+++ b/tests/breaks/two.break
@@ -1,3 +1,3 @@
 # Example from https://gitlab.gnome.org/GNOME/pango/issues/218
 # This shows difference between word start/end and boundary
-goril·les
+goril‧les
diff --git a/tests/breaks/two.expected b/tests/breaks/two.expected
index 2921d224..58d15186 100644
--- a/tests/breaks/two.expected
+++ b/tests/breaks/two.expected
@@ -1,7 +1,7 @@
-Text:         ⁦g⁩ ⁦o⁩ ⁦r⁩ ⁦i⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦e⁩ ⁦s⁩  [0x0a]  
-Breaks:     c  c c c c c c c c c       lc
-Whitespace:                    w       w 
-Sentences:  bs                 e       b 
-Words:      bs         e s     be      b 
-Graphemes:  b  b b b b b b b b b       b 
-Hyphens:       i i i i   i i i           
+Text:         ⁦g⁩ ⁦o⁩ ⁦r⁩ ⁦i⁩ ⁦l⁩ ⁦‧⁩  ⁦l⁩ ⁦e⁩ ⁦s⁩  [0x0a]  
+Breaks:     c  c c c c c lc c c c       Lc
+Whitespace:                     w       w 
+Sentences:  bs                  e       b 
+Words:      bs         e s      be      b 
+Graphemes:  b  b b b b b b  b b b       b 
+Hyphens:       i i i i   ri i i           


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]