[pango: 1/2] Support for Catalan middle dot in word start and end




commit 89f69b369b9b551a75cbabe0af5eb3931afae272
Author: Jordi Mas <jmas softcatala org>
Date:   Wed Oct 27 23:29:13 2021 +0000

    Support for Catalan middle dot in word start and end

 pango/break-latin.c             | 61 +++++++++++++++++++++++++++++++++++++++++
 pango/break.c                   |  6 ++++
 tests/breaks/seventeen.break    |  3 ++
 tests/breaks/seventeen.expected |  7 +++++
 4 files changed, 77 insertions(+)
---
diff --git a/pango/break-latin.c b/pango/break-latin.c
new file mode 100644
index 00000000..608bff94
--- /dev/null
+++ b/pango/break-latin.c
@@ -0,0 +1,61 @@
+/* Pango
+ * break-latin.c:
+ *
+ * Copyright (C) 2021 Jordi Mas i Hernàndez <jmas softcatala org>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.         See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+#include "config.h"
+
+#include "pango-break.h"
+#include "pango-impl-utils.h"
+
+/* Tailor word boundaries for Catalan: the middle dot (U+00B7) in "l·l"
+ * is part of the word, so clear the word end/start flags around it.
+ * @len is a byte length, while @attrs is indexed by character. */
+static void
+break_latin (const char          *text,
+           int                  len,
+           const PangoAnalysis *analysis,
+           PangoLogAttr        *attrs,
+           int                  attrs_len G_GNUC_UNUSED)
+{
+    const gchar *end = text + len;
+    const gchar *p, *next;
+    gunichar wc, prev_wc = 0;
+    int i = 0;
+
+    /* Only text tagged as Catalan ("ca" or "ca-XX") is tailored */
+    if (!analysis || !pango_language_matches (analysis->language, "ca"))
+        return;
+
+    /* Walk by character; stopping at @end keeps reads inside the item */
+    for (p = text; p < end; p = next, i++)
+    {
+        wc = g_utf8_get_char (p);
+        next = g_utf8_next_char (p);
+
+        if (wc == 0x00b7 && next < end &&
+            g_unichar_tolower (prev_wc) == 'l' &&
+            g_unichar_tolower (g_utf8_get_char (next)) == 'l')
+        {
+            attrs[i].is_word_end = FALSE;
+            attrs[i+1].is_word_start = FALSE;
+        }
+        prev_wc = wc;
+    }
+}
+
diff --git a/pango/break.c b/pango/break.c
index 917d6031..3a3e0b23 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -1664,6 +1664,7 @@ default_break (const char    *text,
 #include "break-arabic.c"
 #include "break-indic.c"
 #include "break-thai.c"
+#include "break-latin.c"
 
 static gboolean
 break_script (const char          *item_text,
@@ -1694,6 +1695,11 @@ break_script (const char          *item_text,
     case PANGO_SCRIPT_THAI:
       break_thai (item_text, item_length, analysis, attrs, attrs_len);
       break;
+
+    case PANGO_SCRIPT_LATIN:
+      break_latin (item_text, item_length, analysis, attrs, attrs_len);
+      break;
+
     default:
       return FALSE;
     }
diff --git a/tests/breaks/seventeen.break b/tests/breaks/seventeen.break
new file mode 100644
index 00000000..82a207fb
--- /dev/null
+++ b/tests/breaks/seventeen.break
@@ -0,0 +1,3 @@
+<span lang='ca-es'>És una prova d'instal·lació</span>
+<span lang='ca-fr'>És una prova d'instal·lació</span>
+<span lang='en-US'>És una prova d'instal·lació</span>
diff --git a/tests/breaks/seventeen.expected b/tests/breaks/seventeen.expected
new file mode 100644
index 00000000..8f5f2749
--- /dev/null
+++ b/tests/breaks/seventeen.expected
@@ -0,0 +1,7 @@
+Text:         ⁦É⁩ ⁦s⁩  [ ]  ⁦u⁩ ⁦n⁩ ⁦a⁩  [ ]  ⁦p⁩ ⁦r⁩ ⁦o⁩ ⁦v⁩ ⁦a⁩  [ ]  ⁦d⁩ ⁦'⁩ ⁦i⁩ ⁦n⁩ ⁦s⁩ ⁦t⁩ ⁦a⁩ ⁦l⁩ ⁦·⁩ 
⁦l⁩ ⁦a⁩ ⁦c⁩ ⁦i⁩ ⁦ó⁩  [0x0a]  ⁦É⁩ ⁦s⁩  [ ]  ⁦u⁩ ⁦n⁩ ⁦a⁩  [ ]  ⁦p⁩ ⁦r⁩ ⁦o⁩ ⁦v⁩ ⁦a⁩  [ ]  ⁦d⁩ ⁦'⁩ ⁦i⁩ ⁦n⁩ ⁦s⁩ 
⁦t⁩ ⁦a⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦a⁩ ⁦c⁩ ⁦i⁩ ⁦ó⁩  [0x0a]  ⁦É⁩ ⁦s⁩  [ ]  ⁦u⁩ ⁦n⁩ ⁦a⁩  [ ]  ⁦p⁩ ⁦r⁩ ⁦o⁩ ⁦v⁩ ⁦a⁩  [ ]  ⁦d⁩ 
⁦'⁩ ⁦i⁩ ⁦n⁩ ⁦s⁩ ⁦t⁩ ⁦a⁩ ⁦l⁩ ⁦·⁩ ⁦l⁩ ⁦a⁩ ⁦c⁩ ⁦i⁩ ⁦ó⁩  [0x0a]  
+Breaks:     c  c c    lc c c c    lc c c c c c    lc c c c c c c c c c c c c c c       Lc c c    lc c c c    
lc c c c c c    lc c c c c c c c c c c c c c c       Lc c c    lc c c c    lc c c c c c    lc c c c c c c c c 
c c c c c c       Lc
+Whitespace:      x           x               x                                 w            x           x    
           x                                 w            x           x               x                       
          w       w 
+Sentences:  bs                                                                 e       bs                    
                                             e       bs                                                       
          e       b 
+Words:      bs   be   bs     be   bs         be   bs e s                       be      bs   be   bs     be   
bs         be   bs e s                       be      bs   be   bs     be   bs         be   bs e s           e 
s         be      b 
+Graphemes:  b  b b    b  b b b    b  b b b b b    b  b b b b b b b b b b b b b b       b  b b    b  b b b    
b  b b b b b    b  b b b b b b b b b b b b b b       b  b b    b  b b b    b  b b b b b    b  b b b b b b b b 
b b b b b b       b 
+Hyphens:       i         i i         i i i i           i i i i i i   i i i i i            i         i i      
   i i i i           i i i i i i   i i i i i            i         i i         i i i i           i i i i i i   
i i i i i           


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]