[pango] Update pango_default_break function for Line Break



commit c4619480e536e393e2d4a8e26a6ceb5af1fe80e3
Author: Peng Wu <alexepico gmail com>
Date:   Tue Sep 12 14:57:11 2017 +0800

    Update pango_default_break function for Line Break
    
    Support Line Break of Unicode 9.0.
    
    https://bugzilla.gnome.org/show_bug.cgi?id=788115

 pango/break.c |  880 +++++++++++++++++++++++++++------------------------------
 1 files changed, 421 insertions(+), 459 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index 34a7cd2..c46f338 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -40,323 +40,16 @@ typedef enum
   BREAK_PROHIBITED, /* no break, even if spaces intervene */
   BREAK_IF_SPACES,  /* "indirect break" (only if there are spaces) */
   BREAK_ALLOWED     /* "direct break" (can always break here) */
-  /* TR 14 has one more break-opportunity class,
+  /* TR 14 has two more break-opportunity classes,
    * "indirect break opportunity for combining marks following a space"
+   * and "prohibited break for combining marks"
    * but we handle that inline in the code.
    */
 } BreakOpportunity;
 
-
-enum
-{
-  INDEX_OPEN_PUNCTUATION,
-  INDEX_CLOSE_PUNCTUATION,
-  INDEX_QUOTATION,
-  INDEX_NON_BREAKING_GLUE,
-  INDEX_NON_STARTER,
-  INDEX_EXCLAMATION,
-  INDEX_SYMBOL,
-  INDEX_INFIX_SEPARATOR,
-  INDEX_PREFIX,
-  INDEX_POSTFIX,
-  INDEX_NUMERIC,
-  INDEX_ALPHABETIC,
-  INDEX_IDEOGRAPHIC,
-  INDEX_INSEPARABLE,
-  INDEX_HYPHEN,
-  INDEX_AFTER,
-  INDEX_BEFORE,
-  INDEX_BEFORE_AND_AFTER,
-  INDEX_ZERO_WIDTH_SPACE,
-  INDEX_COMBINING_MARK,
-  INDEX_WORD_JOINER,
-
-  /* End of the table */
-
-  INDEX_END_OF_TABLE,
-
-  /* The following are not in the tables */
-  INDEX_MANDATORY,
-  INDEX_CARRIAGE_RETURN,
-  INDEX_LINE_FEED,
-  INDEX_SURROGATE,
-  INDEX_CONTINGENT,
-  INDEX_SPACE,
-  INDEX_COMPLEX_CONTEXT,
-  INDEX_AMBIGUOUS,
-  INDEX_UNKNOWN,
-  INDEX_NEXT_LINE,
-  INDEX_HANGUL_L_JAMO,
-  INDEX_HANGUL_V_JAMO,
-  INDEX_HANGUL_T_JAMO,
-  INDEX_HANGUL_LV_SYLLABLE,
-  INDEX_HANGUL_LVT_SYLLABLE,
-};
-
-static const BreakOpportunity row_OPEN_PUNCTUATION[INDEX_END_OF_TABLE] = {
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_CLOSE_PUNCTUATION[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_QUOTATION[INDEX_END_OF_TABLE] = {
-  BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_NON_BREAKING_GLUE[INDEX_END_OF_TABLE] = {
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_NON_STARTER[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_EXCLAMATION[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_SYMBOL[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_INFIX_SEPARATOR[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_PREFIX[INDEX_END_OF_TABLE] = {
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_POSTFIX[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_NUMERIC[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_ALPHABETIC[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_IDEOGRAPHIC[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_INSEPARABLE[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_HYPHEN[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_AFTER[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_BEFORE[INDEX_END_OF_TABLE] = {
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_BEFORE_AND_AFTER[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_ZERO_WIDTH_SPACE[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED, BREAK_ALLOWED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED
-};
-
-static const BreakOpportunity row_COMBINING_MARK[INDEX_END_OF_TABLE] = {
-  BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_ALLOWED, BREAK_ALLOWED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity row_WORD_JOINER[INDEX_END_OF_TABLE] = {
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_IF_SPACES,
-  BREAK_IF_SPACES, BREAK_IF_SPACES, BREAK_PROHIBITED, BREAK_PROHIBITED,
-  BREAK_PROHIBITED
-};
-
-static const BreakOpportunity *const line_break_rows[INDEX_END_OF_TABLE] = {
-  row_OPEN_PUNCTUATION, /* INDEX_OPEN_PUNCTUATION */
-  row_CLOSE_PUNCTUATION, /* INDEX_CLOSE_PUNCTUATION */
-  row_QUOTATION, /* INDEX_QUOTATION */
-  row_NON_BREAKING_GLUE, /* INDEX_NON_BREAKING_GLUE */
-  row_NON_STARTER, /* INDEX_NON_STARTER */
-  row_EXCLAMATION, /* INDEX_EXCLAMATION */
-  row_SYMBOL, /* INDEX_SYMBOL */
-  row_INFIX_SEPARATOR, /* INDEX_INFIX_SEPARATOR */
-  row_PREFIX, /* INDEX_PREFIX */
-  row_POSTFIX, /* INDEX_POSTFIX */
-  row_NUMERIC, /* INDEX_NUMERIC */
-  row_ALPHABETIC, /* INDEX_ALPHABETIC */
-  row_IDEOGRAPHIC, /* INDEX_IDEOGRAPHIC */
-  row_INSEPARABLE, /* INDEX_INSEPARABLE */
-  row_HYPHEN, /* INDEX_HYPHEN */
-  row_AFTER, /* INDEX_AFTER */
-  row_BEFORE, /* INDEX_BEFORE */
-  row_BEFORE_AND_AFTER, /* INDEX_BEFORE_AND_AFTER */
-  row_ZERO_WIDTH_SPACE, /* INDEX_ZERO_WIDTH_SPACE */
-  row_COMBINING_MARK, /* INDEX_COMBINING_MARK */
-  row_WORD_JOINER /* INDEX_WORD_JOINER */
-};
-
-/* Map GUnicodeBreakType to table indexes */
-static const int line_break_indexes[] = {
-  INDEX_MANDATORY,
-  INDEX_CARRIAGE_RETURN,
-  INDEX_LINE_FEED,
-  INDEX_COMBINING_MARK,
-  INDEX_SURROGATE,
-  INDEX_ZERO_WIDTH_SPACE,
-  INDEX_INSEPARABLE,
-  INDEX_NON_BREAKING_GLUE,
-  INDEX_CONTINGENT,
-  INDEX_SPACE,
-  INDEX_AFTER,
-  INDEX_BEFORE,
-  INDEX_BEFORE_AND_AFTER,
-  INDEX_HYPHEN,
-  INDEX_NON_STARTER,
-  INDEX_OPEN_PUNCTUATION,
-  INDEX_CLOSE_PUNCTUATION,
-  INDEX_QUOTATION,
-  INDEX_EXCLAMATION,
-  INDEX_IDEOGRAPHIC,
-  INDEX_NUMERIC,
-  INDEX_INFIX_SEPARATOR,
-  INDEX_SYMBOL,
-  INDEX_ALPHABETIC,
-  INDEX_PREFIX,
-  INDEX_POSTFIX,
-  INDEX_COMPLEX_CONTEXT,
-  INDEX_AMBIGUOUS,
-  INDEX_UNKNOWN,
-  INDEX_NEXT_LINE,
-  INDEX_WORD_JOINER,
-  INDEX_HANGUL_L_JAMO,
-  INDEX_HANGUL_V_JAMO,
-  INDEX_HANGUL_T_JAMO,
-  INDEX_HANGUL_LV_SYLLABLE,
-  INDEX_HANGUL_LVT_SYLLABLE
-};
-
+/* Keep the upper bound of this range check in sync with glib/gunicode.h. */
 #define BREAK_TYPE_SAFE(btype)            \
-        ((btype) < G_N_ELEMENTS(line_break_indexes) ? (btype) : G_UNICODE_BREAK_UNKNOWN)
-#define BREAK_INDEX(btype)                \
-        (line_break_indexes[(btype)])
-#define BREAK_ROW(before_type)            \
-        (line_break_rows[BREAK_INDEX (before_type)])
-#define BREAK_OP(before_type, after_type) \
-        (BREAK_ROW (before_type)[BREAK_INDEX (after_type)])
-#define IN_BREAK_TABLE(btype)             \
-        ((btype) < G_N_ELEMENTS(line_break_indexes) && BREAK_INDEX((btype)) < INDEX_END_OF_TABLE)
-
+        ((btype) <= G_UNICODE_BREAK_ZERO_WIDTH_JOINER ? (btype) : G_UNICODE_BREAK_UNKNOWN)
 
 
 /*
@@ -487,8 +180,8 @@ pango_default_break (const gchar   *text,
   JamoType prev_jamo;
 
   GUnicodeBreakType next_break_type;
-  GUnicodeBreakType prev_break_type; /* skips spaces */
-  gboolean prev_was_break_space;
+  GUnicodeBreakType prev_break_type;
+  GUnicodeBreakType prev_prev_break_type;
 
   /* See Grapheme_Cluster_Break Property Values table of UAX#29 */
   typedef enum
@@ -554,6 +247,17 @@ pango_default_break (const gchar   *text,
   SentenceBreakType prev_prev_SB_type = SB_Other, prev_SB_type = SB_Other;
   gint prev_SB_i = -1;
 
+  /* Rule LB25 with Example 7 of Customization */
+  typedef enum
+  {
+    LB_Other,
+    LB_Numeric,
+    LB_Numeric_Close,
+    LB_RI_Odd,
+    LB_RI_Even,
+  } LineBreakType;
+  LineBreakType prev_LB_type = LB_Other;
+
   WordType current_word_type = WordNone;
   gunichar last_word_letter = 0;
   gunichar base_character = 0;
@@ -570,7 +274,7 @@ pango_default_break (const gchar   *text,
   next = text;
 
   prev_break_type = G_UNICODE_BREAK_UNKNOWN;
-  prev_was_break_space = FALSE;
+  prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
   prev_wc = 0;
   prev_jamo = NO_JAMO;
 
@@ -590,6 +294,7 @@ pango_default_break (const gchar   *text,
       GUnicodeType type;
       gunichar wc;
       GUnicodeBreakType break_type;
+      GUnicodeBreakType row_break_type;
       BreakOpportunity break_op;
       JamoType jamo;
       gboolean makes_hangul_syllable;
@@ -1374,168 +1079,379 @@ pango_default_break (const gchar   *text,
 
       break_op = BREAK_ALREADY_HANDLED;
 
-      g_assert (prev_break_type != G_UNICODE_BREAK_SPACE);
+      row_break_type = prev_break_type == G_UNICODE_BREAK_SPACE ?
+       prev_prev_break_type : prev_break_type;
+      g_assert (row_break_type != G_UNICODE_BREAK_SPACE);
 
       attrs[i].is_char_break = FALSE;
       attrs[i].is_line_break = FALSE;
       attrs[i].is_mandatory_break = FALSE;
 
-      if (attrs[i].is_cursor_position) /* If it's not a grapheme boundary,
-                                       * it's not a line break either
-                                       */
+      /* Rule LB1:
+        assign a line breaking class to each code point of the input. */
+      switch ((int) break_type)
        {
-         /* space followed by a combining mark is handled
-          * specially; (rule 7a from TR 14)
-          */
-         if (break_type == G_UNICODE_BREAK_SPACE &&
-             next_break_type == G_UNICODE_BREAK_COMBINING_MARK)
-           break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
+       case G_UNICODE_BREAK_AMBIGUOUS:
+       case G_UNICODE_BREAK_SURROGATE:
+       case G_UNICODE_BREAK_UNKNOWN:
+         break_type = G_UNICODE_BREAK_ALPHABETIC;
+         break;
 
-         /* Unicode doesn't specify char wrap; we wrap around all chars
-          * except where a line break is prohibited, which means we
-          * effectively break everywhere except inside runs of spaces.
-          */
-         attrs[i].is_char_break = TRUE;
+       case G_UNICODE_BREAK_COMPLEX_CONTEXT:
+         if (type == G_UNICODE_NON_SPACING_MARK ||
+             type == G_UNICODE_SPACING_MARK)
+           break_type = G_UNICODE_BREAK_COMBINING_MARK;
+         else
+           break_type = G_UNICODE_BREAK_ALPHABETIC;
+         break;
 
-         /* Make any necessary replacements first */
-         switch ((int) prev_break_type)
-           {
-           case G_UNICODE_BREAK_HANGUL_L_JAMO:
-           case G_UNICODE_BREAK_HANGUL_V_JAMO:
-           case G_UNICODE_BREAK_HANGUL_T_JAMO:
-           case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
-           case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
-             /* treat Jamo as IDEOGRAPHIC from now
-              */
-             prev_break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
-             break;
+       case G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER:
+         break_type = G_UNICODE_BREAK_NON_STARTER;
+         break;
 
-           case G_UNICODE_BREAK_AMBIGUOUS:
-             /* FIXME
-              * we need to resolve the East Asian width
-              * to decide what to do here
-              */
-           case G_UNICODE_BREAK_COMPLEX_CONTEXT:
-             /* FIXME
-              * language engines should handle this case...
-              */
-           case G_UNICODE_BREAK_UNKNOWN:
-             /* convert unknown, complex, ambiguous to ALPHABETIC
-              */
-             prev_break_type = G_UNICODE_BREAK_ALPHABETIC;
-             break;
+       default:
+         ;
+       }
 
-           default:
-             ;
-           }
+      /* If it's not a grapheme boundary, it's not a line break either */
+      if (attrs[i].is_cursor_position ||
+         break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+         break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER ||
+         break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+         break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+         break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+         break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+         break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE ||
+         break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR)
+       {
+         LineBreakType LB_type;
 
-         switch ((int) prev_break_type)
-           {
-           case G_UNICODE_BREAK_MANDATORY:
-           case G_UNICODE_BREAK_LINE_FEED:
-           case G_UNICODE_BREAK_NEXT_LINE:
-             attrs[i].is_line_break = TRUE;
-             attrs[i].is_mandatory_break = TRUE;
-             break;
+         /* Find the LineBreakType of wc */
+         LB_type = LB_Other;
 
-           case G_UNICODE_BREAK_CARRIAGE_RETURN:
-             if (wc != '\n')
-               {
-                 attrs[i].is_line_break = TRUE;
-                 attrs[i].is_mandatory_break = TRUE;
-               }
-             break;
+         if (break_type == G_UNICODE_BREAK_NUMERIC)
+           LB_type = LB_Numeric;
 
-           case G_UNICODE_BREAK_CONTINGENT:
-             /* can break after 0xFFFC by default, though we might want
-              * to eventually have a PangoLayout setting or
-              * PangoAttribute that disables this, if for some
-              * application breaking after objects is not desired.
-              */
-             break_op = BREAK_ALLOWED;
-             break;
+         if (break_type == G_UNICODE_BREAK_SYMBOL ||
+             break_type == G_UNICODE_BREAK_INFIX_SEPARATOR)
+           {
+             if (!(prev_LB_type == LB_Numeric))
+               LB_type = LB_Other;
+           }
 
-           case G_UNICODE_BREAK_SURROGATE:
-             /* Undefined according to UTR#14, but ALLOWED in test data. */
-             break_op = BREAK_ALLOWED;
-             break;
+         if (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+             break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS)
+           {
+             if (prev_LB_type == LB_Numeric)
+               LB_type = LB_Numeric_Close;
+             else
+               LB_type = LB_Other;
+           }
 
-           default:
-             g_assert (IN_BREAK_TABLE (prev_break_type));
-
-             /* Note that our table assumes that combining marks
-              * are only applied to alphabetic characters;
-              * tech report 14 explains how to remove this assumption
-              * from the code, if anyone ever cares, but it shouldn't
-              * be a problem. Also this issue sort of goes
-              * away since we only look for breaks on grapheme
-              * boundaries.
-              */
+         if (break_type == G_UNICODE_BREAK_REGIONAL_INDICATOR)
+           {
+             if (prev_LB_type == LB_RI_Odd)
+               LB_type = LB_RI_Even;
+             else if (prev_LB_type == LB_RI_Even)
+               LB_type = LB_RI_Odd;
+             else
+               LB_type = LB_RI_Odd;
+           }
 
-             switch ((int) break_type)
-               {
-               case G_UNICODE_BREAK_MANDATORY:
-               case G_UNICODE_BREAK_LINE_FEED:
-               case G_UNICODE_BREAK_CARRIAGE_RETURN:
-               case G_UNICODE_BREAK_NEXT_LINE:
-               case G_UNICODE_BREAK_SPACE:
-                 /* These types all "pile up" at the end of lines and
-                  * get elided.
-                  */
-                 break_op = BREAK_PROHIBITED;
-                 break;
+         attrs[i].is_line_break = TRUE; /* Rule LB31 */
+         /* Unicode doesn't specify char wrap; we wrap around all chars
+          * except where a line break is prohibited, which means we
+          * effectively break everywhere except inside runs of spaces.
+          */
+         attrs[i].is_char_break = TRUE;
 
-               case G_UNICODE_BREAK_CONTINGENT:
-                 /* break before 0xFFFC by default, eventually
-                  * make this configurable?
-                  */
-                 break_op = BREAK_ALLOWED;
-                 break;
+         /* Make any necessary replacements first */
+         if (row_break_type == G_UNICODE_BREAK_UNKNOWN)
+           row_break_type = G_UNICODE_BREAK_ALPHABETIC;
+
+         /* Apply the line break rules in reverse order, so that the higher
+            priority rules, applied later, override the lower priority ones. */
+
+         /* Rule LB30 */
+         if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER ||
+              prev_break_type == G_UNICODE_BREAK_NUMERIC) &&
+             break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS &&
+             (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              break_type == G_UNICODE_BREAK_HEBREW_LETTER ||
+              break_type == G_UNICODE_BREAK_NUMERIC))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB30a */
+         if (prev_LB_type == LB_RI_Odd && LB_type == LB_RI_Even)
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB30b */
+         if (prev_break_type == G_UNICODE_BREAK_EMOJI_BASE &&
+             break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB29 */
+         if (prev_break_type == G_UNICODE_BREAK_INFIX_SEPARATOR &&
+             (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB28 */
+         if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) &&
+             (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB27 */
+         if ((prev_break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+              prev_break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+              prev_break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+              prev_break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+              prev_break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE) &&
+             (break_type == G_UNICODE_BREAK_INSEPARABLE ||
+              break_type == G_UNICODE_BREAK_POSTFIX))
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_break_type == G_UNICODE_BREAK_PREFIX &&
+             (break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+              break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+              break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+              break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+              break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB26 */
+         if (prev_break_type == G_UNICODE_BREAK_HANGUL_L_JAMO &&
+             (break_type == G_UNICODE_BREAK_HANGUL_L_JAMO ||
+              break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+              break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE ||
+              break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE))
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+              prev_break_type == G_UNICODE_BREAK_HANGUL_LV_SYLLABLE) &&
+             (break_type == G_UNICODE_BREAK_HANGUL_V_JAMO ||
+              break_type == G_UNICODE_BREAK_HANGUL_T_JAMO))
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_break_type == G_UNICODE_BREAK_HANGUL_T_JAMO ||
+              prev_break_type == G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE) &&
+             break_type == G_UNICODE_BREAK_HANGUL_T_JAMO)
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB25 with Example 7 of Customization */
+         if ((prev_break_type == G_UNICODE_BREAK_PREFIX ||
+              prev_break_type == G_UNICODE_BREAK_POSTFIX) &&
+             break_type == G_UNICODE_BREAK_NUMERIC)
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_break_type == G_UNICODE_BREAK_PREFIX ||
+              prev_break_type == G_UNICODE_BREAK_POSTFIX) &&
+             (break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION ||
+              break_type == G_UNICODE_BREAK_HYPHEN) &&
+             next_break_type == G_UNICODE_BREAK_NUMERIC)
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION ||
+              prev_break_type == G_UNICODE_BREAK_HYPHEN) &&
+             break_type == G_UNICODE_BREAK_NUMERIC)
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_break_type == G_UNICODE_BREAK_NUMERIC &&
+             (break_type == G_UNICODE_BREAK_NUMERIC ||
+              break_type == G_UNICODE_BREAK_SYMBOL ||
+              break_type == G_UNICODE_BREAK_INFIX_SEPARATOR))
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_LB_type == LB_Numeric &&
+             (break_type == G_UNICODE_BREAK_NUMERIC ||
+              break_type == G_UNICODE_BREAK_SYMBOL ||
+              break_type == G_UNICODE_BREAK_INFIX_SEPARATOR ||
+              break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+              break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS))
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_LB_type == LB_Numeric ||
+              prev_LB_type == LB_Numeric_Close) &&
+             (break_type == G_UNICODE_BREAK_POSTFIX ||
+              break_type == G_UNICODE_BREAK_PREFIX))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB24 */
+         if ((prev_break_type == G_UNICODE_BREAK_PREFIX ||
+              prev_break_type == G_UNICODE_BREAK_POSTFIX) &&
+             (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) &&
+             (break_type == G_UNICODE_BREAK_PREFIX ||
+              break_type == G_UNICODE_BREAK_POSTFIX))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB23 */
+         if ((prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER) &&
+             break_type == G_UNICODE_BREAK_NUMERIC)
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_break_type == G_UNICODE_BREAK_NUMERIC &&
+             (break_type == G_UNICODE_BREAK_ALPHABETIC ||
+              break_type == G_UNICODE_BREAK_HEBREW_LETTER))
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB23a */
+         if (prev_break_type == G_UNICODE_BREAK_PREFIX &&
+             (break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+              break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+              break_type == G_UNICODE_BREAK_EMOJI_MODIFIER))
+           break_op = BREAK_PROHIBITED;
+
+         if ((prev_break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+              prev_break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+              prev_break_type == G_UNICODE_BREAK_EMOJI_MODIFIER) &&
+             break_type == G_UNICODE_BREAK_POSTFIX)
+           break_op = BREAK_PROHIBITED;
+
+         /* Rule LB22 */
+         if (break_type == G_UNICODE_BREAK_INSEPARABLE)
+           {
+             if (prev_break_type == G_UNICODE_BREAK_ALPHABETIC ||
+                 prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER)
+               break_op = BREAK_PROHIBITED;
 
-               case G_UNICODE_BREAK_SURROGATE:
-                 /* Undefined according to UTR#14, but ALLOWED in test data. */
-                 break_op = BREAK_ALLOWED;
-                 break;
+             if (prev_break_type == G_UNICODE_BREAK_EXCLAMATION)
+               break_op = BREAK_PROHIBITED;
 
-               /* Hangul additions are from Unicode 4.1 UAX#14 */
-               case G_UNICODE_BREAK_HANGUL_L_JAMO:
-               case G_UNICODE_BREAK_HANGUL_V_JAMO:
-               case G_UNICODE_BREAK_HANGUL_T_JAMO:
-               case G_UNICODE_BREAK_HANGUL_LV_SYLLABLE:
-               case G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE:
-                 /* treat Jamo as IDEOGRAPHIC from now
-                  */
-                 break_type = G_UNICODE_BREAK_IDEOGRAPHIC;
-
-                 if (makes_hangul_syllable)
-                   break_op = BREAK_IF_SPACES;
-                 else
-                   break_op = BREAK_ALLOWED;
-                 break;
+             if (prev_break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+                 prev_break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+                 prev_break_type == G_UNICODE_BREAK_EMOJI_MODIFIER)
+               break_op = BREAK_PROHIBITED;
 
-               case G_UNICODE_BREAK_AMBIGUOUS:
-                 /* FIXME:
-                  * we need to resolve the East Asian width
-                  * to decide what to do here
-                  */
-               case G_UNICODE_BREAK_COMPLEX_CONTEXT:
-                 /* FIXME:
-                  * language engines should handle this case...
-                  */
-               case G_UNICODE_BREAK_UNKNOWN:
-                 /* treat unknown, complex, and ambiguous like ALPHABETIC
-                  * for now
-                  */
-                 break_op = BREAK_OP (prev_break_type, G_UNICODE_BREAK_ALPHABETIC);
-                 break;
+             if (prev_break_type == G_UNICODE_BREAK_INSEPARABLE)
+               break_op = BREAK_PROHIBITED;
 
-               default:
+             if (prev_break_type == G_UNICODE_BREAK_NUMERIC)
+               break_op = BREAK_PROHIBITED;
+           }
 
-                 g_assert (IN_BREAK_TABLE (break_type));
-                 break_op = BREAK_OP (prev_break_type, break_type);
-                 break;
-               }
-             break;
+         if (break_type == G_UNICODE_BREAK_AFTER ||
+             break_type == G_UNICODE_BREAK_HYPHEN ||
+             break_type == G_UNICODE_BREAK_NON_STARTER ||
+             prev_break_type == G_UNICODE_BREAK_BEFORE)
+           break_op = BREAK_PROHIBITED; /* Rule LB21 */
+
+         if (prev_prev_break_type == G_UNICODE_BREAK_HEBREW_LETTER &&
+             (prev_break_type == G_UNICODE_BREAK_HYPHEN ||
+              prev_break_type == G_UNICODE_BREAK_AFTER))
+           break_op = BREAK_PROHIBITED; /* Rule LB21a */
+
+         if (prev_break_type == G_UNICODE_BREAK_SYMBOL &&
+             break_type == G_UNICODE_BREAK_HEBREW_LETTER)
+           break_op = BREAK_PROHIBITED; /* Rule LB21b */
+
+         if (prev_break_type == G_UNICODE_BREAK_CONTINGENT ||
+             break_type == G_UNICODE_BREAK_CONTINGENT)
+           break_op = BREAK_ALLOWED; /* Rule LB20 */
+
+         if (prev_break_type == G_UNICODE_BREAK_QUOTATION ||
+             break_type == G_UNICODE_BREAK_QUOTATION)
+           break_op = BREAK_PROHIBITED; /* Rule LB19 */
+
+         /* Handle the Space-related rules as a state machine here,
+            overriding the pair table result. */
+         if (prev_break_type == G_UNICODE_BREAK_SPACE) /* Rule LB18 */
+           break_op = BREAK_ALLOWED;
+
+         if (row_break_type == G_UNICODE_BREAK_BEFORE_AND_AFTER &&
+             break_type == G_UNICODE_BREAK_BEFORE_AND_AFTER)
+           break_op = BREAK_PROHIBITED; /* Rule LB17 */
+
+         if ((row_break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+              row_break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS) &&
+             break_type == G_UNICODE_BREAK_NON_STARTER)
+           break_op = BREAK_PROHIBITED; /* Rule LB16 */
+
+         if (row_break_type == G_UNICODE_BREAK_QUOTATION &&
+             break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+           break_op = BREAK_PROHIBITED; /* Rule LB15 */
+
+         if (row_break_type == G_UNICODE_BREAK_OPEN_PUNCTUATION)
+           break_op = BREAK_PROHIBITED; /* Rule LB14 */
+
+         /* Rule LB13 with Example 7 of Customization */
+         if (break_type == G_UNICODE_BREAK_EXCLAMATION)
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_break_type != G_UNICODE_BREAK_NUMERIC &&
+             (break_type == G_UNICODE_BREAK_CLOSE_PUNCTUATION ||
+              break_type == G_UNICODE_BREAK_CLOSE_PARANTHESIS ||
+              break_type == G_UNICODE_BREAK_INFIX_SEPARATOR ||
+              break_type == G_UNICODE_BREAK_SYMBOL))
+           break_op = BREAK_PROHIBITED;
+
+         if (prev_break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE)
+           break_op = BREAK_PROHIBITED; /* Rule LB12 */
+
+         if (break_type == G_UNICODE_BREAK_NON_BREAKING_GLUE &&
+             (prev_break_type != G_UNICODE_BREAK_SPACE &&
+              prev_break_type != G_UNICODE_BREAK_AFTER &&
+              prev_break_type != G_UNICODE_BREAK_HYPHEN))
+           break_op = BREAK_PROHIBITED; /* Rule LB12a */
+
+         if (prev_break_type == G_UNICODE_BREAK_WORD_JOINER ||
+             break_type == G_UNICODE_BREAK_WORD_JOINER)
+           break_op = BREAK_PROHIBITED; /* Rule LB11 */
+
+
+         /* Rule LB9 */
+         if (break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+              break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+           {
+             if (!(prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+                   prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+                   prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+                   prev_break_type == G_UNICODE_BREAK_NEXT_LINE ||
+                   prev_break_type == G_UNICODE_BREAK_SPACE ||
+                   prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE))
+               break_op = BREAK_PROHIBITED;
+           }
+
+         if (row_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+           break_op = BREAK_ALLOWED; /* Rule LB8 */
+
+         if (prev_wc == 0x200D &&
+             (break_type == G_UNICODE_BREAK_IDEOGRAPHIC ||
+              break_type == G_UNICODE_BREAK_EMOJI_BASE ||
+              break_type == G_UNICODE_BREAK_EMOJI_MODIFIER))
+           break_op = BREAK_PROHIBITED; /* Rule LB8a */
+
+         if (break_type == G_UNICODE_BREAK_SPACE ||
+             break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+           break_op = BREAK_PROHIBITED; /* Rule LB7 */
+
+         /* Rule LB6 */
+         if (break_type == G_UNICODE_BREAK_MANDATORY ||
+             break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+             break_type == G_UNICODE_BREAK_LINE_FEED ||
+             break_type == G_UNICODE_BREAK_NEXT_LINE)
+           break_op = BREAK_PROHIBITED;
+
+         /* Rules LB4 and LB5 */
+         if (prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+             (prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN &&
+              wc != '\n') ||
+             prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+             prev_break_type == G_UNICODE_BREAK_NEXT_LINE)
+           {
+             attrs[i].is_mandatory_break = TRUE;
+             break_op = BREAK_ALLOWED;
            }
 
          switch (break_op)
@@ -1543,12 +1459,13 @@ pango_default_break (const gchar   *text,
            case BREAK_PROHIBITED:
              /* can't break here */
              attrs[i].is_char_break = FALSE;
+             attrs[i].is_line_break = FALSE;
              break;
 
            case BREAK_IF_SPACES:
              /* break if prev char was space */
-             if (prev_was_break_space)
-               attrs[i].is_line_break = TRUE;
+             if (prev_break_type != G_UNICODE_BREAK_SPACE)
+               attrs[i].is_line_break = FALSE;
              break;
 
            case BREAK_ALLOWED:
@@ -1562,16 +1479,61 @@ pango_default_break (const gchar   *text,
              g_assert_not_reached ();
              break;
            }
+
+         /* Rule LB9 */
+         if (!(break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+               break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER))
+           {
+             /* Rule LB25 with Example 7 of Customization */
+             if (break_type == G_UNICODE_BREAK_NUMERIC ||
+                 break_type == G_UNICODE_BREAK_SYMBOL ||
+                 break_type == G_UNICODE_BREAK_INFIX_SEPARATOR)
+               {
+                 if (prev_LB_type != LB_Numeric)
+                   prev_LB_type = LB_type;
+                 /* else don't change the prev_LB_type */
+               }
+             else
+               {
+                 prev_LB_type = LB_type;
+               }
+           }
+         /* else don't change the prev_LB_type for Rule LB9 */
        }
 
       if (break_type != G_UNICODE_BREAK_SPACE)
        {
-         prev_break_type = break_type;
-         prev_was_break_space = FALSE;
+         /* Rule LB9 */
+         if (break_type == G_UNICODE_BREAK_COMBINING_MARK ||
+             break_type == G_UNICODE_BREAK_ZERO_WIDTH_JOINER)
+           {
+             if (i == 0 /* start of text */ ||
+                 prev_break_type == G_UNICODE_BREAK_MANDATORY ||
+                 prev_break_type == G_UNICODE_BREAK_CARRIAGE_RETURN ||
+                 prev_break_type == G_UNICODE_BREAK_LINE_FEED ||
+                 prev_break_type == G_UNICODE_BREAK_NEXT_LINE ||
+                 prev_break_type == G_UNICODE_BREAK_SPACE ||
+                 prev_break_type == G_UNICODE_BREAK_ZERO_WIDTH_SPACE)
+               prev_break_type = G_UNICODE_BREAK_ALPHABETIC; /* Rule LB10 */
+             /* else don't change the prev_break_type for Rule LB9 */
+           }
+         else
+           {
+             prev_prev_break_type = prev_break_type;
+             prev_break_type = break_type;
+           }
+
          prev_jamo = jamo;
        }
       else
-       prev_was_break_space = TRUE;
+       {
+         if (prev_break_type != G_UNICODE_BREAK_SPACE)
+           {
+             prev_prev_break_type = prev_break_type;
+             prev_break_type = break_type;
+           }
+         /* else don't change the prev_break_type */
+       }
 
       /* ---- Word breaks ---- */
 


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]