[vte/wip/egmont/bidi: 93/107] explicit shaping



commit 2277543072d999338cafbdcba70a7bc6410498aa
Author: Egmont Koblinger <egmont gmail com>
Date:   Fri May 24 11:35:56 2019 +0200

    explicit shaping

 src/bidi.cc | 182 ++++++++++++++++++++++++++++++++++++++++++------------------
 src/bidi.hh |   3 +
 2 files changed, 132 insertions(+), 53 deletions(-)
---
diff --git a/src/bidi.cc b/src/bidi.cc
index 4514ac8b..719b213b 100644
--- a/src/bidi.cc
+++ b/src/bidi.cc
@@ -160,25 +160,148 @@ BidiRunner::BidiRunner(RingView *ringview)
 
 BidiRunner::~BidiRunner() {}
 
+bool BidiRunner::is_arabic(gunichar c)
+{
+        const gunichar MIN_ARABIC = 0x0600;
+        const gunichar MAX_ARABIC = 0x06ff;
+
+        return c >= MIN_ARABIC && c <= MAX_ARABIC;
+}
+
 /* Returns whether there's an Arabic character in the row so that shaping might be required. */
 bool BidiRunner::needs_shaping(vte::grid::row_t row)
 {
-        const gulong MIN_SHAPING = 0x0600;
-        const gulong MAX_SHAPING = 0x06FF;
-
         const VteRowData *row_data = m_ringview->get_row(row);
 
         for (int i = 0; i < row_data->len; i++) {
                 const VteCell *cell = _vte_row_data_get(row_data, i);
                 if (cell != nullptr) {
                         gunichar c = _vte_unistr_get_base(cell->c);
-                        if (G_UNLIKELY (c >= MIN_SHAPING && c <= MAX_SHAPING))
+                        if (G_UNLIKELY (is_arabic(c)))
                                 return true;
                 }
         }
         return false;
 }
 
+/* Perform Arabic shaping on an explicit line (which could be explicit LTR or explicit RTL),
+ * using presentational form characters.
+ *
+ * Don't do shaping across lines. (I'm unsure about this design decision.
+ * Shaping across soft linebreaks would require even more complex code.)
+ *
+ * The FriBiDi API doesn't have a method for shaping a visual string, so we need to extract
+ * Arabic words ourselves, by walking in the visual order from right to left. It's painful.
+ *
+ * This whole shaping business with presentational form characters should be replaced by HarfBuzz.
+ */
+void BidiRunner::explicit_line_shape(vte::grid::row_t row)
+{
+        VteRowData *row_data = m_ringview->get_row(row);
+        if (G_UNLIKELY (row_data == nullptr))
+                return;
+
+        BidiRow *bidirow = m_ringview->get_row_map_writable(row);
+
+        auto width = m_ringview->get_width();
+
+        GArray *fribidi_chars_array = nullptr;
+
+        FriBidiParType pbase_dir = FRIBIDI_PAR_RTL;
+        FriBidiLevel level;
+        FriBidiChar *fribidi_chars;
+        FriBidiCharType *fribidi_chartypes;
+        FriBidiBracketType *fribidi_brackettypes;
+        FriBidiJoiningType *fribidi_joiningtypes;
+        FriBidiLevel *fribidi_levels;
+
+        int count;
+
+        const VteCell *cell;
+        gunichar c;
+        gunichar base;
+        int i, j;
+
+        fribidi_chars_array = g_array_new (FALSE, FALSE, sizeof (FriBidiChar));
+
+        /* Walk in visual order from right to left. */
+        i = width - 1;
+        while (i >= 0) {
+                cell = _vte_row_data_get(row_data, bidirow->m_vis2log[i]);
+                c = cell ? cell->c : 0;
+                base = _vte_unistr_get_base(c);
+                if (!is_arabic(base)) {
+                        i--;
+                        continue;
+                }
+
+                /* Found an Arabic character. Keep walking to the left, extracting the word. */
+                g_array_set_size(fribidi_chars_array, 0);
+                j = i;
+                do {
+                        auto prev_len = fribidi_chars_array->len;
+                        _vte_unistr_append_to_gunichars (cell->c, fribidi_chars_array);
+                        g_assert_cmpint (fribidi_chars_array->len, >, prev_len);
+
+                        j--;
+                        if (j >= 0) {
+                                cell = _vte_row_data_get(row_data, bidirow->m_vis2log[j]);
+                                c = cell ? cell->c : 0;
+                                base = _vte_unistr_get_base(c);
+                       } else {
+                                /* Pretend that visual column -1 contains a stop char. */
+                               base = 0;
+                       }
+                } while (is_arabic(base));
+
+                /* Extracted the Arabic run. Do the BiDi. */
+
+                /* Convenience stuff, we no longer need the auto-growing GArray wrapper. */
+                count = fribidi_chars_array->len;
+               fribidi_chars = (FriBidiChar *) fribidi_chars_array->data;
+
+               /* Run the BiDi algorithm on the paragraph to get the embedding levels. */
+               fribidi_chartypes = g_newa (FriBidiCharType, count);
+               fribidi_brackettypes = g_newa (FriBidiBracketType, count);
+               fribidi_joiningtypes = g_newa (FriBidiJoiningType, count);
+               fribidi_levels = g_newa (FriBidiLevel, count);
+
+               fribidi_get_bidi_types (fribidi_chars, count, fribidi_chartypes);
+               fribidi_get_bracket_types (fribidi_chars, count, fribidi_chartypes, fribidi_brackettypes);
+               fribidi_get_joining_types (fribidi_chars, count, fribidi_joiningtypes);
+               level = fribidi_get_par_embedding_levels_ex (fribidi_chartypes, fribidi_brackettypes, count, &pbase_dir, fribidi_levels);
+                if (level == 0) {
+                        /* Error. Just skip shaping this word. */
+                        i = j - 1;
+                        continue;
+                }
+
+               fribidi_join_arabic (fribidi_chartypes, count, fribidi_levels, fribidi_joiningtypes);
+               fribidi_shape_arabic (FRIBIDI_FLAGS_ARABIC, fribidi_levels, count, fribidi_joiningtypes, fribidi_chars);
+
+                /* Walk through the Arabic word again. */
+                j = i;
+                while (count > 0) {
+                        g_assert_cmpint (j, >=, 0);
+                        cell = _vte_row_data_get(row_data, bidirow->m_vis2log[j]);
+                        c = cell->c;
+                        base = _vte_unistr_get_base(c);
+                        if (*fribidi_chars != base) {
+                                /* Shaping changed the codepoint. Apply combining accents and store. */
+                                bidirow->m_vis_shaped_char[j] = _vte_unistr_replace_base(c, *fribidi_chars);
+                        }
+                        int len = _vte_unistr_strlen(c);
+                        fribidi_chars += len;
+                        count -= len;
+                        j--;
+                }
+
+                /* Ready to look for the next word. Tiny speedup by skipping the stop char which isn't Arabic. */
+                i = j - 1;
+        }
+        g_array_free (fribidi_chars_array, TRUE);
+}
+
 /* Set up the mapping according to explicit mode for a given line.
  *
  * If @shape then perform Arabic shaping on the visual string, independently
@@ -191,7 +314,7 @@ void BidiRunner::explicit_line(vte::grid::row_t row, bool rtl, bool shape)
         int i;
 
         BidiRow *bidirow = m_ringview->get_row_map_writable(row);
-        if (bidirow == nullptr)
+        if (G_UNLIKELY (bidirow == nullptr))
                 return;
         bidirow->m_base_rtl = rtl;
         bidirow->m_has_foreign = false;
@@ -221,54 +344,7 @@ void BidiRunner::explicit_line(vte::grid::row_t row, bool rtl, bool shape)
                         return;
                }
         }
-
-        /* Perform Arabic shaping on an explicit line (which could be explicit LTR or explicit RTL).
-         * Don't do shaping across (soft) linebreaks (I'm unsure about it).
-         * The FriBiDi API doesn't have such a method, so we need to extract Arabic words ourselves,
-         * by walking in the visual order from right to left. */
-
-        // FIXME this is going to be freaking cumbersome.
-
-#if 0 // bleeeeh
-        GArray *fribidi_chars_array = nullptr;
-        
-        VteCell *cell;
-        gunichar c;
-        gunichar base;
-        i = width - 1;
-        while (i >= 0) {
-                cell = _vte_row_data_get(row_data, bidirow->m_vis2log[i]);
-                c = cell->c;
-                base = _vte_unistr_get_base(c);
-                if (base < MIN_SHAPING || base > MAX_SHAPING) {
-                        i--;
-                        continue;
-                }
-
-                /* Found an Arabic character. Keep walking to the left, extracting the word. */
-                       fribidi_chars_array = g_array_new (FALSE, FALSE, sizeof (FriBidiChar));
-                       j = i;
-                do {
-                        auto prev_len = fribidi_chars_array->len;
-                        _vte_unistr_append_to_gunichars (cell->c, fribidi_chars_array);
-                        g_assert_cmpint (fribidi_chars_array->len, >, prev_len);
-
-                        j--;
-                        if (j >= 0) {
-                                cell = _vte_row_data_get(row_data, bidirow->m_vis2log[j]);
-                                c = cell->c;
-                                base = _vte_unistr_get_base(c);
-                       } else {
-                               base = 0;
-                       }
-                } while (base >= MIN_SHAPING && base <= MAX_SHAPING);
-
-
-
-        }
-#endif
-
-
+        explicit_line_shape(row);
 }
 
 /* Set up the mapping according to explicit mode, for all the lines
diff --git a/src/bidi.hh b/src/bidi.hh
index b0c277a7..3ed29d58 100644
--- a/src/bidi.hh
+++ b/src/bidi.hh
@@ -92,7 +92,10 @@ public:
 private:
         RingView *m_ringview;
 
+        static bool is_arabic(gunichar c);
         bool needs_shaping(vte::grid::row_t row);
+        void explicit_line_shape(vte::grid::row_t row);
+
         void explicit_line(vte::grid::row_t row, bool rtl, bool shape);
         void explicit_paragraph(vte::grid::row_t start, vte::grid::row_t end, bool rtl, bool shape);
 };


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]