[vte/wip/egmont/bidi: 88/104] Arabic shaping PoC, using fribidi + Unicode presentation form characters



commit 7d90ecef0ff38eee39e76dac82bcf5b99f98ce37
Author: Egmont Koblinger <egmont gmail com>
Date:   Mon Oct 1 11:42:55 2018 +0200

    Arabic shaping PoC, using fribidi + Unicode presentation form characters

 BIDI-STATUS |  3 ++-
 src/bidi.cc | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 src/bidi.hh |  2 ++
 src/vte.cc  |  4 ++--
 4 files changed, 58 insertions(+), 9 deletions(-)
---
diff --git a/BIDI-STATUS b/BIDI-STATUS
index ca99c46a..1a29c4f4 100644
--- a/BIDI-STATUS
+++ b/BIDI-STATUS
@@ -10,6 +10,7 @@ Done:
 - Mouse reporting.
 - Regex match and explicit hyperlink underlining on hover.
 - VTE_DEBUG=bidi highlights characters with resolved RTL directionality.
+- Arabic shaping using Unicode presentation forms.
 - Test file.
 - Configure flag.
 - Keyboard arrow swapping.
@@ -31,7 +32,7 @@ Missing from first release:
 - Code cleanup and review, of course.
 
 Planned future improvements:
-- Shaping.
+- Real shaping (harfbuzz?).
 - Right-align RTL glyphs.
 - Implicit mode level 2 (handling BiDi control characters).
 - Mirror the glyphs that don't have mirrored counterpart.
diff --git a/src/bidi.cc b/src/bidi.cc
index 5078824c..8bdef69d 100644
--- a/src/bidi.cc
+++ b/src/bidi.cc
@@ -42,6 +42,7 @@ BidiRow::BidiRow()
         m_log2vis = nullptr;
         m_vis2log = nullptr;
         m_vis_rtl = nullptr;
+        m_vis_shaped_char = nullptr;
 }
 
 BidiRow::~BidiRow()
@@ -49,6 +50,7 @@ BidiRow::~BidiRow()
         g_free (m_log2vis);
         g_free (m_vis2log);
         g_free (m_vis_rtl);
+        g_free (m_vis_shaped_char);
 }
 
 void BidiRow::set_width(vte::grid::column_t width)
@@ -63,6 +65,7 @@ void BidiRow::set_width(vte::grid::column_t width)
                 m_log2vis = (vte::grid::column_t *) g_realloc (m_log2vis, sizeof (vte::grid::column_t) * 
m_width_alloc);
                 m_vis2log = (vte::grid::column_t *) g_realloc (m_vis2log, sizeof (vte::grid::column_t) * 
m_width_alloc);
                 m_vis_rtl = (guint8 *) g_realloc (m_vis_rtl, sizeof (guint8) * m_width_alloc);
+                m_vis_shaped_char = (gunichar *) g_realloc (m_vis_shaped_char, sizeof (gunichar) * 
m_width_alloc);
         }
 
         m_width = width;
@@ -113,6 +116,21 @@ bool BidiRow::log_is_rtl(vte::grid::column_t col) const
         }
 }
 
+/* Return the character (vteunistr) to display at visual column col.
+ *
+ * s is the unshaped character of that cell. It is returned unchanged if col is
+ * outside [0, m_width) or no shaped character is recorded there (entry is 0).
+ * Otherwise only its base is replaced by the recorded shaped character, via
+ * _vte_unistr_replace_base(), which is meant to keep combining accents of s. */
+vteunistr
+BidiRow::vis_get_shaped_char(vte::grid::column_t col, vteunistr s) const
+{
+        if (col < 0 || col >= m_width || m_vis_shaped_char[col] == 0)
+                return s;
+
+        return _vte_unistr_replace_base(s, m_vis_shaped_char[col]);
+}
+
 /* Whether the line's base direction is RTL. */
 bool BidiRow::base_is_rtl() const
 {
@@ -244,6 +262,7 @@ void RingView::explicit_line(vte::grid::row_t row, bool rtl)
                 for (i = 0; i < m_width; i++) {
                         bidirow->m_log2vis[i] = bidirow->m_vis2log[i] = m_width - 1 - i;
                         bidirow->m_vis_rtl[i] = true;
+                        bidirow->m_vis_shaped_char[i] = 0;
                 }
         } else {
                 /* Shortcut: bidirow->m_width == 0 might denote a fully LTR line,
@@ -308,6 +327,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
         FriBidiChar *fribidi_chars;
         FriBidiCharType *fribidi_chartypes;
         FriBidiBracketType *fribidi_brackettypes;
+        FriBidiJoiningType *fribidi_joiningtypes;
         FriBidiLevel *fribidi_levels;
         FriBidiStrIndex *fribidi_map;
         FriBidiStrIndex *fribidi_to_term;
@@ -484,6 +504,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
         /* Run the BiDi algorithm on the paragraph to get the embedding levels. */
         fribidi_chartypes = g_newa (FriBidiCharType, count);
         fribidi_brackettypes = g_newa (FriBidiBracketType, count);
+        fribidi_joiningtypes = g_newa (FriBidiJoiningType, count);
         fribidi_levels = g_newa (FriBidiLevel, count);
 
         pbase_dir = autodir ? (rtl ? FRIBIDI_PAR_WRTL : FRIBIDI_PAR_WLTR)
@@ -491,6 +512,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
 
         fribidi_get_bidi_types (fribidi_chars, count, fribidi_chartypes);
         fribidi_get_bracket_types (fribidi_chars, count, fribidi_chartypes, fribidi_brackettypes);
+        fribidi_get_joining_types (fribidi_chars, count, fribidi_joiningtypes);
         level = fribidi_get_par_embedding_levels_ex (fribidi_chartypes, fribidi_brackettypes, count, 
&pbase_dir, fribidi_levels);
 
         if (level == 0) {
@@ -501,16 +523,36 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
                 return explicit_paragraph (row_orig, rtl);
         }
 
+        /* Arabic shaping
+         *
+         * https://www.w3.org/TR/css-text-3/#word-break-shaping says:
+         * "When shaping scripts such as Arabic wrap [...] the characters must still be shaped (their joining forms chosen)
+         * as if the word were still whole."
+         *
+         * Also, FriBidi's Arabic shaping methods, as opposed to fribidi_reorder_line(), don't take an offset parameter.
+         * This is another weak sign that the desired behavior is to shape the entire paragraph before splitting to lines.
+         *
+         * We only perform shaping in implicit mode, for two reasons:
+         *
+         * Following the CSS logic, I think the sensible behavior for a partially visible word (e.g. at the margin of a
+         * text editor) is to use the joining/shaping form according to the entire word. Hence in explicit mode it must be
+         * the responsibility of the BiDi-aware application and not the terminal emulator to perform joining/shaping.
+         *
+         * And a technical limitation: FriBidi can only perform joining/shaping with the logical order as input, not with
+         * the visual order. We'd need to find another API, or do ugly workarounds, which I'd rather not. */
+        fribidi_join_arabic (fribidi_chartypes, count, fribidi_levels, fribidi_joiningtypes);
+        fribidi_shape_arabic (FRIBIDI_FLAGS_ARABIC, fribidi_levels, count, fribidi_joiningtypes, 
fribidi_chars);
+
         g_assert_cmpint (pbase_dir, !=, FRIBIDI_PAR_ON);
         /* For convenience, from now on this variable contains the resolved (i.e. possibly autodetected) 
value. */
         rtl = (pbase_dir == FRIBIDI_PAR_RTL || pbase_dir == FRIBIDI_PAR_WRTL);
 
-        if ((!rtl && level == 1) || (rtl && level == 2)) {
-                /* Fast shortcut for LTR-only and RTL-only paragraphs. */
+        if (!rtl && level == 1) {
+                /* Fast shortcut for LTR-only paragraphs. */
                 g_array_free (fribidi_chars_array, TRUE);
                 g_array_free (fribidi_map_array, TRUE);
                 g_array_free (fribidi_to_term_array, TRUE);
-                return explicit_paragraph (row_orig, rtl);
+                return explicit_paragraph (row_orig, false);
         }
 
         /* Reshuffle line by line. */
@@ -547,9 +589,9 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
                         goto next_line;
                 }
 
-                if ((!rtl && level == 1) || (rtl && level == 2)) {
-                        /* Fast shortcut for LTR-only and RTL-only lines. */
-                        explicit_line (row, rtl);
+                if (!rtl && level == 1) {
+                        /* Fast shortcut for LTR-only lines. */
+                        explicit_line (row, false);
                         bidirow->m_has_foreign = true;
                         goto next_line;
                 }
@@ -562,6 +604,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
                         for (; tv < unused; tv++) {
                                 bidirow->m_vis2log[tv] = m_width - 1 - tv;
                                 bidirow->m_vis_rtl[tv] = true;
+                                bidirow->m_vis_shaped_char[tv] = 0;
                         }
                 }
                 for (fv = lines[line]; fv < lines[line + 1]; fv++) {
@@ -578,6 +621,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
                                 for (col = 0; col < cell->attr.columns(); col++) {
                                         bidirow->m_vis2log[tv + col] = tl + cell->attr.columns() - 1 - col;
                                         bidirow->m_vis_rtl[tv + col] = true;
+                                        bidirow->m_vis_shaped_char[tv + col] = fribidi_chars[fl];
                                 }
                                 tv += cell->attr.columns();
                                 tl += cell->attr.columns();
@@ -586,6 +630,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
                                 for (col = 0; col < cell->attr.columns(); col++) {
                                         bidirow->m_vis2log[tv] = tl;
                                         bidirow->m_vis_rtl[tv] = false;
+                                        bidirow->m_vis_shaped_char[tv] = fribidi_chars[fl];
                                         tv++;
                                         tl++;
                                 }
@@ -597,6 +642,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
                         for (; tv < m_width; tv++) {
                                 bidirow->m_vis2log[tv] = tv;
                                 bidirow->m_vis_rtl[tv] = false;
+                                bidirow->m_vis_shaped_char[tv] = 0;
                         }
                 }
                 g_assert_cmpint (tv, ==, m_width);
diff --git a/src/bidi.hh b/src/bidi.hh
index 1b75dc98..38e8035e 100644
--- a/src/bidi.hh
+++ b/src/bidi.hh
@@ -49,6 +49,7 @@ public:
         vte::grid::column_t vis2log(vte::grid::column_t col) const;
         bool log_is_rtl(vte::grid::column_t col) const;
         bool vis_is_rtl(vte::grid::column_t col) const;
+        vteunistr vis_get_shaped_char(vte::grid::column_t col, vteunistr s) const;
         bool base_is_rtl() const;
         bool has_foreign() const;
 
@@ -61,6 +62,7 @@ private:
         vte::grid::column_t *m_log2vis;
         vte::grid::column_t *m_vis2log;
         guint8 *m_vis_rtl;
+        gunichar *m_vis_shaped_char;
 
         guint8 m_base_rtl: 1;
         guint8 m_has_foreign: 1;
diff --git a/src/vte.cc b/src/vte.cc
index 00884271..4109aef2 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -9226,7 +9226,7 @@ Terminal::draw_rows(VteScreen *screen_,
                         hilite = nhilite;
 
                         g_assert_cmpint (item_count, <, column_count);
-                        items[item_count].c = c;
+                        items[item_count].c = bidirow->vis_get_shaped_char(col, c);
                         items[item_count].columns = j - col;
                         items[item_count].x = (col - (bidirow->vis_is_rtl(col) ? j - col - 1 : 0)) * 
column_width;
                         items[item_count].y = y;
@@ -9367,7 +9367,7 @@ Terminal::paint_cursor()
 
        /* Draw the cursor. */
         viscol = bidirow->log2vis(col);
-       item.c = (cell && cell->c) ? cell->c : ' ';
+       item.c = (cell && cell->c) ? bidirow->vis_get_shaped_char(viscol, cell->c) : ' ';
        item.columns = item.c == '\t' ? 1 : cell ? cell->attr.columns() : 1;
         item.x = (viscol - ((cell && bidirow->vis_is_rtl(viscol)) ? cell->attr.columns() - 1 : 0)) * width;
        item.y = row_to_pixel(drow);


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]