[vte/wip/egmont/bidi: 20/73] bidi is now run on paragraphs (limited to viewport for now)



commit fecdc516b0d8e7a96e8f2dfd6686a35979fbb61f
Author: Egmont Koblinger <egmont gmail com>
Date:   Thu Aug 23 00:40:41 2018 +0200

    bidi is now run on paragraphs (limited to viewport for now)

 configure.ac      |   2 +-
 src/bidi.cc       | 315 ++++++++++++++++++++++++++++++++++++++++++------------
 src/bidi.hh       |  18 +---
 src/vte.cc        |  16 ++-
 src/vtedefines.hh |   3 +
 5 files changed, 270 insertions(+), 84 deletions(-)
---
diff --git a/configure.ac b/configure.ac
index 067cf926..b1d6fd01 100644
--- a/configure.ac
+++ b/configure.ac
@@ -252,7 +252,7 @@ GLIB_REQUIRED=2.40.0
 GIO_REQUIRED=2.40.0
 PANGO_REQUIRED=1.22.0
 GNUTLS_REQUIRED=3.2.7
-FRIBIDI_REQUIRED=0.19.7  # FIXME bump to 1.0 whenever I upgrade :)
+FRIBIDI_REQUIRED=1.0.0
 PCRE2_REQUIRED=10.21
 
 # GNUTLS
diff --git a/src/bidi.cc b/src/bidi.cc
index 0207399d..72023d2a 100644
--- a/src/bidi.cc
+++ b/src/bidi.cc
@@ -24,8 +24,21 @@
 
 #include "bidi.hh"
 #include "debug.h"
+#include "vtedefines.hh"
 #include "vteinternal.hh"
 
+
+
+#ifdef WITH_FRIBIDI
+FriBidiChar fribidi_chars[100000];
+FriBidiCharType fribidi_chartypes[100000];
+FriBidiBracketType fribidi_brackettypes[100000];
+FriBidiLevel fribidi_levels[100000];
+FriBidiStrIndex fribidi_map[100000];
+#endif
+
+
+
 using namespace vte::base;
 
 RingView::RingView()
@@ -47,7 +60,7 @@ RingView::~RingView()
 {
         for (int i = 0; i < m_height_alloc; i++)
                 g_free (m_bidimaps[i]);
-       g_free (m_bidimaps);
+        g_free (m_bidimaps);
         /* ... */
 }
 
@@ -89,109 +102,277 @@ void RingView::set_rows(long s, long l)
 
 void RingView::update()
 {
-        for (int i = 0; i < m_len; i++) {
-                for (int j = 0; j < m_width; j++) {
-                        m_bidimaps[i][j].log2vis = j;
-                        m_bidimaps[i][j].vis2log = j;
-                        m_bidimaps[i][j].rtl = 0;
-                }
+        long i = m_start;
+        while (i < m_start + m_len) {
+                i = paragraph (i);
         }
 }
 
 bidicellmap *RingView::get_row_map(long row)
 {
-        g_assert (row >= m_start && row < m_start + m_len);
+        g_assert_cmpint (row, >=, m_start);
+        g_assert_cmpint (row, <, m_start + m_len);
         return m_bidimaps[row - m_start];
 }
 
-#ifdef WITH_FRIBIDI
-
-FriBidiChar str[1000];
-FriBidiStrIndex L_to_V[1000];
-FriBidiStrIndex V_to_L[1000];
-
-static void bidi_shuffle_explicit (int width, gboolean rtl)
+/* Set up the mapping according to explicit mode for a given line. */
+void RingView::explicit_line(long row, gboolean rtl)
 {
         int i;
+        bidicellmap *map;
+
+        if (G_UNLIKELY (row < m_start || row >= m_start + m_len))
+                return;
+
+        map = m_bidimaps[row - m_start];
 
-        if (rtl) {
-                for (i = 0; i < width; i++) {
-                        L_to_V[i] = V_to_L[i] = width - 1 - i;
+        if (G_UNLIKELY (rtl)) {
+                for (i = 0; i < m_width; i++) {
+                        map[i].log2vis = map[i].vis2log = m_width - 1 - i;
+                        map[i].vis_rtl = TRUE;
                 }
         } else {
-                for (i = 0; i < width; i++) {
-                        L_to_V[i] = V_to_L[i] = i;
+                for (i = 0; i < m_width; i++) {
+                        map[i].log2vis = map[i].vis2log = i;
+                        map[i].vis_rtl = FALSE;
                 }
         }
 }
 
-void bidi_shuffle (const VteRowData *rowdata, int width)
+/* Set up the mapping according to explicit mode, for all the lines
+ * of a paragraph beginning at the given line.
+ * Returns the row number after the paragraph or viewport (whichever ends first). */
+long RingView::explicit_paragraph(long row, gboolean rtl)
 {
-        int i;
+        const VteRowData *row_data;
+
+        while (row < m_start + m_len) {
+                explicit_line(row, rtl);
+
+                row_data = m_ring->index(row++);
+                if (row_data == nullptr || !row_data->attr.soft_wrapped)
+                        break;
+        }
+        return row;
+}
+
+/* Figure out the mapping for the paragraph starting at the given row.
+ * Returns the row number after the paragraph or viewport (whichever ends first). */
+long RingView::paragraph(long row)
+{
+        const VteRowData *row_data;
+
+#ifdef WITH_FRIBIDI
+        const VteCell *cell;
+        gboolean rtl;
+        gboolean autodir;
         FriBidiParType pbase_dir;
+        FriBidiLevel level;
+        bidicellmap *map;
+#endif /* WITH_FRIBIDI */
 
-        if (rowdata == NULL) {  // FIXME make sure it doesn't happen
-                bidi_shuffle_explicit (width, FALSE);
-                return;
+        row_data = m_ring->index(row);
+        if (row_data == nullptr) {
+                return explicit_paragraph(row, FALSE);
         }
 
-        if (!(rowdata->attr.bidi_flags & VTE_BIDI_IMPLICIT)) {
-                bidi_shuffle_explicit (width, rowdata->attr.bidi_flags & VTE_BIDI_RTL);
-                return;
+#ifndef WITH_FRIBIDI
+        return explicit_paragraph(row, !!(row_data->attr.bidi_flags & VTE_BIDI_RTL));
+#else
+
+        if (!(row_data->attr.bidi_flags & VTE_BIDI_IMPLICIT)) {
+                return explicit_paragraph(row, !!(row_data->attr.bidi_flags & VTE_BIDI_RTL));
         }
 
-        for (i = 0; i < rowdata->len && i < width; i++) {
-                if (rowdata->cells[i].c == 0) break;
-                // FIXME is it okay to run the BiDi algorithm without the combining accents?
-                str[i] = _vte_unistr_get_base(rowdata->cells[i].c);
+        rtl = !!(row_data->attr.bidi_flags & VTE_BIDI_RTL);
+        autodir = !!(row_data->attr.bidi_flags & VTE_BIDI_AUTO);
+
+        int lines[VTE_BIDI_PARAGRAPH_LENGTH_MAX + 1];
+        lines[0] = 0;
+        int line = 0;
+        int c = 0;
+        int row_orig = row;
+        int j = 0;
+        int k, l, v;
+        unsigned int col;
+
+        /* Extract the paragraph's contents, omitting unused and fragment cells. */
+        while (row < m_start + m_len) {
+                row_data = m_ring->index(row++);
+                if (row_data == nullptr)
+                        break;
+
+                if (line == VTE_BIDI_PARAGRAPH_LENGTH_MAX) {
+                        /* Overlong paragraph, bail out. */
+                        return explicit_paragraph (row_orig, rtl);
+                }
+
+                /* A row_data might be longer, in case rewrapping is disabled and the window was narrowed.
+                 * Truncate the logical data before applying BiDi. */
+                // FIXME what the heck to do if this truncation cuts a TAB or CJK in half???
+                for (j = 0; j < m_width && j < row_data->len; j++) {
+                        cell = _vte_row_data_get (row_data, j);
+                        if (cell->attr.fragment())
+                                continue;
+
+                        // FIXME is it okay to run the BiDi algorithm without the combining accents?
+                        // If we need to preserve them then we need to double check whether
+                        // fribidi_reorder_line() requires a FRIBIDI_FLAG_REORDER_NSM or not.
+                        fribidi_chars[c++] = _vte_unistr_get_base(cell->c);
+                }
+
+                lines[++line] = c;
+
+                if (!row_data->attr.soft_wrapped)
+                        break;
+        }
+
+        if (lines == 0) {
+                // huh?
+                return explicit_paragraph (row_orig, rtl);
         }
 
-        pbase_dir = (rowdata->attr.bidi_flags & VTE_BIDI_AUTO)
-                    ? FRIBIDI_PAR_ON
-                    : (rowdata->attr.bidi_flags & VTE_BIDI_RTL) ? FRIBIDI_PAR_RTL : FRIBIDI_PAR_LTR;
+        /* Run the BiDi algorithm on the paragraph to get the embedding levels. */
 
-        fribidi_log2vis (str, i, &pbase_dir, NULL, L_to_V, V_to_L, NULL);
+        // FIXME are the WLTR / WRTL paragraph directions what I think they are?
+        pbase_dir = autodir ? (rtl ? FRIBIDI_PAR_WRTL : FRIBIDI_PAR_WLTR)
+                            : (rtl ? FRIBIDI_PAR_RTL  : FRIBIDI_PAR_LTR );
 
-        if (pbase_dir == FRIBIDI_PAR_ON) {
-                pbase_dir = (rowdata->attr.bidi_flags & VTE_BIDI_RTL) ? FRIBIDI_PAR_RTL : FRIBIDI_PAR_LTR;
+        fribidi_get_bidi_types (fribidi_chars, c, fribidi_chartypes);
+        fribidi_get_bracket_types (fribidi_chars, c, fribidi_chartypes, fribidi_brackettypes);
+        level = fribidi_get_par_embedding_levels_ex (fribidi_chartypes, fribidi_brackettypes, c, &pbase_dir, fribidi_levels);
+
+        if (level == 0) {
+                /* error */
+                return explicit_paragraph (row_orig, rtl);
         }
 
-        if (pbase_dir == FRIBIDI_PAR_RTL || pbase_dir == FRIBIDI_PAR_WRTL) {
-                if (i < width) {
-                        /* shift to the right */
-                        int shift = width - i;
-                        for (i--; i >= 0; i--) {
-                                L_to_V[i] += shift;
-                                V_to_L[i + shift] = V_to_L[i];
+        /* For convenience, from now on this variable contains the resolved (i.e. possibly autodetected) value. */
+        rtl = (pbase_dir == FRIBIDI_PAR_RTL || pbase_dir == FRIBIDI_PAR_WRTL);
+
+        if (level == 1 || (rtl && level == 2)) {
+                /* Fast shortcut for LTR-only and RTL-only. */
+                return explicit_paragraph (row_orig, rtl);
+        }
+
+        /* Reshuffle line by line. */
+        row = row_orig;
+        line = 0;
+        while (row < m_start + m_len) {
+                if (G_UNLIKELY (row < m_start)) {
+                        row++;
+                        line++;
+                        continue;
+                }
+
+                map = m_bidimaps[row - m_start];
+
+                row_data = m_ring->index(row++);
+                if (row_data == nullptr)
+                        break;
+
+                /* fribidi_reorder_line() conveniently reorders arbitrary numbers we pass as the map.
+                 * Use the logical position to save us from headaches when encountering fragments . */
+                k = lines[line];
+                for (j = 0; j < m_width && j < row_data->len; j++) {
+                        cell = _vte_row_data_get (row_data, j);
+                        if (cell->attr.fragment())
+                                continue;
+
+                        fribidi_map[k++] = j;
+                }
+
+                g_assert_cmpint (k, ==, lines[line + 1]);
+
+                // FIXME is it okay to run the BiDi algorithm without the combining accents?
+                // If we need to preserve them then we need to double check whether
+                // fribidi_reorder_line() requires a FRIBIDI_FLAG_REORDER_NSM or not.
+                level = fribidi_reorder_line (FRIBIDI_FLAGS_DEFAULT,
+                                              fribidi_chartypes,
+                                              lines[line + 1] - lines[line],
+                                              lines[line],
+                                              pbase_dir,
+                                              fribidi_levels,
+                                              NULL,
+                                              fribidi_map);
+
+                if (level == 0) {
+                        /* error, what should we do? */
+                        explicit_line (row, rtl);
+                        goto cont;
+                }
+
+                // FIXME can we do LTR-only and RTL-only shortcuts, just like with fribidi_get_par_embedding_levels_ex() ?
+
+                /* Copy to our realm. Proceed in visual order.*/
+                v = 0;
+                if (rtl) {
+                        /* Unused cell on the left for RTL paragraphs */
+                        int unused = MAX(m_width - row_data->len, 0);
+                        for (; v < unused; v++) {
+                                map[v].vis2log = m_width - 1 - v;
+                                map[v].vis_rtl = TRUE;
+                        }
+                }
+                for (j = lines[line]; j < lines[line + 1]; j++) {
+                        /* Inflate fribidi's result by inserting fragments. */
+                        l = fribidi_map[j];
+                        cell = _vte_row_data_get (row_data, l);
+                        g_assert (!cell->attr.fragment());
+                        g_assert (cell->attr.columns() > 0);
+                        if (fribidi_levels[l] % 2 == 0) {
+                                /* LTR character directionality. */
+                                for (col = 0; col < cell->attr.columns(); col++) {
+                                        map[v].vis2log = l;
+                                        map[v].vis_rtl = FALSE;
+                                        v++;
+                                        l++;
+                                }
+                        } else {
+                                /* RTL character directionality. Map fragments in reverse order. */
+                                for (col = 0; col < cell->attr.columns(); col++) {
+                                        map[v + col].vis2log = l + cell->attr.columns() - 1 - col;
+                                        map[v + col].vis_rtl = TRUE;
+                                }
+                                v += cell->attr.columns();
+                                l += cell->attr.columns();
                         }
-                        for (i = 0; i < shift; i++) {
-                                L_to_V[width - 1 - i] = i;
-                                V_to_L[i] = width - 1 - i;
+                }
+                if (!rtl) {
+                        /* Unused cell on the right for LTR paragraphs */
+                        g_assert_cmpint (v, ==, MIN (row_data->len, m_width));
+                        for (; v < m_width; v++) {
+                                map[v].vis2log = v;
+                                map[v].vis_rtl = FALSE;
                         }
                 }
-        } else {
-                for (; i < width; i++) {
-                        L_to_V[i] = V_to_L[i] = i;
+                g_assert_cmpint (v, ==, m_width);
+
+                /* From vis2log create the log2vis mapping too */
+                if (_vte_debug_on (VTE_DEBUG_BIDI)) {
+                        for (l = 0; l < m_width; l++) {
+                                map[l].log2vis = -1;
+                        }
                 }
-        }
-}
 
-#else /* WITH_FRIBIDI */
+                for (v = 0; v < m_width; v++) {
+                        map[map[v].vis2log].log2vis = v;
+                }
 
-void bidi_shuffle (const VteRowData *rowdata, int width) {
-        if (rowdata == NULL) {  // FIXME make sure it doesn't happen
-                bidi_shuffle_explicit (width, FALSE);
-        } else {
-                bidi_shuffle_explicit (width, rowdata->attr.bidi_flags & VTE_BIDI_RTL);
-        }
-}
+                if (_vte_debug_on (VTE_DEBUG_BIDI)) {
+                        for (l = 0; l < m_width; l++) {
+                                g_assert_cmpint (map[l].log2vis, !=, -1);
+                        }
+                }
 
-#endif /* WITH_FRIBIDI */
+cont:
+                line++;
 
-int log2vis (int log) {
-        return L_to_V[log];
-}
+                if (!row_data->attr.soft_wrapped)
+                        break;
+        }
+
+        return row;
 
-int vis2log (int vis) {
-        return V_to_L[vis];
+#endif /* !WITH_FRIBIDI */
 }
diff --git a/src/bidi.hh b/src/bidi.hh
index 58fddf04..6291f662 100644
--- a/src/bidi.hh
+++ b/src/bidi.hh
@@ -26,7 +26,7 @@
 struct _bidicellmap {
         int log2vis;
         int vis2log;
-        guint8 rtl: 1;
+        guint8 vis_rtl: 1;
 };
 
 typedef struct _bidicellmap bidicellmap;
@@ -59,6 +59,10 @@ private:
 
         long m_height_alloc;
         long m_width_alloc;
+
+        void explicit_line(long row, gboolean rtl);
+        long explicit_paragraph(long row, gboolean rtl);
+        long paragraph(long row);
 };
 
 
@@ -68,16 +72,4 @@ private:
 
 G_BEGIN_DECLS
 
-void bidi_shuffle (const VteRowData *rowdata, int width);
-int log2vis (int log);
-int vis2log (int vis);
-
-struct _bidimap {
-        vte::base::Ring *ring;
-};
-
-typedef struct _bidimap bidimap;
-
-
-
 G_END_DECLS
diff --git a/src/vte.cc b/src/vte.cc
index dc18d675..36c1c8a1 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -3018,7 +3018,7 @@ Terminal::maybe_apply_bidi_attributes()
                 const VteRowData *rowdata = _vte_ring_index (m_screen->row_data, row - 1);
                 if (rowdata != nullptr && rowdata->attr.soft_wrapped) {
                         _vte_debug_print(VTE_DEBUG_BIDI,
-                                         "No, not after a hard wrap.\n");
+                                         "No, we're not after a hard wrap.\n");
                         return;
                 }
         }
@@ -8962,7 +8962,7 @@ Terminal::draw_rows(VteScreen *screen_,
                         items[item_count].columns = cell->attr.columns();
                         items[item_count].x = col * column_width;
                         items[item_count].y = y;
-                        items[item_count].mirror = !!(row_data->attr.bidi_flags & VTE_BIDI_RTL);  // FIXME
+                        items[item_count].mirror = bidimap[i].vis_rtl;
                         items[item_count].box_mirror = !!(row_data->attr.bidi_flags & VTE_BIDI_BOX_MIRROR);
                         item_count++;
                 }
@@ -9082,10 +9082,20 @@ Terminal::paint_cursor()
        if (CLAMP(col, 0, m_column_count - 1) != col)
                return;
 
+
+
+
+        // FIXME find a nicer place for these
+        m_ringview.set_ring (m_screen->row_data);
+        m_ringview.set_rows ((long) m_screen->scroll_delta, m_row_count + 2);
+        m_ringview.set_width (m_column_count);
+        m_ringview.update ();
+
+
+
         /* Find the first cell of the character "under" the cursor.
          * This is for CJK.  For TAB, paint the cursor where it really is. */
         VteRowData const *row_data = find_row_data(drow);
-        m_ringview.update();
         bidicellmap const *bidimap = m_ringview.get_row_map(drow);
 
        auto cell = find_charcell(col, drow);
diff --git a/src/vtedefines.hh b/src/vtedefines.hh
index 71896a76..aa64c27c 100644
--- a/src/vtedefines.hh
+++ b/src/vtedefines.hh
@@ -136,3 +136,6 @@
 
 /* Max depth of title stack */
 #define VTE_WINDOW_TITLE_STACK_MAX_DEPTH (8)
+
+/* Maximum length of a paragraph, in lines, that might get proper BiDi treatment. */
+#define VTE_BIDI_PARAGRAPH_LENGTH_MAX   20


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]