[vte/wip/egmont/bidi: 2/2] Arabic shaping PoC, using fribidi + Unicode presentation form characters
- From: Egmont Koblinger <egmontkob src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [vte/wip/egmont/bidi: 2/2] Arabic shaping PoC, using fribidi + Unicode presentation form characters
- Date: Mon, 12 Nov 2018 10:49:18 +0000 (UTC)
commit 4946f26a8e39491eca68aaa5b676a462a7508da0
Author: Egmont Koblinger <egmont gmail com>
Date: Mon Oct 1 11:42:55 2018 +0200
Arabic shaping PoC, using fribidi + Unicode presentation form characters
BIDI-STATUS | 3 ++-
src/bidi.cc | 57 +++++++++++++++++++++++++++++++++++++++++++++++++++------
src/bidi.hh | 2 ++
src/vte.cc | 4 ++--
4 files changed, 57 insertions(+), 9 deletions(-)
---
diff --git a/BIDI-STATUS b/BIDI-STATUS
index ff78efb6..ee198dc1 100644
--- a/BIDI-STATUS
+++ b/BIDI-STATUS
@@ -10,6 +10,7 @@ Done:
- Mouse reporting.
- Regex match and explicit hyperlink underlining on hover.
- VTE_DEBUG=bidi highlights characters with resolved RTL directionality.
+- Arabic shaping using Unicode presentation forms.
- Test file.
- Configure flag.
@@ -30,7 +31,7 @@ Missing from first release:
- Code cleanup and review, of course.
Planned future improvements:
-- Shaping.
+- Real shaping (harfbuzz?).
- Right-align RTL glyphs.
- Implicit mode level 2 (handling BiDi control characters).
- Mirror the glyphs that don't have mirrored counterpart.
diff --git a/src/bidi.cc b/src/bidi.cc
index 9646fd17..61336461 100644
--- a/src/bidi.cc
+++ b/src/bidi.cc
@@ -39,6 +39,7 @@ BidiRow::BidiRow()
m_log2vis = nullptr;
m_vis2log = nullptr;
m_vis_rtl = nullptr;
+ m_vis_shaped_char = nullptr;
}
BidiRow::~BidiRow()
@@ -46,6 +47,7 @@ BidiRow::~BidiRow()
g_free (m_log2vis);
g_free (m_vis2log);
g_free (m_vis_rtl);
+ g_free (m_vis_shaped_char);
}
void BidiRow::set_width(vte::grid::column_t width)
@@ -60,6 +62,7 @@ void BidiRow::set_width(vte::grid::column_t width)
m_log2vis = (vte::grid::column_t *) g_realloc (m_log2vis, sizeof (vte::grid::column_t) *
m_width_alloc);
m_vis2log = (vte::grid::column_t *) g_realloc (m_vis2log, sizeof (vte::grid::column_t) *
m_width_alloc);
m_vis_rtl = (guint8 *) g_realloc (m_vis_rtl, sizeof (guint8) * m_width_alloc);
+ m_vis_shaped_char = (gunichar *) g_realloc (m_vis_shaped_char, sizeof (gunichar) *
m_width_alloc);
}
m_width = width;
@@ -110,6 +113,21 @@ bool BidiRow::log_is_rtl(vte::grid::column_t col) const
}
}
+/* Get the shaped character (vteunistr) to draw at the given visual position.
+ *
+ * The unshaped character (vteunistr) "s" needs to be passed in because the BiDi
+ * component stores at most one base character per column (0 meaning none) and never
+ * the combining accents; these are taken over from "s". If "col" lies outside of
+ * the row (negative or >= m_width), or nothing is stored there, "s" is returned as is. */
+vteunistr
+BidiRow::vis_get_shaped_char(vte::grid::column_t col, vteunistr s) const
+{
+ if (col < 0 || col >= m_width || m_vis_shaped_char[col] == 0)
+ return s;
+
+ return _vte_unistr_replace_base(s, m_vis_shaped_char[col]);
+}
+
/* Whether the line's base direction is RTL. */
bool BidiRow::base_is_rtl() const
{
@@ -219,6 +237,7 @@ void RingView::explicit_line(vte::grid::row_t row, bool rtl)
for (i = 0; i < m_width; i++) {
bidirow->m_log2vis[i] = bidirow->m_vis2log[i] = m_width - 1 - i;
bidirow->m_vis_rtl[i] = true;
+ bidirow->m_vis_shaped_char[i] = 0;
}
} else {
/* Shortcut: bidirow->m_width == 0 might denote a fully LTR line,
@@ -343,6 +362,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
// FIXME VLA is a gcc extension, use g_newa() instead
FriBidiCharType fribidi_chartypes[count];
FriBidiBracketType fribidi_brackettypes[count];
+ FriBidiJoiningType fribidi_joiningtypes[count];
FriBidiLevel fribidi_levels[count];
FriBidiStrIndex fribidi_map[count];
@@ -351,6 +371,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
fribidi_get_bidi_types (fribidi_chars, count, fribidi_chartypes);
fribidi_get_bracket_types (fribidi_chars, count, fribidi_chartypes, fribidi_brackettypes);
+ fribidi_get_joining_types (fribidi_chars, count, fribidi_joiningtypes);
level = fribidi_get_par_embedding_levels_ex (fribidi_chartypes, fribidi_brackettypes, count,
&pbase_dir, fribidi_levels);
if (level == 0) {
@@ -358,13 +379,33 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
return explicit_paragraph (row_orig, rtl);
}
+ /* Arabic shaping
+ *
+ * https://www.w3.org/TR/css-text-3/#word-break-shaping says:
+ * "When shaping scripts such as Arabic wrap [...] the characters must still be shaped (their joining forms chosen)
+ * as if the word were still whole."
+ *
+ * Also, FriBidi's Arabic shaping methods, as opposed to fribidi_reorder_line(), don't take an offset parameter.
+ * This is another weak sign that the desired behavior is to shape the entire paragraph before splitting to lines.
+ *
+ * We only perform shaping in implicit mode, for two reasons:
+ *
+ * Following the CSS logic, I think the sensible behavior for a partially visible word (e.g. at the margin of a
+ * text editor) is to use the joining/shaping form according to the entire word. Hence in explicit mode it must be
+ * the responsibility of the BiDi-aware application and not the terminal emulator to perform joining/shaping.
+ *
+ * And a technical limitation: FriBidi can only perform joining/shaping with the logical order as input, not with
+ * the visual order. We'd need to find another API, or do ugly workarounds, which I'd rather not. */
+ fribidi_join_arabic (fribidi_chartypes, count, fribidi_levels, fribidi_joiningtypes);
+ fribidi_shape_arabic (FRIBIDI_FLAGS_ARABIC, fribidi_levels, count, fribidi_joiningtypes,
fribidi_chars);
+
g_assert_cmpint (pbase_dir, !=, FRIBIDI_PAR_ON);
/* For convenience, from now on this variable contains the resolved (i.e. possibly autodetected)
value. */
rtl = (pbase_dir == FRIBIDI_PAR_RTL || pbase_dir == FRIBIDI_PAR_WRTL);
- if (level == 1 || (rtl && level == 2)) {
- /* Fast shortcut for LTR-only and RTL-only paragraphs. */
- return explicit_paragraph (row_orig, rtl);
+ if (level == 1) {
+ /* Fast shortcut for LTR-only paragraphs. */
+ return explicit_paragraph (row_orig, false);
}
/* Silly FriBidi API of fribidi_reorder_line()... It reorders whatever values we give to it,
@@ -426,9 +467,9 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
goto next_line;
}
- if (level == 1 || (rtl && level == 2)) {
- /* Fast shortcut for LTR-only and RTL-only lines. */
- explicit_line (row, rtl);
+ if (level == 1) {
+ /* Fast shortcut for LTR-only lines. */
+ explicit_line (row, false);
bidirow->m_has_foreign = true;
goto next_line;
}
@@ -441,6 +482,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
for (; tv < unused; tv++) {
bidirow->m_vis2log[tv] = m_width - 1 - tv;
bidirow->m_vis_rtl[tv] = true;
+ bidirow->m_vis_shaped_char[tv] = 0;
}
}
for (fv = lines[line]; fv < lines[line + 1]; fv++) {
@@ -455,6 +497,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
for (col = 0; col < cell->attr.columns(); col++) {
bidirow->m_vis2log[tv + col] = tl + cell->attr.columns() - 1 - col;
bidirow->m_vis_rtl[tv + col] = true;
+ bidirow->m_vis_shaped_char[tv + col] = fribidi_chars[fl];
}
tv += cell->attr.columns();
tl += cell->attr.columns();
@@ -463,6 +506,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
for (col = 0; col < cell->attr.columns(); col++) {
bidirow->m_vis2log[tv] = tl;
bidirow->m_vis_rtl[tv] = false;
+ bidirow->m_vis_shaped_char[tv] = fribidi_chars[fl];
tv++;
tl++;
}
@@ -474,6 +518,7 @@ vte::grid::row_t RingView::paragraph(vte::grid::row_t row)
for (; tv < m_width; tv++) {
bidirow->m_vis2log[tv] = tv;
bidirow->m_vis_rtl[tv] = false;
+ bidirow->m_vis_shaped_char[tv] = 0;
}
}
g_assert_cmpint (tv, ==, m_width);
diff --git a/src/bidi.hh b/src/bidi.hh
index b1a8da7e..4f4b6f96 100644
--- a/src/bidi.hh
+++ b/src/bidi.hh
@@ -49,6 +49,7 @@ public:
vte::grid::column_t vis2log(vte::grid::column_t col) const;
bool log_is_rtl(vte::grid::column_t col) const;
bool vis_is_rtl(vte::grid::column_t col) const;
+ vteunistr vis_get_shaped_char(vte::grid::column_t col, vteunistr s) const;
bool base_is_rtl() const;
bool has_foreign() const;
@@ -61,6 +62,7 @@ private:
vte::grid::column_t *m_log2vis;
vte::grid::column_t *m_vis2log;
guint8 *m_vis_rtl;
+ gunichar *m_vis_shaped_char;
guint8 m_base_rtl: 1;
guint8 m_has_foreign: 1;
diff --git a/src/vte.cc b/src/vte.cc
index a57b5862..a3eb6d66 100644
--- a/src/vte.cc
+++ b/src/vte.cc
@@ -9095,7 +9095,7 @@ Terminal::draw_rows(VteScreen *screen_,
hilite = nhilite;
g_assert_cmpint (item_count, <, column_count);
- items[item_count].c = cell->c;
+ items[item_count].c = bidirow->vis_get_shaped_char(col, cell->c);
items[item_count].columns = cell->attr.columns();
items[item_count].x = (col - (bidirow->vis_is_rtl(col) ? cell->attr.columns() - 1 :
0)) * column_width;
items[item_count].y = y;
@@ -9245,7 +9245,7 @@ Terminal::paint_cursor()
/* Draw the cursor. */
viscol = bidirow->log2vis(col);
- item.c = (cell && cell->c) ? cell->c : ' ';
+ item.c = (cell && cell->c) ? bidirow->vis_get_shaped_char(viscol, cell->c) : ' ';
item.columns = item.c == '\t' ? 1 : cell ? cell->attr.columns() : 1;
item.x = (viscol - ((cell && bidirow->vis_is_rtl(viscol)) ? cell->attr.columns() - 1 : 0)) * width;
item.y = row_to_pixel(drow);
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]