[pango/hyphen-log-attr: 1/4] Add hyphens to log attrs
- From: Matthias Clasen <matthiasc src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [pango/hyphen-log-attr: 1/4] Add hyphens to log attrs
- Date: Wed, 25 Aug 2021 03:00:12 +0000 (UTC)
commit edf85c9a2dc5861fa8645a58a1c3f6e230ea0942
Author: Matthias Clasen <mclasen redhat com>
Date: Tue Aug 24 22:41:15 2021 -0400
Add hyphens to log attrs
The code that decides where a hyphen is needed is much better off
in break.c, so move it there, and keep the result in the log attr
array as a new needs_hyphen flag.
pango/break.c | 105 ++++++++++++++++++++++++++++++++++++++++++++++++++--
pango/pango-break.h | 2 +
2 files changed, 104 insertions(+), 3 deletions(-)
---
diff --git a/pango/break.c b/pango/break.c
index c55d5f22..11c0eb2f 100644
--- a/pango/break.c
+++ b/pango/break.c
@@ -249,6 +249,8 @@ default_break (const char *text,
gint last_sentence_start = -1;
gint last_non_space = -1;
+ gboolean prev_space_or_hyphen;
+
gboolean almost_done = FALSE;
gboolean done = FALSE;
@@ -261,6 +263,7 @@ default_break (const char *text,
prev_prev_break_type = G_UNICODE_BREAK_UNKNOWN;
prev_wc = 0;
prev_jamo = NO_JAMO;
+ prev_space_or_hyphen = FALSE;
if (length == 0 || *text == '\0')
{
@@ -291,6 +294,8 @@ default_break (const char *text,
/* Emoji extended pictographics */
gboolean is_Extended_Pictographic;
+ PangoScript script;
+
wc = next_wc;
break_type = next_break_type;
@@ -533,17 +538,16 @@ default_break (const char *text,
prev_GB_type = GB_type;
}
+ script = (PangoScript)g_unichar_get_script (wc);
+
/* ---- UAX#29 Word Boundaries ---- */
{
is_word_boundary = FALSE;
if (is_grapheme_boundary ||
G_UNLIKELY(wc >=0x1F1E6 && wc <=0x1F1FF)) /* Rules WB3 and WB4 */
{
- PangoScript script;
WordBreakType WB_type;
- script = (PangoScript)g_unichar_get_script (wc);
-
/* Find the WordBreakType of wc */
WB_type = WB_Other;
@@ -1552,7 +1556,68 @@ default_break (const char *text,
attrs[i - 1].is_white) {
last_sentence_start++;
}
+ }
+
+ /* --- Hyphens --- */
+ {
+ gboolean insert_hyphens;
+ gboolean space_or_hyphen = FALSE;
+
+ switch ((int)script)
+ {
+ case PANGO_SCRIPT_COMMON:
+ case PANGO_SCRIPT_HAN:
+ case PANGO_SCRIPT_HANGUL:
+ case PANGO_SCRIPT_HIRAGANA:
+ case PANGO_SCRIPT_KATAKANA:
+ insert_hyphens = FALSE;
+ break;
+ default:
+ insert_hyphens = TRUE;
+ break;
+ }
+
+ switch ((int)type)
+ {
+ case G_UNICODE_SPACE_SEPARATOR:
+ case G_UNICODE_LINE_SEPARATOR:
+ case G_UNICODE_PARAGRAPH_SEPARATOR:
+ space_or_hyphen = TRUE;
+ break;
+ case G_UNICODE_CONTROL:
+ if (wc == '\t' || wc == '\n' || wc == '\r' || wc == '\f')
+ space_or_hyphen = TRUE;
+ break;
+ default:
+ break;
+ }
+
+ if (!space_or_hyphen)
+ {
+ if (wc == '-' || /* Hyphen-minus */
+ wc == 0x058a || /* Armenian hyphen */
+ wc == 0x1400 || /* Canadian syllabics hyphen */
+ wc == 0x1806 || /* Mongolian todo hyphen */
+ wc == 0x2010 || /* Hyphen */
+ wc == 0x2027 || /* Hyphenation point */
+ wc == 0x2e17 || /* Double oblique hyphen */
+ wc == 0x2e40 || /* Double hyphen */
+ wc == 0x30a0 || /* Katakana-Hiragana double hyphen */
+ wc == 0xfe63 || /* Small hyphen-minus */
+ wc == 0xff0d) /* Fullwidth hyphen-minus */
+ space_or_hyphen = TRUE;
+ }
+
+ if (attrs[i].is_word_boundary)
+ attrs[i].needs_hyphen = FALSE;
+ else if (prev_space_or_hyphen)
+ attrs[i].needs_hyphen = FALSE;
+ else if (space_or_hyphen)
+ attrs[i].needs_hyphen = FALSE;
+ else
+ attrs[i].needs_hyphen = insert_hyphens;
+ prev_space_or_hyphen = space_or_hyphen;
}
prev_wc = wc;
@@ -1633,16 +1698,21 @@ break_attrs (const char *text,
int log_attrs_len)
{
PangoAttrList list;
+ PangoAttrList hyphens;
PangoAttrIterator iter;
GSList *l;
_pango_attr_list_init (&list);
+ _pango_attr_list_init (&hyphens);
+
for (l = attributes; l; l = l->next)
{
PangoAttribute *attr = l->data;
if (attr->klass->type == PANGO_ATTR_ALLOW_BREAKS)
pango_attr_list_insert (&list, pango_attribute_copy (attr));
+ if (attr->klass->type == PANGO_ATTR_INSERT_HYPHENS)
+ pango_attr_list_insert (&hyphens, pango_attribute_copy (attr));
}
if (!_pango_attr_list_has_attributes (&list))
@@ -1681,7 +1751,36 @@ break_attrs (const char *text,
} while (pango_attr_iterator_next (&iter));
_pango_attr_iterator_destroy (&iter);
+
+ _pango_attr_list_get_iterator (&hyphens, &iter);
+ do {
+ const PangoAttribute *attr = pango_attr_iterator_get (&iter, PANGO_ATTR_INSERT_HYPHENS);
+
+ if (attr && ((PangoAttrInt*)attr)->value == 0)
+ {
+ int start, end;
+ int start_pos, end_pos;
+ int pos;
+
+ pango_attr_iterator_range (&iter, &start, &end);
+ if (start < offset)
+ start_pos = 0;
+ else
+ start_pos = g_utf8_pointer_to_offset (text, text + start - offset);
+ if (end >= offset + length)
+ end_pos = log_attrs_len;
+ else
+ end_pos = g_utf8_pointer_to_offset (text, text + end - offset);
+
+ for (pos = start_pos + 1; pos < end_pos; pos++)
+ {
+ log_attrs[pos].needs_hyphen = FALSE;
+ }
+ }
+ } while (pango_attr_iterator_next (&iter));
+
_pango_attr_list_destroy (&list);
+ _pango_attr_list_destroy (&hyphens);
return TRUE;
}
diff --git a/pango/pango-break.h b/pango/pango-break.h
index a8e6c5b9..3ac52460 100644
--- a/pango/pango-break.h
+++ b/pango/pango-break.h
@@ -72,6 +72,7 @@ G_BEGIN_DECLS
* This flag is particularly useful when selecting text word-by-word. This flag
* implements Unicode's [Word Boundaries](http://www.unicode.org/reports/tr29/)
* semantics. (Since: 1.22)
+ * @needs_hyphen: when breaking lines before this char, insert a hyphen. Since: 1.50
*
* The `PangoLogAttr` structure stores information about the attributes of a
* single character.
@@ -91,6 +92,7 @@ struct _PangoLogAttr
guint backspace_deletes_character : 1;
guint is_expandable_space : 1;
guint is_word_boundary : 1;
+ guint needs_hyphen : 1;
};
PANGO_DEPRECATED_IN_1_44
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]