vte r2340 - in trunk: . src



Author: behdad
Date: Fri Dec 12 06:57:09 2008
New Revision: 2340
URL: http://svn.gnome.org/viewvc/vte?rev=2340&view=rev

Log:
2008-12-12  Behdad Esfahbod  <behdad gnome org>

        Bug 149631 – gnome-terminal doesn't combine combining chars in utf8

        * src/vteunistr.c:
        * src/vteunistr.h:
        An extended UTF-32 type that assigns numeric values to UTF-8 sequences
        on demand.  Can be used to efficiently store a string, instead of one
        character, at each cell.

        * src/vte-private.h:
        * src/vte.c:
        * src/vtedraw.c:
        * src/vtedraw.h:
        * src/vtepangocairo.c:
        Port to vteunistr instead of gunichar for cell content.  
        
        * src/vte.c: 
        Handle zerowidth insertions by sticking it on the previous cell.

        * src/iso2022.h:
        * src/iso2022.c:
        Cleanup ambiguous-width handling.  Handle zero-width chars.



Added:
   trunk/src/vteunistr.c
   trunk/src/vteunistr.h
Modified:
   trunk/ChangeLog
   trunk/src/Makefile.am
   trunk/src/iso2022.c
   trunk/src/iso2022.h
   trunk/src/vte-private.h
   trunk/src/vte.c
   trunk/src/vtedraw.c
   trunk/src/vtedraw.h
   trunk/src/vtepangocairo.c

Modified: trunk/src/Makefile.am
==============================================================================
--- trunk/src/Makefile.am	(original)
+++ trunk/src/Makefile.am	Fri Dec 12 06:57:09 2008
@@ -80,6 +80,8 @@
 	vtetree.h \
 	vtetypebuiltins.c \
 	vtetypebuiltins.h \
+	vteunistr.c \
+	vteunistr.h \
 	vteversion.h
 	$(NULL)
 

Modified: trunk/src/iso2022.c
==============================================================================
--- trunk/src/iso2022.c	(original)
+++ trunk/src/iso2022.c	Fri Dec 12 06:57:09 2008
@@ -284,39 +284,26 @@
 	return GPOINTER_TO_INT(a) - GPOINTER_TO_INT(b);
 }
 
-static inline gboolean
-_vte_iso2022_is_ambiguous(gunichar c)
-{
-	/* ASCII chars are not ambiguous */
-	if (G_LIKELY (c < 0x80))
-		return FALSE;
-
-	return g_unichar_iswide (c) != g_unichar_iswide_cjk (c);
-}
-
 /* If we only have a codepoint, guess what the ambiguous width should be based
  * on the default region.  Just hope we don't do this too often. */
 static int
 _vte_iso2022_ambiguous_width_guess(void)
 {
 	static int guess;
-	if (guess == 0) {
+	if (G_UNLIKELY (guess == 0)) {
 		const char *lang = NULL;
 		guess = 1;
-		if ((lang == NULL) && (g_getenv("LC_ALL") != NULL)) {
+		if (lang == NULL)
 			lang = g_getenv("LC_ALL");
-		}
-		if ((lang == NULL) && (g_getenv("LC_CTYPE") != NULL)) {
+		if (lang == NULL)
 			lang = g_getenv("LC_CTYPE");
-		}
-		if ((lang == NULL) && (g_getenv("LANG") != NULL)) {
+		if (lang == NULL)
 			lang = g_getenv("LANG");
-		}
-		if (lang != NULL) {
+		if (lang) {
 			if (g_ascii_strncasecmp(lang, "ja", 2) == 0 ||
-					g_ascii_strncasecmp(lang, "ko", 2) == 0 ||
-					g_ascii_strncasecmp(lang, "vi", 2) == 0 ||
-					g_ascii_strncasecmp(lang, "zh", 2) == 0) {
+			    g_ascii_strncasecmp(lang, "ko", 2) == 0 ||
+			    g_ascii_strncasecmp(lang, "vi", 2) == 0 ||
+			    g_ascii_strncasecmp(lang, "zh", 2) == 0) {
 				guess = 2;
 			}
 		}
@@ -357,12 +344,11 @@
 	/* Sort-of canonify the encoding name. */
 	i = j = 0;
 	for (i = 0; state->codeset[i] != '\0'; i++) {
-		if (g_ascii_isalnum(state->codeset[i])) {
+		if (g_ascii_isalnum(state->codeset[i]))
 			codeset[j++] = g_ascii_tolower(state->codeset[i]);
-		}
-		if (j >= sizeof(codeset) - 1) {
+
+		if (j >= sizeof(codeset) - 1)
 			break;
-		}
 	}
 	codeset[j] = '\0';
 
@@ -377,11 +363,11 @@
 	 * Decide the ambiguous width according to the default region if 
 	 * current locale is UTF-8.
 	 */
-	if (strcmp (codeset, "utf8") == 0 && g_getenv("VTE_CJK_WIDTH") != NULL) {
+	if (strcmp (codeset, "utf8") == 0) {
 	  const char *env = g_getenv ("VTE_CJK_WIDTH");
-	  if ((g_ascii_strcasecmp (env, "narrow")==0) || (g_ascii_strcasecmp (env, "0")==0))
+	  if (env && (g_ascii_strcasecmp (env, "narrow")==0 || g_ascii_strcasecmp (env, "0")==0))
 	    return 1;
-	  if ((g_ascii_strcasecmp (env, "wide")==0) || (g_ascii_strcasecmp (env, "1")==0))
+	  if (env && (g_ascii_strcasecmp (env, "wide")==0 || g_ascii_strcasecmp (env, "1")==0))
 	    return 2;
 	  else
 	    return _vte_iso2022_ambiguous_width_guess ();
@@ -391,6 +377,33 @@
 	return 1;
 }
 
+static inline gboolean
+_vte_iso2022_is_ambiguous(gunichar c)
+{
+	if (G_LIKELY (c < 0x80))
+		return FALSE;
+	if (G_UNLIKELY (g_unichar_iszerowidth (c)))
+		return FALSE;
+	return G_UNLIKELY (!g_unichar_iswide (c) && g_unichar_iswide_cjk (c));
+}
+
+int
+_vte_iso2022_unichar_width(struct _vte_iso2022_state *state,
+			   gunichar c)
+{
+	if (G_LIKELY (c < 0x80))
+		return 1;
+	if (G_UNLIKELY (g_unichar_iszerowidth (c)))
+		return 0;
+	if (G_UNLIKELY (g_unichar_iswide (c)))
+		return 2;
+	if (G_LIKELY (state->ambiguous_width == 1))
+		return 1;
+	if (G_UNLIKELY (g_unichar_iswide_cjk (c)))
+		return 2;
+	return 1;
+}
+
 static GHashTable *
 _vte_iso2022_map_init(const struct _vte_iso2022_map *map, gssize length)
 {
@@ -722,16 +735,16 @@
 	}
 }
 
-gssize
+int
 _vte_iso2022_get_encoded_width(gunichar c)
 {
-	gssize width;
+	int width;
 	width = (c & VTE_ISO2022_ENCODED_WIDTH_MASK) >> VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET;
 	return CLAMP(width, 0, 2);
 }
 
 static gunichar
-_vte_iso2022_set_encoded_width(gunichar c, gssize width)
+_vte_iso2022_set_encoded_width(gunichar c, int width)
 {
 	width = CLAMP(width, 0, 2);
 	c &= ~(VTE_ISO2022_ENCODED_WIDTH_MASK);
@@ -817,7 +830,7 @@
 	}
 	state->codeset = g_intern_string (codeset);
 	state->conv = conv;
-	state->ambiguous_width = _vte_iso2022_ambiguous_width(state);
+	state->ambiguous_width = _vte_iso2022_ambiguous_width (state);
 }
 
 const char *
@@ -1739,19 +1752,6 @@
 	return length;
 }
 
-gssize
-_vte_iso2022_unichar_width(gunichar c)
-{
-	c = c & ~(VTE_ISO2022_ENCODED_WIDTH_MASK); /* just in case */
-	if (G_UNLIKELY (_vte_iso2022_is_ambiguous(c))) {
-		return _vte_iso2022_ambiguous_width_guess();
-	}
-	if (g_unichar_iswide(c)) {
-		return 2;
-	}
-	return 1;
-}
-
 #ifdef ISO2022_MAIN
 #include <stdio.h>
 int

Modified: trunk/src/iso2022.h
==============================================================================
--- trunk/src/iso2022.h	(original)
+++ trunk/src/iso2022.h	Fri Dec 12 06:57:09 2008
@@ -49,8 +49,9 @@
 #define VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET	28
 #define VTE_ISO2022_ENCODED_WIDTH_MASK		(3 << VTE_ISO2022_ENCODED_WIDTH_BIT_OFFSET)
 #define VTE_ISO2022_HAS_ENCODED_WIDTH(__c)	(((__c) & VTE_ISO2022_ENCODED_WIDTH_MASK) != 0)
-gssize _vte_iso2022_get_encoded_width(gunichar c);
-gssize _vte_iso2022_unichar_width(gunichar c);
+int _vte_iso2022_get_encoded_width(gunichar c);
+int _vte_iso2022_unichar_width(struct _vte_iso2022_state *state,
+			       gunichar c);
 
 G_END_DECLS
 

Modified: trunk/src/vte-private.h
==============================================================================
--- trunk/src/vte-private.h	(original)
+++ trunk/src/vte-private.h	Fri Dec 12 06:57:09 2008
@@ -41,6 +41,7 @@
 #include <unistd.h>
 #include <glib/gi18n-lib.h>
 
+#include "vteunistr.h"
 #include "vte.h"
 #include "buffer.h"
 #include "debug.h"
@@ -98,7 +99,7 @@
 /* The structure we use to hold characters we're supposed to display -- this
  * includes any supported visible attributes. */
 struct vte_charcell {
-	gunichar c;		/* The Unicode character. */
+	vteunistr c;		/* The Unicode string for the cell. */
 
 	struct vte_charcell_attr {
 		guint32 columns: 4;	/* Number of visible columns

Modified: trunk/src/vte.c
==============================================================================
--- trunk/src/vte.c	(original)
+++ trunk/src/vte.c	Fri Dec 12 06:57:09 2008
@@ -669,7 +669,7 @@
 		     (!left_only || (i < terminal->pvt->im_preedit_cursor));
 		     i++) {
 			c = g_utf8_get_char(preedit);
-			ret += _vte_iso2022_unichar_width(c);
+			ret += _vte_iso2022_unichar_width(terminal->pvt->iso2022, c);
 			preedit = g_utf8_next_char(preedit);
 		}
 	}
@@ -3069,7 +3069,7 @@
 	if (G_UNLIKELY (screen->alternate_charset)) {
 		_vte_debug_print(VTE_DEBUG_SUBSTITUTION,
 				"Attempting charset substitution"
-				"for 0x%04x.\n", c);
+				"for U+%04X.\n", c);
 		/* See if there's a mapping for it. */
 		c = _vte_iso2022_process_single(terminal->pvt->iso2022, c, '0');
 	}
@@ -3086,12 +3086,13 @@
 		columns = _vte_iso2022_get_encoded_width(c);
 		c &= ~VTE_ISO2022_ENCODED_WIDTH_MASK;
 	} else {
-		columns = _vte_iso2022_unichar_width(c);
+		columns = _vte_iso2022_unichar_width(terminal->pvt->iso2022, c);
 	}
 
+
 	/* If we're autowrapping here, do it. */
 	col = screen->cursor_current.col;
-	if (G_UNLIKELY (col + columns > terminal->column_count)) {
+	if (G_UNLIKELY (columns && col + columns > terminal->column_count)) {
 		if (terminal->pvt->flags.am) {
 			_vte_debug_print(VTE_DEBUG_ADJ,
 					"Autowrapping before character\n");
@@ -3118,6 +3119,73 @@
 			col, columns, (long)screen->cursor_current.row,
 			(long)screen->insert_delta);
 
+
+	if (G_UNLIKELY (columns == 0)) {
+
+		/* It's a combining mark */
+
+		long row_num;
+		struct vte_charcell *cell;
+
+		_vte_debug_print(VTE_DEBUG_PARSE, "combining U+%04X", c);
+
+		row_num = screen->cursor_current.row;
+		row = NULL;
+		if (col == 0) {
+			/* We are at first column.  See if the previous line softwrapped.
+			 * If it did, move there.  Otherwise skip inserting. */
+
+			if (row_num > 0) {
+				row_num--;
+				row = _vte_terminal_find_row_data (terminal, row_num);
+
+				if (!row->soft_wrapped)
+					row = NULL;
+				else
+					col = row->cells->len;
+			}
+		} else {
+			row = _vte_terminal_find_row_data (terminal, row_num);
+		}
+
+		if (G_UNLIKELY (!row || !col))
+			goto not_inserted;
+
+		/* Combine it on the previous cell */
+
+		col--;
+		cell = _vte_row_data_find_charcell(row, col);
+
+		if (G_UNLIKELY (!cell))
+			goto not_inserted;
+
+		/* Find the previous cell */
+		while (cell->attr.fragment && col > 0) {
+			cell = _vte_row_data_find_charcell(row, --col);
+		}
+		if (G_UNLIKELY (!cell || cell->c == '\t'))
+			goto not_inserted;
+
+		/* Combine the new character on top of the cell string */
+		c = _vte_unistr_append_unichar (cell->c, c);
+
+		/* And set it */
+		columns = cell->attr.columns;
+		for (i = 0; i < columns; i++) {
+			cell = _vte_row_data_find_charcell(row, col++);
+			cell->c = c;
+		}
+
+		/* Always invalidate since we put the mark on the *previous* cell
+		 * and the higher level code doesn't know this. */
+		_vte_invalidate_cells(terminal,
+				      col - columns,
+				      columns,
+				      row_num, 1);
+
+		goto done;
+	}
+
 	/* Make sure we have enough rows to hold this data. */
 	row = vte_terminal_ensure_cursor (terminal);
 	g_assert(row != NULL);
@@ -3207,9 +3275,11 @@
 		}
 	}
 
+done:
 	/* We added text, so make a note of it. */
 	terminal->pvt->text_inserted_flag = TRUE;
 
+not_inserted:
 	_vte_debug_print(VTE_DEBUG_ADJ|VTE_DEBUG_PARSE,
 			"insertion delta => %ld.\n",
 			(long)screen->insert_delta);
@@ -5193,7 +5263,7 @@
 	struct vte_charcell *pcell = NULL;
 	gboolean word_char;
 	if ((pcell = vte_terminal_find_charcell(terminal, acol, arow)) != NULL && pcell->c != 0) {
-		word_char = vte_terminal_is_word_char(terminal, pcell->c);
+		word_char = vte_terminal_is_word_char(terminal, _vte_unistr_get_base (pcell->c));
 
 		/* Lets not group non-wordchars together (bug #25290) */
 		if (!word_char)
@@ -5204,7 +5274,7 @@
 			return FALSE;
 		}
 		if (word_char != vte_terminal_is_word_char(terminal,
-							   pcell->c)) {
+							   _vte_unistr_get_base (pcell->c))) {
 			return FALSE;
 		}
 		return TRUE;
@@ -5846,21 +5916,19 @@
 					attr.underline = pcell->attr.underline;
 					attr.strikethrough = pcell->attr.strikethrough;
 
-					/* Store the character. */
-					string = g_string_append_unichar(string,
-							pcell->c ?
-							pcell->c :
-							' ');
+					/* Store the cell string */
 					if (pcell->c == 0) {
+						g_string_append_c (string, ' ');
 						last_empty = string->len;
 						last_emptycol = col;
 					} else {
+						_vte_unistr_append_to_string (pcell->c, string);
 						last_nonempty = string->len;
 						last_nonemptycol = col;
 					}
 
-					/* If we added a character to the string, record its
-					 * attributes, one per char. */
+					/* If we added text to the string, record its
+					 * attributes, one per byte. */
 					if (attributes) {
 						vte_g_array_fill(attributes,
 								&attr, string->len);
@@ -8744,7 +8812,7 @@
 /* Check if a unicode character is actually a graphic character we draw
  * ourselves to handle cases where fonts don't have glyphs for them. */
 static gboolean
-vte_unichar_is_local_graphic(gunichar c)
+vte_unichar_is_local_graphic(vteunistr c)
 {
 	if ((c >= 0x2500) && (c <= 0x257f)) {
 		return TRUE;
@@ -8783,7 +8851,7 @@
 	return FALSE;
 }
 static gboolean
-vte_terminal_unichar_is_local_graphic(VteTerminal *terminal, gunichar c)
+vte_terminal_unichar_is_local_graphic(VteTerminal *terminal, vteunistr c)
 {
 	return vte_unichar_is_local_graphic (c) &&
 		!_vte_draw_has_char (terminal->pvt->draw, c);
@@ -8870,7 +8938,7 @@
 /* Draw the graphic representation of a line-drawing or special graphics
  * character. */
 static gboolean
-vte_terminal_draw_graphic(VteTerminal *terminal, gunichar c,
+vte_terminal_draw_graphic(VteTerminal *terminal, vteunistr c,
 			  gint fore, gint back, gboolean draw_default_bg,
 			  gint x, gint y,
 			  gint column_width, gint columns, gint row_height)
@@ -10708,7 +10776,8 @@
 		items = g_new(struct _vte_draw_text_request, len);
 		for (i = columns = 0; i < len; i++) {
 			items[i].c = g_utf8_get_char(preedit);
-			items[i].columns = _vte_iso2022_unichar_width(items[i].c);
+			items[i].columns = _vte_iso2022_unichar_width(terminal->pvt->iso2022,
+								      items[i].c);
 			items[i].x = (col + columns) * width;
 			items[i].y = row * height;
 			columns += items[i].columns;

Modified: trunk/src/vtedraw.c
==============================================================================
--- trunk/src/vtedraw.c	(original)
+++ trunk/src/vtedraw.c	Fri Dec 12 06:57:09 2008
@@ -312,7 +312,7 @@
 }
 
 int
-_vte_draw_get_char_width (struct _vte_draw *draw, gunichar c, int columns)
+_vte_draw_get_char_width (struct _vte_draw *draw, vteunistr c, int columns)
 {
 	int width = 0;
 
@@ -370,11 +370,11 @@
 	return has_char;
 }
 gboolean
-_vte_draw_has_char (struct _vte_draw *draw, gunichar c)
+_vte_draw_has_char (struct _vte_draw *draw, vteunistr c)
 {
 	gboolean has_char = TRUE;
 
-	_vte_debug_print (VTE_DEBUG_DRAW, "draw_has_char ('%c')\n", c);
+	_vte_debug_print (VTE_DEBUG_DRAW, "draw_has_char ('0x%04X')\n", c);
 
 	if (draw->impl->has_char)
 		has_char = draw->impl->has_char (draw, c);

Modified: trunk/src/vtedraw.h
==============================================================================
--- trunk/src/vtedraw.h	(original)
+++ trunk/src/vtedraw.h	Fri Dec 12 06:57:09 2008
@@ -26,6 +26,7 @@
 #include <gtk/gtk.h>
 #include "vtebg.h"
 #include "vte.h"
+#include "vteunistr.h"
 
 G_BEGIN_DECLS
 
@@ -55,7 +56,7 @@
    corner of the cell into which the character will be drawn instead of the
    left end of the baseline. */
 struct _vte_draw_text_request {
-	gunichar c;
+	vteunistr c;
 	gshort x, y, columns;
 };
 
@@ -86,11 +87,11 @@
 			      const PangoFontDescription *,
 			      VteTerminalAntiAlias);
 	void (*get_text_metrics)(struct _vte_draw *, gint *, gint *, gint *);
-	int (*get_char_width)(struct _vte_draw *, gunichar c, int columns);
+	int (*get_char_width)(struct _vte_draw *, vteunistr c, int columns);
 	void (*draw_text)(struct _vte_draw *,
 			  struct _vte_draw_text_request *, gsize,
 			  GdkColor *, guchar);
-	gboolean (*has_char)(struct _vte_draw *, gunichar);
+	gboolean (*has_char)(struct _vte_draw *, vteunistr);
 	void (*draw_rectangle)(struct _vte_draw *,
 			       gint, gint, gint, gint,
 			       GdkColor *, guchar);
@@ -150,7 +151,7 @@
 			     VteTerminalAntiAlias anti_alias);
 void _vte_draw_get_text_metrics(struct _vte_draw *draw,
 				gint *width, gint *height, gint *ascent);
-int _vte_draw_get_char_width(struct _vte_draw *draw, gunichar c, int columns);
+int _vte_draw_get_char_width(struct _vte_draw *draw, vteunistr c, int columns);
 
 void _vte_draw_text(struct _vte_draw *draw,
 		    struct _vte_draw_text_request *requests, gsize n_requests,
@@ -158,7 +159,7 @@
 gboolean _vte_draw_char(struct _vte_draw *draw,
 			struct _vte_draw_text_request *request,
 			GdkColor *color, guchar alpha);
-gboolean _vte_draw_has_char(struct _vte_draw *draw, gunichar c);
+gboolean _vte_draw_has_char(struct _vte_draw *draw, vteunistr c);
 
 void _vte_draw_fill_rectangle(struct _vte_draw *draw,
 			      gint x, gint y, gint width, gint height,

Modified: trunk/src/vtepangocairo.c
==============================================================================
--- trunk/src/vtepangocairo.c	(original)
+++ trunk/src/vtepangocairo.c	Fri Dec 12 06:57:09 2008
@@ -45,16 +45,16 @@
  *   - We attach a font_info to draw as our private data.  A font_info has
  *     all the information to quickly draw text.
  *
- *   - A font_info keeps uses unichar_font_info structs that represent all
- *     information needed to quickly draw a single gunichar.  The font_info
- *     creates those unichar_font_info structs on demand and caches them
+ *   - A font_info keeps uses unistr_font_info structs that represent all
+ *     information needed to quickly draw a single vteunistr.  The font_info
+ *     creates those unistr_font_info structs on demand and caches them
  *     indefinitely.  It uses a direct array for the ASCII range and a hash
  *     table for the rest.
  *
  *
- * Fast rendering of unichars:
+ * Fast rendering of unistrs:
  *
- * A unichar_font_info (uinfo) calls Pango to set text for the unichar upon
+ * A unistr_font_info (uinfo) calls Pango to set text for the unistr upon
  * initialization and then caches information needed to draw the results
  * later.  It uses three different internal representations and respectively
  * three drawing paths:
@@ -64,7 +64,7 @@
  *     fastest way to draw text as it bypasses Pango completely and allows
  *     for stuffing multiple glyphs into a single cairo_show_glyphs() request
  *     (if scaled-fonts match).  This method is used if the glyphs used for
- *     the gunichar as determined by Pango consists of a single regular glyph
+ *     the vteunistr as determined by Pango consists of a single regular glyph
  *     positioned at 0,0 using a regular font.  This method is used for more
  *     than 99% of the cases.  Only exceptional cases fall through to the
  *     other two methods.
@@ -79,7 +79,7 @@
  *
  *   - COVERAGE_USE_PANGO_LAYOUT_LINE:
  *     Keeping a pango layout line.  This method is used only in the very
- *     weird and exception case that a single gunichar uses more than one font
+ *     weird and exception case that a single vteunistr uses more than one font
  *     to be drawn.  This is not expected to happen, but exists for
  *     completeness, to make sure we can deal with any junk pango decides to
  *     throw at us.
@@ -112,7 +112,7 @@
  *
  * When initializing a font info struct we measure a string consisting of all
  * ASCII letters and some other ASCII characters.  Since we have a shaped pango
- * layout at hand, we walk over it and cache unichar font info for the ASCII
+ * layout at hand, we walk over it and cache unistr font info for the ASCII
  * letters if we can do that easily using COVERAGE_USE_CAIRO_GLYPH.  This
  * means that we precache all ASCII letters without any extra pango shaping
  * involved.
@@ -139,7 +139,7 @@
 #define MAX_RUN_LENGTH 100
 
 
-enum unichar_coverage {
+enum unistr_coverage {
 	/* in increasing order of speed */
 	COVERAGE_UNKNOWN = 0,		/* we don't know about the character yet */
 	COVERAGE_USE_PANGO_LAYOUT_LINE,	/* use a PangoLayoutLine for the character */
@@ -147,7 +147,7 @@
 	COVERAGE_USE_CAIRO_GLYPH	/* use a cairo_glyph_t for the character */
 };
 
-union unichar_font_info {
+union unistr_font_info {
 	/* COVERAGE_USE_PANGO_LAYOUT_LINE */
 	struct {
 		PangoLayoutLine *line;
@@ -164,23 +164,23 @@
 	} using_cairo_glyph;
 };
 
-struct unichar_info {
+struct unistr_info {
 	guchar coverage;
 	guchar has_unknown_chars;
 	guint16 width;
-	union unichar_font_info ufi;
+	union unistr_font_info ufi;
 };
 
-static struct unichar_info *
-unichar_info_create (void)
+static struct unistr_info *
+unistr_info_create (void)
 {
-	return g_slice_new0 (struct unichar_info);
+	return g_slice_new0 (struct unistr_info);
 }
 
 static void
-unichar_info_finish (struct unichar_info *uinfo)
+unistr_info_finish (struct unistr_info *uinfo)
 {
-	union unichar_font_info *ufi = &uinfo->ufi;
+	union unistr_font_info *ufi = &uinfo->ufi;
 
 	switch (uinfo->coverage) {
 	default:
@@ -209,10 +209,10 @@
 }
 
 static void
-unichar_info_destroy (struct unichar_info *uinfo)
+unistr_info_destroy (struct unistr_info *uinfo)
 {
-	unichar_info_finish (uinfo);
-	g_slice_free (struct unichar_info, uinfo);
+	unistr_info_finish (uinfo);
+	g_slice_free (struct unistr_info, uinfo);
 }
 
 struct font_info {
@@ -220,16 +220,19 @@
 	int ref_count;
 	guint destroy_timeout; /* only used when ref_count == 0 */
 
-	/* reusable layout set with font and everything */
+	/* reusable layout set with font and everything set */
 	PangoLayout *layout;
 
 	/* cache of character info */
-	struct unichar_info ascii_unichar_info[128];
-	GHashTable *other_unichar_info;
+	struct unistr_info ascii_unistr_info[128];
+	GHashTable *other_unistr_info;
 
 	/* cell metrics */
 	gint width, height, ascent;
 
+	/* reusable string for UTF-8 conversion */
+	GString *string;
+
 #ifdef VTE_DEBUG
 	/* profiling info */
 	int coverage_count[4];
@@ -237,24 +240,24 @@
 };
 
 
-static struct unichar_info *
-font_info_find_unichar_info (struct font_info    *info,
-			     gunichar             c)
+static struct unistr_info *
+font_info_find_unistr_info (struct font_info    *info,
+			    vteunistr            c)
 {
-	struct unichar_info *uinfo;
+	struct unistr_info *uinfo;
 
-	if (G_LIKELY (c < G_N_ELEMENTS (info->ascii_unichar_info)))
-		return &info->ascii_unichar_info[c];
+	if (G_LIKELY (c < G_N_ELEMENTS (info->ascii_unistr_info)))
+		return &info->ascii_unistr_info[c];
 
-	if (G_UNLIKELY (info->other_unichar_info == NULL))
-		info->other_unichar_info = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) unichar_info_destroy);
+	if (G_UNLIKELY (info->other_unistr_info == NULL))
+		info->other_unistr_info = g_hash_table_new_full (NULL, NULL, NULL, (GDestroyNotify) unistr_info_destroy);
 
-	uinfo = g_hash_table_lookup (info->other_unichar_info, GINT_TO_POINTER (c));
+	uinfo = g_hash_table_lookup (info->other_unistr_info, GINT_TO_POINTER (c));
 	if (G_LIKELY (uinfo))
 		return uinfo;
 
-	uinfo = unichar_info_create ();
-	g_hash_table_insert (info->other_unichar_info, GINT_TO_POINTER (c), uinfo);
+	uinfo = unistr_info_create ();
+	g_hash_table_insert (info->other_unistr_info, GINT_TO_POINTER (c), uinfo);
 	return uinfo;
 }
 
@@ -307,11 +310,11 @@
 	     more;
 	     more = pango_glyph_item_iter_next_cluster (&iter))
 	{
-		struct unichar_info *uinfo;
-		union unichar_font_info *ufi;
+		struct unistr_info *uinfo;
+		union unistr_font_info *ufi;
 	 	PangoGlyphGeometry *geometry;
 		PangoGlyph glyph;
-		gunichar c;
+		vteunistr c;
 
 		/* Only cache simple clusters */
 		if (iter.start_char +1 != iter.end_char  ||
@@ -334,7 +337,7 @@
 		if (!(glyph <= 0xFFFF) || (geometry->x_offset | geometry->y_offset) != 0)
 			continue;
 
-		uinfo = font_info_find_unichar_info (info, c);
+		uinfo = font_info_find_unistr_info (info, c);
 		if (G_UNLIKELY (uinfo->coverage != COVERAGE_UNKNOWN))
 			continue;
 
@@ -405,6 +408,7 @@
 			  info);
 
 	info->layout = pango_layout_new (context);
+	info->string = g_string_sized_new (VTE_UTF8_BPC+1);
 
 	font_info_measure_font (info);
 
@@ -414,7 +418,7 @@
 static void
 font_info_free (struct font_info *info)
 {
-	gunichar i;
+	vteunistr i;
 
 #ifdef VTE_DEBUG
 	_vte_debug_print (VTE_DEBUG_PANGOCAIRO,
@@ -426,13 +430,14 @@
 			  info->coverage_count[3]);
 #endif
 
+	g_string_free (info->string, TRUE);
 	g_object_unref (info->layout);
 
-	for (i = 0; i < G_N_ELEMENTS (info->ascii_unichar_info); i++)
-		unichar_info_finish (&info->ascii_unichar_info[i]);
+	for (i = 0; i < G_N_ELEMENTS (info->ascii_unistr_info); i++)
+		unistr_info_finish (&info->ascii_unistr_info[i]);
 		
-	if (info->other_unichar_info) {
-		g_hash_table_destroy (info->other_unichar_info);
+	if (info->other_unistr_info) {
+		g_hash_table_destroy (info->other_unistr_info);
 	}
 
 	g_slice_free (struct font_info, info);
@@ -669,24 +674,24 @@
 	return font_info_create_for_screen (screen, desc, antialias, language);
 }
 
-static struct unichar_info *
-font_info_get_unichar_info (struct font_info *info,
-			    gunichar c)
-{
-	struct unichar_info *uinfo;
-	union unichar_font_info *ufi;
-	char buf[VTE_UTF8_BPC+1];
+static struct unistr_info *
+font_info_get_unistr_info (struct font_info *info,
+			   vteunistr c)
+{
+	struct unistr_info *uinfo;
+	union unistr_font_info *ufi;
 	PangoRectangle logical;
 	PangoLayoutLine *line;
 
-	uinfo = font_info_find_unichar_info (info, c);
+	uinfo = font_info_find_unistr_info (info, c);
 	if (G_LIKELY (uinfo->coverage != COVERAGE_UNKNOWN))
 		return uinfo;
 
 	ufi = &uinfo->ufi;
 
-	buf[g_unichar_to_utf8 (c, buf)] = '\0';
-	pango_layout_set_text (info->layout, buf, -1);
+	g_string_set_size (info->string, 0);
+	_vte_unistr_append_to_string (c, info->string);
+	pango_layout_set_text (info->layout, info->string->str, -1);
 	pango_layout_get_extents (info->layout, NULL, &logical);
 
 	uinfo->width = PANGO_PIXELS_CEIL (logical.width);
@@ -929,14 +934,14 @@
 
 
 static int
-_vte_pangocairo_get_char_width (struct _vte_draw *draw, gunichar c, int columns)
+_vte_pangocairo_get_char_width (struct _vte_draw *draw, vteunistr c, int columns)
 {
 	struct _vte_pangocairo_data *data = draw->impl_data;
-	struct unichar_info *uinfo;
+	struct unistr_info *uinfo;
 
 	g_return_val_if_fail (data->font != NULL, 0);
 
-	uinfo = font_info_get_unichar_info (data->font, c);
+	uinfo = font_info_get_unistr_info (data->font, c);
 	return uinfo->width;
 }
 
@@ -969,11 +974,11 @@
 	cairo_set_operator (data->cr, CAIRO_OPERATOR_OVER);
 
 	for (i = 0; i < n_requests; i++) {
-		gunichar c = requests[i].c;
+		vteunistr c = requests[i].c;
 		int x = requests[i].x;
 		int y = requests[i].y + data->font->ascent;
-		struct unichar_info *uinfo = font_info_get_unichar_info (data->font, c);
-		union unichar_font_info *ufi = &uinfo->ufi;
+		struct unistr_info *uinfo = font_info_get_unistr_info (data->font, c);
+		union unistr_font_info *ufi = &uinfo->ufi;
 
 		switch (uinfo->coverage) {
 		default:
@@ -1019,14 +1024,14 @@
 }
 
 static gboolean
-_vte_pangocairo_draw_has_char (struct _vte_draw *draw, gunichar c)
+_vte_pangocairo_draw_has_char (struct _vte_draw *draw, vteunistr c)
 {
 	struct _vte_pangocairo_data *data = draw->impl_data;
-	struct unichar_info *uinfo;
+	struct unistr_info *uinfo;
 
 	g_return_val_if_fail (data->font != NULL, FALSE);
 
-	uinfo = font_info_get_unichar_info (data->font, c);
+	uinfo = font_info_get_unistr_info (data->font, c);
 	return !uinfo->has_unknown_chars;
 }
 

Added: trunk/src/vteunistr.c
==============================================================================
--- (empty file)
+++ trunk/src/vteunistr.c	Fri Dec 12 06:57:09 2008
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author(s):
+ * 	Behdad Esfahbod
+ */
+
+#include <config.h>
+
+#include <string.h>
+
+#include "vteunistr.h"
+
+#define VTE_UNISTR_START 0x80000000
+
+
+static vteunistr unistr_next = VTE_UNISTR_START + 1;
+
+struct VteUnistrDecomp {
+	vteunistr prefix;
+	gunichar  suffix;
+};
+
+GArray     *unistr_decomp;
+GHashTable *unistr_comp;
+
+static guint
+unistr_comp_hash (gconstpointer key)
+{
+	struct VteUnistrDecomp *decomp;
+	decomp = &g_array_index (unistr_decomp,
+				 struct VteUnistrDecomp,
+				 GPOINTER_TO_UINT (key));
+	return decomp->prefix ^ decomp->suffix;
+}
+
+static gboolean
+unistr_comp_equal (gconstpointer a,
+		      gconstpointer b)
+{
+	return 0 == memcmp (&g_array_index (unistr_decomp,
+					    struct VteUnistrDecomp,
+					    GPOINTER_TO_UINT (a)),
+			    &g_array_index (unistr_decomp,
+					    struct VteUnistrDecomp,
+					    GPOINTER_TO_UINT (b)),
+			    sizeof (struct VteUnistrDecomp));
+}
+
+vteunistr
+_vte_unistr_append_unichar (vteunistr s, gunichar c)
+{
+	struct VteUnistrDecomp decomp;
+	vteunistr ret = 0;
+
+	decomp.prefix = s;
+	decomp.suffix = c;
+
+	if (G_UNLIKELY (!unistr_decomp)) {
+		unistr_decomp = g_array_new (FALSE, TRUE,
+						sizeof (struct VteUnistrDecomp));
+		g_array_set_size (unistr_decomp, 1);
+		unistr_comp = g_hash_table_new (unistr_comp_hash,
+						unistr_comp_equal);
+	} else {
+		g_array_index (unistr_decomp,
+			       struct VteUnistrDecomp,
+			       0) = decomp;
+		ret = GPOINTER_TO_UINT (g_hash_table_lookup (unistr_comp,
+							     GUINT_TO_POINTER (0)));
+	}
+
+	if (G_UNLIKELY (!ret)) {
+		ret = unistr_next++;
+		g_array_append_val (unistr_decomp, decomp);
+		g_hash_table_insert (unistr_comp,
+				     GUINT_TO_POINTER (ret - VTE_UNISTR_START),
+				     GUINT_TO_POINTER (ret));
+	}
+
+	return ret;
+}
+
+int
+_vte_unistr_strlen (vteunistr s)
+{
+	int len = 1;
+	g_return_val_if_fail (s < unistr_next, len);
+	while (G_UNLIKELY (s >= VTE_UNISTR_START)) {
+		s = g_array_index (unistr_decomp,
+				   struct VteUnistrDecomp,
+				   s - VTE_UNISTR_START).prefix;
+		len++;
+	}
+	return len;
+}
+
+gunichar
+_vte_unistr_get_base (vteunistr s)
+{
+	g_return_val_if_fail (s < unistr_next, s);
+	while (G_UNLIKELY (s >= VTE_UNISTR_START))
+		s = g_array_index (unistr_decomp,
+				   struct VteUnistrDecomp,
+				   s - VTE_UNISTR_START).prefix;
+	return (gunichar) s;
+}
+
+void
+_vte_unistr_append_to_string (vteunistr s, GString *gs)
+{
+	g_return_if_fail (s < unistr_next);
+	if (G_UNLIKELY (s >= VTE_UNISTR_START)) {
+		struct VteUnistrDecomp *decomp;
+		decomp = &g_array_index (unistr_decomp,
+					 struct VteUnistrDecomp,
+					 s - VTE_UNISTR_START);
+		_vte_unistr_append_to_string (decomp->prefix, gs);
+		s = decomp->suffix;
+	}
+	g_string_append_unichar (gs, (gunichar) s);
+}

Added: trunk/src/vteunistr.h
==============================================================================
--- (empty file)
+++ trunk/src/vteunistr.h	Fri Dec 12 06:57:09 2008
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2008 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * Author(s):
+ * 	Behdad Esfahbod
+ */
+
+#ifndef vte_vteunistr_h_included
+#define vte_vteunistr_h_included
+
+#include <glib.h>
+
+G_BEGIN_DECLS
+
+typedef guint32 vteunistr;
+
+#define vte_unistr_from_unichar(c) ((vteunistr) c)
+
+vteunistr
+_vte_unistr_append_unichar (vteunistr s, gunichar c);
+
+int
+_vte_unistr_strlen (vteunistr s);
+
+gunichar
+_vte_unistr_get_base (vteunistr s);
+
+void
+_vte_unistr_append_to_string (vteunistr s, GString *gs);
+
+G_END_DECLS
+
+#endif



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]