gtkhtml r9074 - trunk/gtkhtml



Author: mcrha
Date: Tue Dec 23 18:55:40 2008
New Revision: 9074
URL: http://svn.gnome.org/viewvc/gtkhtml?rev=9074&view=rev

Log:
2008-12-23  Denis Pauk  <pauk denis gmail com>

	** Fix for bug #552357

	* htmltokenizer.c:
	* htmlentity.gperf:
	* htmlentity.c: Fixes invalid reads and memory usage.



Added:
   trunk/gtkhtml/htmlentity.gperf
Modified:
   trunk/gtkhtml/ChangeLog
   trunk/gtkhtml/htmlentity.c
   trunk/gtkhtml/htmltokenizer.c

Modified: trunk/gtkhtml/htmlentity.c
==============================================================================
--- trunk/gtkhtml/htmlentity.c	(original)
+++ trunk/gtkhtml/htmlentity.c	Tue Dec 23 18:55:40 2008
@@ -1,3 +1,36 @@
+/* ANSI-C code produced by gperf version 3.0.3 */
+/* Command-line: gperf --struct-type -l -N html_entity_hash -L ANSI-C htmlentity.gperf  */
+/* Computed positions: -k'1-3,5,$' */
+
+#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
+      && ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
+      && (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
+      && ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
+      && ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
+      && ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
+      && ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
+      && ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
+      && ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
+      && ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
+      && ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
+      && ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
+      && ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
+      && ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
+      && ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
+      && ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
+      && ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
+      && ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
+      && ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
+      && ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
+      && ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
+      && ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
+      && ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
+/* The character set is not based on ISO-646.  */
+#error "gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf gnu org>."
+#endif
+
+#line 1 "htmlentity.gperf"
+
 /* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 8; tab-width: 8 -*- */
 /* htmlentity.c
  *
@@ -30,306 +63,813 @@
 #include <glib.h>
 #include "htmlentity.h"
 
-
+#line 35 "htmlentity.gperf"
 struct _EntityEntry {
-	guint value;
-	const gchar *str;
+	const gchar *name;	/* entity name as stored in wordlist[], e.g. "amp"; first member so {"name", value} initialisers line up */
+	gulong value;		/* numeric value handed back by html_entity_parse() for this name */
 };
-typedef struct _EntityEntry EntityEntry;
-
-static EntityEntry entity_table[] = {
-
-	/* Latin1 */
-	{ 160,	"nbsp" },
-	{ 161,	"iexcl" },
-	{ 162,	"cent" },
-	{ 163,	"pound" },
-	{ 164,	"curren" },
-	{ 165,	"yen" },
-	{ 166,	"brvbar" },
-	{ 167,	"sect" },
-	{ 168,	"uml" },
-	{ 169,	"copy" },
-	{ 170,	"ordf" },
-	{ 171,	"laquo" },
-	{ 172,	"not" },
-	{ 173,	"shy" },
-	{ 174,	"reg" },
-	{ 175,	"macr" },
-	{ 176,	"deg" },
-	{ 177,	"plusmn" },
-	{ 178,	"sup2" },
-	{ 179,	"sup3" },
-	{ 180,	"acute" },
-	{ 181,	"micro" },
-	{ 182,	"para" },
-	{ 183,	"middot" },
-	{ 184,	"cedil" },
-	{ 185,	"sup1" },
-	{ 186,	"ordm" },
-	{ 187,	"raquo" },
-	{ 188,	"frac14" },
-	{ 189,	"frac12" },
-	{ 190,	"frac34" },
-	{ 191,	"iquest" },
-	{ 192,	"Agrave" },
-	{ 193,	"Aacute" },
-	{ 194,	"Acirc" },
-	{ 195,	"Atilde" },
-	{ 196,	"Auml" },
-	{ 197,	"Aring" },
-	{ 198,	"AElig" },
-	{ 199,	"Ccedil" },
-	{ 200,	"Egrave" },
-	{ 201,	"Eacute" },
-	{ 202,	"Ecirc" },
-	{ 203,	"Euml" },
-	{ 204,	"Igrave" },
-	{ 205,	"Iacute" },
-	{ 206,	"Icirc" },
-	{ 207,	"Iuml" },
-	{ 208,	"ETH" },
-	{ 209,	"Ntilde" },
-	{ 210,	"Ograve" },
-	{ 211,	"Oacute" },
-	{ 212,	"Ocirc" },
-	{ 213,	"Otilde" },
-	{ 214,	"Ouml" },
-	{ 215,	"times" },
-	{ 216,	"Oslash" },
-	{ 217,	"Ugrave" },
-	{ 218,	"Uacute" },
-	{ 219,	"Ucirc" },
-	{ 220,	"Uuml" },
-	{ 221,	"Yacute" },
-	{ 222,	"THORN" },
-	{ 223,	"szlig" },
-	{ 224,	"agrave" },
-	{ 225,	"aacute" },
-	{ 226,	"acirc" },
-	{ 227,	"atilde" },
-	{ 228,	"auml" },
-	{ 229,	"aring" },
-	{ 230,	"aelig" },
-	{ 231,	"ccedil" },
-	{ 232,	"egrave" },
-	{ 233,	"eacute" },
-	{ 234,	"ecirc" },
-	{ 235,	"euml" },
-	{ 236,	"igrave" },
-	{ 237,	"iacute" },
-	{ 238,	"icirc" },
-	{ 239,	"iuml" },
-	{ 240,	"eth" },
-	{ 241,	"ntilde" },
-	{ 242,	"ograve" },
-	{ 243,	"oacute" },
-	{ 244,	"ocirc" },
-	{ 245,	"otilde" },
-	{ 246,	"ouml" },
-	{ 247,	"divide" },
-	{ 248,	"oslash" },
-	{ 249,	"ugrave" },
-	{ 250,	"uacute" },
-	{ 251,	"ucirc" },
-	{ 252,	"uuml" },
-	{ 253,	"yacute" },
-	{ 254,	"thorn" },
-	{ 255,	"yuml" },
-
-	/* special charactes */
-	{ 34,    "quot" },
-	{ 38,    "amp" },
-	{ 39,    "apos" },
-	{ 60,    "lt" },
-	{ 62,    "gt" },
-	{ 338,   "OElig" },
-	{ 339,   "oelig" },
-	{ 352,   "Scaron" },
-	{ 353,   "scaron" },
-	{ 376,   "Yuml" },
-	{ 710,   "circ" },
-	{ 732,   "tilde" },
-	{ 8194,  "ensp" },
-	{ 8195,  "emsp" },
-	{ 8201,  "thinsp" },
-	{ 8204,  "zwnj" },
-	{ 8205,  "zwj" },
-	{ 8206,  "lrm" },
-	{ 8207,  "rlm" },
-	{ 8211,  "ndash" },
-	{ 8212,  "mdash" },
-	{ 8216,  "lsquo" },
-	{ 8217,  "rsquo" },
-	{ 8218,  "sbquo" },
-	{ 8220,  "ldquo" },
-	{ 8221,  "rdquo" },
-	{ 8222,  "bdquo" },
-	{ 8224,  "dagger" },
-	{ 8225,  "Dagger" },
-	{ 8240,  "permil" },
-	{ 8249,  "lsaquo" },
-	{ 8250,  "rsaquo" },
-	{ 8364,  "euro" },
-
-	/* symbols */
-	{ 402,   "fnof" },
-	{ 913,   "Alpha" },
-	{ 914,   "Beta" },
-	{ 915,   "Gamma" },
-	{ 916,   "Delta" },
-	{ 917,   "Epsilon" },
-	{ 918,   "Zeta" },
-	{ 919,   "Eta" },
-	{ 920,   "Theta" },
-	{ 921,   "Iota" },
-	{ 922,   "Kappa" },
-	{ 923,   "Lambda" },
-	{ 924,   "Mu" },
-	{ 925,   "Nu" },
-	{ 926,   "Xi" },
-	{ 927,   "Omicron" },
-	{ 928,   "Pi" },
-	{ 929,   "Rho" },
-	{ 931,   "Sigma" },
-	{ 932,   "Tau" },
-	{ 933,   "Upsilon" },
-	{ 934,   "Phi" },
-	{ 935,   "Chi" },
-	{ 936,   "Psi" },
-	{ 937,   "Omega" },
-	{ 945,   "alpha" },
-	{ 946,   "beta" },
-	{ 947,   "gamma" },
-	{ 948,   "delta" },
-	{ 949,   "epsilon" },
-	{ 950,   "zeta" },
-	{ 951,   "eta" },
-	{ 952,   "theta" },
-	{ 953,   "iota" },
-	{ 954,   "kappa" },
-	{ 955,   "lambda" },
-	{ 956,   "mu" },
-	{ 957,   "nu" },
-	{ 958,   "xi" },
-	{ 959,   "omicron" },
-	{ 960,   "pi" },
-	{ 961,   "rho" },
-	{ 962,   "sigmaf" },
-	{ 963,   "sigma" },
-	{ 964,   "tau" },
-	{ 965,   "upsilon" },
-	{ 966,   "phi" },
-	{ 967,   "chi" },
-	{ 968,   "psi" },
-	{ 969,   "omega" },
-	{ 977,   "thetasym" },
-	{ 978,   "upsih" },
-	{ 982,   "piv" },
-	{ 8226,  "bull" },
-	{ 8230,  "hellip" },
-	{ 8242,  "prime" },
-	{ 8243,  "Prime" },
-	{ 8254,  "oline" },
-	{ 8260,  "frasl" },
-	{ 8472,  "weierp" },
-	{ 8465,  "image" },
-	{ 8476,  "real" },
-	{ 8482,  "trade" },
-	{ 8501,  "alefsym" },
-	{ 8592,  "larr" },
-	{ 8593,  "uarr" },
-	{ 8594,  "rarr" },
-	{ 8595,  "darr" },
-	{ 8596,  "harr" },
-	{ 8629,  "crarr" },
-	{ 8656,  "lArr" },
-	{ 8657,  "uArr" },
-	{ 8658,  "rArr" },
-	{ 8659,  "dArr" },
-	{ 8660,  "hArr" },
-	{ 8704,  "forall" },
-	{ 8706,  "part" },
-	{ 8707,  "exist" },
-	{ 8709,  "empty" },
-	{ 8711,  "nabla" },
-	{ 8712,  "isin" },
-	{ 8713,  "notin" },
-	{ 8715,  "ni" },
-	{ 8719,  "prod" },
-	{ 8721,  "sum" },
-	{ 8722,  "minus" },
-	{ 8727,  "lowast" },
-	{ 8730,  "radic" },
-	{ 8733,  "prop" },
-	{ 8734,  "infin" },
-	{ 8736,  "ang" },
-	{ 8743,  "and" },
-	{ 8744,  "or" },
-	{ 8745,  "cap" },
-	{ 8746,  "cup" },
-	{ 8747,  "int" },
-	{ 8756,  "there4" },
-	{ 8764,  "sim" },
-	{ 8773,  "cong" },
-	{ 8776,  "asymp" },
-	{ 8800,  "ne" },
-	{ 8801,  "equiv" },
-	{ 8804,  "le" },
-	{ 8805,  "ge" },
-	{ 8834,  "sub" },
-	{ 8835,  "sup" },
-	{ 8836,  "nsub" },
-	{ 8838,  "sube" },
-	{ 8839,  "supe" },
-	{ 8853,  "oplus" },
-	{ 8855,  "otimes" },
-	{ 8869,  "perp" },
-	{ 8901,  "sdot" },
-	{ 8968,  "lceil" },
-	{ 8969,  "rceil" },
-	{ 8970,  "lfloor" },
-	{ 8971,  "rfloor" },
-	{ 9001,  "lang" },
-	{ 9002,  "rang" },
-	{ 9674,  "loz" },
-	{ 9824,  "spades" },
-	{ 9827,  "clubs" },
-	{ 9829,  "hearts" },
-	{ 9830,  "diams" },
-};
-
 
-/* FIXME FIXME this function just sucks.  We should use gperf or something instead.  */
+#define TOTAL_KEYWORDS 253
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 8
+#define MIN_HASH_VALUE 8
+#define MAX_HASH_VALUE 738
+/* maximum key range = 731, duplicates = 0 */
+
+#ifdef __GNUC__
+__inline
+#else
+#ifdef __cplusplus
+inline
+#endif
+#endif
+static unsigned int
+hash (register const char *str, register unsigned int len)  /* gperf-generated: str need not be NUL-terminated, len is its byte count */
+{  /* result = len + table values for the 1st, 2nd, 3rd, 5th (when present) and last byte of str */
+  static unsigned short asso_values[] =  /* 257 entries: indexed by a byte value, or by byte value + 1 for str[1] */
+    {
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739,  10,
+       35,  20,   0, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 175, 135,  30,  60,  95,
+        5,   0,   5, 180, 739,  15,   5,   0,  15, 110,
+      110, 739,   5,   5,   5, 100, 739, 739,   0,  20,
+        0, 739, 739, 739, 739, 739, 739,   5,  60,  50,
+        0,  15, 144, 115, 215,  10, 225,  10,  95, 125,
+       25,   0,   5, 218,  90,  20,   0,  65,  35,  55,
+       45, 115,   5,  15, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739, 739, 739, 739,
+      739, 739, 739, 739, 739, 739, 739
+    };
+  register int hval = len;  /* the key length itself is part of the hash */
+
+  switch (hval)
+    {
+      default:  /* taken for any len other than 1..4; reads str[4], so callers must ensure len >= 5 here (html_entity_hash only passes len 2..8) */
+        hval += asso_values[(unsigned char)str[4]];
+      /* FALLTHROUGH: longer keys also mix in every position handled below */
+      case 4:
+      case 3:
+        hval += asso_values[(unsigned char)str[2]];
+      /* FALLTHROUGH */
+      case 2:
+        hval += asso_values[(unsigned char)str[1]+1];  /* +1 offset on the second byte is why the table has 257 slots */
+      /* FALLTHROUGH */
+      case 1:
+        hval += asso_values[(unsigned char)str[0]];
+        break;
+    }
+  return hval + asso_values[(unsigned char)str[len - 1]];  /* last byte is always mixed in; reads str[len - 1], so len must be >= 1 */
+}
 
-static gint
-html_g_str_case_equal (gconstpointer v, gconstpointer v2)
+#ifdef __GNUC__
+__inline
+#ifdef __GNUC_STDC_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
+static struct _EntityEntry *
+html_entity_hash (register const char *str, register unsigned int len)
 {
-	return g_ascii_strcasecmp ((const gchar*) v, (const gchar*)v2) == 0;
+  static unsigned char lengthtable[] =
+    {
+       0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  0,  0,  0,  3,
+       0,  0,  0,  0,  3,  0,  0,  0,  0,  0,  4,  0,  0,  0,
+       3,  4,  0,  0,  0,  3,  4,  0,  0,  0,  3,  4,  5,  6,
+       0,  3,  4,  5,  6,  0,  0,  4,  0,  6,  0,  0,  0,  5,
+       6,  0,  0,  0,  5,  0,  0,  3,  0,  5,  0,  0,  3,  4,
+       5,  0,  0,  0,  4,  5,  0,  0,  0,  4,  5,  0,  0,  3,
+       4,  5,  0,  0,  0,  0,  5,  0,  0,  3,  0,  5,  6,  0,
+       3,  4,  0,  6,  2,  3,  4,  5,  0,  0,  0,  0,  0,  6,
+       2,  3,  0,  5,  6,  2,  0,  0,  5,  6,  0,  3,  0,  5,
+       0,  2,  0,  0,  5,  6,  0,  3,  4,  5,  6,  0,  0,  0,
+       0,  6,  0,  3,  4,  5,  6,  0,  0,  4,  5,  6,  0,  4,
+       4,  0,  6,  7,  4,  4,  5,  0,  2,  8,  4,  5,  6,  0,
+       4,  0,  5,  6,  0,  3,  0,  5,  6,  0,  3,  4,  5,  6,
+       2,  0,  0,  6,  2,  0,  0,  0,  0,  6,  0,  3,  4,  6,
+       6,  0,  3,  4,  5,  6,  0,  0,  0,  0,  6,  0,  3,  5,
+       5,  0,  0,  4,  0,  5,  6,  0,  4,  0,  6,  6,  0,  4,
+       0,  5,  6,  2,  0,  0,  0,  6,  0,  0,  0,  0,  0,  2,
+       3,  0,  0,  6,  2,  0,  4,  5,  6,  4,  4,  0,  0,  0,
+       0,  0,  5,  5,  2,  4,  0,  4,  5,  0,  2,  3,  4,  0,
+       0,  7,  0,  4,  5,  6,  0,  0,  4,  5,  2,  0,  0,  4,
+       5,  6,  2,  3,  6,  5,  6,  0,  4,  4,  5,  6,  4,  5,
+       4,  5,  0,  7,  5,  4,  5,  0,  0,  3,  4,  5,  6,  0,
+       3,  4,  6,  6,  3,  5,  5,  5,  0,  0,  5,  4,  5,  0,
+       0,  0,  4,  0,  0,  0,  5,  4,  5,  0,  0,  5,  4,  0,
+       6,  0,  4,  4,  0,  0,  0,  0,  0,  0,  6,  2,  0,  0,
+       6,  6,  0,  0,  4,  5,  6,  0,  5,  4,  5,  6,  0,  0,
+       5,  5,  6,  0,  3,  4,  0,  6,  0,  5,  4,  5,  6,  3,
+       5,  0,  7,  0,  0,  0,  4,  0,  0,  0,  0,  5,  6,  0,
+       0,  0,  5,  5,  6,  0,  0,  0,  0,  6,  0,  0,  0,  5,
+       0,  0,  0,  4,  0,  0,  0,  0,  4,  0,  0,  0,  0,  4,
+       5,  0,  0,  0,  0,  0,  6,  0,  0,  0,  7,  0,  0,  0,
+       4,  6,  0,  0,  0,  4,  5,  0,  0,  0,  0,  5,  6,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  6,  0,  0,  4,  7,  6,
+       0,  0,  0,  7,  0,  3,  3,  0,  0,  6,  0,  0,  5,  6,
+       0,  0,  0,  5,  5,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  3,  0,  0,  0,  0,  3,
+       0,  0,  0,  0,  0,  0,  5,  0,  0,  3,  0,  6,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  4,  0,  6,  0,  0,  0,  0,  6,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  5,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  5
+    };
+  static struct _EntityEntry wordlist[] =
+    {
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 53 "htmlentity.gperf"
+      {"and", 8743},
+      {""}, {""}, {""}, {""},
+#line 134 "htmlentity.gperf"
+      {"int", 8747},
+      {""}, {""}, {""}, {""},
+#line 230 "htmlentity.gperf"
+      {"Rho", 929},
+      {""}, {""}, {""}, {""}, {""},
+#line 136 "htmlentity.gperf"
+      {"iota", 953},
+      {""}, {""}, {""},
+#line 218 "htmlentity.gperf"
+      {"psi", 968},
+#line 215 "htmlentity.gperf"
+      {"prod", 8719},
+      {""}, {""}, {""},
+#line 171 "htmlentity.gperf"
+      {"not", 172},
+#line 216 "htmlentity.gperf"
+      {"prop", 8733},
+      {""}, {""}, {""},
+#line 207 "htmlentity.gperf"
+      {"phi", 966},
+#line 238 "htmlentity.gperf"
+      {"sdot", 8901},
+#line 259 "htmlentity.gperf"
+      {"theta", 952},
+#line 236 "htmlentity.gperf"
+      {"Scaron", 352},
+      {""},
+#line 52 "htmlentity.gperf"
+      {"amp", 38},
+#line 99 "htmlentity.gperf"
+      {"ensp", 8194},
+#line 258 "htmlentity.gperf"
+      {"Theta", 920},
+#line 257 "htmlentity.gperf"
+      {"there4", 8756},
+      {""}, {""},
+#line 138 "htmlentity.gperf"
+      {"isin", 8712},
+      {""},
+#line 261 "htmlentity.gperf"
+      {"thinsp", 8201},
+      {""}, {""}, {""},
+#line 188 "htmlentity.gperf"
+      {"omega", 969},
+#line 237 "htmlentity.gperf"
+      {"scaron", 353},
+      {""}, {""}, {""},
+#line 266 "htmlentity.gperf"
+      {"trade", 8482},
+      {""}, {""},
+#line 73 "htmlentity.gperf"
+      {"Chi", 935},
+      {""},
+#line 263 "htmlentity.gperf"
+      {"thorn", 254},
+      {""}, {""},
+#line 252 "htmlentity.gperf"
+      {"sup", 8835},
+#line 98 "htmlentity.gperf"
+      {"emsp", 8195},
+#line 213 "htmlentity.gperf"
+      {"prime", 8242},
+      {""}, {""}, {""},
+#line 249 "htmlentity.gperf"
+      {"sup1", 185},
+#line 132 "htmlentity.gperf"
+      {"image", 8465},
+      {""}, {""}, {""},
+#line 253 "htmlentity.gperf"
+      {"supe", 8839},
+#line 212 "htmlentity.gperf"
+      {"pound", 163},
+      {""}, {""},
+#line 74 "htmlentity.gperf"
+      {"chi", 967},
+#line 251 "htmlentity.gperf"
+      {"sup3", 179},
+#line 172 "htmlentity.gperf"
+      {"notin", 8713},
+      {""}, {""}, {""}, {""},
+#line 142 "htmlentity.gperf"
+      {"kappa", 954},
+      {""}, {""},
+#line 104 "htmlentity.gperf"
+      {"eta", 951},
+      {""},
+#line 141 "htmlentity.gperf"
+      {"Kappa", 922},
+#line 198 "htmlentity.gperf"
+      {"otilde", 245},
+      {""},
+#line 80 "htmlentity.gperf"
+      {"cup", 8746},
+#line 250 "htmlentity.gperf"
+      {"sup2", 178},
+      {""},
+#line 60 "htmlentity.gperf"
+      {"atilde", 227},
+#line 164 "htmlentity.gperf"
+      {"Mu", 924},
+#line 231 "htmlentity.gperf"
+      {"rho", 961},
+#line 167 "htmlentity.gperf"
+      {"nbsp", 160},
+#line 44 "htmlentity.gperf"
+      {"acute", 180},
+      {""}, {""}, {""}, {""}, {""},
+#line 174 "htmlentity.gperf"
+      {"Ntilde", 209},
+#line 192 "htmlentity.gperf"
+      {"or", 8744},
+#line 154 "htmlentity.gperf"
+      {"loz", 9674},
+      {""},
+#line 181 "htmlentity.gperf"
+      {"ocirc", 244},
+#line 199 "htmlentity.gperf"
+      {"otimes", 8855},
+#line 176 "htmlentity.gperf"
+      {"Nu", 925},
+      {""}, {""},
+#line 43 "htmlentity.gperf"
+      {"acirc", 226},
+#line 175 "htmlentity.gperf"
+      {"ntilde", 241},
+      {""},
+#line 68 "htmlentity.gperf"
+      {"cap", 8745},
+      {""},
+#line 128 "htmlentity.gperf"
+      {"icirc", 238},
+      {""},
+#line 177 "htmlentity.gperf"
+      {"nu", 957},
+      {""}, {""},
+#line 94 "htmlentity.gperf"
+      {"ecirc", 234},
+#line 179 "htmlentity.gperf"
+      {"oacute", 243},
+      {""},
+#line 217 "htmlentity.gperf"
+      {"Psi", 936},
+#line 247 "htmlentity.gperf"
+      {"sube", 8838},
+#line 58 "htmlentity.gperf"
+      {"asymp", 8776},
+#line 41 "htmlentity.gperf"
+      {"aacute", 225},
+      {""}, {""}, {""}, {""},
+#line 126 "htmlentity.gperf"
+      {"iacute", 237},
+      {""},
+#line 206 "htmlentity.gperf"
+      {"Phi", 934},
+#line 109 "htmlentity.gperf"
+      {"euro", 8364},
+#line 110 "htmlentity.gperf"
+      {"exist", 8707},
+#line 92 "htmlentity.gperf"
+      {"eacute", 233},
+      {""}, {""},
+#line 194 "htmlentity.gperf"
+      {"ordm", 186},
+#line 51 "htmlentity.gperf"
+      {"alpha", 945},
+#line 284 "htmlentity.gperf"
+      {"Yacute", 221},
+      {""},
+#line 289 "htmlentity.gperf"
+      {"Zeta", 918},
+#line 173 "htmlentity.gperf"
+      {"nsub", 8836},
+      {""},
+#line 69 "htmlentity.gperf"
+      {"Ccedil", 199},
+#line 190 "htmlentity.gperf"
+      {"omicron", 959},
+#line 290 "htmlentity.gperf"
+      {"zeta", 950},
+#line 203 "htmlentity.gperf"
+      {"part", 8706},
+#line 166 "htmlentity.gperf"
+      {"nabla", 8711},
+      {""},
+#line 158 "htmlentity.gperf"
+      {"lt", 60},
+#line 260 "htmlentity.gperf"
+      {"thetasym", 977},
+#line 202 "htmlentity.gperf"
+      {"para", 182},
+#line 187 "htmlentity.gperf"
+      {"Omega", 937},
+#line 233 "htmlentity.gperf"
+      {"rsaquo", 8250},
+      {""},
+#line 193 "htmlentity.gperf"
+      {"ordf", 170},
+      {""},
+#line 186 "htmlentity.gperf"
+      {"oline", 8254},
+#line 156 "htmlentity.gperf"
+      {"lsaquo", 8249},
+      {""},
+#line 103 "htmlentity.gperf"
+      {"Eta", 919},
+      {""},
+#line 214 "htmlentity.gperf"
+      {"Prime", 8243},
+#line 70 "htmlentity.gperf"
+      {"ccedil", 231},
+      {""},
+#line 246 "htmlentity.gperf"
+      {"sub", 8834},
+#line 78 "htmlentity.gperf"
+      {"copy", 169},
+#line 272 "htmlentity.gperf"
+      {"ucirc", 251},
+#line 153 "htmlentity.gperf"
+      {"lowast", 8727},
+#line 120 "htmlentity.gperf"
+      {"gt", 62},
+      {""}, {""},
+#line 114 "htmlentity.gperf"
+      {"frac14", 188},
+#line 169 "htmlentity.gperf"
+      {"ne", 8800},
+      {""}, {""}, {""}, {""},
+#line 137 "htmlentity.gperf"
+      {"iquest", 191},
+      {""},
+#line 256 "htmlentity.gperf"
+      {"tau", 964},
+#line 135 "htmlentity.gperf"
+      {"Iota", 921},
+#line 115 "htmlentity.gperf"
+      {"frac34", 190},
+#line 268 "htmlentity.gperf"
+      {"uacute", 250},
+      {""},
+#line 255 "htmlentity.gperf"
+      {"Tau", 932},
+#line 77 "htmlentity.gperf"
+      {"cong", 8773},
+#line 117 "htmlentity.gperf"
+      {"Gamma", 915},
+#line 143 "htmlentity.gperf"
+      {"Lambda", 923},
+      {""}, {""}, {""}, {""},
+#line 197 "htmlentity.gperf"
+      {"Otilde", 213},
+      {""},
+#line 105 "htmlentity.gperf"
+      {"ETH", 208},
+#line 133 "htmlentity.gperf"
+      {"infin", 8734},
+#line 93 "htmlentity.gperf"
+      {"Ecirc", 202},
+      {""}, {""},
+#line 65 "htmlentity.gperf"
+      {"beta", 946},
+      {""},
+#line 271 "htmlentity.gperf"
+      {"Ucirc", 219},
+#line 66 "htmlentity.gperf"
+      {"brvbar", 166},
+      {""},
+#line 239 "htmlentity.gperf"
+      {"sect", 167},
+      {""},
+#line 113 "htmlentity.gperf"
+      {"frac12", 189},
+#line 81 "htmlentity.gperf"
+      {"curren", 164},
+      {""},
+#line 72 "htmlentity.gperf"
+      {"cent", 162},
+      {""},
+#line 180 "htmlentity.gperf"
+      {"Ocirc", 212},
+#line 91 "htmlentity.gperf"
+      {"Eacute", 201},
+#line 165 "htmlentity.gperf"
+      {"mu", 956},
+      {""}, {""}, {""},
+#line 267 "htmlentity.gperf"
+      {"Uacute", 218},
+      {""}, {""}, {""}, {""}, {""},
+#line 282 "htmlentity.gperf"
+      {"Xi", 926},
+#line 54 "htmlentity.gperf"
+      {"ang", 8736},
+      {""}, {""},
+#line 178 "htmlentity.gperf"
+      {"Oacute", 211},
+#line 209 "htmlentity.gperf"
+      {"pi", 960},
+      {""},
+#line 84 "htmlentity.gperf"
+      {"darr", 8595},
+#line 102 "htmlentity.gperf"
+      {"equiv", 8801},
+#line 285 "htmlentity.gperf"
+      {"yacute", 253},
+#line 55 "htmlentity.gperf"
+      {"apos", 39},
+#line 205 "htmlentity.gperf"
+      {"perp", 8869},
+      {""}, {""}, {""}, {""}, {""},
+#line 88 "htmlentity.gperf"
+      {"delta", 948},
+#line 220 "htmlentity.gperf"
+      {"radic", 8730},
+#line 151 "htmlentity.gperf"
+      {"le", 8804},
+#line 219 "htmlentity.gperf"
+      {"quot", 34},
+      {""},
+#line 201 "htmlentity.gperf"
+      {"ouml", 246},
+#line 79 "htmlentity.gperf"
+      {"crarr", 8629},
+      {""},
+#line 170 "htmlentity.gperf"
+      {"ni", 8715},
+#line 240 "htmlentity.gperf"
+      {"shy", 173},
+#line 62 "htmlentity.gperf"
+      {"auml", 228},
+      {""}, {""},
+#line 189 "htmlentity.gperf"
+      {"Omicron", 927},
+      {""},
+#line 140 "htmlentity.gperf"
+      {"iuml", 239},
+#line 57 "htmlentity.gperf"
+      {"aring", 229},
+#line 59 "htmlentity.gperf"
+      {"Atilde", 195},
+      {""}, {""},
+#line 108 "htmlentity.gperf"
+      {"euml", 235},
+#line 89 "htmlentity.gperf"
+      {"diams", 9830},
+#line 119 "htmlentity.gperf"
+      {"ge", 8805},
+      {""}, {""},
+#line 288 "htmlentity.gperf"
+      {"Yuml", 376},
+#line 97 "htmlentity.gperf"
+      {"empty", 8709},
+#line 90 "htmlentity.gperf"
+      {"divide", 247},
+#line 283 "htmlentity.gperf"
+      {"xi", 958},
+#line 275 "htmlentity.gperf"
+      {"uml", 168},
+#line 245 "htmlentity.gperf"
+      {"spades", 9824},
+#line 76 "htmlentity.gperf"
+      {"clubs", 9827},
+#line 82 "htmlentity.gperf"
+      {"dagger", 8224},
+      {""},
+#line 64 "htmlentity.gperf"
+      {"Beta", 914},
+#line 67 "htmlentity.gperf"
+      {"bull", 8226},
+#line 42 "htmlentity.gperf"
+      {"Acirc", 194},
+#line 144 "htmlentity.gperf"
+      {"lambda", 955},
+#line 111 "htmlentity.gperf"
+      {"fnof", 402},
+#line 235 "htmlentity.gperf"
+      {"sbquo", 8218},
+#line 221 "htmlentity.gperf"
+      {"rang", 9002},
+#line 127 "htmlentity.gperf"
+      {"Icirc", 206},
+      {""},
+#line 49 "htmlentity.gperf"
+      {"alefsym", 8501},
+#line 63 "htmlentity.gperf"
+      {"bdquo", 8222},
+#line 145 "htmlentity.gperf"
+      {"lang", 9001},
+#line 225 "htmlentity.gperf"
+      {"rceil", 8969},
+      {""}, {""},
+#line 210 "htmlentity.gperf"
+      {"piv", 982},
+#line 292 "htmlentity.gperf"
+      {"zwnj", 8204},
+#line 149 "htmlentity.gperf"
+      {"lceil", 8968},
+#line 40 "htmlentity.gperf"
+      {"Aacute", 193},
+      {""},
+#line 248 "htmlentity.gperf"
+      {"sum", 8721},
+#line 269 "htmlentity.gperf"
+      {"uarr", 8593},
+#line 281 "htmlentity.gperf"
+      {"weierp", 8472},
+#line 125 "htmlentity.gperf"
+      {"Iacute", 205},
+#line 286 "htmlentity.gperf"
+      {"yen", 165},
+#line 234 "htmlentity.gperf"
+      {"rsquo", 8217},
+#line 87 "htmlentity.gperf"
+      {"Delta", 916},
+#line 118 "htmlentity.gperf"
+      {"gamma", 947},
+      {""}, {""},
+#line 157 "htmlentity.gperf"
+      {"lsquo", 8216},
+#line 85 "htmlentity.gperf"
+      {"dArr", 8659},
+#line 50 "htmlentity.gperf"
+      {"Alpha", 913},
+      {""}, {""}, {""},
+#line 280 "htmlentity.gperf"
+      {"uuml", 252},
+      {""}, {""}, {""},
+#line 226 "htmlentity.gperf"
+      {"rdquo", 8221},
+#line 159 "htmlentity.gperf"
+      {"macr", 175},
+#line 262 "htmlentity.gperf"
+      {"THORN", 222},
+      {""}, {""},
+#line 150 "htmlentity.gperf"
+      {"ldquo", 8220},
+#line 223 "htmlentity.gperf"
+      {"rarr", 8594},
+      {""},
+#line 196 "htmlentity.gperf"
+      {"oslash", 248},
+      {""},
+#line 227 "htmlentity.gperf"
+      {"real", 8476},
+#line 147 "htmlentity.gperf"
+      {"larr", 8592},
+      {""}, {""}, {""}, {""}, {""}, {""},
+#line 83 "htmlentity.gperf"
+      {"Dagger", 8225},
+#line 208 "htmlentity.gperf"
+      {"Pi", 928},
+      {""}, {""},
+#line 204 "htmlentity.gperf"
+      {"permil", 8240},
+#line 211 "htmlentity.gperf"
+      {"plusmn", 177},
+      {""}, {""},
+#line 107 "htmlentity.gperf"
+      {"Euml", 203},
+#line 264 "htmlentity.gperf"
+      {"tilde", 732},
+#line 162 "htmlentity.gperf"
+      {"middot", 183},
+      {""},
+#line 191 "htmlentity.gperf"
+      {"oplus", 8853},
+#line 279 "htmlentity.gperf"
+      {"Uuml", 220},
+#line 241 "htmlentity.gperf"
+      {"Sigma", 931},
+#line 185 "htmlentity.gperf"
+      {"ograve", 242},
+      {""}, {""},
+#line 116 "htmlentity.gperf"
+      {"frasl", 8260},
+#line 254 "htmlentity.gperf"
+      {"szlig", 223},
+#line 48 "htmlentity.gperf"
+      {"agrave", 224},
+      {""},
+#line 155 "htmlentity.gperf"
+      {"lrm", 8206},
+#line 200 "htmlentity.gperf"
+      {"Ouml", 214},
+      {""},
+#line 131 "htmlentity.gperf"
+      {"igrave", 236},
+      {""},
+#line 222 "htmlentity.gperf"
+      {"raquo", 187},
+#line 287 "htmlentity.gperf"
+      {"yuml", 255},
+#line 242 "htmlentity.gperf"
+      {"sigma", 963},
+#line 96 "htmlentity.gperf"
+      {"egrave", 232},
+#line 86 "htmlentity.gperf"
+      {"deg", 176},
+#line 146 "htmlentity.gperf"
+      {"laquo", 171},
+      {""},
+#line 101 "htmlentity.gperf"
+      {"epsilon", 949},
+      {""}, {""}, {""},
+#line 270 "htmlentity.gperf"
+      {"uArr", 8657},
+      {""}, {""}, {""}, {""},
+#line 71 "htmlentity.gperf"
+      {"cedil", 184},
+#line 123 "htmlentity.gperf"
+      {"hearts", 9829},
+      {""}, {""}, {""},
+#line 129 "htmlentity.gperf"
+      {"iexcl", 161},
+#line 265 "htmlentity.gperf"
+      {"times", 215},
+#line 229 "htmlentity.gperf"
+      {"rfloor", 8971},
+      {""}, {""}, {""}, {""},
+#line 152 "htmlentity.gperf"
+      {"lfloor", 8970},
+      {""}, {""}, {""},
+#line 161 "htmlentity.gperf"
+      {"micro", 181},
+      {""}, {""}, {""},
+#line 224 "htmlentity.gperf"
+      {"rArr", 8658},
+      {""}, {""}, {""}, {""},
+#line 148 "htmlentity.gperf"
+      {"lArr", 8656},
+      {""}, {""}, {""}, {""},
+#line 75 "htmlentity.gperf"
+      {"circ", 710},
+#line 163 "htmlentity.gperf"
+      {"minus", 8722},
+      {""}, {""}, {""}, {""}, {""},
+#line 274 "htmlentity.gperf"
+      {"ugrave", 249},
+      {""}, {""}, {""},
+#line 278 "htmlentity.gperf"
+      {"upsilon", 965},
+      {""}, {""}, {""},
+#line 61 "htmlentity.gperf"
+      {"Auml", 196},
+#line 112 "htmlentity.gperf"
+      {"forall", 8704},
+      {""}, {""}, {""},
+#line 139 "htmlentity.gperf"
+      {"Iuml", 207},
+#line 56 "htmlentity.gperf"
+      {"Aring", 197},
+      {""}, {""}, {""}, {""},
+#line 182 "htmlentity.gperf"
+      {"OElig", 338},
+#line 195 "htmlentity.gperf"
+      {"Oslash", 216},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 95 "htmlentity.gperf"
+      {"Egrave", 200},
+      {""}, {""},
+#line 121 "htmlentity.gperf"
+      {"harr", 8596},
+#line 100 "htmlentity.gperf"
+      {"Epsilon", 917},
+#line 273 "htmlentity.gperf"
+      {"Ugrave", 217},
+      {""}, {""}, {""},
+#line 277 "htmlentity.gperf"
+      {"Upsilon", 933},
+      {""},
+#line 228 "htmlentity.gperf"
+      {"reg", 174},
+#line 232 "htmlentity.gperf"
+      {"rlm", 8207},
+      {""}, {""},
+#line 184 "htmlentity.gperf"
+      {"Ograve", 210},
+      {""}, {""},
+#line 183 "htmlentity.gperf"
+      {"oelig", 339},
+#line 124 "htmlentity.gperf"
+      {"hellip", 8230},
+      {""}, {""}, {""},
+#line 46 "htmlentity.gperf"
+      {"aelig", 230},
+#line 168 "htmlentity.gperf"
+      {"ndash", 8211},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 244 "htmlentity.gperf"
+      {"sim", 8764},
+      {""}, {""}, {""}, {""},
+#line 291 "htmlentity.gperf"
+      {"zwj", 8205},
+      {""}, {""}, {""}, {""}, {""}, {""},
+#line 45 "htmlentity.gperf"
+      {"AElig", 198},
+      {""}, {""},
+#line 106 "htmlentity.gperf"
+      {"eth", 240},
+      {""},
+#line 243 "htmlentity.gperf"
+      {"sigmaf", 962},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+#line 122 "htmlentity.gperf"
+      {"hArr", 8660},
+      {""},
+#line 47 "htmlentity.gperf"
+      {"Agrave", 192},
+      {""}, {""}, {""}, {""},
+#line 130 "htmlentity.gperf"
+      {"Igrave", 204},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""},
+#line 160 "htmlentity.gperf"
+      {"mdash", 8212},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""},
+#line 276 "htmlentity.gperf"
+      {"upsih", 978}
+    };
+
+  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)  /* only lengths 2..8 can match; also keeps hash() from reading outside str */
+    {
+      register int key = hash (str, len);
+
+      if (key <= MAX_HASH_VALUE && key >= 0)  /* bounds check before indexing lengthtable[] / wordlist[] */
+        if (len == lengthtable[key])  /* cheap reject: empty slots have length 0, and a wrong length cannot match */
+          {
+            register const char *s = wordlist[key].name;
+
+            if (*str == *s && !memcmp (str + 1, s + 1, len - 1))  /* exact, case-sensitive comparison of all len bytes */
+              return &wordlist[key];
+          }
+    }
+  return 0;  /* no entry for this name */
 }
+#line 293 "htmlentity.gperf"
 
 gulong
 html_entity_parse (const gchar *s, guint len)
 {
-	static GHashTable *ehash = NULL;
-	gchar *t;
-
-	if (!ehash) {
-		gint i;
-
-		ehash = g_hash_table_new (g_str_hash, html_g_str_case_equal);
-
-		for (i = 0; i < sizeof (entity_table) / sizeof (entity_table[0]); i++)
-			g_hash_table_insert (ehash, (gpointer) entity_table[i].str, GINT_TO_POINTER (entity_table[i].value));
-	}
-
-	if (len > 0) {
-		t = alloca (len + 1);
-		memcpy (t, s, len);
-		*(t + len) = '\0';
-	} else {
-		t = (gchar *) s;
-	}
-
-	return GPOINTER_TO_INT (g_hash_table_lookup (ehash, t));
+	struct _EntityEntry * result = html_entity_hash( s, len);	/* looks up the first len bytes of s; no copy or NUL terminator needed */
+	if (result == NULL )	/* unknown name, or len outside the 2..8 range accepted by html_entity_hash() */
+		return ' ';	/* misses yield a space character, not 0 */
+	return result->value;	/* numeric value stored for the matched entity */
 }

Added: trunk/gtkhtml/htmlentity.gperf
==============================================================================
--- (empty file)
+++ trunk/gtkhtml/htmlentity.gperf	Tue Dec 23 18:55:40 2008
@@ -0,0 +1,301 @@
+%{
+/* -*- Mode: C; indent-tabs-mode: t; c-basic-offset: 8; tab-width: 8 -*- */
+/* htmlentity.c
+ *
+ * This file is part of the GtkHTML library.
+ *
+ * Copyright (C) 1999  Helix Code, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ * Author: Ettore Perazzoli
+ */
+
+#include <config.h>
+#include <string.h>
+#include <stdlib.h>
+#include "gtkhtml-compat.h"
+
+#include <glib.h>
+#include "htmlentity.h"
+
+%}
+struct _EntityEntry {
+	const gchar *name;
+	gulong value;
+};
+%%
+Aacute, 193
+aacute, 225
+Acirc, 194
+acirc, 226
+acute, 180
+AElig, 198
+aelig, 230
+Agrave, 192
+agrave, 224
+alefsym, 8501
+Alpha, 913
+alpha, 945
+amp, 38
+and, 8743
+ang, 8736
+apos, 39
+Aring, 197
+aring, 229
+asymp, 8776
+Atilde, 195
+atilde, 227
+Auml, 196
+auml, 228
+bdquo, 8222
+Beta, 914
+beta, 946
+brvbar, 166
+bull, 8226
+cap, 8745
+Ccedil, 199
+ccedil, 231
+cedil, 184
+cent, 162
+Chi, 935
+chi, 967
+circ, 710
+clubs, 9827
+cong, 8773
+copy, 169
+crarr, 8629
+cup, 8746
+curren, 164
+dagger, 8224
+Dagger, 8225
+darr, 8595
+dArr, 8659
+deg, 176
+Delta, 916
+delta, 948
+diams, 9830
+divide, 247
+Eacute, 201
+eacute, 233
+Ecirc, 202
+ecirc, 234
+Egrave, 200
+egrave, 232
+empty, 8709
+emsp, 8195
+ensp, 8194
+Epsilon, 917
+epsilon, 949
+equiv, 8801
+Eta, 919
+eta, 951
+ETH, 208
+eth, 240
+Euml, 203
+euml, 235
+euro, 8364
+exist, 8707
+fnof, 402
+forall, 8704
+frac12, 189
+frac14, 188
+frac34, 190
+frasl, 8260
+Gamma, 915
+gamma, 947
+ge, 8805
+gt, 62
+harr, 8596
+hArr, 8660
+hearts, 9829
+hellip, 8230
+Iacute, 205
+iacute, 237
+Icirc, 206
+icirc, 238
+iexcl, 161
+Igrave, 204
+igrave, 236
+image, 8465
+infin, 8734
+int, 8747
+Iota, 921
+iota, 953
+iquest, 191
+isin, 8712
+Iuml, 207
+iuml, 239
+Kappa, 922
+kappa, 954
+Lambda, 923
+lambda, 955
+lang, 9001
+laquo, 171
+larr, 8592
+lArr, 8656
+lceil, 8968
+ldquo, 8220
+le, 8804
+lfloor, 8970
+lowast, 8727
+loz, 9674
+lrm, 8206
+lsaquo, 8249
+lsquo, 8216
+lt, 60
+macr, 175
+mdash, 8212
+micro, 181
+middot, 183
+minus, 8722
+Mu, 924
+mu, 956
+nabla, 8711
+nbsp, 160
+ndash, 8211
+ne, 8800
+ni, 8715
+not, 172
+notin, 8713
+nsub, 8836
+Ntilde, 209
+ntilde, 241
+Nu, 925
+nu, 957
+Oacute, 211
+oacute, 243
+Ocirc, 212
+ocirc, 244
+OElig, 338
+oelig, 339
+Ograve, 210
+ograve, 242
+oline, 8254
+Omega, 937
+omega, 969
+Omicron, 927
+omicron, 959
+oplus, 8853
+or, 8744
+ordf, 170
+ordm, 186
+Oslash, 216
+oslash, 248
+Otilde, 213
+otilde, 245
+otimes, 8855
+Ouml, 214
+ouml, 246
+para, 182
+part, 8706
+permil, 8240
+perp, 8869
+Phi, 934
+phi, 966
+Pi, 928
+pi, 960
+piv, 982
+plusmn, 177
+pound, 163
+prime, 8242
+Prime, 8243
+prod, 8719
+prop, 8733
+Psi, 936
+psi, 968
+quot, 34
+radic, 8730
+rang, 9002
+raquo, 187
+rarr, 8594
+rArr, 8658
+rceil, 8969
+rdquo, 8221
+real, 8476
+reg, 174
+rfloor, 8971
+Rho, 929
+rho, 961
+rlm, 8207
+rsaquo, 8250
+rsquo, 8217
+sbquo, 8218
+Scaron, 352
+scaron, 353
+sdot, 8901
+sect, 167
+shy, 173
+Sigma, 931
+sigma, 963
+sigmaf, 962
+sim, 8764
+spades, 9824
+sub, 8834
+sube, 8838
+sum, 8721
+sup1, 185
+sup2, 178
+sup3, 179
+sup, 8835
+supe, 8839
+szlig, 223
+Tau, 932
+tau, 964
+there4, 8756
+Theta, 920
+theta, 952
+thetasym, 977
+thinsp, 8201
+THORN, 222
+thorn, 254
+tilde, 732
+times, 215
+trade, 8482
+Uacute, 218
+uacute, 250
+uarr, 8593
+uArr, 8657
+Ucirc, 219
+ucirc, 251
+Ugrave, 217
+ugrave, 249
+uml, 168
+upsih, 978
+Upsilon, 933
+upsilon, 965
+Uuml, 220
+uuml, 252
+weierp, 8472
+Xi, 926
+xi, 958
+Yacute, 221
+yacute, 253
+yen, 165
+yuml, 255
+Yuml, 376
+Zeta, 918
+zeta, 950
+zwj, 8205
+zwnj, 8204
+%%
+gulong
+html_entity_parse (const gchar *s, guint len)
+{
+	struct _EntityEntry * result = html_entity_hash( s, len);
+	if (result == NULL )
+		return ' ';
+	return result->value;
+}

Modified: trunk/gtkhtml/htmltokenizer.c
==============================================================================
--- trunk/gtkhtml/htmltokenizer.c	(original)
+++ trunk/gtkhtml/htmltokenizer.c	Tue Dec 23 18:55:40 2008
@@ -445,57 +445,62 @@
 }
 
 /*Convert entity values in already converted to right charset token*/
-gchar*
-html_tokenizer_convert_entity(gchar * token)
+char* html_tokenizer_convert_entity(char * token)
 {	
-	char* full_pos = token + strlen (token);	
-	char* write_pos = token + strcspn (token, "&");
-	gunichar value;
-	size_t count_chars;
-	char *read_pos;
-	while(write_pos < full_pos)
+	if (token == NULL)
+		return NULL;
+	/*end pointer: addresses the terminating NUL of token*/
+	gchar* full_pos = token + strlen(token);
+	gchar* resulted = g_new(gchar, strlen (token) +1);
+	gchar* write_pos = resulted;
+	gchar* read_pos = token;
+	while ( read_pos < full_pos )
 	{
-		write_pos++;
-		count_chars = strcspn(write_pos+1, ";");
-		value = INVALID_CHARACTER_MARKER;
-		if(count_chars < 14)
-		{
-			char save = *(write_pos + count_chars + 1);
-			*(write_pos + count_chars + 1)=0;
-			/* &#1234567 */
-			if (*write_pos == '#')
+		size_t count_chars = strcspn (read_pos, "&");
+		memcpy( write_pos, read_pos, count_chars);
+		write_pos += count_chars;
+		read_pos += count_chars;
+		/*have we already reached the end of the string?*/
+		if (read_pos < full_pos)
+			if(*read_pos == '&' )
 			{
-				if(isdigit (*(write_pos + 1)))
-				{
-					value=strtoull (write_pos + 1, NULL, 10);
+				/*value to add*/
+				gunichar value = INVALID_CHARACTER_MARKER;
+				/*skip the '&' itself, it is not needed*/
+				read_pos ++;
+				count_chars = strcspn(read_pos, ";");
+				if(count_chars < 14 && count_chars > 1)
+				{					
+					*(read_pos + count_chars)=0;
+					/* &#******; */
+					if (*read_pos == '#')
+					{
+						/* &#1234567 */
+						if(isdigit (*(read_pos + 1)))
+						{
+							value=strtoull (read_pos + 1, NULL, 10);
+						}
+						/* &#xdd; */
+						else if(*(read_pos + 1) == 'x')
+						{
+							value=strtoull (read_pos + 2, NULL, 16);
+						}
+					}
+					else 
+					{
+						value = html_entity_parse (read_pos, strlen(read_pos));
+					}
+					read_pos += (count_chars + 1);
+					write_pos += g_unichar_to_utf8 ( value, write_pos);
 				}
-				/* &#xdd */
-				else if(*(write_pos + 1) == 'x')
-				{
-					value=strtoull (write_pos + 2, NULL, 16);
-				}
-			}
-			else 
-			{
-				value=html_entity_parse (write_pos, 0);
-			}
-			*(write_pos+count_chars+1)=save;
-			if(count_chars>0)
-			{
-				memset (write_pos-1, ' ', count_chars + 3);
-				/* first char is & I think this not need */
-				write_pos --;
-				read_pos = write_pos + count_chars + 3;
-				write_pos += g_unichar_to_utf8 (value,write_pos);
-				memcpy (write_pos, read_pos, full_pos - read_pos + 1);
-				full_pos = write_pos + (full_pos - read_pos);
 			}
-		}	
-		write_pos = write_pos + strcspn (write_pos, "&");
-	}		
-	return token;
+	}
+	* write_pos = 0;
+	free(token);
+	return resulted;
 }
 
+/*convert text to utf8 - always allocates memory*/
 gchar* 
 convert_text_encoding(const GIConv iconv_cd,const gchar * token)
 {
@@ -507,9 +512,9 @@
 	size_t oldlength;
 	if(token == NULL)
 		return NULL;
-	currlength = strlen (token);
 	if(is_valid_g_iconv (iconv_cd) && is_need_convert (token))
 	{
+		currlength = strlen (token);
 		current = token;
 		newlength = currlength*7+1;
 		oldlength = newlength;
@@ -535,10 +540,7 @@
 		g_assert (returnbuffer);
 		return returnbuffer;
 	}
-	newbuffer = g_new (gchar, currlength + 1);
-	memcpy (newbuffer,token, currlength);
-	newbuffer[currlength] = 0;
-	return newbuffer;
+	return g_strdup(token);
 }
 
 static gchar *



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]