[glib/wip/pcre-8.30: 4/8] regex: Use g_ascii_is[x]digit



commit 2363ba228cae098fd47caed42b063363dd87c4ff
Author: Christian Persch <chpe gnome org>
Date:   Sun Feb 12 19:29:42 2012 +0100

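    regex: Use g_ascii_is[x]digit

    Replace the (digitab[c] & ctype_xdigit) lookups in pcre_compile.c with
    g_ascii_isxdigit(c), include gstrfuncs.h for it, and compile out the
    private lookup-table block (up to and including ebcdic_chartab) with
    #if 0. glib/update-pcre/digitab.patch is regenerated to carry the same
    change.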

 glib/pcre/pcre_compile.c       |   22 ++--
 glib/update-pcre/digitab.patch |  217 ++++++++++++++++-----------------------
 2 files changed, 101 insertions(+), 138 deletions(-)
---
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
index 8070f51..eb985df 100644
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -52,6 +52,7 @@ supporting internal functions that are not used by other modules. */
 
 #include "pcre_internal.h"
 
+#include "gstrfuncs.h"
 
 /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
 is also used by pcretest. PCRE_DEBUG is not defined when building a production
@@ -513,6 +514,7 @@ into a subtraction and unsigned comparison). */
 
 #define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
 
+#if 0
 #ifndef EBCDIC
 
 /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
@@ -626,7 +628,7 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255 */
 #endif
-
+#endif /* 0 */
 
 /* Definition to allow mutual recursion */
 
@@ -812,10 +814,10 @@ else
       {
       /* In JavaScript, \u must be followed by four hexadecimal numbers.
       Otherwise it is a lowercase u letter. */
-      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
-        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
-        && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
-        && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
+      if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
+        && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0
+        && MAX_255(ptr[3]) && g_ascii_isxdigit(ptr[3]) != 0
+        && MAX_255(ptr[4]) && g_ascii_isxdigit(ptr[4]) != 0)
         {
         c = 0;
         for (i = 0; i < 4; ++i)
@@ -1012,8 +1014,8 @@ else
       {
       /* In JavaScript, \x must be followed by two hexadecimal numbers.
       Otherwise it is a lowercase x letter. */
-      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
-        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
+      if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
+        && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0)
         {
         c = 0;
         for (i = 0; i < 2; ++i)
@@ -1036,7 +1038,7 @@ else
       const pcre_uchar *pt = ptr + 2;
 
       c = 0;
-      while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
+      while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0)
         {
         register int cc = *pt++;
         if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
@@ -1060,7 +1062,7 @@ else
 
       if (c < 0)
         {
-        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
+        while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0) pt++;
         *errorcodeptr = ERR34;
         }
 
@@ -1078,7 +1080,7 @@ else
     /* Read just a single-byte hex-defined char */
 
     c = 0;
-    while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
+    while (i++ < 2 && MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0)
       {
       int cc;                                  /* Some compilers don't like */
       cc = *(++ptr);                           /* ++ in initializers */
diff --git a/glib/update-pcre/digitab.patch b/glib/update-pcre/digitab.patch
index a745fbb..a12efc5 100644
--- a/glib/update-pcre/digitab.patch
+++ b/glib/update-pcre/digitab.patch
@@ -1,133 +1,94 @@
---- pcre_compile.c	2006-10-10 12:00:00.000000000 +0200
-+++ pcre_compile.c	2006-10-10 12:00:00.000000000 +0200
-@@ -246,130 +246,6 @@ static const char *error_texts[] = {
- };
+From 5238ab10c5f3082a4be38410bd01a47ab176dfde Mon Sep 17 00:00:00 2001
+From: Christian Persch <chpe gnome org>
+Date: Sun, 12 Feb 2012 19:29:42 +0100
+Subject: [PATCH] regex: Use g_ascii_is[x]digit
+
+---
+ glib/pcre/pcre_compile.c |   22 ++++++++++++----------
+ 1 files changed, 12 insertions(+), 10 deletions(-)
+
+diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
+index 8070f51..eb985df 100644
+--- a/glib/pcre/pcre_compile.c
++++ b/glib/pcre/pcre_compile.c
+@@ -52,6 +52,7 @@ supporting internal functions that are not used by other modules. */
  
+ #include "pcre_internal.h"
  
--/* Table to identify digits and hex digits. This is used when compiling
--patterns. Note that the tables in chartables are dependent on the locale, and
--may mark arbitrary characters as digits - but the PCRE compiling code expects
--to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have
--a private table here. It costs 256 bytes, but it is a lot faster than doing
--character value tests (at least in some simple cases I timed), and in some
--applications one wants PCRE to compile efficiently as well as match
--efficiently.
--
--For convenience, we use the same bit definitions as in chartables:
--
--  0x04   decimal digit
--  0x08   hexadecimal digit
--
--Then we can use ctype_digit and ctype_xdigit in the code. */
--
--#if !EBCDIC    /* This is the "normal" case, for ASCII systems */
--static const unsigned char digitab[] =
--  {
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - '  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ( - /  */
--  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  */
--  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /*  8 - ?  */
--  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  @ - G  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H - O  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  P - W  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  X - _  */
--  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  ` - g  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h - o  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  p - w  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  x -127 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
--
--#else          /* This is the "abnormal" case, for EBCDIC systems */
--static const unsigned char digitab[] =
--  {
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   8- 15    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 10 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  32- 39 20 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 30 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- ¬     */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "     */
--  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g  80 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p  90 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x  A0 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 B0 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191    */
--  0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /*  { - G  C0 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  } - P  D0 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223    */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  \ - X  E0 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239    */
--  0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /*  0 - 7  F0 */
--  0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255    */
--
--static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */
--  0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*   0-  7 */
--  0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
--  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  16- 23 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
--  0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /*  32- 39 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  40- 47 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  48- 55 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  56- 63 */
--  0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
--  0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
--  0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- ¬  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
--  0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
--  0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- "  */
--  0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g  */
--  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  h -143 */
--  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p  */
--  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  q -159 */
--  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x  */
--  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  y -175 */
--  0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  ^ -183 */
--  0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
--  0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  { - G  */
--  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  H -207 */
--  0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  } - P  */
--  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Q -223 */
--  0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /*  \ - X  */
--  0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /*  Y -239 */
--  0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
--  0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255 */
--#endif
--
++#include "gstrfuncs.h"
+ 
+ /* When PCRE_DEBUG is defined, we need the pcre(16)_printint() function, which
+ is also used by pcretest. PCRE_DEBUG is not defined when building a production
+@@ -513,6 +514,7 @@ into a subtraction and unsigned comparison). */
+ 
+ #define IS_DIGIT(x) ((x) >= CHAR_0 && (x) <= CHAR_9)
+ 
++#if 0
+ #ifndef EBCDIC
+ 
+ /* This is the "normal" case, for ASCII systems, and EBCDIC systems running in
+@@ -626,7 +628,7 @@ static const pcre_uint8 ebcdic_chartab[] = { /* chartable partial dup */
+   0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
+   0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/*  8 -255 */
+ #endif
 -
++#endif /* 0 */
+ 
  /* Definition to allow mutual recursion */
  
- static BOOL
+@@ -812,10 +814,10 @@ else
+       {
+       /* In JavaScript, \u must be followed by four hexadecimal numbers.
+       Otherwise it is a lowercase u letter. */
+-      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+-        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0
+-        && MAX_255(ptr[3]) && (digitab[ptr[3]] & ctype_xdigit) != 0
+-        && MAX_255(ptr[4]) && (digitab[ptr[4]] & ctype_xdigit) != 0)
++      if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
++        && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0
++        && MAX_255(ptr[3]) && g_ascii_isxdigit(ptr[3]) != 0
++        && MAX_255(ptr[4]) && g_ascii_isxdigit(ptr[4]) != 0)
+         {
+         c = 0;
+         for (i = 0; i < 4; ++i)
+@@ -1012,8 +1014,8 @@ else
+       {
+       /* In JavaScript, \x must be followed by two hexadecimal numbers.
+       Otherwise it is a lowercase x letter. */
+-      if (MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0
+-        && MAX_255(ptr[2]) && (digitab[ptr[2]] & ctype_xdigit) != 0)
++      if (MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0
++        && MAX_255(ptr[2]) && g_ascii_isxdigit(ptr[2]) != 0)
+         {
+         c = 0;
+         for (i = 0; i < 2; ++i)
+@@ -1036,7 +1038,7 @@ else
+       const pcre_uchar *pt = ptr + 2;
+ 
+       c = 0;
+-      while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
++      while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0)
+         {
+         register int cc = *pt++;
+         if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
+@@ -1060,7 +1062,7 @@ else
+ 
+       if (c < 0)
+         {
+-        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
++        while (MAX_255(*pt) && g_ascii_isxdigit(*pt) != 0) pt++;
+         *errorcodeptr = ERR34;
+         }
+ 
+@@ -1078,7 +1080,7 @@ else
+     /* Read just a single-byte hex-defined char */
+ 
+     c = 0;
+-    while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
++    while (i++ < 2 && MAX_255(ptr[1]) && g_ascii_isxdigit(ptr[1]) != 0)
+       {
+       int cc;                                  /* Some compilers don't like */
+       cc = *(++ptr);                           /* ++ in initializers */
+-- 
+1.7.5.1.217.g4e3aa.dirty
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]