glib r7813 - in trunk: . glib/pcre
- From: matthiasc svn gnome org
- To: svn-commits-list gnome org
- Subject: glib r7813 - in trunk: . glib/pcre
- Date: Sun, 18 Jan 2009 06:32:03 +0000 (UTC)
Author: matthiasc
Date: Sun Jan 18 06:32:03 2009
New Revision: 7813
URL: http://svn.gnome.org/viewvc/glib?rev=7813&view=rev
Log:
Update PCRE to 7.8
Modified:
trunk/ChangeLog
trunk/glib/pcre/pcre.h
trunk/glib/pcre/pcre_chartables.c
trunk/glib/pcre/pcre_compile.c
trunk/glib/pcre/pcre_config.c
trunk/glib/pcre/pcre_dfa_exec.c
trunk/glib/pcre/pcre_exec.c
trunk/glib/pcre/pcre_fullinfo.c
trunk/glib/pcre/pcre_get.c
trunk/glib/pcre/pcre_globals.c
trunk/glib/pcre/pcre_info.c
trunk/glib/pcre/pcre_internal.h
trunk/glib/pcre/pcre_ord2utf8.c
trunk/glib/pcre/pcre_refcount.c
trunk/glib/pcre/pcre_study.c
trunk/glib/pcre/pcre_tables.c
trunk/glib/pcre/pcre_ucp_searchfuncs.c
trunk/glib/pcre/pcre_valid_utf8.c
trunk/glib/pcre/pcre_version.c
trunk/glib/pcre/pcre_xclass.c
trunk/glib/pcre/ucp.h
Modified: trunk/glib/pcre/pcre.h
==============================================================================
--- trunk/glib/pcre/pcre.h (original)
+++ trunk/glib/pcre/pcre.h Sun Jan 18 06:32:03 2009
@@ -42,9 +42,9 @@
/* The current PCRE version information. */
#define PCRE_MAJOR 7
-#define PCRE_MINOR 7
+#define PCRE_MINOR 8
#define PCRE_PRERELEASE
-#define PCRE_DATE 2008-05-07
+#define PCRE_DATE 2008-09-05
/* When an application links to a PCRE DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE, the appropriate
Modified: trunk/glib/pcre/pcre_chartables.c
==============================================================================
--- trunk/glib/pcre/pcre_chartables.c (original)
+++ trunk/glib/pcre/pcre_chartables.c Sun Jan 18 06:32:03 2009
@@ -1,6 +1,3 @@
-/* This file is autogenerated by ../update-pcre/update.sh during
- * the update of the local copy of PCRE.
- */
/*************************************************
* Perl-Compatible Regular Expressions *
*************************************************/
Modified: trunk/glib/pcre/pcre_compile.c
==============================================================================
--- trunk/glib/pcre/pcre_compile.c (original)
+++ trunk/glib/pcre/pcre_compile.c Sun Jan 18 06:32:03 2009
@@ -331,7 +331,7 @@
find_error_text(int n)
{
const char *s = error_texts;
-for (; n > 0; n--) while (*s++ != 0);
+for (; n > 0; n--) while (*s++ != 0) {};
return s;
}
@@ -437,7 +437,7 @@
{
const uschar *p;
for (p = ptr+2; *p != 0 && *p != '}'; p++)
- if (*p != '-' && g_ascii_isdigit(*p) == 0) break;
+ if (*p != '-' && g_ascii_isdigit (*p) == 0) break;
if (*p != 0 && *p != '}')
{
c = -ESC_k;
@@ -456,7 +456,7 @@
else negated = FALSE;
c = 0;
- while (g_ascii_isdigit(ptr[1]) != 0)
+ while (g_ascii_isdigit (ptr[1]) != 0)
c = c * 10 + *(++ptr) - '0';
if (c < 0) /* Integer overflow */
@@ -509,7 +509,7 @@
{
oldptr = ptr;
c -= '0';
- while (g_ascii_isdigit(ptr[1]) != 0)
+ while (g_ascii_isdigit (ptr[1]))
c = c * 10 + *(++ptr) - '0';
if (c < 0) /* Integer overflow */
{
@@ -559,7 +559,7 @@
int count = 0;
c = 0;
- while (g_ascii_isxdigit(*pt) != 0)
+ while (g_ascii_isxdigit (*pt) != 0)
{
register int cc = *pt++;
if (c == 0 && cc == '0') continue; /* Leading zeroes */
@@ -588,7 +588,7 @@
/* Read just a single-byte hex-defined char */
c = 0;
- while (i++ < 2 && g_ascii_isxdigit(ptr[1]) != 0)
+ while (i++ < 2 && g_ascii_isxdigit (ptr[1]) != 0)
{
int cc; /* Some compilers don't like ++ */
cc = *(++ptr); /* in initializers */
@@ -757,15 +757,15 @@
static BOOL
is_counted_repeat(const uschar *p)
{
-if (g_ascii_isdigit(*p++) == 0) return FALSE;
-while (g_ascii_isdigit(*p) != 0) p++;
+if (g_ascii_isdigit (*p++) == 0) return FALSE;
+while (g_ascii_isdigit (*p) != 0) p++;
if (*p == '}') return TRUE;
if (*p++ != ',') return FALSE;
if (*p == '}') return TRUE;
-if (g_ascii_isdigit(*p++) == 0) return FALSE;
-while (g_ascii_isdigit(*p) != 0) p++;
+if (g_ascii_isdigit (*p++) == 0) return FALSE;
+while (g_ascii_isdigit (*p) != 0) p++;
return (*p == '}');
}
@@ -800,7 +800,7 @@
/* Read the minimum value and do a paranoid check: a negative value indicates
an integer overflow. */
-while (g_ascii_isdigit(*p) != 0) min = min * 10 + *p++ - '0';
+while (g_ascii_isdigit (*p) != 0) min = min * 10 + *p++ - '0';
if (min < 0 || min > 65535)
{
*errorcodeptr = ERR5;
@@ -815,7 +815,7 @@
if (*(++p) != '}')
{
max = 0;
- while(g_ascii_isdigit(*p) != 0) max = max * 10 + *p++ - '0';
+ while(g_ascii_isdigit (*p) != 0) max = max * 10 + *p++ - '0';
if (max < 0 || max > 65535)
{
*errorcodeptr = ERR5;
@@ -878,7 +878,7 @@
if (*(++ptr) == 0) return -1;
if (*ptr == 'Q') for (;;)
{
- while (*(++ptr) != 0 && *ptr != '\\');
+ while (*(++ptr) != 0 && *ptr != '\\') {};
if (*ptr == 0) return -1;
if (*(++ptr) == 'E') break;
}
@@ -921,7 +921,7 @@
if (*(++ptr) == 0) return -1;
if (*ptr == 'Q') for (;;)
{
- while (*(++ptr) != 0 && *ptr != '\\');
+ while (*(++ptr) != 0 && *ptr != '\\') {};
if (*ptr == 0) return -1;
if (*(++ptr) == 'E') break;
}
@@ -935,7 +935,7 @@
if (xmode && *ptr == '#')
{
- while (*(++ptr) != 0 && *ptr != '\n');
+ while (*(++ptr) != 0 && *ptr != '\n') {};
if (*ptr == 0) return -1;
continue;
}
@@ -1326,6 +1326,8 @@
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
break;
}
+#else
+ (void)(utf8); /* Keep compiler happy by referencing function argument */
#endif
}
}
@@ -1419,6 +1421,8 @@
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
break;
}
+#else
+ (void)(utf8); /* Keep compiler happy by referencing function argument */
#endif
}
}
@@ -1891,7 +1895,7 @@
unsigned int c, othercase, next;
for (c = *cptr; c <= d; c++)
- { if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR) break; }
+ { if ((othercase = UCD_OTHERCASE(c)) != c) break; }
if (c > d) return FALSE;
@@ -1900,7 +1904,7 @@
for (++c; c <= d; c++)
{
- if (_pcre_ucp_othercase(c) != next) break;
+ if (UCD_OTHERCASE(c) != next) break;
next++;
}
@@ -2010,6 +2014,8 @@
case OP_CHAR:
#ifdef SUPPORT_UTF8
if (utf8 && item > 127) { GETCHAR(item, utf8_char); }
+#else
+ (void)(utf8_char); /* Keep compiler happy by referencing function argument */
#endif
return item != next;
@@ -2028,7 +2034,7 @@
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
- othercase = _pcre_ucp_othercase((unsigned int)next);
+ othercase = UCD_OTHERCASE((unsigned int)next);
#else
othercase = NOTACHAR;
#endif
@@ -2049,7 +2055,7 @@
unsigned int othercase;
if (next < 128) othercase = cd->fcc[next]; else
#ifdef SUPPORT_UCP
- othercase = _pcre_ucp_othercase(next);
+ othercase = UCD_OTHERCASE(next);
#else
othercase = NOTACHAR;
#endif
@@ -3215,7 +3221,7 @@
if ((options & PCRE_CASELESS) != 0)
{
unsigned int othercase;
- if ((othercase = _pcre_ucp_othercase(c)) != NOTACHAR)
+ if ((othercase = UCD_OTHERCASE(c)) != c)
{
*class_utf8data++ = XCL_SINGLE;
class_utf8data += _pcre_ord2utf8(othercase, class_utf8data);
@@ -4092,7 +4098,7 @@
const char *vn = verbnames;
const uschar *name = ++ptr;
previous = NULL;
- while ((cd->ctypes[*++ptr] & ctype_letter) != 0);
+ while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
if (*ptr == ':')
{
*errorcodeptr = ERR59; /* Not supported */
@@ -4230,7 +4236,7 @@
while ((cd->ctypes[*ptr] & ctype_word) != 0)
{
if (recno >= 0)
- recno = (g_ascii_isdigit(*ptr) != 0)?
+ recno = (g_ascii_isdigit (*ptr) != 0)?
recno * 10 + *ptr - '0' : -1;
ptr++;
}
@@ -4315,7 +4321,7 @@
recno = 0;
for (i = 1; i < namelen; i++)
{
- if (g_ascii_isdigit(name[i]) == 0)
+ if (g_ascii_isdigit (name[i]) == 0)
{
*errorcodeptr = ERR15;
goto FAILED;
@@ -4411,7 +4417,7 @@
*code++ = OP_CALLOUT;
{
int n = 0;
- while (g_ascii_isdigit(*(++ptr)) != 0)
+ while (g_ascii_isdigit (*(++ptr)) != 0)
n = n * 10 + *ptr - '0';
if (*ptr != ')')
{
@@ -4626,7 +4632,7 @@
if ((refsign = *ptr) == '+')
{
ptr++;
- if (g_ascii_isdigit(*ptr) == 0)
+ if (g_ascii_isdigit (*ptr) == 0)
{
*errorcodeptr = ERR63;
goto FAILED;
@@ -4634,13 +4640,13 @@
}
else if (refsign == '-')
{
- if (g_ascii_isdigit(ptr[1]) == 0)
+ if (g_ascii_isdigit (ptr[1]) == 0)
goto OTHER_CHAR_AFTER_QUERY;
ptr++;
}
recno = 0;
- while(g_ascii_isdigit(*ptr) != 0)
+ while(g_ascii_isdigit (*ptr) != 0)
recno = recno * 10 + *ptr++ - '0';
if (*ptr != terminator)
@@ -4796,10 +4802,8 @@
both phases.
If we are not at the pattern start, compile code to change the ims
- options if this setting actually changes any of them. We also pass the
- new setting back so that it can be put at the start of any following
- branches, and when this group ends (if we are in a group), a resetting
- item can be compiled. */
+ options if this setting actually changes any of them, and reset the
+ greedy defaults and the case value for firstbyte and reqbyte. */
if (*ptr == ')')
{
@@ -4807,7 +4811,6 @@
(lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
{
cd->external_options = newoptions;
- options = *optionsptr = newoptions;
}
else
{
@@ -4816,17 +4819,17 @@
*code++ = OP_OPT;
*code++ = newoptions & PCRE_IMS;
}
-
- /* Change options at this level, and pass them back for use
- in subsequent branches. Reset the greedy defaults and the case
- value for firstbyte and reqbyte. */
-
- *optionsptr = options = newoptions;
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0);
greedy_non_default = greedy_default ^ 1;
- req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
+ req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0;
}
+ /* Change options at this level, and pass them back for use
+ in subsequent branches. When not at the start of the pattern, this
+ information is also necessary so that a resetting item can be
+ compiled at the end of a group (if we are in a group). */
+
+ *optionsptr = options = newoptions;
previous = NULL; /* This item can't be repeated */
continue; /* It is complete */
}
@@ -5115,7 +5118,7 @@
/* Test a signed number in angle brackets or quotes. */
p = ptr + 2;
- while (g_ascii_isdigit(*p) != 0) p++;
+ while (g_ascii_isdigit (*p) != 0) p++;
if (*p != terminator)
{
*errorcodeptr = ERR57;
@@ -5820,7 +5823,7 @@
with errorptr and erroroffset set
*/
-PCRE_EXP_DEFN pcre *
+PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile(const char *pattern, int options, const char **errorptr,
int *erroroffset, const unsigned char *tables)
{
@@ -5828,7 +5831,7 @@
}
-PCRE_EXP_DEFN pcre *
+PCRE_EXP_DEFN pcre * PCRE_CALL_CONVENTION
pcre_compile2(const char *pattern, int options, int *errorcodeptr,
const char **errorptr, int *erroroffset, const unsigned char *tables)
{
Modified: trunk/glib/pcre/pcre_config.c
==============================================================================
--- trunk/glib/pcre/pcre_config.c (original)
+++ trunk/glib/pcre/pcre_config.c Sun Jan 18 06:32:03 2009
@@ -62,7 +62,7 @@
Returns: 0 if data returned, negative on error
*/
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_config(int what, void *where)
{
switch (what)
Modified: trunk/glib/pcre/pcre_dfa_exec.c
==============================================================================
--- trunk/glib/pcre/pcre_dfa_exec.c (original)
+++ trunk/glib/pcre/pcre_dfa_exec.c Sun Jan 18 06:32:03 2009
@@ -512,9 +512,6 @@
const uschar *code;
int state_offset = current_state->offset;
int count, codevalue;
-#ifdef SUPPORT_UCP
- int chartype, script;
-#endif
#ifdef DEBUG
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
@@ -825,7 +822,7 @@
if (clen > 0)
{
BOOL OK;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
+ int chartype = UCD_CHARTYPE(c);
switch(code[1])
{
case PT_ANY:
@@ -837,7 +834,7 @@
break;
case PT_GC:
- OK = category == code[2];
+ OK = _pcre_ucp_gentype[chartype] == code[2];
break;
case PT_PC:
@@ -845,7 +842,7 @@
break;
case PT_SC:
- OK = script == code[2];
+ OK = UCD_SCRIPT(c) == code[2];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -994,7 +991,7 @@
if (clen > 0)
{
BOOL OK;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
+ int chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@@ -1006,7 +1003,7 @@
break;
case PT_GC:
- OK = category == code[3];
+ OK = _pcre_ucp_gentype[chartype] == code[3];
break;
case PT_PC:
@@ -1014,7 +1011,7 @@
break;
case PT_SC:
- OK = script == code[3];
+ OK = UCD_SCRIPT(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1043,7 +1040,7 @@
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
- if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+ if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1057,7 +1054,7 @@
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
- if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+ if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1216,7 +1213,7 @@
if (clen > 0)
{
BOOL OK;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
+ int chartype = UCD_CHARTYPE(c);
switch(code[2])
{
case PT_ANY:
@@ -1228,7 +1225,7 @@
break;
case PT_GC:
- OK = category == code[3];
+ OK = _pcre_ucp_gentype[chartype] == code[3];
break;
case PT_PC:
@@ -1236,7 +1233,7 @@
break;
case PT_SC:
- OK = script == code[3];
+ OK = UCD_SCRIPT(c) == code[3];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1274,7 +1271,7 @@
QS2:
ADD_ACTIVE(state_offset + 2, 0);
- if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+ if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1289,7 +1286,7 @@
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
- if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+ if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1463,7 +1460,7 @@
if (clen > 0)
{
BOOL OK;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
+ int chartype = UCD_CHARTYPE(c);
switch(code[4])
{
case PT_ANY:
@@ -1475,7 +1472,7 @@
break;
case PT_GC:
- OK = category == code[5];
+ OK = _pcre_ucp_gentype[chartype] == code[5];
break;
case PT_PC:
@@ -1483,7 +1480,7 @@
break;
case PT_SC:
- OK = script == code[5];
+ OK = UCD_SCRIPT(c) == code[5];
break;
/* Should never occur, but keep compilers from grumbling. */
@@ -1516,7 +1513,7 @@
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
{ ADD_ACTIVE(state_offset + 4, 0); }
count = current_state->count; /* Number already matched */
- if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+ if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1530,7 +1527,7 @@
int nd;
int ndlen = 1;
GETCHARLEN(nd, nptr, ndlen);
- if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
+ if (UCD_CATEGORY(nd) != ucp_M) break;
ncount++;
nptr += ndlen;
}
@@ -1710,7 +1707,7 @@
other case of the character. */
#ifdef SUPPORT_UCP
- othercase = _pcre_ucp_othercase(c);
+ othercase = UCD_OTHERCASE(c);
#else
othercase = NOTACHAR;
#endif
@@ -1735,7 +1732,7 @@
to wait for them to pass before continuing. */
case OP_EXTUNI:
- if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
+ if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
{
const uschar *nptr = ptr + clen;
int ncount = 0;
@@ -1743,7 +1740,7 @@
{
int nclen = 1;
GETCHARLEN(c, nptr, nclen);
- if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
+ if (UCD_CATEGORY(c) != ucp_M) break;
ncount++;
nptr += nclen;
}
@@ -1911,7 +1908,7 @@
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
- otherd = _pcre_ucp_othercase(d);
+ otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1949,7 +1946,7 @@
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
- otherd = _pcre_ucp_othercase(d);
+ otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -1985,7 +1982,7 @@
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
- otherd = _pcre_ucp_othercase(d);
+ otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2017,7 +2014,7 @@
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
- otherd = _pcre_ucp_othercase(d);
+ otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2052,7 +2049,7 @@
if (utf8 && d >= 128)
{
#ifdef SUPPORT_UCP
- otherd = _pcre_ucp_othercase(d);
+ otherd = UCD_OTHERCASE(d);
#endif /* SUPPORT_UCP */
}
else
@@ -2508,7 +2505,7 @@
< -1 => some kind of unexpected problem
*/
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
const char *subject, int length, int start_offset, int options, int *offsets,
int offsetcount, int *workspace, int wscount)
@@ -2736,7 +2733,18 @@
if (firstline)
{
- const uschar *t = current_subject;
+ USPTR t = current_subject;
+#ifdef SUPPORT_UTF8
+ if (utf8)
+ {
+ while (t < md->end_subject && !IS_NEWLINE(t))
+ {
+ t++;
+ while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+ }
+ }
+ else
+#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
@@ -2758,7 +2766,20 @@
{
if (current_subject > md->start_subject + start_offset)
{
- while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
+#ifdef SUPPORT_UTF8
+ if (utf8)
+ {
+ while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+ {
+ current_subject++;
+ while(current_subject < end_subject &&
+ (*current_subject & 0xc0) == 0x80)
+ current_subject++;
+ }
+ }
+ else
+#endif
+ while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
current_subject++;
/* If we have just passed a CR and the newline option is ANY or
Modified: trunk/glib/pcre/pcre_exec.c
==============================================================================
--- trunk/glib/pcre/pcre_exec.c (original)
+++ trunk/glib/pcre/pcre_exec.c Sun Jan 18 06:32:03 2009
@@ -158,13 +158,39 @@
if (length > md->end_subject - eptr) return FALSE;
-/* Separate the caselesss case for speed */
+/* Separate the caseless case for speed. In UTF-8 mode we can only do this
+properly if Unicode properties are supported. Otherwise, we can check only
+ASCII characters. */
if ((ims & PCRE_CASELESS) != 0)
{
+#ifdef SUPPORT_UTF8
+#ifdef SUPPORT_UCP
+ if (md->utf8)
+ {
+ USPTR endptr = eptr + length;
+ while (eptr < endptr)
+ {
+ int c, d;
+ GETCHARINC(c, eptr);
+ GETCHARINC(d, p);
+ if (c != d && c != UCD_OTHERCASE(d)) return FALSE;
+ }
+ }
+ else
+#endif
+#endif
+
+ /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
+ is no UCP support. */
+
while (length-- > 0)
- if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
+ { if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; }
}
+
+/* In the caseful case, we can just compare the bytes, whether or not we
+are in UTF-8 mode. */
+
else
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
@@ -1653,9 +1679,7 @@
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
- int chartype, script;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
-
+ int chartype = UCD_CHARTYPE(c);
switch(ecode[1])
{
case PT_ANY:
@@ -1670,7 +1694,7 @@
break;
case PT_GC:
- if ((ecode[2] != category) == (op == OP_PROP))
+ if ((ecode[2] != _pcre_ucp_gentype[chartype]) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
@@ -1680,7 +1704,7 @@
break;
case PT_SC:
- if ((ecode[2] != script) == (op == OP_PROP))
+ if ((ecode[2] != UCD_SCRIPT(c)) == (op == OP_PROP))
RRETURN(MATCH_NOMATCH);
break;
@@ -1699,8 +1723,7 @@
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
{
- int chartype, script;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
+ int category = UCD_CATEGORY(c);
if (category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -1709,7 +1732,7 @@
{
GETCHARLEN(c, eptr, len);
}
- category = _pcre_ucp_findprop(c, &chartype, &script);
+ category = UCD_CATEGORY(c);
if (category != ucp_M) break;
eptr += len;
}
@@ -2174,7 +2197,7 @@
if (fc != dc)
{
#ifdef SUPPORT_UCP
- if (dc != _pcre_ucp_othercase(fc))
+ if (dc != UCD_OTHERCASE(fc))
#endif
RRETURN(MATCH_NOMATCH);
}
@@ -2265,7 +2288,7 @@
#ifdef SUPPORT_UCP
unsigned int othercase;
if ((ims & PCRE_CASELESS) != 0 &&
- (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
+ (othercase = UCD_OTHERCASE(fc)) != fc)
oclength = _pcre_ord2utf8(othercase, occhars);
else oclength = 0;
#endif /* SUPPORT_UCP */
@@ -2585,10 +2608,11 @@
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
if (d < 256) d = md->lcc[d];
- if (fi >= max || eptr >= md->end_subject || fc == d)
- RRETURN(MATCH_NOMATCH);
+ if (fc == d) RRETURN(MATCH_NOMATCH);
+
}
}
else
@@ -2694,9 +2718,9 @@
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+ if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(d, eptr);
- if (fi >= max || eptr >= md->end_subject || fc == d)
- RRETURN(MATCH_NOMATCH);
+ if (fc == d) RRETURN(MATCH_NOMATCH);
}
}
else
@@ -2870,7 +2894,7 @@
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@@ -2883,7 +2907,7 @@
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2894,7 +2918,7 @@
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2905,7 +2929,7 @@
{
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -2924,7 +2948,7 @@
for (i = 1; i <= min; i++)
{
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -2933,7 +2957,7 @@
{
GETCHARLEN(c, eptr, len);
}
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -3349,7 +3373,7 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3364,7 +3388,7 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -3377,7 +3401,7 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -3390,7 +3414,7 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINC(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
RRETURN(MATCH_NOMATCH);
}
@@ -3412,7 +3436,7 @@
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
while (eptr < md->end_subject)
{
@@ -3421,7 +3445,7 @@
{
GETCHARLEN(c, eptr, len);
}
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -3739,7 +3763,7 @@
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
prop_chartype == ucp_Ll ||
prop_chartype == ucp_Lt) == prop_fail_result)
@@ -3754,7 +3778,7 @@
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
break;
eptr+= len;
@@ -3767,7 +3791,7 @@
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
break;
eptr+= len;
@@ -3780,7 +3804,7 @@
int len = 1;
if (eptr >= md->end_subject) break;
GETCHARLEN(c, eptr, len);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
break;
eptr+= len;
@@ -3809,7 +3833,7 @@
{
if (eptr >= md->end_subject) break;
GETCHARINCTEST(c, eptr);
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
while (eptr < md->end_subject)
{
@@ -3818,7 +3842,7 @@
{
GETCHARLEN(c, eptr, len);
}
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr += len;
}
@@ -3840,7 +3864,7 @@
BACKCHAR(eptr);
GETCHARLEN(c, eptr, len);
}
- prop_category = _pcre_ucp_findprop(c, &prop_chartype, &prop_script);
+ prop_category = UCD_CATEGORY(c);
if (prop_category != ucp_M) break;
eptr--;
}
@@ -4360,7 +4384,7 @@
< -1 => some kind of unexpected problem
*/
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
int offsetcount)
@@ -4672,31 +4696,53 @@
if (firstline)
{
USPTR t = start_match;
+#ifdef SUPPORT_UTF8
+ if (utf8)
+ {
+ while (t < md->end_subject && !IS_NEWLINE(t))
+ {
+ t++;
+ while (t < end_subject && (*t & 0xc0) == 0x80) t++;
+ }
+ }
+ else
+#endif
while (t < md->end_subject && !IS_NEWLINE(t)) t++;
end_subject = t;
}
- /* Now test for a unique first byte */
+ /* Now advance to a unique first byte if there is one. */
if (first_byte >= 0)
{
if (first_byte_caseless)
- while (start_match < end_subject &&
- md->lcc[*start_match] != first_byte)
- { NEXTCHAR(start_match); }
+ while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+ start_match++;
else
while (start_match < end_subject && *start_match != first_byte)
- { NEXTCHAR(start_match); }
+ start_match++;
}
- /* Or to just after a linebreak for a multiline match if possible */
+ /* Or to just after a linebreak for a multiline match */
else if (startline)
{
if (start_match > md->start_subject + start_offset)
{
- while (start_match <= end_subject && !WAS_NEWLINE(start_match))
- { NEXTCHAR(start_match); }
+#ifdef SUPPORT_UTF8
+ if (utf8)
+ {
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ {
+ start_match++;
+ while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+ start_match++;
+ }
+ }
+ else
+#endif
+ while (start_match < end_subject && !WAS_NEWLINE(start_match))
+ start_match++;
/* If we have just passed a CR and the newline option is ANY or ANYCRLF,
and we are now at a LF, advance the match position by one more character.
@@ -4710,16 +4756,15 @@
}
}
- /* Or to a non-unique first char after study */
+ /* Or to a non-unique first byte after study */
else if (start_bits != NULL)
{
while (start_match < end_subject)
{
register unsigned int c = *start_match;
- if ((start_bits[c/8] & (1 << (c&7))) == 0)
- { NEXTCHAR(start_match); }
- else break;
+ if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
+ else break;
}
}
Modified: trunk/glib/pcre/pcre_fullinfo.c
==============================================================================
--- trunk/glib/pcre/pcre_fullinfo.c (original)
+++ trunk/glib/pcre/pcre_fullinfo.c Sun Jan 18 06:32:03 2009
@@ -65,7 +65,7 @@
Returns: 0 if data returned, negative on error
*/
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what,
void *where)
{
Modified: trunk/glib/pcre/pcre_get.c
==============================================================================
--- trunk/glib/pcre/pcre_get.c (original)
+++ trunk/glib/pcre/pcre_get.c Sun Jan 18 06:32:03 2009
@@ -65,7 +65,7 @@
(PCRE_ERROR_NOSUBSTRING) if not found
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringnumber(const pcre *code, const char *stringname)
{
int rc;
@@ -114,7 +114,7 @@
(PCRE_ERROR_NOSUBSTRING) if not found
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_stringtable_entries(const pcre *code, const char *stringname,
char **firstptr, char **lastptr)
{
@@ -231,7 +231,7 @@
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, char *buffer, int size)
{
@@ -276,7 +276,7 @@
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, char *buffer, int size)
{
@@ -308,7 +308,7 @@
PCRE_ERROR_NOMEMORY (-6) failed to get store
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
const char ***listptr)
{
@@ -353,7 +353,7 @@
Returns: nothing
*/
-void
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring_list(const char **pointer)
{
(pcre_free)((void *)pointer);
@@ -386,7 +386,7 @@
PCRE_ERROR_NOSUBSTRING (-7) substring not present
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_substring(const char *subject, int *ovector, int stringcount,
int stringnumber, const char **stringptr)
{
@@ -433,7 +433,7 @@
PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
*/
-int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
int stringcount, const char *stringname, const char **stringptr)
{
@@ -456,7 +456,7 @@
Returns: nothing
*/
-void
+PCRE_EXP_DEFN void PCRE_CALL_CONVENTION
pcre_free_substring(const char *pointer)
{
(pcre_free)((void *)pointer);
Modified: trunk/glib/pcre/pcre_globals.c
==============================================================================
--- trunk/glib/pcre/pcre_globals.c (original)
+++ trunk/glib/pcre/pcre_globals.c Sun Jan 18 06:32:03 2009
@@ -52,8 +52,6 @@
#include "pcre_internal.h"
-#ifndef VPCOMPAT
PCRE_EXP_DATA_DEFN int (*pcre_callout)(pcre_callout_block *) = NULL;
-#endif
/* End of pcre_globals.c */
Modified: trunk/glib/pcre/pcre_info.c
==============================================================================
--- trunk/glib/pcre/pcre_info.c (original)
+++ trunk/glib/pcre/pcre_info.c Sun Jan 18 06:32:03 2009
@@ -72,7 +72,7 @@
or negative values on error
*/
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_info(const pcre *argument_re, int *optptr, int *first_byte)
{
real_pcre internal_re;
Modified: trunk/glib/pcre/pcre_internal.h
==============================================================================
--- trunk/glib/pcre/pcre_internal.h (original)
+++ trunk/glib/pcre/pcre_internal.h Sun Jan 18 06:32:03 2009
@@ -132,6 +132,20 @@
# endif
#endif
+/* When compiling with the MSVC compiler, it is sometimes necessary to include
+a "calling convention" before exported function names. (This is secondhand
+information; I know nothing about MSVC myself). For example, something like
+
+ void __cdecl function(....)
+
+might be needed. In order to make this easy, all the exported functions have
+PCRE_CALL_CONVENTION just before their names. It is rarely needed; if not
+set, we ensure here that it has no effect. */
+
+#ifndef PCRE_CALL_CONVENTION
+#define PCRE_CALL_CONVENTION
+#endif
+
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We
cannot determine these outside the compilation (e.g. by running a program as
part of "configure") because PCRE is often cross-compiled for use on other
@@ -140,16 +154,20 @@
#if USHRT_MAX == 65535
typedef unsigned short pcre_uint16;
+ typedef short pcre_int16;
#elif UINT_MAX == 65535
typedef unsigned int pcre_uint16;
+ typedef int pcre_int16;
#else
#error Cannot determine a type for 16-bit unsigned integers
#endif
#if UINT_MAX == 4294967295
typedef unsigned int pcre_uint32;
+ typedef int pcre_int32;
#elif ULONG_MAX == 4294967295
typedef unsigned long int pcre_uint32;
+ typedef long int pcre_int32;
#else
#error Cannot determine a type for 32-bit unsigned integers
#endif
@@ -241,7 +259,6 @@
#define strncmp(s1,s2,m) _strncmp(s1,s2,m)
#define memcmp(s,c,n) _memcmp(s,c,n)
#define memcpy(d,s,n) _memcpy(d,s,n)
-#define memmove(d,s,n) _memmove(d,s,n)
#define memset(s,c,n) _memset(s,c,n)
#else /* VPCOMPAT */
@@ -363,7 +380,6 @@
support is omitted, we don't even define it. */
#ifndef SUPPORT_UTF8
-#define NEXTCHAR(p) p++;
#define GETCHAR(c, eptr) c = *eptr;
#define GETCHARTEST(c, eptr) c = *eptr;
#define GETCHARINC(c, eptr) c = *eptr++;
@@ -373,13 +389,6 @@
#else /* SUPPORT_UTF8 */
-/* Advance a character pointer one byte in non-UTF-8 mode and by one character
-in UTF-8 mode. */
-
-#define NEXTCHAR(p) \
- p++; \
- if (utf8) { while((*p & 0xc0) == 0x80) p++; }
-
/* Get the next UTF-8 character, not advancing the pointer. This is called when
we know we are in UTF-8 mode. */
@@ -549,7 +558,8 @@
#define REQ_CASELESS 0x0100 /* indicates caselessness */
#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
-/* Miscellaneous definitions */
+/* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
+environments where these macros are defined elsewhere. */
typedef gboolean BOOL;
@@ -1123,12 +1133,24 @@
extern int _pcre_ord2utf8(int, uschar *);
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *,
const pcre_study_data *, pcre_study_data *);
-extern int _pcre_ucp_findprop(const unsigned int, int *, int *);
-extern unsigned int _pcre_ucp_othercase(const unsigned int);
extern int _pcre_valid_utf8(const uschar *, int);
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *,
int *, BOOL);
extern BOOL _pcre_xclass(int, const uschar *);
+extern unsigned int _pcre_ucp_othercase(unsigned int);
+
+
+extern const int _pcre_ucp_gentype[];
+
+
+/* UCD access macros */
+
+#include "../glib.h"
+
+#define UCD_CHARTYPE(ch) g_unichar_type(ch)
+#define UCD_SCRIPT(ch) g_unichar_get_script(ch)
+#define UCD_CATEGORY(ch) _pcre_ucp_gentype[UCD_CHARTYPE(ch)]
+#define UCD_OTHERCASE(ch) _pcre_ucp_othercase(ch)
#endif
Modified: trunk/glib/pcre/pcre_ord2utf8.c
==============================================================================
--- trunk/glib/pcre/pcre_ord2utf8.c (original)
+++ trunk/glib/pcre/pcre_ord2utf8.c Sun Jan 18 06:32:03 2009
@@ -78,8 +78,10 @@
*buffer = _pcre_utf8_table2[i] | cvalue;
return i + 1;
#else
-return 0; /* Keep compiler happy; this function won't ever be */
-#endif /* called when SUPPORT_UTF8 is not defined. */
+(void)(cvalue); /* Keep compiler happy; this function won't ever be */
+(void)(buffer); /* called when SUPPORT_UTF8 is not defined. */
+return 0;
+#endif
}
/* End of pcre_ord2utf8.c */
Modified: trunk/glib/pcre/pcre_refcount.c
==============================================================================
--- trunk/glib/pcre/pcre_refcount.c (original)
+++ trunk/glib/pcre/pcre_refcount.c Sun Jan 18 06:32:03 2009
@@ -68,7 +68,7 @@
a negative error number
*/
-PCRE_EXP_DEFN int
+PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
pcre_refcount(pcre *argument_re, int adjust)
{
real_pcre *re = (real_pcre *)argument_re;
Modified: trunk/glib/pcre/pcre_study.c
==============================================================================
--- trunk/glib/pcre/pcre_study.c (original)
+++ trunk/glib/pcre/pcre_study.c Sun Jan 18 06:32:03 2009
@@ -220,6 +220,7 @@
/* SKIPZERO skips the bracket. */
case OP_SKIPZERO:
+ tcode++;
do tcode += GET(tcode,1); while (*tcode == OP_ALT);
tcode += 1 + LINK_SIZE;
break;
@@ -503,7 +504,7 @@
NULL on error or if no optimization possible
*/
-PCRE_EXP_DEFN pcre_extra *
+PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
Modified: trunk/glib/pcre/pcre_tables.c
==============================================================================
--- trunk/glib/pcre/pcre_tables.c (original)
+++ trunk/glib/pcre/pcre_tables.c Sun Jan 18 06:32:03 2009
@@ -87,6 +87,19 @@
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
+/* Table to translate from particular type value to the general value. */
+
+const int _pcre_ucp_gentype[] = {
+ ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
+ ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
+ ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
+ ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
+ ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
+ ucp_P, ucp_P, /* Ps, Po */
+ ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
+ ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
+};
+
/* The pcre_utt[] table below translates Unicode property names into type and
code values. It is searched by binary chop, so must be in collating sequence of
name. Originally, the table contained pointers to the name strings in the first
@@ -94,7 +107,10 @@
a shared library is dynamically loaded. A significant reduction is made by
putting all the names into a single, large string and then using offsets in the
table itself. Maintenance is more error-prone, but frequent changes to this
-data is unlikely. */
+data are unlikely.
+
+July 2008: There is now a script called maint/GenerateUtt.py which can be used
+to generate this data instead of maintaining it entirely by hand. */
const char _pcre_utt_names[] =
"Any\0"
@@ -108,8 +124,10 @@
"Buhid\0"
"C\0"
"Canadian_Aboriginal\0"
+ "Carian\0"
"Cc\0"
"Cf\0"
+ "Cham\0"
"Cherokee\0"
"Cn\0"
"Co\0"
@@ -136,12 +154,14 @@
"Inherited\0"
"Kannada\0"
"Katakana\0"
+ "Kayah_Li\0"
"Kharoshthi\0"
"Khmer\0"
"L\0"
"L&\0"
"Lao\0"
"Latin\0"
+ "Lepcha\0"
"Limbu\0"
"Linear_B\0"
"Ll\0"
@@ -149,6 +169,8 @@
"Lo\0"
"Lt\0"
"Lu\0"
+ "Lycian\0"
+ "Lydian\0"
"M\0"
"Malayalam\0"
"Mc\0"
@@ -163,6 +185,7 @@
"Nl\0"
"No\0"
"Ogham\0"
+ "Ol_Chiki\0"
"Old_Italic\0"
"Old_Persian\0"
"Oriya\0"
@@ -177,14 +200,17 @@
"Pi\0"
"Po\0"
"Ps\0"
+ "Rejang\0"
"Runic\0"
"S\0"
+ "Saurashtra\0"
"Sc\0"
"Shavian\0"
"Sinhala\0"
"Sk\0"
"Sm\0"
"So\0"
+ "Sundanese\0"
"Syloti_Nagri\0"
"Syriac\0"
"Tagalog\0"
@@ -197,6 +223,7 @@
"Tibetan\0"
"Tifinagh\0"
"Ugaritic\0"
+ "Vai\0"
"Yi\0"
"Z\0"
"Zl\0"
@@ -204,111 +231,122 @@
"Zs\0";
const ucp_type_table _pcre_utt[] = {
- { 0, PT_ANY, 0 },
- { 4, PT_SC, ucp_Arabic },
- { 11, PT_SC, ucp_Armenian },
- { 20, PT_SC, ucp_Balinese },
- { 29, PT_SC, ucp_Bengali },
- { 37, PT_SC, ucp_Bopomofo },
- { 46, PT_SC, ucp_Braille },
- { 54, PT_SC, ucp_Buginese },
- { 63, PT_SC, ucp_Buhid },
- { 69, PT_GC, ucp_C },
- { 71, PT_SC, ucp_Canadian_Aboriginal },
- { 91, PT_PC, ucp_Cc },
- { 94, PT_PC, ucp_Cf },
- { 97, PT_SC, ucp_Cherokee },
- { 106, PT_PC, ucp_Cn },
- { 109, PT_PC, ucp_Co },
- { 112, PT_SC, ucp_Common },
- { 119, PT_SC, ucp_Coptic },
- { 126, PT_PC, ucp_Cs },
- { 129, PT_SC, ucp_Cuneiform },
- { 139, PT_SC, ucp_Cypriot },
- { 147, PT_SC, ucp_Cyrillic },
- { 156, PT_SC, ucp_Deseret },
- { 164, PT_SC, ucp_Devanagari },
- { 175, PT_SC, ucp_Ethiopic },
- { 184, PT_SC, ucp_Georgian },
- { 193, PT_SC, ucp_Glagolitic },
- { 204, PT_SC, ucp_Gothic },
- { 211, PT_SC, ucp_Greek },
- { 217, PT_SC, ucp_Gujarati },
- { 226, PT_SC, ucp_Gurmukhi },
- { 235, PT_SC, ucp_Han },
- { 239, PT_SC, ucp_Hangul },
- { 246, PT_SC, ucp_Hanunoo },
- { 254, PT_SC, ucp_Hebrew },
- { 261, PT_SC, ucp_Hiragana },
- { 270, PT_SC, ucp_Inherited },
- { 280, PT_SC, ucp_Kannada },
- { 288, PT_SC, ucp_Katakana },
- { 297, PT_SC, ucp_Kharoshthi },
- { 308, PT_SC, ucp_Khmer },
- { 314, PT_GC, ucp_L },
- { 316, PT_LAMP, 0 },
- { 319, PT_SC, ucp_Lao },
- { 323, PT_SC, ucp_Latin },
- { 329, PT_SC, ucp_Limbu },
- { 335, PT_SC, ucp_Linear_B },
- { 344, PT_PC, ucp_Ll },
- { 347, PT_PC, ucp_Lm },
- { 350, PT_PC, ucp_Lo },
- { 353, PT_PC, ucp_Lt },
- { 356, PT_PC, ucp_Lu },
- { 359, PT_GC, ucp_M },
- { 361, PT_SC, ucp_Malayalam },
- { 371, PT_PC, ucp_Mc },
- { 374, PT_PC, ucp_Me },
- { 377, PT_PC, ucp_Mn },
- { 380, PT_SC, ucp_Mongolian },
- { 390, PT_SC, ucp_Myanmar },
- { 398, PT_GC, ucp_N },
- { 400, PT_PC, ucp_Nd },
- { 403, PT_SC, ucp_New_Tai_Lue },
- { 415, PT_SC, ucp_Nko },
- { 419, PT_PC, ucp_Nl },
- { 422, PT_PC, ucp_No },
- { 425, PT_SC, ucp_Ogham },
- { 431, PT_SC, ucp_Old_Italic },
- { 442, PT_SC, ucp_Old_Persian },
- { 454, PT_SC, ucp_Oriya },
- { 460, PT_SC, ucp_Osmanya },
- { 468, PT_GC, ucp_P },
- { 470, PT_PC, ucp_Pc },
- { 473, PT_PC, ucp_Pd },
- { 476, PT_PC, ucp_Pe },
- { 479, PT_PC, ucp_Pf },
- { 482, PT_SC, ucp_Phags_Pa },
- { 491, PT_SC, ucp_Phoenician },
- { 502, PT_PC, ucp_Pi },
- { 505, PT_PC, ucp_Po },
- { 508, PT_PC, ucp_Ps },
- { 511, PT_SC, ucp_Runic },
- { 517, PT_GC, ucp_S },
- { 519, PT_PC, ucp_Sc },
- { 522, PT_SC, ucp_Shavian },
- { 530, PT_SC, ucp_Sinhala },
- { 538, PT_PC, ucp_Sk },
- { 541, PT_PC, ucp_Sm },
- { 544, PT_PC, ucp_So },
- { 547, PT_SC, ucp_Syloti_Nagri },
- { 560, PT_SC, ucp_Syriac },
- { 567, PT_SC, ucp_Tagalog },
- { 575, PT_SC, ucp_Tagbanwa },
- { 584, PT_SC, ucp_Tai_Le },
- { 591, PT_SC, ucp_Tamil },
- { 597, PT_SC, ucp_Telugu },
- { 604, PT_SC, ucp_Thaana },
- { 611, PT_SC, ucp_Thai },
- { 616, PT_SC, ucp_Tibetan },
- { 624, PT_SC, ucp_Tifinagh },
- { 633, PT_SC, ucp_Ugaritic },
- { 642, PT_SC, ucp_Yi },
- { 645, PT_GC, ucp_Z },
- { 647, PT_PC, ucp_Zl },
- { 650, PT_PC, ucp_Zp },
- { 653, PT_PC, ucp_Zs }
+ { 0, PT_ANY, 0 },
+ { 4, PT_SC, ucp_Arabic },
+ { 11, PT_SC, ucp_Armenian },
+ { 20, PT_SC, ucp_Balinese },
+ { 29, PT_SC, ucp_Bengali },
+ { 37, PT_SC, ucp_Bopomofo },
+ { 46, PT_SC, ucp_Braille },
+ { 54, PT_SC, ucp_Buginese },
+ { 63, PT_SC, ucp_Buhid },
+ { 69, PT_GC, ucp_C },
+ { 71, PT_SC, ucp_Canadian_Aboriginal },
+ { 91, PT_SC, ucp_Carian },
+ { 98, PT_PC, ucp_Cc },
+ { 101, PT_PC, ucp_Cf },
+ { 104, PT_SC, ucp_Cham },
+ { 109, PT_SC, ucp_Cherokee },
+ { 118, PT_PC, ucp_Cn },
+ { 121, PT_PC, ucp_Co },
+ { 124, PT_SC, ucp_Common },
+ { 131, PT_SC, ucp_Coptic },
+ { 138, PT_PC, ucp_Cs },
+ { 141, PT_SC, ucp_Cuneiform },
+ { 151, PT_SC, ucp_Cypriot },
+ { 159, PT_SC, ucp_Cyrillic },
+ { 168, PT_SC, ucp_Deseret },
+ { 176, PT_SC, ucp_Devanagari },
+ { 187, PT_SC, ucp_Ethiopic },
+ { 196, PT_SC, ucp_Georgian },
+ { 205, PT_SC, ucp_Glagolitic },
+ { 216, PT_SC, ucp_Gothic },
+ { 223, PT_SC, ucp_Greek },
+ { 229, PT_SC, ucp_Gujarati },
+ { 238, PT_SC, ucp_Gurmukhi },
+ { 247, PT_SC, ucp_Han },
+ { 251, PT_SC, ucp_Hangul },
+ { 258, PT_SC, ucp_Hanunoo },
+ { 266, PT_SC, ucp_Hebrew },
+ { 273, PT_SC, ucp_Hiragana },
+ { 282, PT_SC, ucp_Inherited },
+ { 292, PT_SC, ucp_Kannada },
+ { 300, PT_SC, ucp_Katakana },
+ { 309, PT_SC, ucp_Kayah_Li },
+ { 318, PT_SC, ucp_Kharoshthi },
+ { 329, PT_SC, ucp_Khmer },
+ { 335, PT_GC, ucp_L },
+ { 337, PT_LAMP, 0 },
+ { 340, PT_SC, ucp_Lao },
+ { 344, PT_SC, ucp_Latin },
+ { 350, PT_SC, ucp_Lepcha },
+ { 357, PT_SC, ucp_Limbu },
+ { 363, PT_SC, ucp_Linear_B },
+ { 372, PT_PC, ucp_Ll },
+ { 375, PT_PC, ucp_Lm },
+ { 378, PT_PC, ucp_Lo },
+ { 381, PT_PC, ucp_Lt },
+ { 384, PT_PC, ucp_Lu },
+ { 387, PT_SC, ucp_Lycian },
+ { 394, PT_SC, ucp_Lydian },
+ { 401, PT_GC, ucp_M },
+ { 403, PT_SC, ucp_Malayalam },
+ { 413, PT_PC, ucp_Mc },
+ { 416, PT_PC, ucp_Me },
+ { 419, PT_PC, ucp_Mn },
+ { 422, PT_SC, ucp_Mongolian },
+ { 432, PT_SC, ucp_Myanmar },
+ { 440, PT_GC, ucp_N },
+ { 442, PT_PC, ucp_Nd },
+ { 445, PT_SC, ucp_New_Tai_Lue },
+ { 457, PT_SC, ucp_Nko },
+ { 461, PT_PC, ucp_Nl },
+ { 464, PT_PC, ucp_No },
+ { 467, PT_SC, ucp_Ogham },
+ { 473, PT_SC, ucp_Ol_Chiki },
+ { 482, PT_SC, ucp_Old_Italic },
+ { 493, PT_SC, ucp_Old_Persian },
+ { 505, PT_SC, ucp_Oriya },
+ { 511, PT_SC, ucp_Osmanya },
+ { 519, PT_GC, ucp_P },
+ { 521, PT_PC, ucp_Pc },
+ { 524, PT_PC, ucp_Pd },
+ { 527, PT_PC, ucp_Pe },
+ { 530, PT_PC, ucp_Pf },
+ { 533, PT_SC, ucp_Phags_Pa },
+ { 542, PT_SC, ucp_Phoenician },
+ { 553, PT_PC, ucp_Pi },
+ { 556, PT_PC, ucp_Po },
+ { 559, PT_PC, ucp_Ps },
+ { 562, PT_SC, ucp_Rejang },
+ { 569, PT_SC, ucp_Runic },
+ { 575, PT_GC, ucp_S },
+ { 577, PT_SC, ucp_Saurashtra },
+ { 588, PT_PC, ucp_Sc },
+ { 591, PT_SC, ucp_Shavian },
+ { 599, PT_SC, ucp_Sinhala },
+ { 607, PT_PC, ucp_Sk },
+ { 610, PT_PC, ucp_Sm },
+ { 613, PT_PC, ucp_So },
+ { 616, PT_SC, ucp_Sundanese },
+ { 626, PT_SC, ucp_Syloti_Nagri },
+ { 639, PT_SC, ucp_Syriac },
+ { 646, PT_SC, ucp_Tagalog },
+ { 654, PT_SC, ucp_Tagbanwa },
+ { 663, PT_SC, ucp_Tai_Le },
+ { 670, PT_SC, ucp_Tamil },
+ { 676, PT_SC, ucp_Telugu },
+ { 683, PT_SC, ucp_Thaana },
+ { 690, PT_SC, ucp_Thai },
+ { 695, PT_SC, ucp_Tibetan },
+ { 703, PT_SC, ucp_Tifinagh },
+ { 712, PT_SC, ucp_Ugaritic },
+ { 721, PT_SC, ucp_Vai },
+ { 725, PT_SC, ucp_Yi },
+ { 728, PT_GC, ucp_Z },
+ { 730, PT_PC, ucp_Zl },
+ { 733, PT_PC, ucp_Zp },
+ { 736, PT_PC, ucp_Zs }
};
const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
Modified: trunk/glib/pcre/pcre_ucp_searchfuncs.c
==============================================================================
--- trunk/glib/pcre/pcre_ucp_searchfuncs.c (original)
+++ trunk/glib/pcre/pcre_ucp_searchfuncs.c Sun Jan 18 06:32:03 2009
@@ -43,58 +43,9 @@
/* This module contains code for searching the table of Unicode character
properties. */
-#ifdef HAVE_CONFIG_H
-#include "config.h"
-#endif
-
#include "pcre_internal.h"
#include "ucp.h" /* Category definitions */
-#include "ucpinternal.h" /* Internal table details */
-
-
-/* Table to translate from particular type value to the general value. */
-
-static int ucp_gentype[] = {
- ucp_C, ucp_C, ucp_C, ucp_C, ucp_C, /* Cc, Cf, Cn, Co, Cs */
- ucp_L, ucp_L, ucp_L, ucp_L, ucp_L, /* Ll, Lu, Lm, Lo, Lt */
- ucp_M, ucp_M, ucp_M, /* Mc, Me, Mn */
- ucp_N, ucp_N, ucp_N, /* Nd, Nl, No */
- ucp_P, ucp_P, ucp_P, ucp_P, ucp_P, /* Pc, Pd, Pe, Pf, Pi */
- ucp_P, ucp_P, /* Ps, Po */
- ucp_S, ucp_S, ucp_S, ucp_S, /* Sc, Sk, Sm, So */
- ucp_Z, ucp_Z, ucp_Z /* Zl, Zp, Zs */
-};
-
-
-
-/*************************************************
-* Search table and return type *
-*************************************************/
-
-/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
-character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
-
-Arguments:
- c the character value
- type_ptr the detailed character type is returned here
- script_ptr the script is returned here
-
-Returns: the character type category
-*/
-
-int
-_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
-{
-/* Note that the Unicode types have the same values in glib and in
- * PCRE, so ucp_Ll == G_UNICODE_LOWERCASE_LETTER,
- * ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so on. */
-*type_ptr = g_unichar_type(c);
-*script_ptr = g_unichar_get_script(c);
-return ucp_gentype[*type_ptr];
-}
-
-
/*************************************************
@@ -113,7 +64,7 @@
unsigned int
_pcre_ucp_othercase(const unsigned int c)
{
-int other_case = NOTACHAR;
+unsigned int other_case = NOTACHAR;
if (g_unichar_islower(c))
other_case = g_unichar_toupper(c);
Modified: trunk/glib/pcre/pcre_valid_utf8.c
==============================================================================
--- trunk/glib/pcre/pcre_valid_utf8.c (original)
+++ trunk/glib/pcre/pcre_valid_utf8.c Sun Jan 18 06:32:03 2009
@@ -1,4 +1,3 @@
-#include "config.h"
#include "pcre_internal.h"
/*
Modified: trunk/glib/pcre/pcre_version.c
==============================================================================
--- trunk/glib/pcre/pcre_version.c (original)
+++ trunk/glib/pcre/pcre_version.c Sun Jan 18 06:32:03 2009
@@ -79,7 +79,7 @@
pre-processor time. This hack uses a standard trick for avoiding calling
the STRING macro with an empty argument when doing the test. */
-PCRE_EXP_DEFN const char *
+PCRE_EXP_DEFN const char * PCRE_CALL_CONVENTION
pcre_version(void)
{
return (XSTRING(Z PCRE_PRERELEASE)[1] == 0)?
Modified: trunk/glib/pcre/pcre_xclass.c
==============================================================================
--- trunk/glib/pcre/pcre_xclass.c (original)
+++ trunk/glib/pcre/pcre_xclass.c Sun Jan 18 06:32:03 2009
@@ -104,9 +104,7 @@
#ifdef SUPPORT_UCP
else /* XCL_PROP & XCL_NOTPROP */
{
- int chartype, script;
- int category = _pcre_ucp_findprop(c, &chartype, &script);
-
+ int chartype = UCD_CHARTYPE(c);
switch(*data)
{
case PT_ANY:
@@ -119,7 +117,7 @@
break;
case PT_GC:
- if ((data[1] == category) == (t == XCL_PROP)) return !negated;
+ if ((data[1] == _pcre_ucp_gentype[chartype]) == (t == XCL_PROP)) return !negated;
break;
case PT_PC:
@@ -127,7 +125,7 @@
break;
case PT_SC:
- if ((data[1] == script) == (t == XCL_PROP)) return !negated;
+ if ((data[1] == UCD_SCRIPT(c)) == (t == XCL_PROP)) return !negated;
break;
/* This should never occur, but compilers may mutter if there is no
Modified: trunk/glib/pcre/ucp.h
==============================================================================
--- trunk/glib/pcre/ucp.h (original)
+++ trunk/glib/pcre/ucp.h Sun Jan 18 06:32:03 2009
@@ -125,7 +125,18 @@
ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM, /* New for Unicode 5.0.0 */
ucp_Nko = G_UNICODE_SCRIPT_NKO, /* New for Unicode 5.0.0 */
ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA, /* New for Unicode 5.0.0 */
- ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN /* New for Unicode 5.0.0 */
+ ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.0.0 */
+ ucp_Carian = G_UNICODE_SCRIPT_CARIAN, /* New for Unicode 5.1 */
+ ucp_Cham = G_UNICODE_SCRIPT_CHAM, /* New for Unicode 5.1 */
+ ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI, /* New for Unicode 5.1 */
+ ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA, /* New for Unicode 5.1 */
+ ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN, /* New for Unicode 5.1 */
+ ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN, /* New for Unicode 5.1 */
+ ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI, /* New for Unicode 5.1 */
+ ucp_Rejang = G_UNICODE_SCRIPT_REJANG, /* New for Unicode 5.1 */
+ ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, /* New for Unicode 5.1 */
+ ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE, /* New for Unicode 5.1 */
+ ucp_Vai = G_UNICODE_SCRIPT_VAI /* New for Unicode 5.1 */
};
#endif
[
Date Prev][
Date Next] [
Thread Prev][
Thread Next]
[
Thread Index]
[
Date Index]
[
Author Index]