[glib] Update the include pcre to 8.02



commit 85621f1a0f4d77cde71e37ed8212e707f4af441d
Author: Matthias Clasen <mclasen redhat com>
Date:   Sun Jun 20 01:46:35 2010 -0400

    Update the include pcre to 8.02

 glib/pcre/Makefile.am            |    1 -
 glib/pcre/pcre.h                 |   21 +-
 glib/pcre/pcre_compile.c         | 1329 ++++++++++++++++++++++----------
 glib/pcre/pcre_config.c          |    6 +-
 glib/pcre/pcre_dfa_exec.c        |  514 +++++++++----
 glib/pcre/pcre_exec.c            | 1559 +++++++++++++++++++++++++++++---------
 glib/pcre/pcre_fullinfo.c        |   15 +-
 glib/pcre/pcre_globals.c         |   23 +-
 glib/pcre/pcre_info.c            |    4 +-
 glib/pcre/pcre_internal.h        |  759 +++++++++++++++++--
 glib/pcre/pcre_newline.c         |    8 +-
 glib/pcre/pcre_study.c           |  448 ++++++++++-
 glib/pcre/pcre_tables.c          |  631 ++++++++++------
 glib/pcre/pcre_try_flipped.c     |    6 +-
 glib/pcre/pcre_ucp_searchfuncs.c |   46 ++-
 glib/pcre/pcre_xclass.c          |    7 +-
 glib/pcre/ucp.h                  |   53 +-
 glib/pcre/ucpinternal.h          |   92 ---
 18 files changed, 4131 insertions(+), 1391 deletions(-)
---
diff --git a/glib/pcre/Makefile.am b/glib/pcre/Makefile.am
index b01c44f..ecc059c 100644
--- a/glib/pcre/Makefile.am
+++ b/glib/pcre/Makefile.am
@@ -53,7 +53,6 @@ libpcre_la_SOURCES = \
 	pcre.h \
 	pcre_internal.h \
 	ucp.h \
-	ucpinternal.h \
 	$(libpcre_headers)
 
 libpcre_la_LIBADD = $(DEP_LIBS)
diff --git a/glib/pcre/pcre.h b/glib/pcre/pcre.h
index 8fc80a7..e23d49c 100644
--- a/glib/pcre/pcre.h
+++ b/glib/pcre/pcre.h
@@ -5,7 +5,7 @@
 /* This is the public header file for the PCRE library, to be #included by
 applications that call the PCRE functions.
 
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -41,10 +41,10 @@ POSSIBILITY OF SUCH DAMAGE.
 
 /* The current PCRE version information. */
 
-#define PCRE_MAJOR          7
-#define PCRE_MINOR          8
+#define PCRE_MAJOR          8
+#define PCRE_MINOR          02
 #define PCRE_PRERELEASE     
-#define PCRE_DATE           2008-09-05
+#define PCRE_DATE           2010-03-19
 
 /* When an application links to a PCRE DLL in Windows, the symbols that are
 imported have to be identified as such. When building PCRE, the appropriate
@@ -95,7 +95,8 @@ it is needed here for malloc. */
 extern "C" {
 #endif
 
-/* Options */
+/* Options. Some are compile-time only, some are run-time only, and some are
+both, so we keep them all distinct. */
 
 #define PCRE_CASELESS           0x00000001
 #define PCRE_MULTILINE          0x00000002
@@ -112,7 +113,8 @@ extern "C" {
 #define PCRE_NO_AUTO_CAPTURE    0x00001000
 #define PCRE_NO_UTF8_CHECK      0x00002000
 #define PCRE_AUTO_CALLOUT       0x00004000
-#define PCRE_PARTIAL            0x00008000
+#define PCRE_PARTIAL_SOFT       0x00008000
+#define PCRE_PARTIAL            0x00008000  /* Backwards compatible synonym */
 #define PCRE_DFA_SHORTEST       0x00010000
 #define PCRE_DFA_RESTART        0x00020000
 #define PCRE_FIRSTLINE          0x00040000
@@ -125,6 +127,10 @@ extern "C" {
 #define PCRE_BSR_ANYCRLF        0x00800000
 #define PCRE_BSR_UNICODE        0x01000000
 #define PCRE_JAVASCRIPT_COMPAT  0x02000000
+#define PCRE_NO_START_OPTIMIZE  0x04000000
+#define PCRE_NO_START_OPTIMISE  0x04000000
+#define PCRE_PARTIAL_HARD       0x08000000
+#define PCRE_NOTEMPTY_ATSTART   0x10000000
 
 /* Exec-time and get/set-time error codes */
 
@@ -171,6 +177,7 @@ extern "C" {
 #define PCRE_INFO_OKPARTIAL         12
 #define PCRE_INFO_JCHANGED          13
 #define PCRE_INFO_HASCRORLF         14
+#define PCRE_INFO_MINLENGTH         15
 
 /* Request types for pcre_config(). Do not re-arrange, in order to remain
 compatible. */
@@ -250,7 +257,7 @@ typedef struct pcre_callout_block {
 #define pcre_free g_free
 #define pcre_stack_malloc g_try_malloc
 
-PCRE_EXP_DECL int   (*pcre_callout)(pcre_callout_block *);
+int   (*pcre_callout)(pcre_callout_block *);
 
 /* Exported PCRE functions */
 
diff --git a/glib/pcre/pcre_compile.c b/glib/pcre/pcre_compile.c
index ae68fb5..653ab67 100644
--- a/glib/pcre/pcre_compile.c
+++ b/glib/pcre/pcre_compile.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -53,10 +53,11 @@ supporting internal functions that are not used by other modules. */
 #include "pcre_internal.h"
 
 
-/* When DEBUG is defined, we need the pcre_printint() function, which is also
-used by pcretest. DEBUG is not defined when building a production library. */
+/* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
+also used by pcretest. PCRE_DEBUG is not defined when building a production
+library. */
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 #include "pcre_printint.src"
 #endif
 
@@ -91,27 +92,67 @@ is 4 there is plenty of room. */
 
 #define COMPILE_WORK_SIZE (4096)
 
+/* The overrun tests check for a slightly smaller size so that they detect the
+overrun before it actually does run off the end of the data block. */
+
+#define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
+
 
 /* Table for handling escaped characters in the range '0'-'z'. Positive returns
 are simple data values; negative values are for special things like \d and so
 on. Zero means further processing is needed (for things like \x), or the escape
 is invalid. */
 
-#ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
+#ifndef EBCDIC
+
+/* This is the "normal" table for ASCII systems or for EBCDIC systems running
+in UTF-8 mode. */
+
 static const short int escapes[] = {
-     0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
-     0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
-   '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E,      0, -ESC_G,   /* @ - G */
--ESC_H,      0,      0, -ESC_K,      0,      0,      0,      0,   /* H - O */
--ESC_P, -ESC_Q, -ESC_R, -ESC_S,      0,      0, -ESC_V, -ESC_W,   /* P - W */
--ESC_X,      0, -ESC_Z,    '[',   '\\',    ']',    '^',    '_',   /* X - _ */
-   '`',      7, -ESC_b,      0, -ESC_d,  ESC_e,  ESC_f,      0,   /* ` - g */
--ESC_h,      0,      0, -ESC_k,      0,      0,  ESC_n,      0,   /* h - o */
--ESC_p,      0,  ESC_r, -ESC_s,  ESC_tee,    0, -ESC_v, -ESC_w,   /* p - w */
-     0,      0, -ESC_z                                            /* x - z */
+     0,                       0,
+     0,                       0,
+     0,                       0,
+     0,                       0,
+     0,                       0,
+     CHAR_COLON,              CHAR_SEMICOLON,
+     CHAR_LESS_THAN_SIGN,     CHAR_EQUALS_SIGN,
+     CHAR_GREATER_THAN_SIGN,  CHAR_QUESTION_MARK,
+     CHAR_COMMERCIAL_AT,      -ESC_A,
+     -ESC_B,                  -ESC_C,
+     -ESC_D,                  -ESC_E,
+     0,                       -ESC_G,
+     -ESC_H,                  0,
+     0,                       -ESC_K,
+     0,                       0,
+     0,                       0,
+     -ESC_P,                  -ESC_Q,
+     -ESC_R,                  -ESC_S,
+     0,                       0,
+     -ESC_V,                  -ESC_W,
+     -ESC_X,                  0,
+     -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
+     CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
+     CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
+     CHAR_GRAVE_ACCENT,       7,
+     -ESC_b,                  0,
+     -ESC_d,                  ESC_e,
+     ESC_f,                   0,
+     -ESC_h,                  0,
+     0,                       -ESC_k,
+     0,                       0,
+     ESC_n,                   0,
+     -ESC_p,                  0,
+     ESC_r,                   -ESC_s,
+     ESC_tee,                 0,
+     -ESC_v,                  -ESC_w,
+     0,                       0,
+     -ESC_z
 };
 
-#else           /* This is the "abnormal" table for EBCDIC systems */
+#else
+
+/* This is the "abnormal" table for EBCDIC systems without UTF-8 support. */
+
 static const short int escapes[] = {
 /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
 /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
@@ -142,7 +183,9 @@ static const short int escapes[] = {
 
 /* Table of special "verbs" like (*PRUNE). This is a short table, so it is
 searched linearly. Put all the names into a single string, in order to reduce
-the number of relocations when a shared library is dynamically linked. */
+the number of relocations when a shared library is dynamically linked. The
+string is built from string macros so that it works in UTF-8 mode on EBCDIC
+platforms. */
 
 typedef struct verbitem {
   int   len;
@@ -150,13 +193,13 @@ typedef struct verbitem {
 } verbitem;
 
 static const char verbnames[] =
-  "ACCEPT\0"
-  "COMMIT\0"
-  "F\0"
-  "FAIL\0"
-  "PRUNE\0"
-  "SKIP\0"
-  "THEN";
+  STRING_ACCEPT0
+  STRING_COMMIT0
+  STRING_F0
+  STRING_FAIL0
+  STRING_PRUNE0
+  STRING_SKIP0
+  STRING_THEN;
 
 static const verbitem verbs[] = {
   { 6, OP_ACCEPT },
@@ -178,9 +221,10 @@ length entry. The first three must be alpha, lower, upper, as this is assumed
 for handling case independence. */
 
 static const char posix_names[] =
-  "alpha\0"  "lower\0"  "upper\0"  "alnum\0"  "ascii\0"  "blank\0"
-  "cntrl\0"  "digit\0"  "graph\0"  "print\0"  "punct\0"  "space\0"
-  "word\0"   "xdigit";
+  STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
+  STRING_ascii0 STRING_blank0 STRING_cntrl0 STRING_digit0
+  STRING_graph0 STRING_print0 STRING_punct0 STRING_space0
+  STRING_word0  STRING_xdigit;
 
 static const uschar posix_name_lengths[] = {
   5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
@@ -224,7 +268,11 @@ the number of relocations needed when a shared library is loaded dynamically,
 it is now one long string. We cannot use a table of offsets, because the
 lengths of inserts such as XSTRING(MAX_NAME_SIZE) are not known. Instead, we
 simply count through to the one we want - this isn't a performance issue
-because these strings are used only when there is a compilation error. */
+because these strings are used only when there is a compilation error.
+
+Each substring ends with \0 to insert a null character. This includes the final
+substring, so that the whole string ends with \0\0, which can be detected when
+counting through. */
 
 static const char error_texts[] =
   "no error\0"
@@ -303,7 +351,9 @@ static const char error_texts[] =
   "number is too big\0"
   "subpattern name expected\0"
   "digit expected after (?+\0"
-  "] is an invalid data character in JavaScript compatibility mode";
+  "] is an invalid data character in JavaScript compatibility mode\0"
+  /* 65 */
+  "different names for subpatterns of the same number are not allowed\0";
 
 
 /* Definition to allow mutual recursion */
@@ -331,7 +381,11 @@ static const char *
 find_error_text(int n)
 {
 const char *s = error_texts;
-for (; n > 0; n--) while (*s++ != 0) {};
+for (; n > 0; n--)
+  {
+  while (*s++ != 0) {};
+  if (*s == 0) return "Error text not found (please report)";
+  }
 return s;
 }
 
@@ -379,9 +433,9 @@ if (c == 0) *errorcodeptr = ERR1;
 in a table. A non-zero result is something that can be returned immediately.
 Otherwise further processing may be required. */
 
-#ifndef EBCDIC  /* ASCII coding */
-else if (c < '0' || c > 'z') {}                           /* Not alphanumeric */
-else if ((i = escapes[c - '0']) != 0) c = i;
+#ifndef EBCDIC  /* ASCII/UTF-8 coding */
+else if (c < CHAR_0 || c > CHAR_z) {}                     /* Not alphanumeric */
+else if ((i = escapes[c - CHAR_0]) != 0) c = i;
 
 #else           /* EBCDIC coding */
 else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphanumeric */
@@ -400,11 +454,11 @@ else
     /* A number of Perl escapes are not handled by PCRE. We give an explicit
     error. */
 
-    case 'l':
-    case 'L':
-    case 'N':
-    case 'u':
-    case 'U':
+    case CHAR_l:
+    case CHAR_L:
+    case CHAR_N:
+    case CHAR_u:
+    case CHAR_U:
     *errorcodeptr = ERR37;
     break;
 
@@ -424,8 +478,8 @@ else
     (possibly recursive) subroutine calls, _not_ backreferences. Just return
     the -ESC_g code (cf \k). */
 
-    case 'g':
-    if (ptr[1] == '<' || ptr[1] == '\'')
+    case CHAR_g:
+    if (ptr[1] == CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_APOSTROPHE)
       {
       c = -ESC_g;
       break;
@@ -433,12 +487,12 @@ else
 
     /* Handle the Perl-compatible cases */
 
-    if (ptr[1] == '{')
+    if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
       {
       const uschar *p;
-      for (p = ptr+2; *p != 0 && *p != '}'; p++)
-        if (*p != '-' && g_ascii_isdigit (*p) == 0) break;
-      if (*p != 0 && *p != '}')
+      for (p = ptr+2; *p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET; p++)
+        if (*p != CHAR_MINUS && g_ascii_isdigit(*p) == 0) break;
+      if (*p != 0 && *p != CHAR_RIGHT_CURLY_BRACKET)
         {
         c = -ESC_k;
         break;
@@ -448,7 +502,7 @@ else
       }
     else braced = FALSE;
 
-    if (ptr[1] == '-')
+    if (ptr[1] == CHAR_MINUS)
       {
       negated = TRUE;
       ptr++;
@@ -456,8 +510,8 @@ else
     else negated = FALSE;
 
     c = 0;
-    while (g_ascii_isdigit (ptr[1]) != 0)
-      c = c * 10 + *(++ptr) - '0';
+    while (g_ascii_isdigit(ptr[1]) != 0)
+      c = c * 10 + *(++ptr) - CHAR_0;
 
     if (c < 0)   /* Integer overflow */
       {
@@ -465,7 +519,7 @@ else
       break;
       }
 
-    if (braced && *(++ptr) != '}')
+    if (braced && *(++ptr) != CHAR_RIGHT_CURLY_BRACKET)
       {
       *errorcodeptr = ERR57;
       break;
@@ -502,15 +556,15 @@ else
     value is greater than 377, the least significant 8 bits are taken. Inside a
     character class, \ followed by a digit is always an octal number. */
 
-    case '1': case '2': case '3': case '4': case '5':
-    case '6': case '7': case '8': case '9':
+    case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
+    case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
 
     if (!isclass)
       {
       oldptr = ptr;
-      c -= '0';
-      while (g_ascii_isdigit (ptr[1]))
-        c = c * 10 + *(++ptr) - '0';
+      c -= CHAR_0;
+      while (g_ascii_isdigit(ptr[1]) != 0)
+        c = c * 10 + *(++ptr) - CHAR_0;
       if (c < 0)    /* Integer overflow */
         {
         *errorcodeptr = ERR61;
@@ -528,7 +582,7 @@ else
     generates a binary zero byte and treats the digit as a following literal.
     Thus we have to pull back the pointer by one. */
 
-    if ((c = *ptr) >= '8')
+    if ((c = *ptr) >= CHAR_8)
       {
       ptr--;
       c = 0;
@@ -541,10 +595,10 @@ else
     to do). Nowadays we allow for larger numbers in UTF-8 mode, but no more
     than 3 octal digits. */
 
-    case '0':
-    c -= '0';
-    while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7')
-        c = c * 8 + *(++ptr) - '0';
+    case CHAR_0:
+    c -= CHAR_0;
+    while(i++ < 2 && ptr[1] >= CHAR_0 && ptr[1] <= CHAR_7)
+        c = c * 8 + *(++ptr) - CHAR_0;
     if (!utf8 && c > 255) *errorcodeptr = ERR51;
     break;
 
@@ -552,29 +606,29 @@ else
     than 0xff in utf8 mode, but only if the ddd are hex digits. If not, { is
     treated as a data character. */
 
-    case 'x':
-    if (ptr[1] == '{')
+    case CHAR_x:
+    if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
       {
       const uschar *pt = ptr + 2;
       int count = 0;
 
       c = 0;
-      while (g_ascii_isxdigit (*pt) != 0)
+      while (g_ascii_isxdigit(*pt) != 0)
         {
         register int cc = *pt++;
-        if (c == 0 && cc == '0') continue;     /* Leading zeroes */
+        if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
         count++;
 
-#ifndef EBCDIC  /* ASCII coding */
-        if (cc >= 'a') cc -= 32;               /* Convert to upper case */
-        c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
+#ifndef EBCDIC  /* ASCII/UTF-8 coding */
+        if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
+        c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
 #else           /* EBCDIC coding */
-        if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
-        c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
+        if (cc >= CHAR_a && cc <= CHAR_z) cc += 64;  /* Convert to upper case */
+        c = (c << 4) + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
 #endif
         }
 
-      if (*pt == '}')
+      if (*pt == CHAR_RIGHT_CURLY_BRACKET)
         {
         if (c < 0 || count > (utf8? 8 : 2)) *errorcodeptr = ERR34;
         ptr = pt;
@@ -588,16 +642,16 @@ else
     /* Read just a single-byte hex-defined char */
 
     c = 0;
-    while (i++ < 2 && g_ascii_isxdigit (ptr[1]) != 0)
+    while (i++ < 2 && g_ascii_isxdigit(ptr[1]) != 0)
       {
-      int cc;                               /* Some compilers don't like ++ */
-      cc = *(++ptr);                        /* in initializers */
-#ifndef EBCDIC  /* ASCII coding */
-      if (cc >= 'a') cc -= 32;              /* Convert to upper case */
-      c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
+      int cc;                                  /* Some compilers don't like */
+      cc = *(++ptr);                           /* ++ in initializers */
+#ifndef EBCDIC  /* ASCII/UTF-8 coding */
+      if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
+      c = c * 16 + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
 #else           /* EBCDIC coding */
-      if (cc <= 'z') cc += 64;              /* Convert to upper case */
-      c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
+      if (cc <= CHAR_z) cc += 64;              /* Convert to upper case */
+      c = c * 16 + cc - ((cc >= CHAR_0)? CHAR_0 : (CHAR_A - 10));
 #endif
       }
     break;
@@ -606,7 +660,7 @@ else
     This coding is ASCII-specific, but then the whole concept of \cx is
     ASCII-specific. (However, an EBCDIC equivalent has now been added.) */
 
-    case 'c':
+    case CHAR_c:
     c = *(++ptr);
     if (c == 0)
       {
@@ -614,11 +668,11 @@ else
       break;
       }
 
-#ifndef EBCDIC  /* ASCII coding */
-    if (c >= 'a' && c <= 'z') c -= 32;
+#ifndef EBCDIC  /* ASCII/UTF-8 coding */
+    if (c >= CHAR_a && c <= CHAR_z) c -= 32;
     c ^= 0x40;
 #else           /* EBCDIC coding */
-    if (c >= 'a' && c <= 'z') c += 64;
+    if (c >= CHAR_a && c <= CHAR_z) c += 64;
     c ^= 0xC0;
 #endif
     break;
@@ -680,9 +734,9 @@ if (c == 0) goto ERROR_RETURN;
 /* \P or \p can be followed by a name in {}, optionally preceded by ^ for
 negation. */
 
-if (c == '{')
+if (c == CHAR_LEFT_CURLY_BRACKET)
   {
-  if (ptr[1] == '^')
+  if (ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
     {
     *negptr = TRUE;
     ptr++;
@@ -691,10 +745,10 @@ if (c == '{')
     {
     c = *(++ptr);
     if (c == 0) goto ERROR_RETURN;
-    if (c == '}') break;
+    if (c == CHAR_RIGHT_CURLY_BRACKET) break;
     name[i] = c;
     }
-  if (c !='}') goto ERROR_RETURN;
+  if (c != CHAR_RIGHT_CURLY_BRACKET) goto ERROR_RETURN;
   name[i] = 0;
   }
 
@@ -757,17 +811,17 @@ Returns:    TRUE or FALSE
 static BOOL
 is_counted_repeat(const uschar *p)
 {
-if (g_ascii_isdigit (*p++) == 0) return FALSE;
-while (g_ascii_isdigit (*p) != 0) p++;
-if (*p == '}') return TRUE;
+if (g_ascii_isdigit(*p++) == 0) return FALSE;
+while (g_ascii_isdigit(*p) != 0) p++;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
 
-if (*p++ != ',') return FALSE;
-if (*p == '}') return TRUE;
+if (*p++ != CHAR_COMMA) return FALSE;
+if (*p == CHAR_RIGHT_CURLY_BRACKET) return TRUE;
 
-if (g_ascii_isdigit (*p++) == 0) return FALSE;
-while (g_ascii_isdigit (*p) != 0) p++;
+if (g_ascii_isdigit(*p++) == 0) return FALSE;
+while (g_ascii_isdigit(*p) != 0) p++;
 
-return (*p == '}');
+return (*p == CHAR_RIGHT_CURLY_BRACKET);
 }
 
 
@@ -800,7 +854,7 @@ int max = -1;
 /* Read the minimum value and do a paranoid check: a negative value indicates
 an integer overflow. */
 
-while (g_ascii_isdigit (*p) != 0) min = min * 10 + *p++ - '0';
+while (g_ascii_isdigit(*p) != 0) min = min * 10 + *p++ - CHAR_0;
 if (min < 0 || min > 65535)
   {
   *errorcodeptr = ERR5;
@@ -810,12 +864,12 @@ if (min < 0 || min > 65535)
 /* Read the maximum value if there is one, and again do a paranoid on its size.
 Also, max must not be less than min. */
 
-if (*p == '}') max = min; else
+if (*p == CHAR_RIGHT_CURLY_BRACKET) max = min; else
   {
-  if (*(++p) != '}')
+  if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
     {
     max = 0;
-    while(g_ascii_isdigit (*p) != 0) max = max * 10 + *p++ - '0';
+    while(g_ascii_isdigit(*p) != 0) max = max * 10 + *p++ - CHAR_0;
     if (max < 0 || max > 65535)
       {
       *errorcodeptr = ERR5;
@@ -840,47 +894,117 @@ return p;
 
 
 /*************************************************
-*       Find forward referenced subpattern       *
+*  Subroutine for finding forward reference      *
 *************************************************/
 
-/* This function scans along a pattern's text looking for capturing
+/* This recursive function is called only from find_parens() below. The
+top-level call starts at the beginning of the pattern. All other calls must
+start at a parenthesis. It scans along a pattern's text looking for capturing
 subpatterns, and counting them. If it finds a named pattern that matches the
 name it is given, it returns its number. Alternatively, if the name is NULL, it
-returns when it reaches a given numbered subpattern. This is used for forward
-references to subpatterns. We know that if (?P< is encountered, the name will
-be terminated by '>' because that is checked in the first pass.
+returns when it reaches a given numbered subpattern. We know that if (?P< is
+encountered, the name will be terminated by '>' because that is checked in the
+first pass. Recursion is used to keep track of subpatterns that reset the
+capturing group numbers - the (?| feature.
 
 Arguments:
-  ptr          current position in the pattern
+  ptrptr       address of the current character pointer (updated)
   cd           compile background data
   name         name to seek, or NULL if seeking a numbered subpattern
   lorn         name length, or subpattern number if name is NULL
   xmode        TRUE if we are in /x mode
+  count        pointer to the current capturing subpattern number (updated)
 
 Returns:       the number of the named subpattern, or -1 if not found
 */
 
 static int
-find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn,
-  BOOL xmode)
+find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
+  BOOL xmode, int *count)
 {
-const uschar *thisname;
-int count = cd->bracount;
+uschar *ptr = *ptrptr;
+int start_count = *count;
+int hwm_count = start_count;
+BOOL dup_parens = FALSE;
 
-for (; *ptr != 0; ptr++)
+/* If the first character is a parenthesis, check on the type of group we are
+dealing with. The very first call may not start with a parenthesis. */
+
+if (ptr[0] == CHAR_LEFT_PARENTHESIS)
   {
-  int term;
+  if (ptr[1] == CHAR_QUESTION_MARK &&
+      ptr[2] == CHAR_VERTICAL_LINE)
+    {
+    ptr += 3;
+    dup_parens = TRUE;
+    }
+
+  /* Handle a normal, unnamed capturing parenthesis */
+
+  else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)
+    {
+    *count += 1;
+    if (name == NULL && *count == lorn) return *count;
+    ptr++;
+    }
+
+  /* Handle a condition. If it is an assertion, just carry on so that it
+  is processed as normal. If not, skip to the closing parenthesis of the
+  condition (there can't be any nested parens). */
+
+  else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
+    {
+    ptr += 2;
+    if (ptr[1] != CHAR_QUESTION_MARK)
+      {
+      while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
+      if (*ptr != 0) ptr++;
+      }
+    }
+
+  /* We have either (? or (* and not a condition */
+
+  else
+    {
+    ptr += 2;
+    if (*ptr == CHAR_P) ptr++;                      /* Allow optional P */
+
+    /* We have to disambiguate (?<! and (?<= from (?<name> for named groups */
+
+    if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
+        ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
+      {
+      int term;
+      const uschar *thisname;
+      *count += 1;
+      if (name == NULL && *count == lorn) return *count;
+      term = *ptr++;
+      if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
+      thisname = ptr;
+      while (*ptr != term) ptr++;
+      if (name != NULL && lorn == ptr - thisname &&
+          strncmp((const char *)name, (const char *)thisname, lorn) == 0)
+        return *count;
+      term++;
+      }
+    }
+  }
+
+/* Past any initial parenthesis handling, scan for parentheses or vertical
+bars. */
 
+for (; *ptr != 0; ptr++)
+  {
   /* Skip over backslashed characters and also entire \Q...\E */
 
-  if (*ptr == '\\')
+  if (*ptr == CHAR_BACKSLASH)
     {
-    if (*(++ptr) == 0) return -1;
-    if (*ptr == 'Q') for (;;)
+    if (*(++ptr) == 0) goto FAIL_EXIT;
+    if (*ptr == CHAR_Q) for (;;)
       {
-      while (*(++ptr) != 0 && *ptr != '\\') {};
-      if (*ptr == 0) return -1;
-      if (*(++ptr) == 'E') break;
+      while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
+      if (*ptr == 0) goto FAIL_EXIT;
+      if (*(++ptr) == CHAR_E) break;
       }
     continue;
     }
@@ -888,42 +1012,50 @@ for (; *ptr != 0; ptr++)
   /* Skip over character classes; this logic must be similar to the way they
   are handled for real. If the first character is '^', skip it. Also, if the
   first few characters (either before or after ^) are \Q\E or \E we skip them
-  too. This makes for compatibility with Perl. */
+  too. This makes for compatibility with Perl. Note the use of STR macros to
+  encode "Q\\E" so that it works in UTF-8 on EBCDIC platforms. */
 
-  if (*ptr == '[')
+  if (*ptr == CHAR_LEFT_SQUARE_BRACKET)
     {
     BOOL negate_class = FALSE;
     for (;;)
       {
-      int c = *(++ptr);
-      if (c == '\\')
+      if (ptr[1] == CHAR_BACKSLASH)
         {
-        if (ptr[1] == 'E') ptr++;
-          else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
-            else break;
+        if (ptr[2] == CHAR_E)
+          ptr+= 2;
+        else if (strncmp((const char *)ptr+2,
+                 STR_Q STR_BACKSLASH STR_E, 3) == 0)
+          ptr += 4;
+        else
+          break;
         }
-      else if (!negate_class && c == '^')
+      else if (!negate_class && ptr[1] == CHAR_CIRCUMFLEX_ACCENT)
+        {
         negate_class = TRUE;
+        ptr++;
+        }
       else break;
       }
 
     /* If the next character is ']', it is a data character that must be
     skipped, except in JavaScript compatibility mode. */
 
-    if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
+    if (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET &&
+        (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0)
       ptr++;
 
-    while (*(++ptr) != ']')
+    while (*(++ptr) != CHAR_RIGHT_SQUARE_BRACKET)
       {
       if (*ptr == 0) return -1;
-      if (*ptr == '\\')
+      if (*ptr == CHAR_BACKSLASH)
         {
-        if (*(++ptr) == 0) return -1;
-        if (*ptr == 'Q') for (;;)
+        if (*(++ptr) == 0) goto FAIL_EXIT;
+        if (*ptr == CHAR_Q) for (;;)
           {
-          while (*(++ptr) != 0 && *ptr != '\\') {};
-          if (*ptr == 0) return -1;
-          if (*(++ptr) == 'E') break;
+          while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {};
+          if (*ptr == 0) goto FAIL_EXIT;
+          if (*(++ptr) == CHAR_E) break;
           }
         continue;
         }
@@ -933,49 +1065,92 @@ for (; *ptr != 0; ptr++)
 
   /* Skip comments in /x mode */
 
-  if (xmode && *ptr == '#')
+  if (xmode && *ptr == CHAR_NUMBER_SIGN)
     {
-    while (*(++ptr) != 0 && *ptr != '\n') {};
-    if (*ptr == 0) return -1;
+    while (*(++ptr) != 0 && *ptr != CHAR_NL) {};
+    if (*ptr == 0) goto FAIL_EXIT;
     continue;
     }
 
-  /* An opening parens must now be a real metacharacter */
+  /* Check for the special metacharacters */
 
-  if (*ptr != '(') continue;
-  if (ptr[1] != '?' && ptr[1] != '*')
+  if (*ptr == CHAR_LEFT_PARENTHESIS)
     {
-    count++;
-    if (name == NULL && count == lorn) return count;
-    continue;
+    int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);
+    if (rc > 0) return rc;
+    if (*ptr == 0) goto FAIL_EXIT;
+    }
+
+  else if (*ptr == CHAR_RIGHT_PARENTHESIS)
+    {
+    if (dup_parens && *count < hwm_count) *count = hwm_count;
+    *ptrptr = ptr;
+    return -1;
     }
 
-  ptr += 2;
-  if (*ptr == 'P') ptr++;                      /* Allow optional P */
+  else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
+    {
+    if (*count > hwm_count) hwm_count = *count;
+    *count = start_count;
+    }
+  }
 
-  /* We have to disambiguate (?<! and (?<= from (?<name> */
+FAIL_EXIT:
+*ptrptr = ptr;
+return -1;
+}
 
-  if ((*ptr != '<' || ptr[1] == '!' || ptr[1] == '=') &&
-       *ptr != '\'')
-    continue;
 
-  count++;
 
-  if (name == NULL && count == lorn) return count;
-  term = *ptr++;
-  if (term == '<') term = '>';
-  thisname = ptr;
-  while (*ptr != term) ptr++;
-  if (name != NULL && lorn == ptr - thisname &&
-      strncmp((const char *)name, (const char *)thisname, lorn) == 0)
-    return count;
+
+/*************************************************
+*       Find forward referenced subpattern       *
+*************************************************/
+
+/* This function scans along a pattern's text looking for capturing
+subpatterns, and counting them. If it finds a named pattern that matches the
+name it is given, it returns its number. Alternatively, if the name is NULL, it
+returns when it reaches a given numbered subpattern. This is used for forward
+references to subpatterns. We used to be able to start this scan from the
+current compiling point, using the current count value from cd->bracount, and
+do it all in a single loop, but the addition of the possibility of duplicate
+subpattern numbers means that we have to scan from the very start, in order to
+take account of such duplicates, and to use a recursive function to keep track
+of the different types of group.
+
+Arguments:
+  cd           compile background data
+  name         name to seek, or NULL if seeking a numbered subpattern
+  lorn         name length, or subpattern number if name is NULL
+  xmode        TRUE if we are in /x mode
+
+Returns:       the number of the found subpattern, or -1 if not found
+*/
+
+static int
+find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)
+{
+uschar *ptr = (uschar *)cd->start_pattern;
+int count = 0;
+int rc;
+
+/* If the pattern does not start with an opening parenthesis, the first call
+to find_parens_sub() will scan right to the end (if necessary). However, if it
+does start with a parenthesis, find_parens_sub() will return when it hits the
+matching closing parens. That is why we have to have a loop. */
+
+for (;;)
+  {
+  rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);
+  if (rc > 0 || *ptr++ == 0) break;
   }
 
-return -1;
+return rc;
 }
 
 
 
+
 /*************************************************
 *      Find first significant op code            *
 *************************************************/
@@ -1025,7 +1200,9 @@ for (;;)
 
     case OP_CALLOUT:
     case OP_CREF:
+    case OP_NCREF:
     case OP_RREF:
+    case OP_NRREF:
     case OP_DEF:
     code += _pcre_OP_lengths[*code];
     break;
@@ -1041,23 +1218,34 @@ for (;;)
 
 
 /*************************************************
-*        Find the fixed length of a pattern      *
+*        Find the fixed length of a branch       *
 *************************************************/
 
-/* Scan a pattern and compute the fixed length of subject that will match it,
+/* Scan a branch and compute the fixed length of subject that will match it,
 if the length is fixed. This is needed for dealing with backward assertions.
-In UTF8 mode, the result is in characters rather than bytes.
+In UTF8 mode, the result is in characters rather than bytes. The branch is
+temporarily terminated with OP_END when this function is called.
+
+This function is called when a backward assertion is encountered, so that if it
+fails, the error message can point to the correct place in the pattern.
+However, we cannot do this when the assertion contains subroutine calls,
+because they can be forward references. We solve this by remembering this case
+and doing the check at the end; a flag specifies which mode we are running in.
 
 Arguments:
   code     points to the start of the pattern (the bracket)
   options  the compiling options
+  atend    TRUE if called when the pattern is complete
+  cd       the "compile data" structure
 
-Returns:   the fixed length, or -1 if there is no fixed length,
+Returns:   the fixed length,
+             or -1 if there is no fixed length,
              or -2 if \C was encountered
+             or -3 if an OP_RECURSE item was encountered and atend is FALSE
 */
 
 static int
-find_fixedlength(uschar *code, int options)
+find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd)
 {
 int length = -1;
 
@@ -1070,6 +1258,7 @@ branch, check the length against that of the other branches. */
 for (;;)
   {
   int d;
+  uschar *ce, *cs;
   register int op = *cc;
   switch (op)
     {
@@ -1077,7 +1266,7 @@ for (;;)
     case OP_BRA:
     case OP_ONCE:
     case OP_COND:
-    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options);
+    d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd);
     if (d < 0) return d;
     branchlength += d;
     do cc += GET(cc, 1); while (*cc == OP_ALT);
@@ -1100,6 +1289,21 @@ for (;;)
     branchlength = 0;
     break;
 
+    /* A true recursion implies not fixed length, but a subroutine call may
+    be OK. If the subroutine is a forward reference, we can't deal with
+    it until the end of the pattern, so return -3. */
+
+    case OP_RECURSE:
+    if (!atend) return -3;
+    cs = ce = (uschar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
+    do ce += GET(ce, 1); while (*ce == OP_ALT);       /* End subpattern */
+    if (cc > cs && cc < ce) return -1;                /* Recursion */
+    d = find_fixedlength(cs + 2, options, atend, cd);
+    if (d < 0) return d;
+    branchlength += d;
+    cc += 1 + LINK_SIZE;
+    break;
+
     /* Skip over assertive subpatterns */
 
     case OP_ASSERT:
@@ -1113,12 +1317,15 @@ for (;;)
 
     case OP_REVERSE:
     case OP_CREF:
+    case OP_NCREF:
     case OP_RREF:
+    case OP_NRREF:
     case OP_DEF:
     case OP_OPT:
     case OP_CALLOUT:
     case OP_SOD:
     case OP_SOM:
+    case OP_SET_SOM:
     case OP_EOD:
     case OP_EODN:
     case OP_CIRC:
@@ -1136,10 +1343,8 @@ for (;;)
     branchlength++;
     cc += 2;
 #ifdef SUPPORT_UTF8
-    if ((options & PCRE_UTF8) != 0)
-      {
-      while ((*cc & 0xc0) == 0x80) cc++;
-      }
+    if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
+      cc += _pcre_utf8_table4[cc[-1] & 0x3f];
 #endif
     break;
 
@@ -1150,10 +1355,8 @@ for (;;)
     branchlength += GET2(cc,1);
     cc += 4;
 #ifdef SUPPORT_UTF8
-    if ((options & PCRE_UTF8) != 0)
-      {
-      while((*cc & 0x80) == 0x80) cc++;
-      }
+    if ((options & PCRE_UTF8) != 0 && cc[-1] >= 0xc0)
+      cc += _pcre_utf8_table4[cc[-1] & 0x3f];
 #endif
     break;
 
@@ -1232,22 +1435,25 @@ for (;;)
 
 
 /*************************************************
-*    Scan compiled regex for numbered bracket    *
+*    Scan compiled regex for specific bracket    *
 *************************************************/
 
 /* This little function scans through a compiled pattern until it finds a
-capturing bracket with the given number.
+capturing bracket with the given number, or, if the number is negative, an
+instance of OP_REVERSE for a lookbehind. The function is global in the C sense
+so that it can be called from pcre_study() when finding the minimum matching
+length.
 
 Arguments:
   code        points to start of expression
   utf8        TRUE in UTF-8 mode
-  number      the required bracket number
+  number      the required bracket number or negative to find a lookbehind
 
 Returns:      pointer to the opcode for the bracket, or NULL if not found
 */
 
-static const uschar *
-find_bracket(const uschar *code, BOOL utf8, int number)
+const uschar *
+_pcre_find_bracket(const uschar *code, BOOL utf8, int number)
 {
 for (;;)
   {
@@ -1260,6 +1466,14 @@ for (;;)
 
   if (c == OP_XCLASS) code += GET(code, 1);
 
+  /* Handle lookbehind: a negative number asks for an OP_REVERSE item */
+
+  else if (c == OP_REVERSE)
+    {
+    if (number < 0) return (uschar *)code;
+    code += _pcre_OP_lengths[c];
+    }
+
   /* Handle capturing bracket */
 
   else if (c == OP_CBRA)
@@ -1446,12 +1660,14 @@ Arguments:
   code        points to start of search
   endcode     points to where to stop
   utf8        TRUE if in UTF8 mode
+  cd          contains pointers to tables etc.
 
 Returns:      TRUE if what is matched could be empty
 */
 
 static BOOL
-could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8)
+could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8,
+  compile_data *cd)
 {
 register int c;
 for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE);
@@ -1482,6 +1698,28 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
     continue;
     }
 
+  /* For a recursion/subroutine call, if its end has been reached, which
+  implies a subroutine call, we can scan it. */
+
+  if (c == OP_RECURSE)
+    {
+    BOOL empty_branch = FALSE;
+    const uschar *scode = cd->start_code + GET(code, 1);
+    if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
+    do
+      {
+      if (could_be_empty_branch(scode, endcode, utf8, cd))
+        {
+        empty_branch = TRUE;
+        break;
+        }
+      scode += GET(scode, 1);
+      }
+    while (*scode == OP_ALT);
+    if (!empty_branch) return FALSE;  /* All branches are non-empty */
+    continue;
+    }
+
   /* For other groups, scan the branches. */
 
   if (c == OP_BRA || c == OP_CBRA || c == OP_ONCE || c == OP_COND)
@@ -1489,17 +1727,25 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
     BOOL empty_branch;
     if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
 
-    /* Scan a closed bracket */
+    /* If a conditional group has only one branch, there is a second, implied,
+    empty branch, so just skip over the conditional, because it could be empty.
+    Otherwise, scan the individual branches of the group. */
 
-    empty_branch = FALSE;
-    do
-      {
-      if (!empty_branch && could_be_empty_branch(code, endcode, utf8))
-        empty_branch = TRUE;
+    if (c == OP_COND && code[GET(code, 1)] != OP_ALT)
       code += GET(code, 1);
+    else
+      {
+      empty_branch = FALSE;
+      do
+        {
+        if (!empty_branch && could_be_empty_branch(code, endcode, utf8, cd))
+          empty_branch = TRUE;
+        code += GET(code, 1);
+        }
+      while (*code == OP_ALT);
+      if (!empty_branch) return FALSE;   /* All branches are non-empty */
       }
-    while (*code == OP_ALT);
-    if (!empty_branch) return FALSE;   /* All branches are non-empty */
+
     c = *code;
     continue;
     }
@@ -1617,12 +1863,20 @@ for (code = first_significant_code(code + _pcre_OP_lengths[*code], NULL, 0, TRUE
     case OP_QUERY:
     case OP_MINQUERY:
     case OP_POSQUERY:
+    if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f];
+    break;
+
     case OP_UPTO:
     case OP_MINUPTO:
     case OP_POSUPTO:
-    if (utf8) while ((code[2] & 0xc0) == 0x80) code++;
+    if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f];
     break;
 #endif
+
+    /* None of the remaining opcodes are required to match a character. */
+
+    default:
+    break;
     }
   }
 
@@ -1645,17 +1899,19 @@ Arguments:
   endcode     points to where to stop (current RECURSE item)
   bcptr       points to the chain of current (unclosed) branch starts
   utf8        TRUE if in UTF-8 mode
+  cd          pointers to tables etc
 
 Returns:      TRUE if what is matched could be empty
 */
 
 static BOOL
 could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
-  BOOL utf8)
+  BOOL utf8, compile_data *cd)
 {
-while (bcptr != NULL && bcptr->current >= code)
+while (bcptr != NULL && bcptr->current_branch >= code)
   {
-  if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;
+  if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8, cd))
+    return FALSE;
   bcptr = bcptr->outer;
   }
 return TRUE;
@@ -1701,10 +1957,10 @@ int terminator;          /* Don't combine these lines; the Solaris cc */
 terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
 for (++ptr; *ptr != 0; ptr++)
   {
-  if (*ptr == '\\' && ptr[1] == ']') ptr++; else
+  if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET) ptr++; else
     {
-    if (*ptr == ']') return FALSE;
-    if (*ptr == terminator && ptr[1] == ']')
+    if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
+    if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
       {
       *endptr = ptr;
       return TRUE;
@@ -1950,7 +2206,7 @@ if ((options & PCRE_EXTENDED) != 0)
   for (;;)
     {
     while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
-    if (*ptr == '#')
+    if (*ptr == CHAR_NUMBER_SIGN)
       {
       while (*(++ptr) != 0)
         if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
@@ -1962,7 +2218,7 @@ if ((options & PCRE_EXTENDED) != 0)
 /* If the next item is one that we can handle, get its value. A non-negative
 value is a character, a negative value is an escape value. */
 
-if (*ptr == '\\')
+if (*ptr == CHAR_BACKSLASH)
   {
   int temperrorcode = 0;
   next = check_escape(&ptr, &temperrorcode, cd->bracount, options, FALSE);
@@ -1987,7 +2243,7 @@ if ((options & PCRE_EXTENDED) != 0)
   for (;;)
     {
     while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
-    if (*ptr == '#')
+    if (*ptr == CHAR_NUMBER_SIGN)
       {
       while (*(++ptr) != 0)
         if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
@@ -1998,8 +2254,9 @@ if ((options & PCRE_EXTENDED) != 0)
 
 /* If the next thing is itself optional, we have to give up. */
 
-if (*ptr == '*' || *ptr == '?' || strncmp((char *)ptr, "{0,", 3) == 0)
-  return FALSE;
+if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
+  strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
+    return FALSE;
 
 /* Now compare the next item with the previous opcode. If the previous is a
 positive single character match, "item" either contains the character or, if
@@ -2316,7 +2573,7 @@ BOOL utf8 = FALSE;
 uschar *utf8_char = NULL;
 #endif
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 if (lengthptr != NULL) DPRINTF((">> start branch\n"));
 #endif
 
@@ -2375,10 +2632,10 @@ for (;; ptr++)
 
   if (lengthptr != NULL)
     {
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
     if (code > cd->hwm) cd->hwm = code;                 /* High water info */
 #endif
-    if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
+    if (code > cd->start_workspace + WORK_SIZE_CHECK)   /* Check for overrun */
       {
       *errorcodeptr = ERR52;
       goto FAILED;
@@ -2427,7 +2684,7 @@ for (;; ptr++)
   /* In the real compile phase, just check the workspace used by the forward
   reference list. */
 
-  else if (cd->hwm > cd->start_workspace + COMPILE_WORK_SIZE)
+  else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK)
     {
     *errorcodeptr = ERR52;
     goto FAILED;
@@ -2437,7 +2694,7 @@ for (;; ptr++)
 
   if (inescq && c != 0)
     {
-    if (c == '\\' && ptr[1] == 'E')
+    if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
       {
       inescq = FALSE;
       ptr++;
@@ -2463,8 +2720,9 @@ for (;; ptr++)
   /* Fill in length of a previous callout, except when the next thing is
   a quantifier. */
 
-  is_quantifier = c == '*' || c == '+' || c == '?' ||
-    (c == '{' && is_counted_repeat(ptr+1));
+  is_quantifier =
+    c == CHAR_ASTERISK || c == CHAR_PLUS || c == CHAR_QUESTION_MARK ||
+    (c == CHAR_LEFT_CURLY_BRACKET && is_counted_repeat(ptr+1));
 
   if (!is_quantifier && previous_callout != NULL &&
        after_manual_callout-- <= 0)
@@ -2479,7 +2737,7 @@ for (;; ptr++)
   if ((options & PCRE_EXTENDED) != 0)
     {
     if ((cd->ctypes[c] & ctype_space) != 0) continue;
-    if (c == '#')
+    if (c == CHAR_NUMBER_SIGN)
       {
       while (*(++ptr) != 0)
         {
@@ -2504,8 +2762,8 @@ for (;; ptr++)
     {
     /* ===================================================================*/
     case 0:                        /* The branch terminates at string end */
-    case '|':                      /* or | or ) */
-    case ')':
+    case CHAR_VERTICAL_LINE:       /* or | or ) */
+    case CHAR_RIGHT_PARENTHESIS:
     *firstbyteptr = firstbyte;
     *reqbyteptr = reqbyte;
     *codeptr = code;
@@ -2527,7 +2785,7 @@ for (;; ptr++)
     /* Handle single-character metacharacters. In multiline mode, ^ disables
     the setting of any following char as a first character. */
 
-    case '^':
+    case CHAR_CIRCUMFLEX_ACCENT:
     if ((options & PCRE_MULTILINE) != 0)
       {
       if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
@@ -2536,7 +2794,7 @@ for (;; ptr++)
     *code++ = OP_CIRC;
     break;
 
-    case '$':
+    case CHAR_DOLLAR_SIGN:
     previous = NULL;
     *code++ = OP_DOLL;
     break;
@@ -2544,7 +2802,7 @@ for (;; ptr++)
     /* There can never be a first char if '.' is first, whatever happens about
     repeats. The value of reqbyte doesn't change either. */
 
-    case '.':
+    case CHAR_DOT:
     if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
     zerofirstbyte = firstbyte;
     zeroreqbyte = reqbyte;
@@ -2568,7 +2826,7 @@ for (;; ptr++)
     In JavaScript compatibility mode, an isolated ']' causes an error. In
     default (Perl) mode, it is treated as a data character. */
 
-    case ']':
+    case CHAR_RIGHT_SQUARE_BRACKET:
     if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
       {
       *errorcodeptr = ERR64;
@@ -2576,16 +2834,17 @@ for (;; ptr++)
       }
     goto NORMAL_CHAR;
 
-    case '[':
+    case CHAR_LEFT_SQUARE_BRACKET:
     previous = code;
 
     /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
     they are encountered at the top level, so we'll do that too. */
 
-    if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
+    if ((ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+         ptr[1] == CHAR_EQUALS_SIGN) &&
         check_posix_syntax(ptr, &tempptr))
       {
-      *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31;
+      *errorcodeptr = (ptr[1] == CHAR_COLON)? ERR13 : ERR31;
       goto FAILED;
       }
 
@@ -2597,13 +2856,17 @@ for (;; ptr++)
     for (;;)
       {
       c = *(++ptr);
-      if (c == '\\')
+      if (c == CHAR_BACKSLASH)
         {
-        if (ptr[1] == 'E') ptr++;
-          else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3;
-            else break;
+        if (ptr[1] == CHAR_E)
+          ptr++;
+        else if (strncmp((const char *)ptr+1,
+                          STR_Q STR_BACKSLASH STR_E, 3) == 0)
+          ptr += 3;
+        else
+          break;
         }
-      else if (!negate_class && c == '^')
+      else if (!negate_class && c == CHAR_CIRCUMFLEX_ACCENT)
         negate_class = TRUE;
       else break;
       }
@@ -2613,7 +2876,8 @@ for (;; ptr++)
     that. In JS mode, [] must always fail, so generate OP_FAIL, whereas
     [^] must match any character, so generate OP_ALLANY. */
 
-    if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
+    if (c == CHAR_RIGHT_SQUARE_BRACKET &&
+        (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0)
       {
       *code++ = negate_class? OP_ALLANY : OP_FAIL;
       if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE;
@@ -2678,7 +2942,7 @@ for (;; ptr++)
 
       if (inescq)
         {
-        if (c == '\\' && ptr[1] == 'E')     /* If we are at \E */
+        if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)  /* If we are at \E */
           {
           inescq = FALSE;                   /* Reset literal state */
           ptr++;                            /* Skip the 'E' */
@@ -2693,23 +2957,23 @@ for (;; ptr++)
       [.ch.] and [=ch=] ("collating elements") and fault them, as Perl
       5.6 and 5.8 do. */
 
-      if (c == '[' &&
-          (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') &&
-          check_posix_syntax(ptr, &tempptr))
+      if (c == CHAR_LEFT_SQUARE_BRACKET &&
+          (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||
+           ptr[1] == CHAR_EQUALS_SIGN) && check_posix_syntax(ptr, &tempptr))
         {
         BOOL local_negate = FALSE;
         int posix_class, taboffset, tabopt;
         register const uschar *cbits = cd->cbits;
         uschar pbits[32];
 
-        if (ptr[1] != ':')
+        if (ptr[1] != CHAR_COLON)
           {
           *errorcodeptr = ERR31;
           goto FAILED;
           }
 
         ptr += 2;
-        if (*ptr == '^')
+        if (*ptr == CHAR_CIRCUMFLEX_ACCENT)
           {
           local_negate = TRUE;
           should_flip_negation = TRUE;  /* Note negative special */
@@ -2782,17 +3046,17 @@ for (;; ptr++)
       to 'or' into the one we are building. We assume they have more than one
       character in them, so set class_charcount bigger than one. */
 
-      if (c == '\\')
+      if (c == CHAR_BACKSLASH)
         {
         c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
         if (*errorcodeptr != 0) goto FAILED;
 
-        if (-c == ESC_b) c = '\b';       /* \b is backspace in a class */
-        else if (-c == ESC_X) c = 'X';   /* \X is literal X in a class */
-        else if (-c == ESC_R) c = 'R';   /* \R is literal R in a class */
+        if (-c == ESC_b) c = CHAR_BS;       /* \b is backspace in a class */
+        else if (-c == ESC_X) c = CHAR_X;   /* \X is literal X in a class */
+        else if (-c == ESC_R) c = CHAR_R;   /* \R is literal R in a class */
         else if (-c == ESC_Q)            /* Handle start of quoted string */
           {
-          if (ptr[1] == '\\' && ptr[2] == 'E')
+          if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
             {
             ptr += 2; /* avoid empty string */
             }
@@ -3018,7 +3282,7 @@ for (;; ptr++)
       entirely. The code for handling \Q and \E is messy. */
 
       CHECK_RANGE:
-      while (ptr[1] == '\\' && ptr[2] == 'E')
+      while (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
         {
         inescq = FALSE;
         ptr += 2;
@@ -3028,28 +3292,29 @@ for (;; ptr++)
 
       /* Remember \r or \n */
 
-      if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF;
+      if (c == CHAR_CR || c == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
 
       /* Check for range */
 
-      if (!inescq && ptr[1] == '-')
+      if (!inescq && ptr[1] == CHAR_MINUS)
         {
         int d;
         ptr += 2;
-        while (*ptr == '\\' && ptr[1] == 'E') ptr += 2;
+        while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E) ptr += 2;
 
         /* If we hit \Q (not followed by \E) at this point, go into escaped
         mode. */
 
-        while (*ptr == '\\' && ptr[1] == 'Q')
+        while (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
           {
           ptr += 2;
-          if (*ptr == '\\' && ptr[1] == 'E') { ptr += 2; continue; }
+          if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_E)
+            { ptr += 2; continue; }
           inescq = TRUE;
           break;
           }
 
-        if (*ptr == 0 || (!inescq && *ptr == ']'))
+        if (*ptr == 0 || (!inescq && *ptr == CHAR_RIGHT_SQUARE_BRACKET))
           {
           ptr = oldptr;
           goto LONE_SINGLE_CHARACTER;
@@ -3068,7 +3333,7 @@ for (;; ptr++)
         not any of the other escapes. Perl 5.6 treats a hyphen as a literal
         in such circumstances. */
 
-        if (!inescq && d == '\\')
+        if (!inescq && d == CHAR_BACKSLASH)
           {
           d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
           if (*errorcodeptr != 0) goto FAILED;
@@ -3078,9 +3343,9 @@ for (;; ptr++)
 
           if (d < 0)
             {
-            if (d == -ESC_b) d = '\b';
-            else if (d == -ESC_X) d = 'X';
-            else if (d == -ESC_R) d = 'R'; else
+            if (d == -ESC_b) d = CHAR_BS;
+            else if (d == -ESC_X) d = CHAR_X;
+            else if (d == -ESC_R) d = CHAR_R; else
               {
               ptr = oldptr;
               goto LONE_SINGLE_CHARACTER;  /* A few lines below */
@@ -3101,7 +3366,7 @@ for (;; ptr++)
 
         /* Remember \r or \n */
 
-        if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF;
+        if (d == CHAR_CR || d == CHAR_NL) cd->external_flags |= PCRE_HASCRORLF;
 
         /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless
         matching, we have to use an XCLASS with extra data items. Caseless
@@ -3248,7 +3513,7 @@ for (;; ptr++)
 
     /* Loop until ']' reached. This "while" is the end of the "do" above. */
 
-    while ((c = *(++ptr)) != 0 && (c != ']' || inescq));
+    while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
 
     if (c == 0)                          /* Missing terminating ']' */
       {
@@ -3393,23 +3658,23 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     /* Various kinds of repeat; '{' is not necessarily a quantifier, but this
     has been tested above. */
 
-    case '{':
+    case CHAR_LEFT_CURLY_BRACKET:
     if (!is_quantifier) goto NORMAL_CHAR;
     ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr);
     if (*errorcodeptr != 0) goto FAILED;
     goto REPEAT;
 
-    case '*':
+    case CHAR_ASTERISK:
     repeat_min = 0;
     repeat_max = -1;
     goto REPEAT;
 
-    case '+':
+    case CHAR_PLUS:
     repeat_min = 1;
     repeat_max = -1;
     goto REPEAT;
 
-    case '?':
+    case CHAR_QUESTION_MARK:
     repeat_min = 0;
     repeat_max = 1;
 
@@ -3444,13 +3709,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     but if PCRE_UNGREEDY is set, it works the other way round. We change the
     repeat type to the non-default. */
 
-    if (ptr[1] == '+')
+    if (ptr[1] == CHAR_PLUS)
       {
       repeat_type = 0;                  /* Force greedy */
       possessive_quantifier = TRUE;
       ptr++;
       }
-    else if (ptr[1] == '?')
+    else if (ptr[1] == CHAR_QUESTION_MARK)
       {
       repeat_type = greedy_non_default;
       ptr++;
@@ -3565,10 +3830,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       if (repeat_max == 0) goto END_REPEAT;
 
+      /*--------------------------------------------------------------------*/
+      /* This code is obsolete from release 8.00; the restriction was finally
+      removed: */
+
       /* All real repeats make it impossible to handle partial matching (maybe
       one day we will be able to remove this restriction). */
 
-      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
+      /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
+      /*--------------------------------------------------------------------*/
 
       /* Combine the op_type with the repeat_type */
 
@@ -3715,10 +3985,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         goto END_REPEAT;
         }
 
+      /*--------------------------------------------------------------------*/
+      /* This code is obsolete from release 8.00; the restriction was finally
+      removed: */
+
       /* All real repeats make it impossible to handle partial matching (maybe
       one day we will be able to remove this restriction). */
 
-      if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL;
+      /* if (repeat_max != 1) cd->external_flags |= PCRE_NOPARTIAL; */
+      /*--------------------------------------------------------------------*/
 
       if (repeat_min == 0 && repeat_max == -1)
         *code++ = OP_CRSTAR + repeat_type;
@@ -3853,13 +4128,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           {
           /* In the pre-compile phase, we don't actually do the replication. We
           just adjust the length as if we had. Do some paranoid checks for
-          potential integer overflow. */
+          potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
+          integer type when available, otherwise double. */
 
           if (lengthptr != NULL)
             {
             int delta = (repeat_min - 1)*length_prevgroup;
-            if ((double)(repeat_min - 1)*(double)length_prevgroup >
-                                                            (double)INT_MAX ||
+            if ((INT64_OR_DOUBLE)(repeat_min - 1)*
+                  (INT64_OR_DOUBLE)length_prevgroup >
+                    (INT64_OR_DOUBLE)INT_MAX ||
                 OFLOW_MAX - *lengthptr < delta)
               {
               *errorcodeptr = ERR20;
@@ -3905,15 +4182,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         just adjust the length as if we had. For each repetition we must add 1
         to the length for BRAZERO and for all but the last repetition we must
         add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
-        paranoid checks to avoid integer overflow. */
+        paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
+        a 64-bit integer type when available, otherwise double. */
 
         if (lengthptr != NULL && repeat_max > 0)
           {
           int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
                       2 - 2*LINK_SIZE;   /* Last one doesn't nest */
-          if ((double)repeat_max *
-                (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
-                  > (double)INT_MAX ||
+          if ((INT64_OR_DOUBLE)repeat_max *
+                (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
+                  > (INT64_OR_DOUBLE)INT_MAX ||
               OFLOW_MAX - *lengthptr < delta)
             {
             *errorcodeptr = ERR20;
@@ -3990,7 +4268,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           uschar *scode = bracode;
           do
             {
-            if (could_be_empty_branch(scode, ketcode, utf8))
+            if (could_be_empty_branch(scode, ketcode, utf8, cd))
               {
               *bracode += OP_SBRA - OP_BRA;
               break;
@@ -4033,11 +4311,20 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     if (possessive_quantifier)
       {
       int len;
-      if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT ||
-          *tempcode == OP_NOTEXACT)
+
+      if (*tempcode == OP_TYPEEXACT)
         tempcode += _pcre_OP_lengths[*tempcode] +
-          ((*tempcode == OP_TYPEEXACT &&
-             (tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0);
+          ((tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP)? 2 : 0);
+
+      else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
+        {
+        tempcode += _pcre_OP_lengths[*tempcode];
+#ifdef SUPPORT_UTF8
+        if (utf8 && tempcode[-1] >= 0xc0)
+          tempcode += _pcre_utf8_table4[tempcode[-1] & 0x3f];
+#endif
+        }
+
       len = code - tempcode;
       if (len > 0) switch (*tempcode)
         {
@@ -4056,7 +4343,12 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break;
         case OP_NOTUPTO:  *tempcode = OP_NOTPOSUPTO; break;
 
+        /* Because we are moving code along, we must ensure that any
+        pending recursive references are updated. */
+
         default:
+        *code = OP_END;
+        adjust_recurse(tempcode, 1 + LINK_SIZE, utf8, cd, save_hwm);
         memmove(tempcode + 1+LINK_SIZE, tempcode, len);
         code += 1 + LINK_SIZE;
         len += 1 + LINK_SIZE;
@@ -4083,7 +4375,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     lookbehind or option setting or condition or all the other extended
     parenthesis forms.  */
 
-    case '(':
+    case CHAR_LEFT_PARENTHESIS:
     newoptions = options;
     skipbytes = 0;
     bravalue = OP_CBRA;
@@ -4092,19 +4384,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
     /* First deal with various "verbs" that can be introduced by '*'. */
 
-    if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
+    if (*(++ptr) == CHAR_ASTERISK && (cd->ctypes[ptr[1]] & ctype_letter) != 0)
       {
       int i, namelen;
       const char *vn = verbnames;
       const uschar *name = ++ptr;
       previous = NULL;
       while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
-      if (*ptr == ':')
+      if (*ptr == CHAR_COLON)
         {
         *errorcodeptr = ERR59;   /* Not supported */
         goto FAILED;
         }
-      if (*ptr != ')')
+      if (*ptr != CHAR_RIGHT_PARENTHESIS)
         {
         *errorcodeptr = ERR60;
         goto FAILED;
@@ -4115,8 +4407,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         if (namelen == verbs[i].len &&
             strncmp((char *)name, vn, namelen) == 0)
           {
-          *code = verbs[i].op;
-          if (*code++ == OP_ACCEPT) cd->had_accept = TRUE;
+          /* Check for open captures before ACCEPT */
+
+          if (verbs[i].op == OP_ACCEPT)
+            {
+            open_capitem *oc;
+            cd->had_accept = TRUE;
+            for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+              {
+              *code++ = OP_CLOSE;
+              PUT2INC(code, 0, oc->number);
+              }
+            }
+          *code++ = verbs[i].op;
           break;
           }
         vn += verbs[i].len + 1;
@@ -4129,7 +4432,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     /* Deal with the extended parentheses; all are introduced by '?', and the
     appearance of any of them means that this is not a capturing group. */
 
-    else if (*ptr == '?')
+    else if (*ptr == CHAR_QUESTION_MARK)
       {
       int i, set, unset, namelen;
       int *optset;
@@ -4138,9 +4441,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       switch (*(++ptr))
         {
-        case '#':                 /* Comment; skip to ket */
+        case CHAR_NUMBER_SIGN:                 /* Comment; skip to ket */
         ptr++;
-        while (*ptr != 0 && *ptr != ')') ptr++;
+        while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
         if (*ptr == 0)
           {
           *errorcodeptr = ERR18;
@@ -4150,19 +4453,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
 
         /* ------------------------------------------------------------ */
-        case '|':                 /* Reset capture count for each branch */
+        case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
         reset_bracount = TRUE;
         /* Fall through */
 
         /* ------------------------------------------------------------ */
-        case ':':                 /* Non-capturing bracket */
+        case CHAR_COLON:          /* Non-capturing bracket */
         bravalue = OP_BRA;
         ptr++;
         break;
 
 
         /* ------------------------------------------------------------ */
-        case '(':
+        case CHAR_LEFT_PARENTHESIS:
         bravalue = OP_COND;       /* Conditional group */
 
         /* A condition can be an assertion, a number (referring to a numbered
@@ -4182,7 +4485,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         the switch. This will take control down to where bracketed groups,
         including assertions, are processed. */
 
-        if (ptr[1] == '?' && (ptr[2] == '=' || ptr[2] == '!' || ptr[2] == '<'))
+        if (ptr[1] == CHAR_QUESTION_MARK && (ptr[2] == CHAR_EQUALS_SIGN ||
+            ptr[2] == CHAR_EXCLAMATION_MARK || ptr[2] == CHAR_LESS_THAN_SIGN))
           break;
 
         /* Most other conditions use OP_CREF (a couple change to OP_RREF
@@ -4194,7 +4498,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
         /* Check for a test for recursion in a named group. */
 
-        if (ptr[1] == 'R' && ptr[2] == '&')
+        if (ptr[1] == CHAR_R && ptr[2] == CHAR_AMPERSAND)
           {
           terminator = -1;
           ptr += 2;
@@ -4204,20 +4508,20 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         /* Check for a test for a named group's having been set, using the Perl
         syntax (?(<name>) or (?('name') */
 
-        else if (ptr[1] == '<')
+        else if (ptr[1] == CHAR_LESS_THAN_SIGN)
           {
-          terminator = '>';
+          terminator = CHAR_GREATER_THAN_SIGN;
           ptr++;
           }
-        else if (ptr[1] == '\'')
+        else if (ptr[1] == CHAR_APOSTROPHE)
           {
-          terminator = '\'';
+          terminator = CHAR_APOSTROPHE;
           ptr++;
           }
         else
           {
           terminator = 0;
-          if (ptr[1] == '-' || ptr[1] == '+') refsign = *(++ptr);
+          if (ptr[1] == CHAR_MINUS || ptr[1] == CHAR_PLUS) refsign = *(++ptr);
           }
 
         /* We now expect to read a name; any thing else is an error */
@@ -4236,13 +4540,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         while ((cd->ctypes[*ptr] & ctype_word) != 0)
           {
           if (recno >= 0)
-            recno = (g_ascii_isdigit (*ptr) != 0)?
-              recno * 10 + *ptr - '0' : -1;
+            recno = (g_ascii_isdigit(*ptr) != 0)?
+              recno * 10 + *ptr - CHAR_0 : -1;
           ptr++;
           }
         namelen = ptr - name;
 
-        if ((terminator > 0 && *ptr++ != terminator) || *ptr++ != ')')
+        if ((terminator > 0 && *ptr++ != terminator) ||
+            *ptr++ != CHAR_RIGHT_PARENTHESIS)
           {
           ptr--;      /* Error offset */
           *errorcodeptr = ERR26;
@@ -4264,7 +4569,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             *errorcodeptr = ERR58;
             goto FAILED;
             }
-          recno = (refsign == '-')?
+          recno = (refsign == CHAR_MINUS)?
             cd->bracount - recno + 1 : recno +cd->bracount;
           if (recno <= 0 || recno > cd->final_bracount)
             {
@@ -4276,7 +4581,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           }
 
         /* Otherwise (did not start with "+" or "-"), start by looking for the
-        name. */
+        name. If we find a name, add one to the opcode to change OP_CREF or
+        OP_RREF into OP_NCREF or OP_NRREF. These behave exactly the same,
+        except they record that the reference was originally to a name. The
+        information is used to check duplicate names. */
 
         slot = cd->name_table;
         for (i = 0; i < cd->names_found; i++)
@@ -4291,14 +4599,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           {
           recno = GET2(slot, 0);
           PUT2(code, 2+LINK_SIZE, recno);
+          code[1+LINK_SIZE]++;
           }
 
         /* Search the pattern for a forward reference */
 
-        else if ((i = find_parens(ptr, cd, name, namelen,
+        else if ((i = find_parens(cd, name, namelen,
                         (options & PCRE_EXTENDED) != 0)) > 0)
           {
           PUT2(code, 2+LINK_SIZE, i);
+          code[1+LINK_SIZE]++;
           }
 
         /* If terminator == 0 it means that the name followed directly after
@@ -4316,17 +4626,17 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         /* Check for (?(R) for recursion. Allow digits after R to specify a
         specific group number. */
 
-        else if (*name == 'R')
+        else if (*name == CHAR_R)
           {
           recno = 0;
           for (i = 1; i < namelen; i++)
             {
-            if (g_ascii_isdigit (name[i]) == 0)
+            if (g_ascii_isdigit(name[i]) == 0)
               {
               *errorcodeptr = ERR15;
               goto FAILED;
               }
-            recno = recno * 10 + name[i] - '0';
+            recno = recno * 10 + name[i] - CHAR_0;
             }
           if (recno == 0) recno = RREF_ANY;
           code[1+LINK_SIZE] = OP_RREF;      /* Change test type */
@@ -4336,7 +4646,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         /* Similarly, check for the (?(DEFINE) "condition", which is always
         false. */
 
-        else if (namelen == 6 && strncmp((char *)name, "DEFINE", 6) == 0)
+        else if (namelen == 6 && strncmp((char *)name, STRING_DEFINE, 6) == 0)
           {
           code[1+LINK_SIZE] = OP_DEF;
           skipbytes = 1;
@@ -4361,16 +4671,16 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
 
         /* ------------------------------------------------------------ */
-        case '=':                 /* Positive lookahead */
+        case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
         bravalue = OP_ASSERT;
         ptr++;
         break;
 
 
         /* ------------------------------------------------------------ */
-        case '!':                 /* Negative lookahead */
+        case CHAR_EXCLAMATION_MARK:            /* Negative lookahead */
         ptr++;
-        if (*ptr == ')')          /* Optimize (?!) */
+        if (*ptr == CHAR_RIGHT_PARENTHESIS)    /* Optimize (?!) */
           {
           *code++ = OP_FAIL;
           previous = NULL;
@@ -4381,15 +4691,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
 
         /* ------------------------------------------------------------ */
-        case '<':                 /* Lookbehind or named define */
+        case CHAR_LESS_THAN_SIGN:              /* Lookbehind or named define */
         switch (ptr[1])
           {
-          case '=':               /* Positive lookbehind */
+          case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
           bravalue = OP_ASSERTBACK;
           ptr += 2;
           break;
 
-          case '!':               /* Negative lookbehind */
+          case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
           bravalue = OP_ASSERTBACK_NOT;
           ptr += 2;
           break;
@@ -4404,22 +4714,22 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
 
         /* ------------------------------------------------------------ */
-        case '>':                 /* One-time brackets */
+        case CHAR_GREATER_THAN_SIGN:           /* One-time brackets */
         bravalue = OP_ONCE;
         ptr++;
         break;
 
 
         /* ------------------------------------------------------------ */
-        case 'C':                 /* Callout - may be followed by digits; */
+        case CHAR_C:                 /* Callout - may be followed by digits; */
         previous_callout = code;  /* Save for later completion */
         after_manual_callout = 1; /* Skip one item before completing */
         *code++ = OP_CALLOUT;
           {
           int n = 0;
-          while (g_ascii_isdigit (*(++ptr)) != 0)
-            n = n * 10 + *ptr - '0';
-          if (*ptr != ')')
+          while (g_ascii_isdigit(*(++ptr)) != 0)
+            n = n * 10 + *ptr - CHAR_0;
+          if (*ptr != CHAR_RIGHT_PARENTHESIS)
             {
             *errorcodeptr = ERR39;
             goto FAILED;
@@ -4439,14 +4749,15 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
 
         /* ------------------------------------------------------------ */
-        case 'P':                 /* Python-style named subpattern handling */
-        if (*(++ptr) == '=' || *ptr == '>')  /* Reference or recursion */
+        case CHAR_P:              /* Python-style named subpattern handling */
+        if (*(++ptr) == CHAR_EQUALS_SIGN ||
+            *ptr == CHAR_GREATER_THAN_SIGN)  /* Reference or recursion */
           {
-          is_recurse = *ptr == '>';
-          terminator = ')';
+          is_recurse = *ptr == CHAR_GREATER_THAN_SIGN;
+          terminator = CHAR_RIGHT_PARENTHESIS;
           goto NAMED_REF_OR_RECURSE;
           }
-        else if (*ptr != '<')    /* Test for Python-style definition */
+        else if (*ptr != CHAR_LESS_THAN_SIGN)  /* Test for Python-style defn */
           {
           *errorcodeptr = ERR41;
           goto FAILED;
@@ -4456,9 +4767,10 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
         /* ------------------------------------------------------------ */
         DEFINE_NAME:    /* Come here from (?< handling */
-        case '\'':
+        case CHAR_APOSTROPHE:
           {
-          terminator = (*ptr == '<')? '>' : '\'';
+          terminator = (*ptr == CHAR_LESS_THAN_SIGN)?
+            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
           name = ++ptr;
 
           while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
@@ -4489,11 +4801,24 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               }
             }
 
-          /* In the real compile, create the entry in the table */
+          /* In the real compile, create the entry in the table, maintaining
+          alphabetical order. Duplicate names for different numbers are
+          permitted only if PCRE_DUPNAMES is set. Duplicate names for the same
+          number are always OK. (An existing number can be re-used if (?|
+          appears in the pattern.) In either event, a duplicate name results in
+          a duplicate entry in the table, even if the number is the same. This
+          is because the number of names, and hence the table size, is computed
+          in the pre-compile, and it affects various numbers and pointers which
+          would all have to be modified, and the compiled code moved down, if
+          duplicates with the same number were omitted from the table. This
+          doesn't seem worth the hassle. However, *different* names for the
+          same number are not permitted. */
 
           else
             {
+            BOOL dupname = FALSE;
             slot = cd->name_table;
+
             for (i = 0; i < cd->names_found; i++)
               {
               int crc = memcmp(name, slot+2, namelen);
@@ -4501,39 +4826,72 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
                 {
                 if (slot[2+namelen] == 0)
                   {
-                  if ((options & PCRE_DUPNAMES) == 0)
+                  if (GET2(slot, 0) != cd->bracount + 1 &&
+                      (options & PCRE_DUPNAMES) == 0)
                     {
                     *errorcodeptr = ERR43;
                     goto FAILED;
                     }
+                  else dupname = TRUE;
                   }
-                else crc = -1;      /* Current name is substring */
+                else crc = -1;      /* Current name is a substring */
                 }
+
+              /* Make space in the table and break the loop for an earlier
+              name. For a duplicate or later name, carry on. We do this for
+              duplicates so that in the simple case (when (?| is not used) they
+              are in order of their numbers. */
+
               if (crc < 0)
                 {
                 memmove(slot + cd->name_entry_size, slot,
                   (cd->names_found - i) * cd->name_entry_size);
                 break;
                 }
+
+              /* Continue the loop for a later or duplicate name */
+
               slot += cd->name_entry_size;
               }
 
+            /* For non-duplicate names, check for a duplicate number before
+            adding the new name. */
+
+            if (!dupname)
+              {
+              uschar *cslot = cd->name_table;
+              for (i = 0; i < cd->names_found; i++)
+                {
+                if (cslot != slot)
+                  {
+                  if (GET2(cslot, 0) == cd->bracount + 1)
+                    {
+                    *errorcodeptr = ERR65;
+                    goto FAILED;
+                    }
+                  }
+                else i--;
+                cslot += cd->name_entry_size;
+                }
+              }
+
             PUT2(slot, 0, cd->bracount + 1);
             memcpy(slot + 2, name, namelen);
             slot[2+namelen] = 0;
             }
           }
 
-        /* In both cases, count the number of names we've encountered. */
+        /* In both pre-compile and compile, count the number of names we've
+        encountered. */
 
-        ptr++;                    /* Move past > or ' */
         cd->names_found++;
+        ptr++;                    /* Move past > or ' */
         goto NUMBERED_GROUP;
 
 
         /* ------------------------------------------------------------ */
-        case '&':                 /* Perl recursion/subroutine syntax */
-        terminator = ')';
+        case CHAR_AMPERSAND:            /* Perl recursion/subroutine syntax */
+        terminator = CHAR_RIGHT_PARENTHESIS;
         is_recurse = TRUE;
         /* Fall through */
 
@@ -4592,7 +4950,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             recno = GET2(slot, 0);
             }
           else if ((recno =                /* Forward back reference */
-                    find_parens(ptr, cd, name, namelen,
+                    find_parens(cd, name, namelen,
                       (options & PCRE_EXTENDED) != 0)) <= 0)
             {
             *errorcodeptr = ERR15;
@@ -4608,18 +4966,18 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
 
         /* ------------------------------------------------------------ */
-        case 'R':                 /* Recursion */
+        case CHAR_R:              /* Recursion */
         ptr++;                    /* Same as (?0)      */
         /* Fall through */
 
 
         /* ------------------------------------------------------------ */
-        case '-': case '+':
-        case '0': case '1': case '2': case '3': case '4':   /* Recursion or */
-        case '5': case '6': case '7': case '8': case '9':   /* subroutine */
+        case CHAR_MINUS: case CHAR_PLUS:  /* Recursion or subroutine */
+        case CHAR_0: case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4:
+        case CHAR_5: case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
           {
           const uschar *called;
-          terminator = ')';
+          terminator = CHAR_RIGHT_PARENTHESIS;
 
           /* Come here from the \g<...> and \g'...' code (Oniguruma
           compatibility). However, the syntax has been checked to ensure that
@@ -4629,25 +4987,25 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
           HANDLE_NUMERICAL_RECURSION:
 
-          if ((refsign = *ptr) == '+')
+          if ((refsign = *ptr) == CHAR_PLUS)
             {
             ptr++;
-            if (g_ascii_isdigit (*ptr) == 0)
+            if (g_ascii_isdigit(*ptr) == 0)
               {
               *errorcodeptr = ERR63;
               goto FAILED;
               }
             }
-          else if (refsign == '-')
+          else if (refsign == CHAR_MINUS)
             {
-            if (g_ascii_isdigit (ptr[1]) == 0)
+            if (g_ascii_isdigit(ptr[1]) == 0)
               goto OTHER_CHAR_AFTER_QUERY;
             ptr++;
             }
 
           recno = 0;
-          while(g_ascii_isdigit (*ptr) != 0)
-            recno = recno * 10 + *ptr++ - '0';
+          while(g_ascii_isdigit(*ptr) != 0)
+            recno = recno * 10 + *ptr++ - CHAR_0;
 
           if (*ptr != terminator)
             {
@@ -4655,7 +5013,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             goto FAILED;
             }
 
-          if (refsign == '-')
+          if (refsign == CHAR_MINUS)
             {
             if (recno == 0)
               {
@@ -4669,7 +5027,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
               goto FAILED;
               }
             }
-          else if (refsign == '+')
+          else if (refsign == CHAR_PLUS)
             {
             if (recno == 0)
               {
@@ -4696,18 +5054,24 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
           if (lengthptr == NULL)
             {
             *code = OP_END;
-            if (recno != 0) called = find_bracket(cd->start_code, utf8, recno);
+            if (recno != 0)
+              called = _pcre_find_bracket(cd->start_code, utf8, recno);
 
             /* Forward reference */
 
             if (called == NULL)
               {
-              if (find_parens(ptr, cd, NULL, recno,
+              if (find_parens(cd, NULL, recno,
                     (options & PCRE_EXTENDED) != 0) < 0)
                 {
                 *errorcodeptr = ERR15;
                 goto FAILED;
                 }
+
+              /* Fudge the value of "called" so that when it is inserted as an
+              offset below, what is actually inserted is the reference number
+              of the group. */
+
               called = cd->start_code + recno;
               PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);
               }
@@ -4717,7 +5081,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
             recursion that could loop for ever, and diagnose that case. */
 
             else if (GET(called, 1) == 0 &&
-                     could_be_empty(called, code, bcptr, utf8))
+                     could_be_empty(called, code, bcptr, utf8, cd))
               {
               *errorcodeptr = ERR40;
               goto FAILED;
@@ -4755,23 +5119,23 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         set = unset = 0;
         optset = &set;
 
-        while (*ptr != ')' && *ptr != ':')
+        while (*ptr != CHAR_RIGHT_PARENTHESIS && *ptr != CHAR_COLON)
           {
           switch (*ptr++)
             {
-            case '-': optset = &unset; break;
+            case CHAR_MINUS: optset = &unset; break;
 
-            case 'J':    /* Record that it changed in the external options */
+            case CHAR_J:    /* Record that it changed in the external options */
             *optset |= PCRE_DUPNAMES;
             cd->external_flags |= PCRE_JCHANGED;
             break;
 
-            case 'i': *optset |= PCRE_CASELESS; break;
-            case 'm': *optset |= PCRE_MULTILINE; break;
-            case 's': *optset |= PCRE_DOTALL; break;
-            case 'x': *optset |= PCRE_EXTENDED; break;
-            case 'U': *optset |= PCRE_UNGREEDY; break;
-            case 'X': *optset |= PCRE_EXTRA; break;
+            case CHAR_i: *optset |= PCRE_CASELESS; break;
+            case CHAR_m: *optset |= PCRE_MULTILINE; break;
+            case CHAR_s: *optset |= PCRE_DOTALL; break;
+            case CHAR_x: *optset |= PCRE_EXTENDED; break;
+            case CHAR_U: *optset |= PCRE_UNGREEDY; break;
+            case CHAR_X: *optset |= PCRE_EXTRA; break;
 
             default:  *errorcodeptr = ERR12;
                       ptr--;    /* Correct the offset */
@@ -4805,14 +5169,14 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         options if this setting actually changes any of them, and reset the
         greedy defaults and the case value for firstbyte and reqbyte. */
 
-        if (*ptr == ')')
+        if (*ptr == CHAR_RIGHT_PARENTHESIS)
           {
           if (code == cd->start_code + 1 + LINK_SIZE &&
                (lengthptr == NULL || *lengthptr == 2 + 2*LINK_SIZE))
             {
             cd->external_options = newoptions;
             }
-         else
+          else
             {
             if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
               {
@@ -4945,7 +5309,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
     /* Error if hit end of pattern */
 
-    if (*ptr != ')')
+    if (*ptr != CHAR_RIGHT_PARENTHESIS)
       {
       *errorcodeptr = ERR14;
       goto FAILED;
@@ -5043,7 +5407,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
     We can test for values between ESC_b and ESC_Z for the latter; this may
     have to change if any new ones are ever created. */
 
-    case '\\':
+    case CHAR_BACKSLASH:
     tempptr = ptr;
     c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE);
     if (*errorcodeptr != 0) goto FAILED;
@@ -5052,8 +5416,9 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
       {
       if (-c == ESC_Q)            /* Handle start of quoted string */
         {
-        if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */
-          else inescq = TRUE;
+        if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
+          ptr += 2;               /* avoid empty string */
+            else inescq = TRUE;
         continue;
         }
 
@@ -5081,7 +5446,8 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         {
         const uschar *p;
         save_hwm = cd->hwm;   /* Normally this is set when '(' is read */
-        terminator = (*(++ptr) == '<')? '>' : '\'';
+        terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
+          CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
 
         /* These two statements stop the compiler for warning about possibly
         unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In
@@ -5093,7 +5459,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
         /* Test for a name */
 
-        if (ptr[1] != '+' && ptr[1] != '-')
+        if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
           {
           BOOL isnumber = TRUE;
           for (p = ptr + 1; *p != 0 && *p != terminator; p++)
@@ -5118,7 +5484,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         /* Test a signed number in angle brackets or quotes. */
 
         p = ptr + 2;
-        while (g_ascii_isdigit (*p) != 0) p++;
+        while (g_ascii_isdigit(*p) != 0) p++;
         if (*p != terminator)
           {
           *errorcodeptr = ERR57;
@@ -5131,10 +5497,13 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
       /* \k<name> or \k'name' is a back reference by name (Perl syntax).
       We also support \k{name} (.NET syntax) */
 
-      if (-c == ESC_k && (ptr[1] == '<' || ptr[1] == '\'' || ptr[1] == '{'))
+      if (-c == ESC_k && (ptr[1] == CHAR_LESS_THAN_SIGN ||
+          ptr[1] == CHAR_APOSTROPHE || ptr[1] == CHAR_LEFT_CURLY_BRACKET))
         {
         is_recurse = FALSE;
-        terminator = (*(++ptr) == '<')? '>' : (*ptr == '\'')? '\'' : '}';
+        terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
+          CHAR_GREATER_THAN_SIGN : (*ptr == CHAR_APOSTROPHE)?
+          CHAR_APOSTROPHE : CHAR_RIGHT_CURLY_BRACKET;
         goto NAMED_REF_OR_RECURSE;
         }
 
@@ -5144,6 +5513,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
       if (-c >= ESC_REF)
         {
+        open_capitem *oc;
         recno = -c - ESC_REF;
 
         HANDLE_REFERENCE:    /* Come here from named backref handling */
@@ -5153,6 +5523,19 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
         PUT2INC(code, 0, recno);
         cd->backref_map |= (recno < 32)? (1 << recno) : 1;
         if (recno > cd->top_backref) cd->top_backref = recno;
+
+        /* Check to see if this back reference is recursive, that is, it
+        is inside the group that it references. A flag is set so that the
+        group can be made atomic. */
+
+        for (oc = cd->open_caps; oc != NULL; oc = oc->next)
+          {
+          if (oc->number == recno)
+            {
+            oc->flag = TRUE;
+            break;
+            }
+          }
         }
 
       /* So are Unicode property matches, if supported. */
@@ -5237,7 +5620,7 @@ we set the flag only if there is a literal "\r" or "\n" in the class. */
 
     /* Remember if \r or \n were seen */
 
-    if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n')
+    if (mcbuffer[0] == CHAR_CR || mcbuffer[0] == CHAR_NL)
       cd->external_flags |= PCRE_HASCRORLF;
 
     /* Set the first and required bytes appropriately. If no previous first
@@ -5335,15 +5718,18 @@ uschar *code = *codeptr;
 uschar *last_branch = code;
 uschar *start_bracket = code;
 uschar *reverse_count = NULL;
+open_capitem capitem;
+int capnumber = 0;
 int firstbyte, reqbyte;
 int branchfirstbyte, branchreqbyte;
 int length;
 int orig_bracount;
 int max_bracount;
+int old_external_options = cd->external_options;
 branch_chain bc;
 
 bc.outer = bcptr;
-bc.current = code;
+bc.current_branch = code;
 
 firstbyte = reqbyte = REQ_UNSET;
 
@@ -5361,6 +5747,19 @@ the code that abstracts option settings at the start of the pattern and makes
 them global. It tests the value of length for (2 + 2*LINK_SIZE) in the
 pre-compile phase to find out whether anything has yet been compiled or not. */
 
+/* If this is a capturing subpattern, add to the chain of open capturing items
+so that we can detect them if (*ACCEPT) is encountered. This is also used to
+detect groups that contain recursive back references to themselves. */
+
+if (*code == OP_CBRA)
+  {
+  capnumber = GET2(code, 1 + LINK_SIZE);
+  capitem.number = capnumber;
+  capitem.next = cd->open_caps;
+  capitem.flag = FALSE;
+  cd->open_caps = &capitem;
+  }
+
 /* Offset is set zero to mark that this bracket is still open */
 
 PUT(code, 1, 0);
@@ -5405,6 +5804,15 @@ for (;;)
     return FALSE;
     }
 
+  /* If the external options have changed during this branch, it means that we
+  are at the top level, and a leading option setting has been encountered. We
+  need to re-set the original option values to take account of this so that,
+  during the pre-compile phase, we know to allow for a re-set at the start of
+  subsequent branches. */
+
+  if (old_external_options != cd->external_options)
+    oldims = cd->external_options & PCRE_IMS;
+
   /* Keep the highest bracket count in case (?| was used and some branch
   has fewer than the rest. */
 
@@ -5455,21 +5863,29 @@ for (;;)
 
     /* If lookbehind, check that this branch matches a fixed-length string, and
     put the length into the OP_REVERSE item. Temporarily mark the end of the
-    branch with OP_END. */
+    branch with OP_END. If the branch contains OP_RECURSE, the result is -3
+    because there may be forward references that we can't check here. Set a
+    flag to cause another lookbehind check at the end. Why not do it all at the
+    end? Because common, erroneous checks are picked up here and the offset of
+    the problem can be shown. */
 
     if (lookbehind)
       {
       int fixed_length;
       *code = OP_END;
-      fixed_length = find_fixedlength(last_branch, options);
+      fixed_length = find_fixedlength(last_branch, options, FALSE, cd);
       DPRINTF(("fixed length = %d\n", fixed_length));
-      if (fixed_length < 0)
+      if (fixed_length == -3)
+        {
+        cd->check_lookbehind = TRUE;
+        }
+      else if (fixed_length < 0)
         {
         *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25;
         *ptrptr = ptr;
         return FALSE;
         }
-      PUT(reverse_count, 0, fixed_length);
+      else { PUT(reverse_count, 0, fixed_length); }
       }
     }
 
@@ -5482,7 +5898,7 @@ for (;;)
   compile a resetting op-code following, except at the very end of the pattern.
   Return leaving the pointer at the terminating char. */
 
-  if (*ptr != '|')
+  if (*ptr != CHAR_VERTICAL_LINE)
     {
     if (lengthptr == NULL)
       {
@@ -5503,9 +5919,30 @@ for (;;)
     PUT(code, 1, code - start_bracket);
     code += 1 + LINK_SIZE;
 
-    /* Resetting option if needed */
+    /* If it was a capturing subpattern, check to see if it contained any
+    recursive back references. If so, we must wrap it in atomic brackets.
+    In any event, remove the block from the chain. */
 
-    if ((options & PCRE_IMS) != oldims && *ptr == ')')
+    if (capnumber > 0)
+      {
+      if (cd->open_caps->flag)
+        {
+        memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
+          code - start_bracket);
+        *start_bracket = OP_ONCE;
+        code += 1 + LINK_SIZE;
+        PUT(start_bracket, 1, code - start_bracket);
+        *code = OP_KET;
+        PUT(code, 1, code - start_bracket);
+        code += 1 + LINK_SIZE;
+        length += 2 + 2*LINK_SIZE;
+        }
+      cd->open_caps = cd->open_caps->next;
+      }
+
+    /* Reset options if needed. */
+
+    if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
       {
       *code++ = OP_OPT;
       *code++ = oldims;
@@ -5552,7 +5989,7 @@ for (;;)
     {
     *code = OP_ALT;
     PUT(code, 1, code - last_branch);
-    bc.current = last_branch = code;
+    bc.current_branch = last_branch = code;
     code += 1 + LINK_SIZE;
     }
 
@@ -5687,6 +6124,34 @@ do {
      NULL, 0, FALSE);
    register int op = *scode;
 
+   /* If we are at the start of a conditional assertion group, *both* the
+   conditional assertion *and* what follows the condition must satisfy the test
+   for start of line. Other kinds of condition fail. Note that there may be an
+   auto-callout at the start of a condition. */
+
+   if (op == OP_COND)
+     {
+     scode += 1 + LINK_SIZE;
+     if (*scode == OP_CALLOUT) scode += _pcre_OP_lengths[OP_CALLOUT];
+     switch (*scode)
+       {
+       case OP_CREF:
+       case OP_NCREF:
+       case OP_RREF:
+       case OP_NRREF:
+       case OP_DEF:
+       return FALSE;
+
+       default:     /* Assertion */
+       if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
+       do scode += GET(scode, 1); while (*scode == OP_ALT);
+       scode += 1 + LINK_SIZE;
+       break;
+       }
+     scode = first_significant_code(scode, NULL, 0, FALSE);
+     op = *scode;
+     }
+
    /* Non-capturing brackets */
 
    if (op == OP_BRA)
@@ -5705,8 +6170,10 @@ do {
 
    /* Other brackets */
 
-   else if (op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
-     { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; }
+   else if (op == OP_ASSERT || op == OP_ONCE)
+     {
+     if (!is_startline(scode, bracket_map, backref_map)) return FALSE;
+     }
 
    /* .* means "start at start or after \n" if it isn't in brackets that
    may be referenced. */
@@ -5840,9 +6307,7 @@ int length = 1;  /* For final END opcode */
 int firstbyte, reqbyte, newline;
 int errorcode = 0;
 int skipatstart = 0;
-#ifdef SUPPORT_UTF8
-BOOL utf8;
-#endif
+BOOL utf8 = (options & PCRE_UTF8) != 0;
 size_t size;
 uschar *code;
 const uschar *codestart;
@@ -5885,30 +6350,6 @@ if (erroroffset == NULL)
 
 *erroroffset = 0;
 
-/* Can't support UTF8 unless PCRE has been compiled to include the code. */
-
-#ifdef SUPPORT_UTF8
-utf8 = (options & PCRE_UTF8) != 0;
-if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
-     (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
-  {
-  errorcode = ERR44;
-  goto PCRE_EARLY_ERROR_RETURN2;
-  }
-#else
-if ((options & PCRE_UTF8) != 0)
-  {
-  errorcode = ERR32;
-  goto PCRE_EARLY_ERROR_RETURN;
-  }
-#endif
-
-if ((options & ~PUBLIC_OPTIONS) != 0)
-  {
-  errorcode = ERR17;
-  goto PCRE_EARLY_ERROR_RETURN;
-  }
-
 /* Set up pointers to the individual character tables */
 
 if (tables == NULL) tables = _pcre_default_tables;
@@ -5917,28 +6358,40 @@ cd->fcc = tables + fcc_offset;
 cd->cbits = tables + cbits_offset;
 cd->ctypes = tables + ctypes_offset;
 
+/* Check that all undefined public option bits are zero */
+
+if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0)
+  {
+  errorcode = ERR17;
+  goto PCRE_EARLY_ERROR_RETURN;
+  }
+
 /* Check for global one-time settings at the start of the pattern, and remember
 the offset for later. */
 
-while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
+while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
+       ptr[skipatstart+1] == CHAR_ASTERISK)
   {
   int newnl = 0;
   int newbsr = 0;
 
-  if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0)
+  if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
+    { skipatstart += 7; options |= PCRE_UTF8; continue; }
+
+  if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
     { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
-  else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3)  == 0)
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3)  == 0)
     { skipatstart += 5; newnl = PCRE_NEWLINE_LF; }
-  else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5)  == 0)
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_CRLF_RIGHTPAR, 5)  == 0)
     { skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; }
-  else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0)
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANY_RIGHTPAR, 4) == 0)
     { skipatstart += 6; newnl = PCRE_NEWLINE_ANY; }
-  else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8)  == 0)
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_ANYCRLF_RIGHTPAR, 8) == 0)
     { skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; }
 
-  else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0)
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_ANYCRLF_RIGHTPAR, 12) == 0)
     { skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; }
-  else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0)
+  else if (strncmp((char *)(ptr+skipatstart+2), STRING_BSR_UNICODE_RIGHTPAR, 12) == 0)
     { skipatstart += 14; newbsr = PCRE_BSR_UNICODE; }
 
   if (newnl != 0)
@@ -5948,6 +6401,23 @@ while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*')
   else break;
   }
 
+/* Can't support UTF8 unless PCRE has been compiled to include the code. */
+
+#ifdef SUPPORT_UTF8
+if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
+     (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
+  {
+  errorcode = ERR44;
+  goto PCRE_EARLY_ERROR_RETURN2;
+  }
+#else
+if (utf8)
+  {
+  errorcode = ERR32;
+  goto PCRE_EARLY_ERROR_RETURN;
+  }
+#endif
+
 /* Check validity of \R options. */
 
 switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
@@ -5966,10 +6436,10 @@ current code allows for fixed one- or two-byte sequences, plus "any" and
 switch (options & PCRE_NEWLINE_BITS)
   {
   case 0: newline = NEWLINE; break;   /* Build-time default */
-  case PCRE_NEWLINE_CR: newline = '\r'; break;
-  case PCRE_NEWLINE_LF: newline = '\n'; break;
+  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
+  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
   case PCRE_NEWLINE_CR+
-       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
+       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
   case PCRE_NEWLINE_ANY: newline = -1; break;
   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
   default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
@@ -6030,6 +6500,7 @@ cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
 cd->req_varyopt = 0;
 cd->external_options = options;
 cd->external_flags = 0;
+cd->open_caps = NULL;
 
 /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
 don't need to look at the result of the function here. The initial options have
@@ -6104,6 +6575,8 @@ cd->start_code = codestart;
 cd->hwm = cworkspace;
 cd->req_varyopt = 0;
 cd->had_accept = FALSE;
+cd->check_lookbehind = FALSE;
+cd->open_caps = NULL;
 
 /* Set up a starting, non-extracting bracket, then compile the expression. On
 error, errorcode will be set non-zero, so we don't need to look at the result
@@ -6129,7 +6602,7 @@ if debugging, leave the test till after things are printed out. */
 
 *code++ = OP_END;
 
-#ifndef DEBUG
+#ifndef PCRE_DEBUG
 if (code - codestart > length) errorcode = ERR23;
 #endif
 
@@ -6142,7 +6615,7 @@ while (errorcode == 0 && cd->hwm > cworkspace)
   cd->hwm -= LINK_SIZE;
   offset = GET(cd->hwm, 0);
   recno = GET(codestart, offset);
-  groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno);
+  groupptr = _pcre_find_bracket(codestart, utf8, recno);
   if (groupptr == NULL) errorcode = ERR53;
     else PUT(((uschar *)codestart), offset, groupptr - codestart);
   }
@@ -6152,6 +6625,47 @@ subpattern. */
 
 if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15;
 
+/* If there were any lookbehind assertions that contained OP_RECURSE
+(recursions or subroutine calls), a flag is set for them to be checked here,
+because they may contain forward references. Actual recursions can't be fixed
+length, but subroutine calls can. It is done like this so that those without
+OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The
+exceptional ones forgo this. We scan the pattern to check that they are fixed
+length, and set their lengths. */
+
+if (cd->check_lookbehind)
+  {
+  uschar *cc = (uschar *)codestart;
+
+  /* Loop, searching for OP_REVERSE items, and process those that do not have
+  their length set. (Actually, it will also re-process any that have a length
+  of zero, but that is a pathological case, and it does no harm.) When we find
+  one, we temporarily terminate the branch it is in while we scan it. */
+
+  for (cc = (uschar *)_pcre_find_bracket(codestart, utf8, -1);
+       cc != NULL;
+       cc = (uschar *)_pcre_find_bracket(cc, utf8, -1))
+    {
+    if (GET(cc, 1) == 0)
+      {
+      int fixed_length;
+      uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE);
+      int end_op = *be;
+      *be = OP_END;
+      fixed_length = find_fixedlength(cc, re->options, TRUE, cd);
+      *be = end_op;
+      DPRINTF(("fixed length = %d\n", fixed_length));
+      if (fixed_length < 0)
+        {
+        errorcode = (fixed_length == -2)? ERR36 : ERR25;
+        break;
+        }
+      PUT(cc, 1, fixed_length);
+      }
+    cc += 1 + LINK_SIZE;
+    }
+  }
+
 /* Failed to compile, or error while post-processing */
 
 if (errorcode != 0)
@@ -6212,8 +6726,7 @@ if (reqbyte >= 0 &&
 /* Print out the compiled data if debugging is enabled. This is never the
 case when building a production library. */
 
-#ifdef DEBUG
-
+#ifdef PCRE_DEBUG
 printf("Length = %d top_bracket = %d top_backref = %d\n",
   length, re->top_bracket, re->top_backref);
 
@@ -6250,7 +6763,7 @@ if (code - codestart > length)
   if (errorcodeptr != NULL) *errorcodeptr = ERR23;
   return NULL;
   }
-#endif   /* DEBUG */
+#endif   /* PCRE_DEBUG */
 
 return (pcre *)re;
 }
diff --git a/glib/pcre/pcre_config.c b/glib/pcre/pcre_config.c
index 114f0fb..78e8560 100644
--- a/glib/pcre/pcre_config.c
+++ b/glib/pcre/pcre_config.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -104,11 +104,11 @@ switch (what)
   break;
 
   case PCRE_CONFIG_MATCH_LIMIT:
-  *((unsigned int *)where) = MATCH_LIMIT;
+  *((unsigned long int *)where) = MATCH_LIMIT;
   break;
 
   case PCRE_CONFIG_MATCH_LIMIT_RECURSION:
-  *((unsigned int *)where) = MATCH_LIMIT_RECURSION;
+  *((unsigned long int *)where) = MATCH_LIMIT_RECURSION;
   break;
 
   case PCRE_CONFIG_STACKRECURSE:
diff --git a/glib/pcre/pcre_dfa_exec.c b/glib/pcre/pcre_dfa_exec.c
index 9a73a52..c241f5b 100644
--- a/glib/pcre/pcre_dfa_exec.c
+++ b/glib/pcre/pcre_dfa_exec.c
@@ -3,10 +3,11 @@
 *************************************************/
 
 /* PCRE is a library of functions to support regular expressions whose syntax
-and semantics are as close as possible to those of the Perl 5 language.
+and semantics are as close as possible to those of the Perl 5 language (but see
+below for why this module is different).
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -44,6 +45,34 @@ FSM). This is NOT Perl- compatible, but it has advantages in certain
 applications. */
 
 
+/* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
+the performance of his patterns greatly. I could not use it as it stood, as it
+was not thread safe, and made assumptions about pattern sizes. Also, it caused
+test 7 to loop, and test 9 to crash with a segfault.
+
+The issue is the check for duplicate states, which is done by a simple linear
+search up the state list. (Grep for "duplicate" below to find the code.) For
+many patterns, there will never be many states active at one time, so a simple
+linear search is fine. In patterns that have many active states, it might be a
+bottleneck. The suggested code used an indexing scheme to remember which states
+had previously been used for each character, and avoided the linear search when
+it knew there was no chance of a duplicate. This was implemented when adding
+states to the state lists.
+
+I wrote some thread-safe, not-limited code to try something similar at the time
+of checking for duplicates (instead of when adding states), using index vectors
+on the stack. It did give a 13% improvement with one specially constructed
+pattern for certain subject strings, but on other strings and on many of the
+simpler patterns in the test suite it did worse. The major problem, I think,
+was the extra time to initialize the index. This had to be done for each call
+of internal_dfa_exec(). (The supplied patch used a static vector, initialized
+only once - I suspect this was the cause of the problems with the tests.)
+
+Overall, I concluded that the gains in some cases did not outweigh the losses
+in others, so I abandoned this code. */
+
+
+
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
@@ -60,7 +89,6 @@ applications. */
 #define SP "                   "
 
 
-
 /*************************************************
 *      Code parameters and static tables         *
 *************************************************/
@@ -81,16 +109,18 @@ never stored, so we push them well clear of the normal opcodes. */
 character that is to be tested in some way. This makes is possible to
 centralize the loading of these characters. In the case of Type * etc, the
 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
-small value. ***NOTE*** If the start of this table is modified, the two tables
-that follow must also be modified. */
+small value. Non-zero values in the table are the offsets from the opcode where
+the character is to be found. ***NOTE*** If the start of this table is
+modified, the three tables that follow must also be modified. */
 
 static const uschar coptable[] = {
   0,                             /* End                                    */
   0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
   0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
   0, 0, 0,                       /* Any, AllAny, Anybyte                   */
-  0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
+  0, 0,                          /* \P, \p                                 */
   0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
+  0,                             /* \X                                     */
   0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
   1,                             /* Char                                   */
   1,                             /* Charnc                                 */
@@ -127,12 +157,69 @@ static const uschar coptable[] = {
   0,                             /* Reverse                                */
   0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
   0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
-  0,                             /* CREF                                   */
-  0,                             /* RREF                                   */
+  0, 0,                          /* CREF, NCREF                            */
+  0, 0,                          /* RREF, NRREF                            */
   0,                             /* DEF                                    */
   0, 0,                          /* BRAZERO, BRAMINZERO                    */
   0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
-  0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
+  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+};
+
+/* This table identifies those opcodes that inspect a character. It is used to
+remember the fact that a character could have been inspected when the end of
+the subject is reached. ***NOTE*** If the start of this table is modified, the
+two tables that follow must also be modified. */
+
+static const uschar poptable[] = {
+  0,                             /* End                                    */
+  0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
+  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
+  1, 1, 1,                       /* Any, AllAny, Anybyte                   */
+  1, 1,                          /* \P, \p                                 */
+  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
+  1,                             /* \X                                     */
+  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
+  1,                             /* Char                                   */
+  1,                             /* Charnc                                 */
+  1,                             /* not                                    */
+  /* Positive single-char repeats                                          */
+  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
+  1, 1, 1,                       /* upto, minupto, exact                   */
+  1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
+  /* Negative single-char repeats - only for chars < 256                   */
+  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
+  1, 1, 1,                       /* NOT upto, minupto, exact               */
+  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
+  /* Positive type repeats                                                 */
+  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
+  1, 1, 1,                       /* Type upto, minupto, exact              */
+  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
+  /* Character class & ref repeats                                         */
+  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
+  1, 1,                          /* CRRANGE, CRMINRANGE                    */
+  1,                             /* CLASS                                  */
+  1,                             /* NCLASS                                 */
+  1,                             /* XCLASS - variable length               */
+  0,                             /* REF                                    */
+  0,                             /* RECURSE                                */
+  0,                             /* CALLOUT                                */
+  0,                             /* Alt                                    */
+  0,                             /* Ket                                    */
+  0,                             /* KetRmax                                */
+  0,                             /* KetRmin                                */
+  0,                             /* Assert                                 */
+  0,                             /* Assert not                             */
+  0,                             /* Assert behind                          */
+  0,                             /* Assert behind not                      */
+  0,                             /* Reverse                                */
+  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
+  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
+  0, 0,                          /* CREF, NCREF                            */
+  0, 0,                          /* RREF, NRREF                            */
+  0,                             /* DEF                                    */
+  0, 0,                          /* BRAZERO, BRAMINZERO                    */
+  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
+  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
 };
 
 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@@ -170,7 +257,7 @@ typedef struct stateblock {
 #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
 
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 /*************************************************
 *             Print character string             *
 *************************************************/
@@ -390,6 +477,11 @@ if (*first_op == OP_REVERSE)
     current_subject -= gone_back;
     }
 
+  /* Save the earliest consulted character */
+
+  if (current_subject < md->start_used_ptr)
+    md->start_used_ptr = current_subject;
+
   /* Now we can process the individual branches. */
 
   end_code = this_start_code;
@@ -454,6 +546,8 @@ for (;;)
   int i, j;
   int clen, dlen;
   unsigned int c, d;
+  int forced_fail = 0;
+  BOOL could_continue = FALSE;
 
   /* Make the new state list into the active state list and empty the
   new state list. */
@@ -467,7 +561,7 @@ for (;;)
   workspace[0] ^= 1;              /* Remember for the restarting feature */
   workspace[1] = active_count;
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
   printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
   pchars((uschar *)ptr, strlen((char *)ptr), stdout);
   printf("\"\n");
@@ -511,9 +605,9 @@ for (;;)
     stateblock *current_state = active_states + i;
     const uschar *code;
     int state_offset = current_state->offset;
-    int count, codevalue;
+    int count, codevalue, rrc;
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
     printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
     if (clen == 0) printf("EOL\n");
       else if (c > 32 && c < 127) printf("'%c'\n", c);
@@ -543,7 +637,9 @@ for (;;)
         }
       }
 
-    /* Check for a duplicate state with the same count, and skip if found. */
+    /* Check for a duplicate state with the same count, and skip if found.
+    See the note at the head of this module about the possibility of improving
+    performance here. */
 
     for (j = 0; j < i; j++)
       {
@@ -560,6 +656,12 @@ for (;;)
     code = start_code + state_offset;
     codevalue = *code;
 
+    /* If this opcode inspects a character, but we are at the end of the
+    subject, remember the fact for use when testing for a partial match. */
+
+    if (clen == 0 && poptable[codevalue] != 0)
+      could_continue = TRUE;
+
     /* If this opcode is followed by an inline character, load it. It is
     tempting to test for the presence of a subject character here, but that
     is wrong, because sometimes zero repetitions of the subject are
@@ -606,11 +708,24 @@ for (;;)
 
     switch (codevalue)
       {
+/* ========================================================================== */
+      /* These cases are never obeyed. This is a fudge that causes a compile-
+      time error if the vectors coptable or poptable, which are indexed by
+      opcode, are not the correct length. It seems to be the only way to do
+      such a check at compile time, as the sizeof() operator does not work
+      in the C preprocessor. */
+
+      case OP_TABLE_LENGTH:
+      case OP_TABLE_LENGTH +
+        ((sizeof(coptable) == OP_TABLE_LENGTH) &&
+         (sizeof(poptable) == OP_TABLE_LENGTH)):
+      break;
 
 /* ========================================================================== */
       /* Reached a closing bracket. If not at the end of the pattern, carry
       on with the next opcode. Otherwise, unless we have an empty string and
-      PCRE_NOTEMPTY is set, save the match data, shifting up all previous
+      PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
+      start of the subject, save the match data, shifting up all previous
       matches so we always have the longest first. */
 
       case OP_KET:
@@ -624,26 +739,32 @@ for (;;)
           ADD_ACTIVE(state_offset - GET(code, 1), 0);
           }
         }
-      else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)
+      else
         {
-        if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
-          else if (match_count > 0 && ++match_count * 2 >= offsetcount)
-            match_count = 0;
-        count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
-        if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
-        if (offsetcount >= 2)
-          {
-          offsets[0] = current_subject - start_subject;
-          offsets[1] = ptr - start_subject;
-          DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
-            offsets[1] - offsets[0], current_subject));
-          }
-        if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
+        if (ptr > current_subject ||
+            ((md->moptions & PCRE_NOTEMPTY) == 0 &&
+              ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
+                current_subject > start_subject + md->start_offset)))
           {
-          DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
-            "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
-            match_count, rlevel*2-2, SP));
-          return match_count;
+          if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
+            else if (match_count > 0 && ++match_count * 2 >= offsetcount)
+              match_count = 0;
+          count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
+          if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
+          if (offsetcount >= 2)
+            {
+            offsets[0] = current_subject - start_subject;
+            offsets[1] = ptr - start_subject;
+            DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
+              offsets[1] - offsets[0], current_subject));
+            }
+          if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
+            {
+            DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
+              "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel,
+              match_count, rlevel*2-2, SP));
+            return match_count;
+            }
           }
         }
       break;
@@ -757,7 +878,7 @@ for (;;)
       if ((md->moptions & PCRE_NOTEOL) == 0)
         {
         if (clen == 0 ||
-            (IS_NEWLINE(ptr) &&
+            ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
                ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
             ))
           { ADD_ACTIVE(state_offset + 1, 0); }
@@ -794,6 +915,7 @@ for (;;)
         if (ptr > start_subject)
           {
           const uschar *temp = ptr - 1;
+          if (temp < md->start_used_ptr) md->start_used_ptr = temp;
 #ifdef SUPPORT_UTF8
           if (utf8) BACKCHAR(temp);
 #endif
@@ -802,8 +924,9 @@ for (;;)
           }
         else left_word = 0;
 
-        if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
-          else right_word = 0;
+        if (clen > 0)
+          right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
+        else right_word = 0;
 
         if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
           { ADD_ACTIVE(state_offset + 1, 0); }
@@ -2157,11 +2280,12 @@ for (;;)
 
 /* ========================================================================== */
       /* These are the opcodes for fancy brackets of various kinds. We have
-      to use recursion in order to handle them. The "always failing" assersion
-      (?!) is optimised when compiling to OP_FAIL, so we have to support that,
+      to use recursion in order to handle them. The "always failing" assertion
+      (?!) is optimised to OP_FAIL when compiling, so we have to support that,
       though the other "backtracking verbs" are not supported. */
 
       case OP_FAIL:
+      forced_fail++;    /* Count FAILs for multiple states */
       break;
 
       case OP_ASSERT:
@@ -2189,6 +2313,7 @@ for (;;)
           rlevel,                               /* function recursion level */
           recursing);                           /* pass on regex recursion */
 
+        if (rc == PCRE_ERROR_DFA_UITEM) return rc;
         if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
             { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
         }
@@ -2200,29 +2325,60 @@ for (;;)
         {
         int local_offsets[1000];
         int local_workspace[1000];
-        int condcode = code[LINK_SIZE+1];
+        int codelink = GET(code, 1);
+        int condcode;
+
+        /* Because of the way auto-callout works during compile, a callout item
+        is inserted between OP_COND and an assertion condition. This does not
+        happen for the other conditions. */
+
+        if (code[LINK_SIZE+1] == OP_CALLOUT)
+          {
+          rrc = 0;
+          if (pcre_callout != NULL)
+            {
+            pcre_callout_block cb;
+            cb.version          = 1;   /* Version 1 of the callout block */
+            cb.callout_number   = code[LINK_SIZE+2];
+            cb.offset_vector    = offsets;
+            cb.subject          = (PCRE_SPTR)start_subject;
+            cb.subject_length   = end_subject - start_subject;
+            cb.start_match      = current_subject - start_subject;
+            cb.current_position = ptr - start_subject;
+            cb.pattern_position = GET(code, LINK_SIZE + 3);
+            cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
+            cb.capture_top      = 1;
+            cb.capture_last     = -1;
+            cb.callout_data     = md->callout_data;
+            if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
+            }
+          if (rrc > 0) break;                      /* Fail this thread */
+          code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */
+          }
+
+        condcode = code[LINK_SIZE+1];
 
         /* Back reference conditions are not supported */
 
-        if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
+        if (condcode == OP_CREF || condcode == OP_NCREF)
+          return PCRE_ERROR_DFA_UCOND;
 
         /* The DEFINE condition is always false */
 
         if (condcode == OP_DEF)
-          {
-          ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
-          }
+          { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
 
         /* The only supported version of OP_RREF is for the value RREF_ANY,
         which means "test if in any recursion". We can't test for specifically
         recursed groups. */
 
-        else if (condcode == OP_RREF)
+        else if (condcode == OP_RREF || condcode == OP_NRREF)
           {
           int value = GET2(code, LINK_SIZE+2);
           if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
-          if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
-            else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
+          if (recursing > 0)
+            { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
+          else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           }
 
         /* Otherwise, the condition is an assertion */
@@ -2248,11 +2404,12 @@ for (;;)
             rlevel,                               /* function recursion level */
             recursing);                           /* pass on regex recursion */
 
+          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
           if ((rc >= 0) ==
                 (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
             { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }
           else
-            { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
+            { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
           }
         }
       break;
@@ -2404,9 +2561,9 @@ for (;;)
       /* Handle callouts */
 
       case OP_CALLOUT:
+      rrc = 0;
       if (pcre_callout != NULL)
         {
-        int rrc;
         pcre_callout_block cb;
         cb.version          = 1;   /* Version 1 of the callout block */
         cb.callout_number   = code[1];
@@ -2421,8 +2578,9 @@ for (;;)
         cb.capture_last     = -1;
         cb.callout_data     = md->callout_data;
         if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */
-        if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); }
         }
+      if (rrc == 0)
+        { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }
       break;
 
 
@@ -2438,19 +2596,33 @@ for (;;)
   /* We have finished the processing at the current subject character. If no
   new states have been set for the next character, we have found all the
   matches that we are going to find. If we are at the top level and partial
-  matching has been requested, check for appropriate conditions. */
+  matching has been requested, check for appropriate conditions.
+
+  The "forced_fail" variable counts the number of (*F) encountered for the
+  character. If it is equal to the original active_count (saved in
+  workspace[1]) it means that (*F) was found on every active state. In this
+  case we don't want to give a partial match.
+
+  The "could_continue" variable is true if a state could have continued but
+  for the fact that the end of the subject was reached. */
 
   if (new_count <= 0)
     {
-    if (match_count < 0 &&                     /* No matches found */
-        rlevel == 1 &&                         /* Top level match function */
-        (md->moptions & PCRE_PARTIAL) != 0 &&  /* Want partial matching */
-        ptr >= end_subject &&                  /* Reached end of subject */
-        ptr > current_subject)                 /* Matched non-empty string */
+    if (rlevel == 1 &&                               /* Top level, and */
+        could_continue &&                            /* Some could go on */
+        forced_fail != workspace[1] &&               /* Not all forced fail & */
+        (                                            /* either... */
+        (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
+        ||                                           /* or... */
+        ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
+         match_count < 0)                            /* no matches */
+        ) &&                                         /* And... */
+        ptr >= end_subject &&                     /* Reached end of subject */
+        ptr > current_subject)                    /* Matched non-empty string */
       {
       if (offsetcount >= 2)
         {
-        offsets[0] = current_subject - start_subject;
+        offsets[0] = md->start_used_ptr - start_subject;
         offsets[1] = end_subject - start_subject;
         }
       match_count = PCRE_ERROR_PARTIAL;
@@ -2592,6 +2764,7 @@ md->start_code = (const uschar *)argument_re +
     re->name_table_offset + re->name_count * re->name_entry_size;
 md->start_subject = (const unsigned char *)subject;
 md->end_subject = end_subject;
+md->start_offset = start_offset;
 md->moptions = options;
 md->poptions = re->options;
 
@@ -2614,10 +2787,10 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)option
          PCRE_NEWLINE_BITS)
   {
   case 0: newline = NEWLINE; break;   /* Compile-time default */
-  case PCRE_NEWLINE_CR: newline = '\r'; break;
-  case PCRE_NEWLINE_LF: newline = '\n'; break;
+  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
+  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
   case PCRE_NEWLINE_CR+
-       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
+       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
   case PCRE_NEWLINE_ANY: newline = -1; break;
   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
   default: return PCRE_ERROR_BADNEWLINE;
@@ -2696,8 +2869,8 @@ if (!anchored)
     }
   else
     {
-    if (startline && study != NULL &&
-         (study->options & PCRE_STUDY_MAPPED) != 0)
+    if (!startline && study != NULL &&
+         (study->flags & PCRE_STUDY_MAPPED) != 0)
       start_bits = study->start_bits;
     }
   }
@@ -2713,9 +2886,8 @@ if ((re->flags & PCRE_REQCHSET) != 0)
   }
 
 /* Call the main matching function, looping for a non-anchored regex after a
-failed match. Unless restarting, optimize by moving to the first match
-character if possible, when not anchored. Then unless wanting a partial match,
-check for a required later character. */
+failed match. If not restarting, perform certain optimizations at the start of
+a match. */
 
 for (;;)
   {
@@ -2725,11 +2897,10 @@ for (;;)
     {
     const uschar *save_end_subject = end_subject;
 
-    /* Advance to a unique first char if possible. If firstline is TRUE, the
-    start of the match is constrained to the first line of a multiline string.
-    Implement this by temporarily adjusting end_subject so that we stop
-    scanning at a newline. If the match fails at the newline, later code breaks
-    this loop. */
+    /* If firstline is TRUE, the start of the match is constrained to the first
+    line of a multiline string. Implement this by temporarily adjusting
+    end_subject so that we stop scanning at a newline. If the match fails at
+    the newline, later code breaks this loop. */
 
     if (firstline)
       {
@@ -2749,126 +2920,151 @@ for (;;)
       end_subject = t;
       }
 
-    if (first_byte >= 0)
+    /* There are some optimizations that avoid running the match if a known
+    starting point is not found. However, there is an option that disables
+    these, for testing and for ensuring that all callouts do actually occur. */
+
+    if ((options & PCRE_NO_START_OPTIMIZE) == 0)
       {
-      if (first_byte_caseless)
-        while (current_subject < end_subject &&
-               lcc[*current_subject] != first_byte)
-          current_subject++;
-      else
-        while (current_subject < end_subject && *current_subject != first_byte)
-          current_subject++;
-      }
+      /* Advance to a known first byte. */
 
-    /* Or to just after a linebreak for a multiline match if possible */
+      if (first_byte >= 0)
+        {
+        if (first_byte_caseless)
+          while (current_subject < end_subject &&
+                 lcc[*current_subject] != first_byte)
+            current_subject++;
+        else
+          while (current_subject < end_subject &&
+                 *current_subject != first_byte)
+            current_subject++;
+        }
 
-    else if (startline)
-      {
-      if (current_subject > md->start_subject + start_offset)
+      /* Or to just after a linebreak for a multiline match if possible */
+
+      else if (startline)
         {
-#ifdef SUPPORT_UTF8
-        if (utf8)
+        if (current_subject > md->start_subject + start_offset)
           {
-          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+#ifdef SUPPORT_UTF8
+          if (utf8)
             {
-            current_subject++;
-            while(current_subject < end_subject &&
-                  (*current_subject & 0xc0) == 0x80)
+            while (current_subject < end_subject &&
+                   !WAS_NEWLINE(current_subject))
+              {
               current_subject++;
+              while(current_subject < end_subject &&
+                    (*current_subject & 0xc0) == 0x80)
+                current_subject++;
+              }
             }
-          }
-        else
+          else
 #endif
-        while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
-          current_subject++;
-
-        /* If we have just passed a CR and the newline option is ANY or
-        ANYCRLF, and we are now at a LF, advance the match position by one more
-        character. */
-
-        if (current_subject[-1] == '\r' &&
-             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
-             current_subject < end_subject &&
-             *current_subject == '\n')
-          current_subject++;
+          while (current_subject < end_subject && !WAS_NEWLINE(current_subject))
+            current_subject++;
+
+          /* If we have just passed a CR and the newline option is ANY or
+          ANYCRLF, and we are now at a LF, advance the match position by one
+          more character. */
+
+          if (current_subject[-1] == CHAR_CR &&
+               (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
+               current_subject < end_subject &&
+               *current_subject == CHAR_NL)
+            current_subject++;
+          }
         }
-      }
 
-    /* Or to a non-unique first char after study */
+      /* Or to a non-unique first char after study */
 
-    else if (start_bits != NULL)
-      {
-      while (current_subject < end_subject)
+      else if (start_bits != NULL)
         {
-        register unsigned int c = *current_subject;
-        if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
-          else break;
+        while (current_subject < end_subject)
+          {
+          register unsigned int c = *current_subject;
+          if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;
+            else break;
+          }
         }
       }
 
     /* Restore fudged end_subject */
 
     end_subject = save_end_subject;
-    }
-
-  /* If req_byte is set, we know that that character must appear in the subject
-  for the match to succeed. If the first character is set, req_byte must be
-  later in the subject; otherwise the test starts at the match point. This
-  optimization can save a huge amount of work in patterns with nested unlimited
-  repeats that aren't going to match. Writing separate code for cased/caseless
-  versions makes it go faster, as does using an autoincrement and backing off
-  on a match.
-
-  HOWEVER: when the subject string is very, very long, searching to its end can
-  take a long time, and give bad performance on quite ordinary patterns. This
-  showed up when somebody was matching /^C/ on a 32-megabyte string... so we
-  don't do this when the string is sufficiently long.
-
-  ALSO: this processing is disabled when partial matching is requested.
-  */
-
-  if (req_byte >= 0 &&
-      end_subject - current_subject < REQ_BYTE_MAX &&
-      (options & PCRE_PARTIAL) == 0)
-    {
-    register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
 
-    /* We don't need to repeat the search if we haven't yet reached the
-    place we found it at last time. */
+    /* The following two optimizations are disabled for partial matching or if
+    disabling is explicitly requested (and of course, by the test above, this
+    code is not obeyed when restarting after a partial match). */
 
-    if (p > req_byte_ptr)
+    if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&
+        (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
       {
-      if (req_byte_caseless)
+      /* If the pattern was studied, a minimum subject length may be set. This
+      is only a lower bound: possibly no string of that length matches the
+      pattern. Although the value is, strictly, in characters, we treat it as
+      bytes to avoid spending too much time in this optimization. */
+
+      if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
+          (pcre_uint32)(end_subject - current_subject) < study->minlength)
+        return PCRE_ERROR_NOMATCH;
+
+      /* If req_byte is set, we know that that character must appear in the
+      subject for the match to succeed. If the first character is set, req_byte
+      must be later in the subject; otherwise the test starts at the match
+      point. This optimization can save a huge amount of work in patterns with
+      nested unlimited repeats that aren't going to match. Writing separate
+      code for cased/caseless versions makes it go faster, as does using an
+      autoincrement and backing off on a match.
+
+      HOWEVER: when the subject string is very, very long, searching to its end
+      can take a long time, and give bad performance on quite ordinary
+      patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
+      string... so we don't do this when the string is sufficiently long. */
+
+      if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
         {
-        while (p < end_subject)
-          {
-          register int pp = *p++;
-          if (pp == req_byte || pp == req_byte2) { p--; break; }
-          }
-        }
-      else
-        {
-        while (p < end_subject)
+        register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);
+
+        /* We don't need to repeat the search if we haven't yet reached the
+        place we found it at last time. */
+
+        if (p > req_byte_ptr)
           {
-          if (*p++ == req_byte) { p--; break; }
-          }
-        }
+          if (req_byte_caseless)
+            {
+            while (p < end_subject)
+              {
+              register int pp = *p++;
+              if (pp == req_byte || pp == req_byte2) { p--; break; }
+              }
+            }
+          else
+            {
+            while (p < end_subject)
+              {
+              if (*p++ == req_byte) { p--; break; }
+              }
+            }
 
-      /* If we can't find the required character, break the matching loop,
-      which will cause a return or PCRE_ERROR_NOMATCH. */
+          /* If we can't find the required character, break the matching loop,
+          which will cause a return of PCRE_ERROR_NOMATCH. */
 
-      if (p >= end_subject) break;
+          if (p >= end_subject) break;
 
-      /* If we have found the required character, save the point where we
-      found it, so that we don't search again next time round the loop if
-      the start hasn't passed this character yet. */
+          /* If we have found the required character, save the point where we
+          found it, so that we don't search again next time round the loop if
+          the start hasn't passed this character yet. */
 
-      req_byte_ptr = p;
+          req_byte_ptr = p;
+          }
+        }
       }
-    }
+    }   /* End of optimizations that are done when not restarting */
 
   /* OK, now we can do the business */
 
+  md->start_used_ptr = current_subject;
+
   rc = internal_dfa_exec(
     md,                                /* fixed match data */
     md->start_code,                    /* this subexpression's code */
@@ -2903,9 +3099,9 @@ for (;;)
   not contain any explicit matches for \r or \n, and the newline option is CRLF
   or ANY or ANYCRLF, advance the match position by one more character. */
 
-  if (current_subject[-1] == '\r' &&
+  if (current_subject[-1] == CHAR_CR &&
       current_subject < end_subject &&
-      *current_subject == '\n' &&
+      *current_subject == CHAR_NL &&
       (re->flags & PCRE_HASCRORLF) == 0 &&
         (md->nltype == NLTYPE_ANY ||
          md->nltype == NLTYPE_ANYCRLF ||
diff --git a/glib/pcre/pcre_exec.c b/glib/pcre/pcre_exec.c
index 06ce8f7..0a44fcc 100644
--- a/glib/pcre/pcre_exec.c
+++ b/glib/pcre/pcre_exec.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -89,7 +89,7 @@ static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
 
 
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 /*************************************************
 *        Debugging function to print chars       *
 *************************************************/
@@ -141,7 +141,7 @@ match_ref(int offset, register USPTR eptr, int length, match_data *md,
 {
 USPTR p = md->start_subject + md->offset_vector[offset];
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 if (eptr >= md->end_subject)
   printf("matching subject <null>");
 else
@@ -249,16 +249,16 @@ enum { RM1=1, RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10,
 
 /* These versions of the macros use the stack, as normal. There are debugging
 versions and production versions. Note that the "rw" argument of RMATCH isn't
-actuall used in this definition. */
+actually used in this definition. */
 
 #ifndef NO_RECURSE
 #define REGISTER register
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
   { \
   printf("match() called in line %d\n", __LINE__); \
-  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1); \
+  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1); \
   printf("to line %d\n", __LINE__); \
   }
 #define RRETURN(ra) \
@@ -268,7 +268,7 @@ actuall used in this definition. */
   }
 #else
 #define RMATCH(ra,rb,rc,rd,re,rf,rg,rw) \
-  rrc = match(ra,rb,mstart,rc,rd,re,rf,rg,rdepth+1)
+  rrc = match(ra,rb,mstart,markptr,rc,rd,re,rf,rg,rdepth+1)
 #define RRETURN(ra) return ra
 #endif
 
@@ -288,6 +288,7 @@ argument of match(), which never changes. */
   newframe->Xeptr = ra;\
   newframe->Xecode = rb;\
   newframe->Xmstart = mstart;\
+  newframe->Xmarkptr = markptr;\
   newframe->Xoffset_top = rc;\
   newframe->Xims = re;\
   newframe->Xeptrb = rf;\
@@ -322,9 +323,10 @@ typedef struct heapframe {
 
   /* Function arguments that may change */
 
-  const uschar *Xeptr;
+  USPTR Xeptr;
   const uschar *Xecode;
-  const uschar *Xmstart;
+  USPTR Xmstart;
+  USPTR Xmarkptr;
   int Xoffset_top;
   long int Xims;
   eptrblock *Xeptrb;
@@ -333,13 +335,15 @@ typedef struct heapframe {
 
   /* Function local variables */
 
-  const uschar *Xcallpat;
-  const uschar *Xcharptr;
-  const uschar *Xdata;
-  const uschar *Xnext;
-  const uschar *Xpp;
-  const uschar *Xprev;
-  const uschar *Xsaved_eptr;
+  USPTR Xcallpat;
+#ifdef SUPPORT_UTF8
+  USPTR Xcharptr;
+#endif
+  USPTR Xdata;
+  USPTR Xnext;
+  USPTR Xpp;
+  USPTR Xprev;
+  USPTR Xsaved_eptr;
 
   recursion_info Xnew_recursive;
 
@@ -360,6 +364,7 @@ typedef struct heapframe {
   uschar Xocchars[8];
 #endif
 
+  int Xcodelink;
   int Xctype;
   unsigned int Xfc;
   int Xfi;
@@ -395,10 +400,32 @@ typedef struct heapframe {
 
 /* This function is called recursively in many circumstances. Whenever it
 returns a negative (error) response, the outer incarnation must also return the
-same response.
+same response. */
+
+/* These macros pack up tests that are used for partial matching, and which
+appear several times in the code. We set the "hit end" flag if the pointer is
+at the end of the subject and also past the start of the subject (i.e.
+something has been matched). For hard partial matching, we then return
+immediately. The second one is used when we already know we are past the end of
+the subject. */
+
+#define CHECK_PARTIAL()\
+  if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\
+    {\
+    md->hitend = TRUE;\
+    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
+    }
+
+#define SCHECK_PARTIAL()\
+  if (md->partial != 0 && eptr > mstart)\
+    {\
+    md->hitend = TRUE;\
+    if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
+    }
+
 
-Performance note: It might be tempting to extract commonly used fields from the
-md structure (e.g. utf8, end_subject) into individual variables to improve
+/* Performance note: It might be tempting to extract commonly used fields from
+the md structure (e.g. utf8, end_subject) into individual variables to improve
 performance. Tests using gcc on a SPARC disproved this; in the first case, it
 made performance worse.
 
@@ -407,6 +434,7 @@ Arguments:
    ecode       pointer to current position in compiled code
    mstart      pointer to the current match start position (can be modified
                  by encountering \K)
+   markptr     pointer to the most recent MARK name, or NULL
    offset_top  current top pointer
    md          pointer to "static" info for the match
    ims         current /i, /m, and /s options
@@ -425,9 +453,9 @@ Returns:       MATCH_MATCH if matched            )  these values are >= 0
 */
 
 static int
-match(REGISTER USPTR eptr, REGISTER const uschar *ecode, const uschar *mstart,
-  int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb,
-  int flags, unsigned int rdepth)
+match(REGISTER USPTR eptr, REGISTER const uschar *ecode, USPTR mstart, USPTR
+  markptr, int offset_top, match_data *md, unsigned long int ims,
+  eptrblock *eptrb, int flags, unsigned int rdepth)
 {
 /* These variables do not need to be preserved over recursion in this function,
 so they can be ordinary variables in all cases. Mark some of them with
@@ -439,6 +467,7 @@ register unsigned int c;   /* Character values not kept over RMATCH() calls */
 register BOOL utf8;        /* Local copy of UTF-8 flag for speed */
 
 BOOL minimize, possessive; /* Quantifier options */
+int condcode;
 
 /* When recursion is not being used, all "local" variables that have to be
 preserved over calls to RMATCH() are part of a "frame" which is obtained from
@@ -454,6 +483,7 @@ frame->Xprevframe = NULL;            /* Marks the top level */
 frame->Xeptr = eptr;
 frame->Xecode = ecode;
 frame->Xmstart = mstart;
+frame->Xmarkptr = markptr;
 frame->Xoffset_top = offset_top;
 frame->Xims = ims;
 frame->Xeptrb = eptrb;
@@ -469,6 +499,7 @@ HEAP_RECURSE:
 #define eptr               frame->Xeptr
 #define ecode              frame->Xecode
 #define mstart             frame->Xmstart
+#define markptr            frame->Xmarkptr
 #define offset_top         frame->Xoffset_top
 #define ims                frame->Xims
 #define eptrb              frame->Xeptrb
@@ -481,6 +512,7 @@ HEAP_RECURSE:
 #define charptr            frame->Xcharptr
 #endif
 #define callpat            frame->Xcallpat
+#define codelink           frame->Xcodelink
 #define data               frame->Xdata
 #define next               frame->Xnext
 #define pp                 frame->Xpp
@@ -561,6 +593,7 @@ int oclength;
 uschar occhars[8];
 #endif
 
+int codelink;
 int ctype;
 int length;
 int max;
@@ -594,7 +627,7 @@ TAIL_RECURSE:
 /* OK, now we can get on with the real code of the function. Recursive calls
 are specified by the macro RMATCH and RRETURN is used to return. When
 NO_RECURSE is *not* defined, these just turn into a recursive call to match()
-and a "return", respectively (possibly with some debugging if DEBUG is
+and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
 defined). However, RMATCH isn't like a function call because it's quite a
 complicated macro. It has to be used in one particular way. This shouldn't,
 however, impact performance when true recursion is being used. */
@@ -636,14 +669,6 @@ for (;;)
   minimize = possessive = FALSE;
   op = *ecode;
 
-  /* For partial matching, remember if we ever hit the end of the subject after
-  matching at least one subject character. */
-
-  if (md->partial &&
-      eptr >= md->end_subject &&
-      eptr > mstart)
-    md->hitend = TRUE;
-
   switch(op)
     {
     case OP_FAIL:
@@ -693,7 +718,7 @@ for (;;)
     number = GET2(ecode, 1+LINK_SIZE);
     offset = number << 1;
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
     printf("start bracket %d\n", number);
     printf("subject=");
     pchars(eptr, 16, TRUE, md);
@@ -787,22 +812,175 @@ for (;;)
 
     case OP_COND:
     case OP_SCOND:
-    if (ecode[LINK_SIZE+1] == OP_RREF)         /* Recursion test */
+    codelink= GET(ecode, 1);
+
+    /* Because of the way auto-callout works during compile, a callout item is
+    inserted between OP_COND and an assertion condition. */
+
+    if (ecode[LINK_SIZE+1] == OP_CALLOUT)
       {
-      offset = GET2(ecode, LINK_SIZE + 2);     /* Recursion group number*/
-      condition = md->recursive != NULL &&
-        (offset == RREF_ANY || offset == md->recursive->group_num);
-      ecode += condition? 3 : GET(ecode, 1);
+      if (pcre_callout != NULL)
+        {
+        pcre_callout_block cb;
+        cb.version          = 1;   /* Version 1 of the callout block */
+        cb.callout_number   = ecode[LINK_SIZE+2];
+        cb.offset_vector    = md->offset_vector;
+        cb.subject          = (PCRE_SPTR)md->start_subject;
+        cb.subject_length   = md->end_subject - md->start_subject;
+        cb.start_match      = mstart - md->start_subject;
+        cb.current_position = eptr - md->start_subject;
+        cb.pattern_position = GET(ecode, LINK_SIZE + 3);
+        cb.next_item_length = GET(ecode, 3 + 2*LINK_SIZE);
+        cb.capture_top      = offset_top/2;
+        cb.capture_last     = md->capture_last;
+        cb.callout_data     = md->callout_data;
+        if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH);
+        if (rrc < 0) RRETURN(rrc);
+        }
+      ecode += _pcre_OP_lengths[OP_CALLOUT];
       }
 
-    else if (ecode[LINK_SIZE+1] == OP_CREF)    /* Group used test */
+    condcode = ecode[LINK_SIZE+1];
+
+    /* Now see what the actual condition is */
+
+    if (condcode == OP_RREF || condcode == OP_NRREF)    /* Recursion test */
+      {
+      if (md->recursive == NULL)                /* Not recursing => FALSE */
+        {
+        condition = FALSE;
+        ecode += GET(ecode, 1);
+        }
+      else
+        {
+        int recno = GET2(ecode, LINK_SIZE + 2);   /* Recursion group number*/
+        condition =  (recno == RREF_ANY || recno == md->recursive->group_num);
+
+        /* If the test is for recursion into a specific subpattern, and it is
+        false, but the test was set up by name, scan the table to see if the
+        name refers to any other numbers, and test them. The condition is true
+        if any one is set. */
+
+        if (!condition && condcode == OP_NRREF && recno != RREF_ANY)
+          {
+          uschar *slotA = md->name_table;
+          for (i = 0; i < md->name_count; i++)
+            {
+            if (GET2(slotA, 0) == recno) break;
+            slotA += md->name_entry_size;
+            }
+
+          /* Found a name for the number - there can be only one; duplicate
+          names for different numbers are allowed, but not vice versa. First
+          scan down for duplicates. */
+
+          if (i < md->name_count)
+            {
+            uschar *slotB = slotA;
+            while (slotB > md->name_table)
+              {
+              slotB -= md->name_entry_size;
+              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+                {
+                condition = GET2(slotB, 0) == md->recursive->group_num;
+                if (condition) break;
+                }
+              else break;
+              }
+
+            /* Scan up for duplicates */
+
+            if (!condition)
+              {
+              slotB = slotA;
+              for (i++; i < md->name_count; i++)
+                {
+                slotB += md->name_entry_size;
+                if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+                  {
+                  condition = GET2(slotB, 0) == md->recursive->group_num;
+                  if (condition) break;
+                  }
+                else break;
+                }
+              }
+            }
+          }
+
+        /* Choose branch according to the condition */
+
+        ecode += condition? 3 : GET(ecode, 1);
+        }
+      }
+
+    else if (condcode == OP_CREF || condcode == OP_NCREF)  /* Group used test */
       {
       offset = GET2(ecode, LINK_SIZE+2) << 1;  /* Doubled ref number */
       condition = offset < offset_top && md->offset_vector[offset] >= 0;
+
+      /* If the numbered capture is unset, but the reference was by name,
+      scan the table to see if the name refers to any other numbers, and test
+      them. The condition is true if any one is set. This is tediously similar
+      to the code above, but not close enough to try to amalgamate. */
+
+      if (!condition && condcode == OP_NCREF)
+        {
+        int refno = offset >> 1;
+        uschar *slotA = md->name_table;
+
+        for (i = 0; i < md->name_count; i++)
+          {
+          if (GET2(slotA, 0) == refno) break;
+          slotA += md->name_entry_size;
+          }
+
+        /* Found a name for the number - there can be only one; duplicate names
+        for different numbers are allowed, but not vice versa. First scan down
+        for duplicates. */
+
+        if (i < md->name_count)
+          {
+          uschar *slotB = slotA;
+          while (slotB > md->name_table)
+            {
+            slotB -= md->name_entry_size;
+            if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+              {
+              offset = GET2(slotB, 0) << 1;
+              condition = offset < offset_top &&
+                md->offset_vector[offset] >= 0;
+              if (condition) break;
+              }
+            else break;
+            }
+
+          /* Scan up for duplicates */
+
+          if (!condition)
+            {
+            slotB = slotA;
+            for (i++; i < md->name_count; i++)
+              {
+              slotB += md->name_entry_size;
+              if (strcmp((char *)slotA + 2, (char *)slotB + 2) == 0)
+                {
+                offset = GET2(slotB, 0) << 1;
+                condition = offset < offset_top &&
+                  md->offset_vector[offset] >= 0;
+                if (condition) break;
+                }
+              else break;
+              }
+            }
+          }
+        }
+
+      /* Choose branch according to the condition */
+
       ecode += condition? 3 : GET(ecode, 1);
       }
 
-    else if (ecode[LINK_SIZE+1] == OP_DEF)     /* DEFINE - always false */
+    else if (condcode == OP_DEF)     /* DEFINE - always false */
       {
       condition = FALSE;
       ecode += GET(ecode, 1);
@@ -829,7 +1007,7 @@ for (;;)
       else
         {
         condition = FALSE;
-        ecode += GET(ecode, 1);
+        ecode += codelink;
         }
       }
 
@@ -852,13 +1030,37 @@ for (;;)
         goto TAIL_RECURSE;
         }
       }
-    else                         /* Condition false & no 2nd alternative */
+    else                         /* Condition false & no alternative */
       {
       ecode += 1 + LINK_SIZE;
       }
     break;
 
 
+    /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
+    to close any currently open capturing brackets. */
+
+    case OP_CLOSE:
+    number = GET2(ecode, 1);
+    offset = number << 1;
+
+#ifdef PCRE_DEBUG
+      printf("end bracket %d at *ACCEPT", number);
+      printf("\n");
+#endif
+
+    md->capture_last = number;
+    if (offset >= md->offset_max) md->offset_overflow = TRUE; else
+      {
+      md->offset_vector[offset] =
+        md->offset_vector[md->offset_end - number];
+      md->offset_vector[offset+1] = eptr - md->start_subject;
+      if (offset_top <= offset) offset_top = offset + 2;
+      }
+    ecode += 3;
+    break;
+
+
     /* End of the pattern, either real or forced. If we are in a top-level
     recursion, we should restore the offsets appropriately and continue from
     after the call. */
@@ -872,16 +1074,25 @@ for (;;)
       md->recursive = rec->prevrec;
       memmove(md->offset_vector, rec->offset_save,
         rec->saved_max * sizeof(int));
-      mstart = rec->save_start;
+      offset_top = rec->save_offset_top;
       ims = original_ims;
       ecode = rec->after_call;
       break;
       }
 
-    /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty
-    string - backtracking will then try other alternatives, if any. */
+    /* Otherwise, if we have matched an empty string, fail if PCRE_NOTEMPTY is
+    set, or if PCRE_NOTEMPTY_ATSTART is set and we have matched at the start of
+    the subject. In both cases, backtracking will then try other alternatives,
+    if any. */
+
+    if (eptr == mstart &&
+        (md->notempty ||
+          (md->notempty_atstart &&
+            mstart == md->start_subject + md->start_offset)))
+      RRETURN(MATCH_NOMATCH);
+
+    /* Otherwise, we have a match. */
 
-    if (md->notempty && eptr == mstart) RRETURN(MATCH_NOMATCH);
     md->end_match_ptr = eptr;           /* Record where we ended */
     md->end_offset_top = offset_top;    /* and how many extracts were taken */
     md->start_match_ptr = mstart;       /* and the start (\K can modify) */
@@ -907,7 +1118,11 @@ for (;;)
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
         RM4);
-      if (rrc == MATCH_MATCH) break;
+      if (rrc == MATCH_MATCH)
+        {
+        mstart = md->start_match_ptr;   /* In case \K reset it */
+        break;
+        }
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       ecode += GET(ecode, 1);
       }
@@ -926,7 +1141,9 @@ for (;;)
     offset_top = md->end_offset_top;
     continue;
 
-    /* Negative assertion: all branches must fail to match */
+    /* Negative assertion: all branches must fail to match. Encountering SKIP,
+    PRUNE, or COMMIT means we must assume failure without checking subsequent
+    branches. */
 
     case OP_ASSERT_NOT:
     case OP_ASSERTBACK_NOT:
@@ -935,6 +1152,11 @@ for (;;)
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, 0,
         RM5);
       if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH);
+      if (rrc == MATCH_SKIP || rrc == MATCH_PRUNE || rrc == MATCH_COMMIT)
+        {
+        do ecode += GET(ecode,1); while (*ecode == OP_ALT);
+        break;
+        }
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       ecode += GET(ecode,1);
       }
@@ -972,8 +1194,9 @@ for (;;)
       if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
       }
 
-    /* Skip to next op code */
+    /* Save the earliest consulted character, then skip to next op code */
 
+    if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
     ecode += 1 + LINK_SIZE;
     break;
 
@@ -1052,8 +1275,7 @@ for (;;)
 
       memcpy(new_recursive.offset_save, md->offset_vector,
             new_recursive.saved_max * sizeof(int));
-      new_recursive.save_start = mstart;
-      mstart = eptr;
+      new_recursive.save_offset_top = offset_top;
 
       /* OK, now we can do the recursion. For each top-level alternative we
       restore the offset and recursion data. */
@@ -1075,6 +1297,8 @@ for (;;)
         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
           {
           DPRINTF(("Recursion gave error %d\n", rrc));
+          if (new_recursive.offset_save != stacksave)
+            (pcre_free)(new_recursive.offset_save);
           RRETURN(rrc);
           }
 
@@ -1098,7 +1322,8 @@ for (;;)
     a move back into the brackets. Friedl calls these "atomic" subpatterns.
     Check the alternative branches in turn - the matching won't pass the KET
     for this kind of subpattern. If any one branch matches, we carry on as at
-    the end of a normal bracket, leaving the subject pointer. */
+    the end of a normal bracket, leaving the subject pointer, but resetting
+    the start-of-match value in case it was changed by \K. */
 
     case OP_ONCE:
     prev = ecode;
@@ -1107,7 +1332,11 @@ for (;;)
     do
       {
       RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0, RM7);
-      if (rrc == MATCH_MATCH) break;
+      if (rrc == MATCH_MATCH)
+        {
+        mstart = md->start_match_ptr;
+        break;
+        }
       if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc);
       ecode += GET(ecode,1);
       }
@@ -1226,9 +1455,10 @@ for (;;)
       }
     else saved_eptr = NULL;
 
-    /* If we are at the end of an assertion group, stop matching and return
-    MATCH_MATCH, but record the current high water mark for use by positive
-    assertions. Do this also for the "once" (atomic) groups. */
+    /* If we are at the end of an assertion group or an atomic group, stop
+    matching and return MATCH_MATCH, but record the current high water mark for
+    use by positive assertions. We also need to record the match start in case
+    it was changed by \K. */
 
     if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
         *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
@@ -1236,6 +1466,7 @@ for (;;)
       {
       md->end_match_ptr = eptr;      /* For ONCE */
       md->end_offset_top = offset_top;
+      md->start_match_ptr = mstart;
       RRETURN(MATCH_MATCH);
       }
 
@@ -1250,7 +1481,7 @@ for (;;)
       number = GET2(prev, 1+LINK_SIZE);
       offset = number << 1;
 
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
       printf("end bracket %d", number);
       printf("\n");
 #endif
@@ -1272,9 +1503,9 @@ for (;;)
         recursion_info *rec = md->recursive;
         DPRINTF(("Recursion (%d) succeeded - continuing\n", number));
         md->recursive = rec->prevrec;
-        mstart = rec->save_start;
         memcpy(md->offset_vector, rec->offset_save,
           rec->saved_max * sizeof(int));
+        offset_top = rec->save_offset_top;
         ecode = rec->after_call;
         ims = original_ims;
         break;
@@ -1414,19 +1645,26 @@ for (;;)
 
       /* Find out if the previous and current characters are "word" characters.
       It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
-      be "non-word" characters. */
+      be "non-word" characters. Remember the earliest consulted character for
+      partial matching. */
 
 #ifdef SUPPORT_UTF8
       if (utf8)
         {
         if (eptr == md->start_subject) prev_is_word = FALSE; else
           {
-          const uschar *lastptr = eptr - 1;
+          USPTR lastptr = eptr - 1;
           while((*lastptr & 0xc0) == 0x80) lastptr--;
+          if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
           GETCHAR(c, lastptr);
           prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
           }
-        if (eptr >= md->end_subject) cur_is_word = FALSE; else
+        if (eptr >= md->end_subject)
+          {
+          SCHECK_PARTIAL();
+          cur_is_word = FALSE;
+          }
+        else
           {
           GETCHAR(c, eptr);
           cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
@@ -1435,13 +1673,20 @@ for (;;)
       else
 #endif
 
-      /* More streamlined when not in UTF-8 mode */
+      /* Not in UTF-8 mode */
 
         {
-        prev_is_word = (eptr != md->start_subject) &&
-          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
-        cur_is_word = (eptr < md->end_subject) &&
-          ((md->ctypes[*eptr] & ctype_word) != 0);
+        if (eptr == md->start_subject) prev_is_word = FALSE; else
+          {
+          if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
+          prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0);
+          }
+        if (eptr >= md->end_subject)
+          {
+          SCHECK_PARTIAL();
+          cur_is_word = FALSE;
+          }
+        else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0);
         }
 
       /* Now see if the situation is what we want */
@@ -1459,7 +1704,11 @@ for (;;)
     /* Fall through */
 
     case OP_ALLANY:
-    if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr++ >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
     ecode++;
     break;
@@ -1468,12 +1717,20 @@ for (;;)
     any byte, even newline, independent of the setting of PCRE_DOTALL. */
 
     case OP_ANYBYTE:
-    if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr++ >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     ecode++;
     break;
 
     case OP_NOT_DIGIT:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     if (
 #ifdef SUPPORT_UTF8
@@ -1486,7 +1743,11 @@ for (;;)
     break;
 
     case OP_DIGIT:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     if (
 #ifdef SUPPORT_UTF8
@@ -1499,7 +1760,11 @@ for (;;)
     break;
 
     case OP_NOT_WHITESPACE:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     if (
 #ifdef SUPPORT_UTF8
@@ -1512,7 +1777,11 @@ for (;;)
     break;
 
     case OP_WHITESPACE:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     if (
 #ifdef SUPPORT_UTF8
@@ -1525,7 +1794,11 @@ for (;;)
     break;
 
     case OP_NOT_WORDCHAR:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     if (
 #ifdef SUPPORT_UTF8
@@ -1538,7 +1811,11 @@ for (;;)
     break;
 
     case OP_WORDCHAR:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     if (
 #ifdef SUPPORT_UTF8
@@ -1551,7 +1828,11 @@ for (;;)
     break;
 
     case OP_ANYNL:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
@@ -1575,7 +1856,11 @@ for (;;)
     break;
 
     case OP_NOT_HSPACE:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
@@ -1605,7 +1890,11 @@ for (;;)
     break;
 
     case OP_HSPACE:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
@@ -1635,7 +1924,11 @@ for (;;)
     break;
 
     case OP_NOT_VSPACE:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
@@ -1653,7 +1946,11 @@ for (;;)
     break;
 
     case OP_VSPACE:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
     switch(c)
       {
@@ -1676,7 +1973,11 @@ for (;;)
 
     case OP_PROP:
     case OP_NOTPROP:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
       {
       int chartype = UCD_CHARTYPE(c);
@@ -1720,7 +2021,11 @@ for (;;)
     is in the binary; otherwise a compile-time error occurs. */
 
     case OP_EXTUNI:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     GETCHARINCTEST(c, eptr);
       {
       int category = UCD_CATEGORY(c);
@@ -1800,7 +2105,11 @@ for (;;)
         break;
 
         default:               /* No repeat follows */
-        if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
+        if (!match_ref(offset, eptr, length, md, ims))
+          {
+          CHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
         eptr += length;
         continue;              /* With the main loop */
         }
@@ -1816,7 +2125,11 @@ for (;;)
 
       for (i = 1; i <= min; i++)
         {
-        if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH);
+        if (!match_ref(offset, eptr, length, md, ims))
+          {
+          CHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
         eptr += length;
         }
 
@@ -1833,8 +2146,12 @@ for (;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || !match_ref(offset, eptr, length, md, ims))
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (!match_ref(offset, eptr, length, md, ims))
+            {
+            CHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
+            }
           eptr += length;
           }
         /* Control never gets here */
@@ -1847,7 +2164,11 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (!match_ref(offset, eptr, length, md, ims)) break;
+          if (!match_ref(offset, eptr, length, md, ims))
+            {
+            CHECK_PARTIAL();
+            break;
+            }
           eptr += length;
           }
         while (eptr >= pp)
@@ -1861,8 +2182,6 @@ for (;;)
       }
     /* Control never gets here */
 
-
-
     /* Match a bit-mapped character class, possibly repeatedly. This op code is
     used when all the characters in the class have values in the range 0-255,
     and either the matching is caseful, or the characters are in the range
@@ -1917,7 +2236,11 @@ for (;;)
         {
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           if (c > 255)
             {
@@ -1935,7 +2258,11 @@ for (;;)
         {
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           c = *eptr++;
           if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
           }
@@ -1959,7 +2286,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM16);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(c, eptr);
             if (c > 255)
               {
@@ -1979,7 +2311,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM17);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             c = *eptr++;
             if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
             }
@@ -2000,7 +2337,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c > 255)
               {
@@ -2026,7 +2367,11 @@ for (;;)
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             c = *eptr;
             if ((data[c/8] & (1 << (c&7))) == 0) break;
             eptr++;
@@ -2046,7 +2391,8 @@ for (;;)
 
 
     /* Match an extended character class. This opcode is encountered only
-    in UTF-8 mode, because that's the only time it is compiled. */
+    when UTF-8 mode is supported. Nevertheless, we may not be in UTF-8
+    mode, because Unicode properties are supported in non-UTF-8 mode. */
 
 #ifdef SUPPORT_UTF8
     case OP_XCLASS:
@@ -2087,8 +2433,12 @@ for (;;)
 
       for (i = 1; i <= min; i++)
         {
-        if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
-        GETCHARINC(c, eptr);
+        if (eptr >= md->end_subject)
+          {
+          SCHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
+        GETCHARINCTEST(c, eptr);
         if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
         }
 
@@ -2106,8 +2456,13 @@ for (;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM20);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
-          GETCHARINC(c, eptr);
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          GETCHARINCTEST(c, eptr);
           if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
@@ -2121,8 +2476,12 @@ for (;;)
         for (i = min; i < max; i++)
           {
           int len = 1;
-          if (eptr >= md->end_subject) break;
-          GETCHARLEN(c, eptr, len);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            break;
+            }
+          GETCHARLENTEST(c, eptr, len);
           if (!_pcre_xclass(c, data)) break;
           eptr += len;
           }
@@ -2149,7 +2508,11 @@ for (;;)
       length = 1;
       ecode++;
       GETCHARLEN(fc, ecode, length);
-      if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
+      if (length > md->end_subject - eptr)
+        {
+        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
+        RRETURN(MATCH_NOMATCH);
+        }
       while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH);
       }
     else
@@ -2157,7 +2520,11 @@ for (;;)
 
     /* Non-UTF-8 mode */
       {
-      if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
+      if (md->end_subject - eptr < 1)
+        {
+        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
+        RRETURN(MATCH_NOMATCH);
+        }
       if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
       ecode += 2;
       }
@@ -2173,7 +2540,11 @@ for (;;)
       ecode++;
       GETCHARLEN(fc, ecode, length);
 
-      if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
+      if (length > md->end_subject - eptr)
+        {
+        CHECK_PARTIAL();             /* Not SCHECK_PARTIAL() */
+        RRETURN(MATCH_NOMATCH);
+        }
 
       /* If the pattern character's value is < 128, we have only one byte, and
       can use the fast lookup table. */
@@ -2208,7 +2579,11 @@ for (;;)
 
     /* Non-UTF-8 mode */
       {
-      if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH);
+      if (md->end_subject - eptr < 1)
+        {
+        SCHECK_PARTIAL();            /* This one can use SCHECK_PARTIAL() */
+        RRETURN(MATCH_NOMATCH);
+        }
       if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
       ecode += 2;
       }
@@ -2262,13 +2637,12 @@ for (;;)
     case OP_MINQUERY:
     c = *ecode++ - OP_STAR;
     minimize = (c & 1) != 0;
+
     min = rep_min[c];                 /* Pick up values from tables; */
     max = rep_max[c];                 /* zero for max => infinity */
     if (max == 0) max = INT_MAX;
 
-    /* Common code for all repeated single-character matches. We can give
-    up quickly if there are fewer than the minimum number of characters left in
-    the subject. */
+    /* Common code for all repeated single-character matches. */
 
     REPEATCHAR:
 #ifdef SUPPORT_UTF8
@@ -2277,7 +2651,6 @@ for (;;)
       length = 1;
       charptr = ecode;
       GETCHARLEN(fc, ecode, length);
-      if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
       ecode += length;
 
       /* Handle multibyte character matching specially here. There is
@@ -2295,18 +2668,18 @@ for (;;)
 
         for (i = 1; i <= min; i++)
           {
-          if (memcmp(eptr, charptr, length) == 0) eptr += length;
+          if (eptr <= md->end_subject - length &&
+            memcmp(eptr, charptr, length) == 0) eptr += length;
 #ifdef SUPPORT_UCP
-          /* Need braces because of following else */
-          else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
+          else if (oclength > 0 &&
+                   eptr <= md->end_subject - oclength &&
+                   memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+#endif  /* SUPPORT_UCP */
           else
             {
-            if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
-            eptr += oclength;
+            CHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
             }
-#else   /* without SUPPORT_UCP */
-          else { RRETURN(MATCH_NOMATCH); }
-#endif  /* SUPPORT_UCP */
           }
 
         if (min == max) continue;
@@ -2317,19 +2690,19 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM22);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
-            if (memcmp(eptr, charptr, length) == 0) eptr += length;
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr <= md->end_subject - length &&
+              memcmp(eptr, charptr, length) == 0) eptr += length;
 #ifdef SUPPORT_UCP
-            /* Need braces because of following else */
-            else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
+            else if (oclength > 0 &&
+                     eptr <= md->end_subject - oclength &&
+                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+#endif  /* SUPPORT_UCP */
             else
               {
-              if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
-              eptr += oclength;
+              CHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
               }
-#else   /* without SUPPORT_UCP */
-            else { RRETURN (MATCH_NOMATCH); }
-#endif  /* SUPPORT_UCP */
             }
           /* Control never gets here */
           }
@@ -2339,33 +2712,34 @@ for (;;)
           pp = eptr;
           for (i = min; i < max; i++)
             {
-            if (eptr > md->end_subject - length) break;
-            if (memcmp(eptr, charptr, length) == 0) eptr += length;
+            if (eptr <= md->end_subject - length &&
+                memcmp(eptr, charptr, length) == 0) eptr += length;
 #ifdef SUPPORT_UCP
-            else if (oclength == 0) break;
+            else if (oclength > 0 &&
+                     eptr <= md->end_subject - oclength &&
+                     memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
+#endif  /* SUPPORT_UCP */
             else
               {
-              if (memcmp(eptr, occhars, oclength) != 0) break;
-              eptr += oclength;
+              CHECK_PARTIAL();
+              break;
               }
-#else   /* without SUPPORT_UCP */
-            else break;
-#endif  /* SUPPORT_UCP */
             }
 
           if (possessive) continue;
+
           for(;;)
-           {
-           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
-           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-           if (eptr == pp) RRETURN(MATCH_NOMATCH);
+            {
+            RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM23);
+            if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+            if (eptr == pp) { RRETURN(MATCH_NOMATCH); }
 #ifdef SUPPORT_UCP
-           eptr--;
-           BACKCHAR(eptr);
+            eptr--;
+            BACKCHAR(eptr);
 #else   /* without SUPPORT_UCP */
-           eptr -= length;
+            eptr -= length;
 #endif  /* SUPPORT_UCP */
-           }
+            }
           }
         /* Control never gets here */
         }
@@ -2378,10 +2752,8 @@ for (;;)
 #endif  /* SUPPORT_UTF8 */
 
     /* When not in UTF-8 mode, load a single-byte character. */
-      {
-      if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
-      fc = *ecode++;
-      }
+
+    fc = *ecode++;
 
     /* The value of fc at this point is always less than 256, though we may or
     may not be in UTF-8 mode. The code is duplicated for the caseless and
@@ -2399,7 +2771,14 @@ for (;;)
       {
       fc = md->lcc[fc];
       for (i = 1; i <= min; i++)
+        {
+        if (eptr >= md->end_subject)
+          {
+          SCHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
         if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+        }
       if (min == max) continue;
       if (minimize)
         {
@@ -2407,9 +2786,13 @@ for (;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM24);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject ||
-              fc != md->lcc[*eptr++])
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
+            }
+          if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -2418,10 +2801,17 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            break;
+            }
+          if (fc != md->lcc[*eptr]) break;
           eptr++;
           }
+
         if (possessive) continue;
+
         while (eptr >= pp)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM25);
@@ -2437,16 +2827,31 @@ for (;;)
 
     else
       {
-      for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+      for (i = 1; i <= min; i++)
+        {
+        if (eptr >= md->end_subject)
+          {
+          SCHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
+        if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
+        }
+
       if (min == max) continue;
+
       if (minimize)
         {
         for (fi = min;; fi++)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM26);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject || fc != *eptr++)
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
+            }
+          if (fc != *eptr++) RRETURN(MATCH_NOMATCH);
           }
         /* Control never gets here */
         }
@@ -2455,10 +2860,16 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject || fc != *eptr) break;
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            break;
+            }
+          if (fc != *eptr) break;
           eptr++;
           }
         if (possessive) continue;
+
         while (eptr >= pp)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM27);
@@ -2474,7 +2885,11 @@ for (;;)
     checking can be multibyte. */
 
     case OP_NOT:
-    if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+    if (eptr >= md->end_subject)
+      {
+      SCHECK_PARTIAL();
+      RRETURN(MATCH_NOMATCH);
+      }
     ecode++;
     GETCHARINCTEST(c, eptr);
     if ((ims & PCRE_CASELESS) != 0)
@@ -2551,12 +2966,9 @@ for (;;)
     max = rep_max[c];                 /* zero for max => infinity */
     if (max == 0) max = INT_MAX;
 
-    /* Common code for all repeated single-byte matches. We can give up quickly
-    if there are fewer than the minimum number of bytes left in the
-    subject. */
+    /* Common code for all repeated single-byte matches. */
 
     REPEATNOTCHAR:
-    if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
     fc = *ecode++;
 
     /* The code is duplicated for the caseless and caseful cases, for speed,
@@ -2581,6 +2993,11 @@ for (;;)
         register unsigned int d;
         for (i = 1; i <= min; i++)
           {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(d, eptr);
           if (d < 256) d = md->lcc[d];
           if (fc == d) RRETURN(MATCH_NOMATCH);
@@ -2592,7 +3009,14 @@ for (;;)
       /* Not UTF-8 mode */
         {
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
+          }
         }
 
       if (min == max) continue;
@@ -2608,11 +3032,15 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM28);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(d, eptr);
             if (d < 256) d = md->lcc[d];
             if (fc == d) RRETURN(MATCH_NOMATCH);
-
             }
           }
         else
@@ -2623,8 +3051,13 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM29);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++])
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
+              }
+            if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH);
             }
           }
         /* Control never gets here */
@@ -2644,7 +3077,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(d, eptr, len);
             if (d < 256) d = md->lcc[d];
             if (fc == d) break;
@@ -2665,7 +3102,12 @@ for (;;)
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
+            if (fc == md->lcc[*eptr]) break;
             eptr++;
             }
           if (possessive) continue;
@@ -2693,6 +3135,11 @@ for (;;)
         register unsigned int d;
         for (i = 1; i <= min; i++)
           {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(d, eptr);
           if (fc == d) RRETURN(MATCH_NOMATCH);
           }
@@ -2702,7 +3149,14 @@ for (;;)
       /* Not UTF-8 mode */
         {
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
+          }
         }
 
       if (min == max) continue;
@@ -2718,7 +3172,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM32);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(d, eptr);
             if (fc == d) RRETURN(MATCH_NOMATCH);
             }
@@ -2731,8 +3190,13 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM33);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject || fc == *eptr++)
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               RRETURN(MATCH_NOMATCH);
+              }
+            if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
             }
           }
         /* Control never gets here */
@@ -2752,7 +3216,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(d, eptr, len);
             if (fc == d) break;
             eptr += len;
@@ -2772,7 +3240,12 @@ for (;;)
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || fc == *eptr) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
+            if (fc == *eptr) break;
             eptr++;
             }
           if (possessive) continue;
@@ -2866,13 +3339,10 @@ for (;;)
 
     /* First, ensure the minimum number of matches are present. Use inline
     code for maximizing the speed, and do the type test once at the start
-    (i.e. keep it out of the loop). Also we can test that there are at least
-    the minimum number of bytes before we start. This isn't as effective in
-    UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that
+    (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
     is tidier. Also separate the UCP code, which can be the same for both UTF-8
     and single-bytes. */
 
-    if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH);
     if (min > 0)
       {
 #ifdef SUPPORT_UCP
@@ -2884,7 +3354,11 @@ for (;;)
           if (prop_fail_result) RRETURN(MATCH_NOMATCH);
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINCTEST(c, eptr);
             }
           break;
@@ -2892,7 +3366,11 @@ for (;;)
           case PT_LAMP:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINCTEST(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -2905,7 +3383,11 @@ for (;;)
           case PT_GC:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINCTEST(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -2916,7 +3398,11 @@ for (;;)
           case PT_PC:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINCTEST(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -2927,7 +3413,11 @@ for (;;)
           case PT_SC:
           for (i = 1; i <= min; i++)
             {
-            if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINCTEST(c, eptr);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -2947,16 +3437,19 @@ for (;;)
         {
         for (i = 1; i <= min; i++)
           {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
           while (eptr < md->end_subject)
             {
             int len = 1;
-            if (!utf8) c = *eptr; else
-              {
-              GETCHARLEN(c, eptr, len);
-              }
+            if (!utf8) c = *eptr;
+              else { GETCHARLEN(c, eptr, len); }
             prop_category = UCD_CATEGORY(c);
             if (prop_category != ucp_M) break;
             eptr += len;
@@ -2975,8 +3468,12 @@ for (;;)
         case OP_ANY:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject || IS_NEWLINE(eptr))
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
             RRETURN(MATCH_NOMATCH);
+            }
+          if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
           eptr++;
           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
           }
@@ -2985,20 +3482,29 @@ for (;;)
         case OP_ALLANY:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           eptr++;
           while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
           }
         break;
 
         case OP_ANYBYTE:
+        if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
         eptr += min;
         break;
 
         case OP_ANYNL:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3024,7 +3530,11 @@ for (;;)
         case OP_NOT_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3056,7 +3566,11 @@ for (;;)
         case OP_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3088,7 +3602,11 @@ for (;;)
         case OP_NOT_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3108,7 +3626,11 @@ for (;;)
         case OP_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           switch(c)
             {
@@ -3128,7 +3650,11 @@ for (;;)
         case OP_NOT_DIGIT:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINC(c, eptr);
           if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
             RRETURN(MATCH_NOMATCH);
@@ -3138,8 +3664,12 @@ for (;;)
         case OP_DIGIT:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject ||
-             *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0)
             RRETURN(MATCH_NOMATCH);
           /* No need to skip more bytes - we know it's a 1-byte character */
           }
@@ -3148,8 +3678,12 @@ for (;;)
         case OP_NOT_WHITESPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject ||
-             (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0))
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_space) != 0)
             RRETURN(MATCH_NOMATCH);
           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
           }
@@ -3158,8 +3692,12 @@ for (;;)
         case OP_WHITESPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject ||
-             *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0)
             RRETURN(MATCH_NOMATCH);
           /* No need to skip more bytes - we know it's a 1-byte character */
           }
@@ -3168,8 +3706,12 @@ for (;;)
         case OP_NOT_WORDCHAR:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject ||
-             (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0))
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (*eptr < 128 && (md->ctypes[*eptr] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
           while (++eptr < md->end_subject && (*eptr & 0xc0) == 0x80);
           }
@@ -3178,8 +3720,12 @@ for (;;)
         case OP_WORDCHAR:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject ||
-             *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
           /* No need to skip more bytes - we know it's a 1-byte character */
           }
@@ -3193,34 +3739,49 @@ for (;;)
 #endif     /* SUPPORT_UTF8 */
 
       /* Code for the non-UTF-8 case for minimum matching of operators other
-      than OP_PROP and OP_NOTPROP. We can assume that there are the minimum
-      number of bytes present, as this was tested above. */
+      than OP_PROP and OP_NOTPROP. */
 
       switch(ctype)
         {
         case OP_ANY:
         for (i = 1; i <= min; i++)
           {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
           eptr++;
           }
         break;
 
         case OP_ALLANY:
+        if (eptr > md->end_subject - min)
+          {
+          SCHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
         eptr += min;
         break;
 
         case OP_ANYBYTE:
+        if (eptr > md->end_subject - min)
+          {
+          SCHECK_PARTIAL();
+          RRETURN(MATCH_NOMATCH);
+          }
         eptr += min;
         break;
 
-        /* Because of the CRLF case, we can't assume the minimum number of
-        bytes are present in this case. */
-
         case OP_ANYNL:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
@@ -3242,7 +3803,11 @@ for (;;)
         case OP_NOT_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           switch(*eptr++)
             {
             default: break;
@@ -3257,7 +3822,11 @@ for (;;)
         case OP_HSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
@@ -3272,7 +3841,11 @@ for (;;)
         case OP_NOT_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           switch(*eptr++)
             {
             default: break;
@@ -3289,7 +3862,11 @@ for (;;)
         case OP_VSPACE:
         for (i = 1; i <= min; i++)
           {
-          if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           switch(*eptr++)
             {
             default: RRETURN(MATCH_NOMATCH);
@@ -3305,34 +3882,76 @@ for (;;)
 
         case OP_NOT_DIGIT:
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
+          }
         break;
 
         case OP_DIGIT:
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
+          }
         break;
 
         case OP_NOT_WHITESPACE:
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
+          }
         break;
 
         case OP_WHITESPACE:
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
+          }
         break;
 
         case OP_NOT_WORDCHAR:
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if ((md->ctypes[*eptr++] & ctype_word) != 0)
             RRETURN(MATCH_NOMATCH);
+          }
         break;
 
         case OP_WORDCHAR:
         for (i = 1; i <= min; i++)
+          {
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           if ((md->ctypes[*eptr++] & ctype_word) == 0)
             RRETURN(MATCH_NOMATCH);
+          }
         break;
 
         default:
@@ -3360,7 +3979,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM36);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(c, eptr);
             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
             }
@@ -3371,7 +3995,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM37);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -3386,7 +4015,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM38);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(c, eptr);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -3399,7 +4033,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM39);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(c, eptr);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3412,7 +4051,12 @@ for (;;)
             {
             RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM40);
             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-            if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+            if (fi >= max) RRETURN(MATCH_NOMATCH);
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              RRETURN(MATCH_NOMATCH);
+              }
             GETCHARINC(c, eptr);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -3434,17 +4078,20 @@ for (;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM41);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH);
           while (eptr < md->end_subject)
             {
             int len = 1;
-            if (!utf8) c = *eptr; else
-              {
-              GETCHARLEN(c, eptr, len);
-              }
+            if (!utf8) c = *eptr;
+              else { GETCHARLEN(c, eptr, len); }
             prop_category = UCD_CATEGORY(c);
             if (prop_category != ucp_M) break;
             eptr += len;
@@ -3463,10 +4110,14 @@ for (;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject ||
-               (ctype == OP_ANY && IS_NEWLINE(eptr)))
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (ctype == OP_ANY && IS_NEWLINE(eptr))
             RRETURN(MATCH_NOMATCH);
-
           GETCHARINC(c, eptr);
           switch(ctype)
             {
@@ -3622,10 +4273,14 @@ for (;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43);
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
-          if (fi >= max || eptr >= md->end_subject ||
-               (ctype == OP_ANY && IS_NEWLINE(eptr)))
+          if (fi >= max) RRETURN(MATCH_NOMATCH);
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            RRETURN(MATCH_NOMATCH);
+            }
+          if (ctype == OP_ANY && IS_NEWLINE(eptr))
             RRETURN(MATCH_NOMATCH);
-
           c = *eptr++;
           switch(ctype)
             {
@@ -3750,7 +4405,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (prop_fail_result) break;
             eptr+= len;
@@ -3761,7 +4420,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -3776,7 +4439,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -3789,7 +4456,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -3802,7 +4473,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -3831,7 +4506,11 @@ for (;;)
         {
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject) break;
+          if (eptr >= md->end_subject)
+            {
+            SCHECK_PARTIAL();
+            break;
+            }
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) break;
@@ -3851,6 +4530,7 @@ for (;;)
         /* eptr is now past the end of the maximum run */
 
         if (possessive) continue;
+
         for(;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -3886,7 +4566,12 @@ for (;;)
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+              if (eptr >= md->end_subject)
+                {
+                SCHECK_PARTIAL();
+                break;
+                }
+              if (IS_NEWLINE(eptr)) break;
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -3898,7 +4583,12 @@ for (;;)
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+              if (eptr >= md->end_subject)
+                {
+                SCHECK_PARTIAL();
+                break;
+                }
+              if (IS_NEWLINE(eptr)) break;
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -3910,7 +4600,11 @@ for (;;)
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject) break;
+              if (eptr >= md->end_subject)
+                {
+                SCHECK_PARTIAL();
+                break;
+                }
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -3923,15 +4617,22 @@ for (;;)
           case OP_ANYBYTE:
           c = max - min;
           if (c > (unsigned int)(md->end_subject - eptr))
-            c = md->end_subject - eptr;
-          eptr += c;
+            {
+            eptr = md->end_subject;
+            SCHECK_PARTIAL();
+            }
+          else eptr += c;
           break;
 
           case OP_ANYNL:
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c == 0x000d)
               {
@@ -3956,7 +4657,11 @@ for (;;)
             {
             BOOL gotspace;
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
@@ -3994,7 +4699,11 @@ for (;;)
             {
             BOOL gotspace;
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
@@ -4018,7 +4727,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
             eptr+= len;
@@ -4029,7 +4742,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
             eptr+= len;
@@ -4040,7 +4757,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
             eptr+= len;
@@ -4051,7 +4772,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
             eptr+= len;
@@ -4062,7 +4787,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
             eptr+= len;
@@ -4073,7 +4802,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             GETCHARLEN(c, eptr, len);
             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
             eptr+= len;
@@ -4105,7 +4838,12 @@ for (;;)
           case OP_ANY:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
+            if (IS_NEWLINE(eptr)) break;
             eptr++;
             }
           break;
@@ -4114,14 +4852,21 @@ for (;;)
           case OP_ANYBYTE:
           c = max - min;
           if (c > (unsigned int)(md->end_subject - eptr))
-            c = md->end_subject - eptr;
-          eptr += c;
+            {
+            eptr = md->end_subject;
+            SCHECK_PARTIAL();
+            }
+          else eptr += c;
           break;
 
           case OP_ANYNL:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             c = *eptr;
             if (c == 0x000d)
               {
@@ -4142,7 +4887,11 @@ for (;;)
           case OP_NOT_HSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             c = *eptr;
             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
             eptr++;
@@ -4152,7 +4901,11 @@ for (;;)
           case OP_HSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             c = *eptr;
             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
             eptr++;
@@ -4162,7 +4915,11 @@ for (;;)
           case OP_NOT_VSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             c = *eptr;
             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
               break;
@@ -4173,7 +4930,11 @@ for (;;)
           case OP_VSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
+              break;
+              }
             c = *eptr;
             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
               break;
@@ -4184,8 +4945,12 @@ for (;;)
           case OP_NOT_DIGIT:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
             eptr++;
             }
           break;
@@ -4193,8 +4958,12 @@ for (;;)
           case OP_DIGIT:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
             eptr++;
             }
           break;
@@ -4202,8 +4971,12 @@ for (;;)
           case OP_NOT_WHITESPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            if ((md->ctypes[*eptr] & ctype_space) != 0) break;
             eptr++;
             }
           break;
@@ -4211,8 +4984,12 @@ for (;;)
           case OP_WHITESPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            if ((md->ctypes[*eptr] & ctype_space) == 0) break;
             eptr++;
             }
           break;
@@ -4220,8 +4997,12 @@ for (;;)
           case OP_NOT_WORDCHAR:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            if ((md->ctypes[*eptr] & ctype_word) != 0) break;
             eptr++;
             }
           break;
@@ -4229,8 +5010,12 @@ for (;;)
           case OP_WORDCHAR:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+            if (eptr >= md->end_subject)
+              {
+              SCHECK_PARTIAL();
               break;
+              }
+            if ((md->ctypes[*eptr] & ctype_word) == 0) break;
             eptr++;
             }
           break;
@@ -4408,6 +5193,7 @@ const uschar *tables;
 const uschar *start_bits = NULL;
 USPTR start_match = (USPTR)subject + start_offset;
 USPTR end_subject;
+USPTR start_partial = NULL;
 USPTR req_byte_ptr = start_match - 1;
 
 pcre_study_data internal_study;
@@ -4424,6 +5210,13 @@ if (re == NULL || subject == NULL ||
    (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
 if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
 
+/* This information is for finding all the numbers associated with a given
+name, for condition testing. */
+
+md->name_table = (uschar *)re + re->name_table_offset;
+md->name_count = re->name_count;
+md->name_entry_size = re->name_entry_size;
+
 /* Fish out the optional data from the extra_data structure, first setting
 the default values. */
 
@@ -4491,7 +5284,9 @@ md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
 md->notbol = (options & PCRE_NOTBOL) != 0;
 md->noteol = (options & PCRE_NOTEOL) != 0;
 md->notempty = (options & PCRE_NOTEMPTY) != 0;
-md->partial = (options & PCRE_PARTIAL) != 0;
+md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
+md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
+              ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
 md->hitend = FALSE;
 
 md->recursive = NULL;                   /* No recursion at top level */
@@ -4532,10 +5327,10 @@ switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
   {
   case 0: newline = NEWLINE; break;   /* Compile-time default */
-  case PCRE_NEWLINE_CR: newline = '\r'; break;
-  case PCRE_NEWLINE_LF: newline = '\n'; break;
+  case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
+  case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
   case PCRE_NEWLINE_CR+
-       PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
+       PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
   case PCRE_NEWLINE_ANY: newline = -1; break;
   case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
   default: return PCRE_ERROR_BADNEWLINE;
@@ -4565,8 +5360,9 @@ else
     }
   }
 
-/* Partial matching is supported only for a restricted set of regexes at the
-moment. */
+/* Partial matching was originally supported only for a restricted set of
+regexes; from release 8.00 there are no restrictions, but the bits are still
+defined (though never set). So there's no harm in leaving this code. */
 
 if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
   return PCRE_ERROR_BADPARTIAL;
@@ -4577,11 +5373,11 @@ back the character offset. */
 #ifdef SUPPORT_UTF8
 if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
   {
-  if (_pcre_valid_utf8((uschar *)subject, length) >= 0)
+  if (_pcre_valid_utf8((USPTR)subject, length) >= 0)
     return PCRE_ERROR_BADUTF8;
   if (start_offset > 0 && start_offset < length)
     {
-    int tb = ((uschar *)subject)[start_offset];
+    int tb = ((USPTR)subject)[start_offset];
     if (tb > 127)
       {
       tb &= 0xc0;
@@ -4653,7 +5449,7 @@ if (!anchored)
     }
   else
     if (!startline && study != NULL &&
-      (study->options & PCRE_STUDY_MAPPED) != 0)
+      (study->flags & PCRE_STUDY_MAPPED) != 0)
         start_bits = study->start_bits;
   }
 
@@ -4687,11 +5483,11 @@ for(;;)
     while (iptr < iend) *iptr++ = -1;
     }
 
-  /* Advance to a unique first char if possible. If firstline is TRUE, the
-  start of the match is constrained to the first line of a multiline string.
-  That is, the match must be before or at the first newline. Implement this by
-  temporarily adjusting end_subject so that we stop scanning at a newline. If
-  the match fails at the newline, later code breaks this loop. */
+  /* If firstline is TRUE, the start of the match is constrained to the first
+  line of a multiline string. That is, the match must be before or at the first
+  newline. Implement this by temporarily adjusting end_subject so that we stop
+  scanning at a newline. If the match fails at the newline, later code breaks
+  this loop. */
 
   if (firstline)
     {
@@ -4711,138 +5507,164 @@ for(;;)
     end_subject = t;
     }
 
-  /* Now advance to a unique first byte if there is one. */
+  /* There are some optimizations that avoid running the match if a known
+  starting point is not found, or if a known later character is not present.
+  However, there is an option that disables these, for testing and for ensuring
+  that all callouts do actually occur. */
 
-  if (first_byte >= 0)
+  if ((options & PCRE_NO_START_OPTIMIZE) == 0)
     {
-    if (first_byte_caseless)
-      while (start_match < end_subject && md->lcc[*start_match] != first_byte)
-        start_match++;
-    else
-      while (start_match < end_subject && *start_match != first_byte)
-        start_match++;
-    }
+    /* Advance to a unique first byte if there is one. */
 
-  /* Or to just after a linebreak for a multiline match */
+    if (first_byte >= 0)
+      {
+      if (first_byte_caseless)
+        while (start_match < end_subject && md->lcc[*start_match] != first_byte)
+          start_match++;
+      else
+        while (start_match < end_subject && *start_match != first_byte)
+          start_match++;
+      }
 
-  else if (startline)
-    {
-    if (start_match > md->start_subject + start_offset)
+    /* Or to just after a linebreak for a multiline match */
+
+    else if (startline)
       {
-#ifdef SUPPORT_UTF8
-      if (utf8)
+      if (start_match > md->start_subject + start_offset)
         {
-        while (start_match < end_subject && !WAS_NEWLINE(start_match))
+#ifdef SUPPORT_UTF8
+        if (utf8)
           {
-          start_match++;
-          while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+          while (start_match < end_subject && !WAS_NEWLINE(start_match))
+            {
             start_match++;
+            while(start_match < end_subject && (*start_match & 0xc0) == 0x80)
+              start_match++;
+            }
           }
-        }
-      else
+        else
 #endif
-      while (start_match < end_subject && !WAS_NEWLINE(start_match))
-        start_match++;
-
-      /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
-      and we are now at a LF, advance the match position by one more character.
-      */
-
-      if (start_match[-1] == '\r' &&
-           (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
-           start_match < end_subject &&
-           *start_match == '\n')
-        start_match++;
+        while (start_match < end_subject && !WAS_NEWLINE(start_match))
+          start_match++;
+
+        /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
+        and we are now at a LF, advance the match position by one more character.
+        */
+
+        if (start_match[-1] == CHAR_CR &&
+             (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
+             start_match < end_subject &&
+             *start_match == CHAR_NL)
+          start_match++;
+        }
       }
-    }
 
-  /* Or to a non-unique first byte after study */
+    /* Or to a non-unique first byte after study */
 
-  else if (start_bits != NULL)
-    {
-    while (start_match < end_subject)
+    else if (start_bits != NULL)
       {
-      register unsigned int c = *start_match;
-      if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
-        else break;
+      while (start_match < end_subject)
+        {
+        register unsigned int c = *start_match;
+        if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++;
+          else break;
+        }
       }
-    }
+    }   /* Starting optimizations */
 
   /* Restore fudged end_subject */
 
   end_subject = save_end_subject;
 
-#ifdef DEBUG  /* Sigh. Some compilers never learn. */
-  printf(">>>> Match against: ");
-  pchars(start_match, end_subject - start_match, TRUE, md);
-  printf("\n");
-#endif
+  /* The following two optimizations are disabled for partial matching or if
+  disabling is explicitly requested. */
 
-  /* If req_byte is set, we know that that character must appear in the subject
-  for the match to succeed. If the first character is set, req_byte must be
-  later in the subject; otherwise the test starts at the match point. This
-  optimization can save a huge amount of backtracking in patterns with nested
-  unlimited repeats that aren't going to match. Writing separate code for
-  cased/caseless versions makes it go faster, as does using an autoincrement
-  and backing off on a match.
-
-  HOWEVER: when the subject string is very, very long, searching to its end can
-  take a long time, and give bad performance on quite ordinary patterns. This
-  showed up when somebody was matching something like /^\d+C/ on a 32-megabyte
-  string... so we don't do this when the string is sufficiently long.
-
-  ALSO: this processing is disabled when partial matching is requested.
-  */
-
-  if (req_byte >= 0 &&
-      end_subject - start_match < REQ_BYTE_MAX &&
-      !md->partial)
+  if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
     {
-    register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
+    /* If the pattern was studied, a minimum subject length may be set. This is
+    a lower bound; there may not be any string of that length that actually
+    matches the pattern. Although the value is, strictly, in characters, we
+    treat it as bytes to avoid spending too much time in this optimization. */
 
-    /* We don't need to repeat the search if we haven't yet reached the
-    place we found it at last time. */
+    if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
+        (pcre_uint32)(end_subject - start_match) < study->minlength)
+      {
+      rc = MATCH_NOMATCH;
+      break;
+      }
 
-    if (p > req_byte_ptr)
+    /* If req_byte is set, we know that that character must appear in the
+    subject for the match to succeed. If the first character is set, req_byte
+    must be later in the subject; otherwise the test starts at the match point.
+    This optimization can save a huge amount of backtracking in patterns with
+    nested unlimited repeats that aren't going to match. Writing separate code
+    for cased/caseless versions makes it go faster, as does using an
+    autoincrement and backing off on a match.
+
+    HOWEVER: when the subject string is very, very long, searching to its end
+    can take a long time, and give bad performance on quite ordinary patterns.
+    This showed up when somebody was matching something like /^\d+C/ on a
+    32-megabyte string... so we don't do this when the string is sufficiently
+    long. */
+
+    if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX)
       {
-      if (req_byte_caseless)
+      register USPTR p = start_match + ((first_byte >= 0)? 1 : 0);
+
+      /* We don't need to repeat the search if we haven't yet reached the
+      place we found it at last time. */
+
+      if (p > req_byte_ptr)
         {
-        while (p < end_subject)
+        if (req_byte_caseless)
           {
-          register int pp = *p++;
-          if (pp == req_byte || pp == req_byte2) { p--; break; }
+          while (p < end_subject)
+            {
+            register int pp = *p++;
+            if (pp == req_byte || pp == req_byte2) { p--; break; }
+            }
           }
-        }
-      else
-        {
-        while (p < end_subject)
+        else
           {
-          if (*p++ == req_byte) { p--; break; }
+          while (p < end_subject)
+            {
+            if (*p++ == req_byte) { p--; break; }
+            }
           }
-        }
 
-      /* If we can't find the required character, break the matching loop,
-      forcing a match failure. */
+        /* If we can't find the required character, break the matching loop,
+        forcing a match failure. */
 
-      if (p >= end_subject)
-        {
-        rc = MATCH_NOMATCH;
-        break;
-        }
+        if (p >= end_subject)
+          {
+          rc = MATCH_NOMATCH;
+          break;
+          }
 
-      /* If we have found the required character, save the point where we
-      found it, so that we don't search again next time round the loop if
-      the start hasn't passed this character yet. */
+        /* If we have found the required character, save the point where we
+        found it, so that we don't search again next time round the loop if
+        the start hasn't passed this character yet. */
 
-      req_byte_ptr = p;
+        req_byte_ptr = p;
+        }
       }
     }
 
-  /* OK, we can now run the match. */
+#ifdef PCRE_DEBUG  /* Sigh. Some compilers never learn. */
+  printf(">>>> Match against: ");
+  pchars(start_match, end_subject - start_match, TRUE, md);
+  printf("\n");
+#endif
+
+  /* OK, we can now run the match. If "hitend" is set afterwards, remember the
+  first starting point for which a partial match was found. */
 
   md->start_match_ptr = start_match;
+  md->start_used_ptr = start_match;
   md->match_call_count = 0;
-  rc = match(start_match, md->start_code, start_match, 2, md, ims, NULL, 0, 0);
+  rc = match(start_match, md->start_code, start_match, NULL, 2, md, ims, NULL,
+    0, 0);
+  if (md->hitend && start_partial == NULL) start_partial = md->start_used_ptr;
 
   switch(rc)
     {
@@ -4872,7 +5694,7 @@ for(;;)
     rc = MATCH_NOMATCH;
     goto ENDLOOP;
 
-    /* Any other return is some kind of error. */
+    /* Any other return is either a match, or some kind of error. */
 
     default:
     goto ENDLOOP;
@@ -4902,9 +5724,9 @@ for(;;)
   not contain any explicit matches for \r or \n, and the newline option is CRLF
   or ANY or ANYCRLF, advance the match position by one more character. */
 
-  if (start_match[-1] == '\r' &&
+  if (start_match[-1] == CHAR_CR &&
       start_match < end_subject &&
-      *start_match == '\n' &&
+      *start_match == CHAR_NL &&
       (re->flags & PCRE_HASCRORLF) == 0 &&
         (md->nltype == NLTYPE_ANY ||
          md->nltype == NLTYPE_ANYCRLF ||
@@ -4978,14 +5800,19 @@ if (using_temporary_offsets)
   (pcre_free)(md->offset_vector);
   }
 
-if (rc != MATCH_NOMATCH)
+if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
   {
   DPRINTF((">>>> error: returning %d\n", rc));
   return rc;
   }
-else if (md->partial && md->hitend)
+else if (start_partial != NULL)
   {
   DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
+  if (offsetcount > 1)
+    {
+    offsets[0] = start_partial - (USPTR)subject;
+    offsets[1] = end_subject - (USPTR)subject;
+    }
   return PCRE_ERROR_PARTIAL;
   }
 else
diff --git a/glib/pcre/pcre_fullinfo.c b/glib/pcre/pcre_fullinfo.c
index 30566bb..6b8d789 100644
--- a/glib/pcre/pcre_fullinfo.c
+++ b/glib/pcre/pcre_fullinfo.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -89,7 +89,7 @@ if (re->magic_number != MAGIC_NUMBER)
 switch (what)
   {
   case PCRE_INFO_OPTIONS:
-  *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS;
+  *((unsigned long int *)where) = re->options & PUBLIC_COMPILE_OPTIONS;
   break;
 
   case PCRE_INFO_SIZE:
@@ -119,10 +119,16 @@ switch (what)
 
   case PCRE_INFO_FIRSTTABLE:
   *((const uschar **)where) =
-    (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)?
+    (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0)?
       ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL;
   break;
 
+  case PCRE_INFO_MINLENGTH:
+  *((int *)where) =
+    (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0)?
+      study->minlength : -1;
+  break;
+
   case PCRE_INFO_LASTLITERAL:
   *((int *)where) =
     ((re->flags & PCRE_REQCHSET) != 0)? re->req_byte : -1;
@@ -144,6 +150,9 @@ switch (what)
   *((const uschar **)where) = (const uschar *)(_pcre_default_tables);
   break;
 
+  /* From release 8.00 this will always return TRUE because NOPARTIAL is
+  no longer ever set (the restrictions have been removed). */
+
   case PCRE_INFO_OKPARTIAL:
   *((int *)where) = (re->flags & PCRE_NOPARTIAL) == 0;
   break;
diff --git a/glib/pcre/pcre_globals.c b/glib/pcre/pcre_globals.c
index e759ed5..18d30cf 100644
--- a/glib/pcre/pcre_globals.c
+++ b/glib/pcre/pcre_globals.c
@@ -43,8 +43,14 @@ PCRE is thread-clean and doesn't use any global variables in the normal sense.
 However, it calls memory allocation and freeing functions via the four
 indirections below, and it can optionally do callouts, using the fifth
 indirection. These values can be changed by the caller, but are shared between
-all threads. However, when compiling for Virtual Pascal, things are done
-differently, and global variables are not used (see pcre.in). */
+all threads.
+
+For MS Visual Studio and Symbian OS, there are problems in initializing these
+variables to non-local functions. In these cases, therefore, an indirection via
+a local function is used.
+
+Also, when compiling for Virtual Pascal, things are done differently, and
+global variables are not used. */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
@@ -52,6 +58,19 @@ differently, and global variables are not used (see pcre.in). */
 
 #include "pcre_internal.h"
 
+#if defined _MSC_VER || defined  __SYMBIAN32__
+static void* LocalPcreMalloc(size_t aSize)
+  {
+  return malloc(aSize);
+  }
+static void LocalPcreFree(void* aPtr)
+  {
+  free(aPtr);
+  }
 PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
 
+#elif !defined VPCOMPAT
+PCRE_EXP_DATA_DEFN int   (*pcre_callout)(pcre_callout_block *) = NULL;
+#endif
+
 /* End of pcre_globals.c */
diff --git a/glib/pcre/pcre_info.c b/glib/pcre/pcre_info.c
index 02cf1c9..f35f398 100644
--- a/glib/pcre/pcre_info.c
+++ b/glib/pcre/pcre_info.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -83,7 +83,7 @@ if (re->magic_number != MAGIC_NUMBER)
   re = _pcre_try_flipped(re, &internal_re, NULL, NULL);
   if (re == NULL) return PCRE_ERROR_BADMAGIC;
   }
-if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
+if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_COMPILE_OPTIONS);
 if (first_byte != NULL)
   *first_byte = ((re->flags & PCRE_FIRSTSET) != 0)? re->first_byte :
      ((re->flags & PCRE_STARTLINE) != 0)? -1 : -2;
diff --git a/glib/pcre/pcre_internal.h b/glib/pcre/pcre_internal.h
index 519b871..a1a06d4 100644
--- a/glib/pcre/pcre_internal.h
+++ b/glib/pcre/pcre_internal.h
@@ -7,7 +7,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -45,10 +45,24 @@ functions whose names all begin with "_pcre_". */
 #ifndef PCRE_INTERNAL_H
 #define PCRE_INTERNAL_H
 
-/* Define DEBUG to get debugging output on stdout. */
+/* Define PCRE_DEBUG to get debugging output on stdout. */
 
 #if 0
-#define DEBUG
+#define PCRE_DEBUG
+#endif
+
+/* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
+script prevents both being selected, but not everybody uses "configure". */
+
+#if defined EBCDIC && defined SUPPORT_UTF8
+#error The use of both EBCDIC and SUPPORT_UTF8 is not supported.
+#endif
+
+/* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
+"configure" script ensures this, but not everybody uses "configure". */
+
+#if defined SUPPORT_UCP && !defined SUPPORT_UTF8
+#define SUPPORT_UTF8 1
 #endif
 
 /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
@@ -60,7 +74,7 @@ It turns out that the Mac Debugging.h header also defines the macro DPRINTF, so
 be absolutely sure we get our version. */
 
 #undef DPRINTF
-#ifdef DEBUG
+#ifdef PCRE_DEBUG
 #define DPRINTF(p) printf p
 #else
 #define DPRINTF(p) /* Nothing */
@@ -72,8 +86,6 @@ setjmp and stdarg are used is when NO_RECURSE is set. */
 
 #include <ctype.h>
 #include <limits.h>
-#include <setjmp.h>
-#include <stdarg.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -172,6 +184,26 @@ preprocessor time in standard C environments. */
   #error Cannot determine a type for 32-bit unsigned integers
 #endif
 
+/* When checking for integer overflow in pcre_compile(), we need to handle
+large integers. If a 64-bit integer type is available, we can use that.
+Otherwise we have to cast to double, which of course requires floating point
+arithmetic. Handle this by defining a macro for the appropriate type. If
+stdint.h is available, include it; it may define INT64_MAX. Systems that do not
+have stdint.h (e.g. Solaris) may have inttypes.h. The macro int64_t may be set
+by "configure". */
+
+#if HAVE_STDINT_H
+#include <stdint.h>
+#elif HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
+#if defined INT64_MAX || defined int64_t
+#define INT64_OR_DOUBLE int64_t
+#else
+#define INT64_OR_DOUBLE double
+#endif
+
 /* All character handling must be done as unsigned characters. Otherwise there
 are problems with top-bit-set characters and functions such as isspace().
 However, we leave the interface to the outside world as char *, because that
@@ -259,6 +291,7 @@ option on the command line. */
 #define strncmp(s1,s2,m) _strncmp(s1,s2,m)
 #define memcmp(s,c,n)    _memcmp(s,c,n)
 #define memcpy(d,s,n)    _memcpy(d,s,n)
+#define memmove(d,s,n)   _memmove(d,s,n)
 #define memset(s,c,n)    _memset(s,c,n)
 #else  /* VPCOMPAT */
 
@@ -477,6 +510,26 @@ if there are extra bytes. This is called when we know we are in UTF-8 mode. */
     len += gcaa; \
     }
 
+/* Get the next UTF-8 character, testing for UTF-8 mode, not advancing the
+pointer, incrementing length if there are extra bytes. This is called when we
+do not know whether we are in UTF-8 mode. */
+
+#define GETCHARLENTEST(c, eptr, len) \
+  c = *eptr; \
+  if (utf8 && c >= 0xc0) \
+    { \
+    int gcii; \
+    int gcaa = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */ \
+    int gcss = 6*gcaa; \
+    c = (c & _pcre_utf8_table3[gcaa]) << gcss; \
+    for (gcii = 1; gcii <= gcaa; gcii++) \
+      { \
+      gcss -= 6; \
+      c |= (eptr[gcii] & 0x3f) << gcss; \
+      } \
+    len += gcaa; \
+    }
+
 /* If the pointer is not at the start of a character, move it back until
 it is. This is called only in UTF-8 mode - we don't put a test within the macro
 because almost all calls are already within a block of UTF-8 only code. */
@@ -500,7 +553,9 @@ Standard C system should have one. */
 
 /* Private flags containing information about the compiled regex. They used to
 live at the top end of the options word, but that got almost full, so now they
-are in a 16-bit flags word. */
+are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
+the restrictions on partial matching have been lifted. It remains for backwards
+compatibility. */
 
 #define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
 #define PCRE_FIRSTSET      0x0002  /* first_byte is set */
@@ -512,6 +567,7 @@ are in a 16-bit flags word. */
 /* Options for the "extra" block produced by pcre_study(). */
 
 #define PCRE_STUDY_MAPPED   0x01     /* a map of starting chars exists */
+#define PCRE_STUDY_MINLEN   0x02     /* a minimum length field exists */
 
 /* Masks for identifying the public options that are permitted at compile
 time, run time, or study time, respectively. */
@@ -519,7 +575,7 @@ time, run time, or study time, respectively. */
 #define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \
                            PCRE_NEWLINE_ANYCRLF)
 
-#define PUBLIC_OPTIONS \
+#define PUBLIC_COMPILE_OPTIONS \
   (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
    PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
    PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \
@@ -527,13 +583,15 @@ time, run time, or study time, respectively. */
    PCRE_JAVASCRIPT_COMPAT)
 
 #define PUBLIC_EXEC_OPTIONS \
-  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
+  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
+   PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
+   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
 
 #define PUBLIC_DFA_EXEC_OPTIONS \
-  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \
-   PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \
-   PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)
+  (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
+   PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
+   PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
+   PCRE_NO_START_OPTIMIZE)
 
 #define PUBLIC_STUDY_OPTIONS 0   /* None defined */
 
@@ -559,33 +617,566 @@ variable-length repeat, or a anything other than literal characters. */
 #define REQ_VARY     0x0200    /* reqbyte followed non-literal item */
 
 /* Miscellaneous definitions. The #ifndef is to pacify compiler warnings in
-environments where these macros are defined elsewhere. */
+environments where these macros are defined elsewhere. Unfortunately, there
+is no way to do the same for the typedef. */
+
+typedef gboolean  BOOL;
+
+/* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
+character constants like '*' because the compiler would emit their EBCDIC code,
+which is different from their ASCII/UTF-8 code. Instead we define macros for
+the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
+is enabled. When UTF-8 support is not enabled, the definitions use character
+literals. Both character and string versions of each character are needed, and
+there are some longer strings as well.
+
+This means that, on EBCDIC platforms, the PCRE library can handle either
+EBCDIC, or UTF-8, but not both. To support both in the same compiled library
+would need different lookups depending on whether PCRE_UTF8 was set or not.
+This would make it impossible to use characters in switch/case statements,
+which would reduce performance. For a theoretical use (which nobody has asked
+for) in a minority area (EBCDIC platforms), this is not sensible. Any
+application that did need both could compile two versions of the library, using
+macros to give the functions distinct names. */
+
+#ifndef SUPPORT_UTF8
+
+/* UTF-8 support is not enabled; use the platform-dependent character literals
+so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */
+
+#define CHAR_HT                     '\t'
+#define CHAR_VT                     '\v'
+#define CHAR_FF                     '\f'
+#define CHAR_CR                     '\r'
+#define CHAR_NL                     '\n'
+#define CHAR_BS                     '\b'
+#define CHAR_BEL                    '\a'
+#ifdef EBCDIC
+#define CHAR_ESC                    '\047'
+#define CHAR_DEL                    '\007'
+#else
+#define CHAR_ESC                    '\033'
+#define CHAR_DEL                    '\177'
+#endif
+
+#define CHAR_SPACE                  ' '
+#define CHAR_EXCLAMATION_MARK       '!'
+#define CHAR_QUOTATION_MARK         '"'
+#define CHAR_NUMBER_SIGN            '#'
+#define CHAR_DOLLAR_SIGN            '$'
+#define CHAR_PERCENT_SIGN           '%'
+#define CHAR_AMPERSAND              '&'
+#define CHAR_APOSTROPHE             '\''
+#define CHAR_LEFT_PARENTHESIS       '('
+#define CHAR_RIGHT_PARENTHESIS      ')'
+#define CHAR_ASTERISK               '*'
+#define CHAR_PLUS                   '+'
+#define CHAR_COMMA                  ','
+#define CHAR_MINUS                  '-'
+#define CHAR_DOT                    '.'
+#define CHAR_SLASH                  '/'
+#define CHAR_0                      '0'
+#define CHAR_1                      '1'
+#define CHAR_2                      '2'
+#define CHAR_3                      '3'
+#define CHAR_4                      '4'
+#define CHAR_5                      '5'
+#define CHAR_6                      '6'
+#define CHAR_7                      '7'
+#define CHAR_8                      '8'
+#define CHAR_9                      '9'
+#define CHAR_COLON                  ':'
+#define CHAR_SEMICOLON              ';'
+#define CHAR_LESS_THAN_SIGN         '<'
+#define CHAR_EQUALS_SIGN            '='
+#define CHAR_GREATER_THAN_SIGN      '>'
+#define CHAR_QUESTION_MARK          '?'
+#define CHAR_COMMERCIAL_AT          '@'
+#define CHAR_A                      'A'
+#define CHAR_B                      'B'
+#define CHAR_C                      'C'
+#define CHAR_D                      'D'
+#define CHAR_E                      'E'
+#define CHAR_F                      'F'
+#define CHAR_G                      'G'
+#define CHAR_H                      'H'
+#define CHAR_I                      'I'
+#define CHAR_J                      'J'
+#define CHAR_K                      'K'
+#define CHAR_L                      'L'
+#define CHAR_M                      'M'
+#define CHAR_N                      'N'
+#define CHAR_O                      'O'
+#define CHAR_P                      'P'
+#define CHAR_Q                      'Q'
+#define CHAR_R                      'R'
+#define CHAR_S                      'S'
+#define CHAR_T                      'T'
+#define CHAR_U                      'U'
+#define CHAR_V                      'V'
+#define CHAR_W                      'W'
+#define CHAR_X                      'X'
+#define CHAR_Y                      'Y'
+#define CHAR_Z                      'Z'
+#define CHAR_LEFT_SQUARE_BRACKET    '['
+#define CHAR_BACKSLASH              '\\'
+#define CHAR_RIGHT_SQUARE_BRACKET   ']'
+#define CHAR_CIRCUMFLEX_ACCENT      '^'
+#define CHAR_UNDERSCORE             '_'
+#define CHAR_GRAVE_ACCENT           '`'
+#define CHAR_a                      'a'
+#define CHAR_b                      'b'
+#define CHAR_c                      'c'
+#define CHAR_d                      'd'
+#define CHAR_e                      'e'
+#define CHAR_f                      'f'
+#define CHAR_g                      'g'
+#define CHAR_h                      'h'
+#define CHAR_i                      'i'
+#define CHAR_j                      'j'
+#define CHAR_k                      'k'
+#define CHAR_l                      'l'
+#define CHAR_m                      'm'
+#define CHAR_n                      'n'
+#define CHAR_o                      'o'
+#define CHAR_p                      'p'
+#define CHAR_q                      'q'
+#define CHAR_r                      'r'
+#define CHAR_s                      's'
+#define CHAR_t                      't'
+#define CHAR_u                      'u'
+#define CHAR_v                      'v'
+#define CHAR_w                      'w'
+#define CHAR_x                      'x'
+#define CHAR_y                      'y'
+#define CHAR_z                      'z'
+#define CHAR_LEFT_CURLY_BRACKET     '{'
+#define CHAR_VERTICAL_LINE          '|'
+#define CHAR_RIGHT_CURLY_BRACKET    '}'
+#define CHAR_TILDE                  '~'
+
+#define STR_HT                      "\t"
+#define STR_VT                      "\v"
+#define STR_FF                      "\f"
+#define STR_CR                      "\r"
+#define STR_NL                      "\n"
+#define STR_BS                      "\b"
+#define STR_BEL                     "\a"
+#ifdef EBCDIC
+#define STR_ESC                     "\047"
+#define STR_DEL                     "\007"
+#else
+#define STR_ESC                     "\033"
+#define STR_DEL                     "\177"
+#endif
 
-typedef gboolean BOOL;
+#define STR_SPACE                   " "
+#define STR_EXCLAMATION_MARK        "!"
+#define STR_QUOTATION_MARK          "\""
+#define STR_NUMBER_SIGN             "#"
+#define STR_DOLLAR_SIGN             "$"
+#define STR_PERCENT_SIGN            "%"
+#define STR_AMPERSAND               "&"
+#define STR_APOSTROPHE              "'"
+#define STR_LEFT_PARENTHESIS        "("
+#define STR_RIGHT_PARENTHESIS       ")"
+#define STR_ASTERISK                "*"
+#define STR_PLUS                    "+"
+#define STR_COMMA                   ","
+#define STR_MINUS                   "-"
+#define STR_DOT                     "."
+#define STR_SLASH                   "/"
+#define STR_0                       "0"
+#define STR_1                       "1"
+#define STR_2                       "2"
+#define STR_3                       "3"
+#define STR_4                       "4"
+#define STR_5                       "5"
+#define STR_6                       "6"
+#define STR_7                       "7"
+#define STR_8                       "8"
+#define STR_9                       "9"
+#define STR_COLON                   ":"
+#define STR_SEMICOLON               ";"
+#define STR_LESS_THAN_SIGN          "<"
+#define STR_EQUALS_SIGN             "="
+#define STR_GREATER_THAN_SIGN       ">"
+#define STR_QUESTION_MARK           "?"
+#define STR_COMMERCIAL_AT           "@"
+#define STR_A                       "A"
+#define STR_B                       "B"
+#define STR_C                       "C"
+#define STR_D                       "D"
+#define STR_E                       "E"
+#define STR_F                       "F"
+#define STR_G                       "G"
+#define STR_H                       "H"
+#define STR_I                       "I"
+#define STR_J                       "J"
+#define STR_K                       "K"
+#define STR_L                       "L"
+#define STR_M                       "M"
+#define STR_N                       "N"
+#define STR_O                       "O"
+#define STR_P                       "P"
+#define STR_Q                       "Q"
+#define STR_R                       "R"
+#define STR_S                       "S"
+#define STR_T                       "T"
+#define STR_U                       "U"
+#define STR_V                       "V"
+#define STR_W                       "W"
+#define STR_X                       "X"
+#define STR_Y                       "Y"
+#define STR_Z                       "Z"
+#define STR_LEFT_SQUARE_BRACKET     "["
+#define STR_BACKSLASH               "\\"
+#define STR_RIGHT_SQUARE_BRACKET    "]"
+#define STR_CIRCUMFLEX_ACCENT       "^"
+#define STR_UNDERSCORE              "_"
+#define STR_GRAVE_ACCENT            "`"
+#define STR_a                       "a"
+#define STR_b                       "b"
+#define STR_c                       "c"
+#define STR_d                       "d"
+#define STR_e                       "e"
+#define STR_f                       "f"
+#define STR_g                       "g"
+#define STR_h                       "h"
+#define STR_i                       "i"
+#define STR_j                       "j"
+#define STR_k                       "k"
+#define STR_l                       "l"
+#define STR_m                       "m"
+#define STR_n                       "n"
+#define STR_o                       "o"
+#define STR_p                       "p"
+#define STR_q                       "q"
+#define STR_r                       "r"
+#define STR_s                       "s"
+#define STR_t                       "t"
+#define STR_u                       "u"
+#define STR_v                       "v"
+#define STR_w                       "w"
+#define STR_x                       "x"
+#define STR_y                       "y"
+#define STR_z                       "z"
+#define STR_LEFT_CURLY_BRACKET      "{"
+#define STR_VERTICAL_LINE           "|"
+#define STR_RIGHT_CURLY_BRACKET     "}"
+#define STR_TILDE                   "~"
+
+#define STRING_ACCEPT0              "ACCEPT\0"
+#define STRING_COMMIT0              "COMMIT\0"
+#define STRING_F0                   "F\0"
+#define STRING_FAIL0                "FAIL\0"
+#define STRING_PRUNE0               "PRUNE\0"
+#define STRING_SKIP0                "SKIP\0"
+#define STRING_THEN                 "THEN"
+
+#define STRING_alpha0               "alpha\0"
+#define STRING_lower0               "lower\0"
+#define STRING_upper0               "upper\0"
+#define STRING_alnum0               "alnum\0"
+#define STRING_ascii0               "ascii\0"
+#define STRING_blank0               "blank\0"
+#define STRING_cntrl0               "cntrl\0"
+#define STRING_digit0               "digit\0"
+#define STRING_graph0               "graph\0"
+#define STRING_print0               "print\0"
+#define STRING_punct0               "punct\0"
+#define STRING_space0               "space\0"
+#define STRING_word0                "word\0"
+#define STRING_xdigit               "xdigit"
+
+#define STRING_DEFINE               "DEFINE"
+
+#define STRING_CR_RIGHTPAR          "CR)"
+#define STRING_LF_RIGHTPAR          "LF)"
+#define STRING_CRLF_RIGHTPAR        "CRLF)"
+#define STRING_ANY_RIGHTPAR         "ANY)"
+#define STRING_ANYCRLF_RIGHTPAR     "ANYCRLF)"
+#define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
+#define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
+#define STRING_UTF8_RIGHTPAR        "UTF8)"
+
+#else  /* SUPPORT_UTF8 */
+
+/* UTF-8 support is enabled; always use UTF-8 (=ASCII) character codes. This
+works in both modes on non-EBCDIC platforms, and on EBCDIC platforms in UTF-8
+mode only. */
+
+#define CHAR_HT                     '\011'
+#define CHAR_VT                     '\013'
+#define CHAR_FF                     '\014'
+#define CHAR_CR                     '\015'
+#define CHAR_NL                     '\012'
+#define CHAR_BS                     '\010'
+#define CHAR_BEL                    '\007'
+#define CHAR_ESC                    '\033'
+#define CHAR_DEL                    '\177'
+
+#define CHAR_SPACE                  '\040'
+#define CHAR_EXCLAMATION_MARK       '\041'
+#define CHAR_QUOTATION_MARK         '\042'
+#define CHAR_NUMBER_SIGN            '\043'
+#define CHAR_DOLLAR_SIGN            '\044'
+#define CHAR_PERCENT_SIGN           '\045'
+#define CHAR_AMPERSAND              '\046'
+#define CHAR_APOSTROPHE             '\047'
+#define CHAR_LEFT_PARENTHESIS       '\050'
+#define CHAR_RIGHT_PARENTHESIS      '\051'
+#define CHAR_ASTERISK               '\052'
+#define CHAR_PLUS                   '\053'
+#define CHAR_COMMA                  '\054'
+#define CHAR_MINUS                  '\055'
+#define CHAR_DOT                    '\056'
+#define CHAR_SLASH                  '\057'
+#define CHAR_0                      '\060'
+#define CHAR_1                      '\061'
+#define CHAR_2                      '\062'
+#define CHAR_3                      '\063'
+#define CHAR_4                      '\064'
+#define CHAR_5                      '\065'
+#define CHAR_6                      '\066'
+#define CHAR_7                      '\067'
+#define CHAR_8                      '\070'
+#define CHAR_9                      '\071'
+#define CHAR_COLON                  '\072'
+#define CHAR_SEMICOLON              '\073'
+#define CHAR_LESS_THAN_SIGN         '\074'
+#define CHAR_EQUALS_SIGN            '\075'
+#define CHAR_GREATER_THAN_SIGN      '\076'
+#define CHAR_QUESTION_MARK          '\077'
+#define CHAR_COMMERCIAL_AT          '\100'
+#define CHAR_A                      '\101'
+#define CHAR_B                      '\102'
+#define CHAR_C                      '\103'
+#define CHAR_D                      '\104'
+#define CHAR_E                      '\105'
+#define CHAR_F                      '\106'
+#define CHAR_G                      '\107'
+#define CHAR_H                      '\110'
+#define CHAR_I                      '\111'
+#define CHAR_J                      '\112'
+#define CHAR_K                      '\113'
+#define CHAR_L                      '\114'
+#define CHAR_M                      '\115'
+#define CHAR_N                      '\116'
+#define CHAR_O                      '\117'
+#define CHAR_P                      '\120'
+#define CHAR_Q                      '\121'
+#define CHAR_R                      '\122'
+#define CHAR_S                      '\123'
+#define CHAR_T                      '\124'
+#define CHAR_U                      '\125'
+#define CHAR_V                      '\126'
+#define CHAR_W                      '\127'
+#define CHAR_X                      '\130'
+#define CHAR_Y                      '\131'
+#define CHAR_Z                      '\132'
+#define CHAR_LEFT_SQUARE_BRACKET    '\133'
+#define CHAR_BACKSLASH              '\134'
+#define CHAR_RIGHT_SQUARE_BRACKET   '\135'
+#define CHAR_CIRCUMFLEX_ACCENT      '\136'
+#define CHAR_UNDERSCORE             '\137'
+#define CHAR_GRAVE_ACCENT           '\140'
+#define CHAR_a                      '\141'
+#define CHAR_b                      '\142'
+#define CHAR_c                      '\143'
+#define CHAR_d                      '\144'
+#define CHAR_e                      '\145'
+#define CHAR_f                      '\146'
+#define CHAR_g                      '\147'
+#define CHAR_h                      '\150'
+#define CHAR_i                      '\151'
+#define CHAR_j                      '\152'
+#define CHAR_k                      '\153'
+#define CHAR_l                      '\154'
+#define CHAR_m                      '\155'
+#define CHAR_n                      '\156'
+#define CHAR_o                      '\157'
+#define CHAR_p                      '\160'
+#define CHAR_q                      '\161'
+#define CHAR_r                      '\162'
+#define CHAR_s                      '\163'
+#define CHAR_t                      '\164'
+#define CHAR_u                      '\165'
+#define CHAR_v                      '\166'
+#define CHAR_w                      '\167'
+#define CHAR_x                      '\170'
+#define CHAR_y                      '\171'
+#define CHAR_z                      '\172'
+#define CHAR_LEFT_CURLY_BRACKET     '\173'
+#define CHAR_VERTICAL_LINE          '\174'
+#define CHAR_RIGHT_CURLY_BRACKET    '\175'
+#define CHAR_TILDE                  '\176'
+
+#define STR_HT                      "\011"
+#define STR_VT                      "\013"
+#define STR_FF                      "\014"
+#define STR_CR                      "\015"
+#define STR_NL                      "\012"
+#define STR_BS                      "\010"
+#define STR_BEL                     "\007"
+#define STR_ESC                     "\033"
+#define STR_DEL                     "\177"
+
+#define STR_SPACE                   "\040"
+#define STR_EXCLAMATION_MARK        "\041"
+#define STR_QUOTATION_MARK          "\042"
+#define STR_NUMBER_SIGN             "\043"
+#define STR_DOLLAR_SIGN             "\044"
+#define STR_PERCENT_SIGN            "\045"
+#define STR_AMPERSAND               "\046"
+#define STR_APOSTROPHE              "\047"
+#define STR_LEFT_PARENTHESIS        "\050"
+#define STR_RIGHT_PARENTHESIS       "\051"
+#define STR_ASTERISK                "\052"
+#define STR_PLUS                    "\053"
+#define STR_COMMA                   "\054"
+#define STR_MINUS                   "\055"
+#define STR_DOT                     "\056"
+#define STR_SLASH                   "\057"
+#define STR_0                       "\060"
+#define STR_1                       "\061"
+#define STR_2                       "\062"
+#define STR_3                       "\063"
+#define STR_4                       "\064"
+#define STR_5                       "\065"
+#define STR_6                       "\066"
+#define STR_7                       "\067"
+#define STR_8                       "\070"
+#define STR_9                       "\071"
+#define STR_COLON                   "\072"
+#define STR_SEMICOLON               "\073"
+#define STR_LESS_THAN_SIGN          "\074"
+#define STR_EQUALS_SIGN             "\075"
+#define STR_GREATER_THAN_SIGN       "\076"
+#define STR_QUESTION_MARK           "\077"
+#define STR_COMMERCIAL_AT           "\100"
+#define STR_A                       "\101"
+#define STR_B                       "\102"
+#define STR_C                       "\103"
+#define STR_D                       "\104"
+#define STR_E                       "\105"
+#define STR_F                       "\106"
+#define STR_G                       "\107"
+#define STR_H                       "\110"
+#define STR_I                       "\111"
+#define STR_J                       "\112"
+#define STR_K                       "\113"
+#define STR_L                       "\114"
+#define STR_M                       "\115"
+#define STR_N                       "\116"
+#define STR_O                       "\117"
+#define STR_P                       "\120"
+#define STR_Q                       "\121"
+#define STR_R                       "\122"
+#define STR_S                       "\123"
+#define STR_T                       "\124"
+#define STR_U                       "\125"
+#define STR_V                       "\126"
+#define STR_W                       "\127"
+#define STR_X                       "\130"
+#define STR_Y                       "\131"
+#define STR_Z                       "\132"
+#define STR_LEFT_SQUARE_BRACKET     "\133"
+#define STR_BACKSLASH               "\134"
+#define STR_RIGHT_SQUARE_BRACKET    "\135"
+#define STR_CIRCUMFLEX_ACCENT       "\136"
+#define STR_UNDERSCORE              "\137"
+#define STR_GRAVE_ACCENT            "\140"
+#define STR_a                       "\141"
+#define STR_b                       "\142"
+#define STR_c                       "\143"
+#define STR_d                       "\144"
+#define STR_e                       "\145"
+#define STR_f                       "\146"
+#define STR_g                       "\147"
+#define STR_h                       "\150"
+#define STR_i                       "\151"
+#define STR_j                       "\152"
+#define STR_k                       "\153"
+#define STR_l                       "\154"
+#define STR_m                       "\155"
+#define STR_n                       "\156"
+#define STR_o                       "\157"
+#define STR_p                       "\160"
+#define STR_q                       "\161"
+#define STR_r                       "\162"
+#define STR_s                       "\163"
+#define STR_t                       "\164"
+#define STR_u                       "\165"
+#define STR_v                       "\166"
+#define STR_w                       "\167"
+#define STR_x                       "\170"
+#define STR_y                       "\171"
+#define STR_z                       "\172"
+#define STR_LEFT_CURLY_BRACKET      "\173"
+#define STR_VERTICAL_LINE           "\174"
+#define STR_RIGHT_CURLY_BRACKET     "\175"
+#define STR_TILDE                   "\176"
+
+#define STRING_ACCEPT0              STR_A STR_C STR_C STR_E STR_P STR_T "\0"
+#define STRING_COMMIT0              STR_C STR_O STR_M STR_M STR_I STR_T "\0"
+#define STRING_F0                   STR_F "\0"
+#define STRING_FAIL0                STR_F STR_A STR_I STR_L "\0"
+#define STRING_PRUNE0               STR_P STR_R STR_U STR_N STR_E "\0"
+#define STRING_SKIP0                STR_S STR_K STR_I STR_P "\0"
+#define STRING_THEN                 STR_T STR_H STR_E STR_N
+
+#define STRING_alpha0               STR_a STR_l STR_p STR_h STR_a "\0"
+#define STRING_lower0               STR_l STR_o STR_w STR_e STR_r "\0"
+#define STRING_upper0               STR_u STR_p STR_p STR_e STR_r "\0"
+#define STRING_alnum0               STR_a STR_l STR_n STR_u STR_m "\0"
+#define STRING_ascii0               STR_a STR_s STR_c STR_i STR_i "\0"
+#define STRING_blank0               STR_b STR_l STR_a STR_n STR_k "\0"
+#define STRING_cntrl0               STR_c STR_n STR_t STR_r STR_l "\0"
+#define STRING_digit0               STR_d STR_i STR_g STR_i STR_t "\0"
+#define STRING_graph0               STR_g STR_r STR_a STR_p STR_h "\0"
+#define STRING_print0               STR_p STR_r STR_i STR_n STR_t "\0"
+#define STRING_punct0               STR_p STR_u STR_n STR_c STR_t "\0"
+#define STRING_space0               STR_s STR_p STR_a STR_c STR_e "\0"
+#define STRING_word0                STR_w STR_o STR_r STR_d       "\0"
+#define STRING_xdigit               STR_x STR_d STR_i STR_g STR_i STR_t
+
+#define STRING_DEFINE               STR_D STR_E STR_F STR_I STR_N STR_E
+
+#define STRING_CR_RIGHTPAR          STR_C STR_R STR_RIGHT_PARENTHESIS
+#define STRING_LF_RIGHTPAR          STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_CRLF_RIGHTPAR        STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_ANY_RIGHTPAR         STR_A STR_N STR_Y STR_RIGHT_PARENTHESIS
+#define STRING_ANYCRLF_RIGHTPAR     STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
+#define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
+#define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
+
+#endif  /* SUPPORT_UTF8 */
 
 /* Escape items that are just an encoding of a particular data value. */
 
 #ifndef ESC_e
-#define ESC_e 27
+#define ESC_e CHAR_ESC
 #endif
 
 #ifndef ESC_f
-#define ESC_f '\f'
+#define ESC_f CHAR_FF
 #endif
 
 #ifndef ESC_n
-#define ESC_n '\n'
+#define ESC_n CHAR_NL
 #endif
 
 #ifndef ESC_r
-#define ESC_r '\r'
+#define ESC_r CHAR_CR
 #endif
 
 /* We can't officially use ESC_t because it is a POSIX reserved identifier
 (presumably because of all the others like size_t). */
 
 #ifndef ESC_tee
-#define ESC_tee '\t'
+#define ESC_tee CHAR_HT
 #endif
 
 /* Codes for different types of Unicode property */
@@ -632,8 +1223,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
 OP_EOD must correspond in order to the list of escapes immediately above.
 
 *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There is also a table called
-"coptable" in pcre_dfa_exec.c that must be updated. */
+that follow must also be updated to match. There are also tables called
+"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
 
 enum {
   OP_END,            /* 0 End of pattern */
@@ -769,30 +1360,45 @@ enum {
   OP_SCBRA,          /* 98 Start of capturing bracket, check empty */
   OP_SCOND,          /* 99 Conditional group, check empty */
 
+  /* The next two pairs must (respectively) be kept together. */
+
   OP_CREF,           /* 100 Used to hold a capture number as condition */
-  OP_RREF,           /* 101 Used to hold a recursion number as condition */
-  OP_DEF,            /* 102 The DEFINE condition */
+  OP_NCREF,          /* 101 Same, but generated by a name reference */
+  OP_RREF,           /* 102 Used to hold a recursion number as condition */
+  OP_NRREF,          /* 103 Same, but generated by a name reference */
+  OP_DEF,            /* 104 The DEFINE condition */
 
-  OP_BRAZERO,        /* 103 These two must remain together and in this */
-  OP_BRAMINZERO,     /* 104 order. */
+  OP_BRAZERO,        /* 105 These two must remain together and in this */
+  OP_BRAMINZERO,     /* 106 order. */
 
   /* These are backtracking control verbs */
 
-  OP_PRUNE,          /* 105 */
-  OP_SKIP,           /* 106 */
-  OP_THEN,           /* 107 */
-  OP_COMMIT,         /* 108 */
+  OP_PRUNE,          /* 107 */
+  OP_SKIP,           /* 108 */
+  OP_THEN,           /* 109 */
+  OP_COMMIT,         /* 110 */
 
   /* These are forced failure and success verbs */
 
-  OP_FAIL,           /* 109 */
-  OP_ACCEPT,         /* 110 */
+  OP_FAIL,           /* 111 */
+  OP_ACCEPT,         /* 112 */
+  OP_CLOSE,          /* 113 Used before OP_ACCEPT to close open captures */
 
   /* This is used to skip a subpattern with a {0} quantifier */
 
-  OP_SKIPZERO        /* 111 */
+  OP_SKIPZERO,       /* 114 */
+
+  /* This is not an opcode, but is used to check that tables indexed by opcode
+  are the correct length, in order to catch updating errors - there have been
+  some in the past. */
+
+  OP_TABLE_LENGTH
 };
 
+/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
+definitions that follow must also be updated to match. There are also tables
+called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
+
 
 /* This macro defines textual names for all the opcodes. These are used only
 for debugging. The macro is referenced only in pcre_printint.c. */
@@ -814,9 +1420,10 @@ for debugging. The macro is referenced only in pcre_printint.c. */
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
   "AssertB", "AssertB not", "Reverse",                            \
   "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
-  "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero",    \
+  "Cond ref", "Cond nref", "Cond rec", "Cond nrec", "Cond def",   \
+  "Brazero", "Braminzero",                                        \
   "*PRUNE", "*SKIP", "*THEN", "*COMMIT", "*FAIL", "*ACCEPT",      \
-  "Skip zero"
+  "Close", "Skip zero"
 
 
 /* This macro defines the length of fixed length operations in the compiled
@@ -833,8 +1440,9 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
   1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
   1, 1, 1,                       /* Any, AllAny, Anybyte                   */ \
-  3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \
+  3, 3,                          /* \P, \p                                 */ \
   1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
+  1,                             /* \X                                     */ \
   1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
   2,                             /* Char  - the minimum length             */ \
   2,                             /* Charnc  - the minimum length           */ \
@@ -876,20 +1484,22 @@ in UTF-8 mode. The code that uses this table must know about such things. */
   1+LINK_SIZE,                   /* SBRA                                   */ \
   3+LINK_SIZE,                   /* SCBRA                                  */ \
   1+LINK_SIZE,                   /* SCOND                                  */ \
-  3,                             /* CREF                                   */ \
-  3,                             /* RREF                                   */ \
+  3, 3,                          /* CREF, NCREF                            */ \
+  3, 3,                          /* RREF, NRREF                            */ \
   1,                             /* DEF                                    */ \
   1, 1,                          /* BRAZERO, BRAMINZERO                    */ \
   1, 1, 1, 1,                    /* PRUNE, SKIP, THEN, COMMIT,             */ \
-  1, 1, 1                        /* FAIL, ACCEPT, SKIPZERO                 */
+  1, 1, 3, 1                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
 
 
-/* A magic value for OP_RREF to indicate the "any recursion" condition. */
+/* A magic value for OP_RREF and OP_NRREF to indicate the "any recursion"
+condition. */
 
 #define RREF_ANY  0xffff
 
-/* Error code numbers. They are given names so that they can more easily be
-tracked. */
+/* Compile time error code numbers. They are given names so that they can more
+easily be tracked. When a new number is added, the table called eint in
+pcreposix.c must be updated. */
 
 enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19,
@@ -897,7 +1507,7 @@ enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,
        ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39,
        ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
        ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
-       ERR60, ERR61, ERR62, ERR63, ERR64 };
+       ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERRCOUNT };
 
 /* The real format of the start of the pcre block; the index of names and the
 code vector run on as long as necessary after the end. We store an explicit
@@ -913,7 +1523,7 @@ Because people can now save and re-use compiled patterns, any additions to this
 structure should be made at the end, and something earlier (e.g. a new
 flag in the options or one of the dummy fields) should indicate that the new
 fields are present. Currently PCRE always sets the dummy fields to zero.
-NOTE NOTE NOTE:
+NOTE NOTE NOTE
 */
 
 typedef struct real_pcre {
@@ -940,10 +1550,22 @@ remark (see NOTE above) about extending this structure applies. */
 
 typedef struct pcre_study_data {
   pcre_uint32 size;               /* Total that was malloced */
-  pcre_uint32 options;
-  uschar start_bits[32];
+  pcre_uint32 flags;              /* Private flags */
+  uschar start_bits[32];          /* Starting char bits */
+  pcre_uint32 minlength;          /* Minimum subject length */
 } pcre_study_data;
 
+/* Structure for building a chain of open capturing subpatterns during
+compiling, so that instructions to close them can be compiled when (*ACCEPT) is
+encountered. This is also used to identify subpatterns that contain recursive
+back references to themselves, so that they can be made atomic. */
+
+typedef struct open_capitem {
+  struct open_capitem *next;    /* Chain link */
+  pcre_uint16 number;           /* Capture number */
+  pcre_uint16 flag;             /* Set TRUE if recursive back ref */
+} open_capitem;
+
 /* Structure for passing "static" information around between the functions
 doing the compiling, so that they are thread-safe. */
 
@@ -956,6 +1578,7 @@ typedef struct compile_data {
   const uschar *start_code;     /* The start of the compiled code */
   const uschar *start_pattern;  /* The start of the pattern */
   const uschar *end_pattern;    /* The end of the pattern */
+  open_capitem *open_caps;      /* Chain of open capture items */
   uschar *hwm;                  /* High watermark of workspace */
   uschar *name_table;           /* The name/number table */
   int  names_found;             /* Number of entries so far */
@@ -968,6 +1591,7 @@ typedef struct compile_data {
   int  external_flags;          /* External flag bits to be set */
   int  req_varyopt;             /* "After variable item" flag for reqbyte */
   BOOL had_accept;              /* (*ACCEPT) encountered */
+  BOOL check_lookbehind;        /* Lookbehinds need later checking */
   int  nltype;                  /* Newline type */
   int  nllen;                   /* Newline string length */
   uschar nl[4];                 /* Newline string when fixed length */
@@ -978,7 +1602,7 @@ branches, for testing for left recursion. */
 
 typedef struct branch_chain {
   struct branch_chain *outer;
-  uschar *current;
+  uschar *current_branch;
 } branch_chain;
 
 /* Structure for items in a linked list that represents an explicit recursive
@@ -988,9 +1612,9 @@ typedef struct recursion_info {
   struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
   int group_num;                /* Number of group that was called */
   const uschar *after_call;     /* "Return value": points after the call in the expr */
-  USPTR save_start;             /* Old value of mstart */
   int *offset_save;             /* Pointer to start of saved offsets */
   int saved_max;                /* Number of saved offsets */
+  int save_offset_top;          /* Current value of offset_top */
 } recursion_info;
 
 /* Structure for building a chain of data for holding the values of the subject
@@ -1015,6 +1639,9 @@ typedef struct match_data {
   int    offset_max;            /* The maximum usable for return data */
   int    nltype;                /* Newline type */
   int    nllen;                 /* Newline string length */
+  int    name_count;            /* Number of names in name table */
+  int    name_entry_size;       /* Size of entry in names table */
+  uschar *name_table;           /* Table of names */
   uschar nl[4];                 /* Newline string when fixed */
   const uschar *lcc;            /* Points to lower casing table */
   const uschar *ctypes;         /* Points to table of type maps */
@@ -1025,7 +1652,7 @@ typedef struct match_data {
   BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
   BOOL   endonly;               /* Dollar not before final \n */
   BOOL   notempty;              /* Empty string match not wanted */
-  BOOL   partial;               /* PARTIAL flag */
+  BOOL   notempty_atstart;      /* Empty string match at start not wanted */
   BOOL   hitend;                /* Hit the end of the subject at some point */
   BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
   const uschar *start_code;     /* For use when recursing */
@@ -1033,6 +1660,8 @@ typedef struct match_data {
   USPTR  end_subject;           /* End of the subject string */
   USPTR  start_match_ptr;       /* Start of matched string */
   USPTR  end_match_ptr;         /* Subject position at end match */
+  USPTR  start_used_ptr;        /* Earliest consulted character */
+  int    partial;               /* PARTIAL options */
   int    end_offset_top;        /* Highwater mark at end of match */
   int    capture_last;          /* Most recent capture number */
   int    start_offset;          /* The start offset value */
@@ -1049,7 +1678,9 @@ typedef struct dfa_match_data {
   const uschar *start_code;     /* Start of the compiled pattern */
   const uschar *start_subject;  /* Start of the subject string */
   const uschar *end_subject;    /* End of subject string */
+  const uschar *start_used_ptr; /* Earliest consulted character */
   const uschar *tables;         /* Character tables */
+  int   start_offset;           /* The start offset value */
   int   moptions;               /* Match options */
   int   poptions;               /* Pattern options */
   int    nltype;                /* Newline type */
@@ -1128,20 +1759,30 @@ extern const uschar _pcre_OP_lengths[];
 one of the exported public functions. They have to be "external" in the C
 sense, but are not part of the PCRE public API. */
 
-extern BOOL         _pcre_is_newline(const uschar *, int, const uschar *,
-                      int *, BOOL);
-extern int          _pcre_ord2utf8(int, uschar *);
-extern real_pcre   *_pcre_try_flipped(const real_pcre *, real_pcre *,
-                      const pcre_study_data *, pcre_study_data *);
-extern int          _pcre_valid_utf8(const uschar *, int);
-extern BOOL         _pcre_was_newline(const uschar *, int, const uschar *,
-                      int *, BOOL);
-extern BOOL         _pcre_xclass(int, const uschar *);
-extern unsigned int _pcre_ucp_othercase(unsigned int);
+extern const uschar *_pcre_find_bracket(const uschar *, BOOL, int);
+extern BOOL          _pcre_is_newline(USPTR, int, USPTR, int *, BOOL);
+extern int           _pcre_ord2utf8(int, uschar *);
+extern real_pcre    *_pcre_try_flipped(const real_pcre *, real_pcre *,
+                       const pcre_study_data *, pcre_study_data *);
+extern int           _pcre_valid_utf8(USPTR, int);
+extern BOOL          _pcre_was_newline(USPTR, int, USPTR, int *, BOOL);
+extern BOOL          _pcre_xclass(int, const uschar *);
 
 
+/* Unicode character database (UCD) */
+
+typedef struct {                /* One entry of _pcre_ucd_records[] */
+  uschar script;                /* presumably a script code from ucp.h - TODO confirm */
+  uschar chartype;              /* presumably a character type code from ucp.h - TODO confirm */
+  pcre_int32 other_case;        /* signed; NOTE(review): offset or code point? confirm */
+} ucd_record;
+
+extern const ucd_record  _pcre_ucd_records[];
+extern const uschar      _pcre_ucd_stage1[];
+extern const pcre_uint16 _pcre_ucd_stage2[];
 extern const int         _pcre_ucp_gentype[];
 
+extern unsigned int      _pcre_ucp_othercase (unsigned int);
 
 /* UCD access macros */
 
diff --git a/glib/pcre/pcre_newline.c b/glib/pcre/pcre_newline.c
index 5888576..38cf7f7 100644
--- a/glib/pcre/pcre_newline.c
+++ b/glib/pcre/pcre_newline.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -73,8 +73,7 @@ Returns:       TRUE or FALSE
 */
 
 BOOL
-_pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,
-  int *lenptr, BOOL utf8)
+_pcre_is_newline(USPTR ptr, int type, USPTR endptr, int *lenptr, BOOL utf8)
 {
 int c;
 if (utf8) { GETCHAR(c, ptr); } else c = *ptr;
@@ -123,8 +122,7 @@ Returns:       TRUE or FALSE
 */
 
 BOOL
-_pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,
-  int *lenptr, BOOL utf8)
+_pcre_was_newline(USPTR ptr, int type, USPTR startptr, int *lenptr, BOOL utf8)
 {
 int c;
 ptr--;
diff --git a/glib/pcre/pcre_study.c b/glib/pcre/pcre_study.c
index 778851d..bd00a53 100644
--- a/glib/pcre/pcre_study.c
+++ b/glib/pcre/pcre_study.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2010 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -54,6 +54,379 @@ supporting functions. */
 enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE };
 
 
+
+/*************************************************
+*   Find the minimum subject length for a group  *
+*************************************************/
+
+/* Scan a parenthesized group and compute the minimum length of subject that
+is needed to match it. This is a lower bound; it does not mean there is a
+string of that length that matches. In UTF8 mode, the result is in characters
+rather than bytes.
+
+Arguments:
+  code       pointer to start of group (the bracket)
+  startcode  pointer to start of the whole pattern
+  options    the compiling options
+
+Returns:   the minimum length
+           -1 if \C was encountered in UTF-8 mode
+           -2 internal error (missing capturing bracket)
+*/
+
+static int
+find_minlength(const uschar *code, const uschar *startcode, int options)
+{
+int length = -1;
+BOOL utf8 = (options & PCRE_UTF8) != 0;
+BOOL had_recurse = FALSE;
+register int branchlength = 0;
+register uschar *cc = (uschar *)code + 1 + LINK_SIZE;
+/* OP_CBRA and OP_SCBRA have two more bytes after the link; step over them. */
+if (*code == OP_CBRA || *code == OP_SCBRA) cc += 2;
+
+/* Scan along the opcodes for this branch. If we get to the end of the
+branch, check the length against that of the other branches. */
+
+for (;;)
+  {
+  int d, min;
+  uschar *cs, *ce;
+  register int op = *cc;
+
+  switch (op)
+    {
+    case OP_COND:
+    case OP_SCOND:
+
+    /* If there is only one branch in a condition, the implied branch has zero
+    length, so we don't add anything. This covers the DEFINE "condition"
+    automatically. */
+
+    cs = cc + GET(cc, 1);
+    if (*cs != OP_ALT)
+      {
+      cc = cs + 1 + LINK_SIZE;
+      break;
+      }
+
+    /* Otherwise we can fall through and treat it the same as any other
+    subpattern. */
+
+    case OP_CBRA:
+    case OP_SCBRA:
+    case OP_BRA:
+    case OP_SBRA:
+    case OP_ONCE:
+    d = find_minlength(cc, startcode, options);
+    if (d < 0) return d;
+    branchlength += d;
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    cc += 1 + LINK_SIZE;
+    break;
+
+    /* Reached end of a branch; if it's a ket it is the end of a nested
+    call. If it's ALT it is an alternation in a nested call. If it is
+    END it's the end of the outer call. All can be handled by the same code. */
+
+    case OP_ALT:
+    case OP_KET:
+    case OP_KETRMAX:
+    case OP_KETRMIN:
+    case OP_END:
+    if (length < 0 || (!had_recurse && branchlength < length))
+      length = branchlength;
+    if (*cc != OP_ALT) return length;
+    cc += 1 + LINK_SIZE;
+    branchlength = 0;
+    had_recurse = FALSE;
+    break;
+
+    /* Skip over assertive subpatterns */
+
+    case OP_ASSERT:
+    case OP_ASSERT_NOT:
+    case OP_ASSERTBACK:
+    case OP_ASSERTBACK_NOT:
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    /* Fall through */
+
+    /* Skip over things that don't match chars */
+
+    case OP_REVERSE:
+    case OP_CREF:
+    case OP_NCREF:
+    case OP_RREF:
+    case OP_NRREF:
+    case OP_DEF:
+    case OP_OPT:
+    case OP_CALLOUT:
+    case OP_SOD:
+    case OP_SOM:
+    case OP_EOD:
+    case OP_EODN:
+    case OP_CIRC:
+    case OP_DOLL:
+    case OP_NOT_WORD_BOUNDARY:
+    case OP_WORD_BOUNDARY:
+    cc += _pcre_OP_lengths[*cc];
+    break;
+
+    /* Skip over a subpattern that has a {0} or {0,x} quantifier */
+
+    case OP_BRAZERO:
+    case OP_BRAMINZERO:
+    case OP_SKIPZERO:
+    cc += _pcre_OP_lengths[*cc];
+    do cc += GET(cc, 1); while (*cc == OP_ALT);
+    cc += 1 + LINK_SIZE;
+    break;
+
+    /* Handle literal characters and + repetitions */
+
+    case OP_CHAR:
+    case OP_CHARNC:
+    case OP_NOT:
+    case OP_PLUS:
+    case OP_MINPLUS:
+    case OP_POSPLUS:
+    case OP_NOTPLUS:
+    case OP_NOTMINPLUS:
+    case OP_NOTPOSPLUS:
+    branchlength++;
+    cc += 2;
+#ifdef SUPPORT_UTF8
+    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+#endif
+    break;
+
+    case OP_TYPEPLUS:
+    case OP_TYPEMINPLUS:
+    case OP_TYPEPOSPLUS:
+    branchlength++;
+    cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
+    break;
+
+    /* Handle exact repetitions. The count is already in characters, but we
+    need to skip over a multibyte character in UTF8 mode.  */
+
+    case OP_EXACT:
+    case OP_NOTEXACT:
+    branchlength += GET2(cc,1);
+    cc += 4;
+#ifdef SUPPORT_UTF8
+    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+#endif
+    break;
+
+    case OP_TYPEEXACT:
+    branchlength += GET2(cc,1);
+    cc += (cc[3] == OP_PROP || cc[3] == OP_NOTPROP)? 6 : 4;
+    break;
+
+    /* Handle single-char non-literal matchers */
+
+    case OP_PROP:
+    case OP_NOTPROP:
+    cc += 2;
+    /* Fall through */
+
+    case OP_NOT_DIGIT:
+    case OP_DIGIT:
+    case OP_NOT_WHITESPACE:
+    case OP_WHITESPACE:
+    case OP_NOT_WORDCHAR:
+    case OP_WORDCHAR:
+    case OP_ANY:
+    case OP_ALLANY:
+    case OP_EXTUNI:
+    case OP_HSPACE:
+    case OP_NOT_HSPACE:
+    case OP_VSPACE:
+    case OP_NOT_VSPACE:
+    branchlength++;
+    cc++;
+    break;
+
+    /* "Any newline" may match two characters, but the minimum is just one */
+
+    case OP_ANYNL:
+    branchlength += 1;
+    cc++;
+    break;
+
+    /* The single-byte matcher means we can't proceed in UTF-8 mode */
+
+    case OP_ANYBYTE:
+#ifdef SUPPORT_UTF8
+    if (utf8) return -1;
+#endif
+    branchlength++;
+    cc++;
+    break;
+
+    /* For repeated character types, we have to test for \p and \P, which have
+    an extra two bytes of parameters. */
+
+    case OP_TYPESTAR:
+    case OP_TYPEMINSTAR:
+    case OP_TYPEQUERY:
+    case OP_TYPEMINQUERY:
+    case OP_TYPEPOSSTAR:
+    case OP_TYPEPOSQUERY:
+    if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
+    cc += _pcre_OP_lengths[op];
+    break;
+
+    case OP_TYPEUPTO:
+    case OP_TYPEMINUPTO:
+    case OP_TYPEPOSUPTO:
+    if (cc[3] == OP_PROP || cc[3] == OP_NOTPROP) cc += 2;
+    cc += _pcre_OP_lengths[op];
+    break;
+
+    /* Check a class for variable quantification */
+
+#ifdef SUPPORT_UTF8
+    case OP_XCLASS:
+    cc += GET(cc, 1) - 33;
+    /* Fall through */
+#endif
+
+    case OP_CLASS:
+    case OP_NCLASS:
+    cc += 33;
+
+    switch (*cc)
+      {
+      case OP_CRPLUS:
+      case OP_CRMINPLUS:
+      branchlength++;
+      /* Fall through */
+
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRQUERY:
+      case OP_CRMINQUERY:
+      cc++;
+      break;
+
+      case OP_CRRANGE:
+      case OP_CRMINRANGE:
+      branchlength += GET2(cc,1);
+      cc += 5;
+      break;
+
+      default:
+      branchlength++;
+      break;
+      }
+    break;
+
+    /* Backreferences and subroutine calls are treated in the same way: we find
+    the minimum length for the subpattern (a negative result is an error code
+    and is returned at once). A recursion, however, sets a flag that causes the
+    length of this branch to be ignored. The logic is that a recursion can only
+    make sense if there is another alternation that stops the recursing. That
+    provides the minimum length (when no recursion happens). A backreference
+    within the group that it is referencing behaves in the same way.
+
+    If PCRE_JAVASCRIPT_COMPAT is set, a backreference to an unset bracket
+    matches an empty string (by default it causes a matching failure), so in
+    that case we must set the minimum length to zero. */
+
+    case OP_REF:
+    if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
+      {
+      ce = cs = (uschar *)_pcre_find_bracket(startcode, utf8, GET2(cc, 1));
+      if (cs == NULL) return -2;
+      do ce += GET(ce, 1); while (*ce == OP_ALT);
+      if (cc > cs && cc < ce)
+        {
+        d = 0;
+        had_recurse = TRUE;
+        }
+      else if ((d = find_minlength(cs, startcode, options)) < 0) return d;
+      }
+    else d = 0;
+    cc += 3;
+
+    /* Handle repeated back references */
+
+    switch (*cc)
+      {
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRQUERY:
+      case OP_CRMINQUERY:
+      min = 0;
+      cc++;
+      break;
+
+      case OP_CRRANGE:
+      case OP_CRMINRANGE:
+      min = GET2(cc, 1);
+      cc += 5;
+      break;
+
+      default:
+      min = 1;
+      break;
+      }
+
+    branchlength += min * d;
+    break;
+
+    case OP_RECURSE:
+    cs = ce = (uschar *)startcode + GET(cc, 1);
+    if (cs == NULL) return -2;
+    do ce += GET(ce, 1); while (*ce == OP_ALT);
+    if (cc > cs && cc < ce)
+      had_recurse = TRUE;
+    else if ((d = find_minlength(cs, startcode, options)) < 0) return d;
+    else branchlength += d;
+    cc += 1 + LINK_SIZE;
+    break;
+
+    /* Anything else does not or need not match a character. We can get the
+    item's length from the table, but for those that can match zero occurrences
+    of a character, we must take special action for UTF-8 characters. */
+
+    case OP_UPTO:
+    case OP_NOTUPTO:
+    case OP_MINUPTO:
+    case OP_NOTMINUPTO:
+    case OP_POSUPTO:
+    case OP_STAR:
+    case OP_MINSTAR:
+    case OP_NOTMINSTAR:
+    case OP_POSSTAR:
+    case OP_NOTPOSSTAR:
+    case OP_QUERY:
+    case OP_MINQUERY:
+    case OP_NOTMINQUERY:
+    case OP_POSQUERY:
+    case OP_NOTPOSQUERY:
+    cc += _pcre_OP_lengths[op];
+#ifdef SUPPORT_UTF8
+    if (utf8 && cc[-1] >= 0xc0) cc += _pcre_utf8_table4[cc[-1] & 0x3f];
+#endif
+    break;
+
+    /* For the record, these are the opcodes that are matched by "default":
+    OP_ACCEPT, OP_CLOSE, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_SET_SOM, OP_SKIP,
+    OP_THEN. */
+
+    default:
+    cc += _pcre_OP_lengths[op];
+    break;
+    }
+  }
+/* Control never gets here */
+}
+
+
+
 /*************************************************
 *      Set a bit and maybe its alternate case    *
 *************************************************/
@@ -71,7 +444,8 @@ Returns:        nothing
 */
 
 static void
-set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd)
+set_table_bit(uschar *start_bits, unsigned int c, BOOL caseless,
+  compile_data *cd)
 {
 start_bits[c/8] |= (1 << (c&7));
 if (caseless && (cd->ctypes[c] & ctype_letter) != 0)
@@ -233,7 +607,7 @@ do
       case OP_QUERY:
       case OP_MINQUERY:
       case OP_POSQUERY:
-      set_bit(start_bits, tcode[1], caseless, cd);
+      set_table_bit(start_bits, tcode[1], caseless, cd);
       tcode += 2;
 #ifdef SUPPORT_UTF8
       if (utf8 && tcode[-1] >= 0xc0)
@@ -246,7 +620,7 @@ do
       case OP_UPTO:
       case OP_MINUPTO:
       case OP_POSUPTO:
-      set_bit(start_bits, tcode[3], caseless, cd);
+      set_table_bit(start_bits, tcode[3], caseless, cd);
       tcode += 4;
 #ifdef SUPPORT_UTF8
       if (utf8 && tcode[-1] >= 0xc0)
@@ -264,7 +638,7 @@ do
       case OP_PLUS:
       case OP_MINPLUS:
       case OP_POSPLUS:
-      set_bit(start_bits, tcode[1], caseless, cd);
+      set_table_bit(start_bits, tcode[1], caseless, cd);
       try_next = FALSE;
       break;
 
@@ -500,13 +874,15 @@ Arguments:
             set NULL unless error
 
 Returns:    pointer to a pcre_extra block, with study_data filled in and the
-              appropriate flag set;
+              appropriate flags set;
             NULL on error or if no optimization possible
 */
 
 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
 pcre_study(const pcre *external_re, int options, const char **errorptr)
 {
+int min;
+BOOL bits_set = FALSE;
 uschar start_bits[32];
 pcre_extra *extra;
 pcre_study_data *study;
@@ -533,30 +909,39 @@ code = (uschar *)re + re->name_table_offset +
   (re->name_count * re->name_entry_size);
 
 /* For an anchored pattern, or an unanchored pattern that has a first char, or
-a multiline pattern that matches only at "line starts", no further processing
-at present. */
+a multiline pattern that matches only at "line starts", there is no point in
+seeking a list of starting bytes. */
 
-if ((re->options & PCRE_ANCHORED) != 0 ||
-    (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
-  return NULL;
+if ((re->options & PCRE_ANCHORED) == 0 &&
+    (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
+  {
+  /* Set the character tables in the block that is passed around */
+
+  tables = re->tables;
+  if (tables == NULL)
+    (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
+    (void *)(&tables));
 
-/* Set the character tables in the block that is passed around */
+  compile_block.lcc = tables + lcc_offset;
+  compile_block.fcc = tables + fcc_offset;
+  compile_block.cbits = tables + cbits_offset;
+  compile_block.ctypes = tables + ctypes_offset;
 
-tables = re->tables;
-if (tables == NULL)
-  (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
-  (void *)(&tables));
+  /* See if we can find a fixed set of initial characters for the pattern. */
 
-compile_block.lcc = tables + lcc_offset;
-compile_block.fcc = tables + fcc_offset;
-compile_block.cbits = tables + cbits_offset;
-compile_block.ctypes = tables + ctypes_offset;
+  memset(start_bits, 0, 32 * sizeof(uschar));
+  bits_set = set_start_bits(code, start_bits,
+    (re->options & PCRE_CASELESS) != 0, (re->options & PCRE_UTF8) != 0,
+    &compile_block) == SSB_DONE;
+  }
+
+/* Find the minimum length of subject string. */
+
+min = find_minlength(code, code, re->options);
 
-/* See if we can find a fixed set of initial characters for the pattern. */
+/* Return NULL if no optimization is possible. */
 
-memset(start_bits, 0, 32 * sizeof(uschar));
-if (set_start_bits(code, start_bits, (re->options & PCRE_CASELESS) != 0,
-  (re->options & PCRE_UTF8) != 0, &compile_block) != SSB_DONE) return NULL;
+if (!bits_set && min < 0) return NULL;
 
 /* Get a pcre_extra block and a pcre_study_data block. The study data is put in
 the latter, which is pointed to by the former, which may also get additional
@@ -579,8 +964,19 @@ extra->flags = PCRE_EXTRA_STUDY_DATA;
 extra->study_data = study;
 
 study->size = sizeof(pcre_study_data);
-study->options = PCRE_STUDY_MAPPED;
-memcpy(study->start_bits, start_bits, sizeof(start_bits));
+study->flags = 0;
+
+if (bits_set)
+  {
+  study->flags |= PCRE_STUDY_MAPPED;
+  memcpy(study->start_bits, start_bits, sizeof(start_bits));
+  }
+
+if (min >= 0)
+  {
+  study->flags |= PCRE_STUDY_MINLEN;
+  study->minlength = min;
+  }
 
 return extra;
 }
diff --git a/glib/pcre/pcre_tables.c b/glib/pcre/pcre_tables.c
index 160bc5d..b7f7ba5 100644
--- a/glib/pcre/pcre_tables.c
+++ b/glib/pcre/pcre_tables.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -109,244 +109,411 @@ putting all the names into a single, large string and then using offsets in the
 table itself. Maintenance is more error-prone, but frequent changes to this
 data are unlikely.
 
-July 2008: There is now a script called maint/GenerateUtt.py which can be used
-to generate this data instead of maintaining it entirely by hand. */
+July 2008: There is now a script called maint/GenerateUtt.py that can be used
+to generate this data instead of maintaining it entirely by hand.
+
+The script was updated in March 2009 to generate a new EBCDIC-compliant
+version. Like all other character and string literals that are compared against
+the regular expression pattern, we must use STR_ macros instead of literal
+strings to make sure that UTF-8 support works on EBCDIC platforms. */
+
+#define STRING_Any0 STR_A STR_n STR_y "\0"
+#define STRING_Arabic0 STR_A STR_r STR_a STR_b STR_i STR_c "\0"
+#define STRING_Armenian0 STR_A STR_r STR_m STR_e STR_n STR_i STR_a STR_n "\0"
+#define STRING_Avestan0 STR_A STR_v STR_e STR_s STR_t STR_a STR_n "\0"
+#define STRING_Balinese0 STR_B STR_a STR_l STR_i STR_n STR_e STR_s STR_e "\0"
+#define STRING_Bamum0 STR_B STR_a STR_m STR_u STR_m "\0"
+#define STRING_Bengali0 STR_B STR_e STR_n STR_g STR_a STR_l STR_i "\0"
+#define STRING_Bopomofo0 STR_B STR_o STR_p STR_o STR_m STR_o STR_f STR_o "\0"
+#define STRING_Braille0 STR_B STR_r STR_a STR_i STR_l STR_l STR_e "\0"
+#define STRING_Buginese0 STR_B STR_u STR_g STR_i STR_n STR_e STR_s STR_e "\0"
+#define STRING_Buhid0 STR_B STR_u STR_h STR_i STR_d "\0"
+#define STRING_C0 STR_C "\0"
+#define STRING_Canadian_Aboriginal0 STR_C STR_a STR_n STR_a STR_d STR_i STR_a STR_n STR_UNDERSCORE STR_A STR_b STR_o STR_r STR_i STR_g STR_i STR_n STR_a STR_l "\0"
+#define STRING_Carian0 STR_C STR_a STR_r STR_i STR_a STR_n "\0"
+#define STRING_Cc0 STR_C STR_c "\0"
+#define STRING_Cf0 STR_C STR_f "\0"
+#define STRING_Cham0 STR_C STR_h STR_a STR_m "\0"
+#define STRING_Cherokee0 STR_C STR_h STR_e STR_r STR_o STR_k STR_e STR_e "\0"
+#define STRING_Cn0 STR_C STR_n "\0"
+#define STRING_Co0 STR_C STR_o "\0"
+#define STRING_Common0 STR_C STR_o STR_m STR_m STR_o STR_n "\0"
+#define STRING_Coptic0 STR_C STR_o STR_p STR_t STR_i STR_c "\0"
+#define STRING_Cs0 STR_C STR_s "\0"
+#define STRING_Cuneiform0 STR_C STR_u STR_n STR_e STR_i STR_f STR_o STR_r STR_m "\0"
+#define STRING_Cypriot0 STR_C STR_y STR_p STR_r STR_i STR_o STR_t "\0"
+#define STRING_Cyrillic0 STR_C STR_y STR_r STR_i STR_l STR_l STR_i STR_c "\0"
+#define STRING_Deseret0 STR_D STR_e STR_s STR_e STR_r STR_e STR_t "\0"
+#define STRING_Devanagari0 STR_D STR_e STR_v STR_a STR_n STR_a STR_g STR_a STR_r STR_i "\0"
+#define STRING_Egyptian_Hieroglyphs0 STR_E STR_g STR_y STR_p STR_t STR_i STR_a STR_n STR_UNDERSCORE STR_H STR_i STR_e STR_r STR_o STR_g STR_l STR_y STR_p STR_h STR_s "\0"
+#define STRING_Ethiopic0 STR_E STR_t STR_h STR_i STR_o STR_p STR_i STR_c "\0"
+#define STRING_Georgian0 STR_G STR_e STR_o STR_r STR_g STR_i STR_a STR_n "\0"
+#define STRING_Glagolitic0 STR_G STR_l STR_a STR_g STR_o STR_l STR_i STR_t STR_i STR_c "\0"
+#define STRING_Gothic0 STR_G STR_o STR_t STR_h STR_i STR_c "\0"
+#define STRING_Greek0 STR_G STR_r STR_e STR_e STR_k "\0"
+#define STRING_Gujarati0 STR_G STR_u STR_j STR_a STR_r STR_a STR_t STR_i "\0"
+#define STRING_Gurmukhi0 STR_G STR_u STR_r STR_m STR_u STR_k STR_h STR_i "\0"
+#define STRING_Han0 STR_H STR_a STR_n "\0"
+#define STRING_Hangul0 STR_H STR_a STR_n STR_g STR_u STR_l "\0"
+#define STRING_Hanunoo0 STR_H STR_a STR_n STR_u STR_n STR_o STR_o "\0"
+#define STRING_Hebrew0 STR_H STR_e STR_b STR_r STR_e STR_w "\0"
+#define STRING_Hiragana0 STR_H STR_i STR_r STR_a STR_g STR_a STR_n STR_a "\0"
+#define STRING_Imperial_Aramaic0 STR_I STR_m STR_p STR_e STR_r STR_i STR_a STR_l STR_UNDERSCORE STR_A STR_r STR_a STR_m STR_a STR_i STR_c "\0"
+#define STRING_Inherited0 STR_I STR_n STR_h STR_e STR_r STR_i STR_t STR_e STR_d "\0"
+#define STRING_Inscriptional_Pahlavi0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_h STR_l STR_a STR_v STR_i "\0"
+#define STRING_Inscriptional_Parthian0 STR_I STR_n STR_s STR_c STR_r STR_i STR_p STR_t STR_i STR_o STR_n STR_a STR_l STR_UNDERSCORE STR_P STR_a STR_r STR_t STR_h STR_i STR_a STR_n "\0"
+#define STRING_Javanese0 STR_J STR_a STR_v STR_a STR_n STR_e STR_s STR_e "\0"
+#define STRING_Kaithi0 STR_K STR_a STR_i STR_t STR_h STR_i "\0"
+#define STRING_Kannada0 STR_K STR_a STR_n STR_n STR_a STR_d STR_a "\0"
+#define STRING_Katakana0 STR_K STR_a STR_t STR_a STR_k STR_a STR_n STR_a "\0"
+#define STRING_Kayah_Li0 STR_K STR_a STR_y STR_a STR_h STR_UNDERSCORE STR_L STR_i "\0"
+#define STRING_Kharoshthi0 STR_K STR_h STR_a STR_r STR_o STR_s STR_h STR_t STR_h STR_i "\0"
+#define STRING_Khmer0 STR_K STR_h STR_m STR_e STR_r "\0"
+#define STRING_L0 STR_L "\0"
+#define STRING_L_AMPERSAND0 STR_L STR_AMPERSAND "\0"
+#define STRING_Lao0 STR_L STR_a STR_o "\0"
+#define STRING_Latin0 STR_L STR_a STR_t STR_i STR_n "\0"
+#define STRING_Lepcha0 STR_L STR_e STR_p STR_c STR_h STR_a "\0"
+#define STRING_Limbu0 STR_L STR_i STR_m STR_b STR_u "\0"
+#define STRING_Linear_B0 STR_L STR_i STR_n STR_e STR_a STR_r STR_UNDERSCORE STR_B "\0"
+#define STRING_Lisu0 STR_L STR_i STR_s STR_u "\0"
+#define STRING_Ll0 STR_L STR_l "\0"
+#define STRING_Lm0 STR_L STR_m "\0"
+#define STRING_Lo0 STR_L STR_o "\0"
+#define STRING_Lt0 STR_L STR_t "\0"
+#define STRING_Lu0 STR_L STR_u "\0"
+#define STRING_Lycian0 STR_L STR_y STR_c STR_i STR_a STR_n "\0"
+#define STRING_Lydian0 STR_L STR_y STR_d STR_i STR_a STR_n "\0"
+#define STRING_M0 STR_M "\0"
+#define STRING_Malayalam0 STR_M STR_a STR_l STR_a STR_y STR_a STR_l STR_a STR_m "\0"
+#define STRING_Mc0 STR_M STR_c "\0"
+#define STRING_Me0 STR_M STR_e "\0"
+#define STRING_Meetei_Mayek0 STR_M STR_e STR_e STR_t STR_e STR_i STR_UNDERSCORE STR_M STR_a STR_y STR_e STR_k "\0"
+#define STRING_Mn0 STR_M STR_n "\0"
+#define STRING_Mongolian0 STR_M STR_o STR_n STR_g STR_o STR_l STR_i STR_a STR_n "\0"
+#define STRING_Myanmar0 STR_M STR_y STR_a STR_n STR_m STR_a STR_r "\0"
+#define STRING_N0 STR_N "\0"
+#define STRING_Nd0 STR_N STR_d "\0"
+#define STRING_New_Tai_Lue0 STR_N STR_e STR_w STR_UNDERSCORE STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_u STR_e "\0"
+#define STRING_Nko0 STR_N STR_k STR_o "\0"
+#define STRING_Nl0 STR_N STR_l "\0"
+#define STRING_No0 STR_N STR_o "\0"
+#define STRING_Ogham0 STR_O STR_g STR_h STR_a STR_m "\0"
+#define STRING_Ol_Chiki0 STR_O STR_l STR_UNDERSCORE STR_C STR_h STR_i STR_k STR_i "\0"
+#define STRING_Old_Italic0 STR_O STR_l STR_d STR_UNDERSCORE STR_I STR_t STR_a STR_l STR_i STR_c "\0"
+#define STRING_Old_Persian0 STR_O STR_l STR_d STR_UNDERSCORE STR_P STR_e STR_r STR_s STR_i STR_a STR_n "\0"
+#define STRING_Old_South_Arabian0 STR_O STR_l STR_d STR_UNDERSCORE STR_S STR_o STR_u STR_t STR_h STR_UNDERSCORE STR_A STR_r STR_a STR_b STR_i STR_a STR_n "\0"
+#define STRING_Old_Turkic0 STR_O STR_l STR_d STR_UNDERSCORE STR_T STR_u STR_r STR_k STR_i STR_c "\0"
+#define STRING_Oriya0 STR_O STR_r STR_i STR_y STR_a "\0"
+#define STRING_Osmanya0 STR_O STR_s STR_m STR_a STR_n STR_y STR_a "\0"
+#define STRING_P0 STR_P "\0"
+#define STRING_Pc0 STR_P STR_c "\0"
+#define STRING_Pd0 STR_P STR_d "\0"
+#define STRING_Pe0 STR_P STR_e "\0"
+#define STRING_Pf0 STR_P STR_f "\0"
+#define STRING_Phags_Pa0 STR_P STR_h STR_a STR_g STR_s STR_UNDERSCORE STR_P STR_a "\0"
+#define STRING_Phoenician0 STR_P STR_h STR_o STR_e STR_n STR_i STR_c STR_i STR_a STR_n "\0"
+#define STRING_Pi0 STR_P STR_i "\0"
+#define STRING_Po0 STR_P STR_o "\0"
+#define STRING_Ps0 STR_P STR_s "\0"
+#define STRING_Rejang0 STR_R STR_e STR_j STR_a STR_n STR_g "\0"
+#define STRING_Runic0 STR_R STR_u STR_n STR_i STR_c "\0"
+#define STRING_S0 STR_S "\0"
+#define STRING_Samaritan0 STR_S STR_a STR_m STR_a STR_r STR_i STR_t STR_a STR_n "\0"
+#define STRING_Saurashtra0 STR_S STR_a STR_u STR_r STR_a STR_s STR_h STR_t STR_r STR_a "\0"
+#define STRING_Sc0 STR_S STR_c "\0"
+#define STRING_Shavian0 STR_S STR_h STR_a STR_v STR_i STR_a STR_n "\0"
+#define STRING_Sinhala0 STR_S STR_i STR_n STR_h STR_a STR_l STR_a "\0"
+#define STRING_Sk0 STR_S STR_k "\0"
+#define STRING_Sm0 STR_S STR_m "\0"
+#define STRING_So0 STR_S STR_o "\0"
+#define STRING_Sundanese0 STR_S STR_u STR_n STR_d STR_a STR_n STR_e STR_s STR_e "\0"
+#define STRING_Syloti_Nagri0 STR_S STR_y STR_l STR_o STR_t STR_i STR_UNDERSCORE STR_N STR_a STR_g STR_r STR_i "\0"
+#define STRING_Syriac0 STR_S STR_y STR_r STR_i STR_a STR_c "\0"
+#define STRING_Tagalog0 STR_T STR_a STR_g STR_a STR_l STR_o STR_g "\0"
+#define STRING_Tagbanwa0 STR_T STR_a STR_g STR_b STR_a STR_n STR_w STR_a "\0"
+#define STRING_Tai_Le0 STR_T STR_a STR_i STR_UNDERSCORE STR_L STR_e "\0"
+#define STRING_Tai_Tham0 STR_T STR_a STR_i STR_UNDERSCORE STR_T STR_h STR_a STR_m "\0"
+#define STRING_Tai_Viet0 STR_T STR_a STR_i STR_UNDERSCORE STR_V STR_i STR_e STR_t "\0"
+#define STRING_Tamil0 STR_T STR_a STR_m STR_i STR_l "\0"
+#define STRING_Telugu0 STR_T STR_e STR_l STR_u STR_g STR_u "\0"
+#define STRING_Thaana0 STR_T STR_h STR_a STR_a STR_n STR_a "\0"
+#define STRING_Thai0 STR_T STR_h STR_a STR_i "\0"
+#define STRING_Tibetan0 STR_T STR_i STR_b STR_e STR_t STR_a STR_n "\0"
+#define STRING_Tifinagh0 STR_T STR_i STR_f STR_i STR_n STR_a STR_g STR_h "\0"
+#define STRING_Ugaritic0 STR_U STR_g STR_a STR_r STR_i STR_t STR_i STR_c "\0"
+#define STRING_Vai0 STR_V STR_a STR_i "\0"
+#define STRING_Yi0 STR_Y STR_i "\0"
+#define STRING_Z0 STR_Z "\0"
+#define STRING_Zl0 STR_Z STR_l "\0"
+#define STRING_Zp0 STR_Z STR_p "\0"
+#define STRING_Zs0 STR_Z STR_s "\0"
 
 const char _pcre_utt_names[] =
-  "Any\0"
-  "Arabic\0"
-  "Armenian\0"
-  "Balinese\0"
-  "Bengali\0"
-  "Bopomofo\0"
-  "Braille\0"
-  "Buginese\0"
-  "Buhid\0"
-  "C\0"
-  "Canadian_Aboriginal\0"
-  "Carian\0"
-  "Cc\0"
-  "Cf\0"
-  "Cham\0"
-  "Cherokee\0"
-  "Cn\0"
-  "Co\0"
-  "Common\0"
-  "Coptic\0"
-  "Cs\0"
-  "Cuneiform\0"
-  "Cypriot\0"
-  "Cyrillic\0"
-  "Deseret\0"
-  "Devanagari\0"
-  "Ethiopic\0"
-  "Georgian\0"
-  "Glagolitic\0"
-  "Gothic\0"
-  "Greek\0"
-  "Gujarati\0"
-  "Gurmukhi\0"
-  "Han\0"
-  "Hangul\0"
-  "Hanunoo\0"
-  "Hebrew\0"
-  "Hiragana\0"
-  "Inherited\0"
-  "Kannada\0"
-  "Katakana\0"
-  "Kayah_Li\0"
-  "Kharoshthi\0"
-  "Khmer\0"
-  "L\0"
-  "L&\0"
-  "Lao\0"
-  "Latin\0"
-  "Lepcha\0"
-  "Limbu\0"
-  "Linear_B\0"
-  "Ll\0"
-  "Lm\0"
-  "Lo\0"
-  "Lt\0"
-  "Lu\0"
-  "Lycian\0"
-  "Lydian\0"
-  "M\0"
-  "Malayalam\0"
-  "Mc\0"
-  "Me\0"
-  "Mn\0"
-  "Mongolian\0"
-  "Myanmar\0"
-  "N\0"
-  "Nd\0"
-  "New_Tai_Lue\0"
-  "Nko\0"
-  "Nl\0"
-  "No\0"
-  "Ogham\0"
-  "Ol_Chiki\0"
-  "Old_Italic\0"
-  "Old_Persian\0"
-  "Oriya\0"
-  "Osmanya\0"
-  "P\0"
-  "Pc\0"
-  "Pd\0"
-  "Pe\0"
-  "Pf\0"
-  "Phags_Pa\0"
-  "Phoenician\0"
-  "Pi\0"
-  "Po\0"
-  "Ps\0"
-  "Rejang\0"
-  "Runic\0"
-  "S\0"
-  "Saurashtra\0"
-  "Sc\0"
-  "Shavian\0"
-  "Sinhala\0"
-  "Sk\0"
-  "Sm\0"
-  "So\0"
-  "Sundanese\0"
-  "Syloti_Nagri\0"
-  "Syriac\0"
-  "Tagalog\0"
-  "Tagbanwa\0"
-  "Tai_Le\0"
-  "Tamil\0"
-  "Telugu\0"
-  "Thaana\0"
-  "Thai\0"
-  "Tibetan\0"
-  "Tifinagh\0"
-  "Ugaritic\0"
-  "Vai\0"
-  "Yi\0"
-  "Z\0"
-  "Zl\0"
-  "Zp\0"
-  "Zs\0";
+  STRING_Any0
+  STRING_Arabic0
+  STRING_Armenian0
+  STRING_Avestan0
+  STRING_Balinese0
+  STRING_Bamum0
+  STRING_Bengali0
+  STRING_Bopomofo0
+  STRING_Braille0
+  STRING_Buginese0
+  STRING_Buhid0
+  STRING_C0
+  STRING_Canadian_Aboriginal0
+  STRING_Carian0
+  STRING_Cc0
+  STRING_Cf0
+  STRING_Cham0
+  STRING_Cherokee0
+  STRING_Cn0
+  STRING_Co0
+  STRING_Common0
+  STRING_Coptic0
+  STRING_Cs0
+  STRING_Cuneiform0
+  STRING_Cypriot0
+  STRING_Cyrillic0
+  STRING_Deseret0
+  STRING_Devanagari0
+  STRING_Egyptian_Hieroglyphs0
+  STRING_Ethiopic0
+  STRING_Georgian0
+  STRING_Glagolitic0
+  STRING_Gothic0
+  STRING_Greek0
+  STRING_Gujarati0
+  STRING_Gurmukhi0
+  STRING_Han0
+  STRING_Hangul0
+  STRING_Hanunoo0
+  STRING_Hebrew0
+  STRING_Hiragana0
+  STRING_Imperial_Aramaic0
+  STRING_Inherited0
+  STRING_Inscriptional_Pahlavi0
+  STRING_Inscriptional_Parthian0
+  STRING_Javanese0
+  STRING_Kaithi0
+  STRING_Kannada0
+  STRING_Katakana0
+  STRING_Kayah_Li0
+  STRING_Kharoshthi0
+  STRING_Khmer0
+  STRING_L0
+  STRING_L_AMPERSAND0
+  STRING_Lao0
+  STRING_Latin0
+  STRING_Lepcha0
+  STRING_Limbu0
+  STRING_Linear_B0
+  STRING_Lisu0
+  STRING_Ll0
+  STRING_Lm0
+  STRING_Lo0
+  STRING_Lt0
+  STRING_Lu0
+  STRING_Lycian0
+  STRING_Lydian0
+  STRING_M0
+  STRING_Malayalam0
+  STRING_Mc0
+  STRING_Me0
+  STRING_Meetei_Mayek0
+  STRING_Mn0
+  STRING_Mongolian0
+  STRING_Myanmar0
+  STRING_N0
+  STRING_Nd0
+  STRING_New_Tai_Lue0
+  STRING_Nko0
+  STRING_Nl0
+  STRING_No0
+  STRING_Ogham0
+  STRING_Ol_Chiki0
+  STRING_Old_Italic0
+  STRING_Old_Persian0
+  STRING_Old_South_Arabian0
+  STRING_Old_Turkic0
+  STRING_Oriya0
+  STRING_Osmanya0
+  STRING_P0
+  STRING_Pc0
+  STRING_Pd0
+  STRING_Pe0
+  STRING_Pf0
+  STRING_Phags_Pa0
+  STRING_Phoenician0
+  STRING_Pi0
+  STRING_Po0
+  STRING_Ps0
+  STRING_Rejang0
+  STRING_Runic0
+  STRING_S0
+  STRING_Samaritan0
+  STRING_Saurashtra0
+  STRING_Sc0
+  STRING_Shavian0
+  STRING_Sinhala0
+  STRING_Sk0
+  STRING_Sm0
+  STRING_So0
+  STRING_Sundanese0
+  STRING_Syloti_Nagri0
+  STRING_Syriac0
+  STRING_Tagalog0
+  STRING_Tagbanwa0
+  STRING_Tai_Le0
+  STRING_Tai_Tham0
+  STRING_Tai_Viet0
+  STRING_Tamil0
+  STRING_Telugu0
+  STRING_Thaana0
+  STRING_Thai0
+  STRING_Tibetan0
+  STRING_Tifinagh0
+  STRING_Ugaritic0
+  STRING_Vai0
+  STRING_Yi0
+  STRING_Z0
+  STRING_Zl0
+  STRING_Zp0
+  STRING_Zs0;
 
 const ucp_type_table _pcre_utt[] = {
   {   0, PT_ANY, 0 },
   {   4, PT_SC, ucp_Arabic },
   {  11, PT_SC, ucp_Armenian },
-  {  20, PT_SC, ucp_Balinese },
-  {  29, PT_SC, ucp_Bengali },
-  {  37, PT_SC, ucp_Bopomofo },
-  {  46, PT_SC, ucp_Braille },
-  {  54, PT_SC, ucp_Buginese },
-  {  63, PT_SC, ucp_Buhid },
-  {  69, PT_GC, ucp_C },
-  {  71, PT_SC, ucp_Canadian_Aboriginal },
-  {  91, PT_SC, ucp_Carian },
-  {  98, PT_PC, ucp_Cc },
-  { 101, PT_PC, ucp_Cf },
-  { 104, PT_SC, ucp_Cham },
-  { 109, PT_SC, ucp_Cherokee },
-  { 118, PT_PC, ucp_Cn },
-  { 121, PT_PC, ucp_Co },
-  { 124, PT_SC, ucp_Common },
-  { 131, PT_SC, ucp_Coptic },
-  { 138, PT_PC, ucp_Cs },
-  { 141, PT_SC, ucp_Cuneiform },
-  { 151, PT_SC, ucp_Cypriot },
-  { 159, PT_SC, ucp_Cyrillic },
-  { 168, PT_SC, ucp_Deseret },
-  { 176, PT_SC, ucp_Devanagari },
-  { 187, PT_SC, ucp_Ethiopic },
-  { 196, PT_SC, ucp_Georgian },
-  { 205, PT_SC, ucp_Glagolitic },
-  { 216, PT_SC, ucp_Gothic },
-  { 223, PT_SC, ucp_Greek },
-  { 229, PT_SC, ucp_Gujarati },
-  { 238, PT_SC, ucp_Gurmukhi },
-  { 247, PT_SC, ucp_Han },
-  { 251, PT_SC, ucp_Hangul },
-  { 258, PT_SC, ucp_Hanunoo },
-  { 266, PT_SC, ucp_Hebrew },
-  { 273, PT_SC, ucp_Hiragana },
-  { 282, PT_SC, ucp_Inherited },
-  { 292, PT_SC, ucp_Kannada },
-  { 300, PT_SC, ucp_Katakana },
-  { 309, PT_SC, ucp_Kayah_Li },
-  { 318, PT_SC, ucp_Kharoshthi },
-  { 329, PT_SC, ucp_Khmer },
-  { 335, PT_GC, ucp_L },
-  { 337, PT_LAMP, 0 },
-  { 340, PT_SC, ucp_Lao },
-  { 344, PT_SC, ucp_Latin },
-  { 350, PT_SC, ucp_Lepcha },
-  { 357, PT_SC, ucp_Limbu },
-  { 363, PT_SC, ucp_Linear_B },
-  { 372, PT_PC, ucp_Ll },
-  { 375, PT_PC, ucp_Lm },
-  { 378, PT_PC, ucp_Lo },
-  { 381, PT_PC, ucp_Lt },
-  { 384, PT_PC, ucp_Lu },
-  { 387, PT_SC, ucp_Lycian },
-  { 394, PT_SC, ucp_Lydian },
-  { 401, PT_GC, ucp_M },
-  { 403, PT_SC, ucp_Malayalam },
-  { 413, PT_PC, ucp_Mc },
-  { 416, PT_PC, ucp_Me },
-  { 419, PT_PC, ucp_Mn },
-  { 422, PT_SC, ucp_Mongolian },
-  { 432, PT_SC, ucp_Myanmar },
-  { 440, PT_GC, ucp_N },
-  { 442, PT_PC, ucp_Nd },
-  { 445, PT_SC, ucp_New_Tai_Lue },
-  { 457, PT_SC, ucp_Nko },
-  { 461, PT_PC, ucp_Nl },
-  { 464, PT_PC, ucp_No },
-  { 467, PT_SC, ucp_Ogham },
-  { 473, PT_SC, ucp_Ol_Chiki },
-  { 482, PT_SC, ucp_Old_Italic },
-  { 493, PT_SC, ucp_Old_Persian },
-  { 505, PT_SC, ucp_Oriya },
-  { 511, PT_SC, ucp_Osmanya },
-  { 519, PT_GC, ucp_P },
-  { 521, PT_PC, ucp_Pc },
-  { 524, PT_PC, ucp_Pd },
-  { 527, PT_PC, ucp_Pe },
-  { 530, PT_PC, ucp_Pf },
-  { 533, PT_SC, ucp_Phags_Pa },
-  { 542, PT_SC, ucp_Phoenician },
-  { 553, PT_PC, ucp_Pi },
-  { 556, PT_PC, ucp_Po },
-  { 559, PT_PC, ucp_Ps },
-  { 562, PT_SC, ucp_Rejang },
-  { 569, PT_SC, ucp_Runic },
-  { 575, PT_GC, ucp_S },
-  { 577, PT_SC, ucp_Saurashtra },
-  { 588, PT_PC, ucp_Sc },
-  { 591, PT_SC, ucp_Shavian },
-  { 599, PT_SC, ucp_Sinhala },
-  { 607, PT_PC, ucp_Sk },
-  { 610, PT_PC, ucp_Sm },
-  { 613, PT_PC, ucp_So },
-  { 616, PT_SC, ucp_Sundanese },
-  { 626, PT_SC, ucp_Syloti_Nagri },
-  { 639, PT_SC, ucp_Syriac },
-  { 646, PT_SC, ucp_Tagalog },
-  { 654, PT_SC, ucp_Tagbanwa },
-  { 663, PT_SC, ucp_Tai_Le },
-  { 670, PT_SC, ucp_Tamil },
-  { 676, PT_SC, ucp_Telugu },
-  { 683, PT_SC, ucp_Thaana },
-  { 690, PT_SC, ucp_Thai },
-  { 695, PT_SC, ucp_Tibetan },
-  { 703, PT_SC, ucp_Tifinagh },
-  { 712, PT_SC, ucp_Ugaritic },
-  { 721, PT_SC, ucp_Vai },
-  { 725, PT_SC, ucp_Yi },
-  { 728, PT_GC, ucp_Z },
-  { 730, PT_PC, ucp_Zl },
-  { 733, PT_PC, ucp_Zp },
-  { 736, PT_PC, ucp_Zs }
+  {  20, PT_SC, ucp_Avestan },
+  {  28, PT_SC, ucp_Balinese },
+  {  37, PT_SC, ucp_Bamum },
+  {  43, PT_SC, ucp_Bengali },
+  {  51, PT_SC, ucp_Bopomofo },
+  {  60, PT_SC, ucp_Braille },
+  {  68, PT_SC, ucp_Buginese },
+  {  77, PT_SC, ucp_Buhid },
+  {  83, PT_GC, ucp_C },
+  {  85, PT_SC, ucp_Canadian_Aboriginal },
+  { 105, PT_SC, ucp_Carian },
+  { 112, PT_PC, ucp_Cc },
+  { 115, PT_PC, ucp_Cf },
+  { 118, PT_SC, ucp_Cham },
+  { 123, PT_SC, ucp_Cherokee },
+  { 132, PT_PC, ucp_Cn },
+  { 135, PT_PC, ucp_Co },
+  { 138, PT_SC, ucp_Common },
+  { 145, PT_SC, ucp_Coptic },
+  { 152, PT_PC, ucp_Cs },
+  { 155, PT_SC, ucp_Cuneiform },
+  { 165, PT_SC, ucp_Cypriot },
+  { 173, PT_SC, ucp_Cyrillic },
+  { 182, PT_SC, ucp_Deseret },
+  { 190, PT_SC, ucp_Devanagari },
+  { 201, PT_SC, ucp_Egyptian_Hieroglyphs },
+  { 222, PT_SC, ucp_Ethiopic },
+  { 231, PT_SC, ucp_Georgian },
+  { 240, PT_SC, ucp_Glagolitic },
+  { 251, PT_SC, ucp_Gothic },
+  { 258, PT_SC, ucp_Greek },
+  { 264, PT_SC, ucp_Gujarati },
+  { 273, PT_SC, ucp_Gurmukhi },
+  { 282, PT_SC, ucp_Han },
+  { 286, PT_SC, ucp_Hangul },
+  { 293, PT_SC, ucp_Hanunoo },
+  { 301, PT_SC, ucp_Hebrew },
+  { 308, PT_SC, ucp_Hiragana },
+  { 317, PT_SC, ucp_Imperial_Aramaic },
+  { 334, PT_SC, ucp_Inherited },
+  { 344, PT_SC, ucp_Inscriptional_Pahlavi },
+  { 366, PT_SC, ucp_Inscriptional_Parthian },
+  { 389, PT_SC, ucp_Javanese },
+  { 398, PT_SC, ucp_Kaithi },
+  { 405, PT_SC, ucp_Kannada },
+  { 413, PT_SC, ucp_Katakana },
+  { 422, PT_SC, ucp_Kayah_Li },
+  { 431, PT_SC, ucp_Kharoshthi },
+  { 442, PT_SC, ucp_Khmer },
+  { 448, PT_GC, ucp_L },
+  { 450, PT_LAMP, 0 },
+  { 453, PT_SC, ucp_Lao },
+  { 457, PT_SC, ucp_Latin },
+  { 463, PT_SC, ucp_Lepcha },
+  { 470, PT_SC, ucp_Limbu },
+  { 476, PT_SC, ucp_Linear_B },
+  { 485, PT_SC, ucp_Lisu },
+  { 490, PT_PC, ucp_Ll },
+  { 493, PT_PC, ucp_Lm },
+  { 496, PT_PC, ucp_Lo },
+  { 499, PT_PC, ucp_Lt },
+  { 502, PT_PC, ucp_Lu },
+  { 505, PT_SC, ucp_Lycian },
+  { 512, PT_SC, ucp_Lydian },
+  { 519, PT_GC, ucp_M },
+  { 521, PT_SC, ucp_Malayalam },
+  { 531, PT_PC, ucp_Mc },
+  { 534, PT_PC, ucp_Me },
+  { 537, PT_SC, ucp_Meetei_Mayek },
+  { 550, PT_PC, ucp_Mn },
+  { 553, PT_SC, ucp_Mongolian },
+  { 563, PT_SC, ucp_Myanmar },
+  { 571, PT_GC, ucp_N },
+  { 573, PT_PC, ucp_Nd },
+  { 576, PT_SC, ucp_New_Tai_Lue },
+  { 588, PT_SC, ucp_Nko },
+  { 592, PT_PC, ucp_Nl },
+  { 595, PT_PC, ucp_No },
+  { 598, PT_SC, ucp_Ogham },
+  { 604, PT_SC, ucp_Ol_Chiki },
+  { 613, PT_SC, ucp_Old_Italic },
+  { 624, PT_SC, ucp_Old_Persian },
+  { 636, PT_SC, ucp_Old_South_Arabian },
+  { 654, PT_SC, ucp_Old_Turkic },
+  { 665, PT_SC, ucp_Oriya },
+  { 671, PT_SC, ucp_Osmanya },
+  { 679, PT_GC, ucp_P },
+  { 681, PT_PC, ucp_Pc },
+  { 684, PT_PC, ucp_Pd },
+  { 687, PT_PC, ucp_Pe },
+  { 690, PT_PC, ucp_Pf },
+  { 693, PT_SC, ucp_Phags_Pa },
+  { 702, PT_SC, ucp_Phoenician },
+  { 713, PT_PC, ucp_Pi },
+  { 716, PT_PC, ucp_Po },
+  { 719, PT_PC, ucp_Ps },
+  { 722, PT_SC, ucp_Rejang },
+  { 729, PT_SC, ucp_Runic },
+  { 735, PT_GC, ucp_S },
+  { 737, PT_SC, ucp_Samaritan },
+  { 747, PT_SC, ucp_Saurashtra },
+  { 758, PT_PC, ucp_Sc },
+  { 761, PT_SC, ucp_Shavian },
+  { 769, PT_SC, ucp_Sinhala },
+  { 777, PT_PC, ucp_Sk },
+  { 780, PT_PC, ucp_Sm },
+  { 783, PT_PC, ucp_So },
+  { 786, PT_SC, ucp_Sundanese },
+  { 796, PT_SC, ucp_Syloti_Nagri },
+  { 809, PT_SC, ucp_Syriac },
+  { 816, PT_SC, ucp_Tagalog },
+  { 824, PT_SC, ucp_Tagbanwa },
+  { 833, PT_SC, ucp_Tai_Le },
+  { 840, PT_SC, ucp_Tai_Tham },
+  { 849, PT_SC, ucp_Tai_Viet },
+  { 858, PT_SC, ucp_Tamil },
+  { 864, PT_SC, ucp_Telugu },
+  { 871, PT_SC, ucp_Thaana },
+  { 878, PT_SC, ucp_Thai },
+  { 883, PT_SC, ucp_Tibetan },
+  { 891, PT_SC, ucp_Tifinagh },
+  { 900, PT_SC, ucp_Ugaritic },
+  { 909, PT_SC, ucp_Vai },
+  { 913, PT_SC, ucp_Yi },
+  { 916, PT_GC, ucp_Z },
+  { 918, PT_PC, ucp_Zl },
+  { 921, PT_PC, ucp_Zp },
+  { 924, PT_PC, ucp_Zs }
 };
 
 const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table);
diff --git a/glib/pcre/pcre_try_flipped.c b/glib/pcre/pcre_try_flipped.c
index 0d2f3a2..606504c 100644
--- a/glib/pcre/pcre_try_flipped.c
+++ b/glib/pcre/pcre_try_flipped.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -128,7 +128,9 @@ if (study != NULL)
   {
   *internal_study = *study;   /* To copy other fields */
   internal_study->size = byteflip(study->size, sizeof(study->size));
-  internal_study->options = byteflip(study->options, sizeof(study->options));
+  internal_study->flags = byteflip(study->flags, sizeof(study->flags));
+  internal_study->minlength = byteflip(study->minlength,
+    sizeof(study->minlength));
   }
 
 return internal_re;
diff --git a/glib/pcre/pcre_ucp_searchfuncs.c b/glib/pcre/pcre_ucp_searchfuncs.c
index 3e1ed38..ce5c136 100644
--- a/glib/pcre/pcre_ucp_searchfuncs.c
+++ b/glib/pcre/pcre_ucp_searchfuncs.c
@@ -52,6 +52,50 @@ properties. */
 #include "ucp.h"               /* Category definitions */
 
 
+/* Table to translate from particular type value to the general value. */
+
+static const int ucp_gentype[] = {
+  ucp_C, ucp_C, ucp_C, ucp_C, ucp_C,  /* Cc, Cf, Cn, Co, Cs */
+  ucp_L, ucp_L, ucp_L, ucp_L, ucp_L,  /* Ll, Lm, Lo, Lt, Lu */
+  ucp_M, ucp_M, ucp_M,                /* Mc, Me, Mn */
+  ucp_N, ucp_N, ucp_N,                /* Nd, Nl, No */
+  ucp_P, ucp_P, ucp_P, ucp_P, ucp_P,  /* Pc, Pd, Pe, Pf, Pi */
+  ucp_P, ucp_P,                       /* Po, Ps */
+  ucp_S, ucp_S, ucp_S, ucp_S,         /* Sc, Sk, Sm, So */
+  ucp_Z, ucp_Z, ucp_Z                 /* Zl, Zp, Zs */
+};
+
+
+
+/*************************************************
+*       Look up character type and script        *
+*************************************************/
+
+/* Three values are returned: the category is ucp_C, ucp_L, etc. The detailed
+character type is ucp_Lu, ucp_Nd, etc. The script is ucp_Latin, etc.
+
+Arguments:
+  c           the character value
+  type_ptr    the detailed character type is returned here
+  script_ptr  the script is returned here
+
+Returns:      the character type category
+*/
+
+int
+_pcre_ucp_findprop(const unsigned int c, int *type_ptr, int *script_ptr)
+{
+/* The Unicode type values are the same in glib and in PCRE (ucp_Ll ==
+ * G_UNICODE_LOWERCASE_LETTER, ucp_Zs == G_UNICODE_SPACE_SEPARATOR, and so
+ * on), so the glib type is stored unchanged and also indexes ucp_gentype. */
+*type_ptr = g_unichar_type(c);
+*script_ptr = g_unichar_get_script(c);
+return ucp_gentype[*type_ptr];
+}
+
+
+
+
 /*************************************************
 *       Search table and return other case       *
 *************************************************/
@@ -68,7 +112,7 @@ Returns:      the other case or NOTACHAR if none
 unsigned int
 _pcre_ucp_othercase(const unsigned int c)
 {
-unsigned int other_case = NOTACHAR;
+int other_case = NOTACHAR;
 
 if (g_unichar_islower(c))
   other_case = g_unichar_toupper(c);
diff --git a/glib/pcre/pcre_xclass.c b/glib/pcre/pcre_xclass.c
index e8e7a50..c25ecdc 100644
--- a/glib/pcre/pcre_xclass.c
+++ b/glib/pcre/pcre_xclass.c
@@ -6,7 +6,7 @@
 and semantics are as close as possible to those of the Perl 5 language.
 
                        Written by Philip Hazel
-           Copyright (c) 1997-2008 University of Cambridge
+           Copyright (c) 1997-2009 University of Cambridge
 
 -----------------------------------------------------------------------------
 Redistribution and use in source and binary forms, with or without
@@ -39,8 +39,7 @@ POSSIBILITY OF SUCH DAMAGE.
 
 
 /* This module contains an internal function that is used to match an extended
-class (one that contains characters whose values are > 255). It is used by both
-pcre_exec() and pcre_def_exec(). */
+class. It is used by both pcre_exec() and pcre_dfa_exec(). */
 
 
 #ifdef HAVE_CONFIG_H
@@ -55,7 +54,7 @@ pcre_exec() and pcre_def_exec(). */
 *************************************************/
 
 /* This function is called to match a character against an extended class that
-might contain values > 255.
+might contain values > 255 and/or Unicode properties.
 
 Arguments:
   c           the character
diff --git a/glib/pcre/ucp.h b/glib/pcre/ucp.h
index fe910ce..e96c8b4 100644
--- a/glib/pcre/ucp.h
+++ b/glib/pcre/ucp.h
@@ -6,9 +6,8 @@
 #define _UCP_H
 
 /* This file contains definitions of the property values that are returned by
-the function _pcre_ucp_findprop(). New values that are added for new releases
-of Unicode should always be at the end of each enum, for backwards
-compatibility. */
+the UCD access macros. New values that are added for new releases of Unicode
+should always be at the end of each enum, for backwards compatibility. */
 
 /* These are the general character categories. */
 
@@ -121,24 +120,40 @@ enum {
   ucp_Tifinagh = G_UNICODE_SCRIPT_TIFINAGH,
   ucp_Ugaritic = G_UNICODE_SCRIPT_UGARITIC,
   ucp_Yi = G_UNICODE_SCRIPT_YI,
-  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,	/* New for Unicode 5.0.0 */
-  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,   /* New for Unicode 5.0.0 */
-  ucp_Nko = G_UNICODE_SCRIPT_NKO,           	/* New for Unicode 5.0.0 */
-  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,     /* New for Unicode 5.0.0 */
-  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN, /* New for Unicode 5.0.0 */
-  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,         /* New for Unicode 5.1 */
-  ucp_Cham = G_UNICODE_SCRIPT_CHAM,             /* New for Unicode 5.1 */
-  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,     /* New for Unicode 5.1 */
-  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,         /* New for Unicode 5.1 */
-  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,         /* New for Unicode 5.1 */
-  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,         /* New for Unicode 5.1 */
-  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,     /* New for Unicode 5.1 */
-  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,         /* New for Unicode 5.1 */
-  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA, /* New for Unicode 5.1 */
-  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,   /* New for Unicode 5.1 */
-  ucp_Vai = G_UNICODE_SCRIPT_VAI                /* New for Unicode 5.1 */
+  ucp_Balinese = G_UNICODE_SCRIPT_BALINESE,
+  ucp_Cuneiform = G_UNICODE_SCRIPT_CUNEIFORM,
+  ucp_Nko = G_UNICODE_SCRIPT_NKO,
+  ucp_Phags_Pa = G_UNICODE_SCRIPT_PHAGS_PA,
+  ucp_Phoenician = G_UNICODE_SCRIPT_PHOENICIAN,
+  ucp_Carian = G_UNICODE_SCRIPT_CARIAN,
+  ucp_Cham = G_UNICODE_SCRIPT_CHAM,
+  ucp_Kayah_Li = G_UNICODE_SCRIPT_KAYAH_LI,
+  ucp_Lepcha = G_UNICODE_SCRIPT_LEPCHA,
+  ucp_Lycian = G_UNICODE_SCRIPT_LYCIAN,
+  ucp_Lydian = G_UNICODE_SCRIPT_LYDIAN,
+  ucp_Ol_Chiki = G_UNICODE_SCRIPT_OL_CHIKI,
+  ucp_Rejang = G_UNICODE_SCRIPT_REJANG,
+  ucp_Saurashtra = G_UNICODE_SCRIPT_SAURASHTRA,
+  ucp_Sundanese = G_UNICODE_SCRIPT_SUNDANESE,
+  ucp_Vai = G_UNICODE_SCRIPT_VAI,
+  ucp_Avestan = G_UNICODE_SCRIPT_AVESTAN,
+  ucp_Bamum = G_UNICODE_SCRIPT_BAMUM,
+  ucp_Egyptian_Hieroglyphs = G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,
+  ucp_Imperial_Aramaic = G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,
+  ucp_Inscriptional_Pahlavi = G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,
+  ucp_Inscriptional_Parthian = G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,
+  ucp_Javanese = G_UNICODE_SCRIPT_JAVANESE,
+  ucp_Kaithi = G_UNICODE_SCRIPT_KAITHI,
+  ucp_Lisu = G_UNICODE_SCRIPT_LISU,
+  ucp_Meetei_Mayek = G_UNICODE_SCRIPT_MEETEI_MAYEK,
+  ucp_Old_South_Arabian = G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,
+  ucp_Old_Turkic = G_UNICODE_SCRIPT_OLD_TURKISH,
+  ucp_Samaritan = G_UNICODE_SCRIPT_SAMARITAN,
+  ucp_Tai_Tham = G_UNICODE_SCRIPT_TAI_THAM,
+  ucp_Tai_Viet = G_UNICODE_SCRIPT_TAI_VIET
 };
 
 #endif
 
 /* End of ucp.h */
+



[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]