[libgda] Set SQLite version to 3.7.9 and SqlCipher to 2.0.3

From: Vivien Malerba <vivien src gnome org>
To: commits-list gnome org
Cc:
Subject: [libgda] Set SQLite version to 3.7.9 and SqlCipher to 2.0.3
Date: Tue, 31 Jan 2012 20:28:27 +0000 (UTC)
commit a4dfef2b8b737e932ba72a6b5386a0928c7fd6a6
Author: Vivien Malerba <malerba gnome-db org>
Date:   Tue Jan 31 17:12:13 2012 +0100

    Set SQLite version to 3.7.9 and SqlCipher to 2.0.3

 libgda/sqlite/sqlite-src/PragmasPatch |    6 +-
 libgda/sqlite/sqlite-src/sqlite3.c    | 9348 ++++++++++++------
 libgda/sqlite/sqlite-src/sqlite3.h    |  103 +-
 providers/sqlcipher/sqlcipher.patch   |17342 ++-------------------------------
 4 files changed, 7045 insertions(+), 19754 deletions(-)
---
diff --git a/libgda/sqlite/sqlite-src/PragmasPatch b/libgda/sqlite/sqlite-src/PragmasPatch
index 5aaee81..04b40c7 100644
--- a/libgda/sqlite/sqlite-src/PragmasPatch
+++ b/libgda/sqlite/sqlite-src/PragmasPatch
@@ -1,6 +1,6 @@
---- sqlite3.c.orig	2011-08-31 19:26:06.733916772 +0200
-+++ sqlite3.c	2011-08-31 19:26:20.473916778 +0200
-@@ -88929,6 +88929,60 @@
+--- sqlite3.c.orig	2011-11-01 13:31:21.000000000 +0100
++++ sqlite3.c	2012-01-31 11:11:26.739621515 +0100
+@@ -91158,6 +91158,60 @@
  
  #ifndef SQLITE_OMIT_SCHEMA_PRAGMAS
    /*
diff --git a/libgda/sqlite/sqlite-src/sqlite3.c b/libgda/sqlite/sqlite-src/sqlite3.c
index a065a63..e54e1be 100644
--- a/libgda/sqlite/sqlite-src/sqlite3.c
+++ b/libgda/sqlite/sqlite-src/sqlite3.c
@@ -1,6 +1,6 @@
 /******************************************************************************
 ** This file is an amalgamation of many separate C source files from SQLite
-** version 3.7.7.1.  By combining all the individual C code files into this 
+** version 3.7.9.  By combining all the individual C code files into this 
 ** single large file, the entire code can be compiled as a single translation
 ** unit.  This allows many compilers to do optimizations that would not be
 ** possible if the files were compiled separately.  Performance improvements
@@ -317,13 +317,6 @@
 #endif
 
 /*
-** The number of samples of an index that SQLite takes in order to 
-** construct a histogram of the table content when running ANALYZE
-** and with SQLITE_ENABLE_STAT2
-*/
-#define SQLITE_INDEX_SAMPLES 10
-
-/*
 ** The following macros are used to cast pointers to integers and
 ** integers to pointers.  The way you do this varies from one compiler
 ** to the next, so we have developed the following set of #if statements
@@ -387,19 +380,25 @@
 ** specify which memory allocation subsystem to use.
 **
 **     SQLITE_SYSTEM_MALLOC          // Use normal system malloc()
+**     SQLITE_WIN32_MALLOC           // Use Win32 native heap API
 **     SQLITE_MEMDEBUG               // Debugging version of system malloc()
 **
+** On Windows, if the SQLITE_WIN32_MALLOC_VALIDATE macro is defined and the
+** assert() macro is enabled, each call into the Win32 native heap subsystem
+** will cause HeapValidate to be called.  If heap validation should fail, an
+** assertion will be triggered.
+**
 ** (Historical note:  There used to be several other options, but we've
-** pared it down to just these two.)
+** pared it down to just these three.)
 **
 ** If none of the above are defined, then set SQLITE_SYSTEM_MALLOC as
 ** the default.
 */
-#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_MEMDEBUG)>1
+#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_WIN32_MALLOC)+defined(SQLITE_MEMDEBUG)>1
 # error "At most one of the following compile-time configuration options\
- is allows: SQLITE_SYSTEM_MALLOC, SQLITE_MEMDEBUG"
+ is allows: SQLITE_SYSTEM_MALLOC, SQLITE_WIN32_MALLOC, SQLITE_MEMDEBUG"
 #endif
-#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_MEMDEBUG)==0
+#if defined(SQLITE_SYSTEM_MALLOC)+defined(SQLITE_WIN32_MALLOC)+defined(SQLITE_MEMDEBUG)==0
 # define SQLITE_SYSTEM_MALLOC 1
 #endif
 
@@ -650,9 +649,9 @@ extern "C" {
 ** [sqlite3_libversion_number()], [sqlite3_sourceid()],
 ** [sqlite_version()] and [sqlite_source_id()].
 */
-#define SQLITE_VERSION        "3.7.7.1"
-#define SQLITE_VERSION_NUMBER 3007007
-#define SQLITE_SOURCE_ID      "2011-06-28 17:39:05 af0d91adf497f5f36ec3813f04235a6e195a605f"
+#define SQLITE_VERSION        "3.7.9"
+#define SQLITE_VERSION_NUMBER 3007009
+#define SQLITE_SOURCE_ID      "2011-11-01 00:52:41 c7c6050ef060877ebe77b41d959e9df13f8c9b5e"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers
@@ -1284,6 +1283,41 @@ struct sqlite3_io_methods {
 ** Applications should not call [sqlite3_file_control()] with this
 ** opcode as doing so may disrupt the operation of the specialized VFSes
 ** that do require it.  
+**
+** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic
+** retry counts and intervals for certain disk I/O operations for the
+** windows [VFS] in order to provide robustness against
+** anti-virus programs.  By default, the windows VFS will retry file read,
+** file write, and file delete operations up to 10 times, with a delay
+** of 25 milliseconds before the first retry and with the delay increasing
+** by an additional 25 milliseconds with each subsequent retry.  This
+** opcode allows those two values (10 retries and 25 milliseconds of delay)
+** to be adjusted.  The values are changed for all database connections
+** within the same process.  The argument is a pointer to an array of two
+** integers where the first integer is the new retry count and the second
+** integer is the delay.  If either integer is negative, then the setting
+** is not changed but instead the prior value of that setting is written
+** into the array entry, allowing the current retry settings to be
+** interrogated.  The zDbName parameter is ignored.
+**
+** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the
+** persistent [WAL | Write AHead Log] setting.  By default, the auxiliary
+** write ahead log and shared memory files used for transaction control
+** are automatically deleted when the latest connection to the database
+** closes.  Setting persistent WAL mode causes those files to persist after
+** close.  Persisting the files is useful when other processes that do not
+** have write permission on the directory containing the database file want
+** to read the database file, as the WAL and shared memory files must exist
+** in order for the database to be readable.  The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable persistent WAL mode or 1 to enable persistent
+** WAL mode.  If the integer is -1, then it is overwritten with the current
+** WAL persistence setting.
+**
+** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening
+** a write transaction to indicate that, unless it is rolled back for some
+** reason, the entire database file will be overwritten by the current 
+** transaction. This is used by VACUUM operations.
 */
 #define SQLITE_FCNTL_LOCKSTATE        1
 #define SQLITE_GET_LOCKPROXYFILE      2
@@ -1293,7 +1327,9 @@ struct sqlite3_io_methods {
 #define SQLITE_FCNTL_CHUNK_SIZE       6
 #define SQLITE_FCNTL_FILE_POINTER     7
 #define SQLITE_FCNTL_SYNC_OMITTED     8
-
+#define SQLITE_FCNTL_WIN32_AV_RETRY   9
+#define SQLITE_FCNTL_PERSIST_WAL     10
+#define SQLITE_FCNTL_OVERWRITE       11
 
 /*
 ** CAPI3REF: Mutex Handle
@@ -1721,16 +1757,10 @@ SQLITE_API int sqlite3_db_config(sqlite3*, int op, ...);
 ** order to verify that SQLite recovers gracefully from such
 ** conditions.
 **
-** The xMalloc and xFree methods must work like the
-** malloc() and free() functions from the standard C library.
-** The xRealloc method must work like realloc() from the standard C library
-** with the exception that if the second argument to xRealloc is zero,
-** xRealloc must be a no-op - it must not perform any allocation or
-** deallocation.  ^SQLite guarantees that the second argument to
+** The xMalloc, xRealloc, and xFree methods must work like the
+** malloc(), realloc() and free() functions from the standard C library.
+** ^SQLite guarantees that the second argument to
 ** xRealloc is always a value returned by a prior call to xRoundup.
-** And so in cases where xRoundup always returns a positive number,
-** xRealloc can perform exactly as the standard library realloc() and
-** still be in compliance with this specification.
 **
 ** xSize should return the allocated size of a memory allocation
 ** previously obtained from xMalloc or xRealloc.  The allocated size
@@ -1916,8 +1946,8 @@ struct sqlite3_mem_methods {
 ** allocator is engaged to handle all of SQLites memory allocation needs.
 ** The first pointer (the memory pointer) must be aligned to an 8-byte
 ** boundary or subsequent behavior of SQLite will be undefined.
-** The minimum allocation size is capped at 2^12. Reasonable values
-** for the minimum allocation size are 2^5 through 2^8.</dd>
+** The minimum allocation size is capped at 2**12. Reasonable values
+** for the minimum allocation size are 2**5 through 2**8.</dd>
 **
 ** [[SQLITE_CONFIG_MUTEX]] <dt>SQLITE_CONFIG_MUTEX</dt>
 ** <dd> ^(This option takes a single argument which is a pointer to an
@@ -3316,7 +3346,8 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal);
 ** that the supplied string is nul-terminated, then there is a small
 ** performance advantage to be gained by passing an nByte parameter that
 ** is equal to the number of bytes in the input string <i>including</i>
-** the nul-terminator bytes.
+** the nul-terminator bytes as this saves SQLite from having to
+** make a copy of the input string.
 **
 ** ^If pzTail is not NULL then *pzTail is made to point to the first byte
 ** past the end of the first SQL statement in zSql.  These routines only
@@ -3367,7 +3398,7 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal);
 ** ^The specific value of WHERE-clause [parameter] might influence the 
 ** choice of query plan if the parameter is the left-hand side of a [LIKE]
 ** or [GLOB] operator or if the parameter is compared to an indexed column
-** and the [SQLITE_ENABLE_STAT2] compile-time option is enabled.
+** and the [SQLITE_ENABLE_STAT3] compile-time option is enabled.
 ** the 
 ** </li>
 ** </ol>
@@ -3537,6 +3568,13 @@ typedef struct sqlite3_context sqlite3_context;
 ** number of <u>bytes</u> in the value, not the number of characters.)^
 ** ^If the fourth parameter is negative, the length of the string is
 ** the number of bytes up to the first zero terminator.
+** If a non-negative fourth parameter is provided to sqlite3_bind_text()
+** or sqlite3_bind_text16() then that parameter must be the byte offset
+** where the NUL terminator would occur assuming the string were NUL
+** terminated.  If any NUL characters occur at byte offsets less than 
+** the value of the fourth parameter then the resulting string value will
+** contain embedded NULs.  The result of expressions involving strings
+** with embedded NULs is undefined.
 **
 ** ^The fifth argument to sqlite3_bind_blob(), sqlite3_bind_text(), and
 ** sqlite3_bind_text16() is a destructor used to dispose of the BLOB or
@@ -3870,6 +3908,12 @@ SQLITE_API int sqlite3_step(sqlite3_stmt*);
 ** (via calls to the [sqlite3_column_int | sqlite3_column_*()] of
 ** interfaces) then sqlite3_data_count(P) returns 0.
 ** ^The sqlite3_data_count(P) routine also returns 0 if P is a NULL pointer.
+** ^The sqlite3_data_count(P) routine returns 0 if the previous call to
+** [sqlite3_step](P) returned [SQLITE_DONE].  ^The sqlite3_data_count(P)
+** will return non-zero if previous call to [sqlite3_step](P) returned
+** [SQLITE_ROW], except in the case of the [PRAGMA incremental_vacuum]
+** where it always returns zero since each step of that multi-step
+** pragma returns 0 columns of data.
 **
 ** See also: [sqlite3_column_count()]
 */
@@ -4549,7 +4593,12 @@ typedef void (*sqlite3_destructor_type)(void*);
 ** ^If the 3rd parameter to the sqlite3_result_text* interfaces
 ** is non-negative, then as many bytes (not characters) of the text
 ** pointed to by the 2nd parameter are taken as the application-defined
-** function result.
+** function result.  If the 3rd parameter is non-negative, then it
+** must be the byte offset into the string where the NUL terminator would
+** appear if the string were NUL terminated.  If any NUL characters occur
+** in the string at a byte offset that is less than the value of the 3rd
+** parameter, then the resulting string will contain embedded NULs and the
+** result of expressions operating on strings with embedded NULs is undefined.
 ** ^If the 4th parameter to the sqlite3_result_text* interfaces
 ** or sqlite3_result_blob is a non-NULL pointer, then SQLite calls that
 ** function as the destructor on the text or BLOB result when it has
@@ -6332,6 +6381,18 @@ SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int r
 ** the database connection.)^
 ** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0.
 ** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(<dt>SQLITE_DBSTATUS_CACHE_HIT</dt>
+** <dd>This parameter returns the number of pager cache hits that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT 
+** is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(<dt>SQLITE_DBSTATUS_CACHE_MISS</dt>
+** <dd>This parameter returns the number of pager cache misses that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS 
+** is always 0.
+** </dd>
 ** </dl>
 */
 #define SQLITE_DBSTATUS_LOOKASIDE_USED       0
@@ -6341,7 +6402,9 @@ SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int r
 #define SQLITE_DBSTATUS_LOOKASIDE_HIT        4
 #define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE  5
 #define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL  6
-#define SQLITE_DBSTATUS_MAX                  6   /* Largest defined DBSTATUS */
+#define SQLITE_DBSTATUS_CACHE_HIT            7
+#define SQLITE_DBSTATUS_CACHE_MISS           8
+#define SQLITE_DBSTATUS_MAX                  8   /* Largest defined DBSTATUS */
 
 
 /*
@@ -6395,7 +6458,6 @@ SQLITE_API int sqlite3_stmt_status(sqlite3_stmt*, int op,int resetFlg);
 ** A non-zero value in this counter may indicate an opportunity to
 ** improvement performance by adding permanent indices that do not
 ** need to be reinitialized each time the statement is run.</dd>
-**
 ** </dl>
 */
 #define SQLITE_STMTSTATUS_FULLSCAN_STEP     1
@@ -7682,6 +7744,18 @@ typedef INT8_TYPE i8;              /* 1-byte signed integer */
 #define SQLITE_MAX_U32  ((((u64)1)<<32)-1)
 
 /*
+** The datatype used to store estimates of the number of rows in a
+** table or index.  This is an unsigned integer type.  For 99.9% of
+** the world, a 32-bit integer is sufficient.  But a 64-bit integer
+** can be used at compile-time if desired.
+*/
+#ifdef SQLITE_64BIT_STATS
+ typedef u64 tRowcnt;    /* 64-bit only if requested at compile-time */
+#else
+ typedef u32 tRowcnt;    /* 32-bit is the default */
+#endif
+
+/*
 ** Macros to determine whether the machine is big or little endian,
 ** evaluated at runtime.
 */
@@ -8146,6 +8220,7 @@ SQLITE_PRIVATE   int sqlite3SchemaMutexHeld(sqlite3*,int,Schema*);
 */
 #ifndef _SQLITE_VDBE_H_
 #define _SQLITE_VDBE_H_
+/* #include <stdio.h> */
 
 /*
 ** A single VDBE is an opaque structure named "Vdbe".  Only routines
@@ -8189,6 +8264,7 @@ struct VdbeOp {
     KeyInfo *pKeyInfo;     /* Used when p4type is P4_KEYINFO */
     int *ai;               /* Used when p4type is P4_INTARRAY */
     SubProgram *pProgram;  /* Used when p4type is P4_SUBPROGRAM */
+    int (*xAdvance)(BtCursor *, int *);
   } p4;
 #ifdef SQLITE_DEBUG
   char *zComment;          /* Comment to improve readability */
@@ -8244,6 +8320,7 @@ typedef struct VdbeOpList VdbeOpList;
 #define P4_INT32    (-14) /* P4 is a 32-bit signed integer */
 #define P4_INTARRAY (-15) /* P4 is a vector of 32-bit integers */
 #define P4_SUBPROGRAM  (-18) /* P4 is a pointer to a SubProgram structure */
+#define P4_ADVANCE  (-19) /* P4 is a pointer to BtreeNext() or BtreePrev() */
 
 /* When adding a P4 argument using P4_KEYINFO, a copy of the KeyInfo structure
 ** is made.  That copy is freed when the Vdbe is finalized.  But if the
@@ -8341,102 +8418,105 @@ typedef struct VdbeOpList VdbeOpList;
 #define OP_Or                                  68   /* same as TK_OR       */
 #define OP_Not                                 19   /* same as TK_NOT      */
 #define OP_BitNot                              93   /* same as TK_BITNOT   */
-#define OP_If                                  26
-#define OP_IfNot                               27
+#define OP_Once                                26
+#define OP_If                                  27
+#define OP_IfNot                               28
 #define OP_IsNull                              73   /* same as TK_ISNULL   */
 #define OP_NotNull                             74   /* same as TK_NOTNULL  */
-#define OP_Column                              28
-#define OP_Affinity                            29
-#define OP_MakeRecord                          30
-#define OP_Count                               31
-#define OP_Savepoint                           32
-#define OP_AutoCommit                          33
-#define OP_Transaction                         34
-#define OP_ReadCookie                          35
-#define OP_SetCookie                           36
-#define OP_VerifyCookie                        37
-#define OP_OpenRead                            38
-#define OP_OpenWrite                           39
-#define OP_OpenAutoindex                       40
-#define OP_OpenEphemeral                       41
-#define OP_OpenPseudo                          42
-#define OP_Close                               43
-#define OP_SeekLt                              44
-#define OP_SeekLe                              45
-#define OP_SeekGe                              46
-#define OP_SeekGt                              47
-#define OP_Seek                                48
-#define OP_NotFound                            49
-#define OP_Found                               50
-#define OP_IsUnique                            51
-#define OP_NotExists                           52
-#define OP_Sequence                            53
-#define OP_NewRowid                            54
-#define OP_Insert                              55
-#define OP_InsertInt                           56
-#define OP_Delete                              57
-#define OP_ResetCount                          58
-#define OP_RowKey                              59
-#define OP_RowData                             60
-#define OP_Rowid                               61
-#define OP_NullRow                             62
-#define OP_Last                                63
-#define OP_Sort                                64
-#define OP_Rewind                              65
-#define OP_Prev                                66
-#define OP_Next                                67
-#define OP_IdxInsert                           70
-#define OP_IdxDelete                           71
-#define OP_IdxRowid                            72
-#define OP_IdxLT                               81
-#define OP_IdxGE                               92
-#define OP_Destroy                             95
-#define OP_Clear                               96
-#define OP_CreateIndex                         97
-#define OP_CreateTable                         98
-#define OP_ParseSchema                         99
-#define OP_LoadAnalysis                       100
-#define OP_DropTable                          101
-#define OP_DropIndex                          102
-#define OP_DropTrigger                        103
-#define OP_IntegrityCk                        104
-#define OP_RowSetAdd                          105
-#define OP_RowSetRead                         106
-#define OP_RowSetTest                         107
-#define OP_Program                            108
-#define OP_Param                              109
-#define OP_FkCounter                          110
-#define OP_FkIfZero                           111
-#define OP_MemMax                             112
-#define OP_IfPos                              113
-#define OP_IfNeg                              114
-#define OP_IfZero                             115
-#define OP_AggStep                            116
-#define OP_AggFinal                           117
-#define OP_Checkpoint                         118
-#define OP_JournalMode                        119
-#define OP_Vacuum                             120
-#define OP_IncrVacuum                         121
-#define OP_Expire                             122
-#define OP_TableLock                          123
-#define OP_VBegin                             124
-#define OP_VCreate                            125
-#define OP_VDestroy                           126
-#define OP_VOpen                              127
-#define OP_VFilter                            128
-#define OP_VColumn                            129
-#define OP_VNext                              131
-#define OP_VRename                            132
-#define OP_VUpdate                            133
-#define OP_Pagecount                          134
-#define OP_MaxPgcnt                           135
-#define OP_Trace                              136
-#define OP_Noop                               137
-#define OP_Explain                            138
-
-/* The following opcode values are never used */
-#define OP_NotUsed_139                        139
-#define OP_NotUsed_140                        140
+#define OP_Column                              29
+#define OP_Affinity                            30
+#define OP_MakeRecord                          31
+#define OP_Count                               32
+#define OP_Savepoint                           33
+#define OP_AutoCommit                          34
+#define OP_Transaction                         35
+#define OP_ReadCookie                          36
+#define OP_SetCookie                           37
+#define OP_VerifyCookie                        38
+#define OP_OpenRead                            39
+#define OP_OpenWrite                           40
+#define OP_OpenAutoindex                       41
+#define OP_OpenEphemeral                       42
+#define OP_SorterOpen                          43
+#define OP_OpenPseudo                          44
+#define OP_Close                               45
+#define OP_SeekLt                              46
+#define OP_SeekLe                              47
+#define OP_SeekGe                              48
+#define OP_SeekGt                              49
+#define OP_Seek                                50
+#define OP_NotFound                            51
+#define OP_Found                               52
+#define OP_IsUnique                            53
+#define OP_NotExists                           54
+#define OP_Sequence                            55
+#define OP_NewRowid                            56
+#define OP_Insert                              57
+#define OP_InsertInt                           58
+#define OP_Delete                              59
+#define OP_ResetCount                          60
+#define OP_SorterCompare                       61
+#define OP_SorterData                          62
+#define OP_RowKey                              63
+#define OP_RowData                             64
+#define OP_Rowid                               65
+#define OP_NullRow                             66
+#define OP_Last                                67
+#define OP_SorterSort                          70
+#define OP_Sort                                71
+#define OP_Rewind                              72
+#define OP_SorterNext                          81
+#define OP_Prev                                92
+#define OP_Next                                95
+#define OP_SorterInsert                        96
+#define OP_IdxInsert                           97
+#define OP_IdxDelete                           98
+#define OP_IdxRowid                            99
+#define OP_IdxLT                              100
+#define OP_IdxGE                              101
+#define OP_Destroy                            102
+#define OP_Clear                              103
+#define OP_CreateIndex                        104
+#define OP_CreateTable                        105
+#define OP_ParseSchema                        106
+#define OP_LoadAnalysis                       107
+#define OP_DropTable                          108
+#define OP_DropIndex                          109
+#define OP_DropTrigger                        110
+#define OP_IntegrityCk                        111
+#define OP_RowSetAdd                          112
+#define OP_RowSetRead                         113
+#define OP_RowSetTest                         114
+#define OP_Program                            115
+#define OP_Param                              116
+#define OP_FkCounter                          117
+#define OP_FkIfZero                           118
+#define OP_MemMax                             119
+#define OP_IfPos                              120
+#define OP_IfNeg                              121
+#define OP_IfZero                             122
+#define OP_AggStep                            123
+#define OP_AggFinal                           124
+#define OP_Checkpoint                         125
+#define OP_JournalMode                        126
+#define OP_Vacuum                             127
+#define OP_IncrVacuum                         128
+#define OP_Expire                             129
+#define OP_TableLock                          131
+#define OP_VBegin                             132
+#define OP_VCreate                            133
+#define OP_VDestroy                           134
+#define OP_VOpen                              135
+#define OP_VFilter                            136
+#define OP_VColumn                            137
+#define OP_VNext                              138
+#define OP_VRename                            139
+#define OP_VUpdate                            140
+#define OP_Pagecount                          146
+#define OP_MaxPgcnt                           147
+#define OP_Trace                              148
+#define OP_Noop                               149
+#define OP_Explain                            150
 
 
 /* Properties such as "out2" or "jump" that are specified in
@@ -8454,22 +8534,22 @@ typedef struct VdbeOpList VdbeOpList;
 /*   0 */ 0x00, 0x01, 0x05, 0x04, 0x04, 0x10, 0x00, 0x02,\
 /*   8 */ 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x24, 0x24,\
 /*  16 */ 0x00, 0x00, 0x00, 0x24, 0x04, 0x05, 0x04, 0x00,\
-/*  24 */ 0x00, 0x01, 0x05, 0x05, 0x00, 0x00, 0x00, 0x02,\
-/*  32 */ 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00, 0x00,\
-/*  40 */ 0x00, 0x00, 0x00, 0x00, 0x11, 0x11, 0x11, 0x11,\
-/*  48 */ 0x08, 0x11, 0x11, 0x11, 0x11, 0x02, 0x02, 0x00,\
-/*  56 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x01,\
-/*  64 */ 0x01, 0x01, 0x01, 0x01, 0x4c, 0x4c, 0x08, 0x00,\
-/*  72 */ 0x02, 0x05, 0x05, 0x15, 0x15, 0x15, 0x15, 0x15,\
+/*  24 */ 0x00, 0x01, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00,\
+/*  32 */ 0x02, 0x00, 0x00, 0x00, 0x02, 0x10, 0x00, 0x00,\
+/*  40 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x11,\
+/*  48 */ 0x11, 0x11, 0x08, 0x11, 0x11, 0x11, 0x11, 0x02,\
+/*  56 */ 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\
+/*  64 */ 0x00, 0x02, 0x00, 0x01, 0x4c, 0x4c, 0x01, 0x01,\
+/*  72 */ 0x01, 0x05, 0x05, 0x15, 0x15, 0x15, 0x15, 0x15,\
 /*  80 */ 0x15, 0x01, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c, 0x4c,\
-/*  88 */ 0x4c, 0x4c, 0x4c, 0x4c, 0x01, 0x24, 0x02, 0x02,\
-/*  96 */ 0x00, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,\
-/* 104 */ 0x00, 0x0c, 0x45, 0x15, 0x01, 0x02, 0x00, 0x01,\
-/* 112 */ 0x08, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x02,\
-/* 120 */ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\
-/* 128 */ 0x01, 0x00, 0x02, 0x01, 0x00, 0x00, 0x02, 0x02,\
-/* 136 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04,\
-/* 144 */ 0x04, 0x04,}
+/*  88 */ 0x4c, 0x4c, 0x4c, 0x4c, 0x01, 0x24, 0x02, 0x01,\
+/*  96 */ 0x08, 0x08, 0x00, 0x02, 0x01, 0x01, 0x02, 0x00,\
+/* 104 */ 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,\
+/* 112 */ 0x0c, 0x45, 0x15, 0x01, 0x02, 0x00, 0x01, 0x08,\
+/* 120 */ 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x02, 0x00,\
+/* 128 */ 0x01, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00,\
+/* 136 */ 0x01, 0x00, 0x01, 0x00, 0x00, 0x04, 0x04, 0x04,\
+/* 144 */ 0x04, 0x04, 0x02, 0x02, 0x00, 0x00, 0x00,}
 
 /************** End of opcodes.h *********************************************/
 /************** Continuing where we left off in vdbe.h ***********************/
@@ -8487,12 +8567,12 @@ SQLITE_PRIVATE int sqlite3VdbeAddOp4(Vdbe*,int,int,int,int,const char *zP4,int);
 SQLITE_PRIVATE int sqlite3VdbeAddOp4Int(Vdbe*,int,int,int,int,int);
 SQLITE_PRIVATE int sqlite3VdbeAddOpList(Vdbe*, int nOp, VdbeOpList const *aOp);
 SQLITE_PRIVATE void sqlite3VdbeAddParseSchemaOp(Vdbe*,int,char*);
-SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe*, int addr, int P1);
-SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe*, int addr, int P2);
-SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe*, int addr, int P3);
+SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe*, u32 addr, int P1);
+SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe*, u32 addr, int P2);
+SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe*, u32 addr, int P3);
 SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe*, u8 P5);
 SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe*, int addr);
-SQLITE_PRIVATE void sqlite3VdbeChangeToNoop(Vdbe*, int addr, int N);
+SQLITE_PRIVATE void sqlite3VdbeChangeToNoop(Vdbe*, int addr);
 SQLITE_PRIVATE void sqlite3VdbeChangeP4(Vdbe*, int addr, const char *zP4, int N);
 SQLITE_PRIVATE void sqlite3VdbeUsesBtree(Vdbe*, int);
 SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe*, int);
@@ -8524,9 +8604,9 @@ SQLITE_PRIVATE void sqlite3VdbeSetVarmask(Vdbe*, int);
 SQLITE_PRIVATE   char *sqlite3VdbeExpandSql(Vdbe*, const char*);
 #endif
 
-SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,char*,int);
-SQLITE_PRIVATE void sqlite3VdbeDeleteUnpackedRecord(UnpackedRecord*);
+SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(KeyInfo*,int,const void*,UnpackedRecord*);
 SQLITE_PRIVATE int sqlite3VdbeRecordCompare(int,const void*,UnpackedRecord*);
+SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(KeyInfo *, char *, int, char **);
 
 #ifndef SQLITE_OMIT_TRIGGER
 SQLITE_PRIVATE void sqlite3VdbeLinkSubProgram(Vdbe *, SubProgram *);
@@ -8706,6 +8786,8 @@ SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager*);
 SQLITE_PRIVATE int sqlite3PagerNosync(Pager*);
 SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager*);
 SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager*);
+SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *, int, int, int *);
+SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *);
 
 /* Functions used to truncate the database file. */
 SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager*,Pgno);
@@ -9242,14 +9324,17 @@ SQLITE_PRIVATE int sqlite3OsCloseFree(sqlite3_file *);
 */
 #define sqlite3_mutex_alloc(X)    ((sqlite3_mutex*)8)
 #define sqlite3_mutex_free(X)
-#define sqlite3_mutex_enter(X)
+#define sqlite3_mutex_enter(X)    
 #define sqlite3_mutex_try(X)      SQLITE_OK
-#define sqlite3_mutex_leave(X)
+#define sqlite3_mutex_leave(X)    
 #define sqlite3_mutex_held(X)     ((void)(X),1)
 #define sqlite3_mutex_notheld(X)  ((void)(X),1)
 #define sqlite3MutexAlloc(X)      ((sqlite3_mutex*)8)
 #define sqlite3MutexInit()        SQLITE_OK
 #define sqlite3MutexEnd()
+#define MUTEX_LOGIC(X)
+#else
+#define MUTEX_LOGIC(X)            X
 #endif /* defined(SQLITE_MUTEX_OMIT) */
 
 /************** End of mutex.h ***********************************************/
@@ -9560,6 +9645,7 @@ struct sqlite3 {
 #define SQLITE_GroupByOrder   0x20        /* Disable GROUPBY cover of ORDERBY */
 #define SQLITE_FactorOutConst 0x40        /* Disable factoring out constants */
 #define SQLITE_IdxRealAsInt   0x80        /* Store REAL as INT in indices */
+#define SQLITE_DistinctOpt    0x80        /* DISTINCT using indexes */
 #define SQLITE_OptMask        0xff        /* Mask of all disablable opts */
 
 /*
@@ -9881,7 +9967,7 @@ struct Table {
   Column *aCol;        /* Information about each column */
   Index *pIndex;       /* List of SQL indexes on this table. */
   int tnum;            /* Root BTree node for this table (see note above) */
-  unsigned nRowEst;    /* Estimated rows in table - from sqlite_stat1 table */
+  tRowcnt nRowEst;     /* Estimated rows in table - from sqlite_stat1 table */
   Select *pSelect;     /* NULL for tables.  Points to definition if a view. */
   u16 nRef;            /* Number of pointers to this Table */
   u8 tabFlags;         /* Mask of TF_* values */
@@ -10080,7 +10166,7 @@ struct Index {
   char *zName;     /* Name of this index */
   int nColumn;     /* Number of columns in the table used by this index */
   int *aiColumn;   /* Which columns are used by this index.  1st is 0 */
-  unsigned *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */
+  tRowcnt *aiRowEst; /* Result of ANALYZE: Est. rows selected by each column */
   Table *pTable;   /* The SQL table being indexed */
   int tnum;        /* Page containing root of this index in database file */
   u8 onError;      /* OE_Abort, OE_Ignore, OE_Replace, or OE_None */
@@ -10091,20 +10177,29 @@ struct Index {
   Schema *pSchema; /* Schema containing this index */
   u8 *aSortOrder;  /* Array of size Index.nColumn. True==DESC, False==ASC */
   char **azColl;   /* Array of collation sequence names for index */
-  IndexSample *aSample;    /* Array of SQLITE_INDEX_SAMPLES samples */
+#ifdef SQLITE_ENABLE_STAT3
+  int nSample;             /* Number of elements in aSample[] */
+  tRowcnt avgEq;           /* Average nEq value for key values not in aSample */
+  IndexSample *aSample;    /* Samples of the left-most key */
+#endif
 };
 
 /*
-** Each sample stored in the sqlite_stat2 table is represented in memory 
-** using a structure of this type.
+** Each sample stored in the sqlite_stat3 table is represented in memory 
+** using a structure of this type.  See documentation at the top of the
+** analyze.c source file for additional information.
 */
 struct IndexSample {
   union {
     char *z;        /* Value if eType is SQLITE_TEXT or SQLITE_BLOB */
-    double r;       /* Value if eType is SQLITE_FLOAT or SQLITE_INTEGER */
+    double r;       /* Value if eType is SQLITE_FLOAT */
+    i64 i;          /* Value if eType is SQLITE_INTEGER */
   } u;
   u8 eType;         /* SQLITE_NULL, SQLITE_INTEGER ... etc. */
-  u8 nByte;         /* Size in byte of text or blob. */
+  int nByte;        /* Size in byte of text or blob. */
+  tRowcnt nEq;      /* Est. number of rows where the key equals this sample */
+  tRowcnt nLt;      /* Est. number of rows where key is less than this sample */
+  tRowcnt nDLt;     /* Est. number of distinct keys less than this sample */
 };
 
 /*
@@ -10139,6 +10234,7 @@ struct AggInfo {
   u8 useSortingIdx;       /* In direct mode, reference the sorting index rather
                           ** than the source table */
   int sortingIdx;         /* Cursor number of the sorting index */
+  int sortingIdxPTab;     /* Cursor number of pseudo-table */
   ExprList *pGroupBy;     /* The group by clause */
   int nSortingColumn;     /* Number of columns in the sorting index */
   struct AggInfo_col {    /* For each column used in source tables */
@@ -10448,9 +10544,11 @@ struct SrcList {
     char *zAlias;     /* The "B" part of a "A AS B" phrase.  zName is the "A" */
     Table *pTab;      /* An SQL table corresponding to zName */
     Select *pSelect;  /* A SELECT statement used in place of a table name */
-    u8 isPopulated;   /* Temporary table associated with SELECT is populated */
+    int addrFillSub;  /* Address of subroutine to manifest a subquery */
+    int regReturn;    /* Register holding return address of addrFillSub */
     u8 jointype;      /* Type of join between this able and the previous */
     u8 notIndexed;    /* True if there is a NOT INDEXED clause */
+    u8 isCorrelated;  /* True if sub-query is correlated */
 #ifndef SQLITE_OMIT_EXPLAIN
     u8 iSelectId;     /* If pSelect!=0, the id of the sub-select in EQP */
 #endif
@@ -10553,10 +10651,10 @@ struct WhereLevel {
 #define WHERE_ORDERBY_MAX      0x0002 /* ORDER BY processing for max() func */
 #define WHERE_ONEPASS_DESIRED  0x0004 /* Want to do one-pass UPDATE/DELETE */
 #define WHERE_DUPLICATES_OK    0x0008 /* Ok to return a row more than once */
-#define WHERE_OMIT_OPEN        0x0010 /* Table cursors are already open */
-#define WHERE_OMIT_CLOSE       0x0020 /* Omit close of table & index cursors */
-#define WHERE_FORCE_TABLE      0x0040 /* Do not use an index-only search */
-#define WHERE_ONETABLE_ONLY    0x0080 /* Only code the 1st table in pTabList */
+#define WHERE_OMIT_OPEN_CLOSE  0x0010 /* Table cursors are already open */
+#define WHERE_FORCE_TABLE      0x0020 /* Do not use an index-only search */
+#define WHERE_ONETABLE_ONLY    0x0040 /* Only code the 1st table in pTabList */
+#define WHERE_AND_ONLY         0x0080 /* Don't use indices for OR terms */
 
 /*
 ** The WHERE clause processing routine has two halves.  The
@@ -10570,6 +10668,7 @@ struct WhereInfo {
   u16 wctrlFlags;      /* Flags originally passed to sqlite3WhereBegin() */
   u8 okOnePass;        /* Ok to use one-pass algorithm for UPDATE or DELETE */
   u8 untestedTerms;    /* Not all WHERE terms resolved by outer loop */
+  u8 eDistinct;
   SrcList *pTabList;             /* List of tables in the join */
   int iTop;                      /* The very beginning of the WHERE loop */
   int iContinue;                 /* Jump here to continue with next record */
@@ -10581,6 +10680,9 @@ struct WhereInfo {
   WhereLevel a[1];               /* Information about each nest loop in WHERE */
 };
 
+#define WHERE_DISTINCT_UNIQUE 1
+#define WHERE_DISTINCT_ORDERED 2
+
 /*
 ** A NameContext defines a context in which to resolve table and column
 ** names.  The context consists of a list of tables (the pSrcList) field and
@@ -10666,6 +10768,7 @@ struct Select {
 #define SF_UsesEphemeral   0x0008  /* Uses the OpenEphemeral opcode */
 #define SF_Expanded        0x0010  /* sqlite3SelectExpand() called on this */
 #define SF_HasTypeInfo     0x0020  /* FROM subqueries have Table metadata */
+#define SF_UseSorter       0x0040  /* Sort using a sorter */
 
 
 /*
@@ -11305,6 +11408,7 @@ SQLITE_PRIVATE   int sqlite3ViewGetColumnNames(Parse*,Table*);
 #endif
 
 SQLITE_PRIVATE void sqlite3DropTable(Parse*, SrcList*, int, int);
+SQLITE_PRIVATE void sqlite3CodeDropTable(Parse*, Table*, int, int);
 SQLITE_PRIVATE void sqlite3DeleteTable(sqlite3*, Table*);
 #ifndef SQLITE_OMIT_AUTOINCREMENT
 SQLITE_PRIVATE   void sqlite3AutoincrementBegin(Parse *pParse);
@@ -11342,7 +11446,7 @@ SQLITE_PRIVATE Expr *sqlite3LimitWhere(Parse *, SrcList *, Expr *, ExprList *, E
 #endif
 SQLITE_PRIVATE void sqlite3DeleteFrom(Parse*, SrcList*, Expr*);
 SQLITE_PRIVATE void sqlite3Update(Parse*, SrcList*, ExprList*, Expr*, int);
-SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**, u16);
+SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(Parse*, SrcList*, Expr*, ExprList**,ExprList*,u16);
 SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo*);
 SQLITE_PRIVATE int sqlite3ExprCodeGetColumn(Parse*, Table*, int, int, int);
 SQLITE_PRIVATE void sqlite3ExprCodeGetColumnOfTable(Vdbe*, Table*, int, int, int);
@@ -11561,7 +11665,7 @@ SQLITE_PRIVATE void sqlite3ValueSetStr(sqlite3_value*, int, const void *,u8,
 SQLITE_PRIVATE void sqlite3ValueFree(sqlite3_value*);
 SQLITE_PRIVATE sqlite3_value *sqlite3ValueNew(sqlite3 *);
 SQLITE_PRIVATE char *sqlite3Utf16to8(sqlite3 *, const void*, int, u8);
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 SQLITE_PRIVATE char *sqlite3Utf8to16(sqlite3 *, u8, char *, int, int *);
 #endif
 SQLITE_PRIVATE int sqlite3ValueFromExpr(sqlite3 *, Expr *, u8, u8, sqlite3_value **);
@@ -11663,6 +11767,7 @@ SQLITE_PRIVATE   int sqlite3Utf8To8(unsigned char*);
 #  define sqlite3VtabUnlock(X)
 #  define sqlite3VtabUnlockList(X)
 #  define sqlite3VtabSavepoint(X, Y, Z) SQLITE_OK
+#  define sqlite3GetVTable(X,Y)  ((VTable*)0)
 #else
 SQLITE_PRIVATE    void sqlite3VtabClear(sqlite3 *db, Table*);
 SQLITE_PRIVATE    int sqlite3VtabSync(sqlite3 *db, char **);
@@ -11672,6 +11777,7 @@ SQLITE_PRIVATE    void sqlite3VtabLock(VTable *);
 SQLITE_PRIVATE    void sqlite3VtabUnlock(VTable *);
 SQLITE_PRIVATE    void sqlite3VtabUnlockList(sqlite3*);
 SQLITE_PRIVATE    int sqlite3VtabSavepoint(sqlite3 *, int, int);
+SQLITE_PRIVATE    VTable *sqlite3GetVTable(sqlite3*, Table*);
 #  define sqlite3VtabInSync(db) ((db)->nVTrans>0 && (db)->aVTrans==0)
 #endif
 SQLITE_PRIVATE void sqlite3VtabMakeWritable(Parse*,Table*);
@@ -11691,7 +11797,6 @@ SQLITE_PRIVATE int sqlite3Reprepare(Vdbe*);
 SQLITE_PRIVATE void sqlite3ExprListCheckLength(Parse*, ExprList*, const char*);
 SQLITE_PRIVATE CollSeq *sqlite3BinaryCompareCollSeq(Parse *, Expr *, Expr *);
 SQLITE_PRIVATE int sqlite3TempInMemory(const sqlite3*);
-SQLITE_PRIVATE VTable *sqlite3GetVTable(sqlite3*, Table*);
 SQLITE_PRIVATE const char *sqlite3JournalModename(int);
 SQLITE_PRIVATE int sqlite3Checkpoint(sqlite3*, int, int, int*, int*);
 SQLITE_PRIVATE int sqlite3WalDefaultHook(void*,sqlite3*,const char*,int);
@@ -11992,7 +12097,7 @@ SQLITE_PRIVATE SQLITE_WSD struct Sqlite3Config sqlite3Config = {
    SQLITE_THREADSAFE==1,      /* bFullMutex */
    SQLITE_USE_URI,            /* bOpenUri */
    0x7ffffffe,                /* mxStrlen */
-   100,                       /* szLookaside */
+   128,                       /* szLookaside */
    500,                       /* nLookaside */
    {0,0,0,0,0,0,0,0},         /* m */
    {0,0,0,0,0,0,0,0,0},       /* mutex */
@@ -12185,8 +12290,8 @@ static const char * const azCompileOpt[] = {
 #ifdef SQLITE_ENABLE_RTREE
   "ENABLE_RTREE",
 #endif
-#ifdef SQLITE_ENABLE_STAT2
-  "ENABLE_STAT2",
+#ifdef SQLITE_ENABLE_STAT3
+  "ENABLE_STAT3",
 #endif
 #ifdef SQLITE_ENABLE_UNLOCK_NOTIFY
   "ENABLE_UNLOCK_NOTIFY",
@@ -12215,6 +12320,9 @@ static const char * const azCompileOpt[] = {
 #ifdef SQLITE_LOCK_TRACE
   "LOCK_TRACE",
 #endif
+#ifdef SQLITE_MAX_SCHEMA_RETRY
+  "MAX_SCHEMA_RETRY=" CTIMEOPT_VAL(SQLITE_MAX_SCHEMA_RETRY),
+#endif
 #ifdef SQLITE_MEMDEBUG
   "MEMDEBUG",
 #endif
@@ -12328,6 +12436,9 @@ static const char * const azCompileOpt[] = {
 #ifdef SQLITE_OMIT_MEMORYDB
   "OMIT_MEMORYDB",
 #endif
+#ifdef SQLITE_OMIT_MERGE_SORT
+  "OMIT_MERGE_SORT",
+#endif
 #ifdef SQLITE_OMIT_OR_OPTIMIZATION
   "OMIT_OR_OPTIMIZATION",
 #endif
@@ -12514,6 +12625,9 @@ typedef struct VdbeOp Op;
 */
 typedef unsigned char Bool;
 
+/* Opaque type used by code in vdbesort.c */
+typedef struct VdbeSorter VdbeSorter;
+
 /*
 ** A cursor is a pointer into a single BTree within a database file.
 ** The cursor can seek to a BTree entry with a particular key, or
@@ -12540,11 +12654,13 @@ struct VdbeCursor {
   Bool isTable;         /* True if a table requiring integer keys */
   Bool isIndex;         /* True if an index containing keys only - no data */
   Bool isOrdered;       /* True if the underlying table is BTREE_UNORDERED */
+  Bool isSorter;        /* True if a new-style sorter */
   sqlite3_vtab_cursor *pVtabCursor;  /* The cursor for a virtual table */
   const sqlite3_module *pModule;     /* Module for cursor pVtabCursor */
   i64 seqCount;         /* Sequence counter */
   i64 movetoTarget;     /* Argument to the deferred sqlite3BtreeMoveto() */
   i64 lastRowid;        /* Last rowid from a Next or NextIdx operation */
+  VdbeSorter *pSorter;  /* Sorter object for OP_SorterOpen cursors */
 
   /* Result of last sqlite3BtreeMoveto() done by an OP_NotExists or 
   ** OP_IsUnique opcode on this cursor. */
@@ -12864,6 +12980,9 @@ SQLITE_PRIVATE int sqlite3VdbeMemNumerify(Mem*);
 SQLITE_PRIVATE int sqlite3VdbeMemFromBtree(BtCursor*,int,int,int,Mem*);
 SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p);
 SQLITE_PRIVATE void sqlite3VdbeMemReleaseExternal(Mem *p);
+#define MemReleaseExt(X)  \
+  if((X)->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame)) \
+    sqlite3VdbeMemReleaseExternal(X);
 SQLITE_PRIVATE int sqlite3VdbeMemFinalize(Mem*, FuncDef*);
 SQLITE_PRIVATE const char *sqlite3OpcodeName(int);
 SQLITE_PRIVATE int sqlite3VdbeMemGrow(Mem *pMem, int n, int preserve);
@@ -12871,6 +12990,25 @@ SQLITE_PRIVATE int sqlite3VdbeCloseStatement(Vdbe *, int);
 SQLITE_PRIVATE void sqlite3VdbeFrameDelete(VdbeFrame*);
 SQLITE_PRIVATE int sqlite3VdbeFrameRestore(VdbeFrame *);
 SQLITE_PRIVATE void sqlite3VdbeMemStoreType(Mem *pMem);
+SQLITE_PRIVATE int sqlite3VdbeTransferError(Vdbe *p);
+
+#ifdef SQLITE_OMIT_MERGE_SORT
+# define sqlite3VdbeSorterInit(Y,Z)      SQLITE_OK
+# define sqlite3VdbeSorterWrite(X,Y,Z)   SQLITE_OK
+# define sqlite3VdbeSorterClose(Y,Z)
+# define sqlite3VdbeSorterRowkey(Y,Z)    SQLITE_OK
+# define sqlite3VdbeSorterRewind(X,Y,Z)  SQLITE_OK
+# define sqlite3VdbeSorterNext(X,Y,Z)    SQLITE_OK
+# define sqlite3VdbeSorterCompare(X,Y,Z) SQLITE_OK
+#else
+SQLITE_PRIVATE int sqlite3VdbeSorterInit(sqlite3 *, VdbeCursor *);
+SQLITE_PRIVATE void sqlite3VdbeSorterClose(sqlite3 *, VdbeCursor *);
+SQLITE_PRIVATE int sqlite3VdbeSorterRowkey(VdbeCursor *, Mem *);
+SQLITE_PRIVATE int sqlite3VdbeSorterNext(sqlite3 *, VdbeCursor *, int *);
+SQLITE_PRIVATE int sqlite3VdbeSorterRewind(sqlite3 *, VdbeCursor *, int *);
+SQLITE_PRIVATE int sqlite3VdbeSorterWrite(sqlite3 *, VdbeCursor *, Mem *);
+SQLITE_PRIVATE int sqlite3VdbeSorterCompare(VdbeCursor *, Mem *, int *);
+#endif
 
 #if !defined(SQLITE_OMIT_SHARED_CACHE) && SQLITE_THREADSAFE>0
 SQLITE_PRIVATE   void sqlite3VdbeEnter(Vdbe*);
@@ -13110,6 +13248,28 @@ SQLITE_API int sqlite3_db_status(
       break;
     }
 
+    /*
+    ** Set *pCurrent to the total cache hits or misses encountered by all
+    ** pagers the database handle is connected to. *pHighwater is always set 
+    ** to zero.
+    */
+    case SQLITE_DBSTATUS_CACHE_HIT:
+    case SQLITE_DBSTATUS_CACHE_MISS: {
+      int i;
+      int nRet = 0;
+      assert( SQLITE_DBSTATUS_CACHE_MISS==SQLITE_DBSTATUS_CACHE_HIT+1 );
+
+      for(i=0; i<db->nDb; i++){
+        if( db->aDb[i].pBt ){
+          Pager *pPager = sqlite3BtreePager(db->aDb[i].pBt);
+          sqlite3PagerCacheStat(pPager, op, resetFlag, &nRet);
+        }
+      }
+      *pHighwater = 0;
+      *pCurrent = nRet;
+      break;
+    }
+
     default: {
       rc = SQLITE_ERROR;
     }
@@ -13165,6 +13325,8 @@ SQLITE_API int sqlite3_db_status(
 **      Willmann-Bell, Inc
 **      Richmond, Virginia (USA)
 */
+/* #include <stdlib.h> */
+/* #include <assert.h> */
 #include <time.h>
 
 #ifndef SQLITE_OMIT_DATETIME_FUNCS
@@ -13408,12 +13570,18 @@ static int parseYyyyMmDd(const char *zDate, DateTime *p){
 }
 
 /*
-** Set the time to the current time reported by the VFS
+** Set the time to the current time reported by the VFS.
+**
+** Return the number of errors.
 */
-static void setDateTimeToCurrent(sqlite3_context *context, DateTime *p){
+static int setDateTimeToCurrent(sqlite3_context *context, DateTime *p){
   sqlite3 *db = sqlite3_context_db_handle(context);
-  sqlite3OsCurrentTimeInt64(db->pVfs, &p->iJD);
-  p->validJD = 1;
+  if( sqlite3OsCurrentTimeInt64(db->pVfs, &p->iJD)==SQLITE_OK ){
+    p->validJD = 1;
+    return 0;
+  }else{
+    return 1;
+  }
 }
 
 /*
@@ -13443,8 +13611,7 @@ static int parseDateOrTime(
   }else if( parseHhMmSs(zDate, p)==0 ){
     return 0;
   }else if( sqlite3StrICmp(zDate,"now")==0){
-    setDateTimeToCurrent(context, p);
-    return 0;
+    return setDateTimeToCurrent(context, p);
   }else if( sqlite3AtoF(zDate, &r, sqlite3Strlen30(zDate), SQLITE_UTF8) ){
     p->iJD = (sqlite3_int64)(r*86400000.0 + 0.5);
     p->validJD = 1;
@@ -13546,7 +13713,9 @@ static int osLocaltime(time_t *t, struct tm *pTm){
 #if (!defined(HAVE_LOCALTIME_R) || !HAVE_LOCALTIME_R) \
       && (!defined(HAVE_LOCALTIME_S) || !HAVE_LOCALTIME_S)
   struct tm *pX;
+#if SQLITE_THREADSAFE>0
   sqlite3_mutex *mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+#endif
   sqlite3_mutex_enter(mutex);
   pX = localtime(t);
 #ifndef SQLITE_OMIT_BUILTIN_TEST
@@ -13869,8 +14038,9 @@ static int isDate(
   int eType;
   memset(p, 0, sizeof(*p));
   if( argc==0 ){
-    setDateTimeToCurrent(context, p);
-  }else if( (eType = sqlite3_value_type(argv[0]))==SQLITE_FLOAT
+    return setDateTimeToCurrent(context, p);
+  }
+  if( (eType = sqlite3_value_type(argv[0]))==SQLITE_FLOAT
                    || eType==SQLITE_INTEGER ){
     p->iJD = (sqlite3_int64)(sqlite3_value_double(argv[0])*86400000.0 + 0.5);
     p->validJD = 1;
@@ -14182,31 +14352,28 @@ static void currentTimeFunc(
   char *zFormat = (char *)sqlite3_user_data(context);
   sqlite3 *db;
   sqlite3_int64 iT;
+  struct tm *pTm;
+  struct tm sNow;
   char zBuf[20];
 
   UNUSED_PARAMETER(argc);
   UNUSED_PARAMETER(argv);
 
   db = sqlite3_context_db_handle(context);
-  sqlite3OsCurrentTimeInt64(db->pVfs, &iT);
+  if( sqlite3OsCurrentTimeInt64(db->pVfs, &iT) ) return;
   t = iT/1000 - 10000*(sqlite3_int64)21086676;
 #ifdef HAVE_GMTIME_R
-  {
-    struct tm sNow;
-    gmtime_r(&t, &sNow);
-    strftime(zBuf, 20, zFormat, &sNow);
-  }
+  pTm = gmtime_r(&t, &sNow);
 #else
-  {
-    struct tm *pTm;
-    sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
-    pTm = gmtime(&t);
-    strftime(zBuf, 20, zFormat, pTm);
-    sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
-  }
+  sqlite3_mutex_enter(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
+  pTm = gmtime(&t);
+  if( pTm ) memcpy(&sNow, pTm, sizeof(sNow));
+  sqlite3_mutex_leave(sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER));
 #endif
-
-  sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+  if( pTm ){
+    strftime(zBuf, 20, zFormat, &sNow);
+    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
+  }
 }
 #endif
 
@@ -14380,7 +14547,7 @@ SQLITE_PRIVATE int sqlite3OsOpen(
   ** down into the VFS layer.  Some SQLITE_OPEN_ flags (for example,
   ** SQLITE_OPEN_FULLMUTEX or SQLITE_OPEN_SHAREDCACHE) are blocked before
   ** reaching the VFS. */
-  rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x87f3f, pFlagsOut);
+  rc = pVfs->xOpen(pVfs, zPath, pFile, flags & 0x87f7f, pFlagsOut);
   assert( rc==SQLITE_OK || pFile->pMethods==0 );
   return rc;
 }
@@ -14452,7 +14619,7 @@ SQLITE_PRIVATE int sqlite3OsOpenMalloc(
 ){
   int rc = SQLITE_NOMEM;
   sqlite3_file *pFile;
-  pFile = (sqlite3_file *)sqlite3Malloc(pVfs->szOsFile);
+  pFile = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile);
   if( pFile ){
     rc = sqlite3OsOpen(pVfs, zFile, pFile, flags, pOutFlags);
     if( rc!=SQLITE_OK ){
@@ -14541,12 +14708,12 @@ static void vfsUnlink(sqlite3_vfs *pVfs){
 ** true.
 */
 SQLITE_API int sqlite3_vfs_register(sqlite3_vfs *pVfs, int makeDflt){
-  sqlite3_mutex *mutex = 0;
+  MUTEX_LOGIC(sqlite3_mutex *mutex;)
 #ifndef SQLITE_OMIT_AUTOINIT
   int rc = sqlite3_initialize();
   if( rc ) return rc;
 #endif
-  mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+  MUTEX_LOGIC( mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
   sqlite3_mutex_enter(mutex);
   vfsUnlink(pVfs);
   if( makeDflt || vfsList==0 ){
@@ -14916,6 +15083,7 @@ SQLITE_PRIVATE void sqlite3MemSetDefault(void){
 # define backtrace(A,B) 1
 # define backtrace_symbols_fd(A,B,C)
 #endif
+/* #include <stdio.h> */
 
 /*
 ** Each memory allocation looks like this:
@@ -15841,7 +16009,7 @@ static void *memsys3MallocUnsafe(int nByte){
 ** This function assumes that the necessary mutexes, if any, are
 ** already held by the caller. Hence "Unsafe".
 */
-void memsys3FreeUnsafe(void *pOld){
+static void memsys3FreeUnsafe(void *pOld){
   Mem3Block *p = (Mem3Block*)pOld;
   int i;
   u32 size, x;
@@ -15916,7 +16084,7 @@ static void *memsys3Malloc(int nBytes){
 /*
 ** Free memory.
 */
-void memsys3Free(void *pPrior){
+static void memsys3Free(void *pPrior){
   assert( pPrior );
   memsys3Enter();
   memsys3FreeUnsafe(pPrior);
@@ -15926,7 +16094,7 @@ void memsys3Free(void *pPrior){
 /*
 ** Change the size of an existing memory allocation
 */
-void *memsys3Realloc(void *pPrior, int nBytes){
+static void *memsys3Realloc(void *pPrior, int nBytes){
   int nOld;
   void *p;
   if( pPrior==0 ){
@@ -18019,6 +18187,7 @@ SQLITE_PRIVATE sqlite3_mutex_methods const *sqlite3DefaultMutex(void){
 **
 ** Memory allocation functions used throughout sqlite.
 */
+/* #include <stdarg.h> */
 
 /*
 ** Attempt to release up to n bytes of non-essential memory currently
@@ -18792,48 +18961,10 @@ SQLITE_PRIVATE int sqlite3ApiExit(sqlite3* db, int rc){
 **
 **************************************************************************
 **
-** The following modules is an enhanced replacement for the "printf" subroutines
-** found in the standard C library.  The following enhancements are
-** supported:
-**
-**      +  Additional functions.  The standard set of "printf" functions
-**         includes printf, fprintf, sprintf, vprintf, vfprintf, and
-**         vsprintf.  This module adds the following:
-**
-**           *  snprintf -- Works like sprintf, but has an extra argument
-**                          which is the size of the buffer written to.
-**
-**           *  mprintf --  Similar to sprintf.  Writes output to memory
-**                          obtained from malloc.
-**
-**           *  xprintf --  Calls a function to dispose of output.
-**
-**           *  nprintf --  No output, but returns the number of characters
-**                          that would have been output by printf.
-**
-**           *  A v- version (ex: vsnprintf) of every function is also
-**              supplied.
-**
-**      +  A few extensions to the formatting notation are supported:
-**
-**           *  The "=" flag (similar to "-") causes the output to be
-**              be centered in the appropriately sized field.
-**
-**           *  The %b field outputs an integer in binary notation.
-**
-**           *  The %c field now accepts a precision.  The character output
-**              is repeated by the number of times the precision specifies.
-**
-**           *  The %' field works like %c, but takes as its character the
-**              next character of the format string, instead of the next
-**              argument.  For example,  printf("%.78'-")  prints 78 minus
-**              signs, the same as  printf("%.78c",'-').
-**
-**      +  When compiled using GCC on a SPARC, this version of printf is
-**         faster than the library printf for SUN OS 4.1.
-**
-**      +  All functions are fully reentrant.
-**
+** This file contains code for a set of "printf"-like routines.  These
+** routines format strings much like the printf() from the standard C
+** library, though the implementation here has enhancements to support
+** SQLite.
 */
 
 /*
@@ -18971,43 +19102,15 @@ static void appendSpace(StrAccum *pAccum, int N){
 
 /*
 ** On machines with a small stack size, you can redefine the
-** SQLITE_PRINT_BUF_SIZE to be less than 350.
+** SQLITE_PRINT_BUF_SIZE to be something smaller, if desired.
 */
 #ifndef SQLITE_PRINT_BUF_SIZE
-# if defined(SQLITE_SMALL_STACK)
-#   define SQLITE_PRINT_BUF_SIZE 50
-# else
-#   define SQLITE_PRINT_BUF_SIZE 350
-# endif
+# define SQLITE_PRINT_BUF_SIZE 70
 #endif
 #define etBUFSIZE SQLITE_PRINT_BUF_SIZE  /* Size of the output buffer */
 
 /*
-** The root program.  All variations call this core.
-**
-** INPUTS:
-**   func   This is a pointer to a function taking three arguments
-**            1. A pointer to anything.  Same as the "arg" parameter.
-**            2. A pointer to the list of characters to be output
-**               (Note, this list is NOT null terminated.)
-**            3. An integer number of characters to be output.
-**               (Note: This number might be zero.)
-**
-**   arg    This is the pointer to anything which will be passed as the
-**          first argument to "func".  Use it for whatever you like.
-**
-**   fmt    This is the format string, as in the usual print.
-**
-**   ap     This is a pointer to a list of arguments.  Same as in
-**          vfprint.
-**
-** OUTPUTS:
-**          The return value is the total number of characters sent to
-**          the function "func".  Returns -1 on a error.
-**
-** Note that the order in which automatic variables are declared below
-** seems to make a big difference in determining how fast this beast
-** will run.
+** Render a string given by "fmt" into the StrAccum object.
 */
 SQLITE_PRIVATE void sqlite3VXPrintf(
   StrAccum *pAccum,                  /* Accumulate results here */
@@ -19030,23 +19133,23 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
   etByte flag_long;          /* True if "l" flag is present */
   etByte flag_longlong;      /* True if the "ll" flag is present */
   etByte done;               /* Loop termination flag */
+  etByte xtype = 0;          /* Conversion paradigm */
+  char prefix;               /* Prefix character.  "+" or "-" or " " or '\0'. */
   sqlite_uint64 longvalue;   /* Value for integer types */
   LONGDOUBLE_TYPE realvalue; /* Value for real types */
   const et_info *infop;      /* Pointer to the appropriate info structure */
-  char buf[etBUFSIZE];       /* Conversion buffer */
-  char prefix;               /* Prefix character.  "+" or "-" or " " or '\0'. */
-  etByte xtype = 0;          /* Conversion paradigm */
-  char *zExtra;              /* Extra memory used for etTCLESCAPE conversions */
+  char *zOut;                /* Rendering buffer */
+  int nOut;                  /* Size of the rendering buffer */
+  char *zExtra;              /* Malloced memory used by some conversions */
 #ifndef SQLITE_OMIT_FLOATING_POINT
   int  exp, e2;              /* exponent of real numbers */
+  int nsd;                   /* Number of significant digits returned */
   double rounder;            /* Used for rounding floating point values */
   etByte flag_dp;            /* True if decimal point should be shown */
   etByte flag_rtz;           /* True if trailing zeros should be removed */
-  etByte flag_exp;           /* True to force display of the exponent */
-  int nsd;                   /* Number of significant digits returned */
 #endif
+  char buf[etBUFSIZE];       /* Conversion buffer */
 
-  length = 0;
   bufpt = 0;
   for(; (c=(*fmt))!=0; ++fmt){
     if( c!='%' ){
@@ -19091,9 +19194,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         c = *++fmt;
       }
     }
-    if( width > etBUFSIZE-10 ){
-      width = etBUFSIZE-10;
-    }
     /* Get the precision */
     if( c=='.' ){
       precision = 0;
@@ -19140,12 +19240,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
     }
     zExtra = 0;
 
-
-    /* Limit the precision to prevent overflowing buf[] during conversion */
-    if( precision>etBUFSIZE-40 && (infop->flags & FLAG_STRING)==0 ){
-      precision = etBUFSIZE-40;
-    }
-
     /*
     ** At this point, variables are initialized as follows:
     **
@@ -19210,16 +19304,26 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         if( flag_zeropad && precision<width-(prefix!=0) ){
           precision = width-(prefix!=0);
         }
-        bufpt = &buf[etBUFSIZE-1];
+        if( precision<etBUFSIZE-10 ){
+          nOut = etBUFSIZE;
+          zOut = buf;
+        }else{
+          nOut = precision + 10;
+          zOut = zExtra = sqlite3Malloc( nOut );
+          if( zOut==0 ){
+            pAccum->mallocFailed = 1;
+            return;
+          }
+        }
+        bufpt = &zOut[nOut-1];
         if( xtype==etORDINAL ){
           static const char zOrd[] = "thstndrd";
           int x = (int)(longvalue % 10);
           if( x>=4 || (longvalue/10)%10==1 ){
             x = 0;
           }
-          buf[etBUFSIZE-3] = zOrd[x*2];
-          buf[etBUFSIZE-2] = zOrd[x*2+1];
-          bufpt -= 2;
+          *(--bufpt) = zOrd[x*2+1];
+          *(--bufpt) = zOrd[x*2];
         }
         {
           register const char *cset;      /* Use registers for speed */
@@ -19231,7 +19335,7 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
             longvalue = longvalue/base;
           }while( longvalue>0 );
         }
-        length = (int)(&buf[etBUFSIZE-1]-bufpt);
+        length = (int)(&zOut[nOut-1]-bufpt);
         for(idx=precision-length; idx>0; idx--){
           *(--bufpt) = '0';                             /* Zero pad */
         }
@@ -19242,7 +19346,7 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
           pre = &aPrefix[infop->prefix];
           for(; (x=(*pre))!=0; pre++) *(--bufpt) = x;
         }
-        length = (int)(&buf[etBUFSIZE-1]-bufpt);
+        length = (int)(&zOut[nOut-1]-bufpt);
         break;
       case etFLOAT:
       case etEXP:
@@ -19252,7 +19356,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         length = 0;
 #else
         if( precision<0 ) precision = 6;         /* Set default precision */
-        if( precision>etBUFSIZE/2-10 ) precision = etBUFSIZE/2-10;
         if( realvalue<0.0 ){
           realvalue = -realvalue;
           prefix = '-';
@@ -19300,7 +19403,6 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         ** If the field type is etGENERIC, then convert to either etEXP
         ** or etFLOAT, as appropriate.
         */
-        flag_exp = xtype==etEXP;
         if( xtype!=etFLOAT ){
           realvalue += rounder;
           if( realvalue>=10.0 ){ realvalue *= 0.1; exp++; }
@@ -19321,6 +19423,14 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         }else{
           e2 = exp;
         }
+        if( e2+precision+width > etBUFSIZE - 15 ){
+          bufpt = zExtra = sqlite3Malloc( e2+precision+width+15 );
+          if( bufpt==0 ){
+            pAccum->mallocFailed = 1;
+            return;
+          }
+        }
+        zOut = bufpt;
         nsd = 0;
         flag_dp = (precision>0 ?1:0) | flag_alternateform | flag_altform2;
         /* The sign in front of the number */
@@ -19352,7 +19462,7 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         /* Remove trailing zeros and the "." if no digits follow the "." */
         if( flag_rtz && flag_dp ){
           while( bufpt[-1]=='0' ) *(--bufpt) = 0;
-          assert( bufpt>buf );
+          assert( bufpt>zOut );
           if( bufpt[-1]=='.' ){
             if( flag_altform2 ){
               *(bufpt++) = '0';
@@ -19362,7 +19472,7 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
           }
         }
         /* Add the "eNNN" suffix */
-        if( flag_exp || xtype==etEXP ){
+        if( xtype==etEXP ){
           *(bufpt++) = aDigits[infop->charset];
           if( exp<0 ){
             *(bufpt++) = '-'; exp = -exp;
@@ -19381,8 +19491,8 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         /* The converted number is in buf[] and zero terminated. Output it.
         ** Note that the number is in the usual order, not reversed as with
         ** integer conversions. */
-        length = (int)(bufpt-buf);
-        bufpt = buf;
+        length = (int)(bufpt-zOut);
+        bufpt = zOut;
 
         /* Special case:  Add leading zeros if the flag_zeropad flag is
         ** set and we are not left justified */
@@ -19520,9 +19630,7 @@ SQLITE_PRIVATE void sqlite3VXPrintf(
         appendSpace(pAccum, nspace);
       }
     }
-    if( zExtra ){
-      sqlite3_free(zExtra);
-    }
+    sqlite3_free(zExtra);
   }/* End for loop over the format string */
 } /* End of function */
 
@@ -19536,6 +19644,7 @@ SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){
     testcase(p->mallocFailed);
     return;
   }
+  assert( p->zText!=0 || p->nChar==0 );
   if( N<0 ){
     N = sqlite3Strlen30(z);
   }
@@ -19567,7 +19676,7 @@ SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){
         zNew = sqlite3_realloc(zOld, p->nAlloc);
       }
       if( zNew ){
-        if( zOld==0 ) memcpy(zNew, p->zText, p->nChar);
+        if( zOld==0 && p->nChar>0 ) memcpy(zNew, p->zText, p->nChar);
         p->zText = zNew;
       }else{
         p->mallocFailed = 1;
@@ -19576,6 +19685,7 @@ SQLITE_PRIVATE void sqlite3StrAccumAppend(StrAccum *p, const char *z, int N){
       }
     }
   }
+  assert( p->zText );
   memcpy(&p->zText[p->nChar], z, N);
   p->nChar += N;
 }
@@ -19996,6 +20106,7 @@ SQLITE_PRIVATE void sqlite3PrngResetState(void){
 **     0xfe 0xff   big-endian utf-16 follows
 **
 */
+/* #include <assert.h> */
 
 #ifndef SQLITE_AMALGAMATION
 /*
@@ -20424,7 +20535,7 @@ SQLITE_PRIVATE char *sqlite3Utf16to8(sqlite3 *db, const void *z, int nByte, u8 e
 ** If a malloc failure occurs, NULL is returned and the db.mallocFailed
 ** flag set.
 */
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 SQLITE_PRIVATE char *sqlite3Utf8to16(sqlite3 *db, u8 enc, char *z, int n, int *pnOut){
   Mem m;
   memset(&m, 0, sizeof(m));
@@ -20538,6 +20649,7 @@ SQLITE_PRIVATE void sqlite3UtfSelfTest(void){
 ** strings, and stuff like that.
 **
 */
+/* #include <stdarg.h> */
 #ifdef SQLITE_HAVE_ISNAN
 # include <math.h>
 #endif
@@ -20852,7 +20964,7 @@ SQLITE_PRIVATE int sqlite3AtoF(const char *z, double *pResult, int length, u8 en
     }
     /* copy digits to exponent */
     while( z<zEnd && sqlite3Isdigit(*z) ){
-      e = e*10 + (*z - '0');
+      e = e<10000 ? (e*10 + (*z - '0')) : 10000;
       z+=incr;
       eValid = 1;
     }
@@ -20903,6 +21015,12 @@ do_atof_calc:
           result = s * scale;
           result *= 1.0e+308;
         }
+      }else if( e>=342 ){
+        if( esign<0 ){
+          result = 0.0*s;
+        }else{
+          result = 1e308*1e308*s;  /* Infinity */
+        }
       }else{
         /* 1.0e+22 is the largest power of 10 than can be 
         ** represented exactly. */
@@ -21670,12 +21788,15 @@ SQLITE_PRIVATE int sqlite3AbsInt32(int x){
 
 #ifdef SQLITE_ENABLE_8_3_NAMES
 /*
-** If SQLITE_ENABLE_8_3_NAME is set at compile-time and if the database
+** If SQLITE_ENABLE_8_3_NAMES is set at compile-time and if the database
 ** filename in zBaseFilename is a URI with the "8_3_names=1" parameter and
 ** if filename in z[] has a suffix (a.k.a. "extension") that is longer than
 ** three characters, then shorten the suffix on z[] to be the last three
 ** characters of the original suffix.
 **
+** If SQLITE_ENABLE_8_3_NAMES is set to 2 at compile-time, then always
+** do the suffix shortening regardless of URI parameter.
+**
 ** Examples:
 **
 **     test.db-journal    =>   test.nal
@@ -21683,9 +21804,12 @@ SQLITE_PRIVATE int sqlite3AbsInt32(int x){
 **     test.db-shm        =>   test.shm
 */
 SQLITE_PRIVATE void sqlite3FileSuffix3(const char *zBaseFilename, char *z){
+#if SQLITE_ENABLE_8_3_NAMES<2
   const char *zOk;
   zOk = sqlite3_uri_parameter(zBaseFilename, "8_3_names");
-  if( zOk && sqlite3GetBoolean(zOk) ){
+  if( zOk && sqlite3GetBoolean(zOk) )
+#endif
+  {
     int i, sz;
     sz = sqlite3Strlen30(z);
     for(i=sz-1; i>0 && z[i]!='/' && z[i]!='.'; i--){}
@@ -21710,6 +21834,7 @@ SQLITE_PRIVATE void sqlite3FileSuffix3(const char *zBaseFilename, char *z){
 ** This is the implementation of generic hash-tables
 ** used in SQLite.
 */
+/* #include <assert.h> */
 
 /* Turn bulk memory into a hash table object by initializing the
 ** fields of the Hash structure.
@@ -22004,53 +22129,53 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){
      /*  23 */ "Permutation",
      /*  24 */ "Compare",
      /*  25 */ "Jump",
-     /*  26 */ "If",
-     /*  27 */ "IfNot",
-     /*  28 */ "Column",
-     /*  29 */ "Affinity",
-     /*  30 */ "MakeRecord",
-     /*  31 */ "Count",
-     /*  32 */ "Savepoint",
-     /*  33 */ "AutoCommit",
-     /*  34 */ "Transaction",
-     /*  35 */ "ReadCookie",
-     /*  36 */ "SetCookie",
-     /*  37 */ "VerifyCookie",
-     /*  38 */ "OpenRead",
-     /*  39 */ "OpenWrite",
-     /*  40 */ "OpenAutoindex",
-     /*  41 */ "OpenEphemeral",
-     /*  42 */ "OpenPseudo",
-     /*  43 */ "Close",
-     /*  44 */ "SeekLt",
-     /*  45 */ "SeekLe",
-     /*  46 */ "SeekGe",
-     /*  47 */ "SeekGt",
-     /*  48 */ "Seek",
-     /*  49 */ "NotFound",
-     /*  50 */ "Found",
-     /*  51 */ "IsUnique",
-     /*  52 */ "NotExists",
-     /*  53 */ "Sequence",
-     /*  54 */ "NewRowid",
-     /*  55 */ "Insert",
-     /*  56 */ "InsertInt",
-     /*  57 */ "Delete",
-     /*  58 */ "ResetCount",
-     /*  59 */ "RowKey",
-     /*  60 */ "RowData",
-     /*  61 */ "Rowid",
-     /*  62 */ "NullRow",
-     /*  63 */ "Last",
-     /*  64 */ "Sort",
-     /*  65 */ "Rewind",
-     /*  66 */ "Prev",
-     /*  67 */ "Next",
+     /*  26 */ "Once",
+     /*  27 */ "If",
+     /*  28 */ "IfNot",
+     /*  29 */ "Column",
+     /*  30 */ "Affinity",
+     /*  31 */ "MakeRecord",
+     /*  32 */ "Count",
+     /*  33 */ "Savepoint",
+     /*  34 */ "AutoCommit",
+     /*  35 */ "Transaction",
+     /*  36 */ "ReadCookie",
+     /*  37 */ "SetCookie",
+     /*  38 */ "VerifyCookie",
+     /*  39 */ "OpenRead",
+     /*  40 */ "OpenWrite",
+     /*  41 */ "OpenAutoindex",
+     /*  42 */ "OpenEphemeral",
+     /*  43 */ "SorterOpen",
+     /*  44 */ "OpenPseudo",
+     /*  45 */ "Close",
+     /*  46 */ "SeekLt",
+     /*  47 */ "SeekLe",
+     /*  48 */ "SeekGe",
+     /*  49 */ "SeekGt",
+     /*  50 */ "Seek",
+     /*  51 */ "NotFound",
+     /*  52 */ "Found",
+     /*  53 */ "IsUnique",
+     /*  54 */ "NotExists",
+     /*  55 */ "Sequence",
+     /*  56 */ "NewRowid",
+     /*  57 */ "Insert",
+     /*  58 */ "InsertInt",
+     /*  59 */ "Delete",
+     /*  60 */ "ResetCount",
+     /*  61 */ "SorterCompare",
+     /*  62 */ "SorterData",
+     /*  63 */ "RowKey",
+     /*  64 */ "RowData",
+     /*  65 */ "Rowid",
+     /*  66 */ "NullRow",
+     /*  67 */ "Last",
      /*  68 */ "Or",
      /*  69 */ "And",
-     /*  70 */ "IdxInsert",
-     /*  71 */ "IdxDelete",
-     /*  72 */ "IdxRowid",
+     /*  70 */ "SorterSort",
+     /*  71 */ "Sort",
+     /*  72 */ "Rewind",
      /*  73 */ "IsNull",
      /*  74 */ "NotNull",
      /*  75 */ "Ne",
@@ -22059,7 +22184,7 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){
      /*  78 */ "Le",
      /*  79 */ "Lt",
      /*  80 */ "Ge",
-     /*  81 */ "IdxLT",
+     /*  81 */ "SorterNext",
      /*  82 */ "BitAnd",
      /*  83 */ "BitOr",
      /*  84 */ "ShiftLeft",
@@ -22070,60 +22195,65 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){
      /*  89 */ "Divide",
      /*  90 */ "Remainder",
      /*  91 */ "Concat",
-     /*  92 */ "IdxGE",
+     /*  92 */ "Prev",
      /*  93 */ "BitNot",
      /*  94 */ "String8",
-     /*  95 */ "Destroy",
-     /*  96 */ "Clear",
-     /*  97 */ "CreateIndex",
-     /*  98 */ "CreateTable",
-     /*  99 */ "ParseSchema",
-     /* 100 */ "LoadAnalysis",
-     /* 101 */ "DropTable",
-     /* 102 */ "DropIndex",
-     /* 103 */ "DropTrigger",
-     /* 104 */ "IntegrityCk",
-     /* 105 */ "RowSetAdd",
-     /* 106 */ "RowSetRead",
-     /* 107 */ "RowSetTest",
-     /* 108 */ "Program",
-     /* 109 */ "Param",
-     /* 110 */ "FkCounter",
-     /* 111 */ "FkIfZero",
-     /* 112 */ "MemMax",
-     /* 113 */ "IfPos",
-     /* 114 */ "IfNeg",
-     /* 115 */ "IfZero",
-     /* 116 */ "AggStep",
-     /* 117 */ "AggFinal",
-     /* 118 */ "Checkpoint",
-     /* 119 */ "JournalMode",
-     /* 120 */ "Vacuum",
-     /* 121 */ "IncrVacuum",
-     /* 122 */ "Expire",
-     /* 123 */ "TableLock",
-     /* 124 */ "VBegin",
-     /* 125 */ "VCreate",
-     /* 126 */ "VDestroy",
-     /* 127 */ "VOpen",
-     /* 128 */ "VFilter",
-     /* 129 */ "VColumn",
+     /*  95 */ "Next",
+     /*  96 */ "SorterInsert",
+     /*  97 */ "IdxInsert",
+     /*  98 */ "IdxDelete",
+     /*  99 */ "IdxRowid",
+     /* 100 */ "IdxLT",
+     /* 101 */ "IdxGE",
+     /* 102 */ "Destroy",
+     /* 103 */ "Clear",
+     /* 104 */ "CreateIndex",
+     /* 105 */ "CreateTable",
+     /* 106 */ "ParseSchema",
+     /* 107 */ "LoadAnalysis",
+     /* 108 */ "DropTable",
+     /* 109 */ "DropIndex",
+     /* 110 */ "DropTrigger",
+     /* 111 */ "IntegrityCk",
+     /* 112 */ "RowSetAdd",
+     /* 113 */ "RowSetRead",
+     /* 114 */ "RowSetTest",
+     /* 115 */ "Program",
+     /* 116 */ "Param",
+     /* 117 */ "FkCounter",
+     /* 118 */ "FkIfZero",
+     /* 119 */ "MemMax",
+     /* 120 */ "IfPos",
+     /* 121 */ "IfNeg",
+     /* 122 */ "IfZero",
+     /* 123 */ "AggStep",
+     /* 124 */ "AggFinal",
+     /* 125 */ "Checkpoint",
+     /* 126 */ "JournalMode",
+     /* 127 */ "Vacuum",
+     /* 128 */ "IncrVacuum",
+     /* 129 */ "Expire",
      /* 130 */ "Real",
-     /* 131 */ "VNext",
-     /* 132 */ "VRename",
-     /* 133 */ "VUpdate",
-     /* 134 */ "Pagecount",
-     /* 135 */ "MaxPgcnt",
-     /* 136 */ "Trace",
-     /* 137 */ "Noop",
-     /* 138 */ "Explain",
-     /* 139 */ "NotUsed_139",
-     /* 140 */ "NotUsed_140",
+     /* 131 */ "TableLock",
+     /* 132 */ "VBegin",
+     /* 133 */ "VCreate",
+     /* 134 */ "VDestroy",
+     /* 135 */ "VOpen",
+     /* 136 */ "VFilter",
+     /* 137 */ "VColumn",
+     /* 138 */ "VNext",
+     /* 139 */ "VRename",
+     /* 140 */ "VUpdate",
      /* 141 */ "ToText",
      /* 142 */ "ToBlob",
      /* 143 */ "ToNumeric",
      /* 144 */ "ToInt",
      /* 145 */ "ToReal",
+     /* 146 */ "Pagecount",
+     /* 147 */ "MaxPgcnt",
+     /* 148 */ "Trace",
+     /* 149 */ "Noop",
+     /* 150 */ "Explain",
   };
   return azName[i];
 }
@@ -22218,11 +22348,14 @@ SQLITE_PRIVATE const char *sqlite3OpcodeName(int i){
 # error "The MEMORY_DEBUG macro is obsolete.  Use SQLITE_DEBUG instead."
 #endif
 
-#ifdef SQLITE_DEBUG
-SQLITE_PRIVATE int sqlite3OSTrace = 0;
-#define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+# ifndef SQLITE_DEBUG_OS_TRACE
+#   define SQLITE_DEBUG_OS_TRACE 0
+# endif
+  int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE;
+# define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
 #else
-#define OSTRACE(X)
+# define OSTRACE(X)
 #endif
 
 /*
@@ -24379,6 +24512,7 @@ SQLITE_API int sqlite3_os_end(void){
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
+/* #include <time.h> */
 #include <sys/time.h>
 #include <errno.h>
 #ifndef SQLITE_OMIT_WAL
@@ -24414,6 +24548,7 @@ SQLITE_API int sqlite3_os_end(void){
 ** the SQLITE_UNIX_THREADS macro.
 */
 #if SQLITE_THREADSAFE
+/* # include <pthread.h> */
 # define SQLITE_UNIX_THREADS 1
 #endif
 
@@ -24469,7 +24604,6 @@ struct unixFile {
   sqlite3_io_methods const *pMethod;  /* Always the first entry */
   unixInodeInfo *pInode;              /* Info about locks on this inode */
   int h;                              /* The file descriptor */
-  int dirfd;                          /* File descriptor for the directory */
   unsigned char eFileLock;            /* The type of lock held on this fd */
   unsigned char ctrlFlags;            /* Behavioral bits.  UNIXFILE_* flags */
   int lastErrno;                      /* The unix errno from last I/O error */
@@ -24511,8 +24645,14 @@ struct unixFile {
 /*
 ** Allowed values for the unixFile.ctrlFlags bitmask:
 */
-#define UNIXFILE_EXCL   0x01     /* Connections from one process only */
-#define UNIXFILE_RDONLY 0x02     /* Connection is read only */
+#define UNIXFILE_EXCL        0x01     /* Connections from one process only */
+#define UNIXFILE_RDONLY      0x02     /* Connection is read only */
+#define UNIXFILE_PERSIST_WAL 0x04     /* Persistent WAL mode */
+#ifndef SQLITE_DISABLE_DIRSYNC
+# define UNIXFILE_DIRSYNC    0x08     /* Directory sync needed */
+#else
+# define UNIXFILE_DIRSYNC    0x00
+#endif
 
 /*
 ** Include code that is common to all os_*.c files
@@ -24550,11 +24690,14 @@ struct unixFile {
 # error "The MEMORY_DEBUG macro is obsolete.  Use SQLITE_DEBUG instead."
 #endif
 
-#ifdef SQLITE_DEBUG
-SQLITE_PRIVATE int sqlite3OSTrace = 0;
-#define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+# ifndef SQLITE_DEBUG_OS_TRACE
+#   define SQLITE_DEBUG_OS_TRACE 0
+# endif
+  int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE;
+# define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
 #else
-#define OSTRACE(X)
+# define OSTRACE(X)
 #endif
 
 /*
@@ -24763,6 +24906,9 @@ static int posixOpen(const char *zFile, int flags, int mode){
   return open(zFile, flags, mode);
 }
 
+/* Forward reference */
+static int openDirectory(const char*, int*);
+
 /*
 ** Many system calls are accessed through pointer-to-functions so that
 ** they may be overridden at runtime to facilitate fault injection during
@@ -24859,6 +25005,12 @@ static struct unix_syscall {
 #endif
 #define osFallocate ((int(*)(int,off_t,off_t))aSyscall[15].pCurrent)
 
+  { "unlink",       (sqlite3_syscall_ptr)unlink,           0 },
+#define osUnlink    ((int(*)(const char*))aSyscall[16].pCurrent)
+
+  { "openDirectory",    (sqlite3_syscall_ptr)openDirectory,      0 },
+#define osOpenDirectory ((int(*)(const char*,int*))aSyscall[17].pCurrent)
+
 }; /* End of the overrideable system calls */
 
 /*
@@ -24980,7 +25132,7 @@ static int unixMutexHeld(void) {
 #endif
 
 
-#ifdef SQLITE_DEBUG
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
 /*
 ** Helper function for printing out trace information from debugging
 ** binaries. This returns the string represetation of the supplied
@@ -25143,7 +25295,9 @@ static int sqliteErrorFromPosixError(int posixError, int sqliteIOErr) {
   case ENODEV:
   case ENXIO:
   case ENOENT:
+#ifdef ESTALE                     /* ESTALE is not defined on Interix systems */
   case ESTALE:
+#endif
   case ENOSYS:
     /* these should force the client to close the file and reconnect */
     
@@ -25813,14 +25967,14 @@ static int unixLock(sqlite3_file *id, int eFileLock){
   */
   int rc = SQLITE_OK;
   unixFile *pFile = (unixFile*)id;
-  unixInodeInfo *pInode = pFile->pInode;
+  unixInodeInfo *pInode;
   struct flock lock;
   int tErrno = 0;
 
   assert( pFile );
   OSTRACE(("LOCK    %d %s was %s(%s,%d) pid=%d (unix)\n", pFile->h,
       azFileLock(eFileLock), azFileLock(pFile->eFileLock),
-      azFileLock(pInode->eFileLock), pInode->nShared , getpid()));
+      azFileLock(pFile->pInode->eFileLock), pFile->pInode->nShared , getpid()));
 
   /* If there is already a lock of this type or more restrictive on the
   ** unixFile, do nothing. Don't use the end_lock: exit path, as
@@ -26024,7 +26178,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
   unixInodeInfo *pInode;
   struct flock lock;
   int rc = SQLITE_OK;
-  int h;
 
   assert( pFile );
   OSTRACE(("UNLOCK  %d %d was %d(%d,%d) pid=%d (unix)\n", pFile->h, eFileLock,
@@ -26036,14 +26189,10 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
     return SQLITE_OK;
   }
   unixEnterMutex();
-  h = pFile->h;
   pInode = pFile->pInode;
   assert( pInode->nShared!=0 );
   if( pFile->eFileLock>SHARED_LOCK ){
     assert( pInode->eFileLock==pFile->eFileLock );
-    SimulateIOErrorBenign(1);
-    SimulateIOError( h=(-1) )
-    SimulateIOErrorBenign(0);
 
 #ifndef NDEBUG
     /* When reducing a lock such that other processes can start
@@ -26054,11 +26203,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
     ** the file has changed and hence might not know to flush their
     ** cache.  The use of a stale cache can lead to database corruption.
     */
-#if 0
-    assert( pFile->inNormalWrite==0
-         || pFile->dbUpdate==0
-         || pFile->transCntrChng==1 );
-#endif
     pFile->inNormalWrite = 0;
 #endif
 
@@ -26160,9 +26304,6 @@ static int posixUnlock(sqlite3_file *id, int eFileLock, int handleNFSUnlock){
       lock.l_type = F_UNLCK;
       lock.l_whence = SEEK_SET;
       lock.l_start = lock.l_len = 0L;
-      SimulateIOErrorBenign(1);
-      SimulateIOError( h=(-1) )
-      SimulateIOErrorBenign(0);
       if( unixFileLock(pFile, &lock)==0 ){
         pInode->eFileLock = NO_LOCK;
       }else{
@@ -26213,10 +26354,6 @@ static int unixUnlock(sqlite3_file *id, int eFileLock){
 */
 static int closeUnixFile(sqlite3_file *id){
   unixFile *pFile = (unixFile*)id;
-  if( pFile->dirfd>=0 ){
-    robust_close(pFile, pFile->dirfd, __LINE__);
-    pFile->dirfd=-1;
-  }
   if( pFile->h>=0 ){
     robust_close(pFile, pFile->h, __LINE__);
     pFile->h = -1;
@@ -26224,7 +26361,7 @@ static int closeUnixFile(sqlite3_file *id){
 #if OS_VXWORKS
   if( pFile->pId ){
     if( pFile->isDelete ){
-      unlink(pFile->pId->zCanonicalName);
+      osUnlink(pFile->pId->zCanonicalName);
     }
     vxworksReleaseFileId(pFile->pId);
     pFile->pId = 0;
@@ -26473,7 +26610,7 @@ static int dotlockUnlock(sqlite3_file *id, int eFileLock) {
   
   /* To fully unlock the database, delete the lock file */
   assert( eFileLock==NO_LOCK );
-  if( unlink(zLockFile) ){
+  if( osUnlink(zLockFile) ){
     int rc = 0;
     int tErrno = errno;
     if( ENOENT != tErrno ){
@@ -26979,11 +27116,12 @@ static int afpCheckReservedLock(sqlite3_file *id, int *pResOut){
   int rc = SQLITE_OK;
   int reserved = 0;
   unixFile *pFile = (unixFile*)id;
+  afpLockingContext *context;
   
   SimulateIOError( return SQLITE_IOERR_CHECKRESERVEDLOCK; );
   
   assert( pFile );
-  afpLockingContext *context = (afpLockingContext *) pFile->lockingContext;
+  context = (afpLockingContext *) pFile->lockingContext;
   if( context->reserved ){
     *pResOut = 1;
     return SQLITE_OK;
@@ -27123,7 +27261,7 @@ static int afpLock(sqlite3_file *id, int eFileLock){
   ** operating system calls for the specified lock.
   */
   if( eFileLock==SHARED_LOCK ){
-    int lrc1, lrc2, lrc1Errno;
+    int lrc1, lrc2, lrc1Errno = 0;
     long lk, mask;
     
     assert( pInode->nShared==0 );
@@ -27497,17 +27635,19 @@ static int seekAndWrite(unixFile *id, i64 offset, const void *pBuf, int cnt){
 #elif defined(USE_PREAD64)
   do{ got = osPwrite64(id->h, pBuf, cnt, offset);}while( got<0 && errno==EINTR);
 #else
-  newOffset = lseek(id->h, offset, SEEK_SET);
-  SimulateIOError( newOffset-- );
-  if( newOffset!=offset ){
-    if( newOffset == -1 ){
-      ((unixFile*)id)->lastErrno = errno;
-    }else{
-      ((unixFile*)id)->lastErrno = 0;			
+  do{
+    newOffset = lseek(id->h, offset, SEEK_SET);
+    SimulateIOError( newOffset-- );
+    if( newOffset!=offset ){
+      if( newOffset == -1 ){
+        ((unixFile*)id)->lastErrno = errno;
+      }else{
+        ((unixFile*)id)->lastErrno = 0;			
+      }
+      return -1;
     }
-    return -1;
-  }
-  do{ got = osWrite(id->h, pBuf, cnt); }while( got<0 && errno==EINTR );
+    got = osWrite(id->h, pBuf, cnt);
+  }while( got<0 && errno==EINTR );
 #endif
   TIMER_END;
   if( got<0 ){
@@ -27597,11 +27737,11 @@ SQLITE_API int sqlite3_fullsync_count = 0;
 
 /*
 ** We do not trust systems to provide a working fdatasync().  Some do.
-** Others do no.  To be safe, we will stick with the (slower) fsync().
-** If you know that your system does support fdatasync() correctly,
+** Others do not.  To be safe, we will stick with the (slightly slower)
+** fsync(). If you know that your system does support fdatasync() correctly,
 ** then simply compile with -Dfdatasync=fdatasync
 */
-#if !defined(fdatasync) && !defined(__linux__)
+#if !defined(fdatasync)
 # define fdatasync fsync
 #endif
 
@@ -27710,6 +27850,50 @@ static int full_fsync(int fd, int fullSync, int dataOnly){
 }
 
 /*
+** Open a file descriptor to the directory containing file zFilename.
+** If successful, *pFd is set to the opened file descriptor and
+** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
+** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
+** value.
+**
+** The directory file descriptor is used for only one thing - to
+** fsync() a directory to make sure file creation and deletion events
+** are flushed to disk.  Such fsyncs are not needed on newer
+** journaling filesystems, but are required on older filesystems.
+**
+** This routine can be overridden using the xSetSysCall interface.
+** The ability to override this routine was added in support of the
+** chromium sandbox.  Opening a directory is a security risk (we are
+** told) so making it overrideable allows the chromium sandbox to
+** replace this routine with a harmless no-op.  To make this routine
+** a no-op, replace it with a stub that returns SQLITE_OK but leaves
+** *pFd set to a negative number.
+**
+** If SQLITE_OK is returned, the caller is responsible for closing
+** the file descriptor *pFd using close().
+*/
+static int openDirectory(const char *zFilename, int *pFd){
+  int ii;                         /* Index used to scan zDirname[] backwards */
+  int fd = -1;                    /* Directory descriptor; stays -1 if nothing is opened */
+  char zDirname[MAX_PATHNAME+1];  /* Working copy of zFilename, truncated below */
+
+  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
+  for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);  /* scan back to the last '/'; the ii>1 test stops the scan at index 1 */
+  if( ii>0 ){  /* NOTE(review): a name with no '/' leaves ii==1, so only its first character is kept -- confirm callers always pass a path containing '/' */
+    zDirname[ii] = '\0';          /* cut the copy at the position where the scan stopped */
+    fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
+    if( fd>=0 ){
+#ifdef FD_CLOEXEC
+      osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);  /* OR FD_CLOEXEC into the current descriptor flags */
+#endif
+      OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
+    }
+  }
+  *pFd = fd;                      /* stored even when fd is negative */
+  return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));  /* any negative fd is reported through unixLogError() */
+}
+
+/*
 ** Make sure all writes to a particular file are committed to disk.
 **
 ** If dataOnly==0 then both the file itself and its metadata (file
@@ -27749,28 +27933,23 @@ static int unixSync(sqlite3_file *id, int flags){
     pFile->lastErrno = errno;
     return unixLogError(SQLITE_IOERR_FSYNC, "full_fsync", pFile->zPath);
   }
-  if( pFile->dirfd>=0 ){
-    OSTRACE(("DIRSYNC %-3d (have_fullfsync=%d fullsync=%d)\n", pFile->dirfd,
+
+  /* Also fsync the directory containing the file if the DIRSYNC flag
+  ** is set.  This is a one-time occurrence.  Many systems (examples: AIX)
+  ** are unable to fsync a directory, so ignore errors on the fsync.
+  */
+  if( pFile->ctrlFlags & UNIXFILE_DIRSYNC ){
+    int dirfd;
+    OSTRACE(("DIRSYNC %s (have_fullfsync=%d fullsync=%d)\n", pFile->zPath,
             HAVE_FULLFSYNC, isFullsync));
-#ifndef SQLITE_DISABLE_DIRSYNC
-    /* The directory sync is only attempted if full_fsync is
-    ** turned off or unavailable.  If a full_fsync occurred above,
-    ** then the directory sync is superfluous.
-    */
-    if( (!HAVE_FULLFSYNC || !isFullsync) && full_fsync(pFile->dirfd,0,0) ){
-       /*
-       ** We have received multiple reports of fsync() returning
-       ** errors when applied to directories on certain file systems.
-       ** A failed directory sync is not a big deal.  So it seems
-       ** better to ignore the error.  Ticket #1657
-       */
-       /* pFile->lastErrno = errno; */
-       /* return SQLITE_IOERR; */
+    rc = osOpenDirectory(pFile->zPath, &dirfd);
+    if( rc==SQLITE_OK && dirfd>=0 ){
+      full_fsync(dirfd, 0, 0);
+      robust_close(pFile, dirfd, __LINE__);
+    }else if( rc==SQLITE_CANTOPEN ){
+      rc = SQLITE_OK;
     }
-#endif
-    /* Only need to sync once, so close the  directory when we are done */
-    robust_close(pFile, pFile->dirfd, __LINE__);
-    pFile->dirfd = -1;
+    pFile->ctrlFlags &= ~UNIXFILE_DIRSYNC;
   }
   return rc;
 }
@@ -27852,14 +28031,12 @@ static int proxyFileControl(sqlite3_file*,int,void*);
 
 /* 
 ** This function is called to handle the SQLITE_FCNTL_SIZE_HINT 
-** file-control operation.
-**
-** If the user has configured a chunk-size for this file, it could be
-** that the file needs to be extended at this point. Otherwise, the
-** SQLITE_FCNTL_SIZE_HINT operation is a no-op for Unix.
+** file-control operation.  Enlarge the database to nBytes in size
+** (rounded up to the next chunk-size).  If the database is already
+** nBytes or larger, this routine is a no-op.
 */
 static int fcntlSizeHint(unixFile *pFile, i64 nByte){
-  if( pFile->szChunk ){
+  if( pFile->szChunk>0 ){
     i64 nSize;                    /* Required file size */
     struct stat buf;              /* Used to hold return values of fstat() */
    
@@ -27908,21 +28085,37 @@ static int fcntlSizeHint(unixFile *pFile, i64 nByte){
 ** Information and control of an open file handle.
 */
 static int unixFileControl(sqlite3_file *id, int op, void *pArg){
+  unixFile *pFile = (unixFile*)id;
   switch( op ){
     case SQLITE_FCNTL_LOCKSTATE: {
-      *(int*)pArg = ((unixFile*)id)->eFileLock;
+      *(int*)pArg = pFile->eFileLock;
       return SQLITE_OK;
     }
     case SQLITE_LAST_ERRNO: {
-      *(int*)pArg = ((unixFile*)id)->lastErrno;
+      *(int*)pArg = pFile->lastErrno;
       return SQLITE_OK;
     }
     case SQLITE_FCNTL_CHUNK_SIZE: {
-      ((unixFile*)id)->szChunk = *(int *)pArg;
+      pFile->szChunk = *(int *)pArg;
       return SQLITE_OK;
     }
     case SQLITE_FCNTL_SIZE_HINT: {
-      return fcntlSizeHint((unixFile *)id, *(i64 *)pArg);
+      int rc;
+      SimulateIOErrorBenign(1);
+      rc = fcntlSizeHint(pFile, *(i64 *)pArg);
+      SimulateIOErrorBenign(0);
+      return rc;
+    }
+    case SQLITE_FCNTL_PERSIST_WAL: {
+      int bPersist = *(int*)pArg;
+      if( bPersist<0 ){
+        *(int*)pArg = (pFile->ctrlFlags & UNIXFILE_PERSIST_WAL)!=0;
+      }else if( bPersist==0 ){
+        pFile->ctrlFlags &= ~UNIXFILE_PERSIST_WAL;
+      }else{
+        pFile->ctrlFlags |= UNIXFILE_PERSIST_WAL;
+      }
+      return SQLITE_OK;
     }
 #ifndef NDEBUG
     /* The pager calls this method to signal that it has done
@@ -28038,11 +28231,9 @@ struct unixShm {
   unixShmNode *pShmNode;     /* The underlying unixShmNode object */
   unixShm *pNext;            /* Next unixShm with the same unixShmNode */
   u8 hasMutex;               /* True if holding the unixShmNode mutex */
+  u8 id;                     /* Id of this connection within its unixShmNode */
   u16 sharedMask;            /* Mask of shared locks held */
   u16 exclMask;              /* Mask of exclusive locks held */
-#ifdef SQLITE_DEBUG
-  u8 id;                     /* Id of this connection within its unixShmNode */
-#endif
 };
 
 /*
@@ -28138,7 +28329,7 @@ static void unixShmPurge(unixFile *pFd){
   if( p && p->nRef==0 ){
     int i;
     assert( p->pInode==pFd->pInode );
-    if( p->mutex ) sqlite3_mutex_free(p->mutex);
+    sqlite3_mutex_free(p->mutex);
     for(i=0; i<p->nRegion; i++){
       if( p->h>=0 ){
         munmap(p->apRegion[i], p->szRegion);
@@ -28254,16 +28445,15 @@ static int unixOpenSharedMemory(unixFile *pDbFd){
     }
 
     if( pInode->bProcessLock==0 ){
-      pShmNode->h = robust_open(zShmFilename, O_RDWR|O_CREAT,
-                               (sStat.st_mode & 0777));
+      const char *zRO;
+      int openFlags = O_RDWR | O_CREAT;
+      zRO = sqlite3_uri_parameter(pDbFd->zPath, "readonly_shm");
+      if( zRO && sqlite3GetBoolean(zRO) ){
+        openFlags = O_RDONLY;
+        pShmNode->isReadonly = 1;
+      }
+      pShmNode->h = robust_open(zShmFilename, openFlags, (sStat.st_mode&0777));
       if( pShmNode->h<0 ){
-        const char *zRO;
-        zRO = sqlite3_uri_parameter(pDbFd->zPath, "readonly_shm");
-        if( zRO && sqlite3GetBoolean(zRO) ){
-          pShmNode->h = robust_open(zShmFilename, O_RDONLY,
-                                    (sStat.st_mode & 0777));
-          pShmNode->isReadonly = 1;
-        }
         if( pShmNode->h<0 ){
           rc = unixLogError(SQLITE_CANTOPEN_BKPT, "open", zShmFilename);
           goto shm_open_err;
@@ -28607,7 +28797,7 @@ static int unixShmUnmap(
   assert( pShmNode->nRef>0 );
   pShmNode->nRef--;
   if( pShmNode->nRef==0 ){
-    if( deleteFlag && pShmNode->h>=0 ) unlink(pShmNode->zFilename);
+    if( deleteFlag && pShmNode->h>=0 ) osUnlink(pShmNode->zFilename);
     unixShmPurge(pDbFd);
   }
   unixLeaveMutex();
@@ -28920,7 +29110,7 @@ typedef const sqlite3_io_methods *(*finder_type)(const char*,unixFile*);
 static int fillInUnixFile(
   sqlite3_vfs *pVfs,      /* Pointer to vfs object */
   int h,                  /* Open file descriptor of file being opened */
-  int dirfd,              /* Directory file descriptor */
+  int syncDir,            /* True to sync directory on first sync */
   sqlite3_file *pId,      /* Write to the unixFile structure here */
   const char *zFilename,  /* Name of the file being opened */
   int noLock,             /* Omit locking if true */
@@ -28949,9 +29139,11 @@ static int fillInUnixFile(
   assert( zFilename==0 || zFilename[0]=='/' );
 #endif
 
+  /* No locking occurs in temporary files */
+  assert( zFilename!=0 || noLock );
+
   OSTRACE(("OPEN    %-3d %s\n", h, zFilename));
   pNew->h = h;
-  pNew->dirfd = dirfd;
   pNew->zPath = zFilename;
   if( memcmp(pVfs->zName,"unix-excl",10)==0 ){
     pNew->ctrlFlags = UNIXFILE_EXCL;
@@ -28961,6 +29153,9 @@ static int fillInUnixFile(
   if( isReadOnly ){
     pNew->ctrlFlags |= UNIXFILE_RDONLY;
   }
+  if( syncDir ){
+    pNew->ctrlFlags |= UNIXFILE_DIRSYNC;
+  }
 
 #if OS_VXWORKS
   pNew->pId = vxworksFindFileId(zFilename);
@@ -29048,6 +29243,7 @@ static int fillInUnixFile(
     */
     char *zLockFile;
     int nFilename;
+    assert( zFilename!=0 );
     nFilename = (int)strlen(zFilename) + 6;
     zLockFile = (char *)sqlite3_malloc(nFilename);
     if( zLockFile==0 ){
@@ -29087,13 +29283,12 @@ static int fillInUnixFile(
   if( rc!=SQLITE_OK ){
     if( h>=0 ) robust_close(pNew, h, __LINE__);
     h = -1;
-    unlink(zFilename);
+    osUnlink(zFilename);
     isDelete = 0;
   }
   pNew->isDelete = isDelete;
 #endif
   if( rc!=SQLITE_OK ){
-    if( dirfd>=0 ) robust_close(pNew, dirfd, __LINE__);
     if( h>=0 ) robust_close(pNew, h, __LINE__);
   }else{
     pNew->pMethod = pLockingStyle;
@@ -29103,37 +29298,6 @@ static int fillInUnixFile(
 }
 
 /*
-** Open a file descriptor to the directory containing file zFilename.
-** If successful, *pFd is set to the opened file descriptor and
-** SQLITE_OK is returned. If an error occurs, either SQLITE_NOMEM
-** or SQLITE_CANTOPEN is returned and *pFd is set to an undefined
-** value.
-**
-** If SQLITE_OK is returned, the caller is responsible for closing
-** the file descriptor *pFd using close().
-*/
-static int openDirectory(const char *zFilename, int *pFd){
-  int ii;
-  int fd = -1;
-  char zDirname[MAX_PATHNAME+1];
-
-  sqlite3_snprintf(MAX_PATHNAME, zDirname, "%s", zFilename);
-  for(ii=(int)strlen(zDirname); ii>1 && zDirname[ii]!='/'; ii--);
-  if( ii>0 ){
-    zDirname[ii] = '\0';
-    fd = robust_open(zDirname, O_RDONLY|O_BINARY, 0);
-    if( fd>=0 ){
-#ifdef FD_CLOEXEC
-      osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
-#endif
-      OSTRACE(("OPENDIR %-3d %s\n", fd, zDirname));
-    }
-  }
-  *pFd = fd;
-  return (fd>=0?SQLITE_OK:unixLogError(SQLITE_CANTOPEN_BKPT, "open", zDirname));
-}
-
-/*
 ** Return the name of a directory in which to put temporary files.
 ** If no suitable temporary file directory can be found, return NULL.
 */
@@ -29247,7 +29411,7 @@ static UnixUnusedFd *findReusableFd(const char *zPath, int flags){
   **
   ** Even if a subsequent open() call does succeed, the consequences of
   ** not searching for a resusable file descriptor are not dire.  */
-  if( 0==stat(zPath, &sStat) ){
+  if( 0==osStat(zPath, &sStat) ){
     unixInodeInfo *pInode;
 
     unixEnterMutex();
@@ -29314,16 +29478,24 @@ static int findCreateFileMode(
     **   "<path to db>-journalNN"
     **   "<path to db>-walNN"
     **
-    ** where NN is a 4 digit decimal number. The NN naming schemes are 
+    ** where NN is a decimal number. The NN naming schemes are 
     ** used by the test_multiplex.c module.
     */
     nDb = sqlite3Strlen30(zPath) - 1; 
-    while( nDb>0 && zPath[nDb]!='-' ) nDb--;
-    if( nDb==0 ) return SQLITE_OK;
+#ifdef SQLITE_ENABLE_8_3_NAMES
+    while( nDb>0 && !sqlite3Isalnum(zPath[nDb]) ) nDb--;
+    if( nDb==0 || zPath[nDb]!='-' ) return SQLITE_OK;
+#else
+    while( zPath[nDb]!='-' ){
+      assert( nDb>0 );
+      assert( zPath[nDb]!='\n' );
+      nDb--;
+    }
+#endif
     memcpy(zDb, zPath, nDb);
     zDb[nDb] = '\0';
 
-    if( 0==stat(zDb, &sStat) ){
+    if( 0==osStat(zDb, &sStat) ){
       *pMode = sStat.st_mode & 0777;
     }else{
       rc = SQLITE_IOERR_FSTAT;
@@ -29365,7 +29537,6 @@ static int unixOpen(
 ){
   unixFile *p = (unixFile *)pFile;
   int fd = -1;                   /* File descriptor returned by open() */
-  int dirfd = -1;                /* Directory file descriptor */
   int openFlags = 0;             /* Flags to pass to open() */
   int eType = flags&0xFFFFFF00;  /* Type of file to open */
   int noLock;                    /* True to omit locking primitives */
@@ -29379,12 +29550,15 @@ static int unixOpen(
 #if SQLITE_ENABLE_LOCKING_STYLE
   int isAutoProxy  = (flags & SQLITE_OPEN_AUTOPROXY);
 #endif
+#if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
+  struct statfs fsInfo;
+#endif
 
   /* If creating a master or main-file journal, this function will open
   ** a file-descriptor on the directory too. The first time unixSync()
   ** is called the directory file descriptor will be fsync()ed and close()d.
   */
-  int isOpenDirectory = (isCreate && (
+  int syncDir = (isCreate && (
         eType==SQLITE_OPEN_MASTER_JOURNAL 
      || eType==SQLITE_OPEN_MAIN_JOURNAL 
      || eType==SQLITE_OPEN_WAL
@@ -29438,7 +29612,7 @@ static int unixOpen(
     p->pUnused = pUnused;
   }else if( !zName ){
     /* If zName is NULL, the upper layer is requesting a temp file. */
-    assert(isDelete && !isOpenDirectory);
+    assert(isDelete && !syncDir);
     rc = unixGetTempname(MAX_PATHNAME+1, zTmpname);
     if( rc!=SQLITE_OK ){
       return rc;
@@ -29494,7 +29668,7 @@ static int unixOpen(
 #if OS_VXWORKS
     zPath = zName;
 #else
-    unlink(zName);
+    osUnlink(zName);
 #endif
   }
 #if SQLITE_ENABLE_LOCKING_STYLE
@@ -29503,19 +29677,6 @@ static int unixOpen(
   }
 #endif
 
-  if( isOpenDirectory ){
-    rc = openDirectory(zPath, &dirfd);
-    if( rc!=SQLITE_OK ){
-      /* It is safe to close fd at this point, because it is guaranteed not
-      ** to be open on a database file. If it were open on a database file,
-      ** it would not be safe to close as this would release any locks held
-      ** on the file by this process.  */
-      assert( eType!=SQLITE_OPEN_MAIN_DB );
-      robust_close(p, fd, __LINE__);
-      goto open_finished;
-    }
-  }
-
 #ifdef FD_CLOEXEC
   osFcntl(fd, F_SETFD, osFcntl(fd, F_GETFD, 0) | FD_CLOEXEC);
 #endif
@@ -29524,10 +29685,8 @@ static int unixOpen(
 
   
 #if defined(__APPLE__) || SQLITE_ENABLE_LOCKING_STYLE
-  struct statfs fsInfo;
   if( fstatfs(fd, &fsInfo) == -1 ){
     ((unixFile*)pFile)->lastErrno = errno;
-    if( dirfd>=0 ) robust_close(p, dirfd, __LINE__);
     robust_close(p, fd, __LINE__);
     return SQLITE_IOERR_ACCESS;
   }
@@ -29549,7 +29708,6 @@ static int unixOpen(
     if( envforce!=NULL ){
       useProxy = atoi(envforce)>0;
     }else{
-      struct statfs fsInfo;
       if( statfs(zPath, &fsInfo) == -1 ){
         /* In theory, the close(fd) call is sub-optimal. If the file opened
         ** with fd is a database file, and there are other connections open
@@ -29559,9 +29717,6 @@ static int unixOpen(
         ** not while other file descriptors opened by the same process on
         ** the same file are working.  */
         p->lastErrno = errno;
-        if( dirfd>=0 ){
-          robust_close(p, dirfd, __LINE__);
-        }
         robust_close(p, fd, __LINE__);
         rc = SQLITE_IOERR_ACCESS;
         goto open_finished;
@@ -29569,7 +29724,7 @@ static int unixOpen(
       useProxy = !(fsInfo.f_flags&MNT_LOCAL);
     }
     if( useProxy ){
-      rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock,
+      rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
                           isDelete, isReadonly);
       if( rc==SQLITE_OK ){
         rc = proxyTransformUnixFile((unixFile*)pFile, ":auto:");
@@ -29587,7 +29742,7 @@ static int unixOpen(
   }
 #endif
   
-  rc = fillInUnixFile(pVfs, fd, dirfd, pFile, zPath, noLock,
+  rc = fillInUnixFile(pVfs, fd, syncDir, pFile, zPath, noLock,
                       isDelete, isReadonly);
 open_finished:
   if( rc!=SQLITE_OK ){
@@ -29609,13 +29764,13 @@ static int unixDelete(
   int rc = SQLITE_OK;
   UNUSED_PARAMETER(NotUsed);
   SimulateIOError(return SQLITE_IOERR_DELETE);
-  if( unlink(zPath)==(-1) && errno!=ENOENT ){
+  if( osUnlink(zPath)==(-1) && errno!=ENOENT ){
     return unixLogError(SQLITE_IOERR_DELETE, "unlink", zPath);
   }
 #ifndef SQLITE_DISABLE_DIRSYNC
   if( dirSync ){
     int fd;
-    rc = openDirectory(zPath, &fd);
+    rc = osOpenDirectory(zPath, &fd);
     if( rc==SQLITE_OK ){
 #if OS_VXWORKS
       if( fsync(fd)==-1 )
@@ -29626,6 +29781,8 @@ static int unixDelete(
         rc = unixLogError(SQLITE_IOERR_DIR_FSYNC, "fsync", zPath);
       }
       robust_close(0, fd, __LINE__);
+    }else if( rc==SQLITE_CANTOPEN ){
+      rc = SQLITE_OK;
     }
   }
 #endif
@@ -29668,7 +29825,7 @@ static int unixAccess(
   *pResOut = (osAccess(zPath, amode)==0);
   if( flags==SQLITE_ACCESS_EXISTS && *pResOut ){
     struct stat buf;
-    if( 0==stat(zPath, &buf) && buf.st_size==0 ){
+    if( 0==osStat(zPath, &buf) && buf.st_size==0 ){
       *pResOut = 0;
     }
   }
@@ -29866,10 +30023,12 @@ SQLITE_API int sqlite3_current_time = 0;  /* Fake system time in seconds since 1
 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the
 ** proleptic Gregorian calendar.
 **
-** On success, return 0.  Return 1 if the time and date cannot be found.
+** On success, return SQLITE_OK.  Return SQLITE_ERROR if the time and date 
+** cannot be found.
 */
 static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
   static const sqlite3_int64 unixEpoch = 24405875*(sqlite3_int64)8640000;
+  int rc = SQLITE_OK;
 #if defined(NO_GETTOD)
   time_t t;
   time(&t);
@@ -29880,8 +30039,11 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
   *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_nsec/1000000;
 #else
   struct timeval sNow;
-  gettimeofday(&sNow, 0);
-  *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
+  if( gettimeofday(&sNow, 0)==0 ){
+    *piNow = unixEpoch + 1000*(sqlite3_int64)sNow.tv_sec + sNow.tv_usec/1000;
+  }else{
+    rc = SQLITE_ERROR;
+  }
 #endif
 
 #ifdef SQLITE_TEST
@@ -29890,7 +30052,7 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
   }
 #endif
   UNUSED_PARAMETER(NotUsed);
-  return 0;
+  return rc;
 }
 
 /*
@@ -29899,11 +30061,12 @@ static int unixCurrentTimeInt64(sqlite3_vfs *NotUsed, sqlite3_int64 *piNow){
 ** return 0.  Return 1 if the time and date cannot be found.
 */
 static int unixCurrentTime(sqlite3_vfs *NotUsed, double *prNow){
-  sqlite3_int64 i;
+  sqlite3_int64 i = 0;   /* keeps *prNow well-defined if the call below fails */
+  int rc;                /* result of unixCurrentTimeInt64(), passed to caller */
   UNUSED_PARAMETER(NotUsed);
-  unixCurrentTimeInt64(0, &i);
+  rc = unixCurrentTimeInt64(0, &i);
   *prNow = i/86400000.0;
-  return 0;
+  return rc;
 }
 
 /*
@@ -30187,7 +30350,6 @@ static int proxyCreateUnixFile(
     int islockfile           /* if non zero missing dirs will be created */
 ) {
   int fd = -1;
-  int dirfd = -1;
   unixFile *pNew;
   int rc = SQLITE_OK;
   int openFlags = O_RDWR | O_CREAT;
@@ -30252,7 +30414,7 @@ static int proxyCreateUnixFile(
   pUnused->flags = openFlags;
   pNew->pUnused = pUnused;
   
-  rc = fillInUnixFile(&dummyVfs, fd, dirfd, (sqlite3_file*)pNew, path, 0, 0, 0);
+  rc = fillInUnixFile(&dummyVfs, fd, 0, (sqlite3_file*)pNew, path, 0, 0, 0);
   if( rc==SQLITE_OK ){
     *ppFile = pNew;
     return SQLITE_OK;
@@ -30292,6 +30454,8 @@ static int proxyGetHostID(unsigned char *pHostID, int *pError){
       return SQLITE_IOERR;
     }
   }
+#else
+  UNUSED_PARAMETER(pError);
 #endif
 #ifdef SQLITE_TEST
   /* simulate multiple hosts by creating unique hostid file paths */
@@ -30366,7 +30530,7 @@ static int proxyBreakConchLock(unixFile *pFile, uuid_t myHostID){
 end_breaklock:
   if( rc ){
     if( fd>=0 ){
-      unlink(tPath);
+      osUnlink(tPath);
       robust_close(pFile, fd, __LINE__);
     }
     fprintf(stderr, "failed to break stale lock on %s, %s\n", cPath, errmsg);
@@ -30384,6 +30548,7 @@ static int proxyConchLock(unixFile *pFile, uuid_t myHostID, int lockType){
   int nTries = 0;
   struct timespec conchModTime;
   
+  memset(&conchModTime, 0, sizeof(conchModTime));
   do {
     rc = conchFile->pMethod->xLock((sqlite3_file*)conchFile, lockType);
     nTries ++;
@@ -30615,11 +30780,12 @@ static int proxyTakeConch(unixFile *pFile){
     end_takeconch:
       OSTRACE(("TRANSPROXY: CLOSE  %d\n", pFile->h));
       if( rc==SQLITE_OK && pFile->openFlags ){
+        int fd;
         if( pFile->h>=0 ){
           robust_close(pFile, pFile->h, __LINE__);
         }
         pFile->h = -1;
-        int fd = robust_open(pCtx->dbPath, pFile->openFlags,
+        fd = robust_open(pCtx->dbPath, pFile->openFlags,
                       SQLITE_DEFAULT_FILE_PERMISSIONS);
         OSTRACE(("TRANSPROXY: OPEN  %d\n", fd));
         if( fd>=0 ){
@@ -31189,7 +31355,7 @@ SQLITE_API int sqlite3_os_init(void){
 
   /* Double-check that the aSyscall[] array has been constructed
   ** correctly.  See ticket [bb3a86e890c8e96ab] */
-  assert( ArraySize(aSyscall)==16 );
+  assert( ArraySize(aSyscall)==18 );
 
   /* Register all VFSes defined in the aVfs[] array */
   for(i=0; i<(sizeof(aVfs)/sizeof(sqlite3_vfs)); i++){
@@ -31305,11 +31471,14 @@ SQLITE_API int sqlite3_os_end(void){
 # error "The MEMORY_DEBUG macro is obsolete.  Use SQLITE_DEBUG instead."
 #endif
 
-#ifdef SQLITE_DEBUG
-SQLITE_PRIVATE int sqlite3OSTrace = 0;
-#define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
+# ifndef SQLITE_DEBUG_OS_TRACE
+#   define SQLITE_DEBUG_OS_TRACE 0
+# endif
+  int sqlite3OSTrace = SQLITE_DEBUG_OS_TRACE;
+# define OSTRACE(X)          if( sqlite3OSTrace ) sqlite3DebugPrintf X
 #else
-#define OSTRACE(X)
+# define OSTRACE(X)
 #endif
 
 /*
@@ -31521,8 +31690,9 @@ struct winFile {
   const sqlite3_io_methods *pMethod; /*** Must be first ***/
   sqlite3_vfs *pVfs;      /* The VFS used to open this file */
   HANDLE h;               /* Handle for accessing the file */
-  unsigned char locktype; /* Type of lock currently held on this file */
+  u8 locktype;            /* Type of lock currently held on this file */
   short sharedLockByte;   /* Randomly chosen byte used as a shared lock */
+  u8 bPersistWal;         /* True to persist WAL files */
   DWORD lastErrno;        /* The Windows errno from the last I/O error */
   DWORD sectorSize;       /* Sector size of the device file is on */
   winShm *pShm;           /* Instance of shared memory on this file */
@@ -31537,6 +31707,76 @@ struct winFile {
 #endif
 };
 
+/*
+ * If compiled with SQLITE_WIN32_MALLOC on Windows, we will use the
+ * various Win32 API heap functions instead of our own.
+ */
+#ifdef SQLITE_WIN32_MALLOC
+/*
+ * The initial size of the Win32-specific heap.  This value may be zero.
+ */
+#ifndef SQLITE_WIN32_HEAP_INIT_SIZE
+#  define SQLITE_WIN32_HEAP_INIT_SIZE ((SQLITE_DEFAULT_CACHE_SIZE) * \
+                                       (SQLITE_DEFAULT_PAGE_SIZE) + 4194304)
+#endif
+
+/*
+ * The maximum size of the Win32-specific heap.  This value may be zero.
+ */
+#ifndef SQLITE_WIN32_HEAP_MAX_SIZE
+#  define SQLITE_WIN32_HEAP_MAX_SIZE  (0)
+#endif
+
+/*
+ * The extra flags to use in calls to the Win32 heap APIs.  This value may be
+ * zero for the default behavior.
+ */
+#ifndef SQLITE_WIN32_HEAP_FLAGS
+#  define SQLITE_WIN32_HEAP_FLAGS     (0)
+#endif
+
+/*
+** The winMemData structure stores information required by the Win32-specific
+** sqlite3_mem_methods implementation.
+*/
+typedef struct winMemData winMemData;
+struct winMemData {
+#ifndef NDEBUG
+  u32 magic;    /* Must equal WINMEM_MAGIC; asserted to detect corruption. */
+#endif
+  HANDLE hHeap; /* Win32 heap used for all allocations; NULL until init. */
+  BOOL bOwned;  /* TRUE if winMemInit() created hHeap; shutdown destroys it. */
+};
+
+#ifndef NDEBUG
+#define WINMEM_MAGIC     0x42b2830b
+#endif
+
+static struct winMemData win_mem_data = {
+#ifndef NDEBUG
+  WINMEM_MAGIC,
+#endif
+  NULL, FALSE
+};
+
+#ifndef NDEBUG
+#define winMemAssertMagic() assert( win_mem_data.magic==WINMEM_MAGIC )
+#else
+#define winMemAssertMagic()
+#endif
+
+#define winMemGetHeap() win_mem_data.hHeap
+
+static void *winMemMalloc(int nBytes);
+static void winMemFree(void *pPrior);
+static void *winMemRealloc(void *pPrior, int nBytes);
+static int winMemSize(void *p);
+static int winMemRoundup(int n);
+static int winMemInit(void *pAppData);
+static void winMemShutdown(void *pAppData);
+
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void);
+#endif /* SQLITE_WIN32_MALLOC */
 
 /*
 ** Forward prototypes.
@@ -31589,6 +31829,188 @@ static int sqlite3_os_type = 0;
   }
 #endif /* SQLITE_OS_WINCE */
 
+#ifdef SQLITE_WIN32_MALLOC
+/*
+** Allocate nBytes (>=0) from the Win32 heap; log and return NULL on failure.
+*/
+static void *winMemMalloc(int nBytes){
+  HANDLE hHeap;
+  void *p;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+  assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+  assert( nBytes>=0 );
+  p = HeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes);
+  if( !p ){
+    sqlite3_log(SQLITE_NOMEM, "failed to HeapAlloc %u bytes (%lu), heap=%p",
+        nBytes, GetLastError(), (void*)hHeap); /* DWORD error code -> %lu */
+  }
+  return p;
+}
+
+/*
+** Return pPrior to the Win32 heap.  NULL is ignored; a failure is logged.
+*/
+static void winMemFree(void *pPrior){
+  HANDLE hHeap;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+  assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) );
+#endif
+  if( !pPrior ) return; /* Passing NULL to HeapFree is undefined. */
+  if( !HeapFree(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) ){
+    sqlite3_log(SQLITE_NOMEM, "failed to HeapFree block %p (%lu), heap=%p",
+        pPrior, GetLastError(), (void*)hHeap); /* DWORD error code -> %lu */
+  }
+}
+
+/*
+** Resize pPrior to nBytes; a NULL pPrior allocates.  A NULL result is logged.
+*/
+static void *winMemRealloc(void *pPrior, int nBytes){
+  HANDLE hHeap;
+  void *p;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+  assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior) );
+#endif
+  assert( nBytes>=0 );
+  if( !pPrior ){
+    p = HeapAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, (SIZE_T)nBytes);
+  }else{
+    p = HeapReAlloc(hHeap, SQLITE_WIN32_HEAP_FLAGS, pPrior, (SIZE_T)nBytes);
+  }
+  if( !p ){
+    sqlite3_log(SQLITE_NOMEM, "failed to %s %u bytes (%lu), heap=%p",
+        pPrior ? "HeapReAlloc" : "HeapAlloc", nBytes, GetLastError(),
+        (void*)hHeap); /* GetLastError() is a DWORD, hence %lu */
+  }
+  return p;
+}
+
+/*
+** Return the size in bytes of allocation p; 0 if p is NULL or HeapSize fails.
+*/
+static int winMemSize(void *p){
+  HANDLE hHeap;
+  SIZE_T n;
+
+  winMemAssertMagic();
+  hHeap = winMemGetHeap();
+  assert( hHeap!=0 );
+  assert( hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+  assert ( HeapValidate(hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+  if( !p ) return 0;
+  n = HeapSize(hHeap, SQLITE_WIN32_HEAP_FLAGS, p);
+  if( n==(SIZE_T)-1 ){  /* HeapSize reports failure as (SIZE_T)-1 */
+    sqlite3_log(SQLITE_NOMEM, "failed to HeapSize block %p (%lu), heap=%p",
+        p, GetLastError(), (void*)hHeap); /* DWORD error code -> %lu */
+    return 0;
+  }
+  return (int)n;
+}
+
+/*
+** Round up a request size to the next valid allocation size (n is unchanged).
+*/
+static int winMemRoundup(int n){
+  return n;
+}
+
+/*
+** Initialize this module: create the heap in pAppData if none exists yet.
+*/
+static int winMemInit(void *pAppData){
+  winMemData *pWinMemData = (winMemData *)pAppData;
+
+  if( !pWinMemData ) return SQLITE_ERROR;
+  assert( pWinMemData->magic==WINMEM_MAGIC );
+  if( !pWinMemData->hHeap ){
+    pWinMemData->hHeap = HeapCreate(SQLITE_WIN32_HEAP_FLAGS,
+                                    SQLITE_WIN32_HEAP_INIT_SIZE,
+                                    SQLITE_WIN32_HEAP_MAX_SIZE);
+    if( !pWinMemData->hHeap ){
+      sqlite3_log(SQLITE_NOMEM,
+          "failed to HeapCreate (%lu), flags=%u, initSize=%u, maxSize=%u",
+          GetLastError(), SQLITE_WIN32_HEAP_FLAGS, SQLITE_WIN32_HEAP_INIT_SIZE,
+          SQLITE_WIN32_HEAP_MAX_SIZE);
+      return SQLITE_NOMEM;
+    }
+    pWinMemData->bOwned = TRUE;  /* created here, so shutdown must destroy it */
+  }
+  assert( pWinMemData->hHeap!=0 );
+  assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+  assert( HeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+  return SQLITE_OK;
+}
+
+/*
+** Deinitialize this module: destroy the heap if owned, then forget it.
+*/
+static void winMemShutdown(void *pAppData){
+  winMemData *pWinMemData = (winMemData *)pAppData;
+
+  if( !pWinMemData ) return;
+  if( pWinMemData->hHeap ){
+    assert( pWinMemData->hHeap!=INVALID_HANDLE_VALUE );
+#ifdef SQLITE_WIN32_MALLOC_VALIDATE
+    assert( HeapValidate(pWinMemData->hHeap, SQLITE_WIN32_HEAP_FLAGS, NULL) );
+#endif
+    if( pWinMemData->bOwned ){
+      if( !HeapDestroy(pWinMemData->hHeap) ){
+        sqlite3_log(SQLITE_NOMEM, "failed to HeapDestroy (%lu), heap=%p",
+            GetLastError(), (void*)pWinMemData->hHeap);
+      }
+      pWinMemData->bOwned = FALSE;
+    }
+    pWinMemData->hHeap = NULL;  /* cleared even when the heap was not owned */
+  }
+}
+
+/*
+** Return the sqlite3_mem_methods table whose function pointers refer to
+** the Win32 heap routines in this file and whose last slot is &win_mem_data.
+** Installing the table into the global configuration is left to the caller.
+**
+** This routine is only called by sqlite3_config(), and therefore
+** is not required to be threadsafe (it is not).
+*/
+SQLITE_PRIVATE const sqlite3_mem_methods *sqlite3MemGetWin32(void){
+  static const sqlite3_mem_methods winMemMethods = {
+    winMemMalloc,
+    winMemFree,
+    winMemRealloc,
+    winMemSize,
+    winMemRoundup,
+    winMemInit,
+    winMemShutdown,
+    &win_mem_data
+  };
+  return &winMemMethods;
+}
+
+SQLITE_PRIVATE void sqlite3MemSetDefault(void){
+  sqlite3_config(SQLITE_CONFIG_MALLOC, sqlite3MemGetWin32()); /* Win32 heap */
+}
+#endif /* SQLITE_WIN32_MALLOC */
+
 /*
 ** Convert a UTF-8 string to microsoft unicode (UTF-16?). 
 **
@@ -31821,6 +32243,54 @@ static int winLogErrorAtLine(
   return errcode;
 }
 
+/*
+** The number of times that a ReadFile(), WriteFile(), or DeleteFile() call
+** will be retried following a locking error - probably caused by
+** antivirus software.  Also the initial delay, in milliseconds, before the
+** first retry.  The delay increases linearly with each retry.
+*/
+#ifndef SQLITE_WIN32_IOERR_RETRY
+# define SQLITE_WIN32_IOERR_RETRY 10
+#endif
+#ifndef SQLITE_WIN32_IOERR_RETRY_DELAY
+# define SQLITE_WIN32_IOERR_RETRY_DELAY 25
+#endif
+static int win32IoerrRetry = SQLITE_WIN32_IOERR_RETRY;
+static int win32IoerrRetryDelay = SQLITE_WIN32_IOERR_RETRY_DELAY;
+
+/*
+** After a failed Win32 I/O call, decide whether to retry.  Returns TRUE
+** (after sleeping and bumping *pnRetry) for access-denied, lock or sharing
+** violations while *pnRetry<win32IoerrRetry; otherwise returns FALSE.
+*/
+static int retryIoerr(int *pnRetry){
+  DWORD e;
+  if( *pnRetry>=win32IoerrRetry ){
+    return 0;
+  }
+  e = GetLastError();
+  if( e==ERROR_ACCESS_DENIED ||
+      e==ERROR_LOCK_VIOLATION ||
+      e==ERROR_SHARING_VIOLATION ){
+    Sleep(win32IoerrRetryDelay*(1+*pnRetry));  /* delay grows per attempt */
+    ++*pnRetry;
+    return 1;
+  }
+  return 0;
+}
+
+/*
+** Log an I/O error retry episode; nRetry==0 (no retries) logs nothing.
+*/
+static void logIoerr(int nRetry){
+  if( nRetry ){
+    sqlite3_log(SQLITE_IOERR, 
+      "delayed %dms for lock/sharing conflict",
+      win32IoerrRetryDelay*nRetry*(nRetry+1)/2  /* delay*(1+2+..+nRetry) */
+    );
+  }
+}
+
 #if SQLITE_OS_WINCE
 /*************************************************************************
 ** This section contains code for WinCE only.
@@ -31829,6 +32299,7 @@ static int winLogErrorAtLine(
 ** WindowsCE does not have a localtime() function.  So create a
 ** substitute.
 */
+/* #include <time.h> */
 struct tm *__cdecl localtime(const time_t *t)
 {
   static struct tm y;
@@ -32238,6 +32709,7 @@ static int winRead(
 ){
   winFile *pFile = (winFile*)id;  /* file handle */
   DWORD nRead;                    /* Number of bytes actually read from file */
+  int nRetry = 0;                 /* Number of retries */
 
   assert( id!=0 );
   SimulateIOError(return SQLITE_IOERR_READ);
@@ -32246,10 +32718,12 @@ static int winRead(
   if( seekWinFile(pFile, offset) ){
     return SQLITE_FULL;
   }
-  if( !ReadFile(pFile->h, pBuf, amt, &nRead, 0) ){
+  while( !ReadFile(pFile->h, pBuf, amt, &nRead, 0) ){
+    if( retryIoerr(&nRetry) ) continue;
     pFile->lastErrno = GetLastError();
     return winLogError(SQLITE_IOERR_READ, "winRead", pFile->zPath);
   }
+  logIoerr(nRetry);
   if( nRead<(DWORD)amt ){
     /* Unread parts of the buffer must be zero-filled */
     memset(&((char*)pBuf)[nRead], 0, amt-nRead);
@@ -32271,6 +32745,7 @@ static int winWrite(
 ){
   int rc;                         /* True if error has occured, else false */
   winFile *pFile = (winFile*)id;  /* File handle */
+  int nRetry = 0;                 /* Number of retries */
 
   assert( amt>0 );
   assert( pFile );
@@ -32285,7 +32760,12 @@ static int winWrite(
     int nRem = amt;               /* Number of bytes yet to be written */
     DWORD nWrite;                 /* Bytes written by each WriteFile() call */
 
-    while( nRem>0 && WriteFile(pFile->h, aRem, nRem, &nWrite, 0) && nWrite>0 ){
+    while( nRem>0 ){
+      if( !WriteFile(pFile->h, aRem, nRem, &nWrite, 0) ){
+        if( retryIoerr(&nRetry) ) continue;
+        break;
+      }
+      if( nWrite<=0 ) break;
       aRem += nWrite;
       nRem -= nWrite;
     }
@@ -32301,6 +32781,8 @@ static int winWrite(
       return SQLITE_FULL;
     }
     return winLogError(SQLITE_IOERR_WRITE, "winWrite", pFile->zPath);
+  }else{
+    logIoerr(nRetry);
   }
   return SQLITE_OK;
 }
@@ -32322,7 +32804,7 @@ static int winTruncate(sqlite3_file *id, sqlite3_int64 nByte){
   ** actual file size after the operation may be larger than the requested
   ** size).
   */
-  if( pFile->szChunk ){
+  if( pFile->szChunk>0 ){
     nByte = ((nByte + pFile->szChunk - 1)/pFile->szChunk) * pFile->szChunk;
   }
 
@@ -32351,9 +32833,19 @@ SQLITE_API int sqlite3_fullsync_count = 0;
 ** Make sure all writes to a particular file are committed to disk.
 */
 static int winSync(sqlite3_file *id, int flags){
-#if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || defined(SQLITE_DEBUG)
-  winFile *pFile = (winFile*)id;
+#ifndef SQLITE_NO_SYNC
+  /*
+  ** Used only when SQLITE_NO_SYNC is not defined.
+   */
   BOOL rc;
+#endif
+#if !defined(NDEBUG) || !defined(SQLITE_NO_SYNC) || \
+    (defined(SQLITE_TEST) && defined(SQLITE_DEBUG))
+  /*
+  ** Used when SQLITE_NO_SYNC is not defined and by the assert() and/or
+  ** OSTRACE() macros.
+   */
+  winFile *pFile = (winFile*)id;
 #else
   UNUSED_PARAMETER(id);
 #endif
@@ -32694,29 +33186,62 @@ static int winUnlock(sqlite3_file *id, int locktype){
 ** Control and query of the open file handle.
 */
 static int winFileControl(sqlite3_file *id, int op, void *pArg){
+  winFile *pFile = (winFile*)id;
   switch( op ){
     case SQLITE_FCNTL_LOCKSTATE: {
-      *(int*)pArg = ((winFile*)id)->locktype;
+      *(int*)pArg = pFile->locktype;
       return SQLITE_OK;
     }
     case SQLITE_LAST_ERRNO: {
-      *(int*)pArg = (int)((winFile*)id)->lastErrno;
+      *(int*)pArg = (int)pFile->lastErrno;
       return SQLITE_OK;
     }
     case SQLITE_FCNTL_CHUNK_SIZE: {
-      ((winFile*)id)->szChunk = *(int *)pArg;
+      pFile->szChunk = *(int *)pArg;
       return SQLITE_OK;
     }
     case SQLITE_FCNTL_SIZE_HINT: {
-      sqlite3_int64 sz = *(sqlite3_int64*)pArg;
-      SimulateIOErrorBenign(1);
-      winTruncate(id, sz);
-      SimulateIOErrorBenign(0);
+      if( pFile->szChunk>0 ){  /* hint is acted on only with a chunk size */
+        sqlite3_int64 oldSz;
+        int rc = winFileSize(id, &oldSz);
+        if( rc==SQLITE_OK ){
+          sqlite3_int64 newSz = *(sqlite3_int64*)pArg;
+          if( newSz>oldSz ){  /* only ever grow the file, never shrink it */
+            SimulateIOErrorBenign(1);
+            rc = winTruncate(id, newSz);
+            SimulateIOErrorBenign(0);
+          }
+        }
+        return rc;
+      }
+      return SQLITE_OK;
+    }
+    case SQLITE_FCNTL_PERSIST_WAL: {
+      int bPersist = *(int*)pArg;
+      if( bPersist<0 ){  /* negative: report the current setting */
+        *(int*)pArg = pFile->bPersistWal;
+      }else{             /* otherwise store it, normalized to 0 or 1 */
+        pFile->bPersistWal = bPersist!=0;
+      }
       return SQLITE_OK;
     }
     case SQLITE_FCNTL_SYNC_OMITTED: {
       return SQLITE_OK;
     }
+    case SQLITE_FCNTL_WIN32_AV_RETRY: {
+      int *a = (int*)pArg;  /* a[0]: retry count, a[1]: retry delay */
+      if( a[0]>0 ){         /* positive sets the value; else report it */
+        win32IoerrRetry = a[0];
+      }else{
+        a[0] = win32IoerrRetry;
+      }
+      if( a[1]>0 ){
+        win32IoerrRetryDelay = a[1];
+      }else{
+        a[1] = win32IoerrRetryDelay;
+      }
+      return SQLITE_OK;
+    }
   }
   return SQLITE_NOTFOUND;
 }
@@ -33525,6 +34050,7 @@ static int winOpen(
   winFile *pFile = (winFile*)id;
   void *zConverted;              /* Filename in OS encoding */
   const char *zUtf8Name = zName; /* Filename in UTF-8 encoding */
+  int cnt = 0;
 
   /* If argument zPath is a NULL pointer, this function is required to open
   ** a temporary file. Use this buffer to store the file name in.
@@ -33644,31 +34170,31 @@ static int winOpen(
 #endif
 
   if( isNT() ){
-    h = CreateFileW((WCHAR*)zConverted,
-       dwDesiredAccess,
-       dwShareMode,
-       NULL,
-       dwCreationDisposition,
-       dwFlagsAndAttributes,
-       NULL
-    );
+    while( (h = CreateFileW((WCHAR*)zConverted,
+                            dwDesiredAccess,
+                            dwShareMode, NULL,
+                            dwCreationDisposition,
+                            dwFlagsAndAttributes,
+                            NULL))==INVALID_HANDLE_VALUE &&
+                            retryIoerr(&cnt) ){}
 /* isNT() is 1 if SQLITE_OS_WINCE==1, so this else is never executed. 
 ** Since the ASCII version of these Windows API do not exist for WINCE,
 ** it's important to not reference them for WINCE builds.
 */
 #if SQLITE_OS_WINCE==0
   }else{
-    h = CreateFileA((char*)zConverted,
-       dwDesiredAccess,
-       dwShareMode,
-       NULL,
-       dwCreationDisposition,
-       dwFlagsAndAttributes,
-       NULL
-    );
+    while( (h = CreateFileA((char*)zConverted,
+                            dwDesiredAccess,
+                            dwShareMode, NULL,
+                            dwCreationDisposition,
+                            dwFlagsAndAttributes,
+                            NULL))==INVALID_HANDLE_VALUE &&
+                            retryIoerr(&cnt) ){}
 #endif
   }
 
+  logIoerr(cnt);
+
   OSTRACE(("OPEN %d %s 0x%lx %s\n", 
            h, zName, dwDesiredAccess, 
            h==INVALID_HANDLE_VALUE ? "failed" : "ok"));
@@ -33677,7 +34203,7 @@ static int winOpen(
     pFile->lastErrno = GetLastError();
     winLogError(SQLITE_CANTOPEN, "winOpen", zUtf8Name);
     free(zConverted);
-    if( isReadWrite ){
+    if( isReadWrite && !isExclusive ){
       return winOpen(pVfs, zName, id, 
              ((flags|SQLITE_OPEN_READONLY)&~(SQLITE_OPEN_CREATE|SQLITE_OPEN_READWRITE)), pOutFlags);
     }else{
@@ -33734,15 +34260,13 @@ static int winOpen(
 ** to MX_DELETION_ATTEMPTs deletion attempts are run before giving
 ** up and returning an error.
 */
-#define MX_DELETION_ATTEMPTS 5
 static int winDelete(
   sqlite3_vfs *pVfs,          /* Not used on win32 */
   const char *zFilename,      /* Name of file to delete */
   int syncDir                 /* Not used on win32 */
 ){
   int cnt = 0;
-  DWORD rc;
-  DWORD error = 0;
+  int rc;
   void *zConverted;
   UNUSED_PARAMETER(pVfs);
   UNUSED_PARAMETER(syncDir);
@@ -33753,34 +34277,30 @@ static int winDelete(
     return SQLITE_NOMEM;
   }
   if( isNT() ){
-    do{
-      DeleteFileW(zConverted);
-    }while(   (   ((rc = GetFileAttributesW(zConverted)) != INVALID_FILE_ATTRIBUTES)
-               || ((error = GetLastError()) == ERROR_ACCESS_DENIED))
-           && (++cnt < MX_DELETION_ATTEMPTS)
-           && (Sleep(100), 1) );
+    rc = 1;
+    while( GetFileAttributesW(zConverted)!=INVALID_FILE_ATTRIBUTES &&
+           (rc = DeleteFileW(zConverted))==0 && retryIoerr(&cnt) ){}
+    rc = rc ? SQLITE_OK : SQLITE_ERROR;
 /* isNT() is 1 if SQLITE_OS_WINCE==1, so this else is never executed. 
 ** Since the ASCII version of these Windows API do not exist for WINCE,
 ** it's important to not reference them for WINCE builds.
 */
 #if SQLITE_OS_WINCE==0
   }else{
-    do{
-      DeleteFileA(zConverted);
-    }while(   (   ((rc = GetFileAttributesA(zConverted)) != INVALID_FILE_ATTRIBUTES)
-               || ((error = GetLastError()) == ERROR_ACCESS_DENIED))
-           && (++cnt < MX_DELETION_ATTEMPTS)
-           && (Sleep(100), 1) );
+    rc = 1;
+    while( GetFileAttributesA(zConverted)!=INVALID_FILE_ATTRIBUTES &&
+           (rc = DeleteFileA(zConverted))==0 && retryIoerr(&cnt) ){}
+    rc = rc ? SQLITE_OK : SQLITE_ERROR;
 #endif
   }
+  if( rc ){
+    rc = winLogError(SQLITE_IOERR_DELETE, "winDelete", zFilename);
+  }else{
+    logIoerr(cnt);
+  }
   free(zConverted);
-  OSTRACE(("DELETE \"%s\" %s\n", zFilename,
-       ( (rc==INVALID_FILE_ATTRIBUTES) && (error==ERROR_FILE_NOT_FOUND)) ?
-         "ok" : "failed" ));
- 
-  return (   (rc == INVALID_FILE_ATTRIBUTES) 
-          && (error == ERROR_FILE_NOT_FOUND)) ? SQLITE_OK :
-                 winLogError(SQLITE_IOERR_DELETE, "winDelete", zFilename);
+  OSTRACE(("DELETE \"%s\" %s\n", zFilename, (rc ? "failed" : "ok" )));
+  return rc;
 }
 
 /*
@@ -33803,11 +34323,13 @@ static int winAccess(
     return SQLITE_NOMEM;
   }
   if( isNT() ){
+    int cnt = 0;
     WIN32_FILE_ATTRIBUTE_DATA sAttrData;
     memset(&sAttrData, 0, sizeof(sAttrData));
-    if( GetFileAttributesExW((WCHAR*)zConverted,
+    while( !(rc = GetFileAttributesExW((WCHAR*)zConverted,
                              GetFileExInfoStandard, 
-                             &sAttrData) ){
+                             &sAttrData)) && retryIoerr(&cnt) ){}
+    if( rc ){
       /* For an SQLITE_ACCESS_EXISTS query, treat a zero-length file
       ** as if it does not exist.
       */
@@ -33819,6 +34341,7 @@ static int winAccess(
         attr = sAttrData.dwFileAttributes;
       }
     }else{
+      logIoerr(cnt);
       if( GetLastError()!=ERROR_FILE_NOT_FOUND ){
         winLogError(SQLITE_IOERR_ACCESS, "winAccess", zFilename);
         free(zConverted);
@@ -33843,7 +34366,8 @@ static int winAccess(
       rc = attr!=INVALID_FILE_ATTRIBUTES;
       break;
     case SQLITE_ACCESS_READWRITE:
-      rc = (attr & FILE_ATTRIBUTE_READONLY)==0;
+      rc = attr!=INVALID_FILE_ATTRIBUTES &&
+             (attr & FILE_ATTRIBUTE_READONLY)==0;
       break;
     default:
       assert(!"Invalid flags argument");
@@ -34045,7 +34569,7 @@ static void winDlError(sqlite3_vfs *pVfs, int nBuf, char *zBufOut){
   UNUSED_PARAMETER(pVfs);
   getLastErrorMsg(nBuf, zBufOut);
 }
-void (*winDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol))(void){
+static void (*winDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol))(void){
   UNUSED_PARAMETER(pVfs);
 #if SQLITE_OS_WINCE
   /* The GetProcAddressA() routine is only available on wince. */
@@ -34056,7 +34580,7 @@ void (*winDlSym(sqlite3_vfs *pVfs, void *pHandle, const char *zSymbol))(void){
   return (void(*)(void))GetProcAddress((HANDLE)pHandle, zSymbol);
 #endif
 }
-void winDlClose(sqlite3_vfs *pVfs, void *pHandle){
+static void winDlClose(sqlite3_vfs *pVfs, void *pHandle){
   UNUSED_PARAMETER(pVfs);
   FreeLibrary((HANDLE)pHandle);
 }
@@ -34130,7 +34654,8 @@ SQLITE_API int sqlite3_current_time = 0;  /* Fake system time in seconds since 1
 ** epoch of noon in Greenwich on November 24, 4714 B.C according to the
 ** proleptic Gregorian calendar.
 **
-** On success, return 0.  Return 1 if the time and date cannot be found.
+** On success, return SQLITE_OK.  Return SQLITE_ERROR if the time and date 
+** cannot be found.
 */
 static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){
   /* FILETIME structure is a 64-bit value representing the number of 
@@ -34150,7 +34675,7 @@ static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){
   GetSystemTime(&time);
   /* if SystemTimeToFileTime() fails, it returns zero. */
   if (!SystemTimeToFileTime(&time,&ft)){
-    return 1;
+    return SQLITE_ERROR;
   }
 #else
   GetSystemTimeAsFileTime( &ft );
@@ -34166,7 +34691,7 @@ static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){
   }
 #endif
   UNUSED_PARAMETER(pVfs);
-  return 0;
+  return SQLITE_OK;
 }
 
 /*
@@ -34174,7 +34699,7 @@ static int winCurrentTimeInt64(sqlite3_vfs *pVfs, sqlite3_int64 *piNow){
 ** current time and date as a Julian Day number into *prNow and
 ** return 0.  Return 1 if the time and date cannot be found.
 */
-int winCurrentTime(sqlite3_vfs *pVfs, double *prNow){
+static int winCurrentTime(sqlite3_vfs *pVfs, double *prNow){
   int rc;
   sqlite3_int64 i;
   rc = winCurrentTimeInt64(pVfs, &i);
@@ -35299,6 +35824,7 @@ typedef struct PgHdr1 PgHdr1;
 typedef struct PgFreeslot PgFreeslot;
 typedef struct PGroup PGroup;
 
+
 /* Each page cache (or PCache) belongs to a PGroup.  A PGroup is a set 
 ** of one or more PCaches that are able to recycle each others unpinned
 ** pages when they are under memory pressure.  A PGroup is an instance of
@@ -35563,15 +36089,22 @@ static int pcache1MemSize(void *p){
 */
 static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
   int nByte = sizeof(PgHdr1) + pCache->szPage;
-  void *pPg = pcache1Alloc(nByte);
-  PgHdr1 *p;
+  PgHdr1 *p = 0;                  /* Return value; stays 0 when pPg is NULL */
+  void *pPg;                      /* Buffer returned by pcache1Alloc() */
+
+  /* The group mutex must be released before pcache1Alloc() is called. This
+  ** is because it may call sqlite3_release_memory(), which assumes that 
+  ** this mutex is not held. */
+  assert( sqlite3_mutex_held(pCache->pGroup->mutex) );
+  pcache1LeaveMutex(pCache->pGroup);
+  pPg = pcache1Alloc(nByte);
+  pcache1EnterMutex(pCache->pGroup);  /* Held again before nCurrentPage++ */
+
   if( pPg ){
     p = PAGE_TO_PGHDR1(pCache, pPg);
     if( pCache->bPurgeable ){
       pCache->pGroup->nCurrentPage++;
     }
-  }else{
-    p = 0;
   }
   return p;
 }
@@ -35586,10 +36119,11 @@ static PgHdr1 *pcache1AllocPage(PCache1 *pCache){
 static void pcache1FreePage(PgHdr1 *p){
   if( ALWAYS(p) ){
     PCache1 *pCache = p->pCache;
+    assert( sqlite3_mutex_held(p->pCache->pGroup->mutex) );
+    pcache1Free(PGHDR1_TO_PAGE(p));  /* Only pCache (read above) is used after */
     if( pCache->bPurgeable ){
       pCache->pGroup->nCurrentPage--;
     }
-    pcache1Free(PGHDR1_TO_PAGE(p));
   }
 }
 
@@ -36027,9 +36561,7 @@ static void *pcache1Fetch(sqlite3_pcache *p, unsigned int iKey, int createFlag){
   */
   if( !pPage ){
     if( createFlag==1 ) sqlite3BeginBenignMalloc();
-    pcache1LeaveMutex(pGroup);
     pPage = pcache1AllocPage(pCache);
-    pcache1EnterMutex(pGroup);
     if( createFlag==1 ) sqlite3EndBenignMalloc();
   }
 
@@ -37461,8 +37993,8 @@ struct Pager {
   char *zJournal;             /* Name of the journal file */
   int (*xBusyHandler)(void*); /* Function to call when busy */
   void *pBusyHandlerArg;      /* Context argument for xBusyHandler */
+  int nHit, nMiss;            /* Total cache hits and misses */
 #ifdef SQLITE_TEST
-  int nHit, nMiss;            /* Cache hits and missing */
   int nRead, nWrite;          /* Database pages read/written */
 #endif
   void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
@@ -39494,7 +40026,6 @@ static int pager_playback(Pager *pPager, int isHot){
       rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0);
       if( rc!=SQLITE_OK ){
         if( rc==SQLITE_DONE ){
-          rc = SQLITE_OK;
           pPager->journalOff = szJ;
           break;
         }else if( rc==SQLITE_IOERR_SHORT_READ ){
@@ -39756,6 +40287,7 @@ static int pagerWalFrames(
 #endif
 
   assert( pPager->pWal );
+  assert( pList );
 #ifdef SQLITE_DEBUG
   /* Verify that the page list is in accending order */
   for(p=pList; p && p->pDirty; p=p->pDirty){
@@ -40530,6 +41062,7 @@ static int pagerSyncHotJournal(Pager *pPager){
 SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){
   u8 *pTmp = (u8 *)pPager->pTmpSpace;
 
+  assert( assert_pager_state(pPager) );
   disable_simulated_io_errors();
   sqlite3BeginBenignMalloc();
   /* pPager->errCode = 0; */
@@ -40959,7 +41492,7 @@ static int pagerStress(void *p, PgHdr *pPg){
   **
   ** Spilling is also prohibited when in an error state since that could
   ** lead to database corruption.   In the current implementaton it 
-  ** is impossible for sqlite3PCacheFetch() to be called with createFlag==1
+  ** is impossible for sqlite3PcacheFetch() to be called with createFlag==1
   ** while in the error state, hence it is impossible for this routine to
   ** be called in the error state.  Nevertheless, we include a NEVER()
   ** test for the error state as a safeguard against future changes.
@@ -41795,14 +42328,13 @@ SQLITE_PRIVATE int sqlite3PagerAcquire(
     /* In this case the pcache already contains an initialized copy of
     ** the page. Return without further ado.  */
     assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
-    PAGER_INCR(pPager->nHit);
+    pPager->nHit++;
     return SQLITE_OK;
 
   }else{
     /* The pager cache has created a new page. Its content needs to 
     ** be initialized.  */
 
-    PAGER_INCR(pPager->nMiss);
     pPg = *ppPage;
     pPg->pPager = pPager;
 
@@ -41838,6 +42370,7 @@ SQLITE_PRIVATE int sqlite3PagerAcquire(
       IOTRACE(("ZERO %p %d\n", pPager, pgno));
     }else{
       assert( pPg->pPager==pPager );
+      pPager->nMiss++;
       rc = readDbPage(pPg);
       if( rc!=SQLITE_OK ){
         goto pager_acquire_err;
@@ -42873,6 +43406,31 @@ SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){
 #endif
 
 /*
+** Parameter eStat must be either SQLITE_DBSTATUS_CACHE_HIT or
+** SQLITE_DBSTATUS_CACHE_MISS. Before returning, *pnVal is incremented by the
+** current cache hit or miss count, according to the value of eStat. If the 
+** reset parameter is non-zero, the cache hit or miss count is zeroed before 
+** returning.
+*/
+SQLITE_PRIVATE void sqlite3PagerCacheStat(Pager *pPager, int eStat, int reset, int *pnVal){
+  int *piStat;                    /* Counter selected by eStat: nHit or nMiss */
+
+  assert( eStat==SQLITE_DBSTATUS_CACHE_HIT
+       || eStat==SQLITE_DBSTATUS_CACHE_MISS
+  );
+  if( eStat==SQLITE_DBSTATUS_CACHE_HIT ){
+    piStat = &pPager->nHit;
+  }else{                          /* Any eStat other than CACHE_HIT lands here */
+    piStat = &pPager->nMiss;
+  }
+
+  *pnVal += *piStat;              /* Accumulates; *pnVal is not overwritten */
+  if( reset ){
+    *piStat = 0;                  /* Only the selected counter is cleared */
+  }
+}
+
+/*
 ** Return true if this is an in-memory pager.
 */
 SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){
@@ -43601,6 +44159,13 @@ SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){
   return rc;
 }
 
+/*
+** Unless this is an in-memory or temporary database, clear the pager cache.
+*/
+SQLITE_PRIVATE void sqlite3PagerClearCache(Pager *pPager){
+  if( !MEMDB && pPager->tempFile==0 ) pager_reset(pPager);
+}
+
 #ifdef SQLITE_HAS_CODEC
 /*
 ** This function is called by the wal module when writing page content
@@ -45427,13 +45992,15 @@ SQLITE_PRIVATE int sqlite3WalClose(
     */
     rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
     if( rc==SQLITE_OK ){
+      int bPersistWal = -1;
       if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
         pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
       }
       rc = sqlite3WalCheckpoint(
           pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
       );
-      if( rc==SQLITE_OK ){
+      sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersistWal);
+      if( rc==SQLITE_OK && bPersistWal!=1 ){
         isDelete = 1;
       }
     }
@@ -45964,7 +46531,7 @@ SQLITE_PRIVATE int sqlite3WalRead(
     int sz;
     i64 iOffset;
     sz = pWal->hdr.szPage;
-    sz = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
+    sz = (sz&0xfe00) + ((sz&0x0001)<<16);
     testcase( sz<=32768 );
     testcase( sz>=65536 );
     iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
@@ -48167,18 +48734,21 @@ static int btreeMoveto(
   int rc;                    /* Status code */
   UnpackedRecord *pIdxKey;   /* Unpacked index key */
   char aSpace[150];          /* Temp space for pIdxKey - to avoid a malloc */
+  char *pFree = 0;
 
   if( pKey ){
     assert( nKey==(i64)(int)nKey );
-    pIdxKey = sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey,
-                                      aSpace, sizeof(aSpace));
+    pIdxKey = sqlite3VdbeAllocUnpackedRecord(
+        pCur->pKeyInfo, aSpace, sizeof(aSpace), &pFree
+    );
     if( pIdxKey==0 ) return SQLITE_NOMEM;
+    sqlite3VdbeRecordUnpack(pCur->pKeyInfo, (int)nKey, pKey, pIdxKey);
   }else{
     pIdxKey = 0;
   }
   rc = sqlite3BtreeMovetoUnpacked(pCur, pIdxKey, nKey, bias, pRes);
-  if( pKey ){
-    sqlite3VdbeDeleteUnpackedRecord(pIdxKey);
+  if( pFree ){
+    sqlite3DbFree(pCur->pKeyInfo->db, pFree);
   }
   return rc;
 }
@@ -49274,17 +49844,19 @@ SQLITE_PRIVATE int sqlite3BtreeOpen(
     if( vfsFlags & SQLITE_OPEN_SHAREDCACHE ){
       int nFullPathname = pVfs->mxPathname+1;
       char *zFullPathname = sqlite3Malloc(nFullPathname);
-      sqlite3_mutex *mutexShared;
+      MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
       p->sharable = 1;
       if( !zFullPathname ){
         sqlite3_free(p);
         return SQLITE_NOMEM;
       }
       sqlite3OsFullPathname(pVfs, zFilename, nFullPathname, zFullPathname);
+#if SQLITE_THREADSAFE
       mutexOpen = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_OPEN);
       sqlite3_mutex_enter(mutexOpen);
       mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
       sqlite3_mutex_enter(mutexShared);
+#endif
       for(pBt=GLOBAL(BtShared*,sqlite3SharedCacheList); pBt; pBt=pBt->pNext){
         assert( pBt->nRef>0 );
         if( 0==strcmp(zFullPathname, sqlite3PagerFilename(pBt->pPager))
@@ -49390,9 +49962,9 @@ SQLITE_PRIVATE int sqlite3BtreeOpen(
     /* Add the new BtShared object to the linked list sharable BtShareds.
     */
     if( p->sharable ){
-      sqlite3_mutex *mutexShared;
+      MUTEX_LOGIC( sqlite3_mutex *mutexShared; )
       pBt->nRef = 1;
-      mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+      MUTEX_LOGIC( mutexShared = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);)
       if( SQLITE_THREADSAFE && sqlite3GlobalConfig.bCoreMutex ){
         pBt->mutex = sqlite3MutexAlloc(SQLITE_MUTEX_FAST);
         if( pBt->mutex==0 ){
@@ -49474,12 +50046,12 @@ btree_open_out:
 */
 static int removeFromSharingList(BtShared *pBt){
 #ifndef SQLITE_OMIT_SHARED_CACHE
-  sqlite3_mutex *pMaster;
+  MUTEX_LOGIC( sqlite3_mutex *pMaster; )
   BtShared *pList;
   int removed = 0;
 
   assert( sqlite3_mutex_notheld(pBt->mutex) );
-  pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+  MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
   sqlite3_mutex_enter(pMaster);
   pBt->nRef--;
   if( pBt->nRef<=0 ){
@@ -50254,11 +50826,12 @@ static int modifyPagePointer(MemPage *pPage, Pgno iFrom, Pgno iTo, u8 eType){
       if( eType==PTRMAP_OVERFLOW1 ){
         CellInfo info;
         btreeParseCellPtr(pPage, pCell, &info);
-        if( info.iOverflow ){
-          if( iFrom==get4byte(&pCell[info.iOverflow]) ){
-            put4byte(&pCell[info.iOverflow], iTo);
-            break;
-          }
+        if( info.iOverflow
+         && pCell+info.iOverflow+3<=pPage->aData+pPage->maskPage
+         && iFrom==get4byte(&pCell[info.iOverflow])
+        ){
+          put4byte(&pCell[info.iOverflow], iTo);
+          break;
         }
       }else{
         if( get4byte(pCell)==iFrom ){
@@ -50979,7 +51552,8 @@ static int btreeCursor(
     return SQLITE_READONLY;
   }
   if( iTable==1 && btreePagecount(pBt)==0 ){
-    return SQLITE_EMPTY;
+    assert( wrFlag==0 );
+    iTable = 0;
   }
 
   /* Now that no other errors can occur, finish filling in the BtCursor
@@ -51444,21 +52018,55 @@ static int accessPayload(
         /* Need to read this page properly. It contains some of the
         ** range of data that is being read (eOp==0) or written (eOp!=0).
         */
-        DbPage *pDbPage;
+#ifdef SQLITE_DIRECT_OVERFLOW_READ
+        sqlite3_file *fd;
+#endif
         int a = amt;
-        rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
-        if( rc==SQLITE_OK ){
-          aPayload = sqlite3PagerGetData(pDbPage);
-          nextPage = get4byte(aPayload);
-          if( a + offset > ovflSize ){
-            a = ovflSize - offset;
+        if( a + offset > ovflSize ){
+          a = ovflSize - offset;
+        }
+
+#ifdef SQLITE_DIRECT_OVERFLOW_READ
+        /* If all the following are true:
+        **
+        **   1) this is a read operation, and 
+        **   2) data is required from the start of this overflow page, and
+        **   3) the database is file-backed, and
+        **   4) there is no open write-transaction, and
+        **   5) the database is not a WAL database,
+        **
+        ** then data can be read directly from the database file into the
+        ** output buffer, bypassing the page-cache altogether. This speeds
+        ** up loading large records that span many overflow pages.
+        */
+        if( eOp==0                                             /* (1) */
+         && offset==0                                          /* (2) */
+         && pBt->inTransaction==TRANS_READ                     /* (4) */
+         && (fd = sqlite3PagerFile(pBt->pPager))->pMethods     /* (3) */
+         && pBt->pPage1->aData[19]==0x01                       /* (5) */
+        ){
+          u8 aSave[4];
+          u8 *aWrite = &pBuf[-4];
+          memcpy(aSave, aWrite, 4);
+          rc = sqlite3OsRead(fd, aWrite, a+4, pBt->pageSize * (nextPage-1));
+          nextPage = get4byte(aWrite);
+          memcpy(aWrite, aSave, 4);
+        }else
+#endif
+
+        {
+          DbPage *pDbPage;
+          rc = sqlite3PagerGet(pBt->pPager, nextPage, &pDbPage);
+          if( rc==SQLITE_OK ){
+            aPayload = sqlite3PagerGetData(pDbPage);
+            nextPage = get4byte(aPayload);
+            rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
+            sqlite3PagerUnref(pDbPage);
+            offset = 0;
           }
-          rc = copyPayload(&aPayload[offset+4], pBuf, a, eOp, pDbPage);
-          sqlite3PagerUnref(pDbPage);
-          offset = 0;
-          amt -= a;
-          pBuf += a;
         }
+        amt -= a;
+        pBuf += a;
       }
     }
   }
@@ -51733,6 +52341,9 @@ static int moveToRoot(BtCursor *pCur){
       releasePage(pCur->apPage[i]);
     }
     pCur->iPage = 0;
+  }else if( pCur->pgnoRoot==0 ){
+    pCur->eState = CURSOR_INVALID;
+    return SQLITE_OK;
   }else{
     rc = getAndInitPage(pBt, pCur->pgnoRoot, &pCur->apPage[0]);
     if( rc!=SQLITE_OK ){
@@ -51842,7 +52453,7 @@ SQLITE_PRIVATE int sqlite3BtreeFirst(BtCursor *pCur, int *pRes){
   rc = moveToRoot(pCur);
   if( rc==SQLITE_OK ){
     if( pCur->eState==CURSOR_INVALID ){
-      assert( pCur->apPage[pCur->iPage]->nCell==0 );
+      assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
       *pRes = 1;
     }else{
       assert( pCur->apPage[pCur->iPage]->nCell>0 );
@@ -51881,7 +52492,7 @@ SQLITE_PRIVATE int sqlite3BtreeLast(BtCursor *pCur, int *pRes){
   rc = moveToRoot(pCur);
   if( rc==SQLITE_OK ){
     if( CURSOR_INVALID==pCur->eState ){
-      assert( pCur->apPage[pCur->iPage]->nCell==0 );
+      assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
       *pRes = 1;
     }else{
       assert( pCur->eState==CURSOR_VALID );
@@ -51954,12 +52565,12 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked(
   if( rc ){
     return rc;
   }
-  assert( pCur->apPage[pCur->iPage] );
-  assert( pCur->apPage[pCur->iPage]->isInit );
-  assert( pCur->apPage[pCur->iPage]->nCell>0 || pCur->eState==CURSOR_INVALID );
+  assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage] );
+  assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->isInit );
+  assert( pCur->eState==CURSOR_INVALID || pCur->apPage[pCur->iPage]->nCell>0 );
   if( pCur->eState==CURSOR_INVALID ){
     *pRes = -1;
-    assert( pCur->apPage[pCur->iPage]->nCell==0 );
+    assert( pCur->pgnoRoot==0 || pCur->apPage[pCur->iPage]->nCell==0 );
     return SQLITE_OK;
   }
   assert( pCur->apPage[0]->intKey || pIdxKey );
@@ -52054,7 +52665,6 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked(
       if( c==0 ){
         if( pPage->intKey && !pPage->leaf ){
           lwr = idx;
-          upr = lwr - 1;
           break;
         }else{
           *pRes = 0;
@@ -52072,7 +52682,7 @@ SQLITE_PRIVATE int sqlite3BtreeMovetoUnpacked(
       }
       pCur->aiIdx[pCur->iPage] = (u16)(idx = (lwr+upr)/2);
     }
-    assert( lwr==upr+1 );
+    assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
     assert( pPage->isInit );
     if( pPage->leaf ){
       chldPg = 0;
@@ -52337,6 +52947,8 @@ static int allocateBtreePage(
         pTrunk = 0;
         goto end_allocate_page;
       }
+      assert( pTrunk!=0 );
+      assert( pTrunk->aData!=0 );
 
       k = get4byte(&pTrunk->aData[4]); /* # of leaves on this trunk page */
       if( k==0 && !searchList ){
@@ -52686,6 +53298,9 @@ static int clearCell(MemPage *pPage, unsigned char *pCell){
   if( info.iOverflow==0 ){
     return SQLITE_OK;  /* No overflow pages. Return without doing anything */
   }
+  if( pCell+info.iOverflow+3 > pPage->aData+pPage->maskPage ){
+    return SQLITE_CORRUPT;  /* Cell extends past end of page */
+  }
   ovflPgno = get4byte(&pCell[info.iOverflow]);
   assert( pBt->usableSize > 4 );
   ovflPageSize = pBt->usableSize - 4;
@@ -53461,13 +54076,15 @@ static int balance_nonroot(
       ** four bytes of the divider cell. So the pointer is safe to use
       ** later on.  
       **
-      ** Unless SQLite is compiled in secure-delete mode. In this case,
+      ** But not if we are in secure-delete mode. In secure-delete mode,
       ** the dropCell() routine will overwrite the entire cell with zeroes.
       ** In this case, temporarily copy the cell into the aOvflSpace[]
       ** buffer. It will be copied out again as soon as the aSpace[] buffer
       ** is allocated.  */
       if( pBt->secureDelete ){
-        int iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData);
+        int iOff;
+
+        iOff = SQLITE_PTR_TO_INT(apDiv[i]) - SQLITE_PTR_TO_INT(pParent->aData);
         if( (iOff+szNew[i])>(int)pBt->usableSize ){
           rc = SQLITE_CORRUPT_BKPT;
           memset(apOld, 0, (i+1)*sizeof(MemPage*));
@@ -53887,6 +54504,7 @@ static int balance_nonroot(
         /* Cell i is the cell immediately following the last cell on old
         ** sibling page j. If the siblings are not leaf pages of an
         ** intkey b-tree, then cell i was a divider cell. */
+        assert( j+1 < ArraySize(apCopy) );
         pOld = apCopy[++j];
         iNextOld = i + !leafData + pOld->nCell + pOld->nOverflow;
         if( pOld->nOverflow ){
@@ -54869,6 +55487,11 @@ SQLITE_PRIVATE int sqlite3BtreeUpdateMeta(Btree *p, int idx, u32 iMeta){
 SQLITE_PRIVATE int sqlite3BtreeCount(BtCursor *pCur, i64 *pnEntry){
   i64 nEntry = 0;                      /* Value to return in *pnEntry */
   int rc;                              /* Return code */
+
+  if( pCur->pgnoRoot==0 ){
+    *pnEntry = 0;
+    return SQLITE_OK;
+  }
   rc = moveToRoot(pCur);
 
   /* Unless an error occurs, the following loop runs one iteration for each
@@ -55653,7 +56276,6 @@ SQLITE_PRIVATE int sqlite3BtreeSetVersion(Btree *pBtree, int iVersion){
   BtShared *pBt = pBtree->pBt;
   int rc;                         /* Return code */
  
-  assert( pBtree->inTrans==TRANS_NONE );
   assert( iVersion==1 || iVersion==2 );
 
   /* If setting the version fields to 1, do not automatically open the
@@ -56092,102 +56714,106 @@ SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){
     ** the case where the source and destination databases have the
     ** same schema version.
     */
-    if( rc==SQLITE_DONE 
-     && (rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1))==SQLITE_OK
-    ){
-      int nDestTruncate;
-  
-      if( p->pDestDb ){
-        sqlite3ResetInternalSchema(p->pDestDb, -1);
-      }
-
-      /* Set nDestTruncate to the final number of pages in the destination
-      ** database. The complication here is that the destination page
-      ** size may be different to the source page size. 
-      **
-      ** If the source page size is smaller than the destination page size, 
-      ** round up. In this case the call to sqlite3OsTruncate() below will
-      ** fix the size of the file. However it is important to call
-      ** sqlite3PagerTruncateImage() here so that any pages in the 
-      ** destination file that lie beyond the nDestTruncate page mark are
-      ** journalled by PagerCommitPhaseOne() before they are destroyed
-      ** by the file truncation.
-      */
-      assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) );
-      assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) );
-      if( pgszSrc<pgszDest ){
-        int ratio = pgszDest/pgszSrc;
-        nDestTruncate = (nSrcPage+ratio-1)/ratio;
-        if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){
-          nDestTruncate--;
+    if( rc==SQLITE_DONE ){
+      rc = sqlite3BtreeUpdateMeta(p->pDest,1,p->iDestSchema+1);
+      if( rc==SQLITE_OK ){
+        if( p->pDestDb ){
+          sqlite3ResetInternalSchema(p->pDestDb, -1);
+        }
+        if( destMode==PAGER_JOURNALMODE_WAL ){
+          rc = sqlite3BtreeSetVersion(p->pDest, 2);
         }
-      }else{
-        nDestTruncate = nSrcPage * (pgszSrc/pgszDest);
       }
-      sqlite3PagerTruncateImage(pDestPager, nDestTruncate);
-
-      if( pgszSrc<pgszDest ){
-        /* If the source page-size is smaller than the destination page-size,
-        ** two extra things may need to happen:
-        **
-        **   * The destination may need to be truncated, and
+      if( rc==SQLITE_OK ){
+        int nDestTruncate;
+        /* Set nDestTruncate to the final number of pages in the destination
+        ** database. The complication here is that the destination page
+        ** size may be different to the source page size. 
         **
-        **   * Data stored on the pages immediately following the 
-        **     pending-byte page in the source database may need to be
-        **     copied into the destination database.
+        ** If the source page size is smaller than the destination page size, 
+        ** round up. In this case the call to sqlite3OsTruncate() below will
+        ** fix the size of the file. However it is important to call
+        ** sqlite3PagerTruncateImage() here so that any pages in the 
+        ** destination file that lie beyond the nDestTruncate page mark are
+        ** journalled by PagerCommitPhaseOne() before they are destroyed
+        ** by the file truncation.
         */
-        const i64 iSize = (i64)pgszSrc * (i64)nSrcPage;
-        sqlite3_file * const pFile = sqlite3PagerFile(pDestPager);
-        i64 iOff;
-        i64 iEnd;
-
-        assert( pFile );
-        assert( (i64)nDestTruncate*(i64)pgszDest >= iSize || (
-              nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1)
-           && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest
-        ));
+        assert( pgszSrc==sqlite3BtreeGetPageSize(p->pSrc) );
+        assert( pgszDest==sqlite3BtreeGetPageSize(p->pDest) );
+        if( pgszSrc<pgszDest ){
+          int ratio = pgszDest/pgszSrc;
+          nDestTruncate = (nSrcPage+ratio-1)/ratio;
+          if( nDestTruncate==(int)PENDING_BYTE_PAGE(p->pDest->pBt) ){
+            nDestTruncate--;
+          }
+        }else{
+          nDestTruncate = nSrcPage * (pgszSrc/pgszDest);
+        }
+        sqlite3PagerTruncateImage(pDestPager, nDestTruncate);
 
-        /* This call ensures that all data required to recreate the original
-        ** database has been stored in the journal for pDestPager and the
-        ** journal synced to disk. So at this point we may safely modify
-        ** the database file in any way, knowing that if a power failure
-        ** occurs, the original database will be reconstructed from the 
-        ** journal file.  */
-        rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1);
-
-        /* Write the extra pages and truncate the database file as required. */
-        iEnd = MIN(PENDING_BYTE + pgszDest, iSize);
-        for(
-          iOff=PENDING_BYTE+pgszSrc; 
-          rc==SQLITE_OK && iOff<iEnd; 
-          iOff+=pgszSrc
-        ){
-          PgHdr *pSrcPg = 0;
-          const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1);
-          rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
+        if( pgszSrc<pgszDest ){
+          /* If the source page-size is smaller than the destination page-size,
+          ** two extra things may need to happen:
+          **
+          **   * The destination may need to be truncated, and
+          **
+          **   * Data stored on the pages immediately following the 
+          **     pending-byte page in the source database may need to be
+          **     copied into the destination database.
+          */
+          const i64 iSize = (i64)pgszSrc * (i64)nSrcPage;
+          sqlite3_file * const pFile = sqlite3PagerFile(pDestPager);
+          i64 iOff;
+          i64 iEnd;
+
+          assert( pFile );
+          assert( (i64)nDestTruncate*(i64)pgszDest >= iSize || (
+                nDestTruncate==(int)(PENDING_BYTE_PAGE(p->pDest->pBt)-1)
+             && iSize>=PENDING_BYTE && iSize<=PENDING_BYTE+pgszDest
+          ));
+
+          /* This call ensures that all data required to recreate the original
+          ** database has been stored in the journal for pDestPager and the
+          ** journal synced to disk. So at this point we may safely modify
+          ** the database file in any way, knowing that if a power failure
+          ** occurs, the original database will be reconstructed from the 
+          ** journal file.  */
+          rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 1);
+
+          /* Write the extra pages and truncate the database file as required */
+          iEnd = MIN(PENDING_BYTE + pgszDest, iSize);
+          for(
+            iOff=PENDING_BYTE+pgszSrc; 
+            rc==SQLITE_OK && iOff<iEnd; 
+            iOff+=pgszSrc
+          ){
+            PgHdr *pSrcPg = 0;
+            const Pgno iSrcPg = (Pgno)((iOff/pgszSrc)+1);
+            rc = sqlite3PagerGet(pSrcPager, iSrcPg, &pSrcPg);
+            if( rc==SQLITE_OK ){
+              u8 *zData = sqlite3PagerGetData(pSrcPg);
+              rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff);
+            }
+            sqlite3PagerUnref(pSrcPg);
+          }
           if( rc==SQLITE_OK ){
-            u8 *zData = sqlite3PagerGetData(pSrcPg);
-            rc = sqlite3OsWrite(pFile, zData, pgszSrc, iOff);
+            rc = backupTruncateFile(pFile, iSize);
           }
-          sqlite3PagerUnref(pSrcPg);
-        }
-        if( rc==SQLITE_OK ){
-          rc = backupTruncateFile(pFile, iSize);
-        }
 
-        /* Sync the database file to disk. */
-        if( rc==SQLITE_OK ){
-          rc = sqlite3PagerSync(pDestPager);
+          /* Sync the database file to disk. */
+          if( rc==SQLITE_OK ){
+            rc = sqlite3PagerSync(pDestPager);
+          }
+        }else{
+          rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0);
+        }
+    
+        /* Finish committing the transaction to the destination database. */
+        if( SQLITE_OK==rc
+         && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0))
+        ){
+          rc = SQLITE_DONE;
         }
-      }else{
-        rc = sqlite3PagerCommitPhaseOne(pDestPager, 0, 0);
-      }
-  
-      /* Finish committing the transaction to the destination database. */
-      if( SQLITE_OK==rc
-       && SQLITE_OK==(rc = sqlite3BtreeCommitPhaseTwo(p->pDest, 0))
-      ){
-        rc = SQLITE_DONE;
       }
     }
   
@@ -56221,14 +56847,14 @@ SQLITE_API int sqlite3_backup_step(sqlite3_backup *p, int nPage){
 */
 SQLITE_API int sqlite3_backup_finish(sqlite3_backup *p){
   sqlite3_backup **pp;                 /* Ptr to head of pagers backup list */
-  sqlite3_mutex *mutex;                /* Mutex to protect source database */
+  MUTEX_LOGIC( sqlite3_mutex *mutex; ) /* Mutex to protect source database */
   int rc;                              /* Value to return */
 
   /* Enter the mutexes */
   if( p==0 ) return SQLITE_OK;
   sqlite3_mutex_enter(p->pSrcDb->mutex);
   sqlite3BtreeEnter(p->pSrc);
-  mutex = p->pSrcDb->mutex;
+  MUTEX_LOGIC( mutex = p->pSrcDb->mutex; )
   if( p->pDestDb ){
     sqlite3_mutex_enter(p->pDestDb->mutex);
   }
@@ -56347,10 +56973,18 @@ SQLITE_PRIVATE void sqlite3BackupRestart(sqlite3_backup *pBackup){
 */
 SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
   int rc;
+  sqlite3_file *pFd;              /* File descriptor for database pTo */
   sqlite3_backup b;
   sqlite3BtreeEnter(pTo);
   sqlite3BtreeEnter(pFrom);
 
+  assert( sqlite3BtreeIsInTrans(pTo) );
+  pFd = sqlite3PagerFile(sqlite3BtreePager(pTo));
+  if( pFd->pMethods ){
+    i64 nByte = sqlite3BtreeGetPageSize(pFrom)*(i64)sqlite3BtreeLastPage(pFrom);
+    sqlite3OsFileControl(pFd, SQLITE_FCNTL_OVERWRITE, &nByte);
+  }
+
   /* Set up an sqlite3_backup object. sqlite3_backup.pDestDb must be set
   ** to 0. This is used by the implementations of sqlite3_backup_step()
   ** and sqlite3_backup_finish() to detect that they are being called
@@ -56374,8 +57008,11 @@ SQLITE_PRIVATE int sqlite3BtreeCopyFile(Btree *pTo, Btree *pFrom){
   rc = sqlite3_backup_finish(&b);
   if( rc==SQLITE_OK ){
     pTo->pBt->pageSizeFixed = 0;
+  }else{
+    sqlite3PagerClearCache(sqlite3BtreePager(b.pDest));
   }
 
+  assert( sqlite3BtreeIsInTrans(pTo)==0 );
   sqlite3BtreeLeave(pFrom);
   sqlite3BtreeLeave(pTo);
   return rc;
@@ -56655,24 +57292,18 @@ SQLITE_PRIVATE int sqlite3VdbeMemFinalize(Mem *pMem, FuncDef *pFunc){
 */
 SQLITE_PRIVATE void sqlite3VdbeMemReleaseExternal(Mem *p){
   assert( p->db==0 || sqlite3_mutex_held(p->db->mutex) );
-  testcase( p->flags & MEM_Agg );
-  testcase( p->flags & MEM_Dyn );
-  testcase( p->flags & MEM_RowSet );
-  testcase( p->flags & MEM_Frame );
-  if( p->flags&(MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame) ){
-    if( p->flags&MEM_Agg ){
-      sqlite3VdbeMemFinalize(p, p->u.pDef);
-      assert( (p->flags & MEM_Agg)==0 );
-      sqlite3VdbeMemRelease(p);
-    }else if( p->flags&MEM_Dyn && p->xDel ){
-      assert( (p->flags&MEM_RowSet)==0 );
-      p->xDel((void *)p->z);
-      p->xDel = 0;
-    }else if( p->flags&MEM_RowSet ){
-      sqlite3RowSetClear(p->u.pRowSet);
-    }else if( p->flags&MEM_Frame ){
-      sqlite3VdbeMemSetNull(p);
-    }
+  if( p->flags&MEM_Agg ){
+    sqlite3VdbeMemFinalize(p, p->u.pDef);
+    assert( (p->flags & MEM_Agg)==0 );
+    sqlite3VdbeMemRelease(p);
+  }else if( p->flags&MEM_Dyn && p->xDel ){
+    assert( (p->flags&MEM_RowSet)==0 );
+    p->xDel((void *)p->z);
+    p->xDel = 0;
+  }else if( p->flags&MEM_RowSet ){
+    sqlite3RowSetClear(p->u.pRowSet);
+  }else if( p->flags&MEM_Frame ){
+    sqlite3VdbeMemSetNull(p);
   }
 }
 
@@ -56682,7 +57313,7 @@ SQLITE_PRIVATE void sqlite3VdbeMemReleaseExternal(Mem *p){
 ** (Mem.type==SQLITE_TEXT).
 */
 SQLITE_PRIVATE void sqlite3VdbeMemRelease(Mem *p){
-  sqlite3VdbeMemReleaseExternal(p);
+  MemReleaseExt(p);
   sqlite3DbFree(p->db, p->zMalloc);
   p->z = 0;
   p->zMalloc = 0;
@@ -57004,7 +57635,7 @@ SQLITE_PRIVATE void sqlite3VdbeMemPrepareToChange(Vdbe *pVdbe, Mem *pMem){
 */
 SQLITE_PRIVATE void sqlite3VdbeMemShallowCopy(Mem *pTo, const Mem *pFrom, int srcType){
   assert( (pFrom->flags & MEM_RowSet)==0 );
-  sqlite3VdbeMemReleaseExternal(pTo);
+  MemReleaseExt(pTo);
   memcpy(pTo, pFrom, MEMCELLSIZE);
   pTo->xDel = 0;
   if( (pFrom->flags&MEM_Static)==0 ){
@@ -57022,7 +57653,7 @@ SQLITE_PRIVATE int sqlite3VdbeMemCopy(Mem *pTo, const Mem *pFrom){
   int rc = SQLITE_OK;
 
   assert( (pFrom->flags & MEM_RowSet)==0 );
-  sqlite3VdbeMemReleaseExternal(pTo);
+  MemReleaseExt(pTo);
   memcpy(pTo, pFrom, MEMCELLSIZE);
   pTo->flags &= ~MEM_Dyn;
 
@@ -57416,11 +58047,11 @@ SQLITE_PRIVATE int sqlite3ValueFromExpr(
   }
   op = pExpr->op;
 
-  /* op can only be TK_REGISTER if we have compiled with SQLITE_ENABLE_STAT2.
+  /* op can only be TK_REGISTER if we have compiled with SQLITE_ENABLE_STAT3.
   ** The ifdef here is to enable us to achieve 100% branch test coverage even
-  ** when SQLITE_ENABLE_STAT2 is omitted.
+  ** when SQLITE_ENABLE_STAT3 is omitted.
   */
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
   if( op==TK_REGISTER ) op = pExpr->op2;
 #else
   if( NEVER(op==TK_REGISTER) ) op = pExpr->op2;
@@ -57977,6 +58608,12 @@ static void resolveP2Values(Vdbe *p, int *pMaxFuncArgs){
       n = pOp[-1].p1;
       if( n>nMaxArgs ) nMaxArgs = n;
 #endif
+    }else if( opcode==OP_Next || opcode==OP_SorterNext ){
+      pOp->p4.xAdvance = sqlite3BtreeNext;
+      pOp->p4type = P4_ADVANCE;
+    }else if( opcode==OP_Prev ){
+      pOp->p4.xAdvance = sqlite3BtreePrevious;
+      pOp->p4type = P4_ADVANCE;
     }
 
     if( (pOp->opflags & OPFLG_JUMP)!=0 && pOp->p2<0 ){
@@ -58068,10 +58705,9 @@ SQLITE_PRIVATE int sqlite3VdbeAddOpList(Vdbe *p, int nOp, VdbeOpList const *aOp)
 ** static array using sqlite3VdbeAddOpList but we want to make a
 ** few minor changes to the program.
 */
-SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){
+SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe *p, u32 addr, int val){
   assert( p!=0 );
-  assert( addr>=0 );
-  if( p->nOp>addr ){
+  if( ((u32)p->nOp)>addr ){
     p->aOp[addr].p1 = val;
   }
 }
@@ -58080,10 +58716,9 @@ SQLITE_PRIVATE void sqlite3VdbeChangeP1(Vdbe *p, int addr, int val){
 ** Change the value of the P2 operand for a specific instruction.
 ** This routine is useful for setting a jump destination.
 */
-SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){
+SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe *p, u32 addr, int val){
   assert( p!=0 );
-  assert( addr>=0 );
-  if( p->nOp>addr ){
+  if( ((u32)p->nOp)>addr ){
     p->aOp[addr].p2 = val;
   }
 }
@@ -58091,10 +58726,9 @@ SQLITE_PRIVATE void sqlite3VdbeChangeP2(Vdbe *p, int addr, int val){
 /*
 ** Change the value of the P3 operand for a specific instruction.
 */
-SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe *p, int addr, int val){
+SQLITE_PRIVATE void sqlite3VdbeChangeP3(Vdbe *p, u32 addr, int val){
   assert( p!=0 );
-  assert( addr>=0 );
-  if( p->nOp>addr ){
+  if( ((u32)p->nOp)>addr ){
     p->aOp[addr].p3 = val;
   }
 }
@@ -58116,8 +58750,8 @@ SQLITE_PRIVATE void sqlite3VdbeChangeP5(Vdbe *p, u8 val){
 ** the address of the next instruction to be coded.
 */
 SQLITE_PRIVATE void sqlite3VdbeJumpHere(Vdbe *p, int addr){
-  assert( addr>=0 );
-  sqlite3VdbeChangeP2(p, addr, p->nOp);
+  assert( addr>=0 || p->db->mallocFailed );
+  if( addr>=0 ) sqlite3VdbeChangeP2(p, addr, p->nOp);
 }
 
 
@@ -58211,18 +58845,15 @@ SQLITE_PRIVATE void sqlite3VdbeLinkSubProgram(Vdbe *pVdbe, SubProgram *p){
 }
 
 /*
-** Change N opcodes starting at addr to No-ops.
+** Change the opcode at addr into OP_Noop
 */
-SQLITE_PRIVATE void sqlite3VdbeChangeToNoop(Vdbe *p, int addr, int N){
+SQLITE_PRIVATE void sqlite3VdbeChangeToNoop(Vdbe *p, int addr){
   if( p->aOp ){
     VdbeOp *pOp = &p->aOp[addr];
     sqlite3 *db = p->db;
-    while( N-- ){
-      freeP4(db, pOp->p4type, pOp->p4.p);
-      memset(pOp, 0, sizeof(pOp[0]));
-      pOp->opcode = OP_Noop;
-      pOp++;
-    }
+    freeP4(db, pOp->p4type, pOp->p4.p);
+    memset(pOp, 0, sizeof(pOp[0]));
+    pOp->opcode = OP_Noop;
   }
 }
 
@@ -58325,30 +58956,29 @@ SQLITE_PRIVATE void sqlite3VdbeChangeP4(Vdbe *p, int addr, const char *zP4, int
 ** makes the code easier to read during debugging.  None of this happens
 ** in a production build.
 */
-SQLITE_PRIVATE void sqlite3VdbeComment(Vdbe *p, const char *zFormat, ...){
-  va_list ap;
-  if( !p ) return;
+static void vdbeVComment(Vdbe *p, const char *zFormat, va_list ap){
   assert( p->nOp>0 || p->aOp==0 );
   assert( p->aOp==0 || p->aOp[p->nOp-1].zComment==0 || p->db->mallocFailed );
   if( p->nOp ){
-    char **pz = &p->aOp[p->nOp-1].zComment;
+    assert( p->aOp );
+    sqlite3DbFree(p->db, p->aOp[p->nOp-1].zComment);
+    p->aOp[p->nOp-1].zComment = sqlite3VMPrintf(p->db, zFormat, ap);
+  }
+}
+SQLITE_PRIVATE void sqlite3VdbeComment(Vdbe *p, const char *zFormat, ...){
+  va_list ap;
+  if( p ){
     va_start(ap, zFormat);
-    sqlite3DbFree(p->db, *pz);
-    *pz = sqlite3VMPrintf(p->db, zFormat, ap);
+    vdbeVComment(p, zFormat, ap);
     va_end(ap);
   }
 }
 SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe *p, const char *zFormat, ...){
   va_list ap;
-  if( !p ) return;
-  sqlite3VdbeAddOp0(p, OP_Noop);
-  assert( p->nOp>0 || p->aOp==0 );
-  assert( p->aOp==0 || p->aOp[p->nOp-1].zComment==0 || p->db->mallocFailed );
-  if( p->nOp ){
-    char **pz = &p->aOp[p->nOp-1].zComment;
+  if( p ){
+    sqlite3VdbeAddOp0(p, OP_Noop);
     va_start(ap, zFormat);
-    sqlite3DbFree(p->db, *pz);
-    *pz = sqlite3VMPrintf(p->db, zFormat, ap);
+    vdbeVComment(p, zFormat, ap);
     va_end(ap);
   }
 }
@@ -58378,7 +59008,7 @@ SQLITE_PRIVATE void sqlite3VdbeNoopComment(Vdbe *p, const char *zFormat, ...){
 SQLITE_PRIVATE VdbeOp *sqlite3VdbeGetOp(Vdbe *p, int addr){
   /* C89 specifies that the constant "dummy" will be initialized to all
   ** zeros, which is correct.  MSVC generates a warning, nevertheless. */
-  static const VdbeOp dummy;  /* Ignore the MSVC warning about no initializer */
+  static VdbeOp dummy;  /* Ignore the MSVC warning about no initializer */
   assert( p->magic==VDBE_MAGIC_INIT );
   if( addr<0 ){
 #ifdef SQLITE_OMIT_TRACE
@@ -58486,6 +59116,10 @@ static char *displayP4(Op *pOp, char *zTemp, int nTemp){
       sqlite3_snprintf(nTemp, zTemp, "program");
       break;
     }
+    case P4_ADVANCE: {
+      zTemp[0] = 0;
+      break;
+    }
     default: {
       zP4 = pOp->p4.z;
       if( zP4==0 ){
@@ -58682,7 +59316,7 @@ SQLITE_PRIVATE int sqlite3VdbeList(
   sqlite3 *db = p->db;                 /* The database connection */
   int i;                               /* Loop counter */
   int rc = SQLITE_OK;                  /* Return code */
-  Mem *pMem = p->pResultSet = &p->aMem[1];  /* First Mem of result set */
+  Mem *pMem = &p->aMem[1];             /* First Mem of result set */
 
   assert( p->explain );
   assert( p->magic==VDBE_MAGIC_RUN );
@@ -58693,6 +59327,7 @@ SQLITE_PRIVATE int sqlite3VdbeList(
   ** sqlite3_column_text16(), causing a translation to UTF-16 encoding.
   */
   releaseMemArray(pMem, 8);
+  p->pResultSet = 0;
 
   if( p->rc==SQLITE_NOMEM ){
     /* This happens if a malloc() inside a call to sqlite3_column_text() or
@@ -58847,6 +59482,7 @@ SQLITE_PRIVATE int sqlite3VdbeList(
     }
 
     p->nResColumn = 8 - 4*(p->explain-1);
+    p->pResultSet = &p->aMem[1];
     p->rc = SQLITE_OK;
     rc = SQLITE_ROW;
   }
@@ -59109,6 +59745,7 @@ SQLITE_PRIVATE void sqlite3VdbeFreeCursor(Vdbe *p, VdbeCursor *pCx){
   if( pCx==0 ){
     return;
   }
+  sqlite3VdbeSorterClose(p->db, pCx);
   if( pCx->pBt ){
     sqlite3BtreeClose(pCx->pBt);
     /* The pCx->pCursor will be close automatically, if it exists, by
@@ -59849,6 +60486,30 @@ SQLITE_PRIVATE void sqlite3VdbeResetStepResult(Vdbe *p){
 }
 
 /*
+** Copy the error code and error message belonging to the VDBE passed
+** as the first argument to its database handle (so that they will be 
+** returned by calls to sqlite3_errcode() and sqlite3_errmsg()).
+**
+** This function does not clear the VDBE error code or message, just
+** copies them to the database handle.
+*/
+SQLITE_PRIVATE int sqlite3VdbeTransferError(Vdbe *p){
+  sqlite3 *db = p->db;
+  int rc = p->rc;
+  if( p->zErrMsg ){
+    u8 mallocFailed = db->mallocFailed;
+    sqlite3BeginBenignMalloc();
+    sqlite3ValueSetStr(db->pErr, -1, p->zErrMsg, SQLITE_UTF8, SQLITE_TRANSIENT);
+    sqlite3EndBenignMalloc();
+    db->mallocFailed = mallocFailed;
+    db->errCode = rc;
+  }else{
+    sqlite3Error(db, rc, 0);
+  }
+  return rc;
+}
+
+/*
 ** Clean up a VDBE after execution but do not delete the VDBE just yet.
 ** Write any error messages into *pzErrMsg.  Return the result code.
 **
@@ -59875,18 +60536,9 @@ SQLITE_PRIVATE int sqlite3VdbeReset(Vdbe *p){
   ** instructions yet, leave the main database error information unchanged.
   */
   if( p->pc>=0 ){
-    if( p->zErrMsg ){
-      sqlite3BeginBenignMalloc();
-      sqlite3ValueSetStr(db->pErr,-1,p->zErrMsg,SQLITE_UTF8,SQLITE_TRANSIENT);
-      sqlite3EndBenignMalloc();
-      db->errCode = p->rc;
-      sqlite3DbFree(db, p->zErrMsg);
-      p->zErrMsg = 0;
-    }else if( p->rc ){
-      sqlite3Error(db, p->rc, 0);
-    }else{
-      sqlite3Error(db, SQLITE_OK, 0);
-    }
+    sqlite3VdbeTransferError(p);
+    sqlite3DbFree(db, p->zErrMsg);
+    p->zErrMsg = 0;
     if( p->runOnlyOnce ) p->expired = 1;
   }else if( p->rc && p->expired ){
     /* The expired flag was set on the VDBE before the first call
@@ -60366,57 +61018,70 @@ SQLITE_PRIVATE u32 sqlite3VdbeSerialGet(
   return 0;
 }
 
-
 /*
-** Given the nKey-byte encoding of a record in pKey[], parse the
-** record into a UnpackedRecord structure.  Return a pointer to
-** that structure.
+** This routine is used to allocate sufficient space for an UnpackedRecord
+** structure large enough to be used with sqlite3VdbeRecordUnpack() if
+** the first argument is a pointer to KeyInfo structure pKeyInfo.
 **
-** The calling function might provide szSpace bytes of memory
-** space at pSpace.  This space can be used to hold the returned
-** VDbeParsedRecord structure if it is large enough.  If it is
-** not big enough, space is obtained from sqlite3_malloc().
+** The space is either allocated using sqlite3DbMallocRaw() or from within
+** the unaligned buffer passed via the second and third arguments (presumably
+** stack space). If the former, then *ppFree is set to a pointer that should
+** be eventually freed by the caller using sqlite3DbFree(). Or, if the 
+** allocation comes from the pSpace/szSpace buffer, *ppFree is set to NULL
+** before returning.
 **
-** The returned structure should be closed by a call to
-** sqlite3VdbeDeleteUnpackedRecord().
-*/ 
-SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeRecordUnpack(
-  KeyInfo *pKeyInfo,     /* Information about the record format */
-  int nKey,              /* Size of the binary record */
-  const void *pKey,      /* The binary record */
-  char *pSpace,          /* Unaligned space available to hold the object */
-  int szSpace            /* Size of pSpace[] in bytes */
+** If an OOM error occurs, NULL is returned.
+*/
+SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(
+  KeyInfo *pKeyInfo,              /* Description of the record */
+  char *pSpace,                   /* Unaligned space available */
+  int szSpace,                    /* Size of pSpace[] in bytes */
+  char **ppFree                   /* OUT: Caller should free this pointer */
 ){
-  const unsigned char *aKey = (const unsigned char *)pKey;
-  UnpackedRecord *p;  /* The unpacked record that we will return */
-  int nByte;          /* Memory space needed to hold p, in bytes */
-  int d;
-  u32 idx;
-  u16 u;              /* Unsigned loop counter */
-  u32 szHdr;
-  Mem *pMem;
-  int nOff;           /* Increase pSpace by this much to 8-byte align it */
-  
-  /*
-  ** We want to shift the pointer pSpace up such that it is 8-byte aligned.
+  UnpackedRecord *p;              /* Unpacked record to return */
+  int nOff;                       /* Increment pSpace by nOff to align it */
+  int nByte;                      /* Number of bytes required for *p */
+
+  /* We want to shift the pointer pSpace up such that it is 8-byte aligned.
   ** Thus, we need to calculate a value, nOff, between 0 and 7, to shift 
   ** it by.  If pSpace is already 8-byte aligned, nOff should be zero.
   */
   nOff = (8 - (SQLITE_PTR_TO_INT(pSpace) & 7)) & 7;
-  pSpace += nOff;
-  szSpace -= nOff;
   nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nField+1);
-  if( nByte>szSpace ){
-    p = sqlite3DbMallocRaw(pKeyInfo->db, nByte);
-    if( p==0 ) return 0;
-    p->flags = UNPACKED_NEED_FREE | UNPACKED_NEED_DESTROY;
+  if( nByte>szSpace+nOff ){
+    p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->db, nByte);
+    *ppFree = (char *)p;
+    if( !p ) return 0;
   }else{
-    p = (UnpackedRecord*)pSpace;
-    p->flags = UNPACKED_NEED_DESTROY;
+    p = (UnpackedRecord*)&pSpace[nOff];
+    *ppFree = 0;
   }
+
+  p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))];
   p->pKeyInfo = pKeyInfo;
   p->nField = pKeyInfo->nField + 1;
-  p->aMem = pMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))];
+  return p;
+}
+
+/*
+** Given the nKey-byte encoding of a record in pKey[], populate the 
+** UnpackedRecord structure indicated by the fourth argument with the
+** contents of the decoded record.
+*/ 
+SQLITE_PRIVATE void sqlite3VdbeRecordUnpack(
+  KeyInfo *pKeyInfo,     /* Information about the record format */
+  int nKey,              /* Size of the binary record */
+  const void *pKey,      /* The binary record */
+  UnpackedRecord *p      /* Populate this structure before returning. */
+){
+  const unsigned char *aKey = (const unsigned char *)pKey;
+  int d; 
+  u32 idx;                        /* Offset in aKey[] to read from */
+  u16 u;                          /* Unsigned loop counter */
+  u32 szHdr;
+  Mem *pMem = p->aMem;
+
+  p->flags = 0;
   assert( EIGHT_BYTE_ALIGNMENT(pMem) );
   idx = getVarint32(aKey, szHdr);
   d = szHdr;
@@ -60435,31 +61100,6 @@ SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeRecordUnpack(
   }
   assert( u<=pKeyInfo->nField + 1 );
   p->nField = u;
-  return (void*)p;
-}
-
-/*
-** This routine destroys a UnpackedRecord object.
-*/
-SQLITE_PRIVATE void sqlite3VdbeDeleteUnpackedRecord(UnpackedRecord *p){
-#ifdef SQLITE_DEBUG
-  int i;
-  Mem *pMem;
-
-  assert( p!=0 );
-  assert( p->flags & UNPACKED_NEED_DESTROY );
-  for(i=0, pMem=p->aMem; i<p->nField; i++, pMem++){
-    /* The unpacked record is always constructed by the
-    ** sqlite3VdbeUnpackRecord() function above, which makes all
-    ** strings and blobs static.  And none of the elements are
-    ** ever transformed, so there is never anything to delete.
-    */
-    if( NEVER(pMem->zMalloc) ) sqlite3VdbeMemRelease(pMem);
-  }
-#endif
-  if( p->flags & UNPACKED_NEED_FREE ){
-    sqlite3DbFree(p->pKeyInfo->db, p);
-  }
 }
 
 /*
@@ -60613,7 +61253,7 @@ SQLITE_PRIVATE int sqlite3VdbeIdxRowid(sqlite3 *db, BtCursor *pCur, i64 *rowid){
   ** this code can safely assume that nCellKey is 32-bits  
   */
   assert( sqlite3BtreeCursorIsValid(pCur) );
-  rc = sqlite3BtreeKeySize(pCur, &nCellKey);
+  VVA_ONLY(rc =) sqlite3BtreeKeySize(pCur, &nCellKey);
   assert( rc==SQLITE_OK );     /* pCur is always valid so KeySize cannot fail */
   assert( (nCellKey & SQLITE_MAX_U32)==(u64)nCellKey );
 
@@ -60688,7 +61328,7 @@ SQLITE_PRIVATE int sqlite3VdbeIdxKeyCompare(
   Mem m;
 
   assert( sqlite3BtreeCursorIsValid(pCur) );
-  rc = sqlite3BtreeKeySize(pCur, &nCellKey);
+  VVA_ONLY(rc =) sqlite3BtreeKeySize(pCur, &nCellKey);
   assert( rc==SQLITE_OK );    /* pCur is always valid so KeySize cannot fail */
   /* nCellKey will always be between 0 and 0xffffffff because of the say
   ** that btreeParseCellPtr() and sqlite3GetVarint32() are implemented */
@@ -61244,7 +61884,7 @@ end_of_step:
     ** error has occured, then return the error code in p->rc to the
     ** caller. Set the error code in the database handle to the same value.
     */ 
-    rc = db->errCode = p->rc;
+    rc = sqlite3VdbeTransferError(p);
   }
   return (rc&db->errMask);
 }
@@ -61278,7 +61918,7 @@ SQLITE_API int sqlite3_step(sqlite3_stmt *pStmt){
          && cnt++ < SQLITE_MAX_SCHEMA_RETRY
          && (rc2 = rc = sqlite3Reprepare(v))==SQLITE_OK ){
     sqlite3_reset(pStmt);
-    v->expired = 0;
+    assert( v->expired==0 );
   }
   if( rc2!=SQLITE_OK && ALWAYS(v->isPrepareV2) && ALWAYS(db->pErr) ){
     /* This case occurs after failing to recompile an sql statement. 
@@ -62409,6 +63049,13 @@ SQLITE_API int sqlite3_found_count = 0;
 */
 #define ExpandBlob(P) (((P)->flags&MEM_Zero)?sqlite3VdbeMemExpandBlob(P):0)
 
+/* Return true if the cursor was opened using the OP_SorterOpen opcode. */
+#ifdef SQLITE_OMIT_MERGE_SORT
+# define isSorter(x) 0
+#else
+# define isSorter(x) ((x)->pSorter!=0)
+#endif
+
 /*
 ** Argument pMem points at a register that will be passed to a
 ** user-defined function or returned to the user as the result of a query.
@@ -63003,6 +63650,7 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       u32 szField;       /* Number of bytes in the content of a field */
       int szHdr;         /* Size of the header size field at start of record */
       int avail;         /* Number of bytes of available data */
+      u32 t;             /* A type code from the record header */
       Mem *pReg;         /* PseudoTable input register */
     } am;
     struct OP_Affinity_stack_vars {
@@ -63074,9 +63722,12 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
     struct OP_OpenEphemeral_stack_vars {
       VdbeCursor *pCx;
     } ax;
-    struct OP_OpenPseudo_stack_vars {
+    struct OP_SorterOpen_stack_vars {
       VdbeCursor *pCx;
     } ay;
+    struct OP_OpenPseudo_stack_vars {
+      VdbeCursor *pCx;
+    } az;
     struct OP_SeekGt_stack_vars {
       int res;
       int oc;
@@ -63084,18 +63735,19 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       UnpackedRecord r;
       int nField;
       i64 iKey;      /* The rowid we are to seek to */
-    } az;
+    } ba;
     struct OP_Seek_stack_vars {
       VdbeCursor *pC;
-    } ba;
+    } bb;
     struct OP_Found_stack_vars {
       int alreadyExists;
       VdbeCursor *pC;
       int res;
+      char *pFree;
       UnpackedRecord *pIdxKey;
       UnpackedRecord r;
       char aTempRec[ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*3 + 7];
-    } bb;
+    } bc;
     struct OP_IsUnique_stack_vars {
       u16 ii;
       VdbeCursor *pCx;
@@ -63104,13 +63756,13 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       Mem *aMx;
       UnpackedRecord r;                  /* B-Tree index search key */
       i64 R;                             /* Rowid stored in register P3 */
-    } bc;
+    } bd;
     struct OP_NotExists_stack_vars {
       VdbeCursor *pC;
       BtCursor *pCrsr;
       int res;
       u64 iKey;
-    } bd;
+    } be;
     struct OP_NewRowid_stack_vars {
       i64 v;                 /* The new rowid */
       VdbeCursor *pC;        /* Cursor of table to get the new rowid */
@@ -63118,7 +63770,7 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       int cnt;               /* Counter to limit the number of searches */
       Mem *pMem;             /* Register holding largest rowid for AUTOINCREMENT */
       VdbeFrame *pFrame;     /* Root frame of VDBE */
-    } be;
+    } bf;
     struct OP_InsertInt_stack_vars {
       Mem *pData;       /* MEM cell holding data for the record to be inserted */
       Mem *pKey;        /* MEM cell holding key  for the record */
@@ -63129,83 +63781,89 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       const char *zDb;  /* database name - used by the update hook */
       const char *zTbl; /* Table name - used by the opdate hook */
       int op;           /* Opcode for update hook: SQLITE_UPDATE or SQLITE_INSERT */
-    } bf;
+    } bg;
     struct OP_Delete_stack_vars {
       i64 iKey;
       VdbeCursor *pC;
-    } bg;
+    } bh;
+    struct OP_SorterCompare_stack_vars {
+      VdbeCursor *pC;
+      int res;
+    } bi;
+    struct OP_SorterData_stack_vars {
+      VdbeCursor *pC;
+    } bj;
     struct OP_RowData_stack_vars {
       VdbeCursor *pC;
       BtCursor *pCrsr;
       u32 n;
       i64 n64;
-    } bh;
+    } bk;
     struct OP_Rowid_stack_vars {
       VdbeCursor *pC;
       i64 v;
       sqlite3_vtab *pVtab;
       const sqlite3_module *pModule;
-    } bi;
+    } bl;
     struct OP_NullRow_stack_vars {
       VdbeCursor *pC;
-    } bj;
+    } bm;
     struct OP_Last_stack_vars {
       VdbeCursor *pC;
       BtCursor *pCrsr;
       int res;
-    } bk;
+    } bn;
     struct OP_Rewind_stack_vars {
       VdbeCursor *pC;
       BtCursor *pCrsr;
       int res;
-    } bl;
+    } bo;
     struct OP_Next_stack_vars {
       VdbeCursor *pC;
-      BtCursor *pCrsr;
       int res;
-    } bm;
+    } bp;
     struct OP_IdxInsert_stack_vars {
       VdbeCursor *pC;
       BtCursor *pCrsr;
       int nKey;
       const char *zKey;
-    } bn;
+    } bq;
     struct OP_IdxDelete_stack_vars {
       VdbeCursor *pC;
       BtCursor *pCrsr;
       int res;
       UnpackedRecord r;
-    } bo;
+    } br;
     struct OP_IdxRowid_stack_vars {
       BtCursor *pCrsr;
       VdbeCursor *pC;
       i64 rowid;
-    } bp;
+    } bs;
     struct OP_IdxGE_stack_vars {
       VdbeCursor *pC;
       int res;
       UnpackedRecord r;
-    } bq;
+    } bt;
     struct OP_Destroy_stack_vars {
       int iMoved;
       int iCnt;
       Vdbe *pVdbe;
       int iDb;
-    } br;
+    } bu;
     struct OP_Clear_stack_vars {
       int nChange;
-    } bs;
+    } bv;
     struct OP_CreateTable_stack_vars {
       int pgno;
       int flags;
       Db *pDb;
-    } bt;
+    } bw;
     struct OP_ParseSchema_stack_vars {
       int iDb;
       const char *zMaster;
       char *zSql;
       InitData initData;
-    } bu;
+    } bx;
     struct OP_IntegrityCk_stack_vars {
       int nRoot;      /* Number of tables to check.  (Number of root pages.) */
       int *aRoot;     /* Array of rootpage numbers for tables to be checked */
@@ -63213,14 +63871,14 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       int nErr;       /* Number of errors reported */
       char *z;        /* Text of the error report */
       Mem *pnErr;     /* Register keeping track of errors remaining */
-    } bv;
+    } by;
     struct OP_RowSetRead_stack_vars {
       i64 val;
-    } bw;
+    } bz;
     struct OP_RowSetTest_stack_vars {
       int iSet;
       int exists;
-    } bx;
+    } ca;
     struct OP_Program_stack_vars {
       int nMem;               /* Number of memory registers for sub-program */
       int nByte;              /* Bytes of runtime space required for sub-program */
@@ -63230,15 +63888,15 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       VdbeFrame *pFrame;      /* New vdbe frame to execute in */
       SubProgram *pProgram;   /* Sub-program to execute */
       void *t;                /* Token identifying trigger */
-    } by;
+    } cb;
     struct OP_Param_stack_vars {
       VdbeFrame *pFrame;
       Mem *pIn;
-    } bz;
+    } cc;
     struct OP_MemMax_stack_vars {
       Mem *pIn1;
       VdbeFrame *pFrame;
-    } ca;
+    } cd;
     struct OP_AggStep_stack_vars {
       int n;
       int i;
@@ -63246,34 +63904,34 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       Mem *pRec;
       sqlite3_context ctx;
       sqlite3_value **apVal;
-    } cb;
+    } ce;
     struct OP_AggFinal_stack_vars {
       Mem *pMem;
-    } cc;
+    } cf;
     struct OP_Checkpoint_stack_vars {
       int i;                          /* Loop counter */
       int aRes[3];                    /* Results */
       Mem *pMem;                      /* Write results here */
-    } cd;
+    } cg;
     struct OP_JournalMode_stack_vars {
       Btree *pBt;                     /* Btree to change journal mode of */
       Pager *pPager;                  /* Pager associated with pBt */
       int eNew;                       /* New journal mode */
       int eOld;                       /* The old journal mode */
       const char *zFilename;          /* Name of database file for pPager */
-    } ce;
+    } ch;
     struct OP_IncrVacuum_stack_vars {
       Btree *pBt;
-    } cf;
+    } ci;
     struct OP_VBegin_stack_vars {
       VTable *pVTab;
-    } cg;
+    } cj;
     struct OP_VOpen_stack_vars {
       VdbeCursor *pCur;
       sqlite3_vtab_cursor *pVtabCursor;
       sqlite3_vtab *pVtab;
       sqlite3_module *pModule;
-    } ch;
+    } ck;
     struct OP_VFilter_stack_vars {
       int nArg;
       int iQuery;
@@ -63286,23 +63944,23 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       int res;
       int i;
       Mem **apArg;
-    } ci;
+    } cl;
     struct OP_VColumn_stack_vars {
       sqlite3_vtab *pVtab;
       const sqlite3_module *pModule;
       Mem *pDest;
       sqlite3_context sContext;
-    } cj;
+    } cm;
     struct OP_VNext_stack_vars {
       sqlite3_vtab *pVtab;
       const sqlite3_module *pModule;
       int res;
       VdbeCursor *pCur;
-    } ck;
+    } cn;
     struct OP_VRename_stack_vars {
       sqlite3_vtab *pVtab;
       Mem *pName;
-    } cl;
+    } co;
     struct OP_VUpdate_stack_vars {
       sqlite3_vtab *pVtab;
       sqlite3_module *pModule;
@@ -63311,11 +63969,11 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       sqlite_int64 rowid;
       Mem **apArg;
       Mem *pX;
-    } cm;
+    } cp;
     struct OP_Trace_stack_vars {
       char *zTrace;
       char *z;
-    } cn;
+    } cq;
   } u;
   /* End automatically generated code
   ********************************************************************/
@@ -63415,7 +64073,7 @@ SQLITE_PRIVATE int sqlite3VdbeExec(
       assert( pOp->p2<=p->nMem );
       pOut = &aMem[pOp->p2];
       memAboutToChange(p, pOut);
-      sqlite3VdbeMemReleaseExternal(pOut);
+      MemReleaseExt(pOut);
       pOut->flags = MEM_Int;
     }
 
@@ -63782,6 +64440,11 @@ case OP_Move: {
     u.ac.zMalloc = pOut->zMalloc;
     pOut->zMalloc = 0;
     sqlite3VdbeMemMove(pOut, pIn1);
+#ifdef SQLITE_DEBUG
+    if( pOut->pScopyFrom>=&aMem[u.ac.p1] && pOut->pScopyFrom<&aMem[u.ac.p1+pOp->p3] ){
+      pOut->pScopyFrom += u.ac.p1 - pOp->p2;
+    }
+#endif
     pIn1->zMalloc = u.ac.zMalloc;
     REGISTER_TRACE(u.ac.p2++, pOut);
     pIn1++;
@@ -64780,6 +65443,16 @@ case OP_BitNot: {             /* same as TK_BITNOT, in1, out2 */
   break;
 }
 
+/* Opcode: Once P1 P2 * * *
+**
+** Jump to P2 if the value in register P1 is neither NULL nor zero.  If
+** the value is NULL or zero, fall through and change the P1 register
+** to an integer 1.
+**
+** When P1 is not used otherwise in a program, this opcode falls through
+** once and jumps on all subsequent invocations.  It is the equivalent
+** of "OP_If P1 P2", followed by "OP_Integer 1 P1".
+*/
 /* Opcode: If P1 P2 P3 * *
 **
 ** Jump to P2 if the value in register P1 is true.  The value
@@ -64792,6 +65465,7 @@ case OP_BitNot: {             /* same as TK_BITNOT, in1, out2 */
 ** is considered true if it has a numeric value of zero.  If the value
 ** in P1 is NULL then take the jump if P3 is true.
 */
+case OP_Once:               /* jump, in1 */
 case OP_If:                 /* jump, in1 */
 case OP_IfNot: {            /* jump, in1 */
 #if 0  /* local variables moved into u.al */
@@ -64810,6 +65484,12 @@ case OP_IfNot: {            /* jump, in1 */
   }
   if( u.al.c ){
     pc = pOp->p2-1;
+  }else if( pOp->opcode==OP_Once ){
+    assert( (pIn1->flags & (MEM_Agg|MEM_Dyn|MEM_RowSet|MEM_Frame))==0 );
+    memAboutToChange(p, pIn1);
+    pIn1->flags = MEM_Int;
+    pIn1->u.i = 1;
+    REGISTER_TRACE(pOp->p1, pIn1);
   }
   break;
 }
@@ -64880,6 +65560,7 @@ case OP_Column: {
   u32 szField;       /* Number of bytes in the content of a field */
   int szHdr;         /* Size of the header size field at start of record */
   int avail;         /* Number of bytes of available data */
+  u32 t;             /* A type code from the record header */
   Mem *pReg;         /* PseudoTable input register */
 #endif /* local variables moved into u.am */
 
@@ -64892,7 +65573,6 @@ case OP_Column: {
   assert( pOp->p3>0 && pOp->p3<=p->nMem );
   u.am.pDest = &aMem[pOp->p3];
   memAboutToChange(p, u.am.pDest);
-  MemSetTypeFlag(u.am.pDest, MEM_Null);
   u.am.zRec = 0;
 
   /* This block sets the variable u.am.payloadSize to be the total number of
@@ -64924,7 +65604,7 @@ case OP_Column: {
       u.am.zRec = (char*)u.am.pC->aRow;
     }else if( u.am.pC->isIndex ){
       assert( sqlite3BtreeCursorIsValid(u.am.pCrsr) );
-      rc = sqlite3BtreeKeySize(u.am.pCrsr, &u.am.payloadSize64);
+      VVA_ONLY(rc =) sqlite3BtreeKeySize(u.am.pCrsr, &u.am.payloadSize64);
       assert( rc==SQLITE_OK );   /* True because of CursorMoveto() call above */
       /* sqlite3BtreeParseCellPtr() uses getVarint32() to extract the
       ** payload size, so it is impossible for u.am.payloadSize64 to be
@@ -64933,10 +65613,10 @@ case OP_Column: {
       u.am.payloadSize = (u32)u.am.payloadSize64;
     }else{
       assert( sqlite3BtreeCursorIsValid(u.am.pCrsr) );
-      rc = sqlite3BtreeDataSize(u.am.pCrsr, &u.am.payloadSize);
+      VVA_ONLY(rc =) sqlite3BtreeDataSize(u.am.pCrsr, &u.am.payloadSize);
       assert( rc==SQLITE_OK );   /* DataSize() cannot fail */
     }
-  }else if( u.am.pC->pseudoTableReg>0 ){
+  }else if( ALWAYS(u.am.pC->pseudoTableReg>0) ){
     u.am.pReg = &aMem[u.am.pC->pseudoTableReg];
     assert( u.am.pReg->flags & MEM_Blob );
     assert( memIsValid(u.am.pReg) );
@@ -64949,9 +65629,10 @@ case OP_Column: {
     u.am.payloadSize = 0;
   }
 
-  /* If u.am.payloadSize is 0, then just store a NULL */
+  /* If u.am.payloadSize is 0, then just store a NULL.  This can happen because of
+  ** nullRow or because of a corrupt database. */
   if( u.am.payloadSize==0 ){
-    assert( u.am.pDest->flags&MEM_Null );
+    MemSetTypeFlag(u.am.pDest, MEM_Null);
     goto op_column_out;
   }
   assert( db->aLimit[SQLITE_LIMIT_LENGTH]>=0 );
@@ -65058,8 +65739,14 @@ case OP_Column: {
     for(u.am.i=0; u.am.i<u.am.nField; u.am.i++){
       if( u.am.zIdx<u.am.zEndHdr ){
         u.am.aOffset[u.am.i] = u.am.offset;
-        u.am.zIdx += getVarint32(u.am.zIdx, u.am.aType[u.am.i]);
-        u.am.szField = sqlite3VdbeSerialTypeLen(u.am.aType[u.am.i]);
+        if( u.am.zIdx[0]<0x80 ){
+          u.am.t = u.am.zIdx[0];
+          u.am.zIdx++;
+        }else{
+          u.am.zIdx += sqlite3GetVarint32(u.am.zIdx, &u.am.t);
+        }
+        u.am.aType[u.am.i] = u.am.t;
+        u.am.szField = sqlite3VdbeSerialTypeLen(u.am.t);
         u.am.offset += u.am.szField;
         if( u.am.offset<u.am.szField ){  /* True if u.am.offset overflows */
           u.am.zIdx = &u.am.zEndHdr[1];  /* Forces SQLITE_CORRUPT return below */
@@ -65100,7 +65787,7 @@ case OP_Column: {
   if( u.am.aOffset[u.am.p2] ){
     assert( rc==SQLITE_OK );
     if( u.am.zRec ){
-      sqlite3VdbeMemReleaseExternal(u.am.pDest);
+      MemReleaseExt(u.am.pDest);
       sqlite3VdbeSerialGet((u8 *)&u.am.zRec[u.am.aOffset[u.am.p2]], u.am.aType[u.am.p2], u.am.pDest);
     }else{
       u.am.len = sqlite3VdbeSerialTypeLen(u.am.aType[u.am.p2]);
@@ -65117,7 +65804,7 @@ case OP_Column: {
     if( pOp->p4type==P4_MEM ){
       sqlite3VdbeMemShallowCopy(u.am.pDest, pOp->p4.pMem, MEM_Static);
     }else{
-      assert( u.am.pDest->flags&MEM_Null );
+      MemSetTypeFlag(u.am.pDest, MEM_Null);
     }
   }
 
@@ -65319,7 +66006,7 @@ case OP_Count: {         /* out2-prerelease */
 #endif /* local variables moved into u.ap */
 
   u.ap.pCrsr = p->apCsr[pOp->p1]->pCursor;
-  if( u.ap.pCrsr ){
+  if( ALWAYS(u.ap.pCrsr) ){
     rc = sqlite3BtreeCount(u.ap.pCrsr, &u.ap.nEntry);
   }else{
     u.ap.nEntry = 0;
@@ -65895,15 +66582,9 @@ case OP_OpenWrite: {
   rc = sqlite3BtreeCursor(u.aw.pX, u.aw.p2, u.aw.wrFlag, u.aw.pKeyInfo, u.aw.pCur->pCursor);
   u.aw.pCur->pKeyInfo = u.aw.pKeyInfo;
 
-  /* Since it performs no memory allocation or IO, the only values that
-  ** sqlite3BtreeCursor() may return are SQLITE_EMPTY and SQLITE_OK.
-  ** SQLITE_EMPTY is only returned when attempting to open the table
-  ** rooted at page 1 of a zero-byte database.  */
-  assert( rc==SQLITE_EMPTY || rc==SQLITE_OK );
-  if( rc==SQLITE_EMPTY ){
-    u.aw.pCur->pCursor = 0;
-    rc = SQLITE_OK;
-  }
+  /* Since it performs no memory allocation or IO, the only value that
+  ** sqlite3BtreeCursor() may return is SQLITE_OK. */
+  assert( rc==SQLITE_OK );
 
   /* Set the VdbeCursor.isTable and isIndex variables. Previous versions of
   ** SQLite used to check if the root-page flags were sane at this point
@@ -65914,7 +66595,7 @@ case OP_OpenWrite: {
   break;
 }
 
-/* Opcode: OpenEphemeral P1 P2 * P4 *
+/* Opcode: OpenEphemeral P1 P2 * P4 P5
 **
 ** Open a new cursor P1 to a transient table.
 ** The cursor is always opened read/write even if 
@@ -65931,6 +66612,11 @@ case OP_OpenWrite: {
 ** to a TEMP table at the SQL level, or to a table opened by
 ** this opcode.  Then this opcode was call OpenVirtual.  But
 ** that created confusion with the whole virtual-table idea.
+**
+** The P5 parameter can be a mask of the BTREE_* flags defined
+** in btree.h.  These flags control aspects of the operation of
+** the btree.  The BTREE_OMIT_JOURNAL and BTREE_SINGLE flags are
+** added automatically.
 */
 /* Opcode: OpenAutoindex P1 P2 * P4 *
 **
@@ -65969,7 +66655,7 @@ case OP_OpenEphemeral: {
     if( pOp->p4.pKeyInfo ){
       int pgno;
       assert( pOp->p4type==P4_KEYINFO );
-      rc = sqlite3BtreeCreateTable(u.ax.pCx->pBt, &pgno, BTREE_BLOBKEY);
+      rc = sqlite3BtreeCreateTable(u.ax.pCx->pBt, &pgno, BTREE_BLOBKEY | pOp->p5);
       if( rc==SQLITE_OK ){
         assert( pgno==MASTER_ROOT+1 );
         rc = sqlite3BtreeCursor(u.ax.pCx->pBt, pgno, 1,
@@ -65988,6 +66674,30 @@ case OP_OpenEphemeral: {
   break;
 }
 
+/* Opcode: SorterOpen P1 P2 * P4 *
+**
+** This opcode works like OP_OpenEphemeral except that it opens
+** a transient index that is specifically designed to sort large
+** tables using an external merge-sort algorithm.
+*/
+case OP_SorterOpen: {
+#if 0  /* local variables moved into u.ay */
+  VdbeCursor *pCx;
+#endif /* local variables moved into u.ay */
+#ifndef SQLITE_OMIT_MERGE_SORT
+  u.ay.pCx = allocateCursor(p, pOp->p1, pOp->p2, -1, 1);
+  if( u.ay.pCx==0 ) goto no_mem;
+  u.ay.pCx->pKeyInfo = pOp->p4.pKeyInfo;
+  u.ay.pCx->pKeyInfo->enc = ENC(p->db);
+  u.ay.pCx->isSorter = 1;
+  rc = sqlite3VdbeSorterInit(db, u.ay.pCx);
+#else
+  pOp->opcode = OP_OpenEphemeral;
+  pc--;
+#endif
+  break;
+}
+
 /* Opcode: OpenPseudo P1 P2 P3 * *
 **
 ** Open a new cursor that points to a fake table that contains a single
@@ -66004,17 +66714,17 @@ case OP_OpenEphemeral: {
 ** the pseudo-table.
 */
 case OP_OpenPseudo: {
-#if 0  /* local variables moved into u.ay */
+#if 0  /* local variables moved into u.az */
   VdbeCursor *pCx;
-#endif /* local variables moved into u.ay */
+#endif /* local variables moved into u.az */
 
   assert( pOp->p1>=0 );
-  u.ay.pCx = allocateCursor(p, pOp->p1, pOp->p3, -1, 0);
-  if( u.ay.pCx==0 ) goto no_mem;
-  u.ay.pCx->nullRow = 1;
-  u.ay.pCx->pseudoTableReg = pOp->p2;
-  u.ay.pCx->isTable = 1;
-  u.ay.pCx->isIndex = 0;
+  u.az.pCx = allocateCursor(p, pOp->p1, pOp->p3, -1, 0);
+  if( u.az.pCx==0 ) goto no_mem;
+  u.az.pCx->nullRow = 1;
+  u.az.pCx->pseudoTableReg = pOp->p2;
+  u.az.pCx->isTable = 1;
+  u.az.pCx->isIndex = 0;
   break;
 }
 
@@ -66086,35 +66796,35 @@ case OP_SeekLt:         /* jump, in3 */
 case OP_SeekLe:         /* jump, in3 */
 case OP_SeekGe:         /* jump, in3 */
 case OP_SeekGt: {       /* jump, in3 */
-#if 0  /* local variables moved into u.az */
+#if 0  /* local variables moved into u.ba */
   int res;
   int oc;
   VdbeCursor *pC;
   UnpackedRecord r;
   int nField;
   i64 iKey;      /* The rowid we are to seek to */
-#endif /* local variables moved into u.az */
+#endif /* local variables moved into u.ba */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
   assert( pOp->p2!=0 );
-  u.az.pC = p->apCsr[pOp->p1];
-  assert( u.az.pC!=0 );
-  assert( u.az.pC->pseudoTableReg==0 );
+  u.ba.pC = p->apCsr[pOp->p1];
+  assert( u.ba.pC!=0 );
+  assert( u.ba.pC->pseudoTableReg==0 );
   assert( OP_SeekLe == OP_SeekLt+1 );
   assert( OP_SeekGe == OP_SeekLt+2 );
   assert( OP_SeekGt == OP_SeekLt+3 );
-  assert( u.az.pC->isOrdered );
-  if( u.az.pC->pCursor!=0 ){
-    u.az.oc = pOp->opcode;
-    u.az.pC->nullRow = 0;
-    if( u.az.pC->isTable ){
+  assert( u.ba.pC->isOrdered );
+  if( ALWAYS(u.ba.pC->pCursor!=0) ){
+    u.ba.oc = pOp->opcode;
+    u.ba.pC->nullRow = 0;
+    if( u.ba.pC->isTable ){
       /* The input value in P3 might be of any type: integer, real, string,
       ** blob, or NULL.  But it needs to be an integer before we can do
       ** the seek, so covert it. */
       pIn3 = &aMem[pOp->p3];
       applyNumericAffinity(pIn3);
-      u.az.iKey = sqlite3VdbeIntValue(pIn3);
-      u.az.pC->rowidIsValid = 0;
+      u.ba.iKey = sqlite3VdbeIntValue(pIn3);
+      u.ba.pC->rowidIsValid = 0;
 
       /* If the P3 value could not be converted into an integer without
       ** loss of information, then special processing is required... */
@@ -66129,101 +66839,101 @@ case OP_SeekGt: {       /* jump, in3 */
         ** point number. */
         assert( (pIn3->flags & MEM_Real)!=0 );
 
-        if( u.az.iKey==SMALLEST_INT64 && (pIn3->r<(double)u.az.iKey || pIn3->r>0) ){
+        if( u.ba.iKey==SMALLEST_INT64 && (pIn3->r<(double)u.ba.iKey || pIn3->r>0) ){
           /* The P3 value is too large in magnitude to be expressed as an
           ** integer. */
-          u.az.res = 1;
+          u.ba.res = 1;
           if( pIn3->r<0 ){
-            if( u.az.oc>=OP_SeekGe ){  assert( u.az.oc==OP_SeekGe || u.az.oc==OP_SeekGt );
-              rc = sqlite3BtreeFirst(u.az.pC->pCursor, &u.az.res);
+            if( u.ba.oc>=OP_SeekGe ){  assert( u.ba.oc==OP_SeekGe || u.ba.oc==OP_SeekGt );
+              rc = sqlite3BtreeFirst(u.ba.pC->pCursor, &u.ba.res);
               if( rc!=SQLITE_OK ) goto abort_due_to_error;
             }
           }else{
-            if( u.az.oc<=OP_SeekLe ){  assert( u.az.oc==OP_SeekLt || u.az.oc==OP_SeekLe );
-              rc = sqlite3BtreeLast(u.az.pC->pCursor, &u.az.res);
+            if( u.ba.oc<=OP_SeekLe ){  assert( u.ba.oc==OP_SeekLt || u.ba.oc==OP_SeekLe );
+              rc = sqlite3BtreeLast(u.ba.pC->pCursor, &u.ba.res);
               if( rc!=SQLITE_OK ) goto abort_due_to_error;
             }
           }
-          if( u.az.res ){
+          if( u.ba.res ){
             pc = pOp->p2 - 1;
           }
           break;
-        }else if( u.az.oc==OP_SeekLt || u.az.oc==OP_SeekGe ){
+        }else if( u.ba.oc==OP_SeekLt || u.ba.oc==OP_SeekGe ){
           /* Use the ceiling() function to convert real->int */
-          if( pIn3->r > (double)u.az.iKey ) u.az.iKey++;
+          if( pIn3->r > (double)u.ba.iKey ) u.ba.iKey++;
         }else{
           /* Use the floor() function to convert real->int */
-          assert( u.az.oc==OP_SeekLe || u.az.oc==OP_SeekGt );
-          if( pIn3->r < (double)u.az.iKey ) u.az.iKey--;
+          assert( u.ba.oc==OP_SeekLe || u.ba.oc==OP_SeekGt );
+          if( pIn3->r < (double)u.ba.iKey ) u.ba.iKey--;
         }
       }
-      rc = sqlite3BtreeMovetoUnpacked(u.az.pC->pCursor, 0, (u64)u.az.iKey, 0, &u.az.res);
+      rc = sqlite3BtreeMovetoUnpacked(u.ba.pC->pCursor, 0, (u64)u.ba.iKey, 0, &u.ba.res);
       if( rc!=SQLITE_OK ){
         goto abort_due_to_error;
       }
-      if( u.az.res==0 ){
-        u.az.pC->rowidIsValid = 1;
-        u.az.pC->lastRowid = u.az.iKey;
+      if( u.ba.res==0 ){
+        u.ba.pC->rowidIsValid = 1;
+        u.ba.pC->lastRowid = u.ba.iKey;
       }
     }else{
-      u.az.nField = pOp->p4.i;
+      u.ba.nField = pOp->p4.i;
       assert( pOp->p4type==P4_INT32 );
-      assert( u.az.nField>0 );
-      u.az.r.pKeyInfo = u.az.pC->pKeyInfo;
-      u.az.r.nField = (u16)u.az.nField;
+      assert( u.ba.nField>0 );
+      u.ba.r.pKeyInfo = u.ba.pC->pKeyInfo;
+      u.ba.r.nField = (u16)u.ba.nField;
 
       /* The next line of code computes as follows, only faster:
-      **   if( u.az.oc==OP_SeekGt || u.az.oc==OP_SeekLe ){
-      **     u.az.r.flags = UNPACKED_INCRKEY;
+      **   if( u.ba.oc==OP_SeekGt || u.ba.oc==OP_SeekLe ){
+      **     u.ba.r.flags = UNPACKED_INCRKEY;
       **   }else{
-      **     u.az.r.flags = 0;
+      **     u.ba.r.flags = 0;
       **   }
       */
-      u.az.r.flags = (u16)(UNPACKED_INCRKEY * (1 & (u.az.oc - OP_SeekLt)));
-      assert( u.az.oc!=OP_SeekGt || u.az.r.flags==UNPACKED_INCRKEY );
-      assert( u.az.oc!=OP_SeekLe || u.az.r.flags==UNPACKED_INCRKEY );
-      assert( u.az.oc!=OP_SeekGe || u.az.r.flags==0 );
-      assert( u.az.oc!=OP_SeekLt || u.az.r.flags==0 );
+      u.ba.r.flags = (u16)(UNPACKED_INCRKEY * (1 & (u.ba.oc - OP_SeekLt)));
+      assert( u.ba.oc!=OP_SeekGt || u.ba.r.flags==UNPACKED_INCRKEY );
+      assert( u.ba.oc!=OP_SeekLe || u.ba.r.flags==UNPACKED_INCRKEY );
+      assert( u.ba.oc!=OP_SeekGe || u.ba.r.flags==0 );
+      assert( u.ba.oc!=OP_SeekLt || u.ba.r.flags==0 );
 
-      u.az.r.aMem = &aMem[pOp->p3];
+      u.ba.r.aMem = &aMem[pOp->p3];
 #ifdef SQLITE_DEBUG
-      { int i; for(i=0; i<u.az.r.nField; i++) assert( memIsValid(&u.az.r.aMem[i]) ); }
+      { int i; for(i=0; i<u.ba.r.nField; i++) assert( memIsValid(&u.ba.r.aMem[i]) ); }
 #endif
-      ExpandBlob(u.az.r.aMem);
-      rc = sqlite3BtreeMovetoUnpacked(u.az.pC->pCursor, &u.az.r, 0, 0, &u.az.res);
+      ExpandBlob(u.ba.r.aMem);
+      rc = sqlite3BtreeMovetoUnpacked(u.ba.pC->pCursor, &u.ba.r, 0, 0, &u.ba.res);
       if( rc!=SQLITE_OK ){
         goto abort_due_to_error;
       }
-      u.az.pC->rowidIsValid = 0;
+      u.ba.pC->rowidIsValid = 0;
     }
-    u.az.pC->deferredMoveto = 0;
-    u.az.pC->cacheStatus = CACHE_STALE;
+    u.ba.pC->deferredMoveto = 0;
+    u.ba.pC->cacheStatus = CACHE_STALE;
 #ifdef SQLITE_TEST
     sqlite3_search_count++;
 #endif
-    if( u.az.oc>=OP_SeekGe ){  assert( u.az.oc==OP_SeekGe || u.az.oc==OP_SeekGt );
-      if( u.az.res<0 || (u.az.res==0 && u.az.oc==OP_SeekGt) ){
-        rc = sqlite3BtreeNext(u.az.pC->pCursor, &u.az.res);
+    if( u.ba.oc>=OP_SeekGe ){  assert( u.ba.oc==OP_SeekGe || u.ba.oc==OP_SeekGt );
+      if( u.ba.res<0 || (u.ba.res==0 && u.ba.oc==OP_SeekGt) ){
+        rc = sqlite3BtreeNext(u.ba.pC->pCursor, &u.ba.res);
         if( rc!=SQLITE_OK ) goto abort_due_to_error;
-        u.az.pC->rowidIsValid = 0;
+        u.ba.pC->rowidIsValid = 0;
       }else{
-        u.az.res = 0;
+        u.ba.res = 0;
       }
     }else{
-      assert( u.az.oc==OP_SeekLt || u.az.oc==OP_SeekLe );
-      if( u.az.res>0 || (u.az.res==0 && u.az.oc==OP_SeekLt) ){
-        rc = sqlite3BtreePrevious(u.az.pC->pCursor, &u.az.res);
+      assert( u.ba.oc==OP_SeekLt || u.ba.oc==OP_SeekLe );
+      if( u.ba.res>0 || (u.ba.res==0 && u.ba.oc==OP_SeekLt) ){
+        rc = sqlite3BtreePrevious(u.ba.pC->pCursor, &u.ba.res);
         if( rc!=SQLITE_OK ) goto abort_due_to_error;
-        u.az.pC->rowidIsValid = 0;
+        u.ba.pC->rowidIsValid = 0;
       }else{
-        /* u.az.res might be negative because the table is empty.  Check to
+        /* u.ba.res might be negative because the table is empty.  Check to
         ** see if this is the case.
         */
-        u.az.res = sqlite3BtreeEof(u.az.pC->pCursor);
+        u.ba.res = sqlite3BtreeEof(u.ba.pC->pCursor);
       }
     }
     assert( pOp->p2>0 );
-    if( u.az.res ){
+    if( u.ba.res ){
       pc = pOp->p2 - 1;
     }
   }else{
@@ -66246,20 +66956,20 @@ case OP_SeekGt: {       /* jump, in3 */
 ** occur, no unnecessary I/O happens.
 */
 case OP_Seek: {    /* in2 */
-#if 0  /* local variables moved into u.ba */
+#if 0  /* local variables moved into u.bb */
   VdbeCursor *pC;
-#endif /* local variables moved into u.ba */
+#endif /* local variables moved into u.bb */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.ba.pC = p->apCsr[pOp->p1];
-  assert( u.ba.pC!=0 );
-  if( ALWAYS(u.ba.pC->pCursor!=0) ){
-    assert( u.ba.pC->isTable );
-    u.ba.pC->nullRow = 0;
+  u.bb.pC = p->apCsr[pOp->p1];
+  assert( u.bb.pC!=0 );
+  if( ALWAYS(u.bb.pC->pCursor!=0) ){
+    assert( u.bb.pC->isTable );
+    u.bb.pC->nullRow = 0;
     pIn2 = &aMem[pOp->p2];
-    u.ba.pC->movetoTarget = sqlite3VdbeIntValue(pIn2);
-    u.ba.pC->rowidIsValid = 0;
-    u.ba.pC->deferredMoveto = 1;
+    u.bb.pC->movetoTarget = sqlite3VdbeIntValue(pIn2);
+    u.bb.pC->rowidIsValid = 0;
+    u.bb.pC->deferredMoveto = 1;
   }
   break;
 }
@@ -66291,62 +67001,63 @@ case OP_Seek: {    /* in2 */
 */
 case OP_NotFound:       /* jump, in3 */
 case OP_Found: {        /* jump, in3 */
-#if 0  /* local variables moved into u.bb */
+#if 0  /* local variables moved into u.bc */
   int alreadyExists;
   VdbeCursor *pC;
   int res;
+  char *pFree;
   UnpackedRecord *pIdxKey;
   UnpackedRecord r;
   char aTempRec[ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*3 + 7];
-#endif /* local variables moved into u.bb */
+#endif /* local variables moved into u.bc */
 
 #ifdef SQLITE_TEST
   sqlite3_found_count++;
 #endif
 
-  u.bb.alreadyExists = 0;
+  u.bc.alreadyExists = 0;
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
   assert( pOp->p4type==P4_INT32 );
-  u.bb.pC = p->apCsr[pOp->p1];
-  assert( u.bb.pC!=0 );
+  u.bc.pC = p->apCsr[pOp->p1];
+  assert( u.bc.pC!=0 );
   pIn3 = &aMem[pOp->p3];
-  if( ALWAYS(u.bb.pC->pCursor!=0) ){
+  if( ALWAYS(u.bc.pC->pCursor!=0) ){
 
-    assert( u.bb.pC->isTable==0 );
+    assert( u.bc.pC->isTable==0 );
     if( pOp->p4.i>0 ){
-      u.bb.r.pKeyInfo = u.bb.pC->pKeyInfo;
-      u.bb.r.nField = (u16)pOp->p4.i;
-      u.bb.r.aMem = pIn3;
+      u.bc.r.pKeyInfo = u.bc.pC->pKeyInfo;
+      u.bc.r.nField = (u16)pOp->p4.i;
+      u.bc.r.aMem = pIn3;
 #ifdef SQLITE_DEBUG
-      { int i; for(i=0; i<u.bb.r.nField; i++) assert( memIsValid(&u.bb.r.aMem[i]) ); }
+      { int i; for(i=0; i<u.bc.r.nField; i++) assert( memIsValid(&u.bc.r.aMem[i]) ); }
 #endif
-      u.bb.r.flags = UNPACKED_PREFIX_MATCH;
-      u.bb.pIdxKey = &u.bb.r;
+      u.bc.r.flags = UNPACKED_PREFIX_MATCH;
+      u.bc.pIdxKey = &u.bc.r;
     }else{
+      u.bc.pIdxKey = sqlite3VdbeAllocUnpackedRecord(
+          u.bc.pC->pKeyInfo, u.bc.aTempRec, sizeof(u.bc.aTempRec), &u.bc.pFree
+      );
+      if( u.bc.pIdxKey==0 ) goto no_mem;
       assert( pIn3->flags & MEM_Blob );
       assert( (pIn3->flags & MEM_Zero)==0 );  /* zeroblobs already expanded */
-      u.bb.pIdxKey = sqlite3VdbeRecordUnpack(u.bb.pC->pKeyInfo, pIn3->n, pIn3->z,
-                                        u.bb.aTempRec, sizeof(u.bb.aTempRec));
-      if( u.bb.pIdxKey==0 ){
-        goto no_mem;
-      }
-      u.bb.pIdxKey->flags |= UNPACKED_PREFIX_MATCH;
+      sqlite3VdbeRecordUnpack(u.bc.pC->pKeyInfo, pIn3->n, pIn3->z, u.bc.pIdxKey);
+      u.bc.pIdxKey->flags |= UNPACKED_PREFIX_MATCH;
     }
-    rc = sqlite3BtreeMovetoUnpacked(u.bb.pC->pCursor, u.bb.pIdxKey, 0, 0, &u.bb.res);
+    rc = sqlite3BtreeMovetoUnpacked(u.bc.pC->pCursor, u.bc.pIdxKey, 0, 0, &u.bc.res);
     if( pOp->p4.i==0 ){
-      sqlite3VdbeDeleteUnpackedRecord(u.bb.pIdxKey);
+      sqlite3DbFree(db, u.bc.pFree);
     }
     if( rc!=SQLITE_OK ){
       break;
     }
-    u.bb.alreadyExists = (u.bb.res==0);
-    u.bb.pC->deferredMoveto = 0;
-    u.bb.pC->cacheStatus = CACHE_STALE;
+    u.bc.alreadyExists = (u.bc.res==0);
+    u.bc.pC->deferredMoveto = 0;
+    u.bc.pC->cacheStatus = CACHE_STALE;
   }
   if( pOp->opcode==OP_Found ){
-    if( u.bb.alreadyExists ) pc = pOp->p2 - 1;
+    if( u.bc.alreadyExists ) pc = pOp->p2 - 1;
   }else{
-    if( !u.bb.alreadyExists ) pc = pOp->p2 - 1;
+    if( !u.bc.alreadyExists ) pc = pOp->p2 - 1;
   }
   break;
 }
@@ -66378,7 +67089,7 @@ case OP_Found: {        /* jump, in3 */
 ** See also: NotFound, NotExists, Found
 */
 case OP_IsUnique: {        /* jump, in3 */
-#if 0  /* local variables moved into u.bc */
+#if 0  /* local variables moved into u.bd */
   u16 ii;
   VdbeCursor *pCx;
   BtCursor *pCrsr;
@@ -66386,55 +67097,55 @@ case OP_IsUnique: {        /* jump, in3 */
   Mem *aMx;
   UnpackedRecord r;                  /* B-Tree index search key */
   i64 R;                             /* Rowid stored in register P3 */
-#endif /* local variables moved into u.bc */
+#endif /* local variables moved into u.bd */
 
   pIn3 = &aMem[pOp->p3];
-  u.bc.aMx = &aMem[pOp->p4.i];
+  u.bd.aMx = &aMem[pOp->p4.i];
   /* Assert that the values of parameters P1 and P4 are in range. */
   assert( pOp->p4type==P4_INT32 );
   assert( pOp->p4.i>0 && pOp->p4.i<=p->nMem );
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
 
   /* Find the index cursor. */
-  u.bc.pCx = p->apCsr[pOp->p1];
-  assert( u.bc.pCx->deferredMoveto==0 );
-  u.bc.pCx->seekResult = 0;
-  u.bc.pCx->cacheStatus = CACHE_STALE;
-  u.bc.pCrsr = u.bc.pCx->pCursor;
+  u.bd.pCx = p->apCsr[pOp->p1];
+  assert( u.bd.pCx->deferredMoveto==0 );
+  u.bd.pCx->seekResult = 0;
+  u.bd.pCx->cacheStatus = CACHE_STALE;
+  u.bd.pCrsr = u.bd.pCx->pCursor;
 
   /* If any of the values are NULL, take the jump. */
-  u.bc.nField = u.bc.pCx->pKeyInfo->nField;
-  for(u.bc.ii=0; u.bc.ii<u.bc.nField; u.bc.ii++){
-    if( u.bc.aMx[u.bc.ii].flags & MEM_Null ){
+  u.bd.nField = u.bd.pCx->pKeyInfo->nField;
+  for(u.bd.ii=0; u.bd.ii<u.bd.nField; u.bd.ii++){
+    if( u.bd.aMx[u.bd.ii].flags & MEM_Null ){
       pc = pOp->p2 - 1;
-      u.bc.pCrsr = 0;
+      u.bd.pCrsr = 0;
       break;
     }
   }
-  assert( (u.bc.aMx[u.bc.nField].flags & MEM_Null)==0 );
+  assert( (u.bd.aMx[u.bd.nField].flags & MEM_Null)==0 );
 
-  if( u.bc.pCrsr!=0 ){
+  if( u.bd.pCrsr!=0 ){
     /* Populate the index search key. */
-    u.bc.r.pKeyInfo = u.bc.pCx->pKeyInfo;
-    u.bc.r.nField = u.bc.nField + 1;
-    u.bc.r.flags = UNPACKED_PREFIX_SEARCH;
-    u.bc.r.aMem = u.bc.aMx;
+    u.bd.r.pKeyInfo = u.bd.pCx->pKeyInfo;
+    u.bd.r.nField = u.bd.nField + 1;
+    u.bd.r.flags = UNPACKED_PREFIX_SEARCH;
+    u.bd.r.aMem = u.bd.aMx;
 #ifdef SQLITE_DEBUG
-    { int i; for(i=0; i<u.bc.r.nField; i++) assert( memIsValid(&u.bc.r.aMem[i]) ); }
+    { int i; for(i=0; i<u.bd.r.nField; i++) assert( memIsValid(&u.bd.r.aMem[i]) ); }
 #endif
 
-    /* Extract the value of u.bc.R from register P3. */
+    /* Extract the value of u.bd.R from register P3. */
     sqlite3VdbeMemIntegerify(pIn3);
-    u.bc.R = pIn3->u.i;
+    u.bd.R = pIn3->u.i;
 
     /* Search the B-Tree index. If no conflicting record is found, jump
     ** to P2. Otherwise, copy the rowid of the conflicting record to
     ** register P3 and fall through to the next instruction.  */
-    rc = sqlite3BtreeMovetoUnpacked(u.bc.pCrsr, &u.bc.r, 0, 0, &u.bc.pCx->seekResult);
-    if( (u.bc.r.flags & UNPACKED_PREFIX_SEARCH) || u.bc.r.rowid==u.bc.R ){
+    rc = sqlite3BtreeMovetoUnpacked(u.bd.pCrsr, &u.bd.r, 0, 0, &u.bd.pCx->seekResult);
+    if( (u.bd.r.flags & UNPACKED_PREFIX_SEARCH) || u.bd.r.rowid==u.bd.R ){
       pc = pOp->p2 - 1;
     }else{
-      pIn3->u.i = u.bc.r.rowid;
+      pIn3->u.i = u.bd.r.rowid;
     }
   }
   break;
@@ -66455,42 +67166,42 @@ case OP_IsUnique: {        /* jump, in3 */
 ** See also: Found, NotFound, IsUnique
 */
 case OP_NotExists: {        /* jump, in3 */
-#if 0  /* local variables moved into u.bd */
+#if 0  /* local variables moved into u.be */
   VdbeCursor *pC;
   BtCursor *pCrsr;
   int res;
   u64 iKey;
-#endif /* local variables moved into u.bd */
+#endif /* local variables moved into u.be */
 
   pIn3 = &aMem[pOp->p3];
   assert( pIn3->flags & MEM_Int );
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bd.pC = p->apCsr[pOp->p1];
-  assert( u.bd.pC!=0 );
-  assert( u.bd.pC->isTable );
-  assert( u.bd.pC->pseudoTableReg==0 );
-  u.bd.pCrsr = u.bd.pC->pCursor;
-  if( u.bd.pCrsr!=0 ){
-    u.bd.res = 0;
-    u.bd.iKey = pIn3->u.i;
-    rc = sqlite3BtreeMovetoUnpacked(u.bd.pCrsr, 0, u.bd.iKey, 0, &u.bd.res);
-    u.bd.pC->lastRowid = pIn3->u.i;
-    u.bd.pC->rowidIsValid = u.bd.res==0 ?1:0;
-    u.bd.pC->nullRow = 0;
-    u.bd.pC->cacheStatus = CACHE_STALE;
-    u.bd.pC->deferredMoveto = 0;
-    if( u.bd.res!=0 ){
+  u.be.pC = p->apCsr[pOp->p1];
+  assert( u.be.pC!=0 );
+  assert( u.be.pC->isTable );
+  assert( u.be.pC->pseudoTableReg==0 );
+  u.be.pCrsr = u.be.pC->pCursor;
+  if( ALWAYS(u.be.pCrsr!=0) ){
+    u.be.res = 0;
+    u.be.iKey = pIn3->u.i;
+    rc = sqlite3BtreeMovetoUnpacked(u.be.pCrsr, 0, u.be.iKey, 0, &u.be.res);
+    u.be.pC->lastRowid = pIn3->u.i;
+    u.be.pC->rowidIsValid = u.be.res==0 ?1:0;
+    u.be.pC->nullRow = 0;
+    u.be.pC->cacheStatus = CACHE_STALE;
+    u.be.pC->deferredMoveto = 0;
+    if( u.be.res!=0 ){
       pc = pOp->p2 - 1;
-      assert( u.bd.pC->rowidIsValid==0 );
+      assert( u.be.pC->rowidIsValid==0 );
     }
-    u.bd.pC->seekResult = u.bd.res;
+    u.be.pC->seekResult = u.be.res;
   }else{
     /* This happens when an attempt to open a read cursor on the
     ** sqlite_master table returns SQLITE_EMPTY.
     */
     pc = pOp->p2 - 1;
-    assert( u.bd.pC->rowidIsValid==0 );
-    u.bd.pC->seekResult = 0;
+    assert( u.be.pC->rowidIsValid==0 );
+    u.be.pC->seekResult = 0;
   }
   break;
 }
@@ -66525,21 +67236,21 @@ case OP_Sequence: {           /* out2-prerelease */
 ** AUTOINCREMENT feature.
 */
 case OP_NewRowid: {           /* out2-prerelease */
-#if 0  /* local variables moved into u.be */
+#if 0  /* local variables moved into u.bf */
   i64 v;                 /* The new rowid */
   VdbeCursor *pC;        /* Cursor of table to get the new rowid */
   int res;               /* Result of an sqlite3BtreeLast() */
   int cnt;               /* Counter to limit the number of searches */
   Mem *pMem;             /* Register holding largest rowid for AUTOINCREMENT */
   VdbeFrame *pFrame;     /* Root frame of VDBE */
-#endif /* local variables moved into u.be */
+#endif /* local variables moved into u.bf */
 
-  u.be.v = 0;
-  u.be.res = 0;
+  u.bf.v = 0;
+  u.bf.res = 0;
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.be.pC = p->apCsr[pOp->p1];
-  assert( u.be.pC!=0 );
-  if( NEVER(u.be.pC->pCursor==0) ){
+  u.bf.pC = p->apCsr[pOp->p1];
+  assert( u.bf.pC!=0 );
+  if( NEVER(u.bf.pC->pCursor==0) ){
     /* The zero initialization above is all that is needed */
   }else{
     /* The next rowid or record number (different terms for the same
@@ -66555,7 +67266,7 @@ case OP_NewRowid: {           /* out2-prerelease */
     ** succeeded.  If the random rowid does exist, we select a new one
     ** and try again, up to 100 times.
     */
-    assert( u.be.pC->isTable );
+    assert( u.bf.pC->isTable );
 
 #ifdef SQLITE_32BIT_ROWID
 #   define MAX_ROWID 0x7fffffff
@@ -66567,23 +67278,23 @@ case OP_NewRowid: {           /* out2-prerelease */
 #   define MAX_ROWID  (i64)( (((u64)0x7fffffff)<<32) | (u64)0xffffffff )
 #endif
 
-    if( !u.be.pC->useRandomRowid ){
-      u.be.v = sqlite3BtreeGetCachedRowid(u.be.pC->pCursor);
-      if( u.be.v==0 ){
-        rc = sqlite3BtreeLast(u.be.pC->pCursor, &u.be.res);
+    if( !u.bf.pC->useRandomRowid ){
+      u.bf.v = sqlite3BtreeGetCachedRowid(u.bf.pC->pCursor);
+      if( u.bf.v==0 ){
+        rc = sqlite3BtreeLast(u.bf.pC->pCursor, &u.bf.res);
         if( rc!=SQLITE_OK ){
           goto abort_due_to_error;
         }
-        if( u.be.res ){
-          u.be.v = 1;   /* IMP: R-61914-48074 */
+        if( u.bf.res ){
+          u.bf.v = 1;   /* IMP: R-61914-48074 */
         }else{
-          assert( sqlite3BtreeCursorIsValid(u.be.pC->pCursor) );
-          rc = sqlite3BtreeKeySize(u.be.pC->pCursor, &u.be.v);
+          assert( sqlite3BtreeCursorIsValid(u.bf.pC->pCursor) );
+          rc = sqlite3BtreeKeySize(u.bf.pC->pCursor, &u.bf.v);
           assert( rc==SQLITE_OK );   /* Cannot fail following BtreeLast() */
-          if( u.be.v==MAX_ROWID ){
-            u.be.pC->useRandomRowid = 1;
+          if( u.bf.v==MAX_ROWID ){
+            u.bf.pC->useRandomRowid = 1;
           }else{
-            u.be.v++;   /* IMP: R-29538-34987 */
+            u.bf.v++;   /* IMP: R-29538-34987 */
           }
         }
       }
@@ -66593,35 +67304,35 @@ case OP_NewRowid: {           /* out2-prerelease */
         /* Assert that P3 is a valid memory cell. */
         assert( pOp->p3>0 );
         if( p->pFrame ){
-          for(u.be.pFrame=p->pFrame; u.be.pFrame->pParent; u.be.pFrame=u.be.pFrame->pParent);
+          for(u.bf.pFrame=p->pFrame; u.bf.pFrame->pParent; u.bf.pFrame=u.bf.pFrame->pParent);
           /* Assert that P3 is a valid memory cell. */
-          assert( pOp->p3<=u.be.pFrame->nMem );
-          u.be.pMem = &u.be.pFrame->aMem[pOp->p3];
+          assert( pOp->p3<=u.bf.pFrame->nMem );
+          u.bf.pMem = &u.bf.pFrame->aMem[pOp->p3];
         }else{
           /* Assert that P3 is a valid memory cell. */
           assert( pOp->p3<=p->nMem );
-          u.be.pMem = &aMem[pOp->p3];
-          memAboutToChange(p, u.be.pMem);
+          u.bf.pMem = &aMem[pOp->p3];
+          memAboutToChange(p, u.bf.pMem);
         }
-        assert( memIsValid(u.be.pMem) );
+        assert( memIsValid(u.bf.pMem) );
 
-        REGISTER_TRACE(pOp->p3, u.be.pMem);
-        sqlite3VdbeMemIntegerify(u.be.pMem);
-        assert( (u.be.pMem->flags & MEM_Int)!=0 );  /* mem(P3) holds an integer */
-        if( u.be.pMem->u.i==MAX_ROWID || u.be.pC->useRandomRowid ){
+        REGISTER_TRACE(pOp->p3, u.bf.pMem);
+        sqlite3VdbeMemIntegerify(u.bf.pMem);
+        assert( (u.bf.pMem->flags & MEM_Int)!=0 );  /* mem(P3) holds an integer */
+        if( u.bf.pMem->u.i==MAX_ROWID || u.bf.pC->useRandomRowid ){
           rc = SQLITE_FULL;   /* IMP: R-12275-61338 */
           goto abort_due_to_error;
         }
-        if( u.be.v<u.be.pMem->u.i+1 ){
-          u.be.v = u.be.pMem->u.i + 1;
+        if( u.bf.v<u.bf.pMem->u.i+1 ){
+          u.bf.v = u.bf.pMem->u.i + 1;
         }
-        u.be.pMem->u.i = u.be.v;
+        u.bf.pMem->u.i = u.bf.v;
       }
 #endif
 
-      sqlite3BtreeSetCachedRowid(u.be.pC->pCursor, u.be.v<MAX_ROWID ? u.be.v+1 : 0);
+      sqlite3BtreeSetCachedRowid(u.bf.pC->pCursor, u.bf.v<MAX_ROWID ? u.bf.v+1 : 0);
     }
-    if( u.be.pC->useRandomRowid ){
+    if( u.bf.pC->useRandomRowid ){
       /* IMPLEMENTATION-OF: R-07677-41881 If the largest ROWID is equal to the
       ** largest possible integer (9223372036854775807) then the database
       ** engine starts picking positive candidate ROWIDs at random until
@@ -66629,35 +67340,35 @@ case OP_NewRowid: {           /* out2-prerelease */
       assert( pOp->p3==0 );  /* We cannot be in random rowid mode if this is
                              ** an AUTOINCREMENT table. */
       /* on the first attempt, simply do one more than previous */
-      u.be.v = lastRowid;
-      u.be.v &= (MAX_ROWID>>1); /* ensure doesn't go negative */
-      u.be.v++; /* ensure non-zero */
-      u.be.cnt = 0;
-      while(   ((rc = sqlite3BtreeMovetoUnpacked(u.be.pC->pCursor, 0, (u64)u.be.v,
-                                                 0, &u.be.res))==SQLITE_OK)
-            && (u.be.res==0)
-            && (++u.be.cnt<100)){
+      u.bf.v = lastRowid;
+      u.bf.v &= (MAX_ROWID>>1); /* ensure doesn't go negative */
+      u.bf.v++; /* ensure non-zero */
+      u.bf.cnt = 0;
+      while(   ((rc = sqlite3BtreeMovetoUnpacked(u.bf.pC->pCursor, 0, (u64)u.bf.v,
+                                                 0, &u.bf.res))==SQLITE_OK)
+            && (u.bf.res==0)
+            && (++u.bf.cnt<100)){
         /* collision - try another random rowid */
-        sqlite3_randomness(sizeof(u.be.v), &u.be.v);
-        if( u.be.cnt<5 ){
+        sqlite3_randomness(sizeof(u.bf.v), &u.bf.v);
+        if( u.bf.cnt<5 ){
           /* try "small" random rowids for the initial attempts */
-          u.be.v &= 0xffffff;
+          u.bf.v &= 0xffffff;
         }else{
-          u.be.v &= (MAX_ROWID>>1); /* ensure doesn't go negative */
+          u.bf.v &= (MAX_ROWID>>1); /* ensure doesn't go negative */
         }
-        u.be.v++; /* ensure non-zero */
+        u.bf.v++; /* ensure non-zero */
       }
-      if( rc==SQLITE_OK && u.be.res==0 ){
+      if( rc==SQLITE_OK && u.bf.res==0 ){
         rc = SQLITE_FULL;   /* IMP: R-38219-53002 */
         goto abort_due_to_error;
       }
-      assert( u.be.v>0 );  /* EV: R-40812-03570 */
+      assert( u.bf.v>0 );  /* EV: R-40812-03570 */
     }
-    u.be.pC->rowidIsValid = 0;
-    u.be.pC->deferredMoveto = 0;
-    u.be.pC->cacheStatus = CACHE_STALE;
+    u.bf.pC->rowidIsValid = 0;
+    u.bf.pC->deferredMoveto = 0;
+    u.bf.pC->cacheStatus = CACHE_STALE;
   }
-  pOut->u.i = u.be.v;
+  pOut->u.i = u.bf.v;
   break;
 }
 
@@ -66707,7 +67418,7 @@ case OP_NewRowid: {           /* out2-prerelease */
 */
 case OP_Insert: 
 case OP_InsertInt: {
-#if 0  /* local variables moved into u.bf */
+#if 0  /* local variables moved into u.bg */
   Mem *pData;       /* MEM cell holding data for the record to be inserted */
   Mem *pKey;        /* MEM cell holding key  for the record */
   i64 iKey;         /* The integer ROWID or key for the record to be inserted */
@@ -66717,60 +67428,60 @@ case OP_InsertInt: {
   const char *zDb;  /* database name - used by the update hook */
   const char *zTbl; /* Table name - used by the opdate hook */
   int op;           /* Opcode for update hook: SQLITE_UPDATE or SQLITE_INSERT */
-#endif /* local variables moved into u.bf */
+#endif /* local variables moved into u.bg */
 
-  u.bf.pData = &aMem[pOp->p2];
+  u.bg.pData = &aMem[pOp->p2];
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  assert( memIsValid(u.bf.pData) );
-  u.bf.pC = p->apCsr[pOp->p1];
-  assert( u.bf.pC!=0 );
-  assert( u.bf.pC->pCursor!=0 );
-  assert( u.bf.pC->pseudoTableReg==0 );
-  assert( u.bf.pC->isTable );
-  REGISTER_TRACE(pOp->p2, u.bf.pData);
+  assert( memIsValid(u.bg.pData) );
+  u.bg.pC = p->apCsr[pOp->p1];
+  assert( u.bg.pC!=0 );
+  assert( u.bg.pC->pCursor!=0 );
+  assert( u.bg.pC->pseudoTableReg==0 );
+  assert( u.bg.pC->isTable );
+  REGISTER_TRACE(pOp->p2, u.bg.pData);
 
   if( pOp->opcode==OP_Insert ){
-    u.bf.pKey = &aMem[pOp->p3];
-    assert( u.bf.pKey->flags & MEM_Int );
-    assert( memIsValid(u.bf.pKey) );
-    REGISTER_TRACE(pOp->p3, u.bf.pKey);
-    u.bf.iKey = u.bf.pKey->u.i;
+    u.bg.pKey = &aMem[pOp->p3];
+    assert( u.bg.pKey->flags & MEM_Int );
+    assert( memIsValid(u.bg.pKey) );
+    REGISTER_TRACE(pOp->p3, u.bg.pKey);
+    u.bg.iKey = u.bg.pKey->u.i;
   }else{
     assert( pOp->opcode==OP_InsertInt );
-    u.bf.iKey = pOp->p3;
+    u.bg.iKey = pOp->p3;
   }
 
   if( pOp->p5 & OPFLAG_NCHANGE ) p->nChange++;
-  if( pOp->p5 & OPFLAG_LASTROWID ) db->lastRowid = lastRowid = u.bf.iKey;
-  if( u.bf.pData->flags & MEM_Null ){
-    u.bf.pData->z = 0;
-    u.bf.pData->n = 0;
+  if( pOp->p5 & OPFLAG_LASTROWID ) db->lastRowid = lastRowid = u.bg.iKey;
+  if( u.bg.pData->flags & MEM_Null ){
+    u.bg.pData->z = 0;
+    u.bg.pData->n = 0;
   }else{
-    assert( u.bf.pData->flags & (MEM_Blob|MEM_Str) );
+    assert( u.bg.pData->flags & (MEM_Blob|MEM_Str) );
   }
-  u.bf.seekResult = ((pOp->p5 & OPFLAG_USESEEKRESULT) ? u.bf.pC->seekResult : 0);
-  if( u.bf.pData->flags & MEM_Zero ){
-    u.bf.nZero = u.bf.pData->u.nZero;
+  u.bg.seekResult = ((pOp->p5 & OPFLAG_USESEEKRESULT) ? u.bg.pC->seekResult : 0);
+  if( u.bg.pData->flags & MEM_Zero ){
+    u.bg.nZero = u.bg.pData->u.nZero;
   }else{
-    u.bf.nZero = 0;
+    u.bg.nZero = 0;
   }
-  sqlite3BtreeSetCachedRowid(u.bf.pC->pCursor, 0);
-  rc = sqlite3BtreeInsert(u.bf.pC->pCursor, 0, u.bf.iKey,
-                          u.bf.pData->z, u.bf.pData->n, u.bf.nZero,
-                          pOp->p5 & OPFLAG_APPEND, u.bf.seekResult
+  sqlite3BtreeSetCachedRowid(u.bg.pC->pCursor, 0);
+  rc = sqlite3BtreeInsert(u.bg.pC->pCursor, 0, u.bg.iKey,
+                          u.bg.pData->z, u.bg.pData->n, u.bg.nZero,
+                          pOp->p5 & OPFLAG_APPEND, u.bg.seekResult
   );
-  u.bf.pC->rowidIsValid = 0;
-  u.bf.pC->deferredMoveto = 0;
-  u.bf.pC->cacheStatus = CACHE_STALE;
+  u.bg.pC->rowidIsValid = 0;
+  u.bg.pC->deferredMoveto = 0;
+  u.bg.pC->cacheStatus = CACHE_STALE;
 
   /* Invoke the update-hook if required. */
   if( rc==SQLITE_OK && db->xUpdateCallback && pOp->p4.z ){
-    u.bf.zDb = db->aDb[u.bf.pC->iDb].zName;
-    u.bf.zTbl = pOp->p4.z;
-    u.bf.op = ((pOp->p5 & OPFLAG_ISUPDATE) ? SQLITE_UPDATE : SQLITE_INSERT);
-    assert( u.bf.pC->isTable );
-    db->xUpdateCallback(db->pUpdateArg, u.bf.op, u.bf.zDb, u.bf.zTbl, u.bf.iKey);
-    assert( u.bf.pC->iDb>=0 );
+    u.bg.zDb = db->aDb[u.bg.pC->iDb].zName;
+    u.bg.zTbl = pOp->p4.z;
+    u.bg.op = ((pOp->p5 & OPFLAG_ISUPDATE) ? SQLITE_UPDATE : SQLITE_INSERT);
+    assert( u.bg.pC->isTable );
+    db->xUpdateCallback(db->pUpdateArg, u.bg.op, u.bg.zDb, u.bg.zTbl, u.bg.iKey);
+    assert( u.bg.pC->iDb>=0 );
   }
   break;
 }
@@ -66796,47 +67507,47 @@ case OP_InsertInt: {
 ** using OP_NotFound prior to invoking this opcode.
 */
 case OP_Delete: {
-#if 0  /* local variables moved into u.bg */
+#if 0  /* local variables moved into u.bh */
   i64 iKey;
   VdbeCursor *pC;
-#endif /* local variables moved into u.bg */
+#endif /* local variables moved into u.bh */
 
-  u.bg.iKey = 0;
+  u.bh.iKey = 0;
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bg.pC = p->apCsr[pOp->p1];
-  assert( u.bg.pC!=0 );
-  assert( u.bg.pC->pCursor!=0 );  /* Only valid for real tables, no pseudotables */
+  u.bh.pC = p->apCsr[pOp->p1];
+  assert( u.bh.pC!=0 );
+  assert( u.bh.pC->pCursor!=0 );  /* Only valid for real tables, no pseudotables */
 
-  /* If the update-hook will be invoked, set u.bg.iKey to the rowid of the
+  /* If the update-hook will be invoked, set u.bh.iKey to the rowid of the
   ** row being deleted.
   */
   if( db->xUpdateCallback && pOp->p4.z ){
-    assert( u.bg.pC->isTable );
-    assert( u.bg.pC->rowidIsValid );  /* lastRowid set by previous OP_NotFound */
-    u.bg.iKey = u.bg.pC->lastRowid;
+    assert( u.bh.pC->isTable );
+    assert( u.bh.pC->rowidIsValid );  /* lastRowid set by previous OP_NotFound */
+    u.bh.iKey = u.bh.pC->lastRowid;
   }
 
   /* The OP_Delete opcode always follows an OP_NotExists or OP_Last or
   ** OP_Column on the same table without any intervening operations that
-  ** might move or invalidate the cursor.  Hence cursor u.bg.pC is always pointing
+  ** might move or invalidate the cursor.  Hence cursor u.bh.pC is always pointing
   ** to the row to be deleted and the sqlite3VdbeCursorMoveto() operation
   ** below is always a no-op and cannot fail.  We will run it anyhow, though,
   ** to guard against future changes to the code generator.
   **/
-  assert( u.bg.pC->deferredMoveto==0 );
-  rc = sqlite3VdbeCursorMoveto(u.bg.pC);
+  assert( u.bh.pC->deferredMoveto==0 );
+  rc = sqlite3VdbeCursorMoveto(u.bh.pC);
   if( NEVER(rc!=SQLITE_OK) ) goto abort_due_to_error;
 
-  sqlite3BtreeSetCachedRowid(u.bg.pC->pCursor, 0);
-  rc = sqlite3BtreeDelete(u.bg.pC->pCursor);
-  u.bg.pC->cacheStatus = CACHE_STALE;
+  sqlite3BtreeSetCachedRowid(u.bh.pC->pCursor, 0);
+  rc = sqlite3BtreeDelete(u.bh.pC->pCursor);
+  u.bh.pC->cacheStatus = CACHE_STALE;
 
   /* Invoke the update-hook if required. */
   if( rc==SQLITE_OK && db->xUpdateCallback && pOp->p4.z ){
-    const char *zDb = db->aDb[u.bg.pC->iDb].zName;
+    const char *zDb = db->aDb[u.bh.pC->iDb].zName;
     const char *zTbl = pOp->p4.z;
-    db->xUpdateCallback(db->pUpdateArg, SQLITE_DELETE, zDb, zTbl, u.bg.iKey);
-    assert( u.bg.pC->iDb>=0 );
+    db->xUpdateCallback(db->pUpdateArg, SQLITE_DELETE, zDb, zTbl, u.bh.iKey);
+    assert( u.bh.pC->iDb>=0 );
   }
   if( pOp->p2 & OPFLAG_NCHANGE ) p->nChange++;
   break;
@@ -66854,6 +67565,49 @@ case OP_ResetCount: {
   break;
 }
 
+/* Opcode: SorterCompare P1 P2 P3
+**
+** P1 is a sorter cursor. This instruction compares the record blob in 
+** register P3 with the entry that the sorter cursor currently points to.
+** If, excluding the rowid fields at the end, the two records are a match,
+** fall through to the next instruction. Otherwise, jump to instruction P2.
+*/
+case OP_SorterCompare: {
+#if 0  /* local variables moved into u.bi */
+  VdbeCursor *pC;
+  int res;
+#endif /* local variables moved into u.bi */
+
+  u.bi.pC = p->apCsr[pOp->p1];
+  assert( isSorter(u.bi.pC) );
+  pIn3 = &aMem[pOp->p3];
+  rc = sqlite3VdbeSorterCompare(u.bi.pC, pIn3, &u.bi.res);
+  if( u.bi.res ){
+    pc = pOp->p2-1;
+  }
+  break;
+};
+
+/* Opcode: SorterData P1 P2 * * *
+**
+** Write into register P2 the current sorter data for sorter cursor P1.
+*/
+case OP_SorterData: {
+#if 0  /* local variables moved into u.bj */
+  VdbeCursor *pC;
+#endif /* local variables moved into u.bj */
+#ifndef SQLITE_OMIT_MERGE_SORT
+  pOut = &aMem[pOp->p2];
+  u.bj.pC = p->apCsr[pOp->p1];
+  assert( u.bj.pC->isSorter );
+  rc = sqlite3VdbeSorterRowkey(u.bj.pC, pOut);
+#else
+  pOp->opcode = OP_RowKey;
+  pc--;
+#endif
+  break;
+}
+
 /* Opcode: RowData P1 P2 * * *
 **
 ** Write into register P2 the complete row data for cursor P1.
@@ -66876,61 +67630,63 @@ case OP_ResetCount: {
 */
 case OP_RowKey:
 case OP_RowData: {
-#if 0  /* local variables moved into u.bh */
+#if 0  /* local variables moved into u.bk */
   VdbeCursor *pC;
   BtCursor *pCrsr;
   u32 n;
   i64 n64;
-#endif /* local variables moved into u.bh */
+#endif /* local variables moved into u.bk */
 
   pOut = &aMem[pOp->p2];
   memAboutToChange(p, pOut);
 
   /* Note that RowKey and RowData are really exactly the same instruction */
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bh.pC = p->apCsr[pOp->p1];
-  assert( u.bh.pC->isTable || pOp->opcode==OP_RowKey );
-  assert( u.bh.pC->isIndex || pOp->opcode==OP_RowData );
-  assert( u.bh.pC!=0 );
-  assert( u.bh.pC->nullRow==0 );
-  assert( u.bh.pC->pseudoTableReg==0 );
-  assert( u.bh.pC->pCursor!=0 );
-  u.bh.pCrsr = u.bh.pC->pCursor;
-  assert( sqlite3BtreeCursorIsValid(u.bh.pCrsr) );
+  u.bk.pC = p->apCsr[pOp->p1];
+  assert( u.bk.pC->isSorter==0 );
+  assert( u.bk.pC->isTable || pOp->opcode!=OP_RowData );
+  assert( u.bk.pC->isIndex || pOp->opcode==OP_RowData );
+  assert( u.bk.pC!=0 );
+  assert( u.bk.pC->nullRow==0 );
+  assert( u.bk.pC->pseudoTableReg==0 );
+  assert( !u.bk.pC->isSorter );
+  assert( u.bk.pC->pCursor!=0 );
+  u.bk.pCrsr = u.bk.pC->pCursor;
+  assert( sqlite3BtreeCursorIsValid(u.bk.pCrsr) );
 
   /* The OP_RowKey and OP_RowData opcodes always follow OP_NotExists or
   ** OP_Rewind/OP_Next with no intervening instructions that might invalidate
   ** the cursor.  Hence the following sqlite3VdbeCursorMoveto() call is always
   ** a no-op and can never fail.  But we leave it in place as a safety.
   */
-  assert( u.bh.pC->deferredMoveto==0 );
-  rc = sqlite3VdbeCursorMoveto(u.bh.pC);
+  assert( u.bk.pC->deferredMoveto==0 );
+  rc = sqlite3VdbeCursorMoveto(u.bk.pC);
   if( NEVER(rc!=SQLITE_OK) ) goto abort_due_to_error;
 
-  if( u.bh.pC->isIndex ){
-    assert( !u.bh.pC->isTable );
-    rc = sqlite3BtreeKeySize(u.bh.pCrsr, &u.bh.n64);
+  if( u.bk.pC->isIndex ){
+    assert( !u.bk.pC->isTable );
+    VVA_ONLY(rc =) sqlite3BtreeKeySize(u.bk.pCrsr, &u.bk.n64);
     assert( rc==SQLITE_OK );    /* True because of CursorMoveto() call above */
-    if( u.bh.n64>db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    if( u.bk.n64>db->aLimit[SQLITE_LIMIT_LENGTH] ){
       goto too_big;
     }
-    u.bh.n = (u32)u.bh.n64;
+    u.bk.n = (u32)u.bk.n64;
   }else{
-    rc = sqlite3BtreeDataSize(u.bh.pCrsr, &u.bh.n);
+    VVA_ONLY(rc =) sqlite3BtreeDataSize(u.bk.pCrsr, &u.bk.n);
     assert( rc==SQLITE_OK );    /* DataSize() cannot fail */
-    if( u.bh.n>(u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){
+    if( u.bk.n>(u32)db->aLimit[SQLITE_LIMIT_LENGTH] ){
       goto too_big;
     }
   }
-  if( sqlite3VdbeMemGrow(pOut, u.bh.n, 0) ){
+  if( sqlite3VdbeMemGrow(pOut, u.bk.n, 0) ){
     goto no_mem;
   }
-  pOut->n = u.bh.n;
+  pOut->n = u.bk.n;
   MemSetTypeFlag(pOut, MEM_Blob);
-  if( u.bh.pC->isIndex ){
-    rc = sqlite3BtreeKey(u.bh.pCrsr, 0, u.bh.n, pOut->z);
+  if( u.bk.pC->isIndex ){
+    rc = sqlite3BtreeKey(u.bk.pCrsr, 0, u.bk.n, pOut->z);
   }else{
-    rc = sqlite3BtreeData(u.bh.pCrsr, 0, u.bh.n, pOut->z);
+    rc = sqlite3BtreeData(u.bk.pCrsr, 0, u.bk.n, pOut->z);
   }
   pOut->enc = SQLITE_UTF8;  /* In case the blob is ever cast to text */
   UPDATE_MAX_BLOBSIZE(pOut);
@@ -66947,42 +67703,42 @@ case OP_RowData: {
 ** one opcode now works for both table types.
 */
 case OP_Rowid: {                 /* out2-prerelease */
-#if 0  /* local variables moved into u.bi */
+#if 0  /* local variables moved into u.bl */
   VdbeCursor *pC;
   i64 v;
   sqlite3_vtab *pVtab;
   const sqlite3_module *pModule;
-#endif /* local variables moved into u.bi */
+#endif /* local variables moved into u.bl */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bi.pC = p->apCsr[pOp->p1];
-  assert( u.bi.pC!=0 );
-  assert( u.bi.pC->pseudoTableReg==0 );
-  if( u.bi.pC->nullRow ){
+  u.bl.pC = p->apCsr[pOp->p1];
+  assert( u.bl.pC!=0 );
+  assert( u.bl.pC->pseudoTableReg==0 );
+  if( u.bl.pC->nullRow ){
     pOut->flags = MEM_Null;
     break;
-  }else if( u.bi.pC->deferredMoveto ){
-    u.bi.v = u.bi.pC->movetoTarget;
+  }else if( u.bl.pC->deferredMoveto ){
+    u.bl.v = u.bl.pC->movetoTarget;
 #ifndef SQLITE_OMIT_VIRTUALTABLE
-  }else if( u.bi.pC->pVtabCursor ){
-    u.bi.pVtab = u.bi.pC->pVtabCursor->pVtab;
-    u.bi.pModule = u.bi.pVtab->pModule;
-    assert( u.bi.pModule->xRowid );
-    rc = u.bi.pModule->xRowid(u.bi.pC->pVtabCursor, &u.bi.v);
-    importVtabErrMsg(p, u.bi.pVtab);
+  }else if( u.bl.pC->pVtabCursor ){
+    u.bl.pVtab = u.bl.pC->pVtabCursor->pVtab;
+    u.bl.pModule = u.bl.pVtab->pModule;
+    assert( u.bl.pModule->xRowid );
+    rc = u.bl.pModule->xRowid(u.bl.pC->pVtabCursor, &u.bl.v);
+    importVtabErrMsg(p, u.bl.pVtab);
 #endif /* SQLITE_OMIT_VIRTUALTABLE */
   }else{
-    assert( u.bi.pC->pCursor!=0 );
-    rc = sqlite3VdbeCursorMoveto(u.bi.pC);
+    assert( u.bl.pC->pCursor!=0 );
+    rc = sqlite3VdbeCursorMoveto(u.bl.pC);
     if( rc ) goto abort_due_to_error;
-    if( u.bi.pC->rowidIsValid ){
-      u.bi.v = u.bi.pC->lastRowid;
+    if( u.bl.pC->rowidIsValid ){
+      u.bl.v = u.bl.pC->lastRowid;
     }else{
-      rc = sqlite3BtreeKeySize(u.bi.pC->pCursor, &u.bi.v);
+      rc = sqlite3BtreeKeySize(u.bl.pC->pCursor, &u.bl.v);
       assert( rc==SQLITE_OK );  /* Always so because of CursorMoveto() above */
     }
   }
-  pOut->u.i = u.bi.v;
+  pOut->u.i = u.bl.v;
   break;
 }
 
@@ -66993,17 +67749,18 @@ case OP_Rowid: {                 /* out2-prerelease */
 ** write a NULL.
 */
 case OP_NullRow: {
-#if 0  /* local variables moved into u.bj */
+#if 0  /* local variables moved into u.bm */
   VdbeCursor *pC;
-#endif /* local variables moved into u.bj */
+#endif /* local variables moved into u.bm */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bj.pC = p->apCsr[pOp->p1];
-  assert( u.bj.pC!=0 );
-  u.bj.pC->nullRow = 1;
-  u.bj.pC->rowidIsValid = 0;
-  if( u.bj.pC->pCursor ){
-    sqlite3BtreeClearCursor(u.bj.pC->pCursor);
+  u.bm.pC = p->apCsr[pOp->p1];
+  assert( u.bm.pC!=0 );
+  u.bm.pC->nullRow = 1;
+  u.bm.pC->rowidIsValid = 0;
+  assert( u.bm.pC->pCursor || u.bm.pC->pVtabCursor );
+  if( u.bm.pC->pCursor ){
+    sqlite3BtreeClearCursor(u.bm.pC->pCursor);
   }
   break;
 }
@@ -67017,26 +67774,25 @@ case OP_NullRow: {
 ** to the following instruction.
 */
 case OP_Last: {        /* jump */
-#if 0  /* local variables moved into u.bk */
+#if 0  /* local variables moved into u.bn */
   VdbeCursor *pC;
   BtCursor *pCrsr;
   int res;
-#endif /* local variables moved into u.bk */
+#endif /* local variables moved into u.bn */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bk.pC = p->apCsr[pOp->p1];
-  assert( u.bk.pC!=0 );
-  u.bk.pCrsr = u.bk.pC->pCursor;
-  if( u.bk.pCrsr==0 ){
-    u.bk.res = 1;
-  }else{
-    rc = sqlite3BtreeLast(u.bk.pCrsr, &u.bk.res);
+  u.bn.pC = p->apCsr[pOp->p1];
+  assert( u.bn.pC!=0 );
+  u.bn.pCrsr = u.bn.pC->pCursor;
+  u.bn.res = 0;
+  if( ALWAYS(u.bn.pCrsr!=0) ){
+    rc = sqlite3BtreeLast(u.bn.pCrsr, &u.bn.res);
   }
-  u.bk.pC->nullRow = (u8)u.bk.res;
-  u.bk.pC->deferredMoveto = 0;
-  u.bk.pC->rowidIsValid = 0;
-  u.bk.pC->cacheStatus = CACHE_STALE;
-  if( pOp->p2>0 && u.bk.res ){
+  u.bn.pC->nullRow = (u8)u.bn.res;
+  u.bn.pC->deferredMoveto = 0;
+  u.bn.pC->rowidIsValid = 0;
+  u.bn.pC->cacheStatus = CACHE_STALE;
+  if( pOp->p2>0 && u.bn.res ){
     pc = pOp->p2 - 1;
   }
   break;
@@ -67055,6 +67811,10 @@ case OP_Last: {        /* jump */
 ** regression tests can determine whether or not the optimizer is
 ** correctly optimizing out sorts.
 */
+case OP_SorterSort:    /* jump */
+#ifdef SQLITE_OMIT_MERGE_SORT
+  pOp->opcode = OP_Sort;
+#endif
 case OP_Sort: {        /* jump */
 #ifdef SQLITE_TEST
   sqlite3_sort_count++;
@@ -67072,32 +67832,37 @@ case OP_Sort: {        /* jump */
 ** to the following instruction.
 */
 case OP_Rewind: {        /* jump */
-#if 0  /* local variables moved into u.bl */
+#if 0  /* local variables moved into u.bo */
   VdbeCursor *pC;
   BtCursor *pCrsr;
   int res;
-#endif /* local variables moved into u.bl */
+#endif /* local variables moved into u.bo */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bl.pC = p->apCsr[pOp->p1];
-  assert( u.bl.pC!=0 );
-  u.bl.res = 1;
-  if( (u.bl.pCrsr = u.bl.pC->pCursor)!=0 ){
-    rc = sqlite3BtreeFirst(u.bl.pCrsr, &u.bl.res);
-    u.bl.pC->atFirst = u.bl.res==0 ?1:0;
-    u.bl.pC->deferredMoveto = 0;
-    u.bl.pC->cacheStatus = CACHE_STALE;
-    u.bl.pC->rowidIsValid = 0;
-  }
-  u.bl.pC->nullRow = (u8)u.bl.res;
+  u.bo.pC = p->apCsr[pOp->p1];
+  assert( u.bo.pC!=0 );
+  assert( u.bo.pC->isSorter==(pOp->opcode==OP_SorterSort) );
+  u.bo.res = 1;
+  if( isSorter(u.bo.pC) ){
+    rc = sqlite3VdbeSorterRewind(db, u.bo.pC, &u.bo.res);
+  }else{
+    u.bo.pCrsr = u.bo.pC->pCursor;
+    assert( u.bo.pCrsr );
+    rc = sqlite3BtreeFirst(u.bo.pCrsr, &u.bo.res);
+    u.bo.pC->atFirst = u.bo.res==0 ?1:0;
+    u.bo.pC->deferredMoveto = 0;
+    u.bo.pC->cacheStatus = CACHE_STALE;
+    u.bo.pC->rowidIsValid = 0;
+  }
+  u.bo.pC->nullRow = (u8)u.bo.res;
   assert( pOp->p2>0 && pOp->p2<p->nOp );
-  if( u.bl.res ){
+  if( u.bo.res ){
     pc = pOp->p2 - 1;
   }
   break;
 }
 
-/* Opcode: Next P1 P2 * * P5
+/* Opcode: Next P1 P2 * P4 P5
 **
 ** Advance cursor P1 so that it points to the next key/data pair in its
 ** table or index.  If there are no more key/value pairs then fall through
@@ -67106,6 +67871,9 @@ case OP_Rewind: {        /* jump */
 **
 ** The P1 cursor must be for a real table, not a pseudo-table.
 **
+** P4 is always of type P4_ADVANCE. The function pointer points to
+** sqlite3BtreeNext().
+**
 ** If P5 is positive and the jump is taken, then event counter
 ** number P5-1 in the prepared statement is incremented.
 **
@@ -67120,43 +67888,52 @@ case OP_Rewind: {        /* jump */
 **
 ** The P1 cursor must be for a real table, not a pseudo-table.
 **
+** P4 is always of type P4_ADVANCE. The function pointer points to
+** sqlite3BtreePrevious().
+**
 ** If P5 is positive and the jump is taken, then event counter
 ** number P5-1 in the prepared statement is incremented.
 */
+case OP_SorterNext:    /* jump */
+#ifdef SQLITE_OMIT_MERGE_SORT
+  pOp->opcode = OP_Next;
+#endif
 case OP_Prev:          /* jump */
 case OP_Next: {        /* jump */
-#if 0  /* local variables moved into u.bm */
+#if 0  /* local variables moved into u.bp */
   VdbeCursor *pC;
-  BtCursor *pCrsr;
   int res;
-#endif /* local variables moved into u.bm */
+#endif /* local variables moved into u.bp */
 
   CHECK_FOR_INTERRUPT;
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
   assert( pOp->p5<=ArraySize(p->aCounter) );
-  u.bm.pC = p->apCsr[pOp->p1];
-  if( u.bm.pC==0 ){
+  u.bp.pC = p->apCsr[pOp->p1];
+  if( u.bp.pC==0 ){
     break;  /* See ticket #2273 */
   }
-  u.bm.pCrsr = u.bm.pC->pCursor;
-  if( u.bm.pCrsr==0 ){
-    u.bm.pC->nullRow = 1;
-    break;
-  }
-  u.bm.res = 1;
-  assert( u.bm.pC->deferredMoveto==0 );
-  rc = pOp->opcode==OP_Next ? sqlite3BtreeNext(u.bm.pCrsr, &u.bm.res) :
-                              sqlite3BtreePrevious(u.bm.pCrsr, &u.bm.res);
-  u.bm.pC->nullRow = (u8)u.bm.res;
-  u.bm.pC->cacheStatus = CACHE_STALE;
-  if( u.bm.res==0 ){
+  assert( u.bp.pC->isSorter==(pOp->opcode==OP_SorterNext) );
+  if( isSorter(u.bp.pC) ){
+    assert( pOp->opcode==OP_SorterNext );
+    rc = sqlite3VdbeSorterNext(db, u.bp.pC, &u.bp.res);
+  }else{
+    u.bp.res = 1;
+    assert( u.bp.pC->deferredMoveto==0 );
+    assert( u.bp.pC->pCursor );
+    assert( pOp->opcode!=OP_Next || pOp->p4.xAdvance==sqlite3BtreeNext );
+    assert( pOp->opcode!=OP_Prev || pOp->p4.xAdvance==sqlite3BtreePrevious );
+    rc = pOp->p4.xAdvance(u.bp.pC->pCursor, &u.bp.res);
+  }
+  u.bp.pC->nullRow = (u8)u.bp.res;
+  u.bp.pC->cacheStatus = CACHE_STALE;
+  if( u.bp.res==0 ){
     pc = pOp->p2 - 1;
     if( pOp->p5 ) p->aCounter[pOp->p5-1]++;
 #ifdef SQLITE_TEST
     sqlite3_search_count++;
 #endif
   }
-  u.bm.pC->rowidIsValid = 0;
+  u.bp.pC->rowidIsValid = 0;
   break;
 }
 
@@ -67172,31 +67949,40 @@ case OP_Next: {        /* jump */
 ** This instruction only works for indices.  The equivalent instruction
 ** for tables is OP_Insert.
 */
+case OP_SorterInsert:       /* in2 */
+#ifdef SQLITE_OMIT_MERGE_SORT
+  pOp->opcode = OP_IdxInsert;
+#endif
 case OP_IdxInsert: {        /* in2 */
-#if 0  /* local variables moved into u.bn */
+#if 0  /* local variables moved into u.bq */
   VdbeCursor *pC;
   BtCursor *pCrsr;
   int nKey;
   const char *zKey;
-#endif /* local variables moved into u.bn */
+#endif /* local variables moved into u.bq */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bn.pC = p->apCsr[pOp->p1];
-  assert( u.bn.pC!=0 );
+  u.bq.pC = p->apCsr[pOp->p1];
+  assert( u.bq.pC!=0 );
+  assert( u.bq.pC->isSorter==(pOp->opcode==OP_SorterInsert) );
   pIn2 = &aMem[pOp->p2];
   assert( pIn2->flags & MEM_Blob );
-  u.bn.pCrsr = u.bn.pC->pCursor;
-  if( ALWAYS(u.bn.pCrsr!=0) ){
-    assert( u.bn.pC->isTable==0 );
+  u.bq.pCrsr = u.bq.pC->pCursor;
+  if( ALWAYS(u.bq.pCrsr!=0) ){
+    assert( u.bq.pC->isTable==0 );
     rc = ExpandBlob(pIn2);
     if( rc==SQLITE_OK ){
-      u.bn.nKey = pIn2->n;
-      u.bn.zKey = pIn2->z;
-      rc = sqlite3BtreeInsert(u.bn.pCrsr, u.bn.zKey, u.bn.nKey, "", 0, 0, pOp->p3,
-          ((pOp->p5 & OPFLAG_USESEEKRESULT) ? u.bn.pC->seekResult : 0)
-      );
-      assert( u.bn.pC->deferredMoveto==0 );
-      u.bn.pC->cacheStatus = CACHE_STALE;
+      if( isSorter(u.bq.pC) ){
+        rc = sqlite3VdbeSorterWrite(db, u.bq.pC, pIn2);
+      }else{
+        u.bq.nKey = pIn2->n;
+        u.bq.zKey = pIn2->z;
+        rc = sqlite3BtreeInsert(u.bq.pCrsr, u.bq.zKey, u.bq.nKey, "", 0, 0, pOp->p3,
+            ((pOp->p5 & OPFLAG_USESEEKRESULT) ? u.bq.pC->seekResult : 0)
+            );
+        assert( u.bq.pC->deferredMoveto==0 );
+        u.bq.pC->cacheStatus = CACHE_STALE;
+      }
     }
   }
   break;
@@ -67209,33 +67995,33 @@ case OP_IdxInsert: {        /* in2 */
 ** index opened by cursor P1.
 */
 case OP_IdxDelete: {
-#if 0  /* local variables moved into u.bo */
+#if 0  /* local variables moved into u.br */
   VdbeCursor *pC;
   BtCursor *pCrsr;
   int res;
   UnpackedRecord r;
-#endif /* local variables moved into u.bo */
+#endif /* local variables moved into u.br */
 
   assert( pOp->p3>0 );
   assert( pOp->p2>0 && pOp->p2+pOp->p3<=p->nMem+1 );
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bo.pC = p->apCsr[pOp->p1];
-  assert( u.bo.pC!=0 );
-  u.bo.pCrsr = u.bo.pC->pCursor;
-  if( ALWAYS(u.bo.pCrsr!=0) ){
-    u.bo.r.pKeyInfo = u.bo.pC->pKeyInfo;
-    u.bo.r.nField = (u16)pOp->p3;
-    u.bo.r.flags = 0;
-    u.bo.r.aMem = &aMem[pOp->p2];
+  u.br.pC = p->apCsr[pOp->p1];
+  assert( u.br.pC!=0 );
+  u.br.pCrsr = u.br.pC->pCursor;
+  if( ALWAYS(u.br.pCrsr!=0) ){
+    u.br.r.pKeyInfo = u.br.pC->pKeyInfo;
+    u.br.r.nField = (u16)pOp->p3;
+    u.br.r.flags = 0;
+    u.br.r.aMem = &aMem[pOp->p2];
 #ifdef SQLITE_DEBUG
-    { int i; for(i=0; i<u.bo.r.nField; i++) assert( memIsValid(&u.bo.r.aMem[i]) ); }
+    { int i; for(i=0; i<u.br.r.nField; i++) assert( memIsValid(&u.br.r.aMem[i]) ); }
 #endif
-    rc = sqlite3BtreeMovetoUnpacked(u.bo.pCrsr, &u.bo.r, 0, 0, &u.bo.res);
-    if( rc==SQLITE_OK && u.bo.res==0 ){
-      rc = sqlite3BtreeDelete(u.bo.pCrsr);
+    rc = sqlite3BtreeMovetoUnpacked(u.br.pCrsr, &u.br.r, 0, 0, &u.br.res);
+    if( rc==SQLITE_OK && u.br.res==0 ){
+      rc = sqlite3BtreeDelete(u.br.pCrsr);
     }
-    assert( u.bo.pC->deferredMoveto==0 );
-    u.bo.pC->cacheStatus = CACHE_STALE;
+    assert( u.br.pC->deferredMoveto==0 );
+    u.br.pC->cacheStatus = CACHE_STALE;
   }
   break;
 }
@@ -67249,28 +68035,28 @@ case OP_IdxDelete: {
 ** See also: Rowid, MakeRecord.
 */
 case OP_IdxRowid: {              /* out2-prerelease */
-#if 0  /* local variables moved into u.bp */
+#if 0  /* local variables moved into u.bs */
   BtCursor *pCrsr;
   VdbeCursor *pC;
   i64 rowid;
-#endif /* local variables moved into u.bp */
+#endif /* local variables moved into u.bs */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bp.pC = p->apCsr[pOp->p1];
-  assert( u.bp.pC!=0 );
-  u.bp.pCrsr = u.bp.pC->pCursor;
+  u.bs.pC = p->apCsr[pOp->p1];
+  assert( u.bs.pC!=0 );
+  u.bs.pCrsr = u.bs.pC->pCursor;
   pOut->flags = MEM_Null;
-  if( ALWAYS(u.bp.pCrsr!=0) ){
-    rc = sqlite3VdbeCursorMoveto(u.bp.pC);
+  if( ALWAYS(u.bs.pCrsr!=0) ){
+    rc = sqlite3VdbeCursorMoveto(u.bs.pC);
     if( NEVER(rc) ) goto abort_due_to_error;
-    assert( u.bp.pC->deferredMoveto==0 );
-    assert( u.bp.pC->isTable==0 );
-    if( !u.bp.pC->nullRow ){
-      rc = sqlite3VdbeIdxRowid(db, u.bp.pCrsr, &u.bp.rowid);
+    assert( u.bs.pC->deferredMoveto==0 );
+    assert( u.bs.pC->isTable==0 );
+    if( !u.bs.pC->nullRow ){
+      rc = sqlite3VdbeIdxRowid(db, u.bs.pCrsr, &u.bs.rowid);
       if( rc!=SQLITE_OK ){
         goto abort_due_to_error;
       }
-      pOut->u.i = u.bp.rowid;
+      pOut->u.i = u.bs.rowid;
       pOut->flags = MEM_Int;
     }
   }
@@ -67305,39 +68091,39 @@ case OP_IdxRowid: {              /* out2-prerelease */
 */
 case OP_IdxLT:          /* jump */
 case OP_IdxGE: {        /* jump */
-#if 0  /* local variables moved into u.bq */
+#if 0  /* local variables moved into u.bt */
   VdbeCursor *pC;
   int res;
   UnpackedRecord r;
-#endif /* local variables moved into u.bq */
+#endif /* local variables moved into u.bt */
 
   assert( pOp->p1>=0 && pOp->p1<p->nCursor );
-  u.bq.pC = p->apCsr[pOp->p1];
-  assert( u.bq.pC!=0 );
-  assert( u.bq.pC->isOrdered );
-  if( ALWAYS(u.bq.pC->pCursor!=0) ){
-    assert( u.bq.pC->deferredMoveto==0 );
+  u.bt.pC = p->apCsr[pOp->p1];
+  assert( u.bt.pC!=0 );
+  assert( u.bt.pC->isOrdered );
+  if( ALWAYS(u.bt.pC->pCursor!=0) ){
+    assert( u.bt.pC->deferredMoveto==0 );
     assert( pOp->p5==0 || pOp->p5==1 );
     assert( pOp->p4type==P4_INT32 );
-    u.bq.r.pKeyInfo = u.bq.pC->pKeyInfo;
-    u.bq.r.nField = (u16)pOp->p4.i;
+    u.bt.r.pKeyInfo = u.bt.pC->pKeyInfo;
+    u.bt.r.nField = (u16)pOp->p4.i;
     if( pOp->p5 ){
-      u.bq.r.flags = UNPACKED_INCRKEY | UNPACKED_IGNORE_ROWID;
+      u.bt.r.flags = UNPACKED_INCRKEY | UNPACKED_IGNORE_ROWID;
     }else{
-      u.bq.r.flags = UNPACKED_IGNORE_ROWID;
+      u.bt.r.flags = UNPACKED_IGNORE_ROWID;
     }
-    u.bq.r.aMem = &aMem[pOp->p3];
+    u.bt.r.aMem = &aMem[pOp->p3];
 #ifdef SQLITE_DEBUG
-    { int i; for(i=0; i<u.bq.r.nField; i++) assert( memIsValid(&u.bq.r.aMem[i]) ); }
+    { int i; for(i=0; i<u.bt.r.nField; i++) assert( memIsValid(&u.bt.r.aMem[i]) ); }
 #endif
-    rc = sqlite3VdbeIdxKeyCompare(u.bq.pC, &u.bq.r, &u.bq.res);
+    rc = sqlite3VdbeIdxKeyCompare(u.bt.pC, &u.bt.r, &u.bt.res);
     if( pOp->opcode==OP_IdxLT ){
-      u.bq.res = -u.bq.res;
+      u.bt.res = -u.bt.res;
     }else{
       assert( pOp->opcode==OP_IdxGE );
-      u.bq.res++;
+      u.bt.res++;
     }
-    if( u.bq.res>0 ){
+    if( u.bt.res>0 ){
       pc = pOp->p2 - 1 ;
     }
   }
@@ -67365,39 +68151,39 @@ case OP_IdxGE: {        /* jump */
 ** See also: Clear
 */
 case OP_Destroy: {     /* out2-prerelease */
-#if 0  /* local variables moved into u.br */
+#if 0  /* local variables moved into u.bu */
   int iMoved;
   int iCnt;
   Vdbe *pVdbe;
   int iDb;
-#endif /* local variables moved into u.br */
+#endif /* local variables moved into u.bu */
 #ifndef SQLITE_OMIT_VIRTUALTABLE
-  u.br.iCnt = 0;
-  for(u.br.pVdbe=db->pVdbe; u.br.pVdbe; u.br.pVdbe = u.br.pVdbe->pNext){
-    if( u.br.pVdbe->magic==VDBE_MAGIC_RUN && u.br.pVdbe->inVtabMethod<2 && u.br.pVdbe->pc>=0 ){
-      u.br.iCnt++;
+  u.bu.iCnt = 0;
+  for(u.bu.pVdbe=db->pVdbe; u.bu.pVdbe; u.bu.pVdbe = u.bu.pVdbe->pNext){
+    if( u.bu.pVdbe->magic==VDBE_MAGIC_RUN && u.bu.pVdbe->inVtabMethod<2 && u.bu.pVdbe->pc>=0 ){
+      u.bu.iCnt++;
     }
   }
 #else
-  u.br.iCnt = db->activeVdbeCnt;
+  u.bu.iCnt = db->activeVdbeCnt;
 #endif
   pOut->flags = MEM_Null;
-  if( u.br.iCnt>1 ){
+  if( u.bu.iCnt>1 ){
     rc = SQLITE_LOCKED;
     p->errorAction = OE_Abort;
   }else{
-    u.br.iDb = pOp->p3;
-    assert( u.br.iCnt==1 );
-    assert( (p->btreeMask & (((yDbMask)1)<<u.br.iDb))!=0 );
-    rc = sqlite3BtreeDropTable(db->aDb[u.br.iDb].pBt, pOp->p1, &u.br.iMoved);
+    u.bu.iDb = pOp->p3;
+    assert( u.bu.iCnt==1 );
+    assert( (p->btreeMask & (((yDbMask)1)<<u.bu.iDb))!=0 );
+    rc = sqlite3BtreeDropTable(db->aDb[u.bu.iDb].pBt, pOp->p1, &u.bu.iMoved);
     pOut->flags = MEM_Int;
-    pOut->u.i = u.br.iMoved;
+    pOut->u.i = u.bu.iMoved;
 #ifndef SQLITE_OMIT_AUTOVACUUM
-    if( rc==SQLITE_OK && u.br.iMoved!=0 ){
-      sqlite3RootPageMoved(db, u.br.iDb, u.br.iMoved, pOp->p1);
+    if( rc==SQLITE_OK && u.bu.iMoved!=0 ){
+      sqlite3RootPageMoved(db, u.bu.iDb, u.bu.iMoved, pOp->p1);
       /* All OP_Destroy operations occur on the same btree */
-      assert( resetSchemaOnFault==0 || resetSchemaOnFault==u.br.iDb+1 );
-      resetSchemaOnFault = u.br.iDb+1;
+      assert( resetSchemaOnFault==0 || resetSchemaOnFault==u.bu.iDb+1 );
+      resetSchemaOnFault = u.bu.iDb+1;
     }
 #endif
   }
@@ -67423,21 +68209,21 @@ case OP_Destroy: {     /* out2-prerelease */
 ** See also: Destroy
 */
 case OP_Clear: {
-#if 0  /* local variables moved into u.bs */
+#if 0  /* local variables moved into u.bv */
   int nChange;
-#endif /* local variables moved into u.bs */
+#endif /* local variables moved into u.bv */
 
-  u.bs.nChange = 0;
+  u.bv.nChange = 0;
   assert( (p->btreeMask & (((yDbMask)1)<<pOp->p2))!=0 );
   rc = sqlite3BtreeClearTable(
-      db->aDb[pOp->p2].pBt, pOp->p1, (pOp->p3 ? &u.bs.nChange : 0)
+      db->aDb[pOp->p2].pBt, pOp->p1, (pOp->p3 ? &u.bv.nChange : 0)
   );
   if( pOp->p3 ){
-    p->nChange += u.bs.nChange;
+    p->nChange += u.bv.nChange;
     if( pOp->p3>0 ){
       assert( memIsValid(&aMem[pOp->p3]) );
       memAboutToChange(p, &aMem[pOp->p3]);
-      aMem[pOp->p3].u.i += u.bs.nChange;
+      aMem[pOp->p3].u.i += u.bv.nChange;
     }
   }
   break;
@@ -67467,25 +68253,25 @@ case OP_Clear: {
 */
 case OP_CreateIndex:            /* out2-prerelease */
 case OP_CreateTable: {          /* out2-prerelease */
-#if 0  /* local variables moved into u.bt */
+#if 0  /* local variables moved into u.bw */
   int pgno;
   int flags;
   Db *pDb;
-#endif /* local variables moved into u.bt */
+#endif /* local variables moved into u.bw */
 
-  u.bt.pgno = 0;
+  u.bw.pgno = 0;
   assert( pOp->p1>=0 && pOp->p1<db->nDb );
   assert( (p->btreeMask & (((yDbMask)1)<<pOp->p1))!=0 );
-  u.bt.pDb = &db->aDb[pOp->p1];
-  assert( u.bt.pDb->pBt!=0 );
+  u.bw.pDb = &db->aDb[pOp->p1];
+  assert( u.bw.pDb->pBt!=0 );
   if( pOp->opcode==OP_CreateTable ){
-    /* u.bt.flags = BTREE_INTKEY; */
-    u.bt.flags = BTREE_INTKEY;
+    /* u.bw.flags = BTREE_INTKEY; */
+    u.bw.flags = BTREE_INTKEY;
   }else{
-    u.bt.flags = BTREE_BLOBKEY;
+    u.bw.flags = BTREE_BLOBKEY;
   }
-  rc = sqlite3BtreeCreateTable(u.bt.pDb->pBt, &u.bt.pgno, u.bt.flags);
-  pOut->u.i = u.bt.pgno;
+  rc = sqlite3BtreeCreateTable(u.bw.pDb->pBt, &u.bw.pgno, u.bw.flags);
+  pOut->u.i = u.bw.pgno;
   break;
 }
 
@@ -67498,44 +68284,44 @@ case OP_CreateTable: {          /* out2-prerelease */
 ** then runs the new virtual machine.  It is thus a re-entrant opcode.
 */
 case OP_ParseSchema: {
-#if 0  /* local variables moved into u.bu */
+#if 0  /* local variables moved into u.bx */
   int iDb;
   const char *zMaster;
   char *zSql;
   InitData initData;
-#endif /* local variables moved into u.bu */
+#endif /* local variables moved into u.bx */
 
   /* Any prepared statement that invokes this opcode will hold mutexes
   ** on every btree.  This is a prerequisite for invoking
   ** sqlite3InitCallback().
   */
 #ifdef SQLITE_DEBUG
-  for(u.bu.iDb=0; u.bu.iDb<db->nDb; u.bu.iDb++){
-    assert( u.bu.iDb==1 || sqlite3BtreeHoldsMutex(db->aDb[u.bu.iDb].pBt) );
+  for(u.bx.iDb=0; u.bx.iDb<db->nDb; u.bx.iDb++){
+    assert( u.bx.iDb==1 || sqlite3BtreeHoldsMutex(db->aDb[u.bx.iDb].pBt) );
   }
 #endif
 
-  u.bu.iDb = pOp->p1;
-  assert( u.bu.iDb>=0 && u.bu.iDb<db->nDb );
-  assert( DbHasProperty(db, u.bu.iDb, DB_SchemaLoaded) );
+  u.bx.iDb = pOp->p1;
+  assert( u.bx.iDb>=0 && u.bx.iDb<db->nDb );
+  assert( DbHasProperty(db, u.bx.iDb, DB_SchemaLoaded) );
   /* Used to be a conditional */ {
-    u.bu.zMaster = SCHEMA_TABLE(u.bu.iDb);
-    u.bu.initData.db = db;
-    u.bu.initData.iDb = pOp->p1;
-    u.bu.initData.pzErrMsg = &p->zErrMsg;
-    u.bu.zSql = sqlite3MPrintf(db,
+    u.bx.zMaster = SCHEMA_TABLE(u.bx.iDb);
+    u.bx.initData.db = db;
+    u.bx.initData.iDb = pOp->p1;
+    u.bx.initData.pzErrMsg = &p->zErrMsg;
+    u.bx.zSql = sqlite3MPrintf(db,
        "SELECT name, rootpage, sql FROM '%q'.%s WHERE %s ORDER BY rowid",
-       db->aDb[u.bu.iDb].zName, u.bu.zMaster, pOp->p4.z);
-    if( u.bu.zSql==0 ){
+       db->aDb[u.bx.iDb].zName, u.bx.zMaster, pOp->p4.z);
+    if( u.bx.zSql==0 ){
       rc = SQLITE_NOMEM;
     }else{
       assert( db->init.busy==0 );
       db->init.busy = 1;
-      u.bu.initData.rc = SQLITE_OK;
+      u.bx.initData.rc = SQLITE_OK;
       assert( !db->mallocFailed );
-      rc = sqlite3_exec(db, u.bu.zSql, sqlite3InitCallback, &u.bu.initData, 0);
-      if( rc==SQLITE_OK ) rc = u.bu.initData.rc;
-      sqlite3DbFree(db, u.bu.zSql);
+      rc = sqlite3_exec(db, u.bx.zSql, sqlite3InitCallback, &u.bx.initData, 0);
+      if( rc==SQLITE_OK ) rc = u.bx.initData.rc;
+      sqlite3DbFree(db, u.bx.zSql);
       db->init.busy = 0;
     }
   }
@@ -67618,41 +68404,41 @@ case OP_DropTrigger: {
 ** This opcode is used to implement the integrity_check pragma.
 */
 case OP_IntegrityCk: {
-#if 0  /* local variables moved into u.bv */
+#if 0  /* local variables moved into u.by */
   int nRoot;      /* Number of tables to check.  (Number of root pages.) */
   int *aRoot;     /* Array of rootpage numbers for tables to be checked */
   int j;          /* Loop counter */
   int nErr;       /* Number of errors reported */
   char *z;        /* Text of the error report */
   Mem *pnErr;     /* Register keeping track of errors remaining */
-#endif /* local variables moved into u.bv */
+#endif /* local variables moved into u.by */
 
-  u.bv.nRoot = pOp->p2;
-  assert( u.bv.nRoot>0 );
-  u.bv.aRoot = sqlite3DbMallocRaw(db, sizeof(int)*(u.bv.nRoot+1) );
-  if( u.bv.aRoot==0 ) goto no_mem;
+  u.by.nRoot = pOp->p2;
+  assert( u.by.nRoot>0 );
+  u.by.aRoot = sqlite3DbMallocRaw(db, sizeof(int)*(u.by.nRoot+1) );
+  if( u.by.aRoot==0 ) goto no_mem;
   assert( pOp->p3>0 && pOp->p3<=p->nMem );
-  u.bv.pnErr = &aMem[pOp->p3];
-  assert( (u.bv.pnErr->flags & MEM_Int)!=0 );
-  assert( (u.bv.pnErr->flags & (MEM_Str|MEM_Blob))==0 );
+  u.by.pnErr = &aMem[pOp->p3];
+  assert( (u.by.pnErr->flags & MEM_Int)!=0 );
+  assert( (u.by.pnErr->flags & (MEM_Str|MEM_Blob))==0 );
   pIn1 = &aMem[pOp->p1];
-  for(u.bv.j=0; u.bv.j<u.bv.nRoot; u.bv.j++){
-    u.bv.aRoot[u.bv.j] = (int)sqlite3VdbeIntValue(&pIn1[u.bv.j]);
+  for(u.by.j=0; u.by.j<u.by.nRoot; u.by.j++){
+    u.by.aRoot[u.by.j] = (int)sqlite3VdbeIntValue(&pIn1[u.by.j]);
   }
-  u.bv.aRoot[u.bv.j] = 0;
+  u.by.aRoot[u.by.j] = 0;
   assert( pOp->p5<db->nDb );
   assert( (p->btreeMask & (((yDbMask)1)<<pOp->p5))!=0 );
-  u.bv.z = sqlite3BtreeIntegrityCheck(db->aDb[pOp->p5].pBt, u.bv.aRoot, u.bv.nRoot,
-                                 (int)u.bv.pnErr->u.i, &u.bv.nErr);
-  sqlite3DbFree(db, u.bv.aRoot);
-  u.bv.pnErr->u.i -= u.bv.nErr;
+  u.by.z = sqlite3BtreeIntegrityCheck(db->aDb[pOp->p5].pBt, u.by.aRoot, u.by.nRoot,
+                                 (int)u.by.pnErr->u.i, &u.by.nErr);
+  sqlite3DbFree(db, u.by.aRoot);
+  u.by.pnErr->u.i -= u.by.nErr;
   sqlite3VdbeMemSetNull(pIn1);
-  if( u.bv.nErr==0 ){
-    assert( u.bv.z==0 );
-  }else if( u.bv.z==0 ){
+  if( u.by.nErr==0 ){
+    assert( u.by.z==0 );
+  }else if( u.by.z==0 ){
     goto no_mem;
   }else{
-    sqlite3VdbeMemSetStr(pIn1, u.bv.z, -1, SQLITE_UTF8, sqlite3_free);
+    sqlite3VdbeMemSetStr(pIn1, u.by.z, -1, SQLITE_UTF8, sqlite3_free);
   }
   UPDATE_MAX_BLOBSIZE(pIn1);
   sqlite3VdbeChangeEncoding(pIn1, encoding);
@@ -67686,20 +68472,20 @@ case OP_RowSetAdd: {       /* in1, in2 */
 ** unchanged and jump to instruction P2.
 */
 case OP_RowSetRead: {       /* jump, in1, out3 */
-#if 0  /* local variables moved into u.bw */
+#if 0  /* local variables moved into u.bz */
   i64 val;
-#endif /* local variables moved into u.bw */
+#endif /* local variables moved into u.bz */
   CHECK_FOR_INTERRUPT;
   pIn1 = &aMem[pOp->p1];
   if( (pIn1->flags & MEM_RowSet)==0
-   || sqlite3RowSetNext(pIn1->u.pRowSet, &u.bw.val)==0
+   || sqlite3RowSetNext(pIn1->u.pRowSet, &u.bz.val)==0
   ){
     /* The boolean index is empty */
     sqlite3VdbeMemSetNull(pIn1);
     pc = pOp->p2 - 1;
   }else{
     /* A value was pulled from the index */
-    sqlite3VdbeMemSetInt64(&aMem[pOp->p3], u.bw.val);
+    sqlite3VdbeMemSetInt64(&aMem[pOp->p3], u.bz.val);
   }
   break;
 }
@@ -67728,14 +68514,14 @@ case OP_RowSetRead: {       /* jump, in1, out3 */
 ** inserted as part of some other set).
 */
 case OP_RowSetTest: {                     /* jump, in1, in3 */
-#if 0  /* local variables moved into u.bx */
+#if 0  /* local variables moved into u.ca */
   int iSet;
   int exists;
-#endif /* local variables moved into u.bx */
+#endif /* local variables moved into u.ca */
 
   pIn1 = &aMem[pOp->p1];
   pIn3 = &aMem[pOp->p3];
-  u.bx.iSet = pOp->p4.i;
+  u.ca.iSet = pOp->p4.i;
   assert( pIn3->flags&MEM_Int );
 
   /* If there is anything other than a rowset object in memory cell P1,
@@ -67747,17 +68533,17 @@ case OP_RowSetTest: {                     /* jump, in1, in3 */
   }
 
   assert( pOp->p4type==P4_INT32 );
-  assert( u.bx.iSet==-1 || u.bx.iSet>=0 );
-  if( u.bx.iSet ){
-    u.bx.exists = sqlite3RowSetTest(pIn1->u.pRowSet,
-                               (u8)(u.bx.iSet>=0 ? u.bx.iSet & 0xf : 0xff),
+  assert( u.ca.iSet==-1 || u.ca.iSet>=0 );
+  if( u.ca.iSet ){
+    u.ca.exists = sqlite3RowSetTest(pIn1->u.pRowSet,
+                               (u8)(u.ca.iSet>=0 ? u.ca.iSet & 0xf : 0xff),
                                pIn3->u.i);
-    if( u.bx.exists ){
+    if( u.ca.exists ){
       pc = pOp->p2 - 1;
       break;
     }
   }
-  if( u.bx.iSet>=0 ){
+  if( u.ca.iSet>=0 ){
     sqlite3RowSetInsert(pIn1->u.pRowSet, pIn3->u.i);
   }
   break;
@@ -67780,7 +68566,7 @@ case OP_RowSetTest: {                     /* jump, in1, in3 */
 ** P4 is a pointer to the VM containing the trigger program.
 */
 case OP_Program: {        /* jump */
-#if 0  /* local variables moved into u.by */
+#if 0  /* local variables moved into u.cb */
   int nMem;               /* Number of memory registers for sub-program */
   int nByte;              /* Bytes of runtime space required for sub-program */
   Mem *pRt;               /* Register to allocate runtime space */
@@ -67789,12 +68575,12 @@ case OP_Program: {        /* jump */
   VdbeFrame *pFrame;      /* New vdbe frame to execute in */
   SubProgram *pProgram;   /* Sub-program to execute */
   void *t;                /* Token identifying trigger */
-#endif /* local variables moved into u.by */
+#endif /* local variables moved into u.cb */
 
-  u.by.pProgram = pOp->p4.pProgram;
-  u.by.pRt = &aMem[pOp->p3];
-  assert( memIsValid(u.by.pRt) );
-  assert( u.by.pProgram->nOp>0 );
+  u.cb.pProgram = pOp->p4.pProgram;
+  u.cb.pRt = &aMem[pOp->p3];
+  assert( memIsValid(u.cb.pRt) );
+  assert( u.cb.pProgram->nOp>0 );
 
   /* If the p5 flag is clear, then recursive invocation of triggers is
   ** disabled for backwards compatibility (p5 is set if this sub-program
@@ -67808,9 +68594,9 @@ case OP_Program: {        /* jump */
   ** single trigger all have the same value for the SubProgram.token
   ** variable.  */
   if( pOp->p5 ){
-    u.by.t = u.by.pProgram->token;
-    for(u.by.pFrame=p->pFrame; u.by.pFrame && u.by.pFrame->token!=u.by.t; u.by.pFrame=u.by.pFrame->pParent);
-    if( u.by.pFrame ) break;
+    u.cb.t = u.cb.pProgram->token;
+    for(u.cb.pFrame=p->pFrame; u.cb.pFrame && u.cb.pFrame->token!=u.cb.t; u.cb.pFrame=u.cb.pFrame->pParent);
+    if( u.cb.pFrame ) break;
   }
 
   if( p->nFrame>=db->aLimit[SQLITE_LIMIT_TRIGGER_DEPTH] ){
@@ -67819,64 +68605,64 @@ case OP_Program: {        /* jump */
     break;
   }
 
-  /* Register u.by.pRt is used to store the memory required to save the state
+  /* Register u.cb.pRt is used to store the memory required to save the state
   ** of the current program, and the memory required at runtime to execute
-  ** the trigger program. If this trigger has been fired before, then u.by.pRt
+  ** the trigger program. If this trigger has been fired before, then u.cb.pRt
   ** is already allocated. Otherwise, it must be initialized.  */
-  if( (u.by.pRt->flags&MEM_Frame)==0 ){
+  if( (u.cb.pRt->flags&MEM_Frame)==0 ){
     /* SubProgram.nMem is set to the number of memory cells used by the
     ** program stored in SubProgram.aOp. As well as these, one memory
     ** cell is required for each cursor used by the program. Set local
-    ** variable u.by.nMem (and later, VdbeFrame.nChildMem) to this value.
+    ** variable u.cb.nMem (and later, VdbeFrame.nChildMem) to this value.
     */
-    u.by.nMem = u.by.pProgram->nMem + u.by.pProgram->nCsr;
-    u.by.nByte = ROUND8(sizeof(VdbeFrame))
-              + u.by.nMem * sizeof(Mem)
-              + u.by.pProgram->nCsr * sizeof(VdbeCursor *);
-    u.by.pFrame = sqlite3DbMallocZero(db, u.by.nByte);
-    if( !u.by.pFrame ){
+    u.cb.nMem = u.cb.pProgram->nMem + u.cb.pProgram->nCsr;
+    u.cb.nByte = ROUND8(sizeof(VdbeFrame))
+              + u.cb.nMem * sizeof(Mem)
+              + u.cb.pProgram->nCsr * sizeof(VdbeCursor *);
+    u.cb.pFrame = sqlite3DbMallocZero(db, u.cb.nByte);
+    if( !u.cb.pFrame ){
       goto no_mem;
     }
-    sqlite3VdbeMemRelease(u.by.pRt);
-    u.by.pRt->flags = MEM_Frame;
-    u.by.pRt->u.pFrame = u.by.pFrame;
-
-    u.by.pFrame->v = p;
-    u.by.pFrame->nChildMem = u.by.nMem;
-    u.by.pFrame->nChildCsr = u.by.pProgram->nCsr;
-    u.by.pFrame->pc = pc;
-    u.by.pFrame->aMem = p->aMem;
-    u.by.pFrame->nMem = p->nMem;
-    u.by.pFrame->apCsr = p->apCsr;
-    u.by.pFrame->nCursor = p->nCursor;
-    u.by.pFrame->aOp = p->aOp;
-    u.by.pFrame->nOp = p->nOp;
-    u.by.pFrame->token = u.by.pProgram->token;
-
-    u.by.pEnd = &VdbeFrameMem(u.by.pFrame)[u.by.pFrame->nChildMem];
-    for(u.by.pMem=VdbeFrameMem(u.by.pFrame); u.by.pMem!=u.by.pEnd; u.by.pMem++){
-      u.by.pMem->flags = MEM_Null;
-      u.by.pMem->db = db;
+    sqlite3VdbeMemRelease(u.cb.pRt);
+    u.cb.pRt->flags = MEM_Frame;
+    u.cb.pRt->u.pFrame = u.cb.pFrame;
+
+    u.cb.pFrame->v = p;
+    u.cb.pFrame->nChildMem = u.cb.nMem;
+    u.cb.pFrame->nChildCsr = u.cb.pProgram->nCsr;
+    u.cb.pFrame->pc = pc;
+    u.cb.pFrame->aMem = p->aMem;
+    u.cb.pFrame->nMem = p->nMem;
+    u.cb.pFrame->apCsr = p->apCsr;
+    u.cb.pFrame->nCursor = p->nCursor;
+    u.cb.pFrame->aOp = p->aOp;
+    u.cb.pFrame->nOp = p->nOp;
+    u.cb.pFrame->token = u.cb.pProgram->token;
+
+    u.cb.pEnd = &VdbeFrameMem(u.cb.pFrame)[u.cb.pFrame->nChildMem];
+    for(u.cb.pMem=VdbeFrameMem(u.cb.pFrame); u.cb.pMem!=u.cb.pEnd; u.cb.pMem++){
+      u.cb.pMem->flags = MEM_Null;
+      u.cb.pMem->db = db;
     }
   }else{
-    u.by.pFrame = u.by.pRt->u.pFrame;
-    assert( u.by.pProgram->nMem+u.by.pProgram->nCsr==u.by.pFrame->nChildMem );
-    assert( u.by.pProgram->nCsr==u.by.pFrame->nChildCsr );
-    assert( pc==u.by.pFrame->pc );
+    u.cb.pFrame = u.cb.pRt->u.pFrame;
+    assert( u.cb.pProgram->nMem+u.cb.pProgram->nCsr==u.cb.pFrame->nChildMem );
+    assert( u.cb.pProgram->nCsr==u.cb.pFrame->nChildCsr );
+    assert( pc==u.cb.pFrame->pc );
   }
 
   p->nFrame++;
-  u.by.pFrame->pParent = p->pFrame;
-  u.by.pFrame->lastRowid = lastRowid;
-  u.by.pFrame->nChange = p->nChange;
+  u.cb.pFrame->pParent = p->pFrame;
+  u.cb.pFrame->lastRowid = lastRowid;
+  u.cb.pFrame->nChange = p->nChange;
   p->nChange = 0;
-  p->pFrame = u.by.pFrame;
-  p->aMem = aMem = &VdbeFrameMem(u.by.pFrame)[-1];
-  p->nMem = u.by.pFrame->nChildMem;
-  p->nCursor = (u16)u.by.pFrame->nChildCsr;
+  p->pFrame = u.cb.pFrame;
+  p->aMem = aMem = &VdbeFrameMem(u.cb.pFrame)[-1];
+  p->nMem = u.cb.pFrame->nChildMem;
+  p->nCursor = (u16)u.cb.pFrame->nChildCsr;
   p->apCsr = (VdbeCursor **)&aMem[p->nMem+1];
-  p->aOp = aOp = u.by.pProgram->aOp;
-  p->nOp = u.by.pProgram->nOp;
+  p->aOp = aOp = u.cb.pProgram->aOp;
+  p->nOp = u.cb.pProgram->nOp;
   pc = -1;
 
   break;
@@ -67895,13 +68681,13 @@ case OP_Program: {        /* jump */
 ** calling OP_Program instruction.
 */
 case OP_Param: {           /* out2-prerelease */
-#if 0  /* local variables moved into u.bz */
+#if 0  /* local variables moved into u.cc */
   VdbeFrame *pFrame;
   Mem *pIn;
-#endif /* local variables moved into u.bz */
-  u.bz.pFrame = p->pFrame;
-  u.bz.pIn = &u.bz.pFrame->aMem[pOp->p1 + u.bz.pFrame->aOp[u.bz.pFrame->pc].p1];
-  sqlite3VdbeMemShallowCopy(pOut, u.bz.pIn, MEM_Ephem);
+#endif /* local variables moved into u.cc */
+  u.cc.pFrame = p->pFrame;
+  u.cc.pIn = &u.cc.pFrame->aMem[pOp->p1 + u.cc.pFrame->aOp[u.cc.pFrame->pc].p1];
+  sqlite3VdbeMemShallowCopy(pOut, u.cc.pIn, MEM_Ephem);
   break;
 }
 
@@ -67957,22 +68743,22 @@ case OP_FkIfZero: {         /* jump */
 ** an integer.
 */
 case OP_MemMax: {        /* in2 */
-#if 0  /* local variables moved into u.ca */
+#if 0  /* local variables moved into u.cd */
   Mem *pIn1;
   VdbeFrame *pFrame;
-#endif /* local variables moved into u.ca */
+#endif /* local variables moved into u.cd */
   if( p->pFrame ){
-    for(u.ca.pFrame=p->pFrame; u.ca.pFrame->pParent; u.ca.pFrame=u.ca.pFrame->pParent);
-    u.ca.pIn1 = &u.ca.pFrame->aMem[pOp->p1];
+    for(u.cd.pFrame=p->pFrame; u.cd.pFrame->pParent; u.cd.pFrame=u.cd.pFrame->pParent);
+    u.cd.pIn1 = &u.cd.pFrame->aMem[pOp->p1];
   }else{
-    u.ca.pIn1 = &aMem[pOp->p1];
+    u.cd.pIn1 = &aMem[pOp->p1];
   }
-  assert( memIsValid(u.ca.pIn1) );
-  sqlite3VdbeMemIntegerify(u.ca.pIn1);
+  assert( memIsValid(u.cd.pIn1) );
+  sqlite3VdbeMemIntegerify(u.cd.pIn1);
   pIn2 = &aMem[pOp->p2];
   sqlite3VdbeMemIntegerify(pIn2);
-  if( u.ca.pIn1->u.i<pIn2->u.i){
-    u.ca.pIn1->u.i = pIn2->u.i;
+  if( u.cd.pIn1->u.i<pIn2->u.i){
+    u.cd.pIn1->u.i = pIn2->u.i;
   }
   break;
 }
@@ -68039,50 +68825,50 @@ case OP_IfZero: {        /* jump, in1 */
 ** successors.
 */
 case OP_AggStep: {
-#if 0  /* local variables moved into u.cb */
+#if 0  /* local variables moved into u.ce */
   int n;
   int i;
   Mem *pMem;
   Mem *pRec;
   sqlite3_context ctx;
   sqlite3_value **apVal;
-#endif /* local variables moved into u.cb */
+#endif /* local variables moved into u.ce */
 
-  u.cb.n = pOp->p5;
-  assert( u.cb.n>=0 );
-  u.cb.pRec = &aMem[pOp->p2];
-  u.cb.apVal = p->apArg;
-  assert( u.cb.apVal || u.cb.n==0 );
-  for(u.cb.i=0; u.cb.i<u.cb.n; u.cb.i++, u.cb.pRec++){
-    assert( memIsValid(u.cb.pRec) );
-    u.cb.apVal[u.cb.i] = u.cb.pRec;
-    memAboutToChange(p, u.cb.pRec);
-    sqlite3VdbeMemStoreType(u.cb.pRec);
-  }
-  u.cb.ctx.pFunc = pOp->p4.pFunc;
+  u.ce.n = pOp->p5;
+  assert( u.ce.n>=0 );
+  u.ce.pRec = &aMem[pOp->p2];
+  u.ce.apVal = p->apArg;
+  assert( u.ce.apVal || u.ce.n==0 );
+  for(u.ce.i=0; u.ce.i<u.ce.n; u.ce.i++, u.ce.pRec++){
+    assert( memIsValid(u.ce.pRec) );
+    u.ce.apVal[u.ce.i] = u.ce.pRec;
+    memAboutToChange(p, u.ce.pRec);
+    sqlite3VdbeMemStoreType(u.ce.pRec);
+  }
+  u.ce.ctx.pFunc = pOp->p4.pFunc;
   assert( pOp->p3>0 && pOp->p3<=p->nMem );
-  u.cb.ctx.pMem = u.cb.pMem = &aMem[pOp->p3];
-  u.cb.pMem->n++;
-  u.cb.ctx.s.flags = MEM_Null;
-  u.cb.ctx.s.z = 0;
-  u.cb.ctx.s.zMalloc = 0;
-  u.cb.ctx.s.xDel = 0;
-  u.cb.ctx.s.db = db;
-  u.cb.ctx.isError = 0;
-  u.cb.ctx.pColl = 0;
-  if( u.cb.ctx.pFunc->flags & SQLITE_FUNC_NEEDCOLL ){
+  u.ce.ctx.pMem = u.ce.pMem = &aMem[pOp->p3];
+  u.ce.pMem->n++;
+  u.ce.ctx.s.flags = MEM_Null;
+  u.ce.ctx.s.z = 0;
+  u.ce.ctx.s.zMalloc = 0;
+  u.ce.ctx.s.xDel = 0;
+  u.ce.ctx.s.db = db;
+  u.ce.ctx.isError = 0;
+  u.ce.ctx.pColl = 0;
+  if( u.ce.ctx.pFunc->flags & SQLITE_FUNC_NEEDCOLL ){
     assert( pOp>p->aOp );
     assert( pOp[-1].p4type==P4_COLLSEQ );
     assert( pOp[-1].opcode==OP_CollSeq );
-    u.cb.ctx.pColl = pOp[-1].p4.pColl;
+    u.ce.ctx.pColl = pOp[-1].p4.pColl;
   }
-  (u.cb.ctx.pFunc->xStep)(&u.cb.ctx, u.cb.n, u.cb.apVal); /* IMP: R-24505-23230 */
-  if( u.cb.ctx.isError ){
-    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&u.cb.ctx.s));
-    rc = u.cb.ctx.isError;
+  (u.ce.ctx.pFunc->xStep)(&u.ce.ctx, u.ce.n, u.ce.apVal); /* IMP: R-24505-23230 */
+  if( u.ce.ctx.isError ){
+    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(&u.ce.ctx.s));
+    rc = u.ce.ctx.isError;
   }
 
-  sqlite3VdbeMemRelease(&u.cb.ctx.s);
+  sqlite3VdbeMemRelease(&u.ce.ctx.s);
 
   break;
 }
@@ -68100,19 +68886,19 @@ case OP_AggStep: {
 ** the step function was not previously called.
 */
 case OP_AggFinal: {
-#if 0  /* local variables moved into u.cc */
+#if 0  /* local variables moved into u.cf */
   Mem *pMem;
-#endif /* local variables moved into u.cc */
+#endif /* local variables moved into u.cf */
   assert( pOp->p1>0 && pOp->p1<=p->nMem );
-  u.cc.pMem = &aMem[pOp->p1];
-  assert( (u.cc.pMem->flags & ~(MEM_Null|MEM_Agg))==0 );
-  rc = sqlite3VdbeMemFinalize(u.cc.pMem, pOp->p4.pFunc);
+  u.cf.pMem = &aMem[pOp->p1];
+  assert( (u.cf.pMem->flags & ~(MEM_Null|MEM_Agg))==0 );
+  rc = sqlite3VdbeMemFinalize(u.cf.pMem, pOp->p4.pFunc);
   if( rc ){
-    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(u.cc.pMem));
+    sqlite3SetString(&p->zErrMsg, db, "%s", sqlite3_value_text(u.cf.pMem));
   }
-  sqlite3VdbeChangeEncoding(u.cc.pMem, encoding);
-  UPDATE_MAX_BLOBSIZE(u.cc.pMem);
-  if( sqlite3VdbeMemTooBig(u.cc.pMem) ){
+  sqlite3VdbeChangeEncoding(u.cf.pMem, encoding);
+  UPDATE_MAX_BLOBSIZE(u.cf.pMem);
+  if( sqlite3VdbeMemTooBig(u.cf.pMem) ){
     goto too_big;
   }
   break;
@@ -68131,25 +68917,25 @@ case OP_AggFinal: {
 ** mem[P3+2] are initialized to -1.
 */
 case OP_Checkpoint: {
-#if 0  /* local variables moved into u.cd */
+#if 0  /* local variables moved into u.cg */
   int i;                          /* Loop counter */
   int aRes[3];                    /* Results */
   Mem *pMem;                      /* Write results here */
-#endif /* local variables moved into u.cd */
+#endif /* local variables moved into u.cg */
 
-  u.cd.aRes[0] = 0;
-  u.cd.aRes[1] = u.cd.aRes[2] = -1;
+  u.cg.aRes[0] = 0;
+  u.cg.aRes[1] = u.cg.aRes[2] = -1;
   assert( pOp->p2==SQLITE_CHECKPOINT_PASSIVE
        || pOp->p2==SQLITE_CHECKPOINT_FULL
        || pOp->p2==SQLITE_CHECKPOINT_RESTART
   );
-  rc = sqlite3Checkpoint(db, pOp->p1, pOp->p2, &u.cd.aRes[1], &u.cd.aRes[2]);
+  rc = sqlite3Checkpoint(db, pOp->p1, pOp->p2, &u.cg.aRes[1], &u.cg.aRes[2]);
   if( rc==SQLITE_BUSY ){
     rc = SQLITE_OK;
-    u.cd.aRes[0] = 1;
+    u.cg.aRes[0] = 1;
   }
-  for(u.cd.i=0, u.cd.pMem = &aMem[pOp->p3]; u.cd.i<3; u.cd.i++, u.cd.pMem++){
-    sqlite3VdbeMemSetInt64(u.cd.pMem, (i64)u.cd.aRes[u.cd.i]);
+  for(u.cg.i=0, u.cg.pMem = &aMem[pOp->p3]; u.cg.i<3; u.cg.i++, u.cg.pMem++){
+    sqlite3VdbeMemSetInt64(u.cg.pMem, (i64)u.cg.aRes[u.cg.i]);
   }
   break;
 };  
@@ -68168,91 +68954,91 @@ case OP_Checkpoint: {
 ** Write a string containing the final journal-mode to register P2.
 */
 case OP_JournalMode: {    /* out2-prerelease */
-#if 0  /* local variables moved into u.ce */
+#if 0  /* local variables moved into u.ch */
   Btree *pBt;                     /* Btree to change journal mode of */
   Pager *pPager;                  /* Pager associated with pBt */
   int eNew;                       /* New journal mode */
   int eOld;                       /* The old journal mode */
   const char *zFilename;          /* Name of database file for pPager */
-#endif /* local variables moved into u.ce */
+#endif /* local variables moved into u.ch */
 
-  u.ce.eNew = pOp->p3;
-  assert( u.ce.eNew==PAGER_JOURNALMODE_DELETE
-       || u.ce.eNew==PAGER_JOURNALMODE_TRUNCATE
-       || u.ce.eNew==PAGER_JOURNALMODE_PERSIST
-       || u.ce.eNew==PAGER_JOURNALMODE_OFF
-       || u.ce.eNew==PAGER_JOURNALMODE_MEMORY
-       || u.ce.eNew==PAGER_JOURNALMODE_WAL
-       || u.ce.eNew==PAGER_JOURNALMODE_QUERY
+  u.ch.eNew = pOp->p3;
+  assert( u.ch.eNew==PAGER_JOURNALMODE_DELETE
+       || u.ch.eNew==PAGER_JOURNALMODE_TRUNCATE
+       || u.ch.eNew==PAGER_JOURNALMODE_PERSIST
+       || u.ch.eNew==PAGER_JOURNALMODE_OFF
+       || u.ch.eNew==PAGER_JOURNALMODE_MEMORY
+       || u.ch.eNew==PAGER_JOURNALMODE_WAL
+       || u.ch.eNew==PAGER_JOURNALMODE_QUERY
   );
   assert( pOp->p1>=0 && pOp->p1<db->nDb );
 
-  u.ce.pBt = db->aDb[pOp->p1].pBt;
-  u.ce.pPager = sqlite3BtreePager(u.ce.pBt);
-  u.ce.eOld = sqlite3PagerGetJournalMode(u.ce.pPager);
-  if( u.ce.eNew==PAGER_JOURNALMODE_QUERY ) u.ce.eNew = u.ce.eOld;
-  if( !sqlite3PagerOkToChangeJournalMode(u.ce.pPager) ) u.ce.eNew = u.ce.eOld;
+  u.ch.pBt = db->aDb[pOp->p1].pBt;
+  u.ch.pPager = sqlite3BtreePager(u.ch.pBt);
+  u.ch.eOld = sqlite3PagerGetJournalMode(u.ch.pPager);
+  if( u.ch.eNew==PAGER_JOURNALMODE_QUERY ) u.ch.eNew = u.ch.eOld;
+  if( !sqlite3PagerOkToChangeJournalMode(u.ch.pPager) ) u.ch.eNew = u.ch.eOld;
 
 #ifndef SQLITE_OMIT_WAL
-  u.ce.zFilename = sqlite3PagerFilename(u.ce.pPager);
+  u.ch.zFilename = sqlite3PagerFilename(u.ch.pPager);
 
   /* Do not allow a transition to journal_mode=WAL for a database
   ** in temporary storage or if the VFS does not support shared memory
   */
-  if( u.ce.eNew==PAGER_JOURNALMODE_WAL
-   && (u.ce.zFilename[0]==0                         /* Temp file */
-       || !sqlite3PagerWalSupported(u.ce.pPager))   /* No shared-memory support */
+  if( u.ch.eNew==PAGER_JOURNALMODE_WAL
+   && (sqlite3Strlen30(u.ch.zFilename)==0           /* Temp file */
+       || !sqlite3PagerWalSupported(u.ch.pPager))   /* No shared-memory support */
   ){
-    u.ce.eNew = u.ce.eOld;
+    u.ch.eNew = u.ch.eOld;
   }
 
-  if( (u.ce.eNew!=u.ce.eOld)
-   && (u.ce.eOld==PAGER_JOURNALMODE_WAL || u.ce.eNew==PAGER_JOURNALMODE_WAL)
+  if( (u.ch.eNew!=u.ch.eOld)
+   && (u.ch.eOld==PAGER_JOURNALMODE_WAL || u.ch.eNew==PAGER_JOURNALMODE_WAL)
   ){
     if( !db->autoCommit || db->activeVdbeCnt>1 ){
       rc = SQLITE_ERROR;
       sqlite3SetString(&p->zErrMsg, db,
           "cannot change %s wal mode from within a transaction",
-          (u.ce.eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of")
+          (u.ch.eNew==PAGER_JOURNALMODE_WAL ? "into" : "out of")
       );
       break;
     }else{
 
-      if( u.ce.eOld==PAGER_JOURNALMODE_WAL ){
+      if( u.ch.eOld==PAGER_JOURNALMODE_WAL ){
         /* If leaving WAL mode, close the log file. If successful, the call
         ** to PagerCloseWal() checkpoints and deletes the write-ahead-log
         ** file. An EXCLUSIVE lock may still be held on the database file
         ** after a successful return.
         */
-        rc = sqlite3PagerCloseWal(u.ce.pPager);
+        rc = sqlite3PagerCloseWal(u.ch.pPager);
         if( rc==SQLITE_OK ){
-          sqlite3PagerSetJournalMode(u.ce.pPager, u.ce.eNew);
+          sqlite3PagerSetJournalMode(u.ch.pPager, u.ch.eNew);
         }
-      }else if( u.ce.eOld==PAGER_JOURNALMODE_MEMORY ){
+      }else if( u.ch.eOld==PAGER_JOURNALMODE_MEMORY ){
         /* Cannot transition directly from MEMORY to WAL.  Use mode OFF
         ** as an intermediate */
-        sqlite3PagerSetJournalMode(u.ce.pPager, PAGER_JOURNALMODE_OFF);
+        sqlite3PagerSetJournalMode(u.ch.pPager, PAGER_JOURNALMODE_OFF);
       }
 
       /* Open a transaction on the database file. Regardless of the journal
       ** mode, this transaction always uses a rollback journal.
       */
-      assert( sqlite3BtreeIsInTrans(u.ce.pBt)==0 );
+      assert( sqlite3BtreeIsInTrans(u.ch.pBt)==0 );
       if( rc==SQLITE_OK ){
-        rc = sqlite3BtreeSetVersion(u.ce.pBt, (u.ce.eNew==PAGER_JOURNALMODE_WAL ? 2 : 1));
+        rc = sqlite3BtreeSetVersion(u.ch.pBt, (u.ch.eNew==PAGER_JOURNALMODE_WAL ? 2 : 1));
       }
     }
   }
 #endif /* ifndef SQLITE_OMIT_WAL */
 
   if( rc ){
-    u.ce.eNew = u.ce.eOld;
+    u.ch.eNew = u.ch.eOld;
   }
-  u.ce.eNew = sqlite3PagerSetJournalMode(u.ce.pPager, u.ce.eNew);
+  u.ch.eNew = sqlite3PagerSetJournalMode(u.ch.pPager, u.ch.eNew);
 
   pOut = &aMem[pOp->p2];
   pOut->flags = MEM_Str|MEM_Static|MEM_Term;
-  pOut->z = (char *)sqlite3JournalModename(u.ce.eNew);
+  pOut->z = (char *)sqlite3JournalModename(u.ch.eNew);
   pOut->n = sqlite3Strlen30(pOut->z);
   pOut->enc = SQLITE_UTF8;
   sqlite3VdbeChangeEncoding(pOut, encoding);
@@ -68281,14 +69067,14 @@ case OP_Vacuum: {
 ** P2. Otherwise, fall through to the next instruction.
 */
 case OP_IncrVacuum: {        /* jump */
-#if 0  /* local variables moved into u.cf */
+#if 0  /* local variables moved into u.ci */
   Btree *pBt;
-#endif /* local variables moved into u.cf */
+#endif /* local variables moved into u.ci */
 
   assert( pOp->p1>=0 && pOp->p1<db->nDb );
   assert( (p->btreeMask & (((yDbMask)1)<<pOp->p1))!=0 );
-  u.cf.pBt = db->aDb[pOp->p1].pBt;
-  rc = sqlite3BtreeIncrVacuum(u.cf.pBt);
+  u.ci.pBt = db->aDb[pOp->p1].pBt;
+  rc = sqlite3BtreeIncrVacuum(u.ci.pBt);
   if( rc==SQLITE_DONE ){
     pc = pOp->p2 - 1;
     rc = SQLITE_OK;
@@ -68358,12 +69144,12 @@ case OP_TableLock: {
 ** code will be set to SQLITE_LOCKED.
 */
 case OP_VBegin: {
-#if 0  /* local variables moved into u.cg */
+#if 0  /* local variables moved into u.cj */
   VTable *pVTab;
-#endif /* local variables moved into u.cg */
-  u.cg.pVTab = pOp->p4.pVtab;
-  rc = sqlite3VtabBegin(db, u.cg.pVTab);
-  if( u.cg.pVTab ) importVtabErrMsg(p, u.cg.pVTab->pVtab);
+#endif /* local variables moved into u.cj */
+  u.cj.pVTab = pOp->p4.pVtab;
+  rc = sqlite3VtabBegin(db, u.cj.pVTab);
+  if( u.cj.pVTab ) importVtabErrMsg(p, u.cj.pVTab->pVtab);
   break;
 }
 #endif /* SQLITE_OMIT_VIRTUALTABLE */
@@ -68402,32 +69188,32 @@ case OP_VDestroy: {
 ** table and stores that cursor in P1.
 */
 case OP_VOpen: {
-#if 0  /* local variables moved into u.ch */
+#if 0  /* local variables moved into u.ck */
   VdbeCursor *pCur;
   sqlite3_vtab_cursor *pVtabCursor;
   sqlite3_vtab *pVtab;
   sqlite3_module *pModule;
-#endif /* local variables moved into u.ch */
+#endif /* local variables moved into u.ck */
 
-  u.ch.pCur = 0;
-  u.ch.pVtabCursor = 0;
-  u.ch.pVtab = pOp->p4.pVtab->pVtab;
-  u.ch.pModule = (sqlite3_module *)u.ch.pVtab->pModule;
-  assert(u.ch.pVtab && u.ch.pModule);
-  rc = u.ch.pModule->xOpen(u.ch.pVtab, &u.ch.pVtabCursor);
-  importVtabErrMsg(p, u.ch.pVtab);
+  u.ck.pCur = 0;
+  u.ck.pVtabCursor = 0;
+  u.ck.pVtab = pOp->p4.pVtab->pVtab;
+  u.ck.pModule = (sqlite3_module *)u.ck.pVtab->pModule;
+  assert(u.ck.pVtab && u.ck.pModule);
+  rc = u.ck.pModule->xOpen(u.ck.pVtab, &u.ck.pVtabCursor);
+  importVtabErrMsg(p, u.ck.pVtab);
   if( SQLITE_OK==rc ){
     /* Initialize sqlite3_vtab_cursor base class */
-    u.ch.pVtabCursor->pVtab = u.ch.pVtab;
+    u.ck.pVtabCursor->pVtab = u.ck.pVtab;
 
     /* Initialise vdbe cursor object */
-    u.ch.pCur = allocateCursor(p, pOp->p1, 0, -1, 0);
-    if( u.ch.pCur ){
-      u.ch.pCur->pVtabCursor = u.ch.pVtabCursor;
-      u.ch.pCur->pModule = u.ch.pVtabCursor->pVtab->pModule;
+    u.ck.pCur = allocateCursor(p, pOp->p1, 0, -1, 0);
+    if( u.ck.pCur ){
+      u.ck.pCur->pVtabCursor = u.ck.pVtabCursor;
+      u.ck.pCur->pModule = u.ck.pVtabCursor->pVtab->pModule;
     }else{
       db->mallocFailed = 1;
-      u.ch.pModule->xClose(u.ch.pVtabCursor);
+      u.ck.pModule->xClose(u.ck.pVtabCursor);
     }
   }
   break;
@@ -68454,7 +69240,7 @@ case OP_VOpen: {
 ** A jump is made to P2 if the result set after filtering would be empty.
 */
 case OP_VFilter: {   /* jump */
-#if 0  /* local variables moved into u.ci */
+#if 0  /* local variables moved into u.cl */
   int nArg;
   int iQuery;
   const sqlite3_module *pModule;
@@ -68466,45 +69252,45 @@ case OP_VFilter: {   /* jump */
   int res;
   int i;
   Mem **apArg;
-#endif /* local variables moved into u.ci */
+#endif /* local variables moved into u.cl */
 
-  u.ci.pQuery = &aMem[pOp->p3];
-  u.ci.pArgc = &u.ci.pQuery[1];
-  u.ci.pCur = p->apCsr[pOp->p1];
-  assert( memIsValid(u.ci.pQuery) );
-  REGISTER_TRACE(pOp->p3, u.ci.pQuery);
-  assert( u.ci.pCur->pVtabCursor );
-  u.ci.pVtabCursor = u.ci.pCur->pVtabCursor;
-  u.ci.pVtab = u.ci.pVtabCursor->pVtab;
-  u.ci.pModule = u.ci.pVtab->pModule;
+  u.cl.pQuery = &aMem[pOp->p3];
+  u.cl.pArgc = &u.cl.pQuery[1];
+  u.cl.pCur = p->apCsr[pOp->p1];
+  assert( memIsValid(u.cl.pQuery) );
+  REGISTER_TRACE(pOp->p3, u.cl.pQuery);
+  assert( u.cl.pCur->pVtabCursor );
+  u.cl.pVtabCursor = u.cl.pCur->pVtabCursor;
+  u.cl.pVtab = u.cl.pVtabCursor->pVtab;
+  u.cl.pModule = u.cl.pVtab->pModule;
 
   /* Grab the index number and argc parameters */
-  assert( (u.ci.pQuery->flags&MEM_Int)!=0 && u.ci.pArgc->flags==MEM_Int );
-  u.ci.nArg = (int)u.ci.pArgc->u.i;
-  u.ci.iQuery = (int)u.ci.pQuery->u.i;
+  assert( (u.cl.pQuery->flags&MEM_Int)!=0 && u.cl.pArgc->flags==MEM_Int );
+  u.cl.nArg = (int)u.cl.pArgc->u.i;
+  u.cl.iQuery = (int)u.cl.pQuery->u.i;
 
   /* Invoke the xFilter method */
   {
-    u.ci.res = 0;
-    u.ci.apArg = p->apArg;
-    for(u.ci.i = 0; u.ci.i<u.ci.nArg; u.ci.i++){
-      u.ci.apArg[u.ci.i] = &u.ci.pArgc[u.ci.i+1];
-      sqlite3VdbeMemStoreType(u.ci.apArg[u.ci.i]);
+    u.cl.res = 0;
+    u.cl.apArg = p->apArg;
+    for(u.cl.i = 0; u.cl.i<u.cl.nArg; u.cl.i++){
+      u.cl.apArg[u.cl.i] = &u.cl.pArgc[u.cl.i+1];
+      sqlite3VdbeMemStoreType(u.cl.apArg[u.cl.i]);
     }
 
     p->inVtabMethod = 1;
-    rc = u.ci.pModule->xFilter(u.ci.pVtabCursor, u.ci.iQuery, pOp->p4.z, u.ci.nArg, u.ci.apArg);
+    rc = u.cl.pModule->xFilter(u.cl.pVtabCursor, u.cl.iQuery, pOp->p4.z, u.cl.nArg, u.cl.apArg);
     p->inVtabMethod = 0;
-    importVtabErrMsg(p, u.ci.pVtab);
+    importVtabErrMsg(p, u.cl.pVtab);
     if( rc==SQLITE_OK ){
-      u.ci.res = u.ci.pModule->xEof(u.ci.pVtabCursor);
+      u.cl.res = u.cl.pModule->xEof(u.cl.pVtabCursor);
     }
 
-    if( u.ci.res ){
+    if( u.cl.res ){
       pc = pOp->p2 - 1;
     }
   }
-  u.ci.pCur->nullRow = 0;
+  u.cl.pCur->nullRow = 0;
 
   break;
 }
@@ -68518,51 +69304,51 @@ case OP_VFilter: {   /* jump */
 ** P1 cursor is pointing to into register P3.
 */
 case OP_VColumn: {
-#if 0  /* local variables moved into u.cj */
+#if 0  /* local variables moved into u.cm */
   sqlite3_vtab *pVtab;
   const sqlite3_module *pModule;
   Mem *pDest;
   sqlite3_context sContext;
-#endif /* local variables moved into u.cj */
+#endif /* local variables moved into u.cm */
 
   VdbeCursor *pCur = p->apCsr[pOp->p1];
   assert( pCur->pVtabCursor );
   assert( pOp->p3>0 && pOp->p3<=p->nMem );
-  u.cj.pDest = &aMem[pOp->p3];
-  memAboutToChange(p, u.cj.pDest);
+  u.cm.pDest = &aMem[pOp->p3];
+  memAboutToChange(p, u.cm.pDest);
   if( pCur->nullRow ){
-    sqlite3VdbeMemSetNull(u.cj.pDest);
+    sqlite3VdbeMemSetNull(u.cm.pDest);
     break;
   }
-  u.cj.pVtab = pCur->pVtabCursor->pVtab;
-  u.cj.pModule = u.cj.pVtab->pModule;
-  assert( u.cj.pModule->xColumn );
-  memset(&u.cj.sContext, 0, sizeof(u.cj.sContext));
+  u.cm.pVtab = pCur->pVtabCursor->pVtab;
+  u.cm.pModule = u.cm.pVtab->pModule;
+  assert( u.cm.pModule->xColumn );
+  memset(&u.cm.sContext, 0, sizeof(u.cm.sContext));
 
   /* The output cell may already have a buffer allocated. Move
-  ** the current contents to u.cj.sContext.s so in case the user-function
+  ** the current contents to u.cm.sContext.s so in case the user-function
   ** can use the already allocated buffer instead of allocating a
   ** new one.
   */
-  sqlite3VdbeMemMove(&u.cj.sContext.s, u.cj.pDest);
-  MemSetTypeFlag(&u.cj.sContext.s, MEM_Null);
+  sqlite3VdbeMemMove(&u.cm.sContext.s, u.cm.pDest);
+  MemSetTypeFlag(&u.cm.sContext.s, MEM_Null);
 
-  rc = u.cj.pModule->xColumn(pCur->pVtabCursor, &u.cj.sContext, pOp->p2);
-  importVtabErrMsg(p, u.cj.pVtab);
-  if( u.cj.sContext.isError ){
-    rc = u.cj.sContext.isError;
+  rc = u.cm.pModule->xColumn(pCur->pVtabCursor, &u.cm.sContext, pOp->p2);
+  importVtabErrMsg(p, u.cm.pVtab);
+  if( u.cm.sContext.isError ){
+    rc = u.cm.sContext.isError;
   }
 
   /* Copy the result of the function to the P3 register. We
   ** do this regardless of whether or not an error occurred to ensure any
-  ** dynamic allocation in u.cj.sContext.s (a Mem struct) is  released.
+  ** dynamic allocation in u.cm.sContext.s (a Mem struct) is  released.
   */
-  sqlite3VdbeChangeEncoding(&u.cj.sContext.s, encoding);
-  sqlite3VdbeMemMove(u.cj.pDest, &u.cj.sContext.s);
-  REGISTER_TRACE(pOp->p3, u.cj.pDest);
-  UPDATE_MAX_BLOBSIZE(u.cj.pDest);
+  sqlite3VdbeChangeEncoding(&u.cm.sContext.s, encoding);
+  sqlite3VdbeMemMove(u.cm.pDest, &u.cm.sContext.s);
+  REGISTER_TRACE(pOp->p3, u.cm.pDest);
+  UPDATE_MAX_BLOBSIZE(u.cm.pDest);
 
-  if( sqlite3VdbeMemTooBig(u.cj.pDest) ){
+  if( sqlite3VdbeMemTooBig(u.cm.pDest) ){
     goto too_big;
   }
   break;
@@ -68577,22 +69363,22 @@ case OP_VColumn: {
 ** the end of its result set, then fall through to the next instruction.
 */
 case OP_VNext: {   /* jump */
-#if 0  /* local variables moved into u.ck */
+#if 0  /* local variables moved into u.cn */
   sqlite3_vtab *pVtab;
   const sqlite3_module *pModule;
   int res;
   VdbeCursor *pCur;
-#endif /* local variables moved into u.ck */
+#endif /* local variables moved into u.cn */
 
-  u.ck.res = 0;
-  u.ck.pCur = p->apCsr[pOp->p1];
-  assert( u.ck.pCur->pVtabCursor );
-  if( u.ck.pCur->nullRow ){
+  u.cn.res = 0;
+  u.cn.pCur = p->apCsr[pOp->p1];
+  assert( u.cn.pCur->pVtabCursor );
+  if( u.cn.pCur->nullRow ){
     break;
   }
-  u.ck.pVtab = u.ck.pCur->pVtabCursor->pVtab;
-  u.ck.pModule = u.ck.pVtab->pModule;
-  assert( u.ck.pModule->xNext );
+  u.cn.pVtab = u.cn.pCur->pVtabCursor->pVtab;
+  u.cn.pModule = u.cn.pVtab->pModule;
+  assert( u.cn.pModule->xNext );
 
   /* Invoke the xNext() method of the module. There is no way for the
   ** underlying implementation to return an error if one occurs during
@@ -68601,14 +69387,14 @@ case OP_VNext: {   /* jump */
   ** some other method is next invoked on the save virtual table cursor.
   */
   p->inVtabMethod = 1;
-  rc = u.ck.pModule->xNext(u.ck.pCur->pVtabCursor);
+  rc = u.cn.pModule->xNext(u.cn.pCur->pVtabCursor);
   p->inVtabMethod = 0;
-  importVtabErrMsg(p, u.ck.pVtab);
+  importVtabErrMsg(p, u.cn.pVtab);
   if( rc==SQLITE_OK ){
-    u.ck.res = u.ck.pModule->xEof(u.ck.pCur->pVtabCursor);
+    u.cn.res = u.cn.pModule->xEof(u.cn.pCur->pVtabCursor);
   }
 
-  if( !u.ck.res ){
+  if( !u.cn.res ){
     /* If there is data, jump to P2 */
     pc = pOp->p2 - 1;
   }
@@ -68624,21 +69410,26 @@ case OP_VNext: {   /* jump */
 ** in register P1 is passed as the zName argument to the xRename method.
 */
 case OP_VRename: {
-#if 0  /* local variables moved into u.cl */
+#if 0  /* local variables moved into u.co */
   sqlite3_vtab *pVtab;
   Mem *pName;
-#endif /* local variables moved into u.cl */
-
-  u.cl.pVtab = pOp->p4.pVtab->pVtab;
-  u.cl.pName = &aMem[pOp->p1];
-  assert( u.cl.pVtab->pModule->xRename );
-  assert( memIsValid(u.cl.pName) );
-  REGISTER_TRACE(pOp->p1, u.cl.pName);
-  assert( u.cl.pName->flags & MEM_Str );
-  rc = u.cl.pVtab->pModule->xRename(u.cl.pVtab, u.cl.pName->z);
-  importVtabErrMsg(p, u.cl.pVtab);
-  p->expired = 0;
-
+#endif /* local variables moved into u.co */
+
+  u.co.pVtab = pOp->p4.pVtab->pVtab;
+  u.co.pName = &aMem[pOp->p1];
+  assert( u.co.pVtab->pModule->xRename );
+  assert( memIsValid(u.co.pName) );
+  REGISTER_TRACE(pOp->p1, u.co.pName);
+  assert( u.co.pName->flags & MEM_Str );
+  testcase( u.co.pName->enc==SQLITE_UTF8 );
+  testcase( u.co.pName->enc==SQLITE_UTF16BE );
+  testcase( u.co.pName->enc==SQLITE_UTF16LE );
+  rc = sqlite3VdbeChangeEncoding(u.co.pName, SQLITE_UTF8);
+  if( rc==SQLITE_OK ){
+    rc = u.co.pVtab->pModule->xRename(u.co.pVtab, u.co.pName->z);
+    importVtabErrMsg(p, u.co.pVtab);
+    p->expired = 0;
+  }
   break;
 }
 #endif
@@ -68668,7 +69459,7 @@ case OP_VRename: {
 ** is set to the value of the rowid for the row just inserted.
 */
 case OP_VUpdate: {
-#if 0  /* local variables moved into u.cm */
+#if 0  /* local variables moved into u.cp */
   sqlite3_vtab *pVtab;
   sqlite3_module *pModule;
   int nArg;
@@ -68676,33 +69467,33 @@ case OP_VUpdate: {
   sqlite_int64 rowid;
   Mem **apArg;
   Mem *pX;
-#endif /* local variables moved into u.cm */
+#endif /* local variables moved into u.cp */
 
   assert( pOp->p2==1        || pOp->p5==OE_Fail   || pOp->p5==OE_Rollback
        || pOp->p5==OE_Abort || pOp->p5==OE_Ignore || pOp->p5==OE_Replace
   );
-  u.cm.pVtab = pOp->p4.pVtab->pVtab;
-  u.cm.pModule = (sqlite3_module *)u.cm.pVtab->pModule;
-  u.cm.nArg = pOp->p2;
+  u.cp.pVtab = pOp->p4.pVtab->pVtab;
+  u.cp.pModule = (sqlite3_module *)u.cp.pVtab->pModule;
+  u.cp.nArg = pOp->p2;
   assert( pOp->p4type==P4_VTAB );
-  if( ALWAYS(u.cm.pModule->xUpdate) ){
+  if( ALWAYS(u.cp.pModule->xUpdate) ){
     u8 vtabOnConflict = db->vtabOnConflict;
-    u.cm.apArg = p->apArg;
-    u.cm.pX = &aMem[pOp->p3];
-    for(u.cm.i=0; u.cm.i<u.cm.nArg; u.cm.i++){
-      assert( memIsValid(u.cm.pX) );
-      memAboutToChange(p, u.cm.pX);
-      sqlite3VdbeMemStoreType(u.cm.pX);
-      u.cm.apArg[u.cm.i] = u.cm.pX;
-      u.cm.pX++;
+    u.cp.apArg = p->apArg;
+    u.cp.pX = &aMem[pOp->p3];
+    for(u.cp.i=0; u.cp.i<u.cp.nArg; u.cp.i++){
+      assert( memIsValid(u.cp.pX) );
+      memAboutToChange(p, u.cp.pX);
+      sqlite3VdbeMemStoreType(u.cp.pX);
+      u.cp.apArg[u.cp.i] = u.cp.pX;
+      u.cp.pX++;
     }
     db->vtabOnConflict = pOp->p5;
-    rc = u.cm.pModule->xUpdate(u.cm.pVtab, u.cm.nArg, u.cm.apArg, &u.cm.rowid);
+    rc = u.cp.pModule->xUpdate(u.cp.pVtab, u.cp.nArg, u.cp.apArg, &u.cp.rowid);
     db->vtabOnConflict = vtabOnConflict;
-    importVtabErrMsg(p, u.cm.pVtab);
+    importVtabErrMsg(p, u.cp.pVtab);
     if( rc==SQLITE_OK && pOp->p1 ){
-      assert( u.cm.nArg>1 && u.cm.apArg[0] && (u.cm.apArg[0]->flags&MEM_Null) );
-      db->lastRowid = lastRowid = u.cm.rowid;
+      assert( u.cp.nArg>1 && u.cp.apArg[0] && (u.cp.apArg[0]->flags&MEM_Null) );
+      db->lastRowid = lastRowid = u.cp.rowid;
     }
     if( rc==SQLITE_CONSTRAINT && pOp->p4.pVtab->bConstraint ){
       if( pOp->p5==OE_Ignore ){
@@ -68762,21 +69553,21 @@ case OP_MaxPgcnt: {            /* out2-prerelease */
 ** the UTF-8 string contained in P4 is emitted on the trace callback.
 */
 case OP_Trace: {
-#if 0  /* local variables moved into u.cn */
+#if 0  /* local variables moved into u.cq */
   char *zTrace;
   char *z;
-#endif /* local variables moved into u.cn */
+#endif /* local variables moved into u.cq */
 
-  if( db->xTrace && (u.cn.zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0 ){
-    u.cn.z = sqlite3VdbeExpandSql(p, u.cn.zTrace);
-    db->xTrace(db->pTraceArg, u.cn.z);
-    sqlite3DbFree(db, u.cn.z);
+  if( db->xTrace && (u.cq.zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0 ){
+    u.cq.z = sqlite3VdbeExpandSql(p, u.cq.zTrace);
+    db->xTrace(db->pTraceArg, u.cq.z);
+    sqlite3DbFree(db, u.cq.z);
   }
 #ifdef SQLITE_DEBUG
   if( (db->flags & SQLITE_SqlTrace)!=0
-   && (u.cn.zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0
+   && (u.cq.zTrace = (pOp->p4.z ? pOp->p4.z : p->zSql))!=0
   ){
-    sqlite3DebugPrintf("SQL-trace: %s\n", u.cn.zTrace);
+    sqlite3DebugPrintf("SQL-trace: %s\n", u.cq.zTrace);
   }
 #endif /* SQLITE_DEBUG */
   break;
@@ -69179,7 +69970,7 @@ SQLITE_API int sqlite3_blob_open(
 
       /* Configure the OP_TableLock instruction */
 #ifdef SQLITE_OMIT_SHARED_CACHE
-      sqlite3VdbeChangeToNoop(v, 2, 1);
+      sqlite3VdbeChangeToNoop(v, 2);
 #else
       sqlite3VdbeChangeP1(v, 2, iDb);
       sqlite3VdbeChangeP2(v, 2, pTab->tnum);
@@ -69189,7 +69980,7 @@ SQLITE_API int sqlite3_blob_open(
 
       /* Remove either the OP_OpenWrite or OpenRead. Set the P2 
       ** parameter of the other to pTab->tnum.  */
-      sqlite3VdbeChangeToNoop(v, 4 - flags, 1);
+      sqlite3VdbeChangeToNoop(v, 4 - flags);
       sqlite3VdbeChangeP2(v, 3 + flags, pTab->tnum);
       sqlite3VdbeChangeP3(v, 3 + flags, iDb);
 
@@ -69375,6 +70166,889 @@ SQLITE_API int sqlite3_blob_reopen(sqlite3_blob *pBlob, sqlite3_int64 iRow){
 #endif /* #ifndef SQLITE_OMIT_INCRBLOB */
 
 /************** End of vdbeblob.c ********************************************/
+/************** Begin file vdbesort.c ****************************************/
+/*
+** 2011 July 9
+**
+** The author disclaims copyright to this source code.  In place of
+** a legal notice, here is a blessing:
+**
+**    May you do good and not evil.
+**    May you find forgiveness for yourself and forgive others.
+**    May you share freely, never taking more than you give.
+**
+*************************************************************************
+** This file contains code for the VdbeSorter object, used in concert with
+** a VdbeCursor to sort large numbers of keys (as may be required, for
+** example, by CREATE INDEX statements on tables too large to fit in main
+** memory).
+*/
+
+
+#ifndef SQLITE_OMIT_MERGE_SORT
+
+typedef struct VdbeSorterIter VdbeSorterIter;
+typedef struct SorterRecord SorterRecord;
+
+/*
+** NOTES ON DATA STRUCTURE USED FOR N-WAY MERGES:
+**
+** As keys are added to the sorter, they are written to disk in a series
+** of sorted packed-memory-arrays (PMAs). The size of each PMA is roughly
+** the same as the cache-size allowed for temporary databases. In order
+** to allow the caller to extract keys from the sorter in sorted order,
+** all PMAs currently stored on disk must be merged together. This comment
+** describes the data structure used to do so. The structure supports 
+** merging any number of arrays in a single pass with no redundant comparison 
+** operations.
+**
+** The aIter[] array contains an iterator for each of the PMAs being merged.
+** An aIter[] iterator either points to a valid key or else is at EOF. For 
+** the purposes of the paragraphs below, we assume that the array is actually 
+** N elements in size, where N is the smallest power of 2 greater than or equal
+** to the number of iterators being merged. The extra aIter[] elements are 
+** treated as if they are empty (always at EOF).
+**
+** The aTree[] array is also N elements in size. The value of N is stored in
+** the VdbeSorter.nTree variable.
+**
+** The final (N/2) elements of aTree[] contain the results of comparing
+** pairs of iterator keys together. Element i contains the result of 
+** comparing aIter[2*i-N] and aIter[2*i-N+1]. Whichever key is smaller, the
+** aTree element is set to the index of it. 
+**
+** For the purposes of this comparison, EOF is considered greater than any
+** other key value. If the keys are equal (only possible with two EOF
+** values), it doesn't matter which index is stored.
+**
+** The (N/4) elements of aTree[] that precede the final (N/2) described
+** above contain the index of the smallest of each block of 4 iterators.
+** And so on. So that aTree[1] contains the index of the iterator that 
+** currently points to the smallest key value. aTree[0] is unused.
+**
+** Example:
+**
+**     aIter[0] -> Banana
+**     aIter[1] -> Feijoa
+**     aIter[2] -> Elderberry
+**     aIter[3] -> Currant
+**     aIter[4] -> Grapefruit
+**     aIter[5] -> Apple
+**     aIter[6] -> Durian
+**     aIter[7] -> EOF
+**
+**     aTree[] = { X, 5   0, 5    0, 3, 5, 6 }
+**
+** The current element is "Apple" (the value of the key indicated by 
+** iterator 5). When the Next() operation is invoked, iterator 5 will
+** be advanced to the next key in its segment. Say the next key is
+** "Eggplant":
+**
+**     aIter[5] -> Eggplant
+**
+** The contents of aTree[] are updated first by comparing the new iterator
+** 5 key to the current key of iterator 4 (still "Grapefruit"). The iterator
+** 5 value is still smaller, so aTree[6] is set to 5. And so on up the tree.
+** The value of iterator 6 - "Durian" - is now smaller than that of iterator
+** 5, so aTree[3] is set to 6. Key 0 is smaller than key 6 (Banana<Durian),
+** so the value written into element 1 of the array is 0. As follows:
+**
+**     aTree[] = { X, 0   0, 6    0, 3, 5, 6 }
+**
+** In other words, each time we advance to the next sorter element, log2(N)
+** key comparison operations are required, where N is the number of segments
+** being merged (rounded up to the next power of 2).
+*/
+struct VdbeSorter {
+  int nInMemory;                  /* Current size of pRecord list as PMA */
+  int nTree;                      /* Used size of aTree/aIter (power of 2) */
+  VdbeSorterIter *aIter;          /* Array of iterators to merge */
+  int *aTree;                     /* Current state of incremental merge */
+  i64 iWriteOff;                  /* Current write offset within file pTemp1 */
+  i64 iReadOff;                   /* Current read offset within file pTemp1 */
+  sqlite3_file *pTemp1;           /* PMA file 1 */
+  int nPMA;                       /* Number of PMAs stored in pTemp1 */
+  SorterRecord *pRecord;          /* Head of in-memory record list */
+  int mnPmaSize;                  /* Minimum PMA size, in bytes */
+  int mxPmaSize;                  /* Maximum PMA size, in bytes.  0==no limit */
+  UnpackedRecord *pUnpacked;      /* Used to unpack keys */
+};
+
+/*
+** The following type is an iterator for a PMA. It caches the current key in 
+** variables nKey/aKey. If the iterator is at EOF, pFile==0.
+*/
+struct VdbeSorterIter {
+  i64 iReadOff;                   /* Current read offset */
+  i64 iEof;                       /* 1 byte past EOF for this iterator */
+  sqlite3_file *pFile;            /* File iterator is reading from */
+  int nAlloc;                     /* Bytes of space at aAlloc */
+  u8 *aAlloc;                     /* Allocated space */
+  int nKey;                       /* Number of bytes in key */
+  u8 *aKey;                       /* Pointer to current key */
+};
+
+/*
+** A structure to store a single record. All in-memory records are connected
+** together into a linked list headed at VdbeSorter.pRecord using the 
+** SorterRecord.pNext pointer.
+*/
+struct SorterRecord {
+  void *pVal;
+  int nVal;
+  SorterRecord *pNext;
+};
+
+/* Minimum PMA size, in pages; also the floor applied to the cache size */
+#define SORTER_MIN_WORKING 10
+
+/* Maximum number of segments to merge in a single pass. */
+#define SORTER_MAX_MERGE_COUNT 16
+
+/*
+** Free all memory belonging to the VdbeSorterIter object passed as the second
+** argument. All structure fields are set to zero before returning.
+*/
+static void vdbeSorterIterZero(sqlite3 *db, VdbeSorterIter *pIter){
+  sqlite3DbFree(db, pIter->aAlloc);
+  memset(pIter, 0, sizeof(VdbeSorterIter));
+}
+
+/*
+** Advance iterator pIter to the next key in its PMA. Return SQLITE_OK if
+** no error occurs, or an SQLite error code if one does.
+*/
+static int vdbeSorterIterNext(
+  sqlite3 *db,                    /* Database handle (for sqlite3DbMalloc() ) */
+  VdbeSorterIter *pIter           /* Iterator to advance */
+){
+  int rc;                         /* Return Code */
+  int nRead;                      /* Number of bytes read */
+  int nRec = 0;                   /* Size of record in bytes */
+  int iOff = 0;                   /* Size of serialized size varint in bytes */
+
+  assert( pIter->iEof>=pIter->iReadOff );
+  if( pIter->iEof-pIter->iReadOff>5 ){
+    nRead = 5;
+  }else{
+    nRead = (int)(pIter->iEof - pIter->iReadOff);
+  }
+  if( nRead<=0 ){
+    /* This is an EOF condition */
+    vdbeSorterIterZero(db, pIter);
+    return SQLITE_OK;
+  }
+
+  rc = sqlite3OsRead(pIter->pFile, pIter->aAlloc, nRead, pIter->iReadOff);
+  if( rc==SQLITE_OK ){
+    iOff = getVarint32(pIter->aAlloc, nRec);
+    if( (iOff+nRec)>nRead ){
+      int nRead2;                   /* Number of extra bytes to read */
+      if( (iOff+nRec)>pIter->nAlloc ){
+        int nNew = pIter->nAlloc*2;
+        while( (iOff+nRec)>nNew ) nNew = nNew*2;
+        pIter->aAlloc = sqlite3DbReallocOrFree(db, pIter->aAlloc, nNew);
+        if( !pIter->aAlloc ) return SQLITE_NOMEM;
+        pIter->nAlloc = nNew;
+      }
+  
+      nRead2 = iOff + nRec - nRead;
+      rc = sqlite3OsRead(
+          pIter->pFile, &pIter->aAlloc[nRead], nRead2, pIter->iReadOff+nRead
+      );
+    }
+  }
+
+  assert( rc!=SQLITE_OK || nRec>0 );
+  pIter->iReadOff += iOff+nRec;
+  pIter->nKey = nRec;
+  pIter->aKey = &pIter->aAlloc[iOff];
+  return rc;
+}
+
+/*
+** Write a single varint, value iVal, to file-descriptor pFile. Return
+** SQLITE_OK if successful, or an SQLite error code if some error occurs.
+**
+** The value of *piOffset when this function is called is used as the byte
+** offset in file pFile to write to. Before returning, *piOffset is 
+** incremented by the number of bytes written.
+*/
+static int vdbeSorterWriteVarint(
+  sqlite3_file *pFile,            /* File to write to */
+  i64 iVal,                       /* Value to write as a varint */
+  i64 *piOffset                   /* IN/OUT: Write offset in file pFile */
+){
+  u8 aVarint[9];                  /* Buffer large enough for a varint */
+  int nVarint;                    /* Number of used bytes in varint */
+  int rc;                         /* Result of write() call */
+
+  nVarint = sqlite3PutVarint(aVarint, iVal);
+  rc = sqlite3OsWrite(pFile, aVarint, nVarint, *piOffset);
+  *piOffset += nVarint;
+
+  return rc;
+}
+
+/*
+** Read a single varint from file-descriptor pFile. Return SQLITE_OK if
+** successful, or an SQLite error code if some error occurs.
+**
+** The value of *piOffset when this function is called is used as the
+** byte offset in file pFile from whence to read the varint. If successful
+** (i.e. if no IO error occurs), then *piOffset is set to the offset of
+** the first byte past the end of the varint before returning. *piVal is
+** set to the integer value read. If an error occurs, the final values of
+** both *piOffset and *piVal are undefined.
+*/
+static int vdbeSorterReadVarint(
+  sqlite3_file *pFile,            /* File to read from */
+  i64 *piOffset,                  /* IN/OUT: Read offset in pFile */
+  i64 *piVal                      /* OUT: Value read from file */
+){
+  u8 aVarint[9];                  /* Buffer large enough for a varint */
+  i64 iOff = *piOffset;           /* Offset in file to read from */
+  int rc;                         /* Return code */
+
+  rc = sqlite3OsRead(pFile, aVarint, 9, iOff);
+  if( rc==SQLITE_OK ){
+    *piOffset += getVarint(aVarint, (u64 *)piVal);
+  }
+
+  return rc;
+}
+
+/*
+** Initialize iterator pIter to scan through the PMA stored in file pFile
+** starting at offset iStart and ending at offset iEof-1. This function 
+** leaves the iterator pointing to the first key in the PMA (or EOF if the 
+** PMA is empty).
+*/
+static int vdbeSorterIterInit(
+  sqlite3 *db,                    /* Database handle */
+  VdbeSorter *pSorter,            /* Sorter object */
+  i64 iStart,                     /* Start offset in pFile */
+  VdbeSorterIter *pIter,          /* Iterator to populate */
+  i64 *pnByte                     /* IN/OUT: Increment this value by PMA size */
+){
+  int rc;
+
+  assert( pSorter->iWriteOff>iStart );
+  assert( pIter->aAlloc==0 );
+  pIter->pFile = pSorter->pTemp1;
+  pIter->iReadOff = iStart;
+  pIter->nAlloc = 128;
+  pIter->aAlloc = (u8 *)sqlite3DbMallocRaw(db, pIter->nAlloc);
+  if( !pIter->aAlloc ){
+    rc = SQLITE_NOMEM;
+  }else{
+    i64 nByte;                         /* Total size of PMA in bytes */
+    rc = vdbeSorterReadVarint(pSorter->pTemp1, &pIter->iReadOff, &nByte);
+    *pnByte += nByte;
+    pIter->iEof = pIter->iReadOff + nByte;
+  }
+  if( rc==SQLITE_OK ){
+    rc = vdbeSorterIterNext(db, pIter);
+  }
+  return rc;
+}
+
+
+/*
+** Compare key1 (buffer pKey1, size nKey1 bytes) with key2 (buffer pKey2, 
+** size nKey2 bytes).  Argument pKeyInfo supplies the collation functions
+** used by the comparison. This function returns no value; it sets *pRes
+** to a negative, zero or positive value, depending on whether key1 is
+** smaller than, equal to or larger than key2.
+**
+** If the bOmitRowid argument is non-zero, assume both keys end in a rowid
+** field. For the purposes of the comparison, ignore it. Also, if bOmitRowid
+** is true and key2 contains even a single NULL value, key1 is considered to
+** be less than key2. Even if key1 also contains NULL values.
+**
+** If pKey2 is passed a NULL pointer, then it is assumed that
+** pCsr->pSorter->pUnpacked already holds the unpacked record used as key2.
+*/
+static void vdbeSorterCompare(
+  VdbeCursor *pCsr,               /* Cursor object (for pKeyInfo) */
+  int bOmitRowid,                 /* Ignore rowid field at end of keys */
+  void *pKey1, int nKey1,         /* Left side of comparison */
+  void *pKey2, int nKey2,         /* Right side of comparison */
+  int *pRes                       /* OUT: Result of comparison */
+){
+  KeyInfo *pKeyInfo = pCsr->pKeyInfo;
+  VdbeSorter *pSorter = pCsr->pSorter;
+  UnpackedRecord *r2 = pSorter->pUnpacked;
+  int i;
+
+  if( pKey2 ){
+    sqlite3VdbeRecordUnpack(pKeyInfo, nKey2, pKey2, r2);
+  }
+
+  if( bOmitRowid ){
+    r2->nField = pKeyInfo->nField;
+    assert( r2->nField>0 );
+    for(i=0; i<r2->nField; i++){
+      if( r2->aMem[i].flags & MEM_Null ){
+        *pRes = -1;
+        return;
+      }
+    }
+    r2->flags |= UNPACKED_PREFIX_MATCH;
+  }
+
+  *pRes = sqlite3VdbeRecordCompare(nKey1, pKey1, r2);
+}
+
+/*
+** This function is called to compare two iterator keys when merging
+** multiple PMAs. Parameter iOut is the index of the aTree[]
+** value to recalculate.
+*/
+static int vdbeSorterDoCompare(VdbeCursor *pCsr, int iOut){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int i1;
+  int i2;
+  int iRes;
+  VdbeSorterIter *p1;
+  VdbeSorterIter *p2;
+
+  assert( iOut<pSorter->nTree && iOut>0 );
+
+  if( iOut>=(pSorter->nTree/2) ){
+    i1 = (iOut - pSorter->nTree/2) * 2;
+    i2 = i1 + 1;
+  }else{
+    i1 = pSorter->aTree[iOut*2];
+    i2 = pSorter->aTree[iOut*2+1];
+  }
+
+  p1 = &pSorter->aIter[i1];
+  p2 = &pSorter->aIter[i2];
+
+  if( p1->pFile==0 ){
+    iRes = i2;
+  }else if( p2->pFile==0 ){
+    iRes = i1;
+  }else{
+    int res;
+    assert( pCsr->pSorter->pUnpacked!=0 );  /* from sqlite3VdbeSorterInit() */
+    vdbeSorterCompare(
+        pCsr, 0, p1->aKey, p1->nKey, p2->aKey, p2->nKey, &res
+    );
+    if( res<=0 ){
+      iRes = i1;
+    }else{
+      iRes = i2;
+    }
+  }
+
+  pSorter->aTree[iOut] = iRes;
+  return SQLITE_OK;
+}
+
+/*
+** Initialize the temporary index cursor just opened as a sorter cursor.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterInit(sqlite3 *db, VdbeCursor *pCsr){
+  int pgsz;                       /* Page size of main database */
+  int mxCache;                    /* Cache size */
+  VdbeSorter *pSorter;            /* The new sorter */
+  char *d;                        /* Dummy */
+
+  assert( pCsr->pKeyInfo && pCsr->pBt==0 );
+  pCsr->pSorter = pSorter = sqlite3DbMallocZero(db, sizeof(VdbeSorter));
+  if( pSorter==0 ){
+    return SQLITE_NOMEM;
+  }
+  
+  pSorter->pUnpacked = sqlite3VdbeAllocUnpackedRecord(pCsr->pKeyInfo, 0, 0, &d);
+  if( pSorter->pUnpacked==0 ) return SQLITE_NOMEM;
+  assert( pSorter->pUnpacked==(UnpackedRecord *)d );
+
+  if( !sqlite3TempInMemory(db) ){
+    pgsz = sqlite3BtreeGetPageSize(db->aDb[0].pBt);
+    pSorter->mnPmaSize = SORTER_MIN_WORKING * pgsz;
+    mxCache = db->aDb[0].pSchema->cache_size;
+    if( mxCache<SORTER_MIN_WORKING ) mxCache = SORTER_MIN_WORKING;
+    pSorter->mxPmaSize = mxCache * pgsz;
+  }
+
+  return SQLITE_OK;
+}
+
+/*
+** Free the list of sorted records starting at pRecord.
+*/
+static void vdbeSorterRecordFree(sqlite3 *db, SorterRecord *pRecord){
+  SorterRecord *p;
+  SorterRecord *pNext;
+  for(p=pRecord; p; p=pNext){
+    pNext = p->pNext;
+    sqlite3DbFree(db, p);
+  }
+}
+
+/*
+** Free any cursor components allocated by sqlite3VdbeSorterXXX routines.
+*/
+SQLITE_PRIVATE void sqlite3VdbeSorterClose(sqlite3 *db, VdbeCursor *pCsr){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  if( pSorter ){
+    if( pSorter->aIter ){
+      int i;
+      for(i=0; i<pSorter->nTree; i++){
+        vdbeSorterIterZero(db, &pSorter->aIter[i]);
+      }
+      sqlite3DbFree(db, pSorter->aIter);
+    }
+    if( pSorter->pTemp1 ){
+      sqlite3OsCloseFree(pSorter->pTemp1);
+    }
+    vdbeSorterRecordFree(db, pSorter->pRecord);
+    sqlite3DbFree(db, pSorter->pUnpacked);
+    sqlite3DbFree(db, pSorter);
+    pCsr->pSorter = 0;
+  }
+}
+
+/*
+** Allocate space for a file-handle and open a temporary file. If successful,
+** set *ppFile to point to the malloc'd file-handle and return SQLITE_OK.
+** Otherwise, set *ppFile to 0 and return an SQLite error code.
+*/
+static int vdbeSorterOpenTempFile(sqlite3 *db, sqlite3_file **ppFile){
+  int dummy;
+  return sqlite3OsOpenMalloc(db->pVfs, 0, ppFile,
+      SQLITE_OPEN_TEMP_JOURNAL |
+      SQLITE_OPEN_READWRITE    | SQLITE_OPEN_CREATE |
+      SQLITE_OPEN_EXCLUSIVE    | SQLITE_OPEN_DELETEONCLOSE, &dummy
+  );
+}
+
+/*
+** Merge the two sorted lists p1 and p2 into a single list.
+** Set *ppOut to the head of the new list.
+*/
+static void vdbeSorterMerge(
+  VdbeCursor *pCsr,               /* For pKeyInfo */
+  SorterRecord *p1,               /* First list to merge */
+  SorterRecord *p2,               /* Second list to merge */
+  SorterRecord **ppOut            /* OUT: Head of merged list */
+){
+  SorterRecord *pFinal = 0;
+  SorterRecord **pp = &pFinal;
+  void *pVal2 = p2 ? p2->pVal : 0;
+
+  while( p1 && p2 ){
+    int res;
+    vdbeSorterCompare(pCsr, 0, p1->pVal, p1->nVal, pVal2, p2->nVal, &res);
+    if( res<=0 ){
+      *pp = p1;
+      pp = &p1->pNext;
+      p1 = p1->pNext;
+      pVal2 = 0;
+    }else{
+      *pp = p2;
+       pp = &p2->pNext;
+      p2 = p2->pNext;
+      if( p2==0 ) break;
+      pVal2 = p2->pVal;
+    }
+  }
+  *pp = p1 ? p1 : p2;
+  *ppOut = pFinal;
+}
+
+/*
+** Sort the linked list of records headed at pCsr->pSorter->pRecord. Return
+** SQLITE_OK if successful, or an SQLite error code (i.e. SQLITE_NOMEM) if an
+** error occurs.
+*/
+static int vdbeSorterSort(VdbeCursor *pCsr){
+  int i;
+  SorterRecord **aSlot;
+  SorterRecord *p;
+  VdbeSorter *pSorter = pCsr->pSorter;
+
+  aSlot = (SorterRecord **)sqlite3MallocZero(64 * sizeof(SorterRecord *));
+  if( !aSlot ){
+    return SQLITE_NOMEM;
+  }
+
+  p = pSorter->pRecord;
+  while( p ){
+    SorterRecord *pNext = p->pNext;
+    p->pNext = 0;
+    for(i=0; aSlot[i]; i++){
+      vdbeSorterMerge(pCsr, p, aSlot[i], &p);
+      aSlot[i] = 0;
+    }
+    aSlot[i] = p;
+    p = pNext;
+  }
+
+  p = 0;
+  for(i=0; i<64; i++){
+    vdbeSorterMerge(pCsr, p, aSlot[i], &p);
+  }
+  pSorter->pRecord = p;
+
+  sqlite3_free(aSlot);
+  return SQLITE_OK;
+}
+
+
+/*
+** Write the current contents of the in-memory linked-list to a PMA. Return
+** SQLITE_OK if successful, or an SQLite error code otherwise.
+**
+** The format of a PMA is:
+**
+**     * A varint. This varint contains the total number of bytes of content
+**       in the PMA (not including the varint itself).
+**
+**     * One or more records packed end-to-end in order of ascending keys. 
+**       Each record consists of a varint followed by a blob of data (the 
+**       key). The varint is the number of bytes in the blob of data.
+*/
+static int vdbeSorterListToPMA(sqlite3 *db, VdbeCursor *pCsr){
+  int rc = SQLITE_OK;             /* Return code */
+  VdbeSorter *pSorter = pCsr->pSorter;
+
+  if( pSorter->nInMemory==0 ){
+    assert( pSorter->pRecord==0 );
+    return rc;
+  }
+
+  rc = vdbeSorterSort(pCsr);
+
+  /* If the first temporary PMA file has not been opened, open it now. */
+  if( rc==SQLITE_OK && pSorter->pTemp1==0 ){
+    rc = vdbeSorterOpenTempFile(db, &pSorter->pTemp1);
+    assert( rc!=SQLITE_OK || pSorter->pTemp1 );
+    assert( pSorter->iWriteOff==0 );
+    assert( pSorter->nPMA==0 );
+  }
+
+  if( rc==SQLITE_OK ){
+    i64 iOff = pSorter->iWriteOff;
+    SorterRecord *p;
+    SorterRecord *pNext = 0;
+    static const char eightZeros[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+    pSorter->nPMA++;
+    rc = vdbeSorterWriteVarint(pSorter->pTemp1, pSorter->nInMemory, &iOff);
+    for(p=pSorter->pRecord; rc==SQLITE_OK && p; p=pNext){
+      pNext = p->pNext;
+      rc = vdbeSorterWriteVarint(pSorter->pTemp1, p->nVal, &iOff);
+
+      if( rc==SQLITE_OK ){
+        rc = sqlite3OsWrite(pSorter->pTemp1, p->pVal, p->nVal, iOff);
+        iOff += p->nVal;
+      }
+
+      sqlite3DbFree(db, p);
+    }
+
+    /* This assert verifies that unless an error has occurred, the size of 
+    ** the PMA on disk is the same as the expected size stored in
+    ** pSorter->nInMemory. */ 
+    assert( rc!=SQLITE_OK || pSorter->nInMemory==(
+          iOff-pSorter->iWriteOff-sqlite3VarintLen(pSorter->nInMemory)
+    ));
+
+    pSorter->iWriteOff = iOff;
+    if( rc==SQLITE_OK ){
+      /* Terminate each file with 8 extra bytes so that from any offset
+      ** in the file we can always read 9 bytes without a SHORT_READ error */
+      rc = sqlite3OsWrite(pSorter->pTemp1, eightZeros, 8, iOff);
+    }
+    pSorter->pRecord = p;
+  }
+
+  return rc;
+}
+
+/*
+** Add a record to the sorter.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterWrite(
+  sqlite3 *db,                    /* Database handle */
+  VdbeCursor *pCsr,               /* Sorter cursor */
+  Mem *pVal                       /* Memory cell containing record */
+){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc = SQLITE_OK;             /* Return Code */
+  SorterRecord *pNew;             /* New list element */
+
+  assert( pSorter );
+  pSorter->nInMemory += sqlite3VarintLen(pVal->n) + pVal->n;
+
+  pNew = (SorterRecord *)sqlite3DbMallocRaw(db, pVal->n + sizeof(SorterRecord));
+  if( pNew==0 ){
+    rc = SQLITE_NOMEM;
+  }else{
+    pNew->pVal = (void *)&pNew[1];
+    memcpy(pNew->pVal, pVal->z, pVal->n);
+    pNew->nVal = pVal->n;
+    pNew->pNext = pSorter->pRecord;
+    pSorter->pRecord = pNew;
+  }
+
+  /* See if the contents of the sorter should now be written out. They
+  ** are written out when either of the following are true:
+  **
+  **   * The total memory allocated for the in-memory list is greater 
+  **     than (page-size * cache-size), or
+  **
+  **   * The total memory allocated for the in-memory list is greater 
+  **     than (page-size * 10) and sqlite3HeapNearlyFull() returns true.
+  */
+  if( rc==SQLITE_OK && pSorter->mxPmaSize>0 && (
+        (pSorter->nInMemory>pSorter->mxPmaSize)
+     || (pSorter->nInMemory>pSorter->mnPmaSize && sqlite3HeapNearlyFull())
+  )){
+    rc = vdbeSorterListToPMA(db, pCsr);
+    pSorter->nInMemory = 0;
+  }
+
+  return rc;
+}
+
+/*
+** Helper function for sqlite3VdbeSorterRewind(). 
+*/
+static int vdbeSorterInitMerge(
+  sqlite3 *db,                    /* Database handle */
+  VdbeCursor *pCsr,               /* Cursor handle for this sorter */
+  i64 *pnByte                     /* Sum of bytes in all opened PMAs */
+){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc = SQLITE_OK;             /* Return code */
+  int i;                          /* Used to iterate through aIter[] */
+  i64 nByte = 0;                  /* Total bytes in all opened PMAs */
+
+  /* Initialize the iterators. */
+  for(i=0; i<SORTER_MAX_MERGE_COUNT; i++){
+    VdbeSorterIter *pIter = &pSorter->aIter[i];
+    rc = vdbeSorterIterInit(db, pSorter, pSorter->iReadOff, pIter, &nByte);
+    pSorter->iReadOff = pIter->iEof;
+    assert( rc!=SQLITE_OK || pSorter->iReadOff<=pSorter->iWriteOff );
+    if( rc!=SQLITE_OK || pSorter->iReadOff>=pSorter->iWriteOff ) break;
+  }
+
+  /* Initialize the aTree[] array. */
+  for(i=pSorter->nTree-1; rc==SQLITE_OK && i>0; i--){
+    rc = vdbeSorterDoCompare(pCsr, i);
+  }
+
+  *pnByte = nByte;
+  return rc;
+}
+
+/*
+** Once the sorter has been populated, this function is called to prepare
+** for iterating through its contents in sorted order.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterRewind(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc;                         /* Return code */
+  sqlite3_file *pTemp2 = 0;       /* Second temp file to use */
+  i64 iWrite2 = 0;                /* Write offset for pTemp2 */
+  int nIter;                      /* Number of iterators used */
+  int nByte;                      /* Bytes of space required for aIter/aTree */
+  int N = 2;                      /* Power of 2 >= nIter */
+
+  assert( pSorter );
+
+  /* If no data has been written to disk, then do not do so now. Instead,
+  ** sort the VdbeSorter.pRecord list. The vdbe layer will read data directly
+  ** from the in-memory list.  */
+  if( pSorter->nPMA==0 ){
+    *pbEof = !pSorter->pRecord;
+    assert( pSorter->aTree==0 );
+    return vdbeSorterSort(pCsr);
+  }
+
+  /* Write the current in-memory record list to a PMA. */
+  rc = vdbeSorterListToPMA(db, pCsr);
+  if( rc!=SQLITE_OK ) return rc;
+
+  /* Allocate space for aIter[] and aTree[]. */
+  nIter = pSorter->nPMA;
+  if( nIter>SORTER_MAX_MERGE_COUNT ) nIter = SORTER_MAX_MERGE_COUNT;
+  assert( nIter>0 );
+  while( N<nIter ) N += N;
+  nByte = N * (sizeof(int) + sizeof(VdbeSorterIter));
+  pSorter->aIter = (VdbeSorterIter *)sqlite3DbMallocZero(db, nByte);
+  if( !pSorter->aIter ) return SQLITE_NOMEM;
+  pSorter->aTree = (int *)&pSorter->aIter[N];
+  pSorter->nTree = N;
+
+  do {
+    int iNew;                     /* Index of new, merged, PMA */
+
+    for(iNew=0; 
+        rc==SQLITE_OK && iNew*SORTER_MAX_MERGE_COUNT<pSorter->nPMA; 
+        iNew++
+    ){
+      i64 nWrite;                 /* Number of bytes in new PMA */
+
+      /* If there are SORTER_MAX_MERGE_COUNT or fewer PMAs in file pTemp1,
+      ** initialize an iterator for each of them and break out of the loop.
+      ** These iterators will be incrementally merged as the VDBE layer calls
+      ** sqlite3VdbeSorterNext().
+      **
+      ** Otherwise, if pTemp1 contains more than SORTER_MAX_MERGE_COUNT PMAs,
+      ** initialize iterators for SORTER_MAX_MERGE_COUNT of them. These PMAs
+      ** are merged into a single PMA that is written to file pTemp2.
+      */
+      rc = vdbeSorterInitMerge(db, pCsr, &nWrite);
+      assert( rc!=SQLITE_OK || pSorter->aIter[ pSorter->aTree[1] ].pFile );
+      if( rc!=SQLITE_OK || pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
+        break;
+      }
+
+      /* Open the second temp file, if it is not already open. */
+      if( pTemp2==0 ){
+        assert( iWrite2==0 );
+        rc = vdbeSorterOpenTempFile(db, &pTemp2);
+      }
+
+      if( rc==SQLITE_OK ){
+        rc = vdbeSorterWriteVarint(pTemp2, nWrite, &iWrite2);
+      }
+
+      if( rc==SQLITE_OK ){
+        int bEof = 0;
+        while( rc==SQLITE_OK && bEof==0 ){
+          int nToWrite;
+          VdbeSorterIter *pIter = &pSorter->aIter[ pSorter->aTree[1] ];
+          assert( pIter->pFile );
+          nToWrite = pIter->nKey + sqlite3VarintLen(pIter->nKey);
+          rc = sqlite3OsWrite(pTemp2, pIter->aAlloc, nToWrite, iWrite2);
+          iWrite2 += nToWrite;
+          if( rc==SQLITE_OK ){
+            rc = sqlite3VdbeSorterNext(db, pCsr, &bEof);
+          }
+        }
+      }
+    }
+
+    if( pSorter->nPMA<=SORTER_MAX_MERGE_COUNT ){
+      break;
+    }else{
+      sqlite3_file *pTmp = pSorter->pTemp1;
+      pSorter->nPMA = iNew;
+      pSorter->pTemp1 = pTemp2;
+      pTemp2 = pTmp;
+      pSorter->iWriteOff = iWrite2;
+      pSorter->iReadOff = 0;
+      iWrite2 = 0;
+    }
+  }while( rc==SQLITE_OK );
+
+  if( pTemp2 ){
+    sqlite3OsCloseFree(pTemp2);
+  }
+  *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
+  return rc;
+}
+
+/*
+** Advance to the next element in the sorter.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterNext(sqlite3 *db, VdbeCursor *pCsr, int *pbEof){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  int rc;                         /* Return code */
+
+  if( pSorter->aTree ){
+    int iPrev = pSorter->aTree[1];/* Index of iterator to advance */
+    int i;                        /* Index of aTree[] to recalculate */
+
+    rc = vdbeSorterIterNext(db, &pSorter->aIter[iPrev]);
+    for(i=(pSorter->nTree+iPrev)/2; rc==SQLITE_OK && i>0; i=i/2){
+      rc = vdbeSorterDoCompare(pCsr, i);
+    }
+
+    *pbEof = (pSorter->aIter[pSorter->aTree[1]].pFile==0);
+  }else{
+    SorterRecord *pFree = pSorter->pRecord;
+    pSorter->pRecord = pFree->pNext;
+    pFree->pNext = 0;
+    vdbeSorterRecordFree(db, pFree);
+    *pbEof = !pSorter->pRecord;
+    rc = SQLITE_OK;
+  }
+  return rc;
+}
+
+/*
+** Return a pointer to a buffer owned by the sorter that contains the 
+** current key.
+*/
+static void *vdbeSorterRowkey(
+  VdbeSorter *pSorter,            /* Sorter object */
+  int *pnKey                      /* OUT: Size of current key in bytes */
+){
+  void *pKey;
+  if( pSorter->aTree ){
+    VdbeSorterIter *pIter;
+    pIter = &pSorter->aIter[ pSorter->aTree[1] ];
+    *pnKey = pIter->nKey;
+    pKey = pIter->aKey;
+  }else{
+    *pnKey = pSorter->pRecord->nVal;
+    pKey = pSorter->pRecord->pVal;
+  }
+  return pKey;
+}
+
+/*
+** Copy the current sorter key into the memory cell pOut.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterRowkey(VdbeCursor *pCsr, Mem *pOut){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  void *pKey; int nKey;           /* Sorter key to copy into pOut */
+
+  pKey = vdbeSorterRowkey(pSorter, &nKey);
+  if( sqlite3VdbeMemGrow(pOut, nKey, 0) ){
+    return SQLITE_NOMEM;
+  }
+  pOut->n = nKey;
+  MemSetTypeFlag(pOut, MEM_Blob);
+  memcpy(pOut->z, pKey, nKey);
+
+  return SQLITE_OK;
+}
+
+/*
+** Compare the key in memory cell pVal with the key that the sorter cursor
+** passed as the first argument currently points to. For the purposes of
+** the comparison, ignore the rowid field at the end of each record.
+**
+** If an error occurs, return an SQLite error code (i.e. SQLITE_NOMEM).
+** Otherwise, set *pRes to a negative, zero or positive value if the
+** key in pVal is smaller than, equal to or larger than the current sorter
+** key.
+*/
+SQLITE_PRIVATE int sqlite3VdbeSorterCompare(
+  VdbeCursor *pCsr,               /* Sorter cursor */
+  Mem *pVal,                      /* Value to compare to current sorter key */
+  int *pRes                       /* OUT: Result of comparison */
+){
+  VdbeSorter *pSorter = pCsr->pSorter;
+  void *pKey; int nKey;           /* Sorter key to compare pVal with */
+
+  pKey = vdbeSorterRowkey(pSorter, &nKey);
+  vdbeSorterCompare(pCsr, 1, pVal->z, pVal->n, pKey, nKey, pRes);
+  return SQLITE_OK;
+}
+
+#endif /* #ifndef SQLITE_OMIT_MERGE_SORT */
+
+/************** End of vdbesort.c ********************************************/
 /************** Begin file journal.c *****************************************/
 /*
 ** 2007 August 22
@@ -69891,6 +71565,8 @@ SQLITE_PRIVATE int sqlite3MemJournalSize(void){
 ** This file contains routines used for walking the parser tree for
 ** an SQL statement.
 */
+/* #include <stdlib.h> */
+/* #include <string.h> */
 
 
 /*
@@ -70029,6 +71705,8 @@ SQLITE_PRIVATE int sqlite3WalkSelect(Walker *pWalker, Select *p){
 ** resolve all identifiers by associating them with a particular
 ** table and column.
 */
+/* #include <stdlib.h> */
+/* #include <string.h> */
 
 /*
 ** Turn the pExpr expression into an alias for the iCol-th column of the
@@ -70110,6 +71788,24 @@ static void resolveAlias(
   sqlite3DbFree(db, pDup);
 }
 
+
+/*
+** Return TRUE if the name zCol occurs anywhere in the USING clause.
+**
+** Return FALSE if the USING clause is NULL or if it does not contain
+** zCol.
+*/
+static int nameInUsingClause(IdList *pUsing, const char *zCol){
+  if( pUsing ){                     /* A NULL USING clause matches nothing */
+    int k;                          /* Index into pUsing->a[] */
+    for(k=0; k<pUsing->nId; k++){   /* Check each of the nId listed names */
+      if( sqlite3StrICmp(pUsing->a[k].zName, zCol)==0 ) return 1;
+    }
+  }
+  return 0;                         /* zCol was not found */
+}
+
+
 /*
 ** Given the name of a column of the form X.Y.Z or Y.Z or just Z, look up
 ** that name in the set of source tables in pSrcList and make the pExpr 
@@ -70201,7 +71897,14 @@ static int lookupName(
         }
         for(j=0, pCol=pTab->aCol; j<pTab->nCol; j++, pCol++){
           if( sqlite3StrICmp(pCol->zName, zCol)==0 ){
-            IdList *pUsing;
+            /* If there has been exactly one prior match and this match
+            ** is for the right-hand table of a NATURAL JOIN or is in a 
+            ** USING clause, then skip this match.
+            */
+            if( cnt==1 ){
+              if( pItem->jointype & JT_NATURAL ) continue;
+              if( nameInUsingClause(pItem->pUsing, zCol) ) continue;
+            }
             cnt++;
             pExpr->iTable = pItem->iCursor;
             pExpr->pTab = pTab;
@@ -70209,26 +71912,6 @@ static int lookupName(
             pSchema = pTab->pSchema;
             /* Substitute the rowid (column -1) for the INTEGER PRIMARY KEY */
             pExpr->iColumn = j==pTab->iPKey ? -1 : (i16)j;
-            if( i<pSrcList->nSrc-1 ){
-              if( pItem[1].jointype & JT_NATURAL ){
-                /* If this match occurred in the left table of a natural join,
-                ** then skip the right table to avoid a duplicate match */
-                pItem++;
-                i++;
-              }else if( (pUsing = pItem[1].pUsing)!=0 ){
-                /* If this match occurs on a column that is in the USING clause
-                ** of a join, skip the search of the right table of the join
-                ** to avoid a duplicate match there. */
-                int k;
-                for(k=0; k<pUsing->nId; k++){
-                  if( sqlite3StrICmp(pUsing->a[k].zName, zCol)==0 ){
-                    pItem++;
-                    i++;
-                    break;
-                  }
-                }
-              }
-            }
             break;
           }
         }
@@ -71008,11 +72691,25 @@ static int resolveSelectStep(Walker *pWalker, Select *p){
     for(i=0; i<p->pSrc->nSrc; i++){
       struct SrcList_item *pItem = &p->pSrc->a[i];
       if( pItem->pSelect ){
+        NameContext *pNC;         /* Used to iterate name contexts */
+        int nRef = 0;             /* Refcount for pOuterNC and outer contexts */
         const char *zSavedContext = pParse->zAuthContext;
+
+        /* Count the total number of references to pOuterNC and all of its
+        ** parent contexts. After resolving references to expressions in
+        ** pItem->pSelect, check if this value has changed. If so, then
+        ** SELECT statement pItem->pSelect must be correlated. Set the
+        ** pItem->isCorrelated flag if this is the case. */
+        for(pNC=pOuterNC; pNC; pNC=pNC->pNext) nRef += pNC->nRef;
+
         if( pItem->zName ) pParse->zAuthContext = pItem->zName;
         sqlite3ResolveSelectNames(pParse, pItem->pSelect, pOuterNC);
         pParse->zAuthContext = zSavedContext;
         if( pParse->nErr || db->mallocFailed ) return WRC_Abort;
+
+        for(pNC=pOuterNC; pNC; pNC=pNC->pNext) nRef -= pNC->nRef;
+        assert( pItem->isCorrelated==0 && nRef<=0 );
+        pItem->isCorrelated = (nRef!=0);
       }
     }
   
@@ -71621,7 +73318,8 @@ SQLITE_PRIVATE Expr *sqlite3ExprAlloc(
       }else{
         int c;
         pNew->u.zToken = (char*)&pNew[1];
-        memcpy(pNew->u.zToken, pToken->z, pToken->n);
+        assert( pToken->z!=0 || pToken->n==0 );
+        if( pToken->n ) memcpy(pNew->u.zToken, pToken->z, pToken->n);
         pNew->u.zToken[pToken->n] = 0;
         if( dequote && nExtra>=3 
              && ((c = pToken->z[0])=='\'' || c=='"' || c=='[' || c=='`') ){
@@ -72119,7 +73817,9 @@ SQLITE_PRIVATE SrcList *sqlite3SrcListDup(sqlite3 *db, SrcList *p, int flags){
     pNewItem->zAlias = sqlite3DbStrDup(db, pOldItem->zAlias);
     pNewItem->jointype = pOldItem->jointype;
     pNewItem->iCursor = pOldItem->iCursor;
-    pNewItem->isPopulated = pOldItem->isPopulated;
+    pNewItem->addrFillSub = pOldItem->addrFillSub;
+    pNewItem->regReturn = pOldItem->regReturn;
+    pNewItem->isCorrelated = pOldItem->isCorrelated;
     pNewItem->zIndex = sqlite3DbStrDup(db, pOldItem->zIndex);
     pNewItem->notIndexed = pOldItem->notIndexed;
     pNewItem->pIndex = pOldItem->pIndex;
@@ -72658,11 +74358,19 @@ SQLITE_PRIVATE int sqlite3FindInIndex(Parse *pParse, Expr *pX, int *prNotFound){
   p = (ExprHasProperty(pX, EP_xIsSelect) ? pX->x.pSelect : 0);
   if( ALWAYS(pParse->nErr==0) && isCandidateForInOpt(p) ){
     sqlite3 *db = pParse->db;              /* Database connection */
-    Expr *pExpr = p->pEList->a[0].pExpr;   /* Expression <column> */
-    int iCol = pExpr->iColumn;             /* Index of column <column> */
     Vdbe *v = sqlite3GetVdbe(pParse);      /* Virtual machine being coded */
-    Table *pTab = p->pSrc->a[0].pTab;      /* Table <table>. */
+    Table *pTab;                           /* Table <table>. */
+    Expr *pExpr;                           /* Expression <column> */
+    int iCol;                              /* Index of column <column> */
     int iDb;                               /* Database idx for pTab */
+
+    assert( p );                        /* Because of isCandidateForInOpt(p) */
+    assert( p->pEList!=0 );             /* Because of isCandidateForInOpt(p) */
+    assert( p->pEList->a[0].pExpr!=0 ); /* Because of isCandidateForInOpt(p) */
+    assert( p->pSrc!=0 );               /* Because of isCandidateForInOpt(p) */
+    pTab = p->pSrc->a[0].pTab;
+    pExpr = p->pEList->a[0].pExpr;
+    iCol = pExpr->iColumn;
    
     /* Code an OP_VerifyCookie and OP_TableLock for <table>. */
     iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
@@ -72678,8 +74386,7 @@ SQLITE_PRIVATE int sqlite3FindInIndex(Parse *pParse, Expr *pX, int *prNotFound){
       int iMem = ++pParse->nMem;
       int iAddr;
 
-      iAddr = sqlite3VdbeAddOp1(v, OP_If, iMem);
-      sqlite3VdbeAddOp2(v, OP_Integer, 1, iMem);
+      iAddr = sqlite3VdbeAddOp1(v, OP_Once, iMem);
 
       sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
       eType = IN_INDEX_ROWID;
@@ -72710,8 +74417,7 @@ SQLITE_PRIVATE int sqlite3FindInIndex(Parse *pParse, Expr *pX, int *prNotFound){
           char *pKey;
   
           pKey = (char *)sqlite3IndexKeyinfo(pParse, pIdx);
-          iAddr = sqlite3VdbeAddOp1(v, OP_If, iMem);
-          sqlite3VdbeAddOp2(v, OP_Integer, 1, iMem);
+          iAddr = sqlite3VdbeAddOp1(v, OP_Once, iMem);
   
           sqlite3VdbeAddOp4(v, OP_OpenRead, iTab, pIdx->tnum, iDb,
                                pKey,P4_KEYINFO_HANDOFF);
@@ -72792,7 +74498,7 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(
   int rMayHaveNull,       /* Register that records whether NULLs exist in RHS */
   int isRowid             /* If true, LHS of IN operator is a rowid */
 ){
-  int testAddr = 0;                       /* One-time test address */
+  int testAddr = -1;                      /* One-time test address */
   int rReg = 0;                           /* Register storing resulting */
   Vdbe *v = sqlite3GetVdbe(pParse);
   if( NEVER(v==0) ) return 0;
@@ -72810,15 +74516,13 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(
   */
   if( !ExprHasAnyProperty(pExpr, EP_VarSelect) && !pParse->pTriggerTab ){
     int mem = ++pParse->nMem;
-    sqlite3VdbeAddOp1(v, OP_If, mem);
-    testAddr = sqlite3VdbeAddOp2(v, OP_Integer, 1, mem);
-    assert( testAddr>0 || pParse->db->mallocFailed );
+    testAddr = sqlite3VdbeAddOp1(v, OP_Once, mem);
   }
 
 #ifndef SQLITE_OMIT_EXPLAIN
   if( pParse->explain==2 ){
     char *zMsg = sqlite3MPrintf(
-        pParse->db, "EXECUTE %s%s SUBQUERY %d", testAddr?"":"CORRELATED ",
+        pParse->db, "EXECUTE %s%s SUBQUERY %d", testAddr>=0?"":"CORRELATED ",
         pExpr->op==TK_IN?"LIST":"SCALAR", pParse->iNextSelectId
     );
     sqlite3VdbeAddOp4(v, OP_Explain, pParse->iSelectId, 0, 0, zMsg, P4_DYNAMIC);
@@ -72910,9 +74614,9 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(
           ** this code only executes once.  Because for a non-constant
           ** expression we need to rerun this code each time.
           */
-          if( testAddr && !sqlite3ExprIsConstant(pE2) ){
-            sqlite3VdbeChangeToNoop(v, testAddr-1, 2);
-            testAddr = 0;
+          if( testAddr>=0 && !sqlite3ExprIsConstant(pE2) ){
+            sqlite3VdbeChangeToNoop(v, testAddr);
+            testAddr = -1;
           }
 
           /* Evaluate the expression and insert it into the temp table */
@@ -72981,8 +74685,8 @@ SQLITE_PRIVATE int sqlite3CodeSubselect(
     }
   }
 
-  if( testAddr ){
-    sqlite3VdbeJumpHere(v, testAddr-1);
+  if( testAddr>=0 ){
+    sqlite3VdbeJumpHere(v, testAddr);
   }
   sqlite3ExprCachePop(pParse, 1);
 
@@ -73504,7 +75208,7 @@ SQLITE_PRIVATE int sqlite3ExprCodeTarget(Parse *pParse, Expr *pExpr, int target)
         inReg = pCol->iMem;
         break;
       }else if( pAggInfo->useSortingIdx ){
-        sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdx,
+        sqlite3VdbeAddOp3(v, OP_Column, pAggInfo->sortingIdxPTab,
                               pCol->iSorterColumn, target);
         break;
       }
@@ -74673,7 +76377,7 @@ SQLITE_PRIVATE int sqlite3ExprCompare(Expr *pA, Expr *pB){
     }
   }else if( pA->op!=TK_COLUMN && pA->u.zToken ){
     if( ExprHasProperty(pB, EP_IntValue) || NEVER(pB->u.zToken==0) ) return 2;
-    if( sqlite3StrICmp(pA->u.zToken,pB->u.zToken)!=0 ){
+    if( strcmp(pA->u.zToken,pB->u.zToken)!=0 ){
       return 2;
     }
   }
@@ -75816,22 +77520,124 @@ exit_begin_add_column:
 **
 *************************************************************************
 ** This file contains code associated with the ANALYZE command.
+**
+** The ANALYZE command gathers statistics about the content of tables
+** and indices.  These statistics are made available to the query planner
+** to help it make better decisions about how to perform queries.
+**
+** The following system tables are or have been supported:
+**
+**    CREATE TABLE sqlite_stat1(tbl, idx, stat);
+**    CREATE TABLE sqlite_stat2(tbl, idx, sampleno, sample);
+**    CREATE TABLE sqlite_stat3(tbl, idx, nEq, nLt, nDLt, sample);
+**
+** Additional tables might be added in future releases of SQLite.
+** The sqlite_stat2 table is not created or used unless the SQLite version
+** is between 3.6.18 and 3.7.8, inclusive, and unless SQLite is compiled
+** with SQLITE_ENABLE_STAT2.  The sqlite_stat2 table is deprecated.
+** The sqlite_stat2 table is superseded by sqlite_stat3, which is only
+** created and used by SQLite versions 3.7.9 and later and with
+** SQLITE_ENABLE_STAT3 defined.  The functionality of sqlite_stat3
+** is a superset of sqlite_stat2.  
+**
+** Format of sqlite_stat1:
+**
+** There is normally one row per index, with the index identified by the
+** name in the idx column.  The tbl column is the name of the table to
+** which the index belongs.  In each such row, the stat column will be
+** a string consisting of a list of integers.  The first integer in this
+** list is the number of rows in the index and in the table.  The second
+** integer is the average number of rows in the index that have the same
+** value in the first column of the index.  The third integer is the average
+** number of rows in the index that have the same value for the first two
+** columns.  The N-th integer (for N>1) is the average number of rows in 
+** the index which have the same value for the first N-1 columns.  For
+** a K-column index, there will be K+1 integers in the stat column.  If
+** the index is unique, then the last integer will be 1.
+**
+** The list of integers in the stat column can optionally be followed
+** by the keyword "unordered".  The "unordered" keyword, if it is present,
+** must be separated from the last integer by a single space.  If the
+** "unordered" keyword is present, then the query planner assumes that
+** the index is unordered and will not use the index for a range query.
+** 
+** If the sqlite_stat1.idx column is NULL, then the sqlite_stat1.stat
+** column contains a single integer which is the (estimated) number of
+** rows in the table identified by sqlite_stat1.tbl.
+**
+** Format of sqlite_stat2:
+**
+** The sqlite_stat2 is only created and is only used if SQLite is compiled
+** with SQLITE_ENABLE_STAT2 and if the SQLite version number is between
+** 3.6.18 and 3.7.8.  The "stat2" table contains additional information
+** about the distribution of keys within an index.  The index is identified by
+** the "idx" column and the "tbl" column is the name of the table to which
+** the index belongs.  There are usually 10 rows in the sqlite_stat2
+** table for each index.
+**
+** The sqlite_stat2 entries for an index that have sampleno between 0 and 9
+** inclusive are samples of the left-most key value in the index taken at
+** evenly spaced points along the index.  Let the number of samples be S
+** (10 in the standard build) and let C be the number of rows in the index.
+** Then the sampled rows are given by:
+**
+**     rownumber = (i*C*2 + C)/(S*2)
+**
+** For i between 0 and S-1.  Conceptually, the index space is divided into
+** S uniform buckets and the samples are the middle row from each bucket.
+**
+** The format for sqlite_stat2 is recorded here for legacy reference.  This
+** version of SQLite does not support sqlite_stat2.  It neither reads nor
+** writes the sqlite_stat2 table.  This version of SQLite only supports
+** sqlite_stat3.
+**
+** Format for sqlite_stat3:
+**
+** The sqlite_stat3 is an enhancement to sqlite_stat2.  A new name is
+** used to avoid compatibility problems.  
+**
+** The format of the sqlite_stat3 table is similar to the format of
+** the sqlite_stat2 table.  There are multiple entries for each index.
+** The idx column names the index and the tbl column is the table of the
+** index.  If the idx and tbl columns are the same, then the sample is
+** of the INTEGER PRIMARY KEY.  The sample column is a value taken from
+** the left-most column of the index.  The nEq column is the approximate
+** number of entries in the index whose left-most column exactly matches
+** the sample.  nLt is the approximate number of entries whose left-most
+** column is less than the sample.  The nDLt column is the approximate
+** number of distinct left-most entries in the index that are less than
+** the sample.
+**
+** Future versions of SQLite might change to store a string containing
+** multiple integer values in the nDLt column of sqlite_stat3.  The first
+** integer will be the number of prior index entries that are distinct in
+** the left-most column.  The second integer will be the number of prior index
+** entries that are distinct in the first two columns.  The third integer
+** will be the number of prior index entries that are distinct in the first
+** three columns.  And so forth.  With that extension, the nDLt field is
+** similar in function to the sqlite_stat1.stat field.
+**
+** There can be an arbitrary number of sqlite_stat3 entries per index.
+** The ANALYZE command will typically generate sqlite_stat3 tables
+** that contain between 10 and 40 samples which are distributed across
+** the key space, though not uniformly, and which include samples with
+** largest possible nEq values.
 */
 #ifndef SQLITE_OMIT_ANALYZE
 
 /*
 ** This routine generates code that opens the sqlite_stat1 table for
 ** writing with cursor iStatCur. If the library was built with the
-** SQLITE_ENABLE_STAT2 macro defined, then the sqlite_stat2 table is
+** SQLITE_ENABLE_STAT3 macro defined, then the sqlite_stat3 table is
 ** opened for writing using cursor (iStatCur+1)
 **
 ** If the sqlite_stat1 tables does not previously exist, it is created.
-** Similarly, if the sqlite_stat2 table does not exist and the library
-** is compiled with SQLITE_ENABLE_STAT2 defined, it is created. 
+** Similarly, if the sqlite_stat3 table does not exist and the library
+** is compiled with SQLITE_ENABLE_STAT3 defined, it is created. 
 **
 ** Argument zWhere may be a pointer to a buffer containing a table name,
 ** or it may be a NULL pointer. If it is not NULL, then all entries in
-** the sqlite_stat1 and (if applicable) sqlite_stat2 tables associated
+** the sqlite_stat1 and (if applicable) sqlite_stat3 tables associated
 ** with the named table are deleted. If zWhere==0, then code is generated
 ** to delete all stat table entries.
 */
@@ -75847,8 +77653,8 @@ static void openStatTable(
     const char *zCols;
   } aTable[] = {
     { "sqlite_stat1", "tbl,idx,stat" },
-#ifdef SQLITE_ENABLE_STAT2
-    { "sqlite_stat2", "tbl,idx,sampleno,sample" },
+#ifdef SQLITE_ENABLE_STAT3
+    { "sqlite_stat3", "tbl,idx,neq,nlt,ndlt,sample" },
 #endif
   };
 
@@ -75864,6 +77670,9 @@ static void openStatTable(
   assert( sqlite3VdbeDb(v)==db );
   pDb = &db->aDb[iDb];
 
+  /* Create new statistic tables if they do not exist, or clear them
+  ** if they do already exist.
+  */
   for(i=0; i<ArraySize(aTable); i++){
     const char *zTab = aTable[i].zName;
     Table *pStat;
@@ -75894,7 +77703,7 @@ static void openStatTable(
     }
   }
 
-  /* Open the sqlite_stat[12] tables for writing. */
+  /* Open the sqlite_stat[13] tables for writing. */
   for(i=0; i<ArraySize(aTable); i++){
     sqlite3VdbeAddOp3(v, OP_OpenWrite, iStatCur+i, aRoot[i], iDb);
     sqlite3VdbeChangeP4(v, -1, (char *)3, P4_INT32);
@@ -75903,6 +77712,226 @@ static void openStatTable(
 }
 
 /*
+** Recommended number of samples for sqlite_stat3
+*/
+#ifndef SQLITE_STAT3_SAMPLES
+# define SQLITE_STAT3_SAMPLES 24
+#endif
+
+/*
+** Three SQL functions - stat3_init(), stat3_push(), and stat3_get() -
+** share an instance of the following structure to hold their state
+** information.
+*/
+typedef struct Stat3Accum Stat3Accum;
+struct Stat3Accum {
+  tRowcnt nRow;             /* Number of rows in the entire table */
+  tRowcnt nPSample;         /* How often to do a periodic sample */
+  int iMin;                 /* Index of non-periodic entry with min nEq, hash */
+  int mxSample;             /* Maximum number of samples to accumulate */
+  int nSample;              /* Current number of samples */
+  u32 iPrn;                 /* Pseudo-random number used for sampling */
+  struct Stat3Sample {
+    i64 iRowid;                /* Rowid in main table of the key */
+    tRowcnt nEq;               /* sqlite_stat3.nEq */
+    tRowcnt nLt;               /* sqlite_stat3.nLt */
+    tRowcnt nDLt;              /* sqlite_stat3.nDLt */
+    u8 isPSample;              /* True if a periodic sample */
+    u32 iHash;                 /* Tiebreaker hash */
+  } *a;                     /* Sample array; stat3Init() puts it after struct */
+};
+
+#ifdef SQLITE_ENABLE_STAT3
+/*
+** Implementation of the stat3_init(C,S) SQL function.  The two parameters
+** are the number of rows in the table or index (C) and the number of samples
+** to accumulate (S).
+**
+** This routine allocates the Stat3Accum object.
+**
+** The return value is the Stat3Accum object (P).
+*/
+static void stat3Init(
+  sqlite3_context *context,       /* Receives the Stat3Accum blob (or NOMEM) */
+  int argc,                       /* Unused */
+  sqlite3_value **argv            /* argv[0]: row count C.  argv[1]: samples S */
+){
+  Stat3Accum *p;
+  tRowcnt nRow;
+  int mxSample;
+  int n;                          /* Bytes for header plus mxSample samples */
+
+  UNUSED_PARAMETER(argc);
+  nRow = (tRowcnt)sqlite3_value_int64(argv[0]);
+  mxSample = sqlite3_value_int(argv[1]);
+  n = sizeof(*p) + sizeof(p->a[0])*mxSample;
+  p = sqlite3_malloc( n );
+  if( p==0 ){
+    sqlite3_result_error_nomem(context);
+    return;
+  }
+  memset(p, 0, n);
+  p->a = (struct Stat3Sample*)&p[1];   /* Samples follow the header in memory */
+  p->nRow = nRow;
+  p->mxSample = mxSample;
+  p->nPSample = p->nRow/(mxSample/3+1) + 1;   /* Always at least 1 */
+  sqlite3_randomness(sizeof(p->iPrn), &p->iPrn);   /* Seed for stat3Push() */
+  sqlite3_result_blob(context, p, sizeof(p), sqlite3_free); /* NOTE(review): length is pointer size, not n */
+}
+static const FuncDef stat3InitFuncdef = {
+  2,                /* nArg */
+  SQLITE_UTF8,      /* iPrefEnc */
+  0,                /* flags */
+  0,                /* pUserData */
+  0,                /* pNext */
+  stat3Init,        /* xFunc */
+  0,                /* xStep */
+  0,                /* xFinalize */
+  "stat3_init",     /* zName */
+  0,                /* pHash */
+  0                 /* pDestructor */
+};
+
+
+/*
+** Implementation of the stat3_push(nEq,nLt,nDLt,rowid,P) SQL function.  The
+** arguments describe a single key instance.  This routine makes the 
+** decision about whether or not to retain this key for the sqlite_stat3
+** table.
+**
+** The return value is NULL.
+*/
+static void stat3Push(
+  sqlite3_context *context,       /* Unused; no result is ever set */
+  int argc,                       /* Unused */
+  sqlite3_value **argv            /* nEq, nLt, nDLt, rowid, Stat3Accum blob */
+){
+  Stat3Accum *p = (Stat3Accum*)sqlite3_value_blob(argv[4]);
+  tRowcnt nEq = sqlite3_value_int64(argv[0]);
+  tRowcnt nLt = sqlite3_value_int64(argv[1]);
+  tRowcnt nDLt = sqlite3_value_int64(argv[2]);
+  i64 rowid = sqlite3_value_int64(argv[3]);
+  u8 isPSample = 0;               /* True if this key is a periodic sample */
+  u8 doInsert = 0;                /* True if this key is to be retained */
+  int iMin = p->iMin;             /* Current eviction candidate */
+  struct Stat3Sample *pSample;
+  int i;
+  u32 h;                          /* Tiebreaker hash for this key */
+
+  UNUSED_PARAMETER(context);
+  UNUSED_PARAMETER(argc);
+  if( nEq==0 ) return;            /* Nothing to record for an empty key run */
+  h = p->iPrn = p->iPrn*1103515245 + 12345;   /* Step the generator state */
+  if( (nLt/p->nPSample)!=((nEq+nLt)/p->nPSample) ){
+    doInsert = isPSample = 1;     /* Key run spans a multiple of nPSample */
+  }else if( p->nSample<p->mxSample ){
+    doInsert = 1;                 /* Array not yet full: keep every key */
+  }else{
+    if( nEq>p->a[iMin].nEq || (nEq==p->a[iMin].nEq && h>p->a[iMin].iHash) ){
+      doInsert = 1;               /* Beats the current minimum entry */
+    }
+  }
+  if( !doInsert ) return;
+  if( p->nSample==p->mxSample ){  /* Full: drop a[iMin], append at the end */
+    assert( p->nSample - iMin - 1 >= 0 );
+    memmove(&p->a[iMin], &p->a[iMin+1], sizeof(p->a[0])*(p->nSample-iMin-1));
+    pSample = &p->a[p->nSample-1];
+  }else{
+    pSample = &p->a[p->nSample++];
+  }
+  pSample->iRowid = rowid;
+  pSample->nEq = nEq;
+  pSample->nLt = nLt;
+  pSample->nDLt = nDLt;
+  pSample->iHash = h;
+  pSample->isPSample = isPSample;
+
+  /* Find the new minimum among non-periodic samples, once the array is full */
+  if( p->nSample==p->mxSample ){
+    pSample = p->a;
+    i = 0;
+    while( pSample->isPSample ){  /* NOTE(review): bounded only by the assert */
+      i++;
+      pSample++;
+      assert( i<p->nSample );
+    }
+    nEq = pSample->nEq;           /* nEq and h are reused as the running min */
+    h = pSample->iHash;
+    iMin = i;
+    for(i++, pSample++; i<p->nSample; i++, pSample++){
+      if( pSample->isPSample ) continue;   /* Periodic samples never evicted */
+      if( pSample->nEq<nEq
+       || (pSample->nEq==nEq && pSample->iHash<h)
+      ){
+        iMin = i;
+        nEq = pSample->nEq;
+        h = pSample->iHash;
+      }
+    }
+    p->iMin = iMin;
+  }
+}
+static const FuncDef stat3PushFuncdef = {
+  5,                /* nArg */
+  SQLITE_UTF8,      /* iPrefEnc */
+  0,                /* flags */
+  0,                /* pUserData */
+  0,                /* pNext */
+  stat3Push,        /* xFunc */
+  0,                /* xStep */
+  0,                /* xFinalize */
+  "stat3_push",     /* zName */
+  0,                /* pHash */
+  0                 /* pDestructor */
+};
+
+/*
+** Implementation of the stat3_get(P,N,...) SQL function.  This routine is
+** used to query the results.  Content is returned for the Nth sqlite_stat3
+** row where N is between 0 and S-1 and S is the number of samples.  The
+** value returned depends on the number of arguments.
+**
+**   argc==2    result:  rowid
+**   argc==3    result:  nEq
+**   argc==4    result:  nLt
+**   argc==5    result:  nDLt
+*/
+static void stat3Get(
+  sqlite3_context *context,       /* Receives the selected integer field */
+  int argc,                       /* Selects which field is returned */
+  sqlite3_value **argv            /* argv[0]: Stat3Accum blob.  argv[1]: N */
+){
+  int n = sqlite3_value_int(argv[1]);
+  Stat3Accum *p = (Stat3Accum*)sqlite3_value_blob(argv[0]);
+
+  assert( p!=0 );
+  if( p->nSample<=n ) return;     /* No N-th sample: no result is set */
+  switch( argc ){
+    case 2:  sqlite3_result_int64(context, p->a[n].iRowid); break;
+    case 3:  sqlite3_result_int64(context, p->a[n].nEq);    break;
+    case 4:  sqlite3_result_int64(context, p->a[n].nLt);    break;
+    default: sqlite3_result_int64(context, p->a[n].nDLt);   break;
+  }
+}
+static const FuncDef stat3GetFuncdef = {
+  -1,               /* nArg (presumably "any count"; argc picks the field) */
+  SQLITE_UTF8,      /* iPrefEnc */
+  0,                /* flags */
+  0,                /* pUserData */
+  0,                /* pNext */
+  stat3Get,         /* xFunc */
+  0,                /* xStep */
+  0,                /* xFinalize */
+  "stat3_get",     /* zName */
+  0,                /* pHash */
+  0                 /* pDestructor */
+};
+#endif /* SQLITE_ENABLE_STAT3 */
+
+
+
+
+/*
 ** Generate code to do an analysis of all indices associated with
 ** a single table.
 */
@@ -75924,20 +77953,27 @@ static void analyzeOneTable(
   int iDb;                     /* Index of database containing pTab */
   int regTabname = iMem++;     /* Register containing table name */
   int regIdxname = iMem++;     /* Register containing index name */
-  int regSampleno = iMem++;    /* Register containing next sample number */
-  int regCol = iMem++;         /* Content of a column analyzed table */
+  int regStat1 = iMem++;       /* The stat column of sqlite_stat1 */
+#ifdef SQLITE_ENABLE_STAT3
+  int regNumEq = regStat1;     /* Number of instances.  Same as regStat1 */
+  int regNumLt = iMem++;       /* Number of keys less than regSample */
+  int regNumDLt = iMem++;      /* Number of distinct keys less than regSample */
+  int regSample = iMem++;      /* The next sample value */
+  int regRowid = regSample;    /* Rowid of a sample */
+  int regAccum = iMem++;       /* Register to hold Stat3Accum object */
+  int regLoop = iMem++;        /* Loop counter */
+  int regCount = iMem++;       /* Number of rows in the table or index */
+  int regTemp1 = iMem++;       /* Intermediate register */
+  int regTemp2 = iMem++;       /* Intermediate register */
+  int once = 1;                /* One-time initialization */
+  int shortJump = 0;           /* Instruction address */
+  int iTabCur = pParse->nTab++; /* Table cursor */
+#endif
+  int regCol = iMem++;         /* Content of a column in analyzed table */
   int regRec = iMem++;         /* Register holding completed record */
   int regTemp = iMem++;        /* Temporary use register */
-  int regRowid = iMem++;       /* Rowid for the inserted record */
+  int regNewRowid = iMem++;    /* Rowid for the inserted record */
 
-#ifdef SQLITE_ENABLE_STAT2
-  int addr = 0;                /* Instruction address */
-  int regTemp2 = iMem++;       /* Temporary use register */
-  int regSamplerecno = iMem++; /* Index of next sample to record */
-  int regRecno = iMem++;       /* Current sample index */
-  int regLast = iMem++;        /* Index of last sample to record */
-  int regFirst = iMem++;       /* Index of first sample to record */
-#endif
 
   v = sqlite3GetVdbe(pParse);
   if( v==0 || NEVER(pTab==0) ){
@@ -75970,9 +78006,14 @@ static void analyzeOneTable(
   for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
     int nCol;
     KeyInfo *pKey;
+    int addrIfNot = 0;           /* address of OP_IfNot */
+    int *aChngAddr;              /* Array of jump instruction addresses */
 
     if( pOnlyIdx && pOnlyIdx!=pIdx ) continue;
+    VdbeNoopComment((v, "Begin analysis of %s", pIdx->zName));
     nCol = pIdx->nColumn;
+    aChngAddr = sqlite3DbMallocRaw(db, sizeof(int)*nCol);
+    if( aChngAddr==0 ) continue;
     pKey = sqlite3IndexKeyinfo(pParse, pIdx);
     if( iMem+1+(nCol*2)>pParse->nMem ){
       pParse->nMem = iMem+1+(nCol*2);
@@ -75987,31 +78028,20 @@ static void analyzeOneTable(
     /* Populate the register containing the index name. */
     sqlite3VdbeAddOp4(v, OP_String8, 0, regIdxname, 0, pIdx->zName, 0);
 
-#ifdef SQLITE_ENABLE_STAT2
-
-    /* If this iteration of the loop is generating code to analyze the
-    ** first index in the pTab->pIndex list, then register regLast has
-    ** not been populated. In this case populate it now.  */
-    if( pTab->pIndex==pIdx ){
-      sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES, regSamplerecno);
-      sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES*2-1, regTemp);
-      sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES*2, regTemp2);
-
-      sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regLast);
-      sqlite3VdbeAddOp2(v, OP_Null, 0, regFirst);
-      addr = sqlite3VdbeAddOp3(v, OP_Lt, regSamplerecno, 0, regLast);
-      sqlite3VdbeAddOp3(v, OP_Divide, regTemp2, regLast, regFirst);
-      sqlite3VdbeAddOp3(v, OP_Multiply, regLast, regTemp, regLast);
-      sqlite3VdbeAddOp2(v, OP_AddImm, regLast, SQLITE_INDEX_SAMPLES*2-2);
-      sqlite3VdbeAddOp3(v, OP_Divide,  regTemp2, regLast, regLast);
-      sqlite3VdbeJumpHere(v, addr);
-    }
-
-    /* Zero the regSampleno and regRecno registers. */
-    sqlite3VdbeAddOp2(v, OP_Integer, 0, regSampleno);
-    sqlite3VdbeAddOp2(v, OP_Integer, 0, regRecno);
-    sqlite3VdbeAddOp2(v, OP_Copy, regFirst, regSamplerecno);
-#endif
+#ifdef SQLITE_ENABLE_STAT3
+    if( once ){
+      once = 0;
+      sqlite3OpenTable(pParse, iTabCur, iDb, pTab, OP_OpenRead);
+    }
+    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regCount);
+    sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_STAT3_SAMPLES, regTemp1);
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, regNumEq);
+    sqlite3VdbeAddOp2(v, OP_Integer, 0, regNumLt);
+    sqlite3VdbeAddOp2(v, OP_Integer, -1, regNumDLt);
+    sqlite3VdbeAddOp4(v, OP_Function, 1, regCount, regAccum,
+                      (char*)&stat3InitFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 2);
+#endif /* SQLITE_ENABLE_STAT3 */
 
     /* The block of memory cells initialized here is used as follows.
     **
@@ -76041,75 +78071,83 @@ static void analyzeOneTable(
     endOfLoop = sqlite3VdbeMakeLabel(v);
     sqlite3VdbeAddOp2(v, OP_Rewind, iIdxCur, endOfLoop);
     topOfLoop = sqlite3VdbeCurrentAddr(v);
-    sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1);
+    sqlite3VdbeAddOp2(v, OP_AddImm, iMem, 1);  /* Increment row counter */
 
     for(i=0; i<nCol; i++){
       CollSeq *pColl;
       sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, regCol);
       if( i==0 ){
-#ifdef SQLITE_ENABLE_STAT2
-        /* Check if the record that cursor iIdxCur points to contains a
-        ** value that should be stored in the sqlite_stat2 table. If so,
-        ** store it.  */
-        int ne = sqlite3VdbeAddOp3(v, OP_Ne, regRecno, 0, regSamplerecno);
-        assert( regTabname+1==regIdxname 
-             && regTabname+2==regSampleno
-             && regTabname+3==regCol
-        );
-        sqlite3VdbeChangeP5(v, SQLITE_JUMPIFNULL);
-        sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 4, regRec, "aaab", 0);
-        sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur+1, regRowid);
-        sqlite3VdbeAddOp3(v, OP_Insert, iStatCur+1, regRec, regRowid);
-
-        /* Calculate new values for regSamplerecno and regSampleno.
-        **
-        **   sampleno = sampleno + 1
-        **   samplerecno = samplerecno+(remaining records)/(remaining samples)
-        */
-        sqlite3VdbeAddOp2(v, OP_AddImm, regSampleno, 1);
-        sqlite3VdbeAddOp3(v, OP_Subtract, regRecno, regLast, regTemp);
-        sqlite3VdbeAddOp2(v, OP_AddImm, regTemp, -1);
-        sqlite3VdbeAddOp2(v, OP_Integer, SQLITE_INDEX_SAMPLES, regTemp2);
-        sqlite3VdbeAddOp3(v, OP_Subtract, regSampleno, regTemp2, regTemp2);
-        sqlite3VdbeAddOp3(v, OP_Divide, regTemp2, regTemp, regTemp);
-        sqlite3VdbeAddOp3(v, OP_Add, regSamplerecno, regTemp, regSamplerecno);
-
-        sqlite3VdbeJumpHere(v, ne);
-        sqlite3VdbeAddOp2(v, OP_AddImm, regRecno, 1);
-#endif
-
         /* Always record the very first row */
-        sqlite3VdbeAddOp1(v, OP_IfNot, iMem+1);
+        addrIfNot = sqlite3VdbeAddOp1(v, OP_IfNot, iMem+1);
       }
       assert( pIdx->azColl!=0 );
       assert( pIdx->azColl[i]!=0 );
       pColl = sqlite3LocateCollSeq(pParse, pIdx->azColl[i]);
-      sqlite3VdbeAddOp4(v, OP_Ne, regCol, 0, iMem+nCol+i+1,
-                       (char*)pColl, P4_COLLSEQ);
+      aChngAddr[i] = sqlite3VdbeAddOp4(v, OP_Ne, regCol, 0, iMem+nCol+i+1,
+                                      (char*)pColl, P4_COLLSEQ);
       sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
-    }
-    if( db->mallocFailed ){
-      /* If a malloc failure has occurred, then the result of the expression 
-      ** passed as the second argument to the call to sqlite3VdbeJumpHere() 
-      ** below may be negative. Which causes an assert() to fail (or an
-      ** out-of-bounds write if SQLITE_DEBUG is not defined).  */
-      return;
+      VdbeComment((v, "jump if column %d changed", i));
+#ifdef SQLITE_ENABLE_STAT3
+      if( i==0 ){
+        sqlite3VdbeAddOp2(v, OP_AddImm, regNumEq, 1);
+        VdbeComment((v, "incr repeat count"));
+      }
+#endif
     }
     sqlite3VdbeAddOp2(v, OP_Goto, 0, endOfLoop);
     for(i=0; i<nCol; i++){
-      int addr2 = sqlite3VdbeCurrentAddr(v) - (nCol*2);
+      sqlite3VdbeJumpHere(v, aChngAddr[i]);  /* Set jump dest for the OP_Ne */
       if( i==0 ){
-        sqlite3VdbeJumpHere(v, addr2-1);  /* Set jump dest for the OP_IfNot */
+        sqlite3VdbeJumpHere(v, addrIfNot);   /* Jump dest for OP_IfNot */
+#ifdef SQLITE_ENABLE_STAT3
+        sqlite3VdbeAddOp4(v, OP_Function, 1, regNumEq, regTemp2,
+                          (char*)&stat3PushFuncdef, P4_FUNCDEF);
+        sqlite3VdbeChangeP5(v, 5);
+        sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, pIdx->nColumn, regRowid);
+        sqlite3VdbeAddOp3(v, OP_Add, regNumEq, regNumLt, regNumLt);
+        sqlite3VdbeAddOp2(v, OP_AddImm, regNumDLt, 1);
+        sqlite3VdbeAddOp2(v, OP_Integer, 1, regNumEq);
+#endif        
       }
-      sqlite3VdbeJumpHere(v, addr2);      /* Set jump dest for the OP_Ne */
       sqlite3VdbeAddOp2(v, OP_AddImm, iMem+i+1, 1);
       sqlite3VdbeAddOp3(v, OP_Column, iIdxCur, i, iMem+nCol+i+1);
     }
+    sqlite3DbFree(db, aChngAddr);
 
-    /* End of the analysis loop. */
+    /* Always jump here after updating the iMem+1...iMem+1+nCol counters */
     sqlite3VdbeResolveLabel(v, endOfLoop);
+
     sqlite3VdbeAddOp2(v, OP_Next, iIdxCur, topOfLoop);
     sqlite3VdbeAddOp1(v, OP_Close, iIdxCur);
+#ifdef SQLITE_ENABLE_STAT3
+    sqlite3VdbeAddOp4(v, OP_Function, 1, regNumEq, regTemp2,
+                      (char*)&stat3PushFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 5);
+    sqlite3VdbeAddOp2(v, OP_Integer, -1, regLoop);
+    shortJump = 
+    sqlite3VdbeAddOp2(v, OP_AddImm, regLoop, 1);
+    sqlite3VdbeAddOp4(v, OP_Function, 1, regAccum, regTemp1,
+                      (char*)&stat3GetFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 2);
+    sqlite3VdbeAddOp1(v, OP_IsNull, regTemp1);
+    sqlite3VdbeAddOp3(v, OP_NotExists, iTabCur, shortJump, regTemp1);
+    sqlite3VdbeAddOp3(v, OP_Column, iTabCur, pIdx->aiColumn[0], regSample);
+    sqlite3ColumnDefault(v, pTab, pIdx->aiColumn[0], regSample);
+    sqlite3VdbeAddOp4(v, OP_Function, 1, regAccum, regNumEq,
+                      (char*)&stat3GetFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 3);
+    sqlite3VdbeAddOp4(v, OP_Function, 1, regAccum, regNumLt,
+                      (char*)&stat3GetFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 4);
+    sqlite3VdbeAddOp4(v, OP_Function, 1, regAccum, regNumDLt,
+                      (char*)&stat3GetFuncdef, P4_FUNCDEF);
+    sqlite3VdbeChangeP5(v, 5);
+    sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 6, regRec, "bbbbbb", 0);
+    sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur+1, regNewRowid);
+    sqlite3VdbeAddOp3(v, OP_Insert, iStatCur+1, regRec, regNewRowid);
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, shortJump);
+    sqlite3VdbeJumpHere(v, shortJump+2);
+#endif        
 
     /* Store the results in sqlite_stat1.
     **
@@ -76129,22 +78167,22 @@ static void analyzeOneTable(
     ** If K>0 then it is always the case the D>0 so division by zero
     ** is never possible.
     */
-    sqlite3VdbeAddOp2(v, OP_SCopy, iMem, regSampleno);
+    sqlite3VdbeAddOp2(v, OP_SCopy, iMem, regStat1);
     if( jZeroRows<0 ){
       jZeroRows = sqlite3VdbeAddOp1(v, OP_IfNot, iMem);
     }
     for(i=0; i<nCol; i++){
       sqlite3VdbeAddOp4(v, OP_String8, 0, regTemp, 0, " ", 0);
-      sqlite3VdbeAddOp3(v, OP_Concat, regTemp, regSampleno, regSampleno);
+      sqlite3VdbeAddOp3(v, OP_Concat, regTemp, regStat1, regStat1);
       sqlite3VdbeAddOp3(v, OP_Add, iMem, iMem+i+1, regTemp);
       sqlite3VdbeAddOp2(v, OP_AddImm, regTemp, -1);
       sqlite3VdbeAddOp3(v, OP_Divide, iMem+i+1, regTemp, regTemp);
       sqlite3VdbeAddOp1(v, OP_ToInt, regTemp);
-      sqlite3VdbeAddOp3(v, OP_Concat, regTemp, regSampleno, regSampleno);
+      sqlite3VdbeAddOp3(v, OP_Concat, regTemp, regStat1, regStat1);
     }
     sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regRec, "aaa", 0);
-    sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regRowid);
-    sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regRec, regRowid);
+    sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid);
+    sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regRec, regNewRowid);
     sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
   }
 
@@ -76154,22 +78192,23 @@ static void analyzeOneTable(
   if( pTab->pIndex==0 ){
     sqlite3VdbeAddOp3(v, OP_OpenRead, iIdxCur, pTab->tnum, iDb);
     VdbeComment((v, "%s", pTab->zName));
-    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regSampleno);
+    sqlite3VdbeAddOp2(v, OP_Count, iIdxCur, regStat1);
     sqlite3VdbeAddOp1(v, OP_Close, iIdxCur);
-    jZeroRows = sqlite3VdbeAddOp1(v, OP_IfNot, regSampleno);
+    jZeroRows = sqlite3VdbeAddOp1(v, OP_IfNot, regStat1);
   }else{
     sqlite3VdbeJumpHere(v, jZeroRows);
     jZeroRows = sqlite3VdbeAddOp0(v, OP_Goto);
   }
   sqlite3VdbeAddOp2(v, OP_Null, 0, regIdxname);
   sqlite3VdbeAddOp4(v, OP_MakeRecord, regTabname, 3, regRec, "aaa", 0);
-  sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regRowid);
-  sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regRec, regRowid);
+  sqlite3VdbeAddOp2(v, OP_NewRowid, iStatCur, regNewRowid);
+  sqlite3VdbeAddOp3(v, OP_Insert, iStatCur, regRec, regNewRowid);
   sqlite3VdbeChangeP5(v, OPFLAG_APPEND);
   if( pParse->nMem<regRec ) pParse->nMem = regRec;
   sqlite3VdbeJumpHere(v, jZeroRows);
 }
 
+
 /*
 ** Generate code that will cause the most recent index analysis to
 ** be loaded into internal hash tables where is can be used.
@@ -76193,7 +78232,7 @@ static void analyzeDatabase(Parse *pParse, int iDb){
 
   sqlite3BeginWriteOperation(pParse, 0, iDb);
   iStatCur = pParse->nTab;
-  pParse->nTab += 2;
+  pParse->nTab += 3;
   openStatTable(pParse, iDb, iStatCur, 0, 0);
   iMem = pParse->nMem+1;
   assert( sqlite3SchemaMutexHeld(db, iDb, 0) );
@@ -76218,7 +78257,7 @@ static void analyzeTable(Parse *pParse, Table *pTab, Index *pOnlyIdx){
   iDb = sqlite3SchemaToIndex(pParse->db, pTab->pSchema);
   sqlite3BeginWriteOperation(pParse, 0, iDb);
   iStatCur = pParse->nTab;
-  pParse->nTab += 2;
+  pParse->nTab += 3;
   if( pOnlyIdx ){
     openStatTable(pParse, iDb, iStatCur, pOnlyIdx->zName, "idx");
   }else{
@@ -76323,7 +78362,7 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){
   Index *pIndex;
   Table *pTable;
   int i, c, n;
-  unsigned int v;
+  tRowcnt v;
   const char *z;
 
   assert( argc==3 );
@@ -76366,10 +78405,10 @@ static int analysisLoader(void *pData, int argc, char **argv, char **NotUsed){
 ** and its contents.
 */
 SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
   if( pIdx->aSample ){
     int j;
-    for(j=0; j<SQLITE_INDEX_SAMPLES; j++){
+    for(j=0; j<pIdx->nSample; j++){
       IndexSample *p = &pIdx->aSample[j];
       if( p->eType==SQLITE_TEXT || p->eType==SQLITE_BLOB ){
         sqlite3DbFree(db, p->u.z);
@@ -76377,25 +78416,157 @@ SQLITE_PRIVATE void sqlite3DeleteIndexSamples(sqlite3 *db, Index *pIdx){
     }
     sqlite3DbFree(db, pIdx->aSample);
   }
+  if( db && db->pnBytesFreed==0 ){
+    pIdx->nSample = 0;
+    pIdx->aSample = 0;
+  }
 #else
   UNUSED_PARAMETER(db);
   UNUSED_PARAMETER(pIdx);
 #endif
 }
 
+#ifdef SQLITE_ENABLE_STAT3
+/*
+** Load content from the sqlite_stat3 table of database zDb into the Index.aSample[] arrays of all indices.
+** Returns SQLITE_OK (also when the table does not exist), SQLITE_NOMEM, or a prepare/finalize error code.
+*/
+static int loadStat3(sqlite3 *db, const char *zDb){
+  int rc;                       /* Result codes from subroutines */
+  sqlite3_stmt *pStmt = 0;      /* An SQL statement being run */
+  char *zSql;                   /* Text of the SQL statement */
+  Index *pPrevIdx = 0;          /* Previous index in the loop */
+  int idx = 0;                  /* slot in pIdx->aSample[] for next sample */
+  int eType;                    /* Datatype of a sample */
+  IndexSample *pSample;         /* A slot in pIdx->aSample[] */
+
+  if( !sqlite3FindTable(db, "sqlite_stat3", zDb) ){
+    return SQLITE_OK;
+  }
+
+  zSql = sqlite3MPrintf(db, 
+      "SELECT idx,count(*) FROM %Q.sqlite_stat3"
+      " GROUP BY idx", zDb);
+  if( !zSql ){
+    return SQLITE_NOMEM;
+  }
+  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  sqlite3DbFree(db, zSql);
+  if( rc ) return rc;
+
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){  /* Pass 1: size and allocate aSample[] per index */
+    char *zIndex;   /* Index name */
+    Index *pIdx;    /* Pointer to the index object */
+    int nSample;    /* Number of samples */
+
+    zIndex = (char *)sqlite3_column_text(pStmt, 0);
+    if( zIndex==0 ) continue;
+    nSample = sqlite3_column_int(pStmt, 1);
+    pIdx = sqlite3FindIndex(db, zIndex, zDb);
+    if( pIdx==0 ) continue;
+    if( pIdx->nSample!=0 ) continue;  /* already sized by an earlier group: never leak or resize aSample[] */
+    pIdx->nSample = nSample;
+    pIdx->aSample = sqlite3MallocZero( nSample*sizeof(IndexSample) );
+    pIdx->avgEq = pIdx->aiRowEst[1];  /* provisional value; recomputed below once the last sample is read */
+    if( pIdx->aSample==0 ){
+      db->mallocFailed = 1;
+      sqlite3_finalize(pStmt);
+      return SQLITE_NOMEM;
+    }
+  }
+  rc = sqlite3_finalize(pStmt);
+  if( rc ) return rc;
+
+  zSql = sqlite3MPrintf(db, 
+      "SELECT idx,neq,nlt,ndlt,sample FROM %Q.sqlite_stat3", zDb);
+  if( !zSql ){
+    return SQLITE_NOMEM;
+  }
+  rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  sqlite3DbFree(db, zSql);
+  if( rc ) return rc;
+
+  while( sqlite3_step(pStmt)==SQLITE_ROW ){  /* Pass 2: copy each row into the next free slot */
+    char *zIndex;   /* Index name */
+    Index *pIdx;    /* Pointer to the index object */
+    int i;          /* Loop counter */
+    tRowcnt sumEq;  /* Sum of the nEq values */
+
+    zIndex = (char *)sqlite3_column_text(pStmt, 0);
+    if( zIndex==0 ) continue;
+    pIdx = sqlite3FindIndex(db, zIndex, zDb);
+    if( pIdx==0 ) continue;
+    if( pIdx==pPrevIdx ){
+      idx++;
+    }else{
+      pPrevIdx = pIdx;
+      idx = 0;
+    }
+    if( idx>=pIdx->nSample ) continue;  /* no slot left for this row: skip it rather than write past aSample[] */
+    pSample = &pIdx->aSample[idx];
+    pSample->nEq = (tRowcnt)sqlite3_column_int64(pStmt, 1);
+    pSample->nLt = (tRowcnt)sqlite3_column_int64(pStmt, 2);
+    pSample->nDLt = (tRowcnt)sqlite3_column_int64(pStmt, 3);
+    if( idx==pIdx->nSample-1 ){  /* last slot: derive avgEq from the rows not covered by earlier samples */
+      if( pSample->nDLt>0 ){
+        for(i=0, sumEq=0; i<=idx-1; i++) sumEq += pIdx->aSample[i].nEq;
+        pIdx->avgEq = (pSample->nLt - sumEq)/pSample->nDLt;
+      }
+      if( pIdx->avgEq<=0 ) pIdx->avgEq = 1;
+    }
+    eType = sqlite3_column_type(pStmt, 4);
+    pSample->eType = (u8)eType;
+    switch( eType ){
+      case SQLITE_INTEGER: {
+        pSample->u.i = sqlite3_column_int64(pStmt, 4);
+        break;
+      }
+      case SQLITE_FLOAT: {
+        pSample->u.r = sqlite3_column_double(pStmt, 4);
+        break;
+      }
+      case SQLITE_NULL: {
+        break;
+      }
+      default: assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB ); {
+        const char *z = (const char *)(
+              (eType==SQLITE_BLOB) ?
+              sqlite3_column_blob(pStmt, 4):
+              sqlite3_column_text(pStmt, 4)
+           );
+        int n = z ? sqlite3_column_bytes(pStmt, 4) : 0;
+        pSample->nByte = n;
+        if( n < 1){
+          pSample->u.z = 0;
+        }else{
+          pSample->u.z = sqlite3Malloc(n);  /* private copy of the n sample bytes */
+          if( pSample->u.z==0 ){
+            db->mallocFailed = 1;
+            sqlite3_finalize(pStmt);
+            return SQLITE_NOMEM;
+          }
+          memcpy(pSample->u.z, z, n);
+        }
+      }
+    }
+  }
+  return sqlite3_finalize(pStmt);
+}
+#endif /* SQLITE_ENABLE_STAT3 */
+
 /*
-** Load the content of the sqlite_stat1 and sqlite_stat2 tables. The
+** Load the content of the sqlite_stat1 and sqlite_stat3 tables. The
 ** contents of sqlite_stat1 are used to populate the Index.aiRowEst[]
-** arrays. The contents of sqlite_stat2 are used to populate the
+** arrays. The contents of sqlite_stat3 are used to populate the
 ** Index.aSample[] arrays.
 **
 ** If the sqlite_stat1 table is not present in the database, SQLITE_ERROR
-** is returned. In this case, even if SQLITE_ENABLE_STAT2 was defined 
-** during compilation and the sqlite_stat2 table is present, no data is 
+** is returned. In this case, even if SQLITE_ENABLE_STAT3 was defined 
+** during compilation and the sqlite_stat3 table is present, no data is 
 ** read from it.
 **
-** If SQLITE_ENABLE_STAT2 was defined during compilation and the 
-** sqlite_stat2 table is not present in the database, SQLITE_ERROR is
+** If SQLITE_ENABLE_STAT3 was defined during compilation and the 
+** sqlite_stat3 table is not present in the database, SQLITE_ERROR is
 ** returned. However, in this case, data is read from the sqlite_stat1
 ** table (if it is present) before returning.
 **
@@ -76417,8 +78588,10 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
   for(i=sqliteHashFirst(&db->aDb[iDb].pSchema->idxHash);i;i=sqliteHashNext(i)){
     Index *pIdx = sqliteHashData(i);
     sqlite3DefaultRowEst(pIdx);
+#ifdef SQLITE_ENABLE_STAT3
     sqlite3DeleteIndexSamples(db, pIdx);
     pIdx->aSample = 0;
+#endif
   }
 
   /* Check to make sure the sqlite_stat1 table exists */
@@ -76430,7 +78603,7 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
 
   /* Load new statistics out of the sqlite_stat1 table */
   zSql = sqlite3MPrintf(db, 
-      "SELECT tbl, idx, stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
+      "SELECT tbl,idx,stat FROM %Q.sqlite_stat1", sInfo.zDatabase);
   if( zSql==0 ){
     rc = SQLITE_NOMEM;
   }else{
@@ -76439,78 +78612,10 @@ SQLITE_PRIVATE int sqlite3AnalysisLoad(sqlite3 *db, int iDb){
   }
 
 
-  /* Load the statistics from the sqlite_stat2 table. */
-#ifdef SQLITE_ENABLE_STAT2
-  if( rc==SQLITE_OK && !sqlite3FindTable(db, "sqlite_stat2", sInfo.zDatabase) ){
-    rc = SQLITE_ERROR;
-  }
+  /* Load the statistics from the sqlite_stat3 table. */
+#ifdef SQLITE_ENABLE_STAT3
   if( rc==SQLITE_OK ){
-    sqlite3_stmt *pStmt = 0;
-
-    zSql = sqlite3MPrintf(db, 
-        "SELECT idx,sampleno,sample FROM %Q.sqlite_stat2", sInfo.zDatabase);
-    if( !zSql ){
-      rc = SQLITE_NOMEM;
-    }else{
-      rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
-      sqlite3DbFree(db, zSql);
-    }
-
-    if( rc==SQLITE_OK ){
-      while( sqlite3_step(pStmt)==SQLITE_ROW ){
-        char *zIndex;   /* Index name */
-        Index *pIdx;    /* Pointer to the index object */
-
-        zIndex = (char *)sqlite3_column_text(pStmt, 0);
-        pIdx = zIndex ? sqlite3FindIndex(db, zIndex, sInfo.zDatabase) : 0;
-        if( pIdx ){
-          int iSample = sqlite3_column_int(pStmt, 1);
-          if( iSample<SQLITE_INDEX_SAMPLES && iSample>=0 ){
-            int eType = sqlite3_column_type(pStmt, 2);
-
-            if( pIdx->aSample==0 ){
-              static const int sz = sizeof(IndexSample)*SQLITE_INDEX_SAMPLES;
-              pIdx->aSample = (IndexSample *)sqlite3DbMallocRaw(0, sz);
-              if( pIdx->aSample==0 ){
-                db->mallocFailed = 1;
-                break;
-              }
-	      memset(pIdx->aSample, 0, sz);
-            }
-
-            assert( pIdx->aSample );
-            {
-              IndexSample *pSample = &pIdx->aSample[iSample];
-              pSample->eType = (u8)eType;
-              if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
-                pSample->u.r = sqlite3_column_double(pStmt, 2);
-              }else if( eType==SQLITE_TEXT || eType==SQLITE_BLOB ){
-                const char *z = (const char *)(
-                    (eType==SQLITE_BLOB) ?
-                    sqlite3_column_blob(pStmt, 2):
-                    sqlite3_column_text(pStmt, 2)
-                );
-                int n = sqlite3_column_bytes(pStmt, 2);
-                if( n>24 ){
-                  n = 24;
-                }
-                pSample->nByte = (u8)n;
-                if( n < 1){
-                  pSample->u.z = 0;
-                }else{
-                  pSample->u.z = sqlite3DbStrNDup(0, z, n);
-                  if( pSample->u.z==0 ){
-                    db->mallocFailed = 1;
-                    break;
-                  }
-                }
-              }
-            }
-          }
-        }
-      }
-      rc = sqlite3_finalize(pStmt);
-    }
+    rc = loadStat3(db, sInfo.zDatabase);
   }
 #endif
 
@@ -79010,7 +81115,7 @@ SQLITE_PRIVATE void sqlite3CreateView(
   const char *z;
   Token sEnd;
   DbFixer sFix;
-  Token *pName;
+  Token *pName = 0;
   int iDb;
   sqlite3 *db = pParse->db;
 
@@ -79317,6 +81422,100 @@ static void destroyTable(Parse *pParse, Table *pTab){
 }
 
 /*
+** Remove entries from the sqlite_statN tables (for N in (1,2,3))
+** after a DROP INDEX or DROP TABLE command.
+*/
+static void sqlite3ClearStatTables(
+  Parse *pParse,         /* The parsing context */
+  int iDb,               /* The database number */
+  const char *zType,     /* "idx" or "tbl" */
+  const char *zName      /* Name of index or table */
+){
+  sqlite3 *db = pParse->db;                    /* Connection owning the schema */
+  const char *zSchema = db->aDb[iDb].zName;    /* Name of database iDb */
+  int iStat = 0;                               /* N of the sqlite_statN table being handled */
+  while( ++iStat<=3 ){
+    char zStatTab[24];                         /* Holds "sqlite_statN" */
+    sqlite3_snprintf(sizeof(zStatTab), zStatTab, "sqlite_stat%d", iStat);
+    if( sqlite3FindTable(db, zStatTab, zSchema)==0 ) continue;  /* table absent: nothing to delete */
+    sqlite3NestedParse(pParse,
+      "DELETE FROM %Q.%s WHERE %s=%Q",
+      zSchema, zStatTab, zType, zName
+    );
+  }
+}
+
+/*
+** Generate code to drop table pTab of database iDb.  destroyTable() is skipped when isView is nonzero or pTab is virtual.
+*/
+SQLITE_PRIVATE void sqlite3CodeDropTable(Parse *pParse, Table *pTab, int iDb, int isView){
+  Vdbe *v;                     /* VDBE returned by sqlite3GetVdbe(); asserted non-NULL below */
+  sqlite3 *db = pParse->db;    /* The database connection */
+  Trigger *pTrigger;           /* Iterator over the triggers returned by sqlite3TriggerList() */
+  Db *pDb = &db->aDb[iDb];     /* Entry for database iDb; supplies the name used in the DELETE statements */
+
+  v = sqlite3GetVdbe(pParse);
+  assert( v!=0 );
+  sqlite3BeginWriteOperation(pParse, 1, iDb);
+
+#ifndef SQLITE_OMIT_VIRTUALTABLE
+  if( IsVirtual(pTab) ){
+    sqlite3VdbeAddOp0(v, OP_VBegin);
+  }
+#endif
+
+  /* Drop all triggers associated with the table being dropped. Code
+  ** is generated to remove entries from sqlite_master and/or
+  ** sqlite_temp_master if required.
+  */
+  pTrigger = sqlite3TriggerList(pParse, pTab);
+  while( pTrigger ){
+    assert( pTrigger->pSchema==pTab->pSchema || 
+        pTrigger->pSchema==db->aDb[1].pSchema );
+    sqlite3DropTriggerPtr(pParse, pTrigger);
+    pTrigger = pTrigger->pNext;
+  }
+
+#ifndef SQLITE_OMIT_AUTOINCREMENT
+  /* Remove any entries of the sqlite_sequence table associated with
+  ** the table being dropped. This is done before the table is dropped
+  ** at the btree level, in case the sqlite_sequence table needs to
+  ** move as a result of the drop (can happen in auto-vacuum mode).
+  */
+  if( pTab->tabFlags & TF_Autoincrement ){
+    sqlite3NestedParse(pParse,
+      "DELETE FROM %Q.sqlite_sequence WHERE name=%Q",
+      pDb->zName, pTab->zName
+    );
+  }
+#endif
+
+  /* Drop all SQLITE_MASTER table and index entries that refer to the
+  ** table. The program name loops through the master table and deletes
+  ** every row that refers to a table of the same name as the one being
+  ** dropped. Triggers are handled separately because a trigger can be
+  ** created in the temp database that refers to a table in another
+  ** database.
+  */
+  sqlite3NestedParse(pParse, 
+      "DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'",
+      pDb->zName, SCHEMA_TABLE(iDb), pTab->zName);
+  if( !isView && !IsVirtual(pTab) ){
+    destroyTable(pParse, pTab);
+  }
+
+  /* Remove the table entry from SQLite's internal schema and modify
+  ** the schema cookie.
+  */
+  if( IsVirtual(pTab) ){
+    sqlite3VdbeAddOp4(v, OP_VDestroy, iDb, 0, 0, pTab->zName, 0);
+  }
+  sqlite3VdbeAddOp4(v, OP_DropTable, iDb, 0, 0, pTab->zName, 0);
+  sqlite3ChangeCookie(pParse, iDb);
+  sqliteViewResetAll(db, iDb);
+}
+
+/*
 ** This routine is called to do the work of a DROP TABLE statement.
 ** pName is the name of the table to be dropped.
 */
@@ -79384,7 +81583,8 @@ SQLITE_PRIVATE void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView,
     }
   }
 #endif
-  if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 ){
+  if( sqlite3StrNICmp(pTab->zName, "sqlite_", 7)==0 
+    && sqlite3StrNICmp(pTab->zName, "sqlite_stat", 11)!=0 ){
     sqlite3ErrorMsg(pParse, "table %s may not be dropped", pTab->zName);
     goto exit_drop_table;
   }
@@ -79408,75 +81608,11 @@ SQLITE_PRIVATE void sqlite3DropTable(Parse *pParse, SrcList *pName, int isView,
   */
   v = sqlite3GetVdbe(pParse);
   if( v ){
-    Trigger *pTrigger;
-    Db *pDb = &db->aDb[iDb];
     sqlite3BeginWriteOperation(pParse, 1, iDb);
-
-#ifndef SQLITE_OMIT_VIRTUALTABLE
-    if( IsVirtual(pTab) ){
-      sqlite3VdbeAddOp0(v, OP_VBegin);
-    }
-#endif
+    sqlite3ClearStatTables(pParse, iDb, "tbl", pTab->zName);
     sqlite3FkDropTable(pParse, pName, pTab);
-
-    /* Drop all triggers associated with the table being dropped. Code
-    ** is generated to remove entries from sqlite_master and/or
-    ** sqlite_temp_master if required.
-    */
-    pTrigger = sqlite3TriggerList(pParse, pTab);
-    while( pTrigger ){
-      assert( pTrigger->pSchema==pTab->pSchema || 
-          pTrigger->pSchema==db->aDb[1].pSchema );
-      sqlite3DropTriggerPtr(pParse, pTrigger);
-      pTrigger = pTrigger->pNext;
-    }
-
-#ifndef SQLITE_OMIT_AUTOINCREMENT
-    /* Remove any entries of the sqlite_sequence table associated with
-    ** the table being dropped. This is done before the table is dropped
-    ** at the btree level, in case the sqlite_sequence table needs to
-    ** move as a result of the drop (can happen in auto-vacuum mode).
-    */
-    if( pTab->tabFlags & TF_Autoincrement ){
-      sqlite3NestedParse(pParse,
-        "DELETE FROM %s.sqlite_sequence WHERE name=%Q",
-        pDb->zName, pTab->zName
-      );
-    }
-#endif
-
-    /* Drop all SQLITE_MASTER table and index entries that refer to the
-    ** table. The program name loops through the master table and deletes
-    ** every row that refers to a table of the same name as the one being
-    ** dropped. Triggers are handled seperately because a trigger can be
-    ** created in the temp database that refers to a table in another
-    ** database.
-    */
-    sqlite3NestedParse(pParse, 
-        "DELETE FROM %Q.%s WHERE tbl_name=%Q and type!='trigger'",
-        pDb->zName, SCHEMA_TABLE(iDb), pTab->zName);
-
-    /* Drop any statistics from the sqlite_stat1 table, if it exists */
-    if( sqlite3FindTable(db, "sqlite_stat1", db->aDb[iDb].zName) ){
-      sqlite3NestedParse(pParse,
-        "DELETE FROM %Q.sqlite_stat1 WHERE tbl=%Q", pDb->zName, pTab->zName
-      );
-    }
-
-    if( !isView && !IsVirtual(pTab) ){
-      destroyTable(pParse, pTab);
-    }
-
-    /* Remove the table entry from SQLite's internal schema and modify
-    ** the schema cookie.
-    */
-    if( IsVirtual(pTab) ){
-      sqlite3VdbeAddOp4(v, OP_VDestroy, iDb, 0, 0, pTab->zName, 0);
-    }
-    sqlite3VdbeAddOp4(v, OP_DropTable, iDb, 0, 0, pTab->zName, 0);
-    sqlite3ChangeCookie(pParse, iDb);
+    sqlite3CodeDropTable(pParse, pTab, iDb, isView);
   }
-  sqliteViewResetAll(db, iDb);
 
 exit_drop_table:
   sqlite3SrcListDelete(db, pName);
@@ -79644,11 +81780,15 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
   Table *pTab = pIndex->pTable;  /* The table that is indexed */
   int iTab = pParse->nTab++;     /* Btree cursor used for pTab */
   int iIdx = pParse->nTab++;     /* Btree cursor used for pIndex */
+  int iSorter;                   /* Cursor opened by OpenSorter (if in use) */
   int addr1;                     /* Address of top of loop */
+  int addr2;                     /* Address to jump to for next iteration */
   int tnum;                      /* Root page of index */
   Vdbe *v;                       /* Generate code into this virtual machine */
   KeyInfo *pKey;                 /* KeyInfo for index */
+#ifdef SQLITE_OMIT_MERGE_SORT
   int regIdxKey;                 /* Registers containing the index key */
+#endif
   int regRecord;                 /* Register holding assemblied index record */
   sqlite3 *db = pParse->db;      /* The database connection */
   int iDb = sqlite3SchemaToIndex(db, pIndex->pSchema);
@@ -79677,10 +81817,44 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
   if( memRootPage>=0 ){
     sqlite3VdbeChangeP5(v, 1);
   }
+
+#ifndef SQLITE_OMIT_MERGE_SORT
+  /* Open the sorter cursor if we are to use one. */
+  iSorter = pParse->nTab++;
+  sqlite3VdbeAddOp4(v, OP_SorterOpen, iSorter, 0, 0, (char*)pKey, P4_KEYINFO);
+#else
+  iSorter = iTab;
+#endif
+
+  /* Open the table. Loop through all rows of the table, inserting index
+  ** records into the sorter. */
   sqlite3OpenTable(pParse, iTab, iDb, pTab, OP_OpenRead);
   addr1 = sqlite3VdbeAddOp2(v, OP_Rewind, iTab, 0);
   regRecord = sqlite3GetTempReg(pParse);
+
+#ifndef SQLITE_OMIT_MERGE_SORT
+  sqlite3GenerateIndexKey(pParse, pIndex, iTab, regRecord, 1);
+  sqlite3VdbeAddOp2(v, OP_SorterInsert, iSorter, regRecord);
+  sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1);
+  sqlite3VdbeJumpHere(v, addr1);
+  addr1 = sqlite3VdbeAddOp2(v, OP_SorterSort, iSorter, 0);
+  if( pIndex->onError!=OE_None ){
+    int j2 = sqlite3VdbeCurrentAddr(v) + 3;
+    sqlite3VdbeAddOp2(v, OP_Goto, 0, j2);
+    addr2 = sqlite3VdbeCurrentAddr(v);
+    sqlite3VdbeAddOp3(v, OP_SorterCompare, iSorter, j2, regRecord);
+    sqlite3HaltConstraint(
+        pParse, OE_Abort, "indexed columns are not unique", P4_STATIC
+    );
+  }else{
+    addr2 = sqlite3VdbeCurrentAddr(v);
+  }
+  sqlite3VdbeAddOp2(v, OP_SorterData, iSorter, regRecord);
+  sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 1);
+  sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+#else
   regIdxKey = sqlite3GenerateIndexKey(pParse, pIndex, iTab, regRecord, 1);
+  addr2 = addr1 + 1;
   if( pIndex->onError!=OE_None ){
     const int regRowid = regIdxKey + pIndex->nColumn;
     const int j2 = sqlite3VdbeCurrentAddr(v) + 2;
@@ -79699,13 +81873,16 @@ static void sqlite3RefillIndex(Parse *pParse, Index *pIndex, int memRootPage){
     sqlite3HaltConstraint(
         pParse, OE_Abort, "indexed columns are not unique", P4_STATIC);
   }
-  sqlite3VdbeAddOp2(v, OP_IdxInsert, iIdx, regRecord);
+  sqlite3VdbeAddOp3(v, OP_IdxInsert, iIdx, regRecord, 0);
   sqlite3VdbeChangeP5(v, OPFLAG_USESEEKRESULT);
+#endif
   sqlite3ReleaseTempReg(pParse, regRecord);
-  sqlite3VdbeAddOp2(v, OP_Next, iTab, addr1+1);
+  sqlite3VdbeAddOp2(v, OP_SorterNext, iSorter, addr2);
   sqlite3VdbeJumpHere(v, addr1);
+
   sqlite3VdbeAddOp1(v, OP_Close, iTab);
   sqlite3VdbeAddOp1(v, OP_Close, iIdx);
+  sqlite3VdbeAddOp1(v, OP_Close, iSorter);
 }
 
 /*
@@ -79775,6 +81952,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex(
     assert( pName1 && pName2 );
     iDb = sqlite3TwoPartName(pParse, pName1, pName2, &pName);
     if( iDb<0 ) goto exit_create_index;
+    assert( pName && pName->z );
 
 #ifndef SQLITE_OMIT_TEMPDB
     /* If the index name was unqualified, check if the the table
@@ -79802,6 +81980,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex(
     assert( db->aDb[iDb].pSchema==pTab->pSchema );
   }else{
     assert( pName==0 );
+    assert( pStart==0 );
     pTab = pParse->pNewTable;
     if( !pTab ) goto exit_create_index;
     iDb = sqlite3SchemaToIndex(db, pTab->pSchema);
@@ -79844,6 +82023,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex(
   if( pName ){
     zName = sqlite3NameFromToken(db, pName);
     if( zName==0 ) goto exit_create_index;
+    assert( pName->z!=0 );
     if( SQLITE_OK!=sqlite3CheckObjectName(pParse, zName) ){
       goto exit_create_index;
     }
@@ -79923,8 +82103,8 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex(
   nCol = pList->nExpr;
   pIndex = sqlite3DbMallocZero(db, 
       sizeof(Index) +              /* Index structure  */
+      sizeof(tRowcnt)*(nCol+1) +   /* Index.aiRowEst   */
       sizeof(int)*nCol +           /* Index.aiColumn   */
-      sizeof(int)*(nCol+1) +       /* Index.aiRowEst   */
       sizeof(char *)*nCol +        /* Index.azColl     */
       sizeof(u8)*nCol +            /* Index.aSortOrder */
       nName + 1 +                  /* Index.zName      */
@@ -79933,10 +82113,10 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex(
   if( db->mallocFailed ){
     goto exit_create_index;
   }
-  pIndex->azColl = (char**)(&pIndex[1]);
+  pIndex->aiRowEst = (tRowcnt*)(&pIndex[1]);
+  pIndex->azColl = (char**)(&pIndex->aiRowEst[nCol+1]);
   pIndex->aiColumn = (int *)(&pIndex->azColl[nCol]);
-  pIndex->aiRowEst = (unsigned *)(&pIndex->aiColumn[nCol]);
-  pIndex->aSortOrder = (u8 *)(&pIndex->aiRowEst[nCol+1]);
+  pIndex->aSortOrder = (u8 *)(&pIndex->aiColumn[nCol]);
   pIndex->zName = (char *)(&pIndex->aSortOrder[nCol]);
   zExtra = (char *)(&pIndex->zName[nName+1]);
   memcpy(pIndex->zName, zName, nName+1);
@@ -80127,7 +82307,7 @@ SQLITE_PRIVATE Index *sqlite3CreateIndex(
       /* A named index with an explicit CREATE INDEX statement */
       zStmt = sqlite3MPrintf(db, "CREATE%s INDEX %.*s",
         onError==OE_None ? "" : " UNIQUE",
-        pEnd->z - pName->z + 1,
+        (int)(pEnd->z - pName->z) + 1,
         pName->z);
     }else{
       /* An automatic index created by a PRIMARY KEY or UNIQUE constraint */
@@ -80213,9 +82393,9 @@ exit_create_index:
 ** are based on typical values found in actual indices.
 */
 SQLITE_PRIVATE void sqlite3DefaultRowEst(Index *pIdx){
-  unsigned *a = pIdx->aiRowEst;
+  tRowcnt *a = pIdx->aiRowEst;
   int i;
-  unsigned n;
+  tRowcnt n;
   assert( a!=0 );
   a[0] = pIdx->pTable->nRowEst;
   if( a[0]<10 ) a[0] = 10;
@@ -80285,15 +82465,9 @@ SQLITE_PRIVATE void sqlite3DropIndex(Parse *pParse, SrcList *pName, int ifExists
     sqlite3BeginWriteOperation(pParse, 1, iDb);
     sqlite3NestedParse(pParse,
        "DELETE FROM %Q.%s WHERE name=%Q AND type='index'",
-       db->aDb[iDb].zName, SCHEMA_TABLE(iDb),
-       pIndex->zName
+       db->aDb[iDb].zName, SCHEMA_TABLE(iDb), pIndex->zName
     );
-    if( sqlite3FindTable(db, "sqlite_stat1", db->aDb[iDb].zName) ){
-      sqlite3NestedParse(pParse,
-        "DELETE FROM %Q.sqlite_stat1 WHERE idx=%Q",
-        db->aDb[iDb].zName, pIndex->zName
-      );
-    }
+    sqlite3ClearStatTables(pParse, iDb, "idx", pIndex->zName);
     sqlite3ChangeCookie(pParse, iDb);
     destroyRootPage(pParse, pIndex->tnum, iDb);
     sqlite3VdbeAddOp4(v, OP_DropIndex, iDb, 0, 0, pIndex->zName, 0);
@@ -80665,8 +82839,9 @@ SQLITE_PRIVATE void sqlite3SrcListIndexedBy(Parse *pParse, SrcList *p, Token *pI
 ** operator with A.  This routine shifts that operator over to B.
 */
 SQLITE_PRIVATE void sqlite3SrcListShiftJoinType(SrcList *p){
-  if( p && p->a ){
+  if( p ){
     int i;
+    assert( p->a || p->nSrc==0 );
     for(i=p->nSrc-1; i>0; i--){
       p->a[i].jointype = p->a[i-1].jointype;
     }
@@ -80704,13 +82879,10 @@ SQLITE_PRIVATE void sqlite3BeginTransaction(Parse *pParse, int type){
 ** Commit a transaction
 */
 SQLITE_PRIVATE void sqlite3CommitTransaction(Parse *pParse){
-  sqlite3 *db;
   Vdbe *v;
 
   assert( pParse!=0 );
-  db = pParse->db;
-  assert( db!=0 );
-/*  if( db->aDb[0].pBt==0 ) return; */
+  assert( pParse->db!=0 );
   if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "COMMIT", 0, 0) ){
     return;
   }
@@ -80724,13 +82896,10 @@ SQLITE_PRIVATE void sqlite3CommitTransaction(Parse *pParse){
 ** Rollback a transaction
 */
 SQLITE_PRIVATE void sqlite3RollbackTransaction(Parse *pParse){
-  sqlite3 *db;
   Vdbe *v;
 
   assert( pParse!=0 );
-  db = pParse->db;
-  assert( db!=0 );
-/*  if( db->aDb[0].pBt==0 ) return; */
+  assert( pParse->db!=0 );
   if( sqlite3AuthCheck(pParse, SQLITE_TRANSACTION, "ROLLBACK", 0, 0) ){
     return;
   }
@@ -81922,7 +84091,9 @@ SQLITE_PRIVATE void sqlite3DeleteFrom(
     /* Collect rowids of every row to be deleted.
     */
     sqlite3VdbeAddOp2(v, OP_Null, 0, iRowSet);
-    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0,WHERE_DUPLICATES_OK);
+    pWInfo = sqlite3WhereBegin(
+        pParse, pTabList, pWhere, 0, 0, WHERE_DUPLICATES_OK
+    );
     if( pWInfo==0 ) goto delete_from_cleanup;
     regRowid = sqlite3ExprCodeGetColumn(pParse, pTab, -1, iCur, iRowid);
     sqlite3VdbeAddOp2(v, OP_RowSetAdd, iRowSet, regRowid);
@@ -82220,6 +84391,8 @@ SQLITE_PRIVATE int sqlite3GenerateIndexKey(
 ** sqliteRegisterBuildinFunctions() found at the bottom of the file.
 ** All other code has file scope.
 */
+/* #include <stdlib.h> */
+/* #include <assert.h> */
 
 /*
 ** Return the collating function associated with a function.
@@ -82532,16 +84705,15 @@ static void upperFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
   if( z2 ){
     z1 = contextMalloc(context, ((i64)n)+1);
     if( z1 ){
-      memcpy(z1, z2, n+1);
-      for(i=0; z1[i]; i++){
-        z1[i] = (char)sqlite3Toupper(z1[i]);
+      for(i=0; i<n; i++){
+        z1[i] = (char)sqlite3Toupper(z2[i]);
       }
-      sqlite3_result_text(context, z1, -1, sqlite3_free);
+      sqlite3_result_text(context, z1, n, sqlite3_free);
     }
   }
 }
 static void lowerFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
-  u8 *z1;
+  char *z1;
   const char *z2;
   int i, n;
   UNUSED_PARAMETER(argc);
@@ -82552,11 +84724,10 @@ static void lowerFunc(sqlite3_context *context, int argc, sqlite3_value **argv){
   if( z2 ){
     z1 = contextMalloc(context, ((i64)n)+1);
     if( z1 ){
-      memcpy(z1, z2, n+1);
-      for(i=0; z1[i]; i++){
-        z1[i] = sqlite3Tolower(z1[i]);
+      for(i=0; i<n; i++){
+        z1[i] = sqlite3Tolower(z2[i]);
       }
-      sqlite3_result_text(context, (char *)z1, -1, sqlite3_free);
+      sqlite3_result_text(context, z1, n, sqlite3_free);
     }
   }
 }
@@ -84369,7 +86540,7 @@ static void fkScanChildren(
   ** clause. If the constraint is not deferred, throw an exception for
   ** each row found. Otherwise, for deferred constraints, increment the
   ** deferred constraint counter by nIncr for each row selected.  */
-  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0);
+  pWInfo = sqlite3WhereBegin(pParse, pSrc, pWhere, 0, 0, 0);
   if( nIncr>0 && pFKey->isDeferred==0 ){
     sqlite3ParseToplevel(pParse)->mayAbort = 1;
   }
@@ -84543,7 +86714,24 @@ SQLITE_PRIVATE void sqlite3FkCheck(
       pTo = sqlite3LocateTable(pParse, 0, pFKey->zTo, zDb);
     }
     if( !pTo || locateFkeyIndex(pParse, pTo, pFKey, &pIdx, &aiFree) ){
+      assert( isIgnoreErrors==0 || (regOld!=0 && regNew==0) );
       if( !isIgnoreErrors || db->mallocFailed ) return;
+      if( pTo==0 ){
+        /* If isIgnoreErrors is true, then a table is being dropped. In this
+        ** case SQLite runs a "DELETE FROM xxx" on the table being dropped
+        ** before actually dropping it in order to check FK constraints.
+        ** If the parent table of an FK constraint on the current table is
+        ** missing, behave as if it is empty. i.e. decrement the relevant
+        ** FK counter for each row of the current table with non-NULL keys.
+        */
+        Vdbe *v = sqlite3GetVdbe(pParse);
+        int iJump = sqlite3VdbeCurrentAddr(v) + pFKey->nCol + 1;
+        for(i=0; i<pFKey->nCol; i++){
+          int iReg = pFKey->aCol[i].iFrom + regOld + 1;
+          sqlite3VdbeAddOp2(v, OP_IsNull, iReg, iJump);
+        }
+        sqlite3VdbeAddOp2(v, OP_FkCounter, pFKey->isDeferred, -1);
+      }
       continue;
     }
     assert( pFKey->nCol==1 || (aiFree && pIdx) );
@@ -84916,6 +87104,7 @@ static Trigger *fkActionTrigger(
       fkTriggerDelete(db, pTrigger);
       return 0;
     }
+    assert( pStep!=0 );
 
     switch( action ){
       case OE_Restrict:
@@ -86759,6 +88948,9 @@ static int xferOptimization(
     return 0;
   }
 #endif
+  if( (pParse->db->flags & SQLITE_CountRows)!=0 ){
+    return 0;
+  }
 
   /* If we get this far, it means either:
   **
@@ -87073,8 +89265,10 @@ struct sqlite3_api_routines {
   int  (*busy_timeout)(sqlite3*,int ms);
   int  (*changes)(sqlite3*);
   int  (*close)(sqlite3*);
-  int  (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*,int eTextRep,const char*));
-  int  (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*,int eTextRep,const void*));
+  int  (*collation_needed)(sqlite3*,void*,void(*)(void*,sqlite3*,
+                           int eTextRep,const char*));
+  int  (*collation_needed16)(sqlite3*,void*,void(*)(void*,sqlite3*,
+                             int eTextRep,const void*));
   const void * (*column_blob)(sqlite3_stmt*,int iCol);
   int  (*column_bytes)(sqlite3_stmt*,int iCol);
   int  (*column_bytes16)(sqlite3_stmt*,int iCol);
@@ -87099,10 +89293,18 @@ struct sqlite3_api_routines {
   void * (*commit_hook)(sqlite3*,int(*)(void*),void*);
   int  (*complete)(const char*sql);
   int  (*complete16)(const void*sql);
-  int  (*create_collation)(sqlite3*,const char*,int,void*,int(*)(void*,int,const void*,int,const void*));
-  int  (*create_collation16)(sqlite3*,const void*,int,void*,int(*)(void*,int,const void*,int,const void*));
-  int  (*create_function)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
-  int  (*create_function16)(sqlite3*,const void*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*));
+  int  (*create_collation)(sqlite3*,const char*,int,void*,
+                           int(*)(void*,int,const void*,int,const void*));
+  int  (*create_collation16)(sqlite3*,const void*,int,void*,
+                             int(*)(void*,int,const void*,int,const void*));
+  int  (*create_function)(sqlite3*,const char*,int,int,void*,
+                          void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+                          void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+                          void (*xFinal)(sqlite3_context*));
+  int  (*create_function16)(sqlite3*,const void*,int,int,void*,
+                            void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xFinal)(sqlite3_context*));
   int (*create_module)(sqlite3*,const char*,const sqlite3_module*,void*);
   int  (*data_count)(sqlite3_stmt*pStmt);
   sqlite3 * (*db_handle)(sqlite3_stmt*);
@@ -87147,16 +89349,19 @@ struct sqlite3_api_routines {
   void  (*result_text16le)(sqlite3_context*,const void*,int,void(*)(void*));
   void  (*result_value)(sqlite3_context*,sqlite3_value*);
   void * (*rollback_hook)(sqlite3*,void(*)(void*),void*);
-  int  (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*,const char*,const char*),void*);
+  int  (*set_authorizer)(sqlite3*,int(*)(void*,int,const char*,const char*,
+                         const char*,const char*),void*);
   void  (*set_auxdata)(sqlite3_context*,int,void*,void (*)(void*));
   char * (*snprintf)(int,char*,const char*,...);
   int  (*step)(sqlite3_stmt*);
-  int  (*table_column_metadata)(sqlite3*,const char*,const char*,const char*,char const**,char const**,int*,int*,int*);
+  int  (*table_column_metadata)(sqlite3*,const char*,const char*,const char*,
+                                char const**,char const**,int*,int*,int*);
   void  (*thread_cleanup)(void);
   int  (*total_changes)(sqlite3*);
   void * (*trace)(sqlite3*,void(*xTrace)(void*,const char*),void*);
   int  (*transfer_bindings)(sqlite3_stmt*,sqlite3_stmt*);
-  void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*,sqlite_int64),void*);
+  void * (*update_hook)(sqlite3*,void(*)(void*,int ,char const*,char const*,
+                                         sqlite_int64),void*);
   void * (*user_data)(sqlite3_context*);
   const void * (*value_blob)(sqlite3_value*);
   int  (*value_bytes)(sqlite3_value*);
@@ -87178,15 +89383,19 @@ struct sqlite3_api_routines {
   int (*prepare16_v2)(sqlite3*,const void*,int,sqlite3_stmt**,const void**);
   int (*clear_bindings)(sqlite3_stmt*);
   /* Added by 3.4.1 */
-  int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,void (*xDestroy)(void *));
+  int (*create_module_v2)(sqlite3*,const char*,const sqlite3_module*,void*,
+                          void (*xDestroy)(void *));
   /* Added by 3.5.0 */
   int (*bind_zeroblob)(sqlite3_stmt*,int,int);
   int (*blob_bytes)(sqlite3_blob*);
   int (*blob_close)(sqlite3_blob*);
-  int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64,int,sqlite3_blob**);
+  int (*blob_open)(sqlite3*,const char*,const char*,const char*,sqlite3_int64,
+                   int,sqlite3_blob**);
   int (*blob_read)(sqlite3_blob*,void*,int,int);
   int (*blob_write)(sqlite3_blob*,const void*,int,int);
-  int (*create_collation_v2)(sqlite3*,const char*,int,void*,int(*)(void*,int,const void*,int,const void*),void(*)(void*));
+  int (*create_collation_v2)(sqlite3*,const char*,int,void*,
+                             int(*)(void*,int,const void*,int,const void*),
+                             void(*)(void*));
   int (*file_control)(sqlite3*,const char*,int,void*);
   sqlite3_int64 (*memory_highwater)(int);
   sqlite3_int64 (*memory_used)(void);
@@ -87222,7 +89431,11 @@ struct sqlite3_api_routines {
   int (*backup_step)(sqlite3_backup*,int);
   const char *(*compileoption_get)(int);
   int (*compileoption_used)(const char*);
-  int (*create_function_v2)(sqlite3*,const char*,int,int,void*,void (*xFunc)(sqlite3_context*,int,sqlite3_value**),void (*xStep)(sqlite3_context*,int,sqlite3_value**),void (*xFinal)(sqlite3_context*),void(*xDestroy)(void*));
+  int (*create_function_v2)(sqlite3*,const char*,int,int,void*,
+                            void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+                            void (*xFinal)(sqlite3_context*),
+                            void(*xDestroy)(void*));
   int (*db_config)(sqlite3*,int,...);
   sqlite3_mutex *(*db_mutex)(sqlite3*);
   int (*db_status)(sqlite3*,int,int*,int*,int);
@@ -87236,6 +89449,9 @@ struct sqlite3_api_routines {
   int (*wal_autocheckpoint)(sqlite3*,int);
   int (*wal_checkpoint)(sqlite3*,const char*);
   void *(*wal_hook)(sqlite3*,int(*)(void*,sqlite3*,const char*,int),void*);
+  int (*blob_reopen)(sqlite3_blob*,sqlite3_int64);
+  int (*vtab_config)(sqlite3*,int op,...);
+  int (*vtab_on_conflict)(sqlite3*);
 };
 
 /*
@@ -87436,6 +89652,9 @@ struct sqlite3_api_routines {
 #define sqlite3_wal_autocheckpoint     sqlite3_api->wal_autocheckpoint
 #define sqlite3_wal_checkpoint         sqlite3_api->wal_checkpoint
 #define sqlite3_wal_hook               sqlite3_api->wal_hook
+#define sqlite3_blob_reopen            sqlite3_api->blob_reopen
+#define sqlite3_vtab_config            sqlite3_api->vtab_config
+#define sqlite3_vtab_on_conflict       sqlite3_api->vtab_on_conflict
 #endif /* SQLITE_CORE */
 
 #define SQLITE_EXTENSION_INIT1     const sqlite3_api_routines *sqlite3_api = 0;
@@ -87445,6 +89664,7 @@ struct sqlite3_api_routines {
 
 /************** End of sqlite3ext.h ******************************************/
 /************** Continuing where we left off in loadext.c ********************/
+/* #include <string.h> */
 
 #ifndef SQLITE_OMIT_LOAD_EXTENSION
 
@@ -87510,6 +89730,8 @@ struct sqlite3_api_routines {
 # define sqlite3_create_module 0
 # define sqlite3_create_module_v2 0
 # define sqlite3_declare_vtab 0
+# define sqlite3_vtab_config 0
+# define sqlite3_vtab_on_conflict 0
 #endif
 
 #ifdef SQLITE_OMIT_SHARED_CACHE
@@ -87533,6 +89755,7 @@ struct sqlite3_api_routines {
 #define sqlite3_blob_open      0
 #define sqlite3_blob_read      0
 #define sqlite3_blob_write     0
+#define sqlite3_blob_reopen    0
 #endif
 
 /*
@@ -87798,6 +90021,9 @@ static const sqlite3_api_routines sqlite3Apis = {
   0,
   0,
 #endif
+  sqlite3_blob_reopen,
+  sqlite3_vtab_config,
+  sqlite3_vtab_on_conflict,
 };
 
 /*
@@ -87823,7 +90049,7 @@ static int sqlite3LoadExtension(
   int (*xInit)(sqlite3*,char**,const sqlite3_api_routines*);
   char *zErrmsg = 0;
   void **aHandle;
-  const int nMsg = 300;
+  int nMsg = 300 + sqlite3Strlen30(zFile);
 
   if( pzErrMsg ) *pzErrMsg = 0;
 
@@ -87860,6 +90086,7 @@ static int sqlite3LoadExtension(
                    sqlite3OsDlSym(pVfs, handle, zProc);
   if( xInit==0 ){
     if( pzErrMsg ){
+      nMsg += sqlite3Strlen30(zProc);
       *pzErrMsg = zErrmsg = sqlite3_malloc(nMsg);
       if( zErrmsg ){
         sqlite3_snprintf(nMsg, zErrmsg,
@@ -88545,7 +90772,7 @@ SQLITE_PRIVATE void sqlite3Pragma(
     if( sqlite3ReadSchema(pParse) ) goto pragma_out;
     sqlite3CodeVerifySchema(pParse, iDb);
     iReg = ++pParse->nMem;
-    if( zLeft[0]=='p' ){
+    if( sqlite3Tolower(zLeft[0])=='p' ){
       sqlite3VdbeAddOp2(v, OP_Pagecount, iDb, iReg);
     }else{
       sqlite3VdbeAddOp3(v, OP_MaxPgcnt, iDb, iReg, sqlite3Atoi(zRight));
@@ -88611,8 +90838,10 @@ SQLITE_PRIVATE void sqlite3Pragma(
     int eMode;        /* One of the PAGER_JOURNALMODE_XXX symbols */
     int ii;           /* Loop counter */
 
-    /* Force the schema to be loaded on all databases.  This cases all
-    ** database files to be opened and the journal_modes set. */
+    /* Force the schema to be loaded on all databases.  This causes all
+    ** database files to be opened and the journal_modes set.  This is
+    ** necessary because subsequent processing must know if the databases
+    ** are in WAL mode. */
     if( sqlite3ReadSchema(pParse) ){
       goto pragma_out;
     }
@@ -89210,7 +91439,7 @@ SQLITE_PRIVATE void sqlite3Pragma(
       { OP_ResultRow,   3, 1,        0},
     };
 
-    int isQuick = (zLeft[0]=='q');
+    int isQuick = (sqlite3Tolower(zLeft[0])=='q');
 
     /* Initialize the VDBE program */
     if( sqlite3ReadSchema(pParse) ) goto pragma_out;
@@ -90585,6 +92814,7 @@ SQLITE_PRIVATE Select *sqlite3SelectNew(
   pNew = sqlite3DbMallocZero(db, sizeof(*pNew) );
   assert( db->mallocFailed || !pOffset || pLimit ); /* OFFSET implies LIMIT */
   if( pNew==0 ){
+    assert( db->mallocFailed );
     pNew = &standin;
     memset(pNew, 0, sizeof(*pNew));
   }
@@ -90609,7 +92839,10 @@ SQLITE_PRIVATE Select *sqlite3SelectNew(
     clearSelect(db, pNew);
     if( pNew!=&standin ) sqlite3DbFree(db, pNew);
     pNew = 0;
+  }else{
+    assert( pNew->pSrc!=0 || pParse->nErr>0 );
   }
+  assert( pNew!=&standin );
   return pNew;
 }
 
@@ -90939,12 +93172,18 @@ static void pushOntoSorter(
   int nExpr = pOrderBy->nExpr;
   int regBase = sqlite3GetTempRange(pParse, nExpr+2);
   int regRecord = sqlite3GetTempReg(pParse);
+  int op;
   sqlite3ExprCacheClear(pParse);
   sqlite3ExprCodeExprList(pParse, pOrderBy, regBase, 0);
   sqlite3VdbeAddOp2(v, OP_Sequence, pOrderBy->iECursor, regBase+nExpr);
   sqlite3ExprCodeMove(pParse, regData, regBase+nExpr+1, 1);
   sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nExpr + 2, regRecord);
-  sqlite3VdbeAddOp2(v, OP_IdxInsert, pOrderBy->iECursor, regRecord);
+  if( pSelect->selFlags & SF_UseSorter ){
+    op = OP_SorterInsert;
+  }else{
+    op = OP_IdxInsert;
+  }
+  sqlite3VdbeAddOp2(v, op, pOrderBy->iECursor, regRecord);
   sqlite3ReleaseTempReg(pParse, regRecord);
   sqlite3ReleaseTempRange(pParse, regBase, nExpr+2);
   if( pSelect->iLimit ){
@@ -91413,9 +93652,20 @@ static void generateSortTail(
   }else{
     regRowid = sqlite3GetTempReg(pParse);
   }
-  addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak);
-  codeOffset(v, p, addrContinue);
-  sqlite3VdbeAddOp3(v, OP_Column, iTab, pOrderBy->nExpr + 1, regRow);
+  if( p->selFlags & SF_UseSorter ){
+    int regSortOut = ++pParse->nMem;
+    int ptab2 = pParse->nTab++;
+    sqlite3VdbeAddOp3(v, OP_OpenPseudo, ptab2, regSortOut, pOrderBy->nExpr+2);
+    addr = 1 + sqlite3VdbeAddOp2(v, OP_SorterSort, iTab, addrBreak);
+    codeOffset(v, p, addrContinue);
+    sqlite3VdbeAddOp2(v, OP_SorterData, iTab, regSortOut);
+    sqlite3VdbeAddOp3(v, OP_Column, ptab2, pOrderBy->nExpr+1, regRow);
+    sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
+  }else{
+    addr = 1 + sqlite3VdbeAddOp2(v, OP_Sort, iTab, addrBreak);
+    codeOffset(v, p, addrContinue);
+    sqlite3VdbeAddOp3(v, OP_Column, iTab, pOrderBy->nExpr+1, regRow);
+  }
   switch( eDest ){
     case SRT_Table:
     case SRT_EphemTab: {
@@ -91468,7 +93718,11 @@ static void generateSortTail(
   /* The bottom of the loop
   */
   sqlite3VdbeResolveLabel(v, addrContinue);
-  sqlite3VdbeAddOp2(v, OP_Next, iTab, addr);
+  if( p->selFlags & SF_UseSorter ){
+    sqlite3VdbeAddOp2(v, OP_SorterNext, iTab, addr);
+  }else{
+    sqlite3VdbeAddOp2(v, OP_Next, iTab, addr);
+  }
   sqlite3VdbeResolveLabel(v, addrBreak);
   if( eDest==SRT_Output || eDest==SRT_Coroutine ){
     sqlite3VdbeAddOp2(v, OP_Close, pseudoTab, 0);
@@ -91767,7 +94021,10 @@ static int selectColumnsFromExprList(
     }else{
       Expr *pColExpr = p;  /* The expression that is the result column name */
       Table *pTab;         /* Table associated with this expression */
-      while( pColExpr->op==TK_DOT ) pColExpr = pColExpr->pRight;
+      while( pColExpr->op==TK_DOT ){
+        pColExpr = pColExpr->pRight;
+        assert( pColExpr!=0 );
+      }
       if( pColExpr->op==TK_COLUMN && ALWAYS(pColExpr->pTab!=0) ){
         /* For columns use the column name name */
         int iCol = pColExpr->iColumn;
@@ -94241,6 +96498,7 @@ SQLITE_PRIVATE int sqlite3Select(
   int distinct;          /* Table to use for the distinct set */
   int rc = 1;            /* Value to return from this function */
   int addrSortIndex;     /* Address of an OP_OpenEphemeral instruction */
+  int addrDistinctIndex; /* Address of an OP_OpenEphemeral instruction */
   AggInfo sAggInfo;      /* Information used by aggregate queries */
   int iEnd;              /* Address of the end of the query */
   sqlite3 *db;           /* The database connection */
@@ -94299,7 +96557,11 @@ SQLITE_PRIVATE int sqlite3Select(
     Select *pSub = pItem->pSelect;
     int isAggSub;
 
-    if( pSub==0 || pItem->isPopulated ) continue;
+    if( pSub==0 ) continue;
+    if( pItem->addrFillSub ){
+      sqlite3VdbeAddOp2(v, OP_Gosub, pItem->regReturn, pItem->addrFillSub);
+      continue;
+    }
 
     /* Increment Parse.nHeight by the height of the largest expression
     ** tree refered to by this, the parent select. The child select
@@ -94310,21 +96572,44 @@ SQLITE_PRIVATE int sqlite3Select(
     */
     pParse->nHeight += sqlite3SelectExprHeight(p);
 
-    /* Check to see if the subquery can be absorbed into the parent. */
     isAggSub = (pSub->selFlags & SF_Aggregate)!=0;
     if( flattenSubquery(pParse, p, i, isAgg, isAggSub) ){
+      /* This subquery can be absorbed into its parent. */
       if( isAggSub ){
         isAgg = 1;
         p->selFlags |= SF_Aggregate;
       }
       i = -1;
     }else{
+      /* Generate a subroutine that will fill an ephemeral table with
+      ** the content of this subquery.  pItem->addrFillSub will point
+      ** to the address of the generated subroutine.  pItem->regReturn
+      ** is a register allocated to hold the subroutine return address
+      */
+      int topAddr;
+      int onceAddr = 0;
+      int retAddr;
+      assert( pItem->addrFillSub==0 );
+      pItem->regReturn = ++pParse->nMem;
+      topAddr = sqlite3VdbeAddOp2(v, OP_Integer, 0, pItem->regReturn);
+      pItem->addrFillSub = topAddr+1;
+      VdbeNoopComment((v, "materialize %s", pItem->pTab->zName));
+      if( pItem->isCorrelated==0 && pParse->pTriggerTab==0 ){
+        /* If the subquery is not correlated and if we are not inside of
+        ** a trigger, then we only need to compute the value of the subquery
+        ** once. */
+        int regOnce = ++pParse->nMem;
+        onceAddr = sqlite3VdbeAddOp1(v, OP_Once, regOnce);
+      }
       sqlite3SelectDestInit(&dest, SRT_EphemTab, pItem->iCursor);
-      assert( pItem->isPopulated==0 );
       explainSetInteger(pItem->iSelectId, (u8)pParse->iNextSelectId);
       sqlite3Select(pParse, pSub, &dest);
-      pItem->isPopulated = 1;
       pItem->pTab->nRowEst = (unsigned)pSub->nSelectRow;
+      if( onceAddr ) sqlite3VdbeJumpHere(v, onceAddr);
+      retAddr = sqlite3VdbeAddOp1(v, OP_Return, pItem->regReturn);
+      VdbeComment((v, "end %s", pItem->pTab->zName));
+      sqlite3VdbeChangeP1(v, topAddr, retAddr);
+
     }
     if( /*pParse->nErr ||*/ db->mallocFailed ){
       goto select_end;
@@ -94367,16 +96652,6 @@ SQLITE_PRIVATE int sqlite3Select(
   }
 #endif
 
-  /* If possible, rewrite the query to use GROUP BY instead of DISTINCT.
-  ** GROUP BY might use an index, DISTINCT never does.
-  */
-  assert( p->pGroupBy==0 || (p->selFlags & SF_Aggregate)!=0 );
-  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct ){
-    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
-    pGroupBy = p->pGroupBy;
-    p->selFlags &= ~SF_Distinct;
-  }
-
   /* If there is both a GROUP BY and an ORDER BY clause and they are
   ** identical, then disable the ORDER BY clause since the GROUP BY
   ** will cause elements to come out in the correct order.  This is
@@ -94389,6 +96664,30 @@ SQLITE_PRIVATE int sqlite3Select(
     pOrderBy = 0;
   }
 
+  /* If the query is DISTINCT with an ORDER BY but is not an aggregate, and 
+  ** if the select-list is the same as the ORDER BY list, then this query
+  ** can be rewritten as a GROUP BY. In other words, this:
+  **
+  **     SELECT DISTINCT xyz FROM ... ORDER BY xyz
+  **
+  ** is transformed to:
+  **
+  **     SELECT xyz FROM ... GROUP BY xyz
+  **
+  ** The second form is preferred as a single index (or temp-table) may be 
+  ** used for both the ORDER BY and DISTINCT processing. As originally 
+  ** written the query must use a temp-table for at least one of the ORDER 
+  ** BY and DISTINCT, and an index or separate temp-table for the other.
+  */
+  if( (p->selFlags & (SF_Distinct|SF_Aggregate))==SF_Distinct 
+   && sqlite3ExprListCompare(pOrderBy, p->pEList)==0
+  ){
+    p->selFlags &= ~SF_Distinct;
+    p->pGroupBy = sqlite3ExprListDup(db, p->pEList, 0);
+    pGroupBy = p->pGroupBy;
+    pOrderBy = 0;
+  }
+
   /* If there is an ORDER BY clause, then this sorting
   ** index might end up being unused if the data can be 
   ** extracted in pre-sorted order.  If that is the case, then the
@@ -94419,27 +96718,30 @@ SQLITE_PRIVATE int sqlite3Select(
   iEnd = sqlite3VdbeMakeLabel(v);
   p->nSelectRow = (double)LARGEST_INT64;
   computeLimitRegisters(pParse, p, iEnd);
+  if( p->iLimit==0 && addrSortIndex>=0 ){
+    sqlite3VdbeGetOp(v, addrSortIndex)->opcode = OP_SorterOpen;
+    p->selFlags |= SF_UseSorter;
+  }
 
   /* Open a virtual index to use for the distinct set.
   */
   if( p->selFlags & SF_Distinct ){
     KeyInfo *pKeyInfo;
-    assert( isAgg || pGroupBy );
     distinct = pParse->nTab++;
     pKeyInfo = keyInfoFromExprList(pParse, p->pEList);
-    sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
-                        (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
+    addrDistinctIndex = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, distinct, 0, 0,
+        (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
     sqlite3VdbeChangeP5(v, BTREE_UNORDERED);
   }else{
-    distinct = -1;
+    distinct = addrDistinctIndex = -1;
   }
 
   /* Aggregate and non-aggregate queries are handled differently */
   if( !isAgg && pGroupBy==0 ){
-    /* This case is for non-aggregate queries
-    ** Begin the database scan
-    */
-    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, 0);
+    ExprList *pDist = (isDistinct ? p->pEList : 0);
+
+    /* Begin the database scan. */
+    pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pOrderBy, pDist, 0);
     if( pWInfo==0 ) goto select_end;
     if( pWInfo->nRowOut < p->nSelectRow ) p->nSelectRow = pWInfo->nRowOut;
 
@@ -94448,14 +96750,56 @@ SQLITE_PRIVATE int sqlite3Select(
     ** into an OP_Noop.
     */
     if( addrSortIndex>=0 && pOrderBy==0 ){
-      sqlite3VdbeChangeToNoop(v, addrSortIndex, 1);
+      sqlite3VdbeChangeToNoop(v, addrSortIndex);
       p->addrOpenEphm[2] = -1;
     }
 
-    /* Use the standard inner loop
-    */
-    assert(!isDistinct);
-    selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, -1, pDest,
+    if( pWInfo->eDistinct ){
+      VdbeOp *pOp;                /* No longer required OpenEphemeral instr. */
+     
+      assert( addrDistinctIndex>=0 );
+      pOp = sqlite3VdbeGetOp(v, addrDistinctIndex);
+
+      assert( isDistinct );
+      assert( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED 
+           || pWInfo->eDistinct==WHERE_DISTINCT_UNIQUE 
+      );
+      distinct = -1;
+      if( pWInfo->eDistinct==WHERE_DISTINCT_ORDERED ){
+        int iJump;
+        int iExpr;
+        int iFlag = ++pParse->nMem;
+        int iBase = pParse->nMem+1;
+        int iBase2 = iBase + pEList->nExpr;
+        pParse->nMem += (pEList->nExpr*2);
+
+        /* Change the OP_OpenEphemeral coded earlier to an OP_Integer. The
+        ** OP_Integer initializes the "first row" flag.  */
+        pOp->opcode = OP_Integer;
+        pOp->p1 = 1;
+        pOp->p2 = iFlag;
+
+        sqlite3ExprCodeExprList(pParse, pEList, iBase, 1);
+        iJump = sqlite3VdbeCurrentAddr(v) + 1 + pEList->nExpr + 1 + 1;
+        sqlite3VdbeAddOp2(v, OP_If, iFlag, iJump-1);
+        for(iExpr=0; iExpr<pEList->nExpr; iExpr++){
+          CollSeq *pColl = sqlite3ExprCollSeq(pParse, pEList->a[iExpr].pExpr);
+          sqlite3VdbeAddOp3(v, OP_Ne, iBase+iExpr, iJump, iBase2+iExpr);
+          sqlite3VdbeChangeP4(v, -1, (const char *)pColl, P4_COLLSEQ);
+          sqlite3VdbeChangeP5(v, SQLITE_NULLEQ);
+        }
+        sqlite3VdbeAddOp2(v, OP_Goto, 0, pWInfo->iContinue);
+
+        sqlite3VdbeAddOp2(v, OP_Integer, 0, iFlag);
+        assert( sqlite3VdbeCurrentAddr(v)==iJump );
+        sqlite3VdbeAddOp3(v, OP_Move, iBase, iBase2, pEList->nExpr);
+      }else{
+        pOp->opcode = OP_Noop;
+      }
+    }
+
+    /* Use the standard inner loop. */
+    selectInnerLoop(pParse, p, pEList, 0, 0, pOrderBy, distinct, pDest,
                     pWInfo->iContinue, pWInfo->iBreak);
 
     /* End the database scan loop.
@@ -94472,6 +96816,8 @@ SQLITE_PRIVATE int sqlite3Select(
     int iAbortFlag;     /* Mem address which causes query abort if positive */
     int groupBySort;    /* Rows come from source in GROUP BY order */
     int addrEnd;        /* End of processing for this SELECT */
+    int sortPTab = 0;   /* Pseudotable used to decode sorting results */
+    int sortOut = 0;    /* Output register from the sorter */
 
     /* Remove any and all aliases between the result set and the
     ** GROUP BY clause.
@@ -94533,12 +96879,12 @@ SQLITE_PRIVATE int sqlite3Select(
 
       /* If there is a GROUP BY clause we might need a sorting index to
       ** implement it.  Allocate that sorting index now.  If it turns out
-      ** that we do not need it after all, the OpenEphemeral instruction
+      ** that we do not need it after all, the OP_SorterOpen instruction
       ** will be converted into a Noop.  
       */
       sAggInfo.sortingIdx = pParse->nTab++;
       pKeyInfo = keyInfoFromExprList(pParse, pGroupBy);
-      addrSortingIdx = sqlite3VdbeAddOp4(v, OP_OpenEphemeral, 
+      addrSortingIdx = sqlite3VdbeAddOp4(v, OP_SorterOpen, 
           sAggInfo.sortingIdx, sAggInfo.nSortingColumn, 
           0, (char*)pKeyInfo, P4_KEYINFO_HANDOFF);
 
@@ -94565,7 +96911,7 @@ SQLITE_PRIVATE int sqlite3Select(
       ** in the right order to begin with.
       */
       sqlite3VdbeAddOp2(v, OP_Gosub, regReset, addrReset);
-      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0);
+      pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pGroupBy, 0, 0);
       if( pWInfo==0 ) goto select_end;
       if( pGroupBy==0 ){
         /* The optimizer is able to deliver rows in group by order so
@@ -94619,11 +96965,14 @@ SQLITE_PRIVATE int sqlite3Select(
         }
         regRecord = sqlite3GetTempReg(pParse);
         sqlite3VdbeAddOp3(v, OP_MakeRecord, regBase, nCol, regRecord);
-        sqlite3VdbeAddOp2(v, OP_IdxInsert, sAggInfo.sortingIdx, regRecord);
+        sqlite3VdbeAddOp2(v, OP_SorterInsert, sAggInfo.sortingIdx, regRecord);
         sqlite3ReleaseTempReg(pParse, regRecord);
         sqlite3ReleaseTempRange(pParse, regBase, nCol);
         sqlite3WhereEnd(pWInfo);
-        sqlite3VdbeAddOp2(v, OP_Sort, sAggInfo.sortingIdx, addrEnd);
+        sAggInfo.sortingIdxPTab = sortPTab = pParse->nTab++;
+        sortOut = sqlite3GetTempReg(pParse);
+        sqlite3VdbeAddOp3(v, OP_OpenPseudo, sortPTab, sortOut, nCol);
+        sqlite3VdbeAddOp2(v, OP_SorterSort, sAggInfo.sortingIdx, addrEnd);
         VdbeComment((v, "GROUP BY sort"));
         sAggInfo.useSortingIdx = 1;
         sqlite3ExprCacheClear(pParse);
@@ -94636,9 +96985,13 @@ SQLITE_PRIVATE int sqlite3Select(
       */
       addrTopOfLoop = sqlite3VdbeCurrentAddr(v);
       sqlite3ExprCacheClear(pParse);
+      if( groupBySort ){
+        sqlite3VdbeAddOp2(v, OP_SorterData, sAggInfo.sortingIdx, sortOut);
+      }
       for(j=0; j<pGroupBy->nExpr; j++){
         if( groupBySort ){
-          sqlite3VdbeAddOp3(v, OP_Column, sAggInfo.sortingIdx, j, iBMem+j);
+          sqlite3VdbeAddOp3(v, OP_Column, sortPTab, j, iBMem+j);
+          if( j==0 ) sqlite3VdbeChangeP5(v, OPFLAG_CLEARCACHE);
         }else{
           sAggInfo.directMode = 1;
           sqlite3ExprCode(pParse, pGroupBy->a[j].pExpr, iBMem+j);
@@ -94677,10 +97030,10 @@ SQLITE_PRIVATE int sqlite3Select(
       /* End of the loop
       */
       if( groupBySort ){
-        sqlite3VdbeAddOp2(v, OP_Next, sAggInfo.sortingIdx, addrTopOfLoop);
+        sqlite3VdbeAddOp2(v, OP_SorterNext, sAggInfo.sortingIdx, addrTopOfLoop);
       }else{
         sqlite3WhereEnd(pWInfo);
-        sqlite3VdbeChangeToNoop(v, addrSortingIdx, 1);
+        sqlite3VdbeChangeToNoop(v, addrSortingIdx);
       }
 
       /* Output the final row of result
@@ -94827,7 +97180,7 @@ SQLITE_PRIVATE int sqlite3Select(
         ** of output.
         */
         resetAccumulator(pParse, &sAggInfo);
-        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, flag);
+        pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere, &pMinMax, 0, flag);
         if( pWInfo==0 ){
           sqlite3ExprListDelete(db, pDel);
           goto select_end;
@@ -95006,6 +97359,8 @@ SQLITE_PRIVATE void sqlite3PrintSelect(Select *p, int indent){
 ** These routines are in a separate files so that they will not be linked
 ** if they are not used.
 */
+/* #include <stdlib.h> */
+/* #include <string.h> */
 
 #ifndef SQLITE_OMIT_GET_TABLE
 
@@ -95303,15 +97658,28 @@ SQLITE_PRIVATE void sqlite3BeginTrigger(
       goto trigger_cleanup;
     }
   }
+  if( !pTableName || db->mallocFailed ){
+    goto trigger_cleanup;
+  }
+
+  /* A long-standing parser bug is that this syntax was allowed:
+  **
+  **    CREATE TRIGGER attached.demo AFTER INSERT ON attached.tab ....
+  **                                                 ^^^^^^^^
+  **
+  ** To maintain backwards compatibility, ignore the database
+  ** name on pTableName if we are reparsing out of SQLITE_MASTER.
+  */
+  if( db->init.busy && iDb!=1 ){
+    sqlite3DbFree(db, pTableName->a[0].zDatabase);
+    pTableName->a[0].zDatabase = 0;
+  }
 
   /* If the trigger name was unqualified, and the table is a temp table,
   ** then set iDb to 1 to create the trigger in the temporary database.
   ** If sqlite3SrcListLookup() returns 0, indicating the table does not
   ** exist, the error is caught by the block below.
   */
-  if( !pTableName || db->mallocFailed ){
-    goto trigger_cleanup;
-  }
   pTab = sqlite3SrcListLookup(pParse, pTableName);
   if( db->init.busy==0 && pName2->n==0 && pTab
         && pTab->pSchema==db->aDb[1].pSchema ){
@@ -96609,7 +98977,9 @@ SQLITE_PRIVATE void sqlite3Update(
   /* Begin the database scan
   */
   sqlite3VdbeAddOp2(v, OP_Null, 0, regOldRowid);
-  pWInfo = sqlite3WhereBegin(pParse, pTabList, pWhere,0, WHERE_ONEPASS_DESIRED);
+  pWInfo = sqlite3WhereBegin(
+      pParse, pTabList, pWhere, 0, 0, WHERE_ONEPASS_DESIRED
+  );
   if( pWInfo==0 ) goto update_cleanup;
   okOnePass = pWInfo->okOnePass;
 
@@ -96652,6 +99022,7 @@ SQLITE_PRIVATE void sqlite3Update(
       }
     }
     for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){
+      assert( aRegIdx );
       if( openAll || aRegIdx[i]>0 ){
         KeyInfo *pKey = sqlite3IndexKeyinfo(pParse, pIdx);
         sqlite3VdbeAddOp4(v, OP_OpenWrite, iCur+i+1, pIdx->tnum, iDb,
@@ -96825,6 +99196,7 @@ SQLITE_PRIVATE void sqlite3Update(
 
   /* Close all tables */
   for(i=0, pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext, i++){
+    assert( aRegIdx );
     if( openAll || aRegIdx[i]>0 ){
       sqlite3VdbeAddOp2(v, OP_Close, iCur+i+1, 0);
     }
@@ -97012,7 +99384,7 @@ static int execSql(sqlite3 *db, char **pzErrMsg, const char *zSql){
     return sqlite3_errcode(db);
   }
   VVA_ONLY( rc = ) sqlite3_step(pStmt);
-  assert( rc!=SQLITE_ROW );
+  assert( rc!=SQLITE_ROW || (db->flags&SQLITE_CountRows) );
   return vacuumFinalize(db, pStmt, pzErrMsg);
 }
 
@@ -97230,13 +99602,11 @@ SQLITE_PRIVATE int sqlite3RunVacuum(char **pzErrMsg, sqlite3 *db){
   );
   if( rc ) goto end_of_vacuum;
 
-  /* At this point, unless the main db was completely empty, there is now a
-  ** transaction open on the vacuum database, but not on the main database.
-  ** Open a btree level transaction on the main database. This allows a
-  ** call to sqlite3BtreeCopyFile(). The main database btree level
-  ** transaction is then committed, so the SQL level never knows it was
-  ** opened for writing. This way, the SQL transaction used to create the
-  ** temporary database never needs to be committed.
+  /* At this point, there is a write transaction open on both the 
+  ** vacuum database and the main database. Assuming no error occurs,
+  ** both transactions are closed by this block - the main database
+  ** transaction by sqlite3BtreeCopyFile() and the other by an explicit
+  ** call to sqlite3BtreeCommit().
   */
   {
     u32 meta;
@@ -98205,7 +100575,7 @@ SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *db, int op, int iSavepoint){
     for(i=0; rc==SQLITE_OK && i<db->nVTrans; i++){
       VTable *pVTab = db->aVTrans[i];
       const sqlite3_module *pMod = pVTab->pMod->pModule;
-      if( pMod->iVersion>=2 ){
+      if( pVTab->pVtab && pMod->iVersion>=2 ){
         int (*xMethod)(sqlite3_vtab *, int);
         switch( op ){
           case SAVEPOINT_BEGIN:
@@ -98220,7 +100590,7 @@ SQLITE_PRIVATE int sqlite3VtabSavepoint(sqlite3 *db, int op, int iSavepoint){
             break;
         }
         if( xMethod && pVTab->iSavepoint>iSavepoint ){
-          rc = xMethod(db->aVTrans[i]->pVtab, iSavepoint);
+          rc = xMethod(pVTab->pVtab, iSavepoint);
         }
       }
     }
@@ -98500,21 +100870,31 @@ struct WhereTerm {
 #define TERM_ORINFO     0x10   /* Need to free the WhereTerm.u.pOrInfo object */
 #define TERM_ANDINFO    0x20   /* Need to free the WhereTerm.u.pAndInfo obj */
 #define TERM_OR_OK      0x40   /* Used during OR-clause processing */
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 #  define TERM_VNULL    0x80   /* Manufactured x>NULL or x<=NULL term */
 #else
-#  define TERM_VNULL    0x00   /* Disabled if not using stat2 */
+#  define TERM_VNULL    0x00   /* Disabled if not using stat3 */
 #endif
 
 /*
 ** An instance of the following structure holds all information about a
 ** WHERE clause.  Mostly this is a container for one or more WhereTerms.
+**
+** Explanation of pOuter:  For a WHERE clause of the form
+**
+**           a AND ((b AND c) OR (d AND e)) AND f
+**
+** There are separate WhereClause objects for the whole clause and for
+** the subclauses "(b AND c)" and "(d AND e)".  The pOuter field of the
+** subclauses points to the WhereClause object for the whole clause.
 */
 struct WhereClause {
   Parse *pParse;           /* The parser context */
   WhereMaskSet *pMaskSet;  /* Mapping of table cursor numbers to bitmasks */
   Bitmask vmask;           /* Bitmask identifying virtual table cursors */
+  WhereClause *pOuter;     /* Outer conjunction */
   u8 op;                   /* Split operator.  TK_AND or TK_OR */
+  u16 wctrlFlags;          /* Might include WHERE_AND_ONLY */
   int nTerm;               /* Number of terms */
   int nSlot;               /* Number of entries in a[] */
   WhereTerm *a;            /* Each a[] describes a term of the WHERE cluase */
@@ -98635,6 +101015,7 @@ struct WhereCost {
 #define WHERE_VIRTUALTABLE 0x08000000  /* Use virtual-table processing */
 #define WHERE_MULTI_OR     0x10000000  /* OR using multiple indices */
 #define WHERE_TEMP_INDEX   0x20000000  /* Uses an ephemeral index */
+#define WHERE_DISTINCT     0x40000000  /* Correct order for DISTINCT */
 
 /*
 ** Initialize a preallocated WhereClause structure.
@@ -98642,14 +101023,17 @@ struct WhereCost {
 static void whereClauseInit(
   WhereClause *pWC,        /* The WhereClause to be initialized */
   Parse *pParse,           /* The parsing context */
-  WhereMaskSet *pMaskSet   /* Mapping from table cursor numbers to bitmasks */
+  WhereMaskSet *pMaskSet,  /* Mapping from table cursor numbers to bitmasks */
+  u16 wctrlFlags           /* Might include WHERE_AND_ONLY */
 ){
   pWC->pParse = pParse;
   pWC->pMaskSet = pMaskSet;
+  pWC->pOuter = 0;
   pWC->nTerm = 0;
   pWC->nSlot = ArraySize(pWC->aStatic);
   pWC->a = pWC->aStatic;
   pWC->vmask = 0;
+  pWC->wctrlFlags = wctrlFlags;
 }
 
 /* Forward reference */
@@ -98848,11 +101232,19 @@ static Bitmask exprListTableUsage(WhereMaskSet *pMaskSet, ExprList *pList){
 static Bitmask exprSelectTableUsage(WhereMaskSet *pMaskSet, Select *pS){
   Bitmask mask = 0;
   while( pS ){
+    SrcList *pSrc = pS->pSrc;
     mask |= exprListTableUsage(pMaskSet, pS->pEList);
     mask |= exprListTableUsage(pMaskSet, pS->pGroupBy);
     mask |= exprListTableUsage(pMaskSet, pS->pOrderBy);
     mask |= exprTableUsage(pMaskSet, pS->pWhere);
     mask |= exprTableUsage(pMaskSet, pS->pHaving);
+    if( ALWAYS(pSrc!=0) ){
+      int i;
+      for(i=0; i<pSrc->nSrc; i++){
+        mask |= exprSelectTableUsage(pMaskSet, pSrc->a[i].pSelect);
+        mask |= exprTableUsage(pMaskSet, pSrc->a[i].pOn);
+      }
+    }
     pS = pS->pPrior;
   }
   return mask;
@@ -98957,36 +101349,38 @@ static WhereTerm *findTerm(
   int k;
   assert( iCur>=0 );
   op &= WO_ALL;
-  for(pTerm=pWC->a, k=pWC->nTerm; k; k--, pTerm++){
-    if( pTerm->leftCursor==iCur
-       && (pTerm->prereqRight & notReady)==0
-       && pTerm->u.leftColumn==iColumn
-       && (pTerm->eOperator & op)!=0
-    ){
-      if( pIdx && pTerm->eOperator!=WO_ISNULL ){
-        Expr *pX = pTerm->pExpr;
-        CollSeq *pColl;
-        char idxaff;
-        int j;
-        Parse *pParse = pWC->pParse;
-
-        idxaff = pIdx->pTable->aCol[iColumn].affinity;
-        if( !sqlite3IndexAffinityOk(pX, idxaff) ) continue;
-
-        /* Figure out the collation sequence required from an index for
-        ** it to be useful for optimising expression pX. Store this
-        ** value in variable pColl.
-        */
-        assert(pX->pLeft);
-        pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
-        assert(pColl || pParse->nErr);
-
-        for(j=0; pIdx->aiColumn[j]!=iColumn; j++){
-          if( NEVER(j>=pIdx->nColumn) ) return 0;
+  for(; pWC; pWC=pWC->pOuter){
+    for(pTerm=pWC->a, k=pWC->nTerm; k; k--, pTerm++){
+      if( pTerm->leftCursor==iCur
+         && (pTerm->prereqRight & notReady)==0
+         && pTerm->u.leftColumn==iColumn
+         && (pTerm->eOperator & op)!=0
+      ){
+        if( pIdx && pTerm->eOperator!=WO_ISNULL ){
+          Expr *pX = pTerm->pExpr;
+          CollSeq *pColl;
+          char idxaff;
+          int j;
+          Parse *pParse = pWC->pParse;
+  
+          idxaff = pIdx->pTable->aCol[iColumn].affinity;
+          if( !sqlite3IndexAffinityOk(pX, idxaff) ) continue;
+  
+          /* Figure out the collation sequence required from an index for
+          ** it to be useful for optimising expression pX. Store this
+          ** value in variable pColl.
+          */
+          assert(pX->pLeft);
+          pColl = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
+          assert(pColl || pParse->nErr);
+  
+          for(j=0; pIdx->aiColumn[j]!=iColumn; j++){
+            if( NEVER(j>=pIdx->nColumn) ) return 0;
+          }
+          if( pColl && sqlite3StrICmp(pColl->zName, pIdx->azColl[j]) ) continue;
         }
-        if( pColl && sqlite3StrICmp(pColl->zName, pIdx->azColl[j]) ) continue;
+        return pTerm;
       }
-      return pTerm;
     }
   }
   return 0;
@@ -99063,7 +101457,7 @@ static int isLikeOrGlob(
     if( pVal && sqlite3_value_type(pVal)==SQLITE_TEXT ){
       z = (char *)sqlite3_value_text(pVal);
     }
-    sqlite3VdbeSetVarmask(pParse->pVdbe, iCol); /* IMP: R-23257-02778 */
+    sqlite3VdbeSetVarmask(pParse->pVdbe, iCol);
     assert( pRight->op==TK_VARIABLE || pRight->op==TK_REGISTER );
   }else if( op==TK_STRING ){
     z = pRight->u.zToken;
@@ -99081,7 +101475,7 @@ static int isLikeOrGlob(
       *ppPrefix = pPrefix;
       if( op==TK_VARIABLE ){
         Vdbe *v = pParse->pVdbe;
-        sqlite3VdbeSetVarmask(v, pRight->iColumn); /* IMP: R-23257-02778 */
+        sqlite3VdbeSetVarmask(v, pRight->iColumn);
         if( *pisComplete && pRight->u.zToken[1] ){
           /* If the rhs of the LIKE expression is a variable, and the current
           ** value of the variable means there is no need to invoke the LIKE
@@ -99250,7 +101644,7 @@ static void exprAnalyzeOrTerm(
   if( pOrInfo==0 ) return;
   pTerm->wtFlags |= TERM_ORINFO;
   pOrWc = &pOrInfo->wc;
-  whereClauseInit(pOrWc, pWC->pParse, pMaskSet);
+  whereClauseInit(pOrWc, pWC->pParse, pMaskSet, pWC->wctrlFlags);
   whereSplit(pOrWc, pExpr, TK_OR);
   exprAnalyzeAll(pSrc, pOrWc);
   if( db->mallocFailed ) return;
@@ -99277,9 +101671,10 @@ static void exprAnalyzeOrTerm(
         pOrTerm->wtFlags |= TERM_ANDINFO;
         pOrTerm->eOperator = WO_AND;
         pAndWC = &pAndInfo->wc;
-        whereClauseInit(pAndWC, pWC->pParse, pMaskSet);
+        whereClauseInit(pAndWC, pWC->pParse, pMaskSet, pWC->wctrlFlags);
         whereSplit(pAndWC, pOrTerm->pExpr, TK_AND);
         exprAnalyzeAll(pSrc, pAndWC);
+        pAndWC->pOuter = pWC;
         testcase( db->mallocFailed );
         if( !db->mallocFailed ){
           for(j=0, pAndTerm=pAndWC->a; j<pAndWC->nTerm; j++, pAndTerm++){
@@ -99713,8 +102108,8 @@ static void exprAnalyze(
   }
 #endif /* SQLITE_OMIT_VIRTUALTABLE */
 
-#ifdef SQLITE_ENABLE_STAT2
-  /* When sqlite_stat2 histogram data is available an operator of the
+#ifdef SQLITE_ENABLE_STAT3
+  /* When sqlite_stat3 histogram data is available an operator of the
   ** form "x IS NOT NULL" can sometimes be evaluated more efficiently
   ** as "x>NULL" if x is not an INTEGER PRIMARY KEY.  So construct a
   ** virtual term of that form.
@@ -99752,7 +102147,7 @@ static void exprAnalyze(
       pNewTerm->prereqAll = pTerm->prereqAll;
     }
   }
-#endif /* SQLITE_ENABLE_STAT2 */
+#endif /* SQLITE_ENABLE_STAT3 */
 
   /* Prevent ON clause terms of a LEFT JOIN from being used to drive
   ** an index for tables to the left of the join.
@@ -99779,6 +102174,162 @@ static int referencesOtherTables(
   return 0;
 }
 
+/*
+** This function searches the expression list passed as the second argument
+** for an expression of type TK_COLUMN that refers to the same column and
+** uses the same collation sequence as the iCol'th column of index pIdx.
+** Argument iBase is the cursor number used for the table that pIdx refers
+** to.
+**
+** If such an expression is found, its index in pList->a[] is returned. If
+** no expression is found, -1 is returned.
+*/
+static int findIndexCol(
+  Parse *pParse,                  /* Parse context */
+  ExprList *pList,                /* Expression list to search */
+  int iBase,                      /* Cursor for table associated with pIdx */
+  Index *pIdx,                    /* Index to match column of */
+  int iCol                        /* Column of index to match */
+){
+  int i;
+  const char *zColl = pIdx->azColl[iCol];
+
+  for(i=0; i<pList->nExpr; i++){
+    Expr *p = pList->a[i].pExpr;
+    if( p->op==TK_COLUMN
+     && p->iColumn==pIdx->aiColumn[iCol]
+     && p->iTable==iBase
+    ){
+      CollSeq *pColl = sqlite3ExprCollSeq(pParse, p);
+      if( ALWAYS(pColl) && 0==sqlite3StrICmp(pColl->zName, zColl) ){
+        return i;
+      }
+    }
+  }
+
+  return -1;
+}
+
+/*
+** This routine determines if pIdx can be used to assist in processing a
+** DISTINCT qualifier. In other words, it tests whether or not using this
+** index for the outer loop guarantees that rows with equal values for
+** all expressions in the pDistinct list are delivered grouped together.
+**
+** For example, the query 
+**
+**   SELECT DISTINCT a, b, c FROM tbl WHERE a = ?
+**
+** can benefit from any index on columns "b" and "c".
+*/
+static int isDistinctIndex(
+  Parse *pParse,                  /* Parsing context */
+  WhereClause *pWC,               /* The WHERE clause */
+  Index *pIdx,                    /* The index being considered */
+  int base,                       /* Cursor number for the table pIdx is on */
+  ExprList *pDistinct,            /* The DISTINCT expressions */
+  int nEqCol                      /* Number of index columns with == */
+){
+  Bitmask mask = 0;               /* Mask of unaccounted for pDistinct exprs */
+  int i;                          /* Iterator variable */
+
+  if( pIdx->zName==0 || pDistinct==0 || pDistinct->nExpr>=BMS ) return 0;
+  testcase( pDistinct->nExpr==BMS-1 );
+
+  /* Loop through all the expressions in the distinct list. If any of them
+  ** are not simple column references, return early. Otherwise, test if the
+  ** WHERE clause contains a "col=X" clause. If it does, the expression
+  ** can be ignored. If it does not, and the column does not belong to the
+  ** same table as index pIdx, return early. Finally, if there is no
+  ** matching "col=X" expression and the column is on the same table as pIdx,
+  ** set the corresponding bit in variable mask.
+  */
+  for(i=0; i<pDistinct->nExpr; i++){
+    WhereTerm *pTerm;
+    Expr *p = pDistinct->a[i].pExpr;
+    if( p->op!=TK_COLUMN ) return 0;
+    pTerm = findTerm(pWC, p->iTable, p->iColumn, ~(Bitmask)0, WO_EQ, 0);
+    if( pTerm ){
+      Expr *pX = pTerm->pExpr;
+      CollSeq *p1 = sqlite3BinaryCompareCollSeq(pParse, pX->pLeft, pX->pRight);
+      CollSeq *p2 = sqlite3ExprCollSeq(pParse, p);
+      if( p1==p2 ) continue;
+    }
+    if( p->iTable!=base ) return 0;
+    mask |= (((Bitmask)1) << i);
+  }
+
+  for(i=nEqCol; mask && i<pIdx->nColumn; i++){
+    int iExpr = findIndexCol(pParse, pDistinct, base, pIdx, i);
+    if( iExpr<0 ) break;
+    mask &= ~(((Bitmask)1) << iExpr);
+  }
+
+  return (mask==0);
+}
+
+
+/*
+** Return true if the DISTINCT expression-list passed as the fourth argument
+** is redundant. A DISTINCT list is redundant if the database contains a
+** UNIQUE index that guarantees that the result of the query will be distinct
+** anyway.
+*/
+static int isDistinctRedundant(
+  Parse *pParse,
+  SrcList *pTabList,
+  WhereClause *pWC,
+  ExprList *pDistinct
+){
+  Table *pTab;
+  Index *pIdx;
+  int i;                          
+  int iBase;
+
+  /* If there is more than one table or sub-select in the FROM clause of
+  ** this query, then it will not be possible to show that the DISTINCT 
+  ** clause is redundant. */
+  if( pTabList->nSrc!=1 ) return 0;
+  iBase = pTabList->a[0].iCursor;
+  pTab = pTabList->a[0].pTab;
+
+  /* If any of the expressions is an IPK column on table iBase, then return 
+  ** true. Note: The (p->iTable==iBase) part of this test may be false if the
+  ** current SELECT is a correlated sub-query.
+  */
+  for(i=0; i<pDistinct->nExpr; i++){
+    Expr *p = pDistinct->a[i].pExpr;
+    if( p->op==TK_COLUMN && p->iTable==iBase && p->iColumn<0 ) return 1;
+  }
+
+  /* Loop through all indices on the table, checking each to see if it makes
+  ** the DISTINCT qualifier redundant. It does so if:
+  **
+  **   1. The index is itself UNIQUE, and
+  **
+  **   2. All of the columns in the index are either part of the pDistinct
+  **      list, or else the WHERE clause contains a term of the form "col=X",
+  **      where X is a constant value. The collation sequences of the
+  **      comparison and select-list expressions must match those of the index.
+  */
+  for(pIdx=pTab->pIndex; pIdx; pIdx=pIdx->pNext){
+    if( pIdx->onError==OE_None ) continue;
+    for(i=0; i<pIdx->nColumn; i++){
+      int iCol = pIdx->aiColumn[i];
+      if( 0==findTerm(pWC, iBase, iCol, ~(Bitmask)0, WO_EQ, pIdx) 
+       && 0>findIndexCol(pParse, pDistinct, iBase, pIdx, i)
+      ){
+        break;
+      }
+    }
+    if( i==pIdx->nColumn ){
+      /* This index implies that the DISTINCT qualifier is redundant. */
+      return 1;
+    }
+  }
+
+  return 0;
+}
 
 /*
 ** This routine decides if pIdx can be used to satisfy the ORDER BY
@@ -99815,7 +102366,10 @@ static int isSortingIndex(
   struct ExprList_item *pTerm;    /* A term of the ORDER BY clause */
   sqlite3 *db = pParse->db;
 
-  assert( pOrderBy!=0 );
+  if( !pOrderBy ) return 0;
+  if( wsFlags & WHERE_COLUMN_IN ) return 0;
+  if( pIdx->bUnordered ) return 0;
+
   nTerm = pOrderBy->nExpr;
   assert( nTerm>0 );
 
@@ -100015,11 +102569,14 @@ static void bestOrClauseIndex(
   WhereTerm * const pWCEnd = &pWC->a[pWC->nTerm];        /* End of pWC->a[] */
   WhereTerm *pTerm;                 /* A single term of the WHERE clause */
 
-  /* No OR-clause optimization allowed if the INDEXED BY or NOT INDEXED clauses
-  ** are used */
+  /* The OR-clause optimization is disallowed if the INDEXED BY or
+  ** NOT INDEXED clauses are used or if the WHERE_AND_ONLY bit is set. */
   if( pSrc->notIndexed || pSrc->pIndex!=0 ){
     return;
   }
+  if( pWC->wctrlFlags & WHERE_AND_ONLY ){
+    return;
+  }
 
   /* Search the WHERE clause terms for a usable WO_OR term. */
   for(pTerm=pWC->a; pTerm<pWCEnd; pTerm++){
@@ -100047,8 +102604,10 @@ static void bestOrClauseIndex(
           WhereClause tempWC;
           tempWC.pParse = pWC->pParse;
           tempWC.pMaskSet = pWC->pMaskSet;
+          tempWC.pOuter = pWC;
           tempWC.op = TK_AND;
           tempWC.a = pOrTerm;
+          tempWC.wctrlFlags = 0;
           tempWC.nTerm = 1;
           bestIndex(pParse, &tempWC, pSrc, notReady, notValid, 0, &sTermCost);
         }else{
@@ -100128,6 +102687,10 @@ static void bestAutomaticIndex(
   WhereTerm *pWCEnd;          /* End of pWC->a[] */
   Table *pTable;              /* Table tht might be indexed */
 
+  if( pParse->nQueryLoop<=(double)1 ){
+    /* There is no point in building an automatic index for a single scan */
+    return;
+  }
   if( (pParse->db->flags & SQLITE_AutoIndex)==0 ){
     /* Automatic indices are disabled at run-time */
     return;
@@ -100140,6 +102703,10 @@ static void bestAutomaticIndex(
     /* The NOT INDEXED clause appears in the SQL. */
     return;
   }
+  if( pSrc->isCorrelated ){
+    /* The source is a correlated sub-query. No point in indexing it. */
+    return;
+  }
 
   assert( pParse->nQueryLoop >= (double)1 );
   pTable = pSrc->pTab;
@@ -100208,8 +102775,7 @@ static void constructAutomaticIndex(
   v = pParse->pVdbe;
   assert( v!=0 );
   regIsInit = ++pParse->nMem;
-  addrInit = sqlite3VdbeAddOp1(v, OP_If, regIsInit);
-  sqlite3VdbeAddOp2(v, OP_Integer, 1, regIsInit);
+  addrInit = sqlite3VdbeAddOp1(v, OP_Once, regIsInit);
 
   /* Count the number of columns that will be added to the index
   ** and used to match WHERE clause constraints */
@@ -100356,6 +102922,7 @@ static sqlite3_index_info *allocateIndexInfo(
     testcase( pTerm->eOperator==WO_IN );
     testcase( pTerm->eOperator==WO_ISNULL );
     if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue;
+    if( pTerm->wtFlags & TERM_VNULL ) continue;
     nTerm++;
   }
 
@@ -100406,6 +102973,7 @@ static sqlite3_index_info *allocateIndexInfo(
     testcase( pTerm->eOperator==WO_IN );
     testcase( pTerm->eOperator==WO_ISNULL );
     if( pTerm->eOperator & (WO_IN|WO_ISNULL) ) continue;
+    if( pTerm->wtFlags & TERM_VNULL ) continue;
     pIdxCons[j].iColumn = pTerm->u.leftColumn;
     pIdxCons[j].iTermOffset = i;
     pIdxCons[j].op = (u8)pTerm->eOperator;
@@ -100632,67 +103200,85 @@ static void bestVirtualIndex(
 }
 #endif /* SQLITE_OMIT_VIRTUALTABLE */
 
+#ifdef SQLITE_ENABLE_STAT3
 /*
-** Argument pIdx is a pointer to an index structure that has an array of
-** SQLITE_INDEX_SAMPLES evenly spaced samples of the first indexed column
-** stored in Index.aSample. These samples divide the domain of values stored
-** the index into (SQLITE_INDEX_SAMPLES+1) regions.
-** Region 0 contains all values less than the first sample value. Region
-** 1 contains values between the first and second samples.  Region 2 contains
-** values between samples 2 and 3.  And so on.  Region SQLITE_INDEX_SAMPLES
-** contains values larger than the last sample.
-**
-** If the index contains many duplicates of a single value, then it is
-** possible that two or more adjacent samples can hold the same value.
-** When that is the case, the smallest possible region code is returned
-** when roundUp is false and the largest possible region code is returned
-** when roundUp is true.
-**
-** If successful, this function determines which of the regions value 
-** pVal lies in, sets *piRegion to the region index (a value between 0
-** and SQLITE_INDEX_SAMPLES+1, inclusive) and returns SQLITE_OK.
-** Or, if an OOM occurs while converting text values between encodings,
-** SQLITE_NOMEM is returned and *piRegion is undefined.
-*/
-#ifdef SQLITE_ENABLE_STAT2
-static int whereRangeRegion(
+** Estimate the location of a particular key among all keys in an
+** index.  Store the results in aStat as follows:
+**
+**    aStat[0]      Est. number of rows less than pVal
+**    aStat[1]      Est. number of rows equal to pVal
+**
+** Return SQLITE_OK on success.
+*/
+static int whereKeyStats(
   Parse *pParse,              /* Database connection */
   Index *pIdx,                /* Index to consider domain of */
   sqlite3_value *pVal,        /* Value to consider */
-  int roundUp,                /* Return largest valid region if true */
-  int *piRegion               /* OUT: Region of domain in which value lies */
+  int roundUp,                /* Round up if true.  Round down if false */
+  tRowcnt *aStat              /* OUT: stats written here */
 ){
+  tRowcnt n;
+  IndexSample *aSample;
+  int i, eType;
+  int isEq = 0;
+  i64 v;
+  double r, rS;
+
   assert( roundUp==0 || roundUp==1 );
-  if( ALWAYS(pVal) ){
-    IndexSample *aSample = pIdx->aSample;
-    int i = 0;
-    int eType = sqlite3_value_type(pVal);
-
-    if( eType==SQLITE_INTEGER || eType==SQLITE_FLOAT ){
-      double r = sqlite3_value_double(pVal);
-      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
-        if( aSample[i].eType==SQLITE_NULL ) continue;
-        if( aSample[i].eType>=SQLITE_TEXT ) break;
-        if( roundUp ){
-          if( aSample[i].u.r>r ) break;
-        }else{
-          if( aSample[i].u.r>=r ) break;
+  assert( pIdx->nSample>0 );
+  if( pVal==0 ) return SQLITE_ERROR;
+  n = pIdx->aiRowEst[0];
+  aSample = pIdx->aSample;
+  eType = sqlite3_value_type(pVal);
+
+  if( eType==SQLITE_INTEGER ){
+    v = sqlite3_value_int64(pVal);
+    r = (i64)v;
+    for(i=0; i<pIdx->nSample; i++){
+      if( aSample[i].eType==SQLITE_NULL ) continue;
+      if( aSample[i].eType>=SQLITE_TEXT ) break;
+      if( aSample[i].eType==SQLITE_INTEGER ){
+        if( aSample[i].u.i>=v ){
+          isEq = aSample[i].u.i==v;
+          break;
+        }
+      }else{
+        assert( aSample[i].eType==SQLITE_FLOAT );
+        if( aSample[i].u.r>=r ){
+          isEq = aSample[i].u.r==r;
+          break;
         }
       }
-    }else if( eType==SQLITE_NULL ){
-      i = 0;
-      if( roundUp ){
-        while( i<SQLITE_INDEX_SAMPLES && aSample[i].eType==SQLITE_NULL ) i++;
+    }
+  }else if( eType==SQLITE_FLOAT ){
+    r = sqlite3_value_double(pVal);
+    for(i=0; i<pIdx->nSample; i++){
+      if( aSample[i].eType==SQLITE_NULL ) continue;
+      if( aSample[i].eType>=SQLITE_TEXT ) break;
+      if( aSample[i].eType==SQLITE_FLOAT ){
+        rS = aSample[i].u.r;
+      }else{
+        rS = aSample[i].u.i;
+      }
+      if( rS>=r ){
+        isEq = rS==r;
+        break;
+      }
+    }
+  }else if( eType==SQLITE_NULL ){
+    i = 0;
+    if( aSample[0].eType==SQLITE_NULL ) isEq = 1;
+  }else{
+    assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB );
+    for(i=0; i<pIdx->nSample; i++){
+      if( aSample[i].eType==SQLITE_TEXT || aSample[i].eType==SQLITE_BLOB ){
+        break;
       }
-    }else{ 
+    }
+    if( i<pIdx->nSample ){      
       sqlite3 *db = pParse->db;
       CollSeq *pColl;
       const u8 *z;
-      int n;
-
-      /* pVal comes from sqlite3ValueFromExpr() so the type cannot be NULL */
-      assert( eType==SQLITE_TEXT || eType==SQLITE_BLOB );
-
       if( eType==SQLITE_BLOB ){
         z = (const u8 *)sqlite3_value_blob(pVal);
         pColl = db->pDfltColl;
@@ -100711,12 +103297,12 @@ static int whereRangeRegion(
         assert( z && pColl && pColl->xCmp );
       }
       n = sqlite3ValueBytes(pVal, pColl->enc);
-
-      for(i=0; i<SQLITE_INDEX_SAMPLES; i++){
+  
+      for(; i<pIdx->nSample; i++){
         int c;
         int eSampletype = aSample[i].eType;
-        if( eSampletype==SQLITE_NULL || eSampletype<eType ) continue;
-        if( (eSampletype!=eType) ) break;
+        if( eSampletype<eType ) continue;
+        if( eSampletype!=eType ) break;
 #ifndef SQLITE_OMIT_UTF16
         if( pColl->enc!=SQLITE_UTF8 ){
           int nSample;
@@ -100734,16 +103320,47 @@ static int whereRangeRegion(
         {
           c = pColl->xCmp(pColl->pUser, aSample[i].nByte, aSample[i].u.z, n, z);
         }
-        if( c-roundUp>=0 ) break;
+        if( c>=0 ){
+          if( c==0 ) isEq = 1;
+          break;
+        }
       }
     }
+  }
 
-    assert( i>=0 && i<=SQLITE_INDEX_SAMPLES );
-    *piRegion = i;
+  /* At this point, aSample[i] is the first sample that is greater than
+  ** or equal to pVal.  Or if i==pIdx->nSample, then all samples are less
+  ** than pVal.  If aSample[i]==pVal, then isEq==1.
+  */
+  if( isEq ){
+    assert( i<pIdx->nSample );
+    aStat[0] = aSample[i].nLt;
+    aStat[1] = aSample[i].nEq;
+  }else{
+    tRowcnt iLower, iUpper, iGap;
+    if( i==0 ){
+      iLower = 0;
+      iUpper = aSample[0].nLt;
+    }else{
+      iUpper = i>=pIdx->nSample ? n : aSample[i].nLt;
+      iLower = aSample[i-1].nEq + aSample[i-1].nLt;
+    }
+    aStat[1] = pIdx->avgEq;
+    if( iLower>=iUpper ){
+      iGap = 0;
+    }else{
+      iGap = iUpper - iLower;
+    }
+    if( roundUp ){
+      iGap = (iGap*2)/3;
+    }else{
+      iGap = iGap/3;
+    }
+    aStat[0] = iLower + iGap;
   }
   return SQLITE_OK;
 }
-#endif   /* #ifdef SQLITE_ENABLE_STAT2 */
+#endif /* SQLITE_ENABLE_STAT3 */
 
 /*
 ** If expression pExpr represents a literal value, set *pp to point to
@@ -100761,7 +103378,7 @@ static int whereRangeRegion(
 **
 ** If an error occurs, return an error code. Otherwise, SQLITE_OK.
 */
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 static int valueFromExpr(
   Parse *pParse, 
   Expr *pExpr, 
@@ -100772,7 +103389,7 @@ static int valueFromExpr(
    || (pExpr->op==TK_REGISTER && pExpr->op2==TK_VARIABLE)
   ){
     int iVar = pExpr->iColumn;
-    sqlite3VdbeSetVarmask(pParse->pVdbe, iVar); /* IMP: R-23257-02778 */
+    sqlite3VdbeSetVarmask(pParse->pVdbe, iVar);
     *pp = sqlite3VdbeGetValue(pParse->pReprepare, iVar, aff);
     return SQLITE_OK;
   }
@@ -100809,17 +103426,15 @@ static int valueFromExpr(
 **
 ** then nEq should be passed 0.
 **
-** The returned value is an integer between 1 and 100, inclusive. A return
-** value of 1 indicates that the proposed range scan is expected to visit
-** approximately 1/100th (1%) of the rows selected by the nEq equality
-** constraints (if any). A return value of 100 indicates that it is expected
-** that the range scan will visit every row (100%) selected by the equality
-** constraints.
+** The value written to *pRangeDiv is a divisor to reduce the estimated
+** search space.  A value of 1 means that range constraints are
+** no help at all.  A value of 2 means range constraints are
+** expected to reduce the search space by half.  And so forth...
 **
-** In the absence of sqlite_stat2 ANALYZE data, each range inequality
-** reduces the search space by 3/4ths.  Hence a single constraint (x>?)
-** results in a return of 25 and a range constraint (x>? AND x<?) results
-** in a return of 6.
+** In the absence of sqlite_stat3 ANALYZE data, each range inequality
+** reduces the search space by a factor of 4.  Hence a single constraint (x>?)
+** results in a return of 4 and a range constraint (x>? AND x<?) results
+** in a return of 16.
 */
 static int whereRangeScanEst(
   Parse *pParse,       /* Parsing & code generating context */
@@ -100827,84 +103442,72 @@ static int whereRangeScanEst(
   int nEq,             /* index into p->aCol[] of the range-compared column */
   WhereTerm *pLower,   /* Lower bound on the range. ex: "x>123" Might be NULL */
   WhereTerm *pUpper,   /* Upper bound on the range. ex: "x<455" Might be NULL */
-  int *piEst           /* OUT: Return value */
+  double *pRangeDiv   /* OUT: Reduce search space by this divisor */
 ){
   int rc = SQLITE_OK;
 
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 
-  if( nEq==0 && p->aSample ){
-    sqlite3_value *pLowerVal = 0;
-    sqlite3_value *pUpperVal = 0;
-    int iEst;
-    int iLower = 0;
-    int iUpper = SQLITE_INDEX_SAMPLES;
-    int roundUpUpper = 0;
-    int roundUpLower = 0;
+  if( nEq==0 && p->nSample ){
+    sqlite3_value *pRangeVal;
+    tRowcnt iLower = 0;
+    tRowcnt iUpper = p->aiRowEst[0];
+    tRowcnt a[2];
     u8 aff = p->pTable->aCol[p->aiColumn[0]].affinity;
 
     if( pLower ){
       Expr *pExpr = pLower->pExpr->pRight;
-      rc = valueFromExpr(pParse, pExpr, aff, &pLowerVal);
+      rc = valueFromExpr(pParse, pExpr, aff, &pRangeVal);
       assert( pLower->eOperator==WO_GT || pLower->eOperator==WO_GE );
-      roundUpLower = (pLower->eOperator==WO_GT) ?1:0;
+      if( rc==SQLITE_OK
+       && whereKeyStats(pParse, p, pRangeVal, 0, a)==SQLITE_OK
+      ){
+        iLower = a[0];
+        if( pLower->eOperator==WO_GT ) iLower += a[1];
+      }
+      sqlite3ValueFree(pRangeVal);
     }
     if( rc==SQLITE_OK && pUpper ){
       Expr *pExpr = pUpper->pExpr->pRight;
-      rc = valueFromExpr(pParse, pExpr, aff, &pUpperVal);
+      rc = valueFromExpr(pParse, pExpr, aff, &pRangeVal);
       assert( pUpper->eOperator==WO_LT || pUpper->eOperator==WO_LE );
-      roundUpUpper = (pUpper->eOperator==WO_LE) ?1:0;
-    }
-
-    if( rc!=SQLITE_OK || (pLowerVal==0 && pUpperVal==0) ){
-      sqlite3ValueFree(pLowerVal);
-      sqlite3ValueFree(pUpperVal);
-      goto range_est_fallback;
-    }else if( pLowerVal==0 ){
-      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
-      if( pLower ) iLower = iUpper/2;
-    }else if( pUpperVal==0 ){
-      rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
-      if( pUpper ) iUpper = (iLower + SQLITE_INDEX_SAMPLES + 1)/2;
-    }else{
-      rc = whereRangeRegion(pParse, p, pUpperVal, roundUpUpper, &iUpper);
-      if( rc==SQLITE_OK ){
-        rc = whereRangeRegion(pParse, p, pLowerVal, roundUpLower, &iLower);
+      if( rc==SQLITE_OK
+       && whereKeyStats(pParse, p, pRangeVal, 1, a)==SQLITE_OK
+      ){
+        iUpper = a[0];
+        if( pUpper->eOperator==WO_LE ) iUpper += a[1];
       }
+      sqlite3ValueFree(pRangeVal);
     }
-    WHERETRACE(("range scan regions: %d..%d\n", iLower, iUpper));
-
-    iEst = iUpper - iLower;
-    testcase( iEst==SQLITE_INDEX_SAMPLES );
-    assert( iEst<=SQLITE_INDEX_SAMPLES );
-    if( iEst<1 ){
-      *piEst = 50/SQLITE_INDEX_SAMPLES;
-    }else{
-      *piEst = (iEst*100)/SQLITE_INDEX_SAMPLES;
+    if( rc==SQLITE_OK ){
+      if( iUpper<=iLower ){
+        *pRangeDiv = (double)p->aiRowEst[0];
+      }else{
+        *pRangeDiv = (double)p->aiRowEst[0]/(double)(iUpper - iLower);
+      }
+      WHERETRACE(("range scan regions: %u..%u  div=%g\n",
+                  (u32)iLower, (u32)iUpper, *pRangeDiv));
+      return SQLITE_OK;
     }
-    sqlite3ValueFree(pLowerVal);
-    sqlite3ValueFree(pUpperVal);
-    return rc;
   }
-range_est_fallback:
 #else
   UNUSED_PARAMETER(pParse);
   UNUSED_PARAMETER(p);
   UNUSED_PARAMETER(nEq);
 #endif
   assert( pLower || pUpper );
-  *piEst = 100;
-  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *piEst /= 4;
-  if( pUpper ) *piEst /= 4;
+  *pRangeDiv = (double)1;
+  if( pLower && (pLower->wtFlags & TERM_VNULL)==0 ) *pRangeDiv *= (double)4;
+  if( pUpper ) *pRangeDiv *= (double)4;
   return rc;
 }
 
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 /*
 ** Estimate the number of rows that will be returned based on
 ** an equality constraint x=VALUE and where that VALUE occurs in
 ** the histogram data.  This only works when x is the left-most
-** column of an index and sqlite_stat2 histogram data is available
+** column of an index and sqlite_stat3 histogram data is available
 ** for that index.  When pExpr==NULL that means the constraint is
 ** "x IS NULL" instead of "x=VALUE".
 **
@@ -100924,12 +103527,12 @@ static int whereEqualScanEst(
   double *pnRow        /* Write the revised row estimate here */
 ){
   sqlite3_value *pRhs = 0;  /* VALUE on right-hand side of pTerm */
-  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
   u8 aff;                   /* Column affinity */
   int rc;                   /* Subfunction return code */
-  double nRowEst;           /* New estimate of the number of rows */
+  tRowcnt a[2];             /* Statistics */
 
   assert( p->aSample!=0 );
+  assert( p->nSample>0 );
   aff = p->pTable->aCol[p->aiColumn[0]].affinity;
   if( pExpr ){
     rc = valueFromExpr(pParse, pExpr, aff, &pRhs);
@@ -100938,26 +103541,18 @@ static int whereEqualScanEst(
     pRhs = sqlite3ValueNew(pParse->db);
   }
   if( pRhs==0 ) return SQLITE_NOTFOUND;
-  rc = whereRangeRegion(pParse, p, pRhs, 0, &iLower);
-  if( rc ) goto whereEqualScanEst_cancel;
-  rc = whereRangeRegion(pParse, p, pRhs, 1, &iUpper);
-  if( rc ) goto whereEqualScanEst_cancel;
-  WHERETRACE(("equality scan regions: %d..%d\n", iLower, iUpper));
-  if( iLower>=iUpper ){
-    nRowEst = p->aiRowEst[0]/(SQLITE_INDEX_SAMPLES*2);
-    if( nRowEst<*pnRow ) *pnRow = nRowEst;
-  }else{
-    nRowEst = (iUpper-iLower)*p->aiRowEst[0]/SQLITE_INDEX_SAMPLES;
-    *pnRow = nRowEst;
+  rc = whereKeyStats(pParse, p, pRhs, 0, a);
+  if( rc==SQLITE_OK ){
+    WHERETRACE(("equality scan regions: %d\n", (int)a[1]));
+    *pnRow = a[1];
   }
-
 whereEqualScanEst_cancel:
   sqlite3ValueFree(pRhs);
   return rc;
 }
-#endif /* defined(SQLITE_ENABLE_STAT2) */
+#endif /* defined(SQLITE_ENABLE_STAT3) */
 
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
 /*
 ** Estimate the number of rows that will be returned based on
 ** an IN constraint where the right-hand side of the IN operator
@@ -100980,60 +103575,25 @@ static int whereInScanEst(
   ExprList *pList,     /* The value list on the RHS of "x IN (v1,v2,v3,...)" */
   double *pnRow        /* Write the revised row estimate here */
 ){
-  sqlite3_value *pVal = 0;  /* One value from list */
-  int iLower, iUpper;       /* Range of histogram regions containing pRhs */
-  u8 aff;                   /* Column affinity */
-  int rc = SQLITE_OK;       /* Subfunction return code */
-  double nRowEst;           /* New estimate of the number of rows */
-  int nSpan = 0;            /* Number of histogram regions spanned */
-  int nSingle = 0;          /* Histogram regions hit by a single value */
-  int nNotFound = 0;        /* Count of values that are not constants */
-  int i;                               /* Loop counter */
-  u8 aSpan[SQLITE_INDEX_SAMPLES+1];    /* Histogram regions that are spanned */
-  u8 aSingle[SQLITE_INDEX_SAMPLES+1];  /* Histogram regions hit once */
+  int rc = SQLITE_OK;         /* Subfunction return code */
+  double nEst;                /* Number of rows for a single term */
+  double nRowEst = (double)0; /* New estimate of the number of rows */
+  int i;                      /* Loop counter */
 
   assert( p->aSample!=0 );
-  aff = p->pTable->aCol[p->aiColumn[0]].affinity;
-  memset(aSpan, 0, sizeof(aSpan));
-  memset(aSingle, 0, sizeof(aSingle));
-  for(i=0; i<pList->nExpr; i++){
-    sqlite3ValueFree(pVal);
-    rc = valueFromExpr(pParse, pList->a[i].pExpr, aff, &pVal);
-    if( rc ) break;
-    if( pVal==0 || sqlite3_value_type(pVal)==SQLITE_NULL ){
-      nNotFound++;
-      continue;
-    }
-    rc = whereRangeRegion(pParse, p, pVal, 0, &iLower);
-    if( rc ) break;
-    rc = whereRangeRegion(pParse, p, pVal, 1, &iUpper);
-    if( rc ) break;
-    if( iLower>=iUpper ){
-      aSingle[iLower] = 1;
-    }else{
-      assert( iLower>=0 && iUpper<=SQLITE_INDEX_SAMPLES );
-      while( iLower<iUpper ) aSpan[iLower++] = 1;
-    }
+  for(i=0; rc==SQLITE_OK && i<pList->nExpr; i++){
+    nEst = p->aiRowEst[0];
+    rc = whereEqualScanEst(pParse, p, pList->a[i].pExpr, &nEst);
+    nRowEst += nEst;
   }
   if( rc==SQLITE_OK ){
-    for(i=nSpan=0; i<=SQLITE_INDEX_SAMPLES; i++){
-      if( aSpan[i] ){
-        nSpan++;
-      }else if( aSingle[i] ){
-        nSingle++;
-      }
-    }
-    nRowEst = (nSpan*2+nSingle)*p->aiRowEst[0]/(2*SQLITE_INDEX_SAMPLES)
-               + nNotFound*p->aiRowEst[1];
     if( nRowEst > p->aiRowEst[0] ) nRowEst = p->aiRowEst[0];
     *pnRow = nRowEst;
-    WHERETRACE(("IN row estimate: nSpan=%d, nSingle=%d, nNotFound=%d, est=%g\n",
-                 nSpan, nSingle, nNotFound, nRowEst));
+    WHERETRACE(("IN row estimate: est=%g\n", nRowEst));
   }
-  sqlite3ValueFree(pVal);
   return rc;
 }
-#endif /* defined(SQLITE_ENABLE_STAT2) */
+#endif /* defined(SQLITE_ENABLE_STAT3) */
 
 
 /*
@@ -101071,6 +103631,7 @@ static void bestBtreeIndex(
   Bitmask notReady,           /* Mask of cursors not available for indexing */
   Bitmask notValid,           /* Cursors not available for any purpose */
   ExprList *pOrderBy,         /* The ORDER BY clause */
+  ExprList *pDistinct,        /* The select-list if query is DISTINCT */
   WhereCost *pCost            /* Lowest cost query plan */
 ){
   int iCur = pSrc->iCursor;   /* The cursor of the table to be accessed */
@@ -101079,7 +103640,7 @@ static void bestBtreeIndex(
   int eqTermMask;             /* Current mask of valid equality operators */
   int idxEqTermMask;          /* Index mask of valid equality operators */
   Index sPk;                  /* A fake index object for the primary key */
-  unsigned int aiRowEstPk[2]; /* The aiRowEst[] value for the sPk index */
+  tRowcnt aiRowEstPk[2];      /* The aiRowEst[] value for the sPk index */
   int aiColumnPk = -1;        /* The aColumn[] value for the sPk index */
   int wsFlagMask;             /* Allowed flags in pCost->plan.wsFlag */
 
@@ -101134,10 +103695,10 @@ static void bestBtreeIndex(
   /* Loop over all indices looking for the best one to use
   */
   for(; pProbe; pIdx=pProbe=pProbe->pNext){
-    const unsigned int * const aiRowEst = pProbe->aiRowEst;
+    const tRowcnt * const aiRowEst = pProbe->aiRowEst;
     double cost;                /* Cost of using pProbe */
     double nRow;                /* Estimated number of rows in result set */
-    double log10N;              /* base-10 logarithm of nRow (inexact) */
+    double log10N = (double)1;  /* base-10 logarithm of nRow (inexact) */
     int rev;                    /* True to scan in reverse order */
     int wsFlags = 0;
     Bitmask used = 0;
@@ -101177,14 +103738,12 @@ static void bestBtreeIndex(
     **    IN operator must be a SELECT, not a value list, for this variable
     **    to be true.
     **
-    **  estBound:
-    **    An estimate on the amount of the table that must be searched.  A
-    **    value of 100 means the entire table is searched.  Range constraints
-    **    might reduce this to a value less than 100 to indicate that only
-    **    a fraction of the table needs searching.  In the absence of
-    **    sqlite_stat2 ANALYZE data, a single inequality reduces the search
-    **    space to 1/4rd its original size.  So an x>? constraint reduces
-    **    estBound to 25.  Two constraints (x>? AND x<?) reduce estBound to 6.
+    **  rangeDiv:
+    **    An estimate of a divisor by which to reduce the search space due
+    **    to inequality constraints.  In the absence of sqlite_stat3 ANALYZE
+    **    data, a single inequality reduces the search space to 1/4th its
+    **    original size (rangeDiv==4).  Two inequalities reduce the search
+    **    space to 1/16th of its original size (rangeDiv==16).
     **
     **  bSort:   
     **    Boolean. True if there is an ORDER BY clause that will require an 
@@ -101209,12 +103768,13 @@ static void bestBtreeIndex(
     int nEq;                      /* Number of == or IN terms matching index */
     int bInEst = 0;               /* True if "x IN (SELECT...)" seen */
     int nInMul = 1;               /* Number of distinct equalities to lookup */
-    int estBound = 100;           /* Estimated reduction in search space */
+    double rangeDiv = (double)1;  /* Estimated reduction in search space */
     int nBound = 0;               /* Number of range constraints seen */
-    int bSort = 0;                /* True if external sort required */
+    int bSort = !!pOrderBy;       /* True if external sort required */
+    int bDist = !!pDistinct;      /* True if index cannot help with DISTINCT */
     int bLookup = 0;              /* True if not a covering index */
     WhereTerm *pTerm;             /* A single term of the WHERE clause */
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
     WhereTerm *pFirstTerm = 0;    /* First term matching the index */
 #endif
 
@@ -101224,6 +103784,7 @@ static void bestBtreeIndex(
       pTerm = findTerm(pWC, iCur, j, notReady, eqTermMask, pIdx);
       if( pTerm==0 ) break;
       wsFlags |= (WHERE_COLUMN_EQ|WHERE_ROWID_EQ);
+      testcase( pTerm->pWC!=pWC );
       if( pTerm->eOperator & WO_IN ){
         Expr *pExpr = pTerm->pExpr;
         wsFlags |= WHERE_COLUMN_IN;
@@ -101238,28 +103799,30 @@ static void bestBtreeIndex(
       }else if( pTerm->eOperator & WO_ISNULL ){
         wsFlags |= WHERE_COLUMN_NULL;
       }
-#ifdef SQLITE_ENABLE_STAT2
+#ifdef SQLITE_ENABLE_STAT3
       if( nEq==0 && pProbe->aSample ) pFirstTerm = pTerm;
 #endif
       used |= pTerm->prereqRight;
     }
 
-    /* Determine the value of estBound. */
+    /* Determine the value of rangeDiv */
     if( nEq<pProbe->nColumn && pProbe->bUnordered==0 ){
       int j = pProbe->aiColumn[nEq];
       if( findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE|WO_GT|WO_GE, pIdx) ){
         WhereTerm *pTop = findTerm(pWC, iCur, j, notReady, WO_LT|WO_LE, pIdx);
         WhereTerm *pBtm = findTerm(pWC, iCur, j, notReady, WO_GT|WO_GE, pIdx);
-        whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &estBound);
+        whereRangeScanEst(pParse, pProbe, nEq, pBtm, pTop, &rangeDiv);
         if( pTop ){
           nBound = 1;
           wsFlags |= WHERE_TOP_LIMIT;
           used |= pTop->prereqRight;
+          testcase( pTop->pWC!=pWC );
         }
         if( pBtm ){
           nBound++;
           wsFlags |= WHERE_BTM_LIMIT;
           used |= pBtm->prereqRight;
+          testcase( pBtm->pWC!=pWC );
         }
         wsFlags |= (WHERE_COLUMN_RANGE|WHERE_ROWID_RANGE);
       }
@@ -101275,17 +103838,20 @@ static void bestBtreeIndex(
     ** naturally scan rows in the required order, set the appropriate flags
     ** in wsFlags. Otherwise, if there is an ORDER BY clause but the index
     ** will scan rows in a different order, set the bSort variable.  */
-    if( pOrderBy ){
-      if( (wsFlags & WHERE_COLUMN_IN)==0
-        && pProbe->bUnordered==0
-        && isSortingIndex(pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy,
-                          nEq, wsFlags, &rev)
-      ){
-        wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
-        wsFlags |= (rev ? WHERE_REVERSE : 0);
-      }else{
-        bSort = 1;
-      }
+    if( isSortingIndex(
+          pParse, pWC->pMaskSet, pProbe, iCur, pOrderBy, nEq, wsFlags, &rev)
+    ){
+      bSort = 0;
+      wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_ORDERBY;
+      wsFlags |= (rev ? WHERE_REVERSE : 0);
+    }
+
+    /* If there is a DISTINCT qualifier and this index will scan rows in
+    ** order of the DISTINCT expressions, clear bDist and set the appropriate
+    ** flags in wsFlags. */
+    if( isDistinctIndex(pParse, pWC, pProbe, iCur, pDistinct, nEq) ){
+      bDist = 0;
+      wsFlags |= WHERE_ROWID_RANGE|WHERE_COLUMN_RANGE|WHERE_DISTINCT;
     }
 
     /* If currently calculating the cost of using an index (not the IPK
@@ -101319,27 +103885,30 @@ static void bestBtreeIndex(
       nInMul = (int)(nRow / aiRowEst[nEq]);
     }
 
-#ifdef SQLITE_ENABLE_STAT2
-    /* If the constraint is of the form x=VALUE and histogram
+#ifdef SQLITE_ENABLE_STAT3
+    /* If the constraint is of the form x=VALUE or x IN (E1,E2,...)
+    ** and we do not think that values of x are unique and if histogram
     ** data is available for column x, then it might be possible
     ** to get a better estimate on the number of rows based on
     ** VALUE and how common that value is according to the histogram.
     */
-    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 ){
+    if( nRow>(double)1 && nEq==1 && pFirstTerm!=0 && aiRowEst[1]>1 ){
+      assert( (pFirstTerm->eOperator & (WO_EQ|WO_ISNULL|WO_IN))!=0 );
       if( pFirstTerm->eOperator & (WO_EQ|WO_ISNULL) ){
         testcase( pFirstTerm->eOperator==WO_EQ );
         testcase( pFirstTerm->eOperator==WO_ISNULL );
         whereEqualScanEst(pParse, pProbe, pFirstTerm->pExpr->pRight, &nRow);
-      }else if( pFirstTerm->eOperator==WO_IN && bInEst==0 ){
+      }else if( bInEst==0 ){
+        assert( pFirstTerm->eOperator==WO_IN );
         whereInScanEst(pParse, pProbe, pFirstTerm->pExpr->x.pList, &nRow);
       }
     }
-#endif /* SQLITE_ENABLE_STAT2 */
+#endif /* SQLITE_ENABLE_STAT3 */
 
     /* Adjust the number of output rows and downward to reflect rows
     ** that are excluded by range constraints.
     */
-    nRow = (nRow * (double)estBound) / (double)100;
+    nRow = nRow/rangeDiv;
     if( nRow<1 ) nRow = 1;
 
     /* Experiments run on real SQLite databases show that the time needed
@@ -101350,7 +103919,7 @@ static void bestBtreeIndex(
     ** slower with larger records, presumably because fewer records fit
     ** on one page and hence more pages have to be fetched.
     **
-    ** The ANALYZE command and the sqlite_stat1 and sqlite_stat2 tables do
+    ** The ANALYZE command and the sqlite_stat1 and sqlite_stat3 tables do
     ** not give us data on the relative sizes of table and index records.
     ** So this computation assumes table records are about twice as big
     ** as index records
@@ -101402,6 +103971,9 @@ static void bestBtreeIndex(
     if( bSort ){
       cost += nRow*estLog(nRow)*3;
     }
+    if( bDist ){
+      cost += nRow*estLog(nRow)*3;
+    }
 
     /**** Cost of using this index has now been computed ****/
 
@@ -101465,10 +104037,10 @@ static void bestBtreeIndex(
 
 
     WHERETRACE((
-      "%s(%s): nEq=%d nInMul=%d estBound=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
+      "%s(%s): nEq=%d nInMul=%d rangeDiv=%d bSort=%d bLookup=%d wsFlags=0x%x\n"
       "         notReady=0x%llx log10N=%.1f nRow=%.1f cost=%.1f used=0x%llx\n",
       pSrc->pTab->zName, (pIdx ? pIdx->zName : "ipk"), 
-      nEq, nInMul, estBound, bSort, bLookup, wsFlags,
+      nEq, nInMul, (int)rangeDiv, bSort, bLookup, wsFlags,
       notReady, log10N, nRow, cost, used
     ));
 
@@ -101547,7 +104119,7 @@ static void bestIndex(
   }else
 #endif
   {
-    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, pCost);
+    bestBtreeIndex(pParse, pWC, pSrc, notReady, notValid, pOrderBy, 0, pCost);
   }
 }
 
@@ -101972,7 +104544,8 @@ static Bitmask codeOneLoopStart(
   WhereInfo *pWInfo,   /* Complete information about the WHERE clause */
   int iLevel,          /* Which level of pWInfo->a[] should be coded */
   u16 wctrlFlags,      /* One of the WHERE_* flags defined in sqliteInt.h */
-  Bitmask notReady     /* Which tables are currently available */
+  Bitmask notReady,    /* Which tables are currently available */
+  Expr *pWhere         /* Complete WHERE clause */
 ){
   int j, k;            /* Loop counters */
   int iCur;            /* The VDBE cursor for the table */
@@ -102454,7 +105027,8 @@ static Bitmask codeOneLoopStart(
     int iLoopBody = sqlite3VdbeMakeLabel(v);  /* Start of loop body */
     int iRetInit;                             /* Address of regReturn init */
     int untestedTerms = 0;             /* Some terms not completely tested */
-    int ii;
+    int ii;                            /* Loop counter */
+    Expr *pAndExpr = 0;                /* An ".. AND (...)" expression */
    
     pTerm = pLevel->plan.u.pTerm;
     assert( pTerm!=0 );
@@ -102504,13 +105078,28 @@ static Bitmask codeOneLoopStart(
     }
     iRetInit = sqlite3VdbeAddOp2(v, OP_Integer, 0, regReturn);
 
+    /* If the original WHERE clause is z of the form:  (x1 OR x2 OR ...) AND y
+    ** Then for every term xN, evaluate as the subexpression: xN AND z
+    ** That way, terms in y that are factored into the disjunction will
+    ** be picked up by the recursive calls to sqlite3WhereBegin() below.
+    */
+    if( pWC->nTerm>1 ){
+      pAndExpr = sqlite3ExprAlloc(pParse->db, TK_AND, 0, 0);
+      pAndExpr->pRight = pWhere;
+    }
+
     for(ii=0; ii<pOrWc->nTerm; ii++){
       WhereTerm *pOrTerm = &pOrWc->a[ii];
       if( pOrTerm->leftCursor==iCur || pOrTerm->eOperator==WO_AND ){
         WhereInfo *pSubWInfo;          /* Info for single OR-term scan */
+        Expr *pOrExpr = pOrTerm->pExpr;
+        if( pAndExpr ){
+          pAndExpr->pLeft = pOrExpr;
+          pOrExpr = pAndExpr;
+        }
         /* Loop through table entries that match term pOrTerm. */
-        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrTerm->pExpr, 0,
-                        WHERE_OMIT_OPEN | WHERE_OMIT_CLOSE |
+        pSubWInfo = sqlite3WhereBegin(pParse, pOrTab, pOrExpr, 0, 0,
+                        WHERE_OMIT_OPEN_CLOSE | WHERE_AND_ONLY |
                         WHERE_FORCE_TABLE | WHERE_ONETABLE_ONLY);
         if( pSubWInfo ){
           explainOneScan(
@@ -102538,6 +105127,7 @@ static Bitmask codeOneLoopStart(
         }
       }
     }
+    sqlite3DbFree(pParse->db, pAndExpr);
     sqlite3VdbeChangeP1(v, iRetInit, sqlite3VdbeCurrentAddr(v));
     sqlite3VdbeAddOp2(v, OP_Goto, 0, pLevel->addrBrk);
     sqlite3VdbeResolveLabel(v, iLoopBody);
@@ -102750,6 +105340,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
   SrcList *pTabList,    /* A list of all tables to be scanned */
   Expr *pWhere,         /* The WHERE clause */
   ExprList **ppOrderBy, /* An ORDER BY clause, or NULL */
+  ExprList *pDistinct,  /* The select-list for DISTINCT queries - or NULL */
   u16 wctrlFlags        /* One of the WHERE_* flags defined in sqliteInt.h */
 ){
   int i;                     /* Loop counter */
@@ -102810,11 +105401,15 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
   pWInfo->savedNQueryLoop = pParse->nQueryLoop;
   pMaskSet = (WhereMaskSet*)&pWC[1];
 
+  /* Disable the DISTINCT optimization if SQLITE_DistinctOpt is set via
+  ** sqlite3_test_ctrl(SQLITE_TESTCTRL_OPTIMIZATIONS,...) */
+  if( db->flags & SQLITE_DistinctOpt ) pDistinct = 0;
+
   /* Split the WHERE clause into separate subexpressions where each
   ** subexpression is separated by an AND operator.
   */
   initMaskSet(pMaskSet);
-  whereClauseInit(pWC, pParse, pMaskSet);
+  whereClauseInit(pWC, pParse, pMaskSet, wctrlFlags);
   sqlite3ExprCodeConstants(pParse, pWhere);
   whereSplit(pWC, pWhere, TK_AND);   /* IMP: R-15842-53296 */
     
@@ -102877,6 +105472,15 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
     goto whereBeginError;
   }
 
+  /* Check if the DISTINCT qualifier, if there is one, is redundant. 
+  ** If it is, then set pDistinct to NULL and WhereInfo.eDistinct to
+  ** WHERE_DISTINCT_UNIQUE to tell the caller to ignore the DISTINCT.
+  */
+  if( pDistinct && isDistinctRedundant(pParse, pTabList, pWC, pDistinct) ){
+    pDistinct = 0;
+    pWInfo->eDistinct = WHERE_DISTINCT_UNIQUE;
+  }
+
   /* Chose the best index to use for each table in the FROM clause.
   **
   ** This loop fills in the following fields:
@@ -102960,6 +105564,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
         int doNotReorder;    /* True if this table should not be reordered */
         WhereCost sCost;     /* Cost information from best[Virtual]Index() */
         ExprList *pOrderBy;  /* ORDER BY clause for index to optimize */
+        ExprList *pDist;     /* DISTINCT clause for index to optimize */
   
         doNotReorder =  (pTabItem->jointype & (JT_LEFT|JT_CROSS))!=0;
         if( j!=iFrom && doNotReorder ) break;
@@ -102970,6 +105575,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
         }
         mask = (isOptimal ? m : notReady);
         pOrderBy = ((i==0 && ppOrderBy )?*ppOrderBy:0);
+        pDist = (i==0 ? pDistinct : 0);
         if( pTabItem->pIndex==0 ) nUnconstrained++;
   
         WHERETRACE(("=== trying table %d with isOptimal=%d ===\n",
@@ -102984,7 +105590,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
 #endif
         {
           bestBtreeIndex(pParse, pWC, pTabItem, mask, notReady, pOrderBy,
-                         &sCost);
+              pDist, &sCost);
         }
         assert( isOptimal || (sCost.used&notReady)==0 );
 
@@ -103042,9 +105648,14 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
     WHERETRACE(("*** Optimizer selects table %d for loop %d"
                 " with cost=%g and nRow=%g\n",
                 bestJ, pLevel-pWInfo->a, bestPlan.rCost, bestPlan.plan.nRow));
-    if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 ){
+    /* The ALWAYS() that follows was added to hush up clang scan-build */
+    if( (bestPlan.plan.wsFlags & WHERE_ORDERBY)!=0 && ALWAYS(ppOrderBy) ){
       *ppOrderBy = 0;
     }
+    if( (bestPlan.plan.wsFlags & WHERE_DISTINCT)!=0 ){
+      assert( pWInfo->eDistinct==0 );
+      pWInfo->eDistinct = WHERE_DISTINCT_ORDERED;
+    }
     andFlags &= bestPlan.plan.wsFlags;
     pLevel->plan = bestPlan.plan;
     testcase( bestPlan.plan.wsFlags & WHERE_INDEXED );
@@ -103127,7 +105738,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
     }else
 #endif
     if( (pLevel->plan.wsFlags & WHERE_IDX_ONLY)==0
-         && (wctrlFlags & WHERE_OMIT_OPEN)==0 ){
+         && (wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0 ){
       int op = pWInfo->okOnePass ? OP_OpenWrite : OP_OpenRead;
       sqlite3OpenTable(pParse, pTabItem->iCursor, iDb, pTab, op);
       testcase( pTab->nCol==BMS-1 );
@@ -103172,7 +105783,7 @@ SQLITE_PRIVATE WhereInfo *sqlite3WhereBegin(
   for(i=0; i<nTabList; i++){
     pLevel = &pWInfo->a[i];
     explainOneScan(pParse, pTabList, pLevel, i, pLevel->iFrom, wctrlFlags);
-    notReady = codeOneLoopStart(pWInfo, i, wctrlFlags, notReady);
+    notReady = codeOneLoopStart(pWInfo, i, wctrlFlags, notReady, pWhere);
     pWInfo->iContinue = pLevel->addrCont;
   }
 
@@ -103307,7 +105918,7 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){
     assert( pTab!=0 );
     if( (pTab->tabFlags & TF_Ephemeral)==0
      && pTab->pSelect==0
-     && (pWInfo->wctrlFlags & WHERE_OMIT_CLOSE)==0
+     && (pWInfo->wctrlFlags & WHERE_OMIT_OPEN_CLOSE)==0
     ){
       int ws = pLevel->plan.wsFlags;
       if( !pWInfo->okOnePass && (ws & WHERE_IDX_ONLY)==0 ){
@@ -103379,6 +105990,7 @@ SQLITE_PRIVATE void sqlite3WhereEnd(WhereInfo *pWInfo){
 */
 /* First off, code is included that follows the "include" declaration
 ** in the input grammar file. */
+/* #include <stdio.h> */
 
 
 /*
@@ -104239,6 +106851,7 @@ struct yyParser {
 typedef struct yyParser yyParser;
 
 #ifndef NDEBUG
+/* #include <stdio.h> */
 static FILE *yyTraceFILE = 0;
 static char *yyTracePrompt = 0;
 #endif /* NDEBUG */
@@ -106652,7 +109265,9 @@ SQLITE_PRIVATE void sqlite3Parser(
 ){
   YYMINORTYPE yyminorunion;
   int yyact;            /* The parser action. */
+#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
   int yyendofinput;     /* True if we are at the end of input */
+#endif
 #ifdef YYERRORSYMBOL
   int yyerrorhit = 0;   /* True if yymajor has invoked an error */
 #endif
@@ -106675,7 +109290,9 @@ SQLITE_PRIVATE void sqlite3Parser(
     yypParser->yystack[0].major = 0;
   }
   yyminorunion.yy0 = yyminor;
+#if !defined(YYERRORSYMBOL) && !defined(YYNOERRORRECOVERY)
   yyendofinput = (yymajor==0);
+#endif
   sqlite3ParserARG_STORE;
 
 #ifndef NDEBUG
@@ -106687,7 +109304,6 @@ SQLITE_PRIVATE void sqlite3Parser(
   do{
     yyact = yy_find_shift_action(yypParser,(YYCODETYPE)yymajor);
     if( yyact<YYNSTATE ){
-      assert( !yyendofinput );  /* Impossible to shift the $ token */
       yy_shift(yypParser,yyact,yymajor,&yyminorunion);
       yypParser->yyerrcnt--;
       yymajor = YYNOCODE;
@@ -106814,6 +109430,7 @@ SQLITE_PRIVATE void sqlite3Parser(
 ** individual tokens and sends those tokens one-by-one over to the
 ** parser for analysis.
 */
+/* #include <stdlib.h> */
 
 /*
 ** The charMap() macro maps alphabetic characters into their
@@ -108078,7 +110695,7 @@ SQLITE_API char *sqlite3_temp_directory = 0;
 **       without blocking.
 */
 SQLITE_API int sqlite3_initialize(void){
-  sqlite3_mutex *pMaster;                      /* The main static mutex */
+  MUTEX_LOGIC( sqlite3_mutex *pMaster; )       /* The main static mutex */
   int rc;                                      /* Result code */
 
 #ifdef SQLITE_OMIT_WSD
@@ -108112,7 +110729,7 @@ SQLITE_API int sqlite3_initialize(void){
   ** malloc subsystem - this implies that the allocation of a static
   ** mutex must not require support from the malloc subsystem.
   */
-  pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER);
+  MUTEX_LOGIC( pMaster = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_MASTER); )
   sqlite3_mutex_enter(pMaster);
   sqlite3GlobalConfig.isMutexInit = 1;
   if( !sqlite3GlobalConfig.isMallocInit ){
@@ -108206,6 +110823,16 @@ SQLITE_API int sqlite3_initialize(void){
 #endif
 #endif
 
+  /* Do extra initialization steps requested by the SQLITE_EXTRA_INIT
+  ** compile-time option.
+  */
+#ifdef SQLITE_EXTRA_INIT
+  if( rc==SQLITE_OK && sqlite3GlobalConfig.isInit ){
+    int SQLITE_EXTRA_INIT(void);
+    rc = SQLITE_EXTRA_INIT();
+  }
+#endif
+
   return rc;
 }
 
@@ -109176,13 +111803,13 @@ SQLITE_API int sqlite3_overload_function(
   int nArg
 ){
   int nName = sqlite3Strlen30(zName);
-  int rc;
+  int rc = SQLITE_OK;
   sqlite3_mutex_enter(db->mutex);
   if( sqlite3FindFunction(db, zName, nName, nArg, SQLITE_UTF8, 0)==0 ){
-    sqlite3CreateFunc(db, zName, nArg, SQLITE_UTF8,
-                      0, sqlite3InvalidFunction, 0, 0, 0);
+    rc = sqlite3CreateFunc(db, zName, nArg, SQLITE_UTF8,
+                           0, sqlite3InvalidFunction, 0, 0, 0);
   }
-  rc = sqlite3ApiExit(db, SQLITE_OK);
+  rc = sqlite3ApiExit(db, rc);
   sqlite3_mutex_leave(db->mutex);
   return rc;
 }
@@ -110244,6 +112871,7 @@ opendb_out:
     sqlite3_mutex_leave(db->mutex);
   }
   rc = sqlite3_errcode(db);
+  assert( db!=0 || rc==SQLITE_NOMEM );
   if( rc==SQLITE_NOMEM ){
     sqlite3_close(db);
     db = 0;
@@ -111574,7 +114202,13 @@ SQLITE_PRIVATE void sqlite3ConnectionClosed(sqlite3 *db){
 # define SQLITE_ENABLE_FTS3
 #endif
 
-#ifdef SQLITE_ENABLE_FTS3
+#if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
+
+/* If not building as part of the core, include sqlite3ext.h. */
+#ifndef SQLITE_CORE
+SQLITE_API extern const sqlite3_api_routines *sqlite3_api;
+#endif
+
 /************** Include fts3_tokenizer.h in the middle of fts3Int.h **********/
 /************** Begin file fts3_tokenizer.h **********************************/
 /*
@@ -111967,6 +114601,13 @@ typedef sqlite3_uint64 u64;       /* 8-byte unsigned integer */
 
 #endif /* SQLITE_AMALGAMATION */
 
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE int sqlite3Fts3Corrupt(void);
+# define FTS_CORRUPT_VTAB sqlite3Fts3Corrupt()
+#else
+# define FTS_CORRUPT_VTAB SQLITE_CORRUPT_VTAB
+#endif
+
 typedef struct Fts3Table Fts3Table;
 typedef struct Fts3Cursor Fts3Cursor;
 typedef struct Fts3Expr Fts3Expr;
@@ -111994,6 +114635,7 @@ struct Fts3Table {
   int nColumn;                    /* number of named columns in virtual table */
   char **azColumn;                /* column names.  malloced */
   sqlite3_tokenizer *pTokenizer;  /* tokenizer for inserts and queries */
+  char *zContentTbl;              /* content=xxx option, or NULL */
 
   /* Precompiled statements used by the implementation. Each of these 
   ** statements is run and reset within a single virtual table API call. 
@@ -112034,7 +114676,7 @@ struct Fts3Table {
   int nPendingData;               /* Current bytes of pending data */
   sqlite_int64 iPrevDocid;        /* Docid of most recently inserted document */
 
-#if defined(SQLITE_DEBUG)
+#if defined(SQLITE_DEBUG) || defined(SQLITE_COVERAGE_TEST)
   /* State variables used for validating that the transaction control
   ** methods of the virtual table are called at appropriate times.  These
   ** values do not contribution to the FTS computation; they are used for
@@ -112107,7 +114749,7 @@ struct Fts3Doclist {
   int bFreeList;                 /* True if pList should be sqlite3_free()d */
   char *pList;                   /* Pointer to position list following iDocid */
   int nList;                     /* Length of position list */
-} doclist;
+};
 
 /*
 ** A "phrase" is a sequence of one or more tokens that must match in
@@ -112119,6 +114761,7 @@ struct Fts3PhraseToken {
   char *z;                        /* Text of the token */
   int n;                          /* Number of bytes in buffer z */
   int isPrefix;                   /* True if token ends with a "*" character */
+  int bFirst;                     /* True if token must appear at position 0 */
 
   /* Variables above this point are populated when the expression is
   ** parsed (by code in fts3_expr.c). Below this point the variables are
@@ -112237,6 +114880,7 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(
 #define FTS3_SEGMENT_COLUMN_FILTER 0x00000004
 #define FTS3_SEGMENT_PREFIX        0x00000008
 #define FTS3_SEGMENT_SCAN          0x00000010
+#define FTS3_SEGMENT_FIRST         0x00000020
 
 /* Type passed as 4th argument to SegmentReaderIterate() */
 struct Fts3SegFilter {
@@ -112276,8 +114920,8 @@ SQLITE_PRIVATE int sqlite3Fts3GetVarint32(const char *, int *);
 SQLITE_PRIVATE int sqlite3Fts3VarintLen(sqlite3_uint64);
 SQLITE_PRIVATE void sqlite3Fts3Dequote(char *);
 SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(int,char*,int,char**,sqlite3_int64*,int*,u8*);
-
 SQLITE_PRIVATE int sqlite3Fts3EvalPhraseStats(Fts3Cursor *, Fts3Expr *, u32 *);
+SQLITE_PRIVATE int sqlite3Fts3FirstFilter(sqlite3_int64, char *, int, char *);
 
 /* fts3_tokenizer.c */
 SQLITE_PRIVATE const char *sqlite3Fts3NextToken(const char *, int *);
@@ -112296,7 +114940,7 @@ SQLITE_PRIVATE void sqlite3Fts3Matchinfo(sqlite3_context *, Fts3Cursor *, const
 
 /* fts3_expr.c */
 SQLITE_PRIVATE int sqlite3Fts3ExprParse(sqlite3_tokenizer *, 
-  char **, int, int, const char *, int, Fts3Expr **
+  char **, int, int, int, const char *, int, Fts3Expr **
 );
 SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *);
 #ifdef SQLITE_TEST
@@ -112307,19 +114951,8 @@ SQLITE_PRIVATE int sqlite3Fts3InitTerm(sqlite3 *db);
 /* fts3_aux.c */
 SQLITE_PRIVATE int sqlite3Fts3InitAux(sqlite3 *db);
 
-SQLITE_PRIVATE int sqlite3Fts3TermSegReaderCursor(
-  Fts3Cursor *pCsr,               /* Virtual table cursor handle */
-  const char *zTerm,              /* Term to query for */
-  int nTerm,                      /* Size of zTerm in bytes */
-  int isPrefix,                   /* True for a prefix search */
-  Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
-);
-
 SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *);
 
-SQLITE_PRIVATE int sqlite3Fts3EvalStart(Fts3Cursor *, Fts3Expr *, int);
-SQLITE_PRIVATE int sqlite3Fts3EvalNext(Fts3Cursor *pCsr);
-
 SQLITE_PRIVATE int sqlite3Fts3MsrIncrStart(
     Fts3Table*, Fts3MultiSegReader*, int, const char*, int);
 SQLITE_PRIVATE int sqlite3Fts3MsrIncrNext(
@@ -112330,7 +114963,7 @@ SQLITE_PRIVATE int sqlite3Fts3MsrIncrRestart(Fts3MultiSegReader *pCsr);
 
 SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, int *);
 
-#endif /* SQLITE_ENABLE_FTS3 */
+#endif /* !SQLITE_CORE || SQLITE_ENABLE_FTS3 */
 #endif /* _FTSINT_H */
 
 /************** End of fts3Int.h *********************************************/
@@ -112341,11 +114974,22 @@ SQLITE_PRIVATE int sqlite3Fts3DeferredTokenList(Fts3DeferredToken *, char **, in
 # define SQLITE_CORE 1
 #endif
 
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stddef.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
+/* #include <stdarg.h> */
 
 #ifndef SQLITE_CORE 
   SQLITE_EXTENSION_INIT1
 #endif
 
+static int fts3EvalNext(Fts3Cursor *pCsr);
+static int fts3EvalStart(Fts3Cursor *pCsr);
+static int fts3TermSegReaderCursor(
+    Fts3Cursor *, const char *, int, int, Fts3MultiSegReader **);
+
 /* 
 ** Write a 64-bit variable-length integer to memory starting at p[0].
 ** The length of data written will be between 1 and FTS3_VARINT_MAX bytes.
@@ -112467,7 +115111,7 @@ static void fts3GetReverseVarint(
   sqlite3_int64 *pVal
 ){
   sqlite3_int64 iVal;
-  char *p = *pp;
+  char *p;
 
   /* Pointer p now points at the first byte past the varint we are 
   ** interested in. So, unless the doclist is corrupt, the 0x80 bit is
@@ -112497,6 +115141,7 @@ static int fts3DisconnectMethod(sqlite3_vtab *pVtab){
   sqlite3_free(p->zSegmentsTbl);
   sqlite3_free(p->zReadExprlist);
   sqlite3_free(p->zWriteExprlist);
+  sqlite3_free(p->zContentTbl);
 
   /* Invoke the tokenizer destructor to free the tokenizer. */
   p->pTokenizer->pModule->xDestroy(p->pTokenizer);
@@ -112536,16 +115181,19 @@ static void fts3DbExec(
 ** The xDestroy() virtual table method.
 */
 static int fts3DestroyMethod(sqlite3_vtab *pVtab){
-  int rc = SQLITE_OK;              /* Return code */
   Fts3Table *p = (Fts3Table *)pVtab;
-  sqlite3 *db = p->db;
+  int rc = SQLITE_OK;              /* Return code */
+  const char *zDb = p->zDb;        /* Name of database (e.g. "main", "temp") */
+  sqlite3 *db = p->db;             /* Database handle */
 
   /* Drop the shadow tables */
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", p->zDb, p->zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", p->zDb,p->zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", p->zDb, p->zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", p->zDb, p->zName);
-  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", p->zDb, p->zName);
+  if( p->zContentTbl==0 ){
+    fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_content'", zDb, p->zName);
+  }
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segments'", zDb,p->zName);
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_segdir'", zDb, p->zName);
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_docsize'", zDb, p->zName);
+  fts3DbExec(&rc, db, "DROP TABLE IF EXISTS %Q.'%q_stat'", zDb, p->zName);
 
   /* If everything has worked, invoke fts3DisconnectMethod() to free the
   ** memory associated with the Fts3Table structure and return SQLITE_OK.
@@ -112607,23 +115255,27 @@ static void fts3DeclareVtab(int *pRc, Fts3Table *p){
 static int fts3CreateTables(Fts3Table *p){
   int rc = SQLITE_OK;             /* Return code */
   int i;                          /* Iterator variable */
-  char *zContentCols;             /* Columns of %_content table */
   sqlite3 *db = p->db;            /* The database connection */
 
-  /* Create a list of user columns for the content table */
-  zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
-  for(i=0; zContentCols && i<p->nColumn; i++){
-    char *z = p->azColumn[i];
-    zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
+  if( p->zContentTbl==0 ){
+    char *zContentCols;           /* Columns of %_content table */
+
+    /* Create a list of user columns for the content table */
+    zContentCols = sqlite3_mprintf("docid INTEGER PRIMARY KEY");
+    for(i=0; zContentCols && i<p->nColumn; i++){
+      char *z = p->azColumn[i];
+      zContentCols = sqlite3_mprintf("%z, 'c%d%q'", zContentCols, i, z);
+    }
+    if( zContentCols==0 ) rc = SQLITE_NOMEM;
+  
+    /* Create the content table */
+    fts3DbExec(&rc, db, 
+       "CREATE TABLE %Q.'%q_content'(%s)",
+       p->zDb, p->zName, zContentCols
+    );
+    sqlite3_free(zContentCols);
   }
-  if( zContentCols==0 ) rc = SQLITE_NOMEM;
 
-  /* Create the content table */
-  fts3DbExec(&rc, db, 
-     "CREATE TABLE %Q.'%q_content'(%s)",
-     p->zDb, p->zName, zContentCols
-  );
-  sqlite3_free(zContentCols);
   /* Create other tables */
   fts3DbExec(&rc, db, 
       "CREATE TABLE %Q.'%q_segments'(blockid INTEGER PRIMARY KEY, block BLOB);",
@@ -112774,8 +115426,8 @@ static char *fts3QuoteId(char const *zInput){
 }
 
 /*
-** Return a list of comma separated SQL expressions that could be used
-** in a SELECT statement such as the following:
+** Return a list of comma separated SQL expressions and a FROM clause that 
+** could be used in a SELECT statement such as the following:
 **
 **     SELECT <list of expressions> FROM %_content AS x ...
 **
@@ -112786,7 +115438,7 @@ static char *fts3QuoteId(char const *zInput){
 ** table has the three user-defined columns "a", "b", and "c", the following
 ** string is returned:
 **
-**     "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c')"
+**     "docid, unzip(x.'a'), unzip(x.'b'), unzip(x.'c') FROM %_content AS x"
 **
 ** The pointer returned points to a buffer allocated by sqlite3_malloc(). It
 ** is the responsibility of the caller to eventually free it.
@@ -112802,16 +115454,28 @@ static char *fts3ReadExprList(Fts3Table *p, const char *zFunc, int *pRc){
   char *zFunction;
   int i;
 
-  if( !zFunc ){
-    zFunction = "";
+  if( p->zContentTbl==0 ){
+    if( !zFunc ){
+      zFunction = "";
+    }else{
+      zFree = zFunction = fts3QuoteId(zFunc);
+    }
+    fts3Appendf(pRc, &zRet, "docid");
+    for(i=0; i<p->nColumn; i++){
+      fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
+    }
+    sqlite3_free(zFree);
   }else{
-    zFree = zFunction = fts3QuoteId(zFunc);
-  }
-  fts3Appendf(pRc, &zRet, "docid");
-  for(i=0; i<p->nColumn; i++){
-    fts3Appendf(pRc, &zRet, ",%s(x.'c%d%q')", zFunction, i, p->azColumn[i]);
+    fts3Appendf(pRc, &zRet, "rowid");
+    for(i=0; i<p->nColumn; i++){
+      fts3Appendf(pRc, &zRet, ", x.'%q'", p->azColumn[i]);
+    }
   }
-  sqlite3_free(zFree);
+  fts3Appendf(pRc, &zRet, " FROM '%q'.'%q%s' AS x", 
+      p->zDb,
+      (p->zContentTbl ? p->zContentTbl : p->zName),
+      (p->zContentTbl ? "" : "_content")
+  );
   return zRet;
 }
 
@@ -112854,9 +115518,23 @@ static char *fts3WriteExprList(Fts3Table *p, const char *zFunc, int *pRc){
   return zRet;
 }
 
+/*
+** This function interprets the string at (*pp) as a non-negative integer
+** value. It reads the integer and sets *pnOut to the value read, then 
+** sets *pp to point to the byte immediately following the last byte of
+** the integer value.
+**
+** Only decimal digits ('0'..'9') may be part of an integer value. 
+**
+** If *pp does not begin with a decimal digit, SQLITE_ERROR is returned and
+** the output value is undefined. Otherwise SQLITE_OK is returned.
+**
+** This function is used when parsing the "prefix=" FTS4 parameter.
+*/
 static int fts3GobbleInt(const char **pp, int *pnOut){
-  const char *p = *pp;
-  int nInt = 0;
+  const char *p;                  /* Iterator pointer */
+  int nInt = 0;                   /* Output value */
+
   for(p=*pp; p[0]>='0' && p[0]<='9'; p++){
     nInt = nInt * 10 + (p[0] - '0');
   }
@@ -112866,15 +115544,30 @@ static int fts3GobbleInt(const char **pp, int *pnOut){
   return SQLITE_OK;
 }
 
-
+/*
+** This function is called to allocate an array of Fts3Index structures
+** representing the indexes maintained by the current FTS table. FTS tables
+** always maintain the main "terms" index, but may also maintain one or
+** more "prefix" indexes, depending on the value of the "prefix=" parameter
+** (if any) specified as part of the CREATE VIRTUAL TABLE statement.
+**
+** Argument zParam is passed the value of the "prefix=" option if one was
+** specified, or NULL otherwise.
+**
+** If no error occurs, SQLITE_OK is returned and *apIndex set to point to
+** the allocated array. *pnIndex is set to the number of elements in the
+** array. If an error does occur, an SQLite error code is returned.
+**
+** Regardless of whether or not an error is returned, it is the responsibility
+** of the caller to call sqlite3_free() on the output array to free it.
+*/
 static int fts3PrefixParameter(
   const char *zParam,             /* ABC in prefix=ABC parameter to parse */
   int *pnIndex,                   /* OUT: size of *apIndex[] array */
-  struct Fts3Index **apIndex,     /* OUT: Array of indexes for this table */
-  struct Fts3Index **apFree       /* OUT: Free this with sqlite3_free() */
+  struct Fts3Index **apIndex      /* OUT: Array of indexes for this table */
 ){
-  struct Fts3Index *aIndex;
-  int nIndex = 1;
+  struct Fts3Index *aIndex;       /* Allocated array */
+  int nIndex = 1;                 /* Number of entries in array */
 
   if( zParam && zParam[0] ){
     const char *p;
@@ -112885,7 +115578,7 @@ static int fts3PrefixParameter(
   }
 
   aIndex = sqlite3_malloc(sizeof(struct Fts3Index) * nIndex);
-  *apIndex = *apFree = aIndex;
+  *apIndex = aIndex;
   *pnIndex = nIndex;
   if( !aIndex ){
     return SQLITE_NOMEM;
@@ -112907,6 +115600,91 @@ static int fts3PrefixParameter(
 }
 
 /*
+** This function is called when initializing an FTS4 table that uses the
+** content=xxx option. It determines the number of and names of the columns
+** of the new FTS4 table.
+**
+** The third argument passed to this function is the value passed to the
+** content=xxx option (i.e. "xxx"). This function queries the database for
+** a table of that name. If found, the output variables are populated
+** as follows:
+**
+**   *pnCol:   Set to the number of columns table xxx has,
+**
+**   *pnStr:   Set to the total amount of space required to store a copy
+**             of each column's name, including the nul-terminator.
+**
+**   *pazCol:  Set to point to an array of *pnCol strings. Each string is
+**             the name of the corresponding column in table xxx. The array
+**             and its contents are allocated using a single allocation. It
+**             is the responsibility of the caller to free this allocation
+**             by eventually passing the *pazCol value to sqlite3_free().
+**
+** If the table cannot be found, an error code is returned and the output
+** variables are undefined. Or, if an OOM is encountered, SQLITE_NOMEM is
+** returned (and the output variables are undefined).
+*/
+static int fts3ContentColumns(
+  sqlite3 *db,                    /* Database handle */
+  const char *zDb,                /* Name of db (i.e. "main", "temp" etc.) */
+  const char *zTbl,               /* Name of content table */
+  const char ***pazCol,           /* OUT: Malloc'd array of column names */
+  int *pnCol,                     /* OUT: Size of array *pazCol */
+  int *pnStr                      /* OUT: Bytes of string content */
+){
+  int rc = SQLITE_OK;             /* Return code */
+  char *zSql;                     /* "SELECT *" statement on zTbl */  
+  sqlite3_stmt *pStmt = 0;        /* Compiled version of zSql */
+
+  zSql = sqlite3_mprintf("SELECT * FROM %Q.%Q", zDb, zTbl);
+  if( !zSql ){
+    rc = SQLITE_NOMEM;
+  }else{
+    rc = sqlite3_prepare(db, zSql, -1, &pStmt, 0);
+  }
+  sqlite3_free(zSql);
+
+  if( rc==SQLITE_OK ){
+    const char **azCol;           /* Output array */
+    int nStr = 0;                 /* Size of all column names (incl. 0x00) */
+    int nCol;                     /* Number of table columns */
+    int i;                        /* Used to iterate through columns */
+
+    /* Loop through the returned columns. Set nStr to the number of bytes of
+    ** space required to store a copy of each column name, including the
+    ** nul-terminator byte.  */
+    nCol = sqlite3_column_count(pStmt);
+    for(i=0; i<nCol; i++){
+      const char *zCol = sqlite3_column_name(pStmt, i);
+      nStr += strlen(zCol) + 1;
+    }
+
+    /* Allocate and populate the array to return. */
+    azCol = (const char **)sqlite3_malloc(sizeof(char *) * nCol + nStr);
+    if( azCol==0 ){
+      rc = SQLITE_NOMEM;
+    }else{
+      char *p = (char *)&azCol[nCol];
+      for(i=0; i<nCol; i++){
+        const char *zCol = sqlite3_column_name(pStmt, i);
+        int n = strlen(zCol)+1;
+        memcpy(p, zCol, n);
+        azCol[i] = p;
+        p += n;
+      }
+    }
+    sqlite3_finalize(pStmt);
+
+    /* Set the output variables. */
+    *pnCol = nCol;
+    *pnStr = nStr;
+    *pazCol = azCol;
+  }
+
+  return rc;
+}
+
+/*
 ** This function is the implementation of both the xConnect and xCreate
 ** methods of the FTS3 virtual table.
 **
@@ -112942,8 +115720,7 @@ static int fts3InitVtab(
   sqlite3_tokenizer *pTokenizer = 0;        /* Tokenizer for this table */
 
   int nIndex;                     /* Size of aIndex[] array */
-  struct Fts3Index *aIndex;       /* Array of indexes for this table */
-  struct Fts3Index *aFree = 0;    /* Free this before returning */
+  struct Fts3Index *aIndex = 0;   /* Array of indexes for this table */
 
   /* The results of parsing supported FTS4 key=value options: */
   int bNoDocsize = 0;             /* True to omit %_docsize table */
@@ -112951,6 +115728,7 @@ static int fts3InitVtab(
   char *zPrefix = 0;              /* Prefix parameter value (or NULL) */
   char *zCompress = 0;            /* compress=? parameter (or NULL) */
   char *zUncompress = 0;          /* uncompress=? parameter (or NULL) */
+  char *zContent = 0;             /* content=? parameter (or NULL) */
 
   assert( strlen(argv[0])==4 );
   assert( (sqlite3_strnicmp(argv[0], "fts4", 4)==0 && isFts4)
@@ -112994,13 +115772,13 @@ static int fts3InitVtab(
       struct Fts4Option {
         const char *zOpt;
         int nOpt;
-        char **pzVar;
       } aFts4Opt[] = {
-        { "matchinfo",   9, 0 },            /* 0 -> MATCHINFO */
-        { "prefix",      6, 0 },            /* 1 -> PREFIX */
-        { "compress",    8, 0 },            /* 2 -> COMPRESS */
-        { "uncompress", 10, 0 },            /* 3 -> UNCOMPRESS */
-        { "order",       5, 0 }             /* 4 -> ORDER */
+        { "matchinfo",   9 },     /* 0 -> MATCHINFO */
+        { "prefix",      6 },     /* 1 -> PREFIX */
+        { "compress",    8 },     /* 2 -> COMPRESS */
+        { "uncompress", 10 },     /* 3 -> UNCOMPRESS */
+        { "order",       5 },     /* 4 -> ORDER */
+        { "content",     7 }      /* 5 -> CONTENT */
       };
 
       int iOpt;
@@ -113046,13 +115824,20 @@ static int fts3InitVtab(
 
             case 4:               /* ORDER */
               if( (strlen(zVal)!=3 || sqlite3_strnicmp(zVal, "asc", 3)) 
-               && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 3)) 
+               && (strlen(zVal)!=4 || sqlite3_strnicmp(zVal, "desc", 4)) 
               ){
                 *pzErr = sqlite3_mprintf("unrecognized order: %s", zVal);
                 rc = SQLITE_ERROR;
               }
               bDescIdx = (zVal[0]=='d' || zVal[0]=='D');
               break;
+
+            default:              /* CONTENT */
+              assert( iOpt==5 );
+              sqlite3_free(zContent);  /* release any earlier content= value */
+              zContent = zVal;
+              zVal = 0;
+              break;
           }
         }
         sqlite3_free(zVal);
@@ -113065,6 +115850,26 @@ static int fts3InitVtab(
       aCol[nCol++] = z;
     }
   }
+
+  /* If a content=xxx option was specified, do the following:
+  **
+  **   1. Ignore any compress= and uncompress= options.
+  **
+  **   2. If no column names were specified as part of the CREATE VIRTUAL
+  **      TABLE statement, use all columns from the content table.
+  */
+  if( rc==SQLITE_OK && zContent ){
+    sqlite3_free(zCompress); 
+    sqlite3_free(zUncompress); 
+    zCompress = 0;
+    zUncompress = 0;
+    if( nCol==0 ){
+      sqlite3_free((void*)aCol); 
+      aCol = 0;
+      rc = fts3ContentColumns(db, argv[1], zContent, &aCol, &nCol, &nString);
+    }
+    assert( rc!=SQLITE_OK || nCol>0 );
+  }
   if( rc!=SQLITE_OK ) goto fts3_init_out;
 
   if( nCol==0 ){
@@ -113080,7 +115885,7 @@ static int fts3InitVtab(
   }
   assert( pTokenizer );
 
-  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex, &aFree);
+  rc = fts3PrefixParameter(zPrefix, &nIndex, &aIndex);
   if( rc==SQLITE_ERROR ){
     assert( zPrefix );
     *pzErr = sqlite3_mprintf("error parsing prefix parameter: %s", zPrefix);
@@ -113109,6 +115914,8 @@ static int fts3InitVtab(
   p->bHasDocsize = (isFts4 && bNoDocsize==0);
   p->bHasStat = isFts4;
   p->bDescIdx = bDescIdx;
+  p->zContentTbl = zContent;
+  zContent = 0;
   TESTONLY( p->inTransaction = -1 );
   TESTONLY( p->mxSavepoint = -1 );
 
@@ -113167,9 +115974,10 @@ static int fts3InitVtab(
 
 fts3_init_out:
   sqlite3_free(zPrefix);
-  sqlite3_free(aFree);
+  sqlite3_free(aIndex);
   sqlite3_free(zCompress);
   sqlite3_free(zUncompress);
+  sqlite3_free(zContent);
   sqlite3_free((void *)aCol);
   if( rc!=SQLITE_OK ){
     if( p ){
@@ -113322,34 +116130,63 @@ static int fts3CloseMethod(sqlite3_vtab_cursor *pCursor){
 }
 
 /*
+** If pCsr->pStmt has not been prepared (i.e. if pCsr->pStmt==0), then
+** compose and prepare an SQL statement of the form:
+**
+**    "SELECT <columns> FROM %_content WHERE rowid = ?"
+**
+** (or the equivalent for a content=xxx table) and set pCsr->pStmt to
+** it. If an error occurs, return an SQLite error code.
+**
+** Otherwise, set *ppStmt to point to pCsr->pStmt and return SQLITE_OK.
+*/
+static int fts3CursorSeekStmt(Fts3Cursor *pCsr, sqlite3_stmt **ppStmt){
+  int rc = SQLITE_OK;
+  if( pCsr->pStmt==0 ){
+    Fts3Table *p = (Fts3Table *)pCsr->base.pVtab;
+    char *zSql;
+    zSql = sqlite3_mprintf("SELECT %s WHERE rowid = ?", p->zReadExprlist);
+    if( !zSql ) return SQLITE_NOMEM;
+    rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+    sqlite3_free(zSql);
+  }
+  *ppStmt = pCsr->pStmt;
+  return rc;
+}
+
+/*
 ** Position the pCsr->pStmt statement so that it is on the row
 ** of the %_content table that contains the last match.  Return
 ** SQLITE_OK on success.  
 */
 static int fts3CursorSeek(sqlite3_context *pContext, Fts3Cursor *pCsr){
+  int rc = SQLITE_OK;
   if( pCsr->isRequireSeek ){
-    sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
-    pCsr->isRequireSeek = 0;
-    if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
-      return SQLITE_OK;
-    }else{
-      int rc = sqlite3_reset(pCsr->pStmt);
-      if( rc==SQLITE_OK ){
-        /* If no row was found and no error has occured, then the %_content
-        ** table is missing a row that is present in the full-text index.
-        ** The data structures are corrupt.
-        */
-        rc = SQLITE_CORRUPT_VTAB;
-      }
-      pCsr->isEof = 1;
-      if( pContext ){
-        sqlite3_result_error_code(pContext, rc);
+    sqlite3_stmt *pStmt = 0;
+
+    rc = fts3CursorSeekStmt(pCsr, &pStmt);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_int64(pCsr->pStmt, 1, pCsr->iPrevId);
+      pCsr->isRequireSeek = 0;
+      if( SQLITE_ROW==sqlite3_step(pCsr->pStmt) ){
+        return SQLITE_OK;
+      }else{
+        rc = sqlite3_reset(pCsr->pStmt);
+        if( rc==SQLITE_OK && ((Fts3Table *)pCsr->base.pVtab)->zContentTbl==0 ){
+          /* If no row was found and no error has occurred, then the %_content
+          ** table is missing a row that is present in the full-text index.
+          ** The data structures are corrupt.  */
+          rc = FTS_CORRUPT_VTAB;
+          pCsr->isEof = 1;
+        }
       }
-      return rc;
     }
-  }else{
-    return SQLITE_OK;
   }
+
+  if( rc!=SQLITE_OK && pContext ){
+    sqlite3_result_error_code(pContext, rc);
+  }
+  return rc;
 }
 
 /*
@@ -113399,7 +116236,7 @@ static int fts3ScanInteriorNode(
   zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
   zCsr += sqlite3Fts3GetVarint(zCsr, &iChild);
   if( zCsr>zEnd ){
-    return SQLITE_CORRUPT_VTAB;
+    return FTS_CORRUPT_VTAB;
   }
   
   while( zCsr<zEnd && (piFirst || piLast) ){
@@ -113417,7 +116254,7 @@ static int fts3ScanInteriorNode(
     zCsr += sqlite3Fts3GetVarint32(zCsr, &nSuffix);
     
     if( nPrefix<0 || nSuffix<0 || &zCsr[nSuffix]>zEnd ){
-      rc = SQLITE_CORRUPT_VTAB;
+      rc = FTS_CORRUPT_VTAB;
       goto finish_scan;
     }
     if( nPrefix+nSuffix>nAlloc ){
@@ -113430,6 +116267,7 @@ static int fts3ScanInteriorNode(
       }
       zBuffer = zNew;
     }
+    assert( zBuffer );
     memcpy(&zBuffer[nPrefix], zCsr, nSuffix);
     nBuffer = nPrefix + nSuffix;
     zCsr += nSuffix;
@@ -113758,8 +116596,6 @@ static void fts3PoslistMerge(
 }
 
 /*
-** nToken==1 searches for adjacent positions.
-**
 ** This function is used to merge two position lists into one. When it is
 ** called, *pp1 and *pp2 must both point to position lists. A position-list is
 ** the part of a doclist that follows each document id. For example, if a row
@@ -113779,6 +116615,8 @@ static void fts3PoslistMerge(
 ** *pp1 so that (pos(*pp2)>pos(*pp1) && pos(*pp2)-pos(*pp1)<=nToken). i.e.
 ** when the *pp1 token appears before the *pp2 token, but not more than nToken
 ** slots before it.
+**
+** e.g. nToken==1 searches for adjacent positions.
 */
 static int fts3PoslistPhraseMerge(
   char **pp,                      /* IN/OUT: Preallocated output buffer */
@@ -113788,7 +116626,7 @@ static int fts3PoslistPhraseMerge(
   char **pp1,                     /* IN/OUT: Left input list */
   char **pp2                      /* IN/OUT: Right input list */
 ){
-  char *p = (pp ? *pp : 0);
+  char *p = *pp;
   char *p1 = *pp1;
   char *p2 = *pp2;
   int iCol1 = 0;
@@ -113797,7 +116635,7 @@ static int fts3PoslistPhraseMerge(
   /* Never set both isSaveLeft and isExact for the same invocation. */
   assert( isSaveLeft==0 || isExact==0 );
 
-  assert( *p1!=0 && *p2!=0 );
+  assert( p!=0 && *p1!=0 && *p2!=0 );
   if( *p1==POS_COLUMN ){ 
     p1++;
     p1 += sqlite3Fts3GetVarint32(p1, &iCol1);
@@ -113814,7 +116652,7 @@ static int fts3PoslistPhraseMerge(
       sqlite3_int64 iPos1 = 0;
       sqlite3_int64 iPos2 = 0;
 
-      if( pp && iCol1 ){
+      if( iCol1 ){
         *p++ = POS_COLUMN;
         p += sqlite3Fts3PutVarint(p, iCol1);
       }
@@ -113829,16 +116667,10 @@ static int fts3PoslistPhraseMerge(
          || (isExact==0 && iPos2>iPos1 && iPos2<=iPos1+nToken) 
         ){
           sqlite3_int64 iSave;
-          if( !pp ){
-            fts3PoslistCopy(0, &p2);
-            fts3PoslistCopy(0, &p1);
-            *pp1 = p1;
-            *pp2 = p2;
-            return 1;
-          }
           iSave = isSaveLeft ? iPos1 : iPos2;
           fts3PutDeltaVarint(&p, &iPrev, iSave+2); iPrev -= 2;
           pSave = 0;
+          assert( p );
         }
         if( (!isSaveLeft && iPos2<=(iPos1+nToken)) || iPos2<=iPos1 ){
           if( (*p2&0xFE)==0 ) break;
@@ -113887,7 +116719,7 @@ static int fts3PoslistPhraseMerge(
   fts3PoslistCopy(0, &p1);
   *pp1 = p1;
   *pp2 = p2;
-  if( !pp || *pp==p ){
+  if( *pp==p ){
     return 0;
   }
   *p++ = 0x00;
@@ -113945,22 +116777,34 @@ static int fts3PoslistNearMerge(
 }
 
 /* 
-** A pointer to an instance of this structure is used as the context 
-** argument to sqlite3Fts3SegReaderIterate()
+** An instance of this structure is used to merge together the (potentially
+** large number of) doclists for each term that matches a prefix query.
+** See function fts3TermSelectMerge() for details.
 */
 typedef struct TermSelect TermSelect;
 struct TermSelect {
-  int isReqPos;
-  char *aaOutput[16];             /* Malloc'd output buffer */
-  int anOutput[16];               /* Size of output in bytes */
+  char *aaOutput[16];             /* Malloc'd output buffers */
+  int anOutput[16];               /* Size each output buffer in bytes */
 };
 
-
+/*
+** This function is used to read a single varint from a buffer. Parameter
+** pEnd points 1 byte past the end of the buffer. When this function is
+** called, if *pp points to pEnd or greater, then the end of the buffer
+** has been reached. In this case *pp is set to 0 and the function returns.
+**
+** If *pp does not point to or past pEnd, then a single varint is read
+** from *pp. *pp is then set to point 1 byte past the end of the read varint.
+**
+** If bDescIdx is false, the value read is added to *pVal before returning.
+** If it is true, the value read is subtracted from *pVal before this 
+** function returns.
+*/
 static void fts3GetDeltaVarint3(
-  char **pp, 
-  char *pEnd, 
-  int bDescIdx,
-  sqlite3_int64 *pVal
+  char **pp,                      /* IN/OUT: Point to read varint from */
+  char *pEnd,                     /* End of buffer */
+  int bDescIdx,                   /* True if docids are descending */
+  sqlite3_int64 *pVal             /* IN/OUT: Integer value */
 ){
   if( *pp>=pEnd ){
     *pp = 0;
@@ -113975,6 +116819,21 @@ static void fts3GetDeltaVarint3(
   }
 }
 
+/*
+** This function is used to write a single varint to a buffer. The varint
+** is written to *pp. Before returning, *pp is set to point 1 byte past the
+** end of the value written.
+**
+** If *pbFirst is zero when this function is called, the value written to
+** the buffer is that of parameter iVal. 
+**
+** If *pbFirst is non-zero when this function is called, then the value 
+** written is either (iVal-*piPrev) (if bDescIdx is zero) or (*piPrev-iVal)
+** (if bDescIdx is non-zero).
+**
+** Before returning, this function always sets *pbFirst to 1 and *piPrev
+** to the value of parameter iVal.
+*/
 static void fts3PutDeltaVarint3(
   char **pp,                      /* IN/OUT: Output pointer */
   int bDescIdx,                   /* True for descending docids */
@@ -113995,10 +116854,34 @@ static void fts3PutDeltaVarint3(
   *pbFirst = 1;
 }
 
-#define COMPARE_DOCID(i1, i2) ((bDescIdx?-1:1) * (i1-i2))
 
+/*
+** This macro is used by various functions that merge doclists. The two
+** arguments are 64-bit docid values. If the value of the stack variable
+** bDescDoclist is 0 when this macro is invoked, then it returns (i1-i2). 
+** Otherwise, (i2-i1).
+**
+** Using this makes it easier to write code that can merge doclists that are
+** sorted in either ascending or descending order.
+*/
+#define DOCID_CMP(i1, i2) ((bDescDoclist?-1:1) * (i1-i2))
+
+/*
+** This function does an "OR" merge of two doclists (output contains all
+** positions contained in either argument doclist). If the docids in the 
+** input doclists are sorted in ascending order, parameter bDescDoclist
+** should be false. If they are sorted in descending order, it should be
+** passed a non-zero value.
+**
+** If no error occurs, *paOut is set to point at an sqlite3_malloc'd buffer
+** containing the output doclist and SQLITE_OK is returned. In this case
+** *pnOut is set to the number of bytes in the output doclist.
+**
+** If an error occurs, an SQLite error code is returned. The output values
+** are undefined in this case.
+*/
 static int fts3DoclistOrMerge(
-  int bDescIdx,                   /* True if arguments are desc */
+  int bDescDoclist,               /* True if arguments are desc */
   char *a1, int n1,               /* First doclist */
   char *a2, int n2,               /* Second doclist */
   char **paOut, int *pnOut        /* OUT: Malloc'd doclist */
@@ -114016,38 +116899,81 @@ static int fts3DoclistOrMerge(
 
   *paOut = 0;
   *pnOut = 0;
-  aOut = sqlite3_malloc(n1+n2);
+
+  /* Allocate space for the output. Both the input and output doclists
+  ** are delta encoded. If they are in ascending order (bDescDoclist==0),
+  ** then the first docid in each list is simply encoded as a varint. For
+  ** each subsequent docid, the varint stored is the difference between the
+  ** current and previous docid (a positive number - since the list is in
+  ** ascending order).
+  **
+  ** The first docid written to the output is therefore encoded using the 
+  ** same number of bytes as it is in whichever of the input lists it is
+  ** read from. And each subsequent docid read from the same input list 
+  ** consumes no more bytes than it did in the input (since
+  ** the difference between it and the previous value in the output must
+  ** be a positive value less than or equal to the delta value read from 
+  ** the input list). The same argument applies to all but the first docid
+  ** read from the 'other' list. And to the contents of all position lists
+  ** that will be copied and merged from the input to the output.
+  **
+  ** However, if the first docid copied to the output is a negative number,
+  ** then the encoding of the first docid from the 'other' input list may
+  ** be larger in the output than it was in the input (since the delta value
+  ** may be a larger positive integer than the actual docid).
+  **
+  ** The space required to store the output is therefore the sum of the
+  ** sizes of the two inputs, plus enough space for exactly one of the input
+  ** docids to grow. 
+  **
+  ** A symmetric argument may be made if the doclists are in descending
+  ** order.
+  */
+  aOut = sqlite3_malloc(n1+n2+FTS3_VARINT_MAX-1);
   if( !aOut ) return SQLITE_NOMEM;
 
   p = aOut;
   fts3GetDeltaVarint3(&p1, pEnd1, 0, &i1);
   fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
   while( p1 || p2 ){
-    sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
+    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
 
     if( p2 && p1 && iDiff==0 ){
-      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
       fts3PoslistMerge(&p, &p1, &p2);
-      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
-      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
     }else if( !p2 || (p1 && iDiff<0) ){
-      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
       fts3PoslistCopy(&p, &p1);
-      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
     }else{
-      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i2);
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i2);
       fts3PoslistCopy(&p, &p2);
-      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
     }
   }
 
   *paOut = aOut;
   *pnOut = (p-aOut);
+  assert( *pnOut<=n1+n2+FTS3_VARINT_MAX-1 );
   return SQLITE_OK;
 }
 
+/*
+** This function does a "phrase" merge of two doclists. In a phrase merge,
+** the output contains a copy of each position from the right-hand input
+** doclist for which there is a position in the left-hand input doclist
+** exactly nDist tokens before it.
+**
+** If the docids in the input doclists are sorted in ascending order,
+** parameter bDescDoclist should be false. If they are sorted in descending
+** order, it should be passed a non-zero value.
+**
+** The right-hand input doclist is overwritten by this function.
+*/
 static void fts3DoclistPhraseMerge(
-  int bDescIdx,                   /* True if arguments are desc */
+  int bDescDoclist,               /* True if arguments are desc */
   int nDist,                      /* Distance from left to right (1=adjacent) */
   char *aLeft, int nLeft,         /* Left doclist */
   char *aRight, int *pnRight      /* IN/OUT: Right/output doclist */
@@ -114070,32 +116996,82 @@ static void fts3DoclistPhraseMerge(
   fts3GetDeltaVarint3(&p2, pEnd2, 0, &i2);
 
   while( p1 && p2 ){
-    sqlite3_int64 iDiff = COMPARE_DOCID(i1, i2);
+    sqlite3_int64 iDiff = DOCID_CMP(i1, i2);
     if( iDiff==0 ){
       char *pSave = p;
       sqlite3_int64 iPrevSave = iPrev;
       int bFirstOutSave = bFirstOut;
 
-      fts3PutDeltaVarint3(&p, bDescIdx, &iPrev, &bFirstOut, i1);
+      fts3PutDeltaVarint3(&p, bDescDoclist, &iPrev, &bFirstOut, i1);
       if( 0==fts3PoslistPhraseMerge(&p, nDist, 0, 1, &p1, &p2) ){
         p = pSave;
         iPrev = iPrevSave;
         bFirstOut = bFirstOutSave;
       }
-      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
-      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
     }else if( iDiff<0 ){
       fts3PoslistCopy(0, &p1);
-      fts3GetDeltaVarint3(&p1, pEnd1, bDescIdx, &i1);
+      fts3GetDeltaVarint3(&p1, pEnd1, bDescDoclist, &i1);
     }else{
       fts3PoslistCopy(0, &p2);
-      fts3GetDeltaVarint3(&p2, pEnd2, bDescIdx, &i2);
+      fts3GetDeltaVarint3(&p2, pEnd2, bDescDoclist, &i2);
     }
   }
 
   *pnRight = p - aOut;
 }
 
+/*
+** Argument pList points to a position list nList bytes in size. This
+** function checks to see if the position list contains any entries for
+** a token in position 0 (of any column). If so, it writes argument iDelta
+** to the output buffer pOut, followed by a position list consisting only
+** of the entries from pList at position 0, and terminated by an 0x00 byte.
+** The value returned is the number of bytes written to pOut (if any).
+*/
+SQLITE_PRIVATE int sqlite3Fts3FirstFilter(
+  sqlite3_int64 iDelta,           /* Varint that may be written to pOut */
+  char *pList,                    /* Position list (no 0x00 term) */
+  int nList,                      /* Size of pList in bytes */
+  char *pOut                      /* Write output here */
+){
+  int nOut = 0;
+  int bWritten = 0;               /* True once iDelta has been written */
+  char *p = pList;
+  char *pEnd = &pList[nList];
+
+  if( *p!=0x01 ){
+    if( *p==0x02 ){
+      nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
+      pOut[nOut++] = 0x02;
+      bWritten = 1;
+    }
+    fts3ColumnlistCopy(0, &p);
+  }
+
+  while( p<pEnd && *p==0x01 ){
+    sqlite3_int64 iCol;
+    p++;
+    p += sqlite3Fts3GetVarint(p, &iCol);
+    if( *p==0x02 ){
+      if( bWritten==0 ){
+        nOut += sqlite3Fts3PutVarint(&pOut[nOut], iDelta);
+        bWritten = 1;
+      }
+      pOut[nOut++] = 0x01;
+      nOut += sqlite3Fts3PutVarint(&pOut[nOut], iCol);
+      pOut[nOut++] = 0x02;
+    }
+    fts3ColumnlistCopy(0, &p);
+  }
+  if( bWritten ){
+    pOut[nOut++] = 0x00;
+  }
+
+  return nOut;
+}
+
 
 /*
 ** Merge all doclists in the TermSelect.aaOutput[] array into a single
@@ -114106,7 +117082,7 @@ static void fts3DoclistPhraseMerge(
 ** the responsibility of the caller to free any doclists left in the
 ** TermSelect.aaOutput[] array.
 */
-static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){
+static int fts3TermSelectFinishMerge(Fts3Table *p, TermSelect *pTS){
   char *aOut = 0;
   int nOut = 0;
   int i;
@@ -114147,24 +117123,25 @@ static int fts3TermSelectMerge(Fts3Table *p, TermSelect *pTS){
 }
 
 /*
-** This function is used as the sqlite3Fts3SegReaderIterate() callback when
-** querying the full-text index for a doclist associated with a term or
-** term-prefix.
+** Merge the doclist aDoclist/nDoclist into the TermSelect object passed
+** as the first argument. The merge is an "OR" merge (see function
+** fts3DoclistOrMerge() for details).
+**
+** This function is called with the doclist for each term that matches
+** a queried prefix. It merges all these doclists into one, the doclist
+** for the specified prefix. Since there can be a very large number of
+** doclists to merge, the merging is done pair-wise using the TermSelect
+** object.
+**
+** This function returns SQLITE_OK if the merge is successful, or an
+** SQLite error code (SQLITE_NOMEM) if an error occurs.
 */
-static int fts3TermSelectCb(
-  Fts3Table *p,                   /* Virtual table object */
-  void *pContext,                 /* Pointer to TermSelect structure */
-  char *zTerm,
-  int nTerm,
-  char *aDoclist,
-  int nDoclist
+static int fts3TermSelectMerge(
+  Fts3Table *p,                   /* FTS table handle */
+  TermSelect *pTS,                /* TermSelect object to merge into */
+  char *aDoclist,                 /* Pointer to doclist */
+  int nDoclist                    /* Size of aDoclist in bytes */
 ){
-  TermSelect *pTS = (TermSelect *)pContext;
-
-  UNUSED_PARAMETER(p);
-  UNUSED_PARAMETER(zTerm);
-  UNUSED_PARAMETER(nTerm);
-
   if( pTS->aaOutput[0]==0 ){
     /* If this is the first term selected, copy the doclist to the output
     ** buffer using memcpy(). */
@@ -114235,6 +117212,13 @@ static int fts3SegReaderCursorAppend(
   return SQLITE_OK;
 }
 
+/*
+** Add seg-reader objects to the Fts3MultiSegReader object passed as the
+** 8th argument.
+**
+** This function returns SQLITE_OK if successful, or an SQLite error code
+** otherwise.
+*/
 static int fts3SegReaderCursor(
   Fts3Table *p,                   /* FTS3 table handle */
   int iIndex,                     /* Index to search (from 0 to p->nIndex-1) */
@@ -114243,11 +117227,11 @@ static int fts3SegReaderCursor(
   int nTerm,                      /* Size of zTerm in bytes */
   int isPrefix,                   /* True for a prefix search */
   int isScan,                     /* True to scan from zTerm to EOF */
-  Fts3MultiSegReader *pCsr       /* Cursor object to populate */
+  Fts3MultiSegReader *pCsr        /* Cursor object to populate */
 ){
-  int rc = SQLITE_OK;
-  int rc2;
-  sqlite3_stmt *pStmt = 0;
+  int rc = SQLITE_OK;             /* Error code */
+  sqlite3_stmt *pStmt = 0;        /* Statement to iterate through segments */
+  int rc2;                        /* Result of sqlite3_reset() */
 
   /* If iLevel is less than 0 and this is not a scan, include a seg-reader 
   ** for the pending-terms. If this is a scan, then this call must be being
@@ -114336,24 +117320,42 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderCursor(
   );
 }
 
+/*
+** In addition to its current configuration, have the Fts3MultiSegReader
+** passed as the 4th argument also scan the doclist for term zTerm/nTerm.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
 static int fts3SegReaderCursorAddZero(
-  Fts3Table *p,
-  const char *zTerm,
-  int nTerm,
-  Fts3MultiSegReader *pCsr
+  Fts3Table *p,                   /* FTS virtual table handle */
+  const char *zTerm,              /* Term to scan doclist of */
+  int nTerm,                      /* Number of bytes in zTerm */
+  Fts3MultiSegReader *pCsr        /* Fts3MultiSegReader to modify */
 ){
   return fts3SegReaderCursor(p, 0, FTS3_SEGCURSOR_ALL, zTerm, nTerm, 0, 0,pCsr);
 }
 
-
-SQLITE_PRIVATE int sqlite3Fts3TermSegReaderCursor(
+/*
+** Open an Fts3MultiSegReader to scan the doclist for term zTerm/nTerm. Or,
+** if isPrefix is true, to scan the doclist for all terms for which 
+** zTerm/nTerm is a prefix. If successful, return SQLITE_OK and write
+** a pointer to the new Fts3MultiSegReader to *ppSegcsr. Otherwise, return
+** an SQLite error code.
+**
+** It is the responsibility of the caller to free this object by eventually
+** passing it to fts3SegReaderCursorFree().
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+** Output parameter *ppSegcsr is set to 0 if an error occurs.
+*/
+static int fts3TermSegReaderCursor(
   Fts3Cursor *pCsr,               /* Virtual table cursor handle */
   const char *zTerm,              /* Term to query for */
   int nTerm,                      /* Size of zTerm in bytes */
   int isPrefix,                   /* True for a prefix search */
   Fts3MultiSegReader **ppSegcsr   /* OUT: Allocated seg-reader cursor */
 ){
-  Fts3MultiSegReader *pSegcsr;   /* Object to allocate and return */
+  Fts3MultiSegReader *pSegcsr;    /* Object to allocate and return */
   int rc = SQLITE_NOMEM;          /* Return code */
 
   pSegcsr = sqlite3_malloc(sizeof(Fts3MultiSegReader));
@@ -114397,6 +117399,9 @@ SQLITE_PRIVATE int sqlite3Fts3TermSegReaderCursor(
   return rc;
 }
 
+/*
+** Free an Fts3MultiSegReader allocated by fts3TermSegReaderCursor().
+*/
 static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
   sqlite3Fts3SegReaderFinish(pSegcsr);
   sqlite3_free(pSegcsr);
@@ -114404,35 +117409,26 @@ static void fts3SegReaderCursorFree(Fts3MultiSegReader *pSegcsr){
 
 /*
 ** This function retreives the doclist for the specified term (or term
-** prefix) from the database. 
-**
-** The returned doclist may be in one of two formats, depending on the 
-** value of parameter isReqPos. If isReqPos is zero, then the doclist is
-** a sorted list of delta-compressed docids (a bare doclist). If isReqPos
-** is non-zero, then the returned list is in the same format as is stored 
-** in the database without the found length specifier at the start of on-disk
-** doclists.
+** prefix) from the database.
 */
 static int fts3TermSelect(
   Fts3Table *p,                   /* Virtual table handle */
   Fts3PhraseToken *pTok,          /* Token to query for */
   int iColumn,                    /* Column to query (or -ve for all columns) */
-  int isReqPos,                   /* True to include position lists in output */
   int *pnOut,                     /* OUT: Size of buffer at *ppOut */
   char **ppOut                    /* OUT: Malloced result buffer */
 ){
   int rc;                         /* Return code */
-  Fts3MultiSegReader *pSegcsr;   /* Seg-reader cursor for this term */
-  TermSelect tsc;                 /* Context object for fts3TermSelectCb() */
+  Fts3MultiSegReader *pSegcsr;    /* Seg-reader cursor for this term */
+  TermSelect tsc;                 /* Object for pair-wise doclist merging */
   Fts3SegFilter filter;           /* Segment term filter configuration */
 
   pSegcsr = pTok->pSegcsr;
   memset(&tsc, 0, sizeof(TermSelect));
-  tsc.isReqPos = isReqPos;
 
-  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY 
+  filter.flags = FTS3_SEGMENT_IGNORE_EMPTY | FTS3_SEGMENT_REQUIRE_POS
         | (pTok->isPrefix ? FTS3_SEGMENT_PREFIX : 0)
-        | (isReqPos ? FTS3_SEGMENT_REQUIRE_POS : 0)
+        | (pTok->bFirst ? FTS3_SEGMENT_FIRST : 0)
         | (iColumn<p->nColumn ? FTS3_SEGMENT_COLUMN_FILTER : 0);
   filter.iCol = iColumn;
   filter.zTerm = pTok->z;
@@ -114442,13 +117438,11 @@ static int fts3TermSelect(
   while( SQLITE_OK==rc
       && SQLITE_ROW==(rc = sqlite3Fts3SegReaderStep(p, pSegcsr)) 
   ){
-    rc = fts3TermSelectCb(p, (void *)&tsc, 
-        pSegcsr->zTerm, pSegcsr->nTerm, pSegcsr->aDoclist, pSegcsr->nDoclist
-    );
+    rc = fts3TermSelectMerge(p, &tsc, pSegcsr->aDoclist, pSegcsr->nDoclist);
   }
 
   if( rc==SQLITE_OK ){
-    rc = fts3TermSelectMerge(p, &tsc);
+    rc = fts3TermSelectFinishMerge(p, &tsc);
   }
   if( rc==SQLITE_OK ){
     *ppOut = tsc.aaOutput[0];
@@ -114474,24 +117468,15 @@ static int fts3TermSelect(
 ** that the doclist is simply a list of docids stored as delta encoded 
 ** varints.
 */
-static int fts3DoclistCountDocids(int isPoslist, char *aList, int nList){
+static int fts3DoclistCountDocids(char *aList, int nList){
   int nDoc = 0;                   /* Return value */
   if( aList ){
     char *aEnd = &aList[nList];   /* Pointer to one byte after EOF */
     char *p = aList;              /* Cursor */
-    if( !isPoslist ){
-      /* The number of docids in the list is the same as the number of 
-      ** varints. In FTS3 a varint consists of a single byte with the 0x80 
-      ** bit cleared and zero or more bytes with the 0x80 bit set. So to
-      ** count the varints in the buffer, just count the number of bytes
-      ** with the 0x80 bit clear.  */
-      while( p<aEnd ) nDoc += (((*p++)&0x80)==0);
-    }else{
-      while( p<aEnd ){
-        nDoc++;
-        while( (*p++)&0x80 );     /* Skip docid varint */
-        fts3PoslistCopy(0, &p);   /* Skip over position list */
-      }
+    while( p<aEnd ){
+      nDoc++;
+      while( (*p++)&0x80 );     /* Skip docid varint */
+      fts3PoslistCopy(0, &p);   /* Skip over position list */
     }
   }
 
@@ -114521,7 +117506,7 @@ static int fts3NextMethod(sqlite3_vtab_cursor *pCursor){
       rc = SQLITE_OK;
     }
   }else{
-    rc = sqlite3Fts3EvalNext((Fts3Cursor *)pCursor);
+    rc = fts3EvalNext((Fts3Cursor *)pCursor);
   }
   assert( ((Fts3Table *)pCsr->base.pVtab)->pSegments==0 );
   return rc;
@@ -114584,8 +117569,8 @@ static int fts3FilterMethod(
       return SQLITE_NOMEM;
     }
 
-    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->nColumn, 
-        iCol, zQuery, -1, &pCsr->pExpr
+    rc = sqlite3Fts3ExprParse(p->pTokenizer, p->azColumn, p->bHasStat, 
+        p->nColumn, iCol, zQuery, -1, &pCsr->pExpr
     );
     if( rc!=SQLITE_OK ){
       if( rc==SQLITE_ERROR ){
@@ -114598,7 +117583,7 @@ static int fts3FilterMethod(
     rc = sqlite3Fts3ReadLock(p);
     if( rc!=SQLITE_OK ) return rc;
 
-    rc = sqlite3Fts3EvalStart(pCsr, pCsr->pExpr, 1);
+    rc = fts3EvalStart(pCsr);
 
     sqlite3Fts3SegmentsClose(p);
     if( rc!=SQLITE_OK ) return rc;
@@ -114612,23 +117597,24 @@ static int fts3FilterMethod(
   ** row by docid.
   */
   if( idxNum==FTS3_FULLSCAN_SEARCH ){
-    const char *zSort = (pCsr->bDesc ? "DESC" : "ASC");
-    const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x ORDER BY docid %s";
-    zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName, zSort);
-  }else{
-    const char *zTmpl = "SELECT %s FROM %Q.'%q_content' AS x WHERE docid = ?";
-    zSql = sqlite3_mprintf(zTmpl, p->zReadExprlist, p->zDb, p->zName);
+    zSql = sqlite3_mprintf(
+        "SELECT %s ORDER BY rowid %s",
+        p->zReadExprlist, (pCsr->bDesc ? "DESC" : "ASC")
+    );
+    if( zSql ){
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
+      sqlite3_free(zSql);
+    }else{
+      rc = SQLITE_NOMEM;
+    }
+  }else if( idxNum==FTS3_DOCID_SEARCH ){
+    rc = fts3CursorSeekStmt(pCsr, &pCsr->pStmt);
+    if( rc==SQLITE_OK ){
+      rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
+    }
   }
-  if( !zSql ) return SQLITE_NOMEM;
-  rc = sqlite3_prepare_v2(p->db, zSql, -1, &pCsr->pStmt, 0);
-  sqlite3_free(zSql);
   if( rc!=SQLITE_OK ) return rc;
 
-  if( idxNum==FTS3_DOCID_SEARCH ){
-    rc = sqlite3_bind_value(pCsr->pStmt, 1, apVal[0]);
-    if( rc!=SQLITE_OK ) return rc;
-  }
-
   return fts3NextMethod(pCursor);
 }
 
@@ -114680,7 +117666,7 @@ static int fts3ColumnMethod(
     sqlite3_result_blob(pContext, &pCsr, sizeof(pCsr), SQLITE_TRANSIENT);
   }else{
     rc = fts3CursorSeek(0, pCsr);
-    if( rc==SQLITE_OK ){
+    if( rc==SQLITE_OK && sqlite3_data_count(pCsr->pStmt)>(iCol+1) ){
       sqlite3_result_value(pContext, sqlite3_column_value(pCsr->pStmt, iCol+1));
     }
   }
@@ -114764,7 +117750,7 @@ static int fts3RollbackMethod(sqlite3_vtab *pVtab){
 */
 static void fts3ReversePoslist(char *pStart, char **ppPoslist){
   char *p = &(*ppPoslist)[-2];
-  char c;
+  char c = 0;
 
   while( p>pStart && (c=*p--)==0 );
   while( p>pStart && (*p & 0x80) | c ){ 
@@ -114973,15 +117959,22 @@ static int fts3RenameMethod(
   sqlite3 *db = p->db;            /* Database connection */
   int rc;                         /* Return Code */
 
+  /* As it happens, the pending terms table is always empty here. This is
+  ** because an "ALTER TABLE RENAME TABLE" statement inside a transaction 
+  ** always opens a savepoint transaction. And the xSavepoint() method 
+  ** flushes the pending terms table. But leave the (no-op) call to
+  ** PendingTermsFlush() in place in case that changes.
+  */
+  assert( p->nPendingData==0 );
   rc = sqlite3Fts3PendingTermsFlush(p);
-  if( rc!=SQLITE_OK ){
-    return rc;
+
+  if( p->zContentTbl==0 ){
+    fts3DbExec(&rc, db,
+      "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';",
+      p->zDb, p->zName, zName
+    );
   }
 
-  fts3DbExec(&rc, db,
-    "ALTER TABLE %Q.'%q_content'  RENAME TO '%q_content';",
-    p->zDb, p->zName, zName
-  );
   if( p->bHasDocsize ){
     fts3DbExec(&rc, db,
       "ALTER TABLE %Q.'%q_docsize'  RENAME TO '%q_docsize';",
@@ -115005,6 +117998,11 @@ static int fts3RenameMethod(
   return rc;
 }
 
+/*
+** The xSavepoint() method.
+**
+** Flush the contents of the pending-terms table to disk.
+*/
 static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
   UNUSED_PARAMETER(iSavepoint);
   assert( ((Fts3Table *)pVtab)->inTransaction );
@@ -115012,6 +118010,12 @@ static int fts3SavepointMethod(sqlite3_vtab *pVtab, int iSavepoint){
   TESTONLY( ((Fts3Table *)pVtab)->mxSavepoint = iSavepoint );
   return fts3SyncMethod(pVtab);
 }
+
+/*
+** The xRelease() method.
+**
+** This is a no-op.
+*/
 static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
   TESTONLY( Fts3Table *p = (Fts3Table*)pVtab );
   UNUSED_PARAMETER(iSavepoint);
@@ -115021,6 +118025,12 @@ static int fts3ReleaseMethod(sqlite3_vtab *pVtab, int iSavepoint){
   TESTONLY( p->mxSavepoint = iSavepoint-1 );
   return SQLITE_OK;
 }
+
+/*
+** The xRollbackTo() method.
+**
+** Discard the contents of the pending terms table.
+*/
 static int fts3RollbackToMethod(sqlite3_vtab *pVtab, int iSavepoint){
   Fts3Table *p = (Fts3Table*)pVtab;
   UNUSED_PARAMETER(iSavepoint);
@@ -115170,18 +118180,6 @@ SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
   return rc;
 }
 
-#if !SQLITE_CORE
-SQLITE_API int sqlite3_extension_init(
-  sqlite3 *db, 
-  char **pzErrMsg,
-  const sqlite3_api_routines *pApi
-){
-  SQLITE_EXTENSION_INIT2(pApi)
-  return sqlite3Fts3Init(db);
-}
-#endif
-
-
 /*
 ** Allocate an Fts3MultiSegReader for each token in the expression headed
 ** by pExpr. 
@@ -115198,11 +118196,11 @@ SQLITE_API int sqlite3_extension_init(
 ** doclist and then traversed.
 */
 static void fts3EvalAllocateReaders(
-  Fts3Cursor *pCsr, 
-  Fts3Expr *pExpr, 
+  Fts3Cursor *pCsr,               /* FTS cursor handle */
+  Fts3Expr *pExpr,                /* Allocate readers for this expression */
   int *pnToken,                   /* OUT: Total number of tokens in phrase. */
   int *pnOr,                      /* OUT: Total number of OR nodes in expr. */
-  int *pRc
+  int *pRc                        /* IN/OUT: Error code */
 ){
   if( pExpr && SQLITE_OK==*pRc ){
     if( pExpr->eType==FTSQUERY_PHRASE ){
@@ -115211,7 +118209,7 @@ static void fts3EvalAllocateReaders(
       *pnToken += nToken;
       for(i=0; i<nToken; i++){
         Fts3PhraseToken *pToken = &pExpr->pPhrase->aToken[i];
-        int rc = sqlite3Fts3TermSegReaderCursor(pCsr, 
+        int rc = fts3TermSegReaderCursor(pCsr, 
             pToken->z, pToken->n, pToken->isPrefix, &pToken->pSegcsr
         );
         if( rc!=SQLITE_OK ){
@@ -115229,12 +118227,20 @@ static void fts3EvalAllocateReaders(
   }
 }
 
+/*
+** Arguments pList/nList contain the doclist for token iToken of phrase p.
+** It is merged into the main doclist stored in p->doclist.aAll/nAll.
+**
+** This function assumes that pList points to a buffer allocated using
+** sqlite3_malloc(). This function takes responsibility for eventually
+** freeing the buffer.
+*/
 static void fts3EvalPhraseMergeToken(
-  Fts3Table *pTab,
-  Fts3Phrase *p,
-  int iToken,
-  char *pList,
-  int nList
+  Fts3Table *pTab,                /* FTS Table pointer */
+  Fts3Phrase *p,                  /* Phrase to merge pList/nList into */
+  int iToken,                     /* Token pList/nList corresponds to */
+  char *pList,                    /* Pointer to doclist */
+  int nList                       /* Number of bytes in pList */
 ){
   assert( iToken!=p->iDoclistToken );
 
@@ -115283,9 +118289,15 @@ static void fts3EvalPhraseMergeToken(
   if( iToken>p->iDoclistToken ) p->iDoclistToken = iToken;
 }
 
+/*
+** Load the doclist for phrase p into p->doclist.aAll/nAll. The loaded doclist
+** does not take deferred tokens into account.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
 static int fts3EvalPhraseLoad(
-  Fts3Cursor *pCsr, 
-  Fts3Phrase *p
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Phrase *p                   /* Phrase object */
 ){
   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
   int iToken;
@@ -115298,7 +118310,7 @@ static int fts3EvalPhraseLoad(
     if( pToken->pSegcsr ){
       int nThis = 0;
       char *pThis = 0;
-      rc = fts3TermSelect(pTab, pToken, p->iColumn, 1, &nThis, &pThis);
+      rc = fts3TermSelect(pTab, pToken, p->iColumn, &nThis, &pThis);
       if( rc==SQLITE_OK ){
         fts3EvalPhraseMergeToken(pTab, p, iToken, pThis, nThis);
       }
@@ -115309,25 +118321,32 @@ static int fts3EvalPhraseLoad(
   return rc;
 }
 
+/*
+** This function is called on each phrase after the position lists for
+** any deferred tokens have been loaded into memory. It updates the phrase's
+** current position list to include only those positions that are really
+** instances of the phrase (after considering deferred tokens). If this
+** means that the phrase does not appear in the current row, doclist.pList
+** and doclist.nList are both zeroed.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
+*/
 static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
-  int iToken;
-  int rc = SQLITE_OK;
-
-  int nMaxUndeferred = pPhrase->iDoclistToken;
-  char *aPoslist = 0;
-  int nPoslist = 0;
-  int iPrev = -1;
+  int iToken;                     /* Used to iterate through phrase tokens */
+  char *aPoslist = 0;             /* Position list for deferred tokens */
+  int nPoslist = 0;               /* Number of bytes in aPoslist */
+  int iPrev = -1;                 /* Token number of previous deferred token */
 
   assert( pPhrase->doclist.bFreeList==0 );
 
-  for(iToken=0; rc==SQLITE_OK && iToken<pPhrase->nToken; iToken++){
+  for(iToken=0; iToken<pPhrase->nToken; iToken++){
     Fts3PhraseToken *pToken = &pPhrase->aToken[iToken];
     Fts3DeferredToken *pDeferred = pToken->pDeferred;
 
     if( pDeferred ){
       char *pList;
       int nList;
-      rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
+      int rc = sqlite3Fts3DeferredTokenList(pDeferred, &pList, &nList);
       if( rc!=SQLITE_OK ) return rc;
 
       if( pList==0 ){
@@ -115362,6 +118381,7 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
   }
 
   if( iPrev>=0 ){
+    int nMaxUndeferred = pPhrase->iDoclistToken;
     if( nMaxUndeferred<0 ){
       pPhrase->doclist.pList = aPoslist;
       pPhrase->doclist.nList = nPoslist;
@@ -115410,9 +118430,15 @@ static int fts3EvalDeferredPhrase(Fts3Cursor *pCsr, Fts3Phrase *pPhrase){
 ** expression to initialize the mechanism for returning rows. Once this
 ** function has been called successfully on an Fts3Phrase, it may be
 ** used with fts3EvalPhraseNext() to iterate through the matching docids.
+**
+** If parameter bOptOk is true, then the phrase may (or may not) use the
+** incremental loading strategy. Otherwise, the entire doclist is loaded into
+** memory within this call.
+**
+** SQLITE_OK is returned if no error occurs, otherwise an SQLite error code.
 */
 static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
-  int rc;
+  int rc;                         /* Error code */
   Fts3PhraseToken *pFirst = &p->aToken[0];
   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
 
@@ -115421,6 +118447,7 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
    && p->nToken==1 
    && pFirst->pSegcsr 
    && pFirst->pSegcsr->bLookup 
+   && pFirst->bFirst==0
   ){
     /* Use the incremental approach. */
     int iCol = (p->iColumn >= pTab->nColumn ? -1 : p->iColumn);
@@ -115440,7 +118467,13 @@ static int fts3EvalPhraseStart(Fts3Cursor *pCsr, int bOptOk, Fts3Phrase *p){
 
 /*
 ** This function is used to iterate backwards (from the end to start) 
-** through doclists.
+** through doclists. It is used by this module to iterate through phrase
+** doclists in reverse and by the fts3_write.c module to iterate through
+** pending-terms lists when writing to databases with "order=desc".
+**
+** The doclist may be sorted in ascending (parameter bDescIdx==0) or 
+** descending (parameter bDescIdx==1) order of docid. Regardless, this
+** function iterates from the end of the doclist to the beginning.
 */
 SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(
   int bDescIdx,                   /* True if the doclist is desc */
@@ -115505,9 +118538,9 @@ SQLITE_PRIVATE void sqlite3Fts3DoclistPrev(
 ** successfully advanced, *pbEof is set to 0.
 */
 static int fts3EvalPhraseNext(
-  Fts3Cursor *pCsr, 
-  Fts3Phrase *p, 
-  u8 *pbEof
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Phrase *p,                  /* Phrase object to advance to next docid */
+  u8 *pbEof                       /* OUT: Set to 1 if EOF */
 ){
   int rc = SQLITE_OK;
   Fts3Doclist *pDL = &p->doclist;
@@ -115553,10 +118586,10 @@ static int fts3EvalPhraseNext(
 
       /* pIter now points just past the 0x00 that terminates the position-
       ** list for document pDL->iDocid. However, if this position-list was
-      ** edited in place by fts3EvalNearTrim2(), then pIter may not actually
+      ** edited in place by fts3EvalNearTrim(), then pIter may not actually
       ** point to the start of the next docid value. The following line deals
       ** with this case by advancing pIter past the zero-padding added by
-      ** fts3EvalNearTrim2().  */
+      ** fts3EvalNearTrim().  */
       while( pIter<pEnd && *pIter==0 ) pIter++;
 
       pDL->pNextDocid = pIter;
@@ -115568,11 +118601,27 @@ static int fts3EvalPhraseNext(
   return rc;
 }
 
+/*
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, fts3EvalPhraseStart() is called on all phrases within the
+** expression. Also the Fts3Expr.bDeferred variable is set to true for any
+** expressions for which all descendent tokens are deferred.
+**
+** If parameter bOptOk is zero, then it is guaranteed that the
+** Fts3Phrase.doclist.aAll/nAll variables contain the entire doclist for
+** each phrase in the expression (subject to deferred token processing).
+** Or, if bOptOk is non-zero, then one or more tokens within the expression
+** may be loaded incrementally, meaning doclist.aAll/nAll is not available.
+**
+** If an error occurs within this function, *pRc is set to an SQLite error
+** code before returning.
+*/
 static void fts3EvalStartReaders(
-  Fts3Cursor *pCsr, 
-  Fts3Expr *pExpr, 
-  int bOptOk,
-  int *pRc
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pExpr,                /* Expression to initialize phrases in */
+  int bOptOk,                     /* True to enable incremental loading */
+  int *pRc                        /* IN/OUT: Error code */
 ){
   if( pExpr && SQLITE_OK==*pRc ){
     if( pExpr->eType==FTSQUERY_PHRASE ){
@@ -115591,25 +118640,44 @@ static void fts3EvalStartReaders(
   }
 }
 
+/*
+** An array of the following structures is assembled as part of the process
+** of selecting tokens to defer before the query starts executing (as part
+** of the xFilter() method). There is one element in the array for each
+** token in the FTS expression.
+**
+** Tokens are divided into AND/NEAR clusters. All tokens in a cluster belong
+** to phrases that are connected only by AND and NEAR operators (not OR or
+** NOT). When determining tokens to defer, each AND/NEAR cluster is considered
+** separately. The root of a token's AND/NEAR cluster is stored in
+** Fts3TokenAndCost.pRoot.
+*/
 typedef struct Fts3TokenAndCost Fts3TokenAndCost;
 struct Fts3TokenAndCost {
   Fts3Phrase *pPhrase;            /* The phrase the token belongs to */
   int iToken;                     /* Position of token in phrase */
   Fts3PhraseToken *pToken;        /* The token itself */
-  Fts3Expr *pRoot; 
-  int nOvfl;
+  Fts3Expr *pRoot;                /* Root of NEAR/AND cluster */
+  int nOvfl;                      /* Number of overflow pages to load doclist */
   int iCol;                       /* The column the token must match */
 };
 
+/*
+** This function is used to populate an allocated Fts3TokenAndCost array.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** Otherwise, if an error occurs during execution, *pRc is set to an
+** SQLite error code.
+*/
 static void fts3EvalTokenCosts(
-  Fts3Cursor *pCsr, 
-  Fts3Expr *pRoot, 
-  Fts3Expr *pExpr, 
-  Fts3TokenAndCost **ppTC,
-  Fts3Expr ***ppOr,
-  int *pRc
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pRoot,                /* Root of current AND/NEAR cluster */
+  Fts3Expr *pExpr,                /* Expression to consider */
+  Fts3TokenAndCost **ppTC,        /* Write new entries to *(*ppTC)++ */
+  Fts3Expr ***ppOr,               /* Write new OR root to *(*ppOr)++ */
+  int *pRc                        /* IN/OUT: Error code */
 ){
-  if( *pRc==SQLITE_OK && pExpr ){
+  if( *pRc==SQLITE_OK ){
     if( pExpr->eType==FTSQUERY_PHRASE ){
       Fts3Phrase *pPhrase = pExpr->pPhrase;
       int i;
@@ -115623,6 +118691,11 @@ static void fts3EvalTokenCosts(
         *pRc = sqlite3Fts3MsrOvfl(pCsr, pTC->pToken->pSegcsr, &pTC->nOvfl);
       }
     }else if( pExpr->eType!=FTSQUERY_NOT ){
+      assert( pExpr->eType==FTSQUERY_OR
+           || pExpr->eType==FTSQUERY_AND
+           || pExpr->eType==FTSQUERY_NEAR
+      );
+      assert( pExpr->pLeft && pExpr->pRight );
       if( pExpr->eType==FTSQUERY_OR ){
         pRoot = pExpr->pLeft;
         **ppOr = pRoot;
@@ -115639,19 +118712,30 @@ static void fts3EvalTokenCosts(
   }
 }
 
+/*
+** Determine the average document (row) size in pages. If successful,
+** write this value to *pnPage and return SQLITE_OK. Otherwise, return
+** an SQLite error code.
+**
+** The average document size in pages is calculated by first
+** determining the average size in bytes, B. If B is less than the amount
+** of data that will fit on a single leaf page of an intkey table in
+** this database, then the average docsize is 1. Otherwise, it is 1 plus
+** the number of overflow pages consumed by a record B bytes in size.
+*/
 static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
   if( pCsr->nRowAvg==0 ){
     /* The average document size, which is required to calculate the cost
-     ** of each doclist, has not yet been determined. Read the required 
-     ** data from the %_stat table to calculate it.
-     **
-     ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
-     ** varints, where nCol is the number of columns in the FTS3 table.
-     ** The first varint is the number of documents currently stored in
-     ** the table. The following nCol varints contain the total amount of
-     ** data stored in all rows of each column of the table, from left
-     ** to right.
-     */
+    ** of each doclist, has not yet been determined. Read the required 
+    ** data from the %_stat table to calculate it.
+    **
+    ** Entry 0 of the %_stat table is a blob containing (nCol+1) FTS3 
+    ** varints, where nCol is the number of columns in the FTS3 table.
+    ** The first varint is the number of documents currently stored in
+    ** the table. The following nCol varints contain the total amount of
+    ** data stored in all rows of each column of the table, from left
+    ** to right.
+    */
     int rc;
     Fts3Table *p = (Fts3Table*)pCsr->base.pVtab;
     sqlite3_stmt *pStmt;
@@ -115672,7 +118756,7 @@ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
     }
     if( nDoc==0 || nByte==0 ){
       sqlite3_reset(pStmt);
-      return SQLITE_CORRUPT_VTAB;
+      return FTS_CORRUPT_VTAB;
     }
 
     pCsr->nDoc = nDoc;
@@ -115686,68 +118770,130 @@ static int fts3EvalAverageDocsize(Fts3Cursor *pCsr, int *pnPage){
   return SQLITE_OK;
 }
 
+/*
+** This function is called to select the tokens (if any) that will be 
+** deferred. The array aTC[] has already been populated when this is
+** called.
+**
+** This function is called once for each AND/NEAR cluster in the 
+** expression. Each invocation determines which tokens to defer within
+** the cluster with root node pRoot. See comments above the definition
+** of struct Fts3TokenAndCost for more details.
+**
+** If no error occurs, SQLITE_OK is returned and sqlite3Fts3DeferToken()
+** is called on each token to defer. Otherwise, an SQLite error code is
+** returned.
+*/
 static int fts3EvalSelectDeferred(
-  Fts3Cursor *pCsr,
-  Fts3Expr *pRoot,
-  Fts3TokenAndCost *aTC,
-  int nTC
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pRoot,                /* Consider tokens with this root node */
+  Fts3TokenAndCost *aTC,          /* Array of expression tokens and costs */
+  int nTC                         /* Number of entries in aTC[] */
 ){
-  int nDocSize = 0;
-  int nDocEst = 0;
-  int rc = SQLITE_OK;
   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
-  int ii;
-
-  int nOvfl = 0;
-  int nTerm = 0;
+  int nDocSize = 0;               /* Number of pages per doc loaded */
+  int rc = SQLITE_OK;             /* Return code */
+  int ii;                         /* Iterator variable for various purposes */
+  int nOvfl = 0;                  /* Total overflow pages used by doclists */
+  int nToken = 0;                 /* Total number of tokens in cluster */
+
+  int nMinEst = 0;                /* The minimum count for any phrase so far. */
+  int nLoad4 = 1;                 /* (Phrases that will be loaded)^4. */
+
+  /* Tokens are never deferred for FTS tables created using the content=xxx
+  ** option. The reason being that it is not guaranteed that the content
+  ** table actually contains the same data as the index. To prevent this from
+  ** causing any problems, the deferred token optimization is completely
+  ** disabled for content=xxx tables. */
+  if( pTab->zContentTbl ){
+    return SQLITE_OK;
+  }
 
+  /* Count the tokens in this AND/NEAR cluster. If none of the doclists
+  ** associated with the tokens spill onto overflow pages, or if there is
+  ** only 1 token, exit early. No tokens to defer in this case. */
   for(ii=0; ii<nTC; ii++){
     if( aTC[ii].pRoot==pRoot ){
       nOvfl += aTC[ii].nOvfl;
-      nTerm++;
+      nToken++;
     }
   }
-  if( nOvfl==0 || nTerm<2 ) return SQLITE_OK;
+  if( nOvfl==0 || nToken<2 ) return SQLITE_OK;
 
+  /* Obtain the average docsize (in pages). */
   rc = fts3EvalAverageDocsize(pCsr, &nDocSize);
+  assert( rc!=SQLITE_OK || nDocSize>0 );
 
-  for(ii=0; ii<nTerm && rc==SQLITE_OK; ii++){
-    int jj;
-    Fts3TokenAndCost *pTC = 0;
 
-    for(jj=0; jj<nTC; jj++){
-      if( aTC[jj].pToken && aTC[jj].pRoot==pRoot 
-       && (!pTC || aTC[jj].nOvfl<pTC->nOvfl) 
+  /* Iterate through all tokens in this AND/NEAR cluster, in ascending order 
+  ** of the number of overflow pages that will be loaded by the pager layer 
+  ** to retrieve the entire doclist for the token from the full-text index.
+  ** Load the doclists for tokens that are either:
+  **
+  **   a. The cheapest token in the entire query (i.e. the one visited by the
+  **      first iteration of this loop), or
+  **
+  **   b. Part of a multi-token phrase.
+  **
+  ** After each token doclist is loaded, merge it with the others from the
+  ** same phrase and count the number of documents that the merged doclist
+  ** contains. Set variable "nMinEst" to the smallest number of documents in 
+  ** any phrase doclist for which 1 or more token doclists have been loaded.
+  ** Let nOther be the number of other phrases for which it is certain that
+  ** one or more tokens will not be deferred.
+  **
+  ** Then, for each token, defer it if loading the doclist would result in
+  ** loading N or more overflow pages into memory, where N is computed as:
+  **
+  **    (nMinEst + 4^nOther - 1) / (4^nOther)
+  */
+  for(ii=0; ii<nToken && rc==SQLITE_OK; ii++){
+    int iTC;                      /* Used to iterate through aTC[] array. */
+    Fts3TokenAndCost *pTC = 0;    /* Set to cheapest remaining token. */
+
+    /* Set pTC to point to the cheapest remaining token. */
+    for(iTC=0; iTC<nTC; iTC++){
+      if( aTC[iTC].pToken && aTC[iTC].pRoot==pRoot 
+       && (!pTC || aTC[iTC].nOvfl<pTC->nOvfl) 
       ){
-        pTC = &aTC[jj];
+        pTC = &aTC[iTC];
       }
     }
     assert( pTC );
 
-    /* At this point pTC points to the cheapest remaining token. */
-    if( ii==0 ){
-      if( pTC->nOvfl ){
-        nDocEst = (pTC->nOvfl * pTab->nPgsz + pTab->nPgsz) / 10;
-      }else{
+    if( ii && pTC->nOvfl>=((nMinEst+(nLoad4/4)-1)/(nLoad4/4))*nDocSize ){
+      /* The number of overflow pages to load for this (and therefore all
+      ** subsequent) tokens is greater than the estimated number of pages 
+      ** that will be loaded if all subsequent tokens are deferred.
+      */
+      Fts3PhraseToken *pToken = pTC->pToken;
+      rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
+      fts3SegReaderCursorFree(pToken->pSegcsr);
+      pToken->pSegcsr = 0;
+    }else{
+      /* Set nLoad4 to the value of (4^nOther) for the next iteration of the
+      ** for-loop. Except, limit the value to 2^24 to prevent it from 
+      ** overflowing the 32-bit integer it is stored in. */
+      if( ii<12 ) nLoad4 = nLoad4*4;
+
+      if( ii==0 || pTC->pPhrase->nToken>1 ){
+        /* Either this is the cheapest token in the entire query, or it is
+        ** part of a multi-token phrase. Either way, the entire doclist will
+        ** (eventually) be loaded into memory. It may as well be now. */
         Fts3PhraseToken *pToken = pTC->pToken;
         int nList = 0;
         char *pList = 0;
-        rc = fts3TermSelect(pTab, pToken, pTC->iCol, 1, &nList, &pList);
+        rc = fts3TermSelect(pTab, pToken, pTC->iCol, &nList, &pList);
         assert( rc==SQLITE_OK || pList==0 );
-
         if( rc==SQLITE_OK ){
-          nDocEst = fts3DoclistCountDocids(1, pList, nList);
+          int nCount;
           fts3EvalPhraseMergeToken(pTab, pTC->pPhrase, pTC->iToken,pList,nList);
+          nCount = fts3DoclistCountDocids(
+              pTC->pPhrase->doclist.aAll, pTC->pPhrase->doclist.nAll
+          );
+          if( ii==0 || nCount<nMinEst ) nMinEst = nCount;
         }
       }
-    }else{
-      if( pTC->nOvfl>=(nDocEst*nDocSize) ){
-        Fts3PhraseToken *pToken = pTC->pToken;
-        rc = sqlite3Fts3DeferToken(pCsr, pToken, pTC->iCol);
-        fts3SegReaderCursorFree(pToken->pSegcsr);
-        pToken->pSegcsr = 0;
-      }
-      nDocEst = 1 + (nDocEst/4);
     }
     pTC->pToken = 0;
   }
@@ -115755,36 +118901,29 @@ static int fts3EvalSelectDeferred(
   return rc;
 }
 
-SQLITE_PRIVATE int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int bOptOk){
+/*
+** This function is called from within the xFilter method. It initializes
+** the full-text query currently stored in pCsr->pExpr. To iterate through
+** the results of a query, the caller does:
+**
+**    fts3EvalStart(pCsr);
+**    while( 1 ){
+**      fts3EvalNext(pCsr);
+**      if( pCsr->bEof ) break;
+**      ... return row pCsr->iPrevId to the caller ...
+**    }
+*/
+static int fts3EvalStart(Fts3Cursor *pCsr){
   Fts3Table *pTab = (Fts3Table *)pCsr->base.pVtab;
   int rc = SQLITE_OK;
   int nToken = 0;
   int nOr = 0;
 
   /* Allocate a MultiSegReader for each token in the expression. */
-  fts3EvalAllocateReaders(pCsr, pExpr, &nToken, &nOr, &rc);
-
-  /* Call fts3EvalPhraseStart() on all phrases in the expression. TODO:
-  ** This call will eventually also be responsible for determining which
-  ** tokens are 'deferred' until the document text is loaded into memory.
-  **
-  ** Each token in each phrase is dealt with using one of the following
-  ** three strategies:
-  **
-  **   1. Entire doclist loaded into memory as part of the
-  **      fts3EvalStartReaders() call.
-  **
-  **   2. Doclist loaded into memory incrementally, as part of each
-  **      sqlite3Fts3EvalNext() call.
-  **
-  **   3. Token doclist is never loaded. Instead, documents are loaded into
-  **      memory and scanned for the token as part of the sqlite3Fts3EvalNext()
-  **      call. This is known as a "deferred" token.
-  */
+  fts3EvalAllocateReaders(pCsr, pCsr->pExpr, &nToken, &nOr, &rc);
 
-  /* If bOptOk is true, check if there are any tokens that should be deferred.
-  */
-  if( rc==SQLITE_OK && bOptOk && nToken>1 && pTab->bHasStat ){
+  /* Determine which, if any, tokens in the expression should be deferred. */
+  if( rc==SQLITE_OK && nToken>1 && pTab->bHasStat ){
     Fts3TokenAndCost *aTC;
     Fts3Expr **apOr;
     aTC = (Fts3TokenAndCost *)sqlite3_malloc(
@@ -115800,7 +118939,7 @@ SQLITE_PRIVATE int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int b
       Fts3TokenAndCost *pTC = aTC;
       Fts3Expr **ppOr = apOr;
 
-      fts3EvalTokenCosts(pCsr, 0, pExpr, &pTC, &ppOr, &rc);
+      fts3EvalTokenCosts(pCsr, 0, pCsr->pExpr, &pTC, &ppOr, &rc);
       nToken = pTC-aTC;
       nOr = ppOr-apOr;
 
@@ -115815,11 +118954,14 @@ SQLITE_PRIVATE int sqlite3Fts3EvalStart(Fts3Cursor *pCsr, Fts3Expr *pExpr, int b
     }
   }
 
-  fts3EvalStartReaders(pCsr, pExpr, bOptOk, &rc);
+  fts3EvalStartReaders(pCsr, pCsr->pExpr, 1, &rc);
   return rc;
 }
 
-static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){
+/*
+** Invalidate the current position list for phrase pPhrase.
+*/
+static void fts3EvalInvalidatePoslist(Fts3Phrase *pPhrase){
   if( pPhrase->doclist.bFreeList ){
     sqlite3_free(pPhrase->doclist.pList);
   }
@@ -115828,8 +118970,30 @@ static void fts3EvalZeroPoslist(Fts3Phrase *pPhrase){
   pPhrase->doclist.bFreeList = 0;
 }
 
-static int fts3EvalNearTrim2(
-  int nNear,
+/*
+** This function is called to edit the position list associated with
+** the phrase object passed as the fifth argument according to a NEAR
+** condition. For example:
+**
+**     abc NEAR/5 "def ghi"
+**
+** Parameter nNear is passed the NEAR distance of the expression (5 in
+** the example above). When this function is called, *paPoslist points to
+** the position list of, and *pnToken is the number of phrase tokens in, the
+** phrase on the other side of the NEAR operator to pPhrase. For example,
+** if pPhrase refers to the "def ghi" phrase, then *paPoslist points to
+** the position list associated with phrase "abc".
+**
+** All positions in the pPhrase position list that are not sufficiently
+** close to a position in the *paPoslist position list are removed. If this
+** leaves 0 positions, zero is returned. Otherwise, non-zero.
+**
+** Before returning, *paPoslist is set to point to the position list
+** associated with pPhrase. And *pnToken is set to the number of tokens in
+** pPhrase.
+*/
+static int fts3EvalNearTrim(
+  int nNear,                      /* NEAR distance. As in "NEAR/nNear". */
   char *aTmp,                     /* Temporary space to use */
   char **paPoslist,               /* IN/OUT: Position list */
   int *pnToken,                   /* IN/OUT: Tokens in phrase of *paPoslist */
@@ -115861,89 +119025,54 @@ static int fts3EvalNearTrim2(
   return res;
 }
 
-static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
-  int res = 1;
-
-  /* The following block runs if pExpr is the root of a NEAR query.
-  ** For example, the query:
-  **
-  **         "w" NEAR "x" NEAR "y" NEAR "z"
-  **
-  ** which is represented in tree form as:
-  **
-  **                               |
-  **                          +--NEAR--+      <-- root of NEAR query
-  **                          |        |
-  **                     +--NEAR--+   "z"
-  **                     |        |
-  **                +--NEAR--+   "y"
-  **                |        |
-  **               "w"      "x"
-  **
-  ** The right-hand child of a NEAR node is always a phrase. The 
-  ** left-hand child may be either a phrase or a NEAR node. There are
-  ** no exceptions to this.
-  */
-  if( *pRc==SQLITE_OK 
-   && pExpr->eType==FTSQUERY_NEAR 
-   && pExpr->bEof==0
-   && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
-  ){
-    Fts3Expr *p; 
-    int nTmp = 0;                 /* Bytes of temp space */
-    char *aTmp;                   /* Temp space for PoslistNearMerge() */
-
-    /* Allocate temporary working space. */
-    for(p=pExpr; p->pLeft; p=p->pLeft){
-      nTmp += p->pRight->pPhrase->doclist.nList;
-    }
-    nTmp += p->pPhrase->doclist.nList;
-    aTmp = sqlite3_malloc(nTmp*2);
-    if( !aTmp ){
-      *pRc = SQLITE_NOMEM;
-      res = 0;
-    }else{
-      char *aPoslist = p->pPhrase->doclist.pList;
-      int nToken = p->pPhrase->nToken;
-
-      for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
-        Fts3Phrase *pPhrase = p->pRight->pPhrase;
-        int nNear = p->nNear;
-        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
-      }
-  
-      aPoslist = pExpr->pRight->pPhrase->doclist.pList;
-      nToken = pExpr->pRight->pPhrase->nToken;
-      for(p=pExpr->pLeft; p && res; p=p->pLeft){
-        int nNear = p->pParent->nNear;
-        Fts3Phrase *pPhrase = (
-            p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
-        );
-        res = fts3EvalNearTrim2(nNear, aTmp, &aPoslist, &nToken, pPhrase);
-      }
-    }
-
-    sqlite3_free(aTmp);
-  }
-
-  return res;
-}
-
 /*
-** This macro is used by the fts3EvalNext() function. The two arguments are
-** 64-bit docid values. If the current query is "ORDER BY docid ASC", then
-** the macro returns (i1 - i2). Or if it is "ORDER BY docid DESC", then
-** it returns (i2 - i1). This allows the same code to be used for merging
-** doclists in ascending or descending order.
+** This function is a no-op if *pRc is other than SQLITE_OK when it is called.
+** Otherwise, it advances the expression passed as the second argument to
+** point to the next matching row in the database. Expressions iterate through
+** matching rows in docid order. Ascending order if Fts3Cursor.bDesc is zero,
+** or descending if it is non-zero.
+**
+** If an error occurs, *pRc is set to an SQLite error code. Otherwise, if
+** successful, the following variables in pExpr are set:
+**
+**   Fts3Expr.bEof                (non-zero if EOF - there is no next row)
+**   Fts3Expr.iDocid              (valid if bEof==0. The docid of the next row)
+**
+** If the expression is of type FTSQUERY_PHRASE, and the expression is not
+** at EOF, then the following variables are populated with the position list
+** for the phrase for the visited row:
+**
+**   Fts3Expr.pPhrase->doclist.nList        (length of pList in bytes)
+**   Fts3Expr.pPhrase->doclist.pList        (pointer to position list)
+**
+** It says above that this function advances the expression to the next
+** matching row. This is usually true, but there are the following exceptions:
+**
+**   1. Deferred tokens are not taken into account. If a phrase consists
+**      entirely of deferred tokens, it is assumed to match every row in
+**      the db. In this case the position-list is not populated at all. 
+**
+**      Or, if a phrase contains one or more deferred tokens and one or
+**      more non-deferred tokens, then the expression is advanced to the 
+**      next possible match, considering only non-deferred tokens. In other
+**      words, if the phrase is "A B C", and "B" is deferred, the expression
+**      is advanced to the next row that contains an instance of "A * C", 
+**      where "*" may match any single token. The position list in this case
+**      is populated as for "A * C" before returning.
+**
+**   2. NEAR is treated as AND. If the expression is "x NEAR y", it is 
+**      advanced to point to the next row that matches "x AND y".
+** 
+** See fts3EvalTestDeferredAndNear() for details on testing if a row is
+** really a match, taking into account deferred tokens and NEAR operators.
 */
-#define DOCID_CMP(i1, i2) ((pCsr->bDesc?-1:1) * (i1-i2))
-
-static void fts3EvalNext(
-  Fts3Cursor *pCsr, 
-  Fts3Expr *pExpr, 
-  int *pRc
+static void fts3EvalNextRow(
+  Fts3Cursor *pCsr,               /* FTS Cursor handle */
+  Fts3Expr *pExpr,                /* Expr. to advance to next matching row */
+  int *pRc                        /* IN/OUT: Error code */
 ){
   if( *pRc==SQLITE_OK ){
+    int bDescDoclist = pCsr->bDesc;         /* Used by DOCID_CMP() macro */
     assert( pExpr->bEof==0 );
     pExpr->bStart = 1;
 
@@ -115953,28 +119082,32 @@ static void fts3EvalNext(
         Fts3Expr *pLeft = pExpr->pLeft;
         Fts3Expr *pRight = pExpr->pRight;
         assert( !pLeft->bDeferred || !pRight->bDeferred );
+
         if( pLeft->bDeferred ){
-          fts3EvalNext(pCsr, pRight, pRc);
+          /* LHS is entirely deferred. So we assume it matches every row.
+          ** Advance the RHS iterator to find the next row visited. */
+          fts3EvalNextRow(pCsr, pRight, pRc);
           pExpr->iDocid = pRight->iDocid;
           pExpr->bEof = pRight->bEof;
         }else if( pRight->bDeferred ){
-          fts3EvalNext(pCsr, pLeft, pRc);
+          /* RHS is entirely deferred. So we assume it matches every row.
+          ** Advance the LHS iterator to find the next row visited. */
+          fts3EvalNextRow(pCsr, pLeft, pRc);
           pExpr->iDocid = pLeft->iDocid;
           pExpr->bEof = pLeft->bEof;
         }else{
-          fts3EvalNext(pCsr, pLeft, pRc);
-          fts3EvalNext(pCsr, pRight, pRc);
-
+          /* Neither the RHS nor the LHS is deferred. */
+          fts3EvalNextRow(pCsr, pLeft, pRc);
+          fts3EvalNextRow(pCsr, pRight, pRc);
           while( !pLeft->bEof && !pRight->bEof && *pRc==SQLITE_OK ){
             sqlite3_int64 iDiff = DOCID_CMP(pLeft->iDocid, pRight->iDocid);
             if( iDiff==0 ) break;
             if( iDiff<0 ){
-              fts3EvalNext(pCsr, pLeft, pRc);
+              fts3EvalNextRow(pCsr, pLeft, pRc);
             }else{
-              fts3EvalNext(pCsr, pRight, pRc);
+              fts3EvalNextRow(pCsr, pRight, pRc);
             }
           }
-
           pExpr->iDocid = pLeft->iDocid;
           pExpr->bEof = (pLeft->bEof || pRight->bEof);
         }
@@ -115990,12 +119123,12 @@ static void fts3EvalNext(
         assert( pRight->bStart || pLeft->iDocid==pRight->iDocid );
 
         if( pRight->bEof || (pLeft->bEof==0 && iCmp<0) ){
-          fts3EvalNext(pCsr, pLeft, pRc);
+          fts3EvalNextRow(pCsr, pLeft, pRc);
         }else if( pLeft->bEof || (pRight->bEof==0 && iCmp>0) ){
-          fts3EvalNext(pCsr, pRight, pRc);
+          fts3EvalNextRow(pCsr, pRight, pRc);
         }else{
-          fts3EvalNext(pCsr, pLeft, pRc);
-          fts3EvalNext(pCsr, pRight, pRc);
+          fts3EvalNextRow(pCsr, pLeft, pRc);
+          fts3EvalNextRow(pCsr, pRight, pRc);
         }
 
         pExpr->bEof = (pLeft->bEof && pRight->bEof);
@@ -116014,17 +119147,17 @@ static void fts3EvalNext(
         Fts3Expr *pRight = pExpr->pRight;
 
         if( pRight->bStart==0 ){
-          fts3EvalNext(pCsr, pRight, pRc);
+          fts3EvalNextRow(pCsr, pRight, pRc);
           assert( *pRc!=SQLITE_OK || pRight->bStart );
         }
 
-        fts3EvalNext(pCsr, pLeft, pRc);
+        fts3EvalNextRow(pCsr, pLeft, pRc);
         if( pLeft->bEof==0 ){
           while( !*pRc 
               && !pRight->bEof 
               && DOCID_CMP(pLeft->iDocid, pRight->iDocid)>0 
           ){
-            fts3EvalNext(pCsr, pRight, pRc);
+            fts3EvalNextRow(pCsr, pRight, pRc);
           }
         }
         pExpr->iDocid = pLeft->iDocid;
@@ -116034,7 +119167,7 @@ static void fts3EvalNext(
 
       default: {
         Fts3Phrase *pPhrase = pExpr->pPhrase;
-        fts3EvalZeroPoslist(pPhrase);
+        fts3EvalInvalidatePoslist(pPhrase);
         *pRc = fts3EvalPhraseNext(pCsr, pPhrase, &pExpr->bEof);
         pExpr->iDocid = pPhrase->doclist.iDocid;
         break;
@@ -116043,15 +119176,116 @@ static void fts3EvalNext(
   }
 }
 
-static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
-  int bHit = 1;
+/*
+** If *pRc is not SQLITE_OK, or if pExpr is not the root node of a NEAR
+** cluster, then this function returns 1 immediately.
+**
+** Otherwise, it checks if the current row really does match the NEAR 
+** expression, using the data currently stored in the position lists 
+** (Fts3Expr->pPhrase.doclist.pList/nList) for each phrase in the expression. 
+**
+** If the current row is a match, the position list associated with each
+** phrase in the NEAR expression is edited in place to contain only those
+** phrase instances sufficiently close to their peers to satisfy all NEAR
+** constraints. In this case it returns 1. If the NEAR expression does not 
+** match the current row, 0 is returned. The position lists may or may not
+** be edited if 0 is returned.
+*/
+static int fts3EvalNearTest(Fts3Expr *pExpr, int *pRc){
+  int res = 1;
+
+  /* The following block runs if pExpr is the root of a NEAR query.
+  ** For example, the query:
+  **
+  **         "w" NEAR "x" NEAR "y" NEAR "z"
+  **
+  ** which is represented in tree form as:
+  **
+  **                               |
+  **                          +--NEAR--+      <-- root of NEAR query
+  **                          |        |
+  **                     +--NEAR--+   "z"
+  **                     |        |
+  **                +--NEAR--+   "y"
+  **                |        |
+  **               "w"      "x"
+  **
+  ** The right-hand child of a NEAR node is always a phrase. The 
+  ** left-hand child may be either a phrase or a NEAR node. There are
+  ** no exceptions to this - it's the way the parser in fts3_expr.c works.
+  */
+  if( *pRc==SQLITE_OK 
+   && pExpr->eType==FTSQUERY_NEAR 
+   && pExpr->bEof==0
+   && (pExpr->pParent==0 || pExpr->pParent->eType!=FTSQUERY_NEAR)
+  ){
+    Fts3Expr *p; 
+    int nTmp = 0;                 /* Bytes of temp space */
+    char *aTmp;                   /* Temp space for PoslistNearMerge() */
+
+    /* Allocate temporary working space. */
+    for(p=pExpr; p->pLeft; p=p->pLeft){
+      nTmp += p->pRight->pPhrase->doclist.nList;
+    }
+    nTmp += p->pPhrase->doclist.nList;
+    aTmp = sqlite3_malloc(nTmp*2);
+    if( !aTmp ){
+      *pRc = SQLITE_NOMEM;
+      res = 0;
+    }else{
+      char *aPoslist = p->pPhrase->doclist.pList;
+      int nToken = p->pPhrase->nToken;
+
+      for(p=p->pParent;res && p && p->eType==FTSQUERY_NEAR; p=p->pParent){
+        Fts3Phrase *pPhrase = p->pRight->pPhrase;
+        int nNear = p->nNear;
+        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+      }
+  
+      aPoslist = pExpr->pRight->pPhrase->doclist.pList;
+      nToken = pExpr->pRight->pPhrase->nToken;
+      for(p=pExpr->pLeft; p && res; p=p->pLeft){
+        int nNear;
+        Fts3Phrase *pPhrase;
+        assert( p->pParent && p->pParent->pLeft==p );
+        nNear = p->pParent->nNear;
+        pPhrase = (
+            p->eType==FTSQUERY_NEAR ? p->pRight->pPhrase : p->pPhrase
+        );
+        res = fts3EvalNearTrim(nNear, aTmp, &aPoslist, &nToken, pPhrase);
+      }
+    }
+
+    sqlite3_free(aTmp);
+  }
+
+  return res;
+}
+
+/*
+** This function is a helper function for fts3EvalTestDeferredAndNear().
+** Assuming no error occurs or has occurred, it returns non-zero if the
+** expression passed as the second argument matches the row that pCsr 
+** currently points to, or zero if it does not.
+**
+** If *pRc is not SQLITE_OK when this function is called, it is a no-op.
+** If an error occurs during execution of this function, *pRc is set to 
+** the appropriate SQLite error code. In this case the returned value is 
+** undefined.
+*/
+static int fts3EvalTestExpr(
+  Fts3Cursor *pCsr,               /* FTS cursor handle */
+  Fts3Expr *pExpr,                /* Expr to test. May or may not be root. */
+  int *pRc                        /* IN/OUT: Error code */
+){
+  int bHit = 1;                   /* Return value */
   if( *pRc==SQLITE_OK ){
     switch( pExpr->eType ){
       case FTSQUERY_NEAR:
       case FTSQUERY_AND:
         bHit = (
-            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
-         && fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
+            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
+         && fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
          && fts3EvalNearTest(pExpr, pRc)
         );
 
@@ -116077,27 +119311,27 @@ static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
           Fts3Expr *p;
           for(p=pExpr; p->pPhrase==0; p=p->pLeft){
             if( p->pRight->iDocid==pCsr->iPrevId ){
-              fts3EvalZeroPoslist(p->pRight->pPhrase);
+              fts3EvalInvalidatePoslist(p->pRight->pPhrase);
             }
           }
           if( p->iDocid==pCsr->iPrevId ){
-            fts3EvalZeroPoslist(p->pPhrase);
+            fts3EvalInvalidatePoslist(p->pPhrase);
           }
         }
 
         break;
 
       case FTSQUERY_OR: {
-        int bHit1 = fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc);
-        int bHit2 = fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc);
+        int bHit1 = fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc);
+        int bHit2 = fts3EvalTestExpr(pCsr, pExpr->pRight, pRc);
         bHit = bHit1 || bHit2;
         break;
       }
 
       case FTSQUERY_NOT:
         bHit = (
-            fts3EvalDeferredTest(pCsr, pExpr->pLeft, pRc)
-         && !fts3EvalDeferredTest(pCsr, pExpr->pRight, pRc)
+            fts3EvalTestExpr(pCsr, pExpr->pLeft, pRc)
+         && !fts3EvalTestExpr(pCsr, pExpr->pRight, pRc)
         );
         break;
 
@@ -116108,7 +119342,7 @@ static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
           Fts3Phrase *pPhrase = pExpr->pPhrase;
           assert( pExpr->bDeferred || pPhrase->doclist.bFreeList==0 );
           if( pExpr->bDeferred ){
-            fts3EvalZeroPoslist(pPhrase);
+            fts3EvalInvalidatePoslist(pPhrase);
           }
           *pRc = fts3EvalDeferredPhrase(pCsr, pPhrase);
           bHit = (pPhrase->doclist.pList!=0);
@@ -116124,27 +119358,49 @@ static int fts3EvalDeferredTest(Fts3Cursor *pCsr, Fts3Expr *pExpr, int *pRc){
 }
 
 /*
-** Return 1 if both of the following are true:
+** This function is called as the second part of each xNext operation when
+** iterating through the results of a full-text query. At this point the
+** cursor points to a row that matches the query expression, with the
+** following caveats:
+**
+**   * Up until this point, "NEAR" operators in the expression have been
+**     treated as "AND".
+**
+**   * Deferred tokens have not yet been considered.
+**
+** If *pRc is not SQLITE_OK when this function is called, it immediately
+** returns 0. Otherwise, it tests whether or not after considering NEAR
+** operators and deferred tokens the current row is still a match for the
+** expression. It returns 1 if both of the following are true:
 **
 **   1. *pRc is SQLITE_OK when this function returns, and
 **
 **   2. After scanning the current FTS table row for the deferred tokens,
-**      it is determined that the row does not match the query.
+**      it is determined that the row does *not* match the query.
 **
 ** Or, if no error occurs and it seems the current row does match the FTS
 ** query, return 0.
 */
-static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
+static int fts3EvalTestDeferredAndNear(Fts3Cursor *pCsr, int *pRc){
   int rc = *pRc;
   int bMiss = 0;
   if( rc==SQLITE_OK ){
+
+    /* If there are one or more deferred tokens, load the current row into
+    ** memory and scan it to determine the position list for each deferred
+    ** token. Then, see if this row is really a match, considering deferred
+    ** tokens and NEAR operators (neither of which were taken into account
+    ** earlier, by fts3EvalNextRow()). 
+    */
     if( pCsr->pDeferred ){
       rc = fts3CursorSeek(0, pCsr);
       if( rc==SQLITE_OK ){
         rc = sqlite3Fts3CacheDeferredDoclists(pCsr);
       }
     }
-    bMiss = (0==fts3EvalDeferredTest(pCsr, pCsr->pExpr, &rc));
+    bMiss = (0==fts3EvalTestExpr(pCsr, pCsr->pExpr, &rc));
+
+    /* Free the position-lists accumulated for each deferred token above. */
     sqlite3Fts3FreeDeferredDoclists(pCsr);
     *pRc = rc;
   }
@@ -116155,7 +119411,7 @@ static int fts3EvalLoadDeferred(Fts3Cursor *pCsr, int *pRc){
 ** Advance to the next document that matches the FTS expression in
 ** Fts3Cursor.pExpr.
 */
-SQLITE_PRIVATE int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){
+static int fts3EvalNext(Fts3Cursor *pCsr){
   int rc = SQLITE_OK;             /* Return Code */
   Fts3Expr *pExpr = pCsr->pExpr;
   assert( pCsr->isEof==0 );
@@ -116167,19 +119423,19 @@ SQLITE_PRIVATE int sqlite3Fts3EvalNext(Fts3Cursor *pCsr){
         sqlite3_reset(pCsr->pStmt);
       }
       assert( sqlite3_data_count(pCsr->pStmt)==0 );
-      fts3EvalNext(pCsr, pExpr, &rc);
+      fts3EvalNextRow(pCsr, pExpr, &rc);
       pCsr->isEof = pExpr->bEof;
       pCsr->isRequireSeek = 1;
       pCsr->isMatchinfoNeeded = 1;
       pCsr->iPrevId = pExpr->iDocid;
-    }while( pCsr->isEof==0 && fts3EvalLoadDeferred(pCsr, &rc) );
+    }while( pCsr->isEof==0 && fts3EvalTestDeferredAndNear(pCsr, &rc) );
   }
   return rc;
 }
 
 /*
 ** Restart interation for expression pExpr so that the next call to
-** sqlite3Fts3EvalNext() visits the first row. Do not allow incremental 
+** fts3EvalNext() visits the first row. Do not allow incremental 
 ** loading or merging of phrase doclists for this iteration.
 **
 ** If *pRc is other than SQLITE_OK when this function is called, it is
@@ -116195,7 +119451,7 @@ static void fts3EvalRestart(
     Fts3Phrase *pPhrase = pExpr->pPhrase;
 
     if( pPhrase ){
-      fts3EvalZeroPoslist(pPhrase);
+      fts3EvalInvalidatePoslist(pPhrase);
       if( pPhrase->bIncr ){
         assert( pPhrase->nToken==1 );
         assert( pPhrase->aToken[0].pSegcsr );
@@ -116311,14 +119567,14 @@ static int fts3EvalGatherStats(
         assert( sqlite3_data_count(pCsr->pStmt)==0 );
 
         /* Advance to the next document */
-        fts3EvalNext(pCsr, pRoot, &rc);
+        fts3EvalNextRow(pCsr, pRoot, &rc);
         pCsr->isEof = pRoot->bEof;
         pCsr->isRequireSeek = 1;
         pCsr->isMatchinfoNeeded = 1;
         pCsr->iPrevId = pRoot->iDocid;
       }while( pCsr->isEof==0 
            && pRoot->eType==FTSQUERY_NEAR 
-           && fts3EvalLoadDeferred(pCsr, &rc) 
+           && fts3EvalTestDeferredAndNear(pCsr, &rc) 
       );
 
       if( rc==SQLITE_OK && pCsr->isEof==0 ){
@@ -116340,10 +119596,10 @@ static int fts3EvalGatherStats(
       */
       fts3EvalRestart(pCsr, pRoot, &rc);
       do {
-        fts3EvalNext(pCsr, pRoot, &rc);
+        fts3EvalNextRow(pCsr, pRoot, &rc);
         assert( pRoot->bEof==0 );
       }while( pRoot->iDocid!=iDocid && rc==SQLITE_OK );
-      fts3EvalLoadDeferred(pCsr, &rc);
+      fts3EvalTestDeferredAndNear(pCsr, &rc);
     }
   }
   return rc;
@@ -116474,7 +119730,7 @@ SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
   if( pPhrase ){
     int i;
     sqlite3_free(pPhrase->doclist.aAll);
-    fts3EvalZeroPoslist(pPhrase);
+    fts3EvalInvalidatePoslist(pPhrase);
     memset(&pPhrase->doclist, 0, sizeof(Fts3Doclist));
     for(i=0; i<pPhrase->nToken; i++){
       fts3SegReaderCursorFree(pPhrase->aToken[i].pSegcsr);
@@ -116483,6 +119739,29 @@ SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
   }
 }
 
+/*
+** Return SQLITE_CORRUPT_VTAB.
+*/
+#ifdef SQLITE_DEBUG
+SQLITE_PRIVATE int sqlite3Fts3Corrupt(){  /* compiled only when SQLITE_DEBUG is defined */
+  return SQLITE_CORRUPT_VTAB;
+}
+#endif
+
+#if !SQLITE_CORE
+/*
+** Initialize API pointer table, if required.
+*/
+SQLITE_API int sqlite3_extension_init(
+  sqlite3 *db,                        /* Connection handed on to sqlite3Fts3Init() */
+  char **pzErrMsg,                    /* Not referenced in this function body */
+  const sqlite3_api_routines *pApi    /* Passed to SQLITE_EXTENSION_INIT2() */
+){
+  SQLITE_EXTENSION_INIT2(pApi)
+  return sqlite3Fts3Init(db);         /* Result is returned to the caller unchanged */
+}
+#endif
+
 #endif
 
 /************** End of fts3.c ************************************************/
@@ -116502,6 +119781,8 @@ SQLITE_PRIVATE void sqlite3Fts3EvalPhraseCleanup(Fts3Phrase *pPhrase){
 */
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <string.h> */
+/* #include <assert.h> */
 
 typedef struct Fts3auxTable Fts3auxTable;
 typedef struct Fts3auxCursor Fts3auxCursor;
@@ -117040,6 +120321,8 @@ SQLITE_API int sqlite3_fts3_enable_parentheses = 0;
 */
 #define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
 
+/* #include <string.h> */
+/* #include <assert.h> */
 
 /*
 ** isNot:
@@ -117053,6 +120336,7 @@ typedef struct ParseContext ParseContext;
 struct ParseContext {
   sqlite3_tokenizer *pTokenizer;      /* Tokenizer module */
   const char **azCol;                 /* Array of column names for fts3 table */
+  int bFts4;                          /* True to allow FTS4-only syntax */
   int nCol;                           /* Number of entries in azCol[] */
   int iDefaultCol;                    /* Default column to query */
   int isNot;                          /* True if getNextNode() sees a unary - */
@@ -117140,9 +120424,21 @@ static int getNextToken(
           pRet->pPhrase->aToken[0].isPrefix = 1;
           iEnd++;
         }
-        if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){
-          pParse->isNot = 1;
+
+        while( 1 ){
+          if( !sqlite3_fts3_enable_parentheses 
+           && iStart>0 && z[iStart-1]=='-' 
+          ){
+            pParse->isNot = 1;
+            iStart--;
+          }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
+            pRet->pPhrase->aToken[0].bFirst = 1;
+            iStart--;
+          }else{
+            break;
+          }
         }
+
       }
       nConsumed = iEnd;
     }
@@ -117241,6 +120537,7 @@ static int getNextString(
 
         pToken->n = nByte;
         pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
+        pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
         nToken = ii+1;
       }
     }
@@ -117262,8 +120559,12 @@ static int getNextString(
     p->pPhrase->nToken = nToken;
 
     zBuf = (char *)&p->pPhrase->aToken[nToken];
-    memcpy(zBuf, zTemp, nTemp);
-    sqlite3_free(zTemp);
+    if( zTemp ){
+      memcpy(zBuf, zTemp, nTemp);
+      sqlite3_free(zTemp);
+    }else{
+      assert( nTemp==0 );
+    }
 
     for(jj=0; jj<p->pPhrase->nToken; jj++){
       p->pPhrase->aToken[jj].z = zBuf;
@@ -117688,6 +120989,7 @@ exprparse_out:
 SQLITE_PRIVATE int sqlite3Fts3ExprParse(
   sqlite3_tokenizer *pTokenizer,      /* Tokenizer module */
   char **azCol,                       /* Array of column names for fts3 table */
+  int bFts4,                          /* True to allow FTS4-only syntax */
   int nCol,                           /* Number of entries in azCol[] */
   int iDefaultCol,                    /* Default column to query */
   const char *z, int n,               /* Text of MATCH query */
@@ -117701,6 +121003,7 @@ SQLITE_PRIVATE int sqlite3Fts3ExprParse(
   sParse.nCol = nCol;
   sParse.iDefaultCol = iDefaultCol;
   sParse.nNest = 0;
+  sParse.bFts4 = bFts4;
   if( z==0 ){
     *ppExpr = 0;
     return SQLITE_OK;
@@ -117741,6 +121044,7 @@ SQLITE_PRIVATE void sqlite3Fts3ExprFree(Fts3Expr *p){
 
 #ifdef SQLITE_TEST
 
+/* #include <stdio.h> */
 
 /*
 ** Function to query the hash-table of tokenizers (see README.tokenizers).
@@ -117889,7 +121193,7 @@ static void fts3ExprTest(
   }
 
   rc = sqlite3Fts3ExprParse(
-      pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr
+      pTokenizer, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
   );
   if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
     sqlite3_result_error(context, "Error parsing expression", -1);
@@ -117951,6 +121255,9 @@ SQLITE_PRIVATE int sqlite3Fts3ExprInitTestInterface(sqlite3* db){
 */
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <string.h> */
 
 
 /*
@@ -118331,6 +121638,10 @@ SQLITE_PRIVATE void *sqlite3Fts3HashInsert(
 */
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
 
 
 /*
@@ -118972,12 +122283,10 @@ SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(
 **     * The FTS3 module is being built into the core of
 **       SQLite (in which case SQLITE_ENABLE_FTS3 is defined).
 */
-#ifndef SQLITE_CORE
-  SQLITE_EXTENSION_INIT1
-#endif
-
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <assert.h> */
+/* #include <string.h> */
 
 /*
 ** Implementation of the SQL scalar function for accessing the underlying 
@@ -119153,6 +122462,8 @@ SQLITE_PRIVATE int sqlite3Fts3InitTokenizer(
 
 #ifdef SQLITE_TEST
 
+/* #include <tcl.h> */
+/* #include <string.h> */
 
 /*
 ** Implementation of a special SQL scalar function for testing tokenizers 
@@ -119464,6 +122775,10 @@ SQLITE_PRIVATE int sqlite3Fts3InitHashTable(
 */
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <assert.h> */
+/* #include <stdlib.h> */
+/* #include <stdio.h> */
+/* #include <string.h> */
 
 
 typedef struct simple_tokenizer {
@@ -119689,6 +123004,9 @@ SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(
 
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <string.h> */
+/* #include <assert.h> */
+/* #include <stdlib.h> */
 
 /*
 ** When full-text index nodes are loaded from disk, the buffer that they
@@ -119922,7 +123240,7 @@ static int fts3SqlStmt(
 /* 4  */  "DELETE FROM %Q.'%q_segdir'",
 /* 5  */  "DELETE FROM %Q.'%q_docsize'",
 /* 6  */  "DELETE FROM %Q.'%q_stat'",
-/* 7  */  "SELECT %s FROM %Q.'%q_content' AS x WHERE rowid=?",
+/* 7  */  "SELECT %s WHERE rowid=?",
 /* 8  */  "SELECT (SELECT max(idx) FROM %Q.'%q_segdir' WHERE level = ?) + 1",
 /* 9  */  "INSERT INTO %Q.'%q_segments'(blockid, block) VALUES(?, ?)",
 /* 10 */  "SELECT coalesce((SELECT max(blockid) FROM %Q.'%q_segments') + 1, 1)",
@@ -119964,7 +123282,7 @@ static int fts3SqlStmt(
     if( eStmt==SQL_CONTENT_INSERT ){
       zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName, p->zWriteExprlist);
     }else if( eStmt==SQL_SELECT_CONTENT_BY_ROWID ){
-      zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist, p->zDb, p->zName);
+      zSql = sqlite3_mprintf(azSql[eStmt], p->zReadExprlist);
     }else{
       zSql = sqlite3_mprintf(azSql[eStmt], p->zDb, p->zName);
     }
@@ -120007,7 +123325,7 @@ static int fts3SelectDocsize(
     rc = sqlite3_step(pStmt);
     if( rc!=SQLITE_ROW || sqlite3_column_type(pStmt, 0)!=SQLITE_BLOB ){
       rc = sqlite3_reset(pStmt);
-      if( rc==SQLITE_OK ) rc = SQLITE_CORRUPT_VTAB;
+      if( rc==SQLITE_OK ) rc = FTS_CORRUPT_VTAB;
       pStmt = 0;
     }else{
       rc = SQLITE_OK;
@@ -120075,17 +123393,24 @@ static void fts3SqlExec(
 ** not what users expect when they get SQLITE_LOCKED_SHAREDCACHE. It can
 ** still happen if the user reads data directly from the %_segments or
 ** %_segdir tables instead of going through FTS3 though.
+**
+** This reasoning does not apply to a content=xxx table.
 */
 SQLITE_PRIVATE int sqlite3Fts3ReadLock(Fts3Table *p){
   int rc;                         /* Return code */
   sqlite3_stmt *pStmt;            /* Statement used to obtain lock */
 
-  rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
-  if( rc==SQLITE_OK ){
-    sqlite3_bind_null(pStmt, 1);
-    sqlite3_step(pStmt);
-    rc = sqlite3_reset(pStmt);
+  if( p->zContentTbl==0 ){
+    rc = fts3SqlStmt(p, SQL_SELECT_CONTENT_BY_ROWID, &pStmt, 0);
+    if( rc==SQLITE_OK ){
+      sqlite3_bind_null(pStmt, 1);
+      sqlite3_step(pStmt);
+      rc = sqlite3_reset(pStmt);
+    }
+  }else{
+    rc = SQLITE_OK;
   }
+
   return rc;
 }
 
@@ -120446,6 +123771,18 @@ static int fts3InsertData(
   int rc;                         /* Return code */
   sqlite3_stmt *pContentInsert;   /* INSERT INTO %_content VALUES(...) */
 
+  if( p->zContentTbl ){
+    sqlite3_value *pRowid = apVal[p->nColumn+3];
+    if( sqlite3_value_type(pRowid)==SQLITE_NULL ){
+      pRowid = apVal[1];
+    }
+    if( sqlite3_value_type(pRowid)!=SQLITE_INTEGER ){
+      return SQLITE_CONSTRAINT;
+    }
+    *piDocid = sqlite3_value_int64(pRowid);
+    return SQLITE_OK;
+  }
+
   /* Locate the statement handle used to insert data into the %_content
   ** table. The SQL for this statement is:
   **
@@ -120496,14 +123833,16 @@ static int fts3InsertData(
 ** Remove all data from the FTS3 table. Clear the hash table containing
 ** pending terms.
 */
-static int fts3DeleteAll(Fts3Table *p){
+static int fts3DeleteAll(Fts3Table *p, int bContent){
   int rc = SQLITE_OK;             /* Return code */
 
   /* Discard the contents of the pending-terms hash table. */
   sqlite3Fts3PendingTermsClear(p);
 
-  /* Delete everything from the %_content, %_segments and %_segdir tables. */
-  fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
+  /* Delete everything from the shadow tables. Except, leave %_content as
+  ** is if bContent is false.  */
+  assert( p->zContentTbl==0 || bContent==0 );
+  if( bContent ) fts3SqlExec(&rc, p, SQL_DELETE_ALL_CONTENT, 0);
   fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGMENTS, 0);
   fts3SqlExec(&rc, p, SQL_DELETE_ALL_SEGDIR, 0);
   if( p->bHasDocsize ){
@@ -120811,7 +124150,7 @@ static int fts3SegReaderNext(
   if( nPrefix<0 || nSuffix<=0 
    || &pNext[nSuffix]>&pReader->aNode[pReader->nNode] 
   ){
-    return SQLITE_CORRUPT_VTAB;
+    return FTS_CORRUPT_VTAB;
   }
 
   if( nPrefix+nSuffix>pReader->nTermAlloc ){
@@ -120841,7 +124180,7 @@ static int fts3SegReaderNext(
   if( &pReader->aDoclist[pReader->nDoclist]>&pReader->aNode[pReader->nNode] 
    || (pReader->nPopulate==0 && pReader->aDoclist[pReader->nDoclist-1])
   ){
-    return SQLITE_CORRUPT_VTAB;
+    return FTS_CORRUPT_VTAB;
   }
   return SQLITE_OK;
 }
@@ -121791,12 +125130,18 @@ static void fts3SegWriterFree(SegmentWriter *pWriter){
 static int fts3IsEmpty(Fts3Table *p, sqlite3_value *pRowid, int *pisEmpty){
   sqlite3_stmt *pStmt;
   int rc;
-  rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
-  if( rc==SQLITE_OK ){
-    if( SQLITE_ROW==sqlite3_step(pStmt) ){
-      *pisEmpty = sqlite3_column_int(pStmt, 0);
+  if( p->zContentTbl ){
+    /* If using the content=xxx option, assume the table is never empty */
+    *pisEmpty = 0;
+    rc = SQLITE_OK;
+  }else{
+    rc = fts3SqlStmt(p, SQL_IS_EMPTY, &pStmt, &pRowid);
+    if( rc==SQLITE_OK ){
+      if( SQLITE_ROW==sqlite3_step(pStmt) ){
+        *pisEmpty = sqlite3_column_int(pStmt, 0);
+      }
+      rc = sqlite3_reset(pStmt);
     }
-    rc = sqlite3_reset(pStmt);
   }
   return rc;
 }
@@ -122148,6 +125493,7 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(
   int isColFilter =    (pCsr->pFilter->flags & FTS3_SEGMENT_COLUMN_FILTER);
   int isPrefix =       (pCsr->pFilter->flags & FTS3_SEGMENT_PREFIX);
   int isScan =         (pCsr->pFilter->flags & FTS3_SEGMENT_SCAN);
+  int isFirst =        (pCsr->pFilter->flags & FTS3_SEGMENT_FIRST);
 
   Fts3SegReader **apSegment = pCsr->apSegment;
   int nSegment = pCsr->nSegment;
@@ -122207,6 +125553,7 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(
     assert( isIgnoreEmpty || (isRequirePos && !isColFilter) );
     if( nMerge==1 
      && !isIgnoreEmpty 
+     && !isFirst 
      && (p->bDescIdx==0 || fts3SegReaderIsPending(apSegment[0])==0)
     ){
       pCsr->nDoclist = apSegment[0]->nDoclist;
@@ -122272,12 +125619,24 @@ SQLITE_PRIVATE int sqlite3Fts3SegReaderStep(
             }
             pCsr->aBuffer = aNew;
           }
-          nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
-          iPrev = iDocid;
-          if( isRequirePos ){
-            memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
-            nDoclist += nList;
-            pCsr->aBuffer[nDoclist++] = '\0';
+
+          if( isFirst ){
+            char *a = &pCsr->aBuffer[nDoclist];
+            int nWrite;
+           
+            nWrite = sqlite3Fts3FirstFilter(iDelta, pList, nList, a);
+            if( nWrite ){
+              iPrev = iDocid;
+              nDoclist += nWrite;
+            }
+          }else{
+            nDoclist += sqlite3Fts3PutVarint(&pCsr->aBuffer[nDoclist], iDelta);
+            iPrev = iDocid;
+            if( isRequirePos ){
+              memcpy(&pCsr->aBuffer[nDoclist], pList, nList);
+              nDoclist += nList;
+              pCsr->aBuffer[nDoclist++] = '\0';
+            }
           }
         }
 
@@ -122453,9 +125812,9 @@ static void fts3DecodeIntArray(
 ** a blob of varints.
 */
 static void fts3InsertDocsize(
-  int *pRC,         /* Result code */
-  Fts3Table *p,     /* Table into which to insert */
-  u32 *aSz          /* Sizes of each column */
+  int *pRC,                       /* Result code */
+  Fts3Table *p,                   /* Table into which to insert */
+  u32 *aSz                        /* Sizes of each column, in tokens */
 ){
   char *pBlob;             /* The BLOB encoding of the document size */
   int nBlob;               /* Number of bytes in the BLOB */
@@ -122578,6 +125937,86 @@ static int fts3DoOptimize(Fts3Table *p, int bReturnDone){
 }
 
 /*
+** This function is called when the user executes the following statement:
+**
+**     INSERT INTO <tbl>(<tbl>) VALUES('rebuild');
+**
+** The entire FTS index is discarded and rebuilt. If the table is one 
+** created using the content=xxx option, then the new index is based on
+** the current contents of the xxx table. Otherwise, it is rebuilt based
+** on the contents of the %_content table.
+*/
+static int fts3DoRebuild(Fts3Table *p){
+  int rc;                         /* Return Code */
+
+  rc = fts3DeleteAll(p, 0);       /* bContent==0: %_content is left as is */
+  if( rc==SQLITE_OK ){
+    u32 *aSz = 0;                 /* Sizes for the current row; also the base of the one allocation */
+    u32 *aSzIns = 0;              /* Running totals over all rows indexed so far */
+    u32 *aSzDel = 0;              /* Stays all-zero here: zeroed by memset, never written below */
+    sqlite3_stmt *pStmt = 0;      /* Statement that walks the rows to be indexed */
+    int nEntry = 0;               /* Number of rows indexed without error */
+
+    /* Compose and prepare an SQL statement to loop through the content table */
+    char *zSql = sqlite3_mprintf("SELECT %s" , p->zReadExprlist);  /* no FROM added here; presumably part of zReadExprlist - confirm */
+    if( !zSql ){
+      rc = SQLITE_NOMEM;
+    }else{
+      rc = sqlite3_prepare_v2(p->db, zSql, -1, &pStmt, 0);
+      sqlite3_free(zSql);         /* SQL text is not needed once prepared */
+    }
+
+    if( rc==SQLITE_OK ){
+      int nByte = sizeof(u32) * (p->nColumn+1)*3;  /* three arrays of nColumn+1 entries each */
+      aSz = (u32 *)sqlite3_malloc(nByte);
+      if( aSz==0 ){
+        rc = SQLITE_NOMEM;
+      }else{
+        memset(aSz, 0, nByte);
+        aSzIns = &aSz[p->nColumn+1];     /* second third of the buffer */
+        aSzDel = &aSzIns[p->nColumn+1];  /* final third of the buffer */
+      }
+    }
+
+    while( rc==SQLITE_OK && SQLITE_ROW==sqlite3_step(pStmt) ){  /* any non-ROW step result ends the loop */
+      int iCol;
+      rc = fts3PendingTermsDocid(p, sqlite3_column_int64(pStmt, 0));  /* result column 0 is used as the docid */
+      aSz[p->nColumn] = 0;        /* last slot is re-accumulated per row as a byte total */
+      for(iCol=0; rc==SQLITE_OK && iCol<p->nColumn; iCol++){
+        const char *z = (const char *) sqlite3_column_text(pStmt, iCol+1);  /* +1 skips the docid column */
+        rc = fts3PendingTermsAdd(p, z, iCol, &aSz[iCol]);  /* presumably stores this column's token count - confirm */
+        aSz[p->nColumn] += sqlite3_column_bytes(pStmt, iCol+1);
+      }
+      if( p->bHasDocsize ){
+        fts3InsertDocsize(&rc, p, aSz);  /* also reached when rc is already an error; presumably a no-op then - confirm */
+      }
+      if( rc!=SQLITE_OK ){
+        sqlite3_finalize(pStmt);
+        pStmt = 0;                /* so the finalize after the loop is skipped */
+      }else{
+        nEntry++;
+        for(iCol=0; iCol<=p->nColumn; iCol++){  /* <= so the byte-total slot is summed too */
+          aSzIns[iCol] += aSz[iCol];
+        }
+      }
+    }
+    if( p->bHasStat ){
+      fts3UpdateDocTotals(&rc, p, aSzIns, aSzDel, nEntry);  /* called even if rc is an error; result comes back via &rc */
+    }
+    sqlite3_free(aSz);            /* one free releases aSz, aSzIns and aSzDel */
+
+    if( pStmt ){
+      int rc2 = sqlite3_finalize(pStmt);
+      if( rc==SQLITE_OK ){        /* an earlier error takes precedence over the finalize result */
+        rc = rc2;
+      }
+    }
+  }
+
+  return rc;
+}
+
+/*
 ** Handle a 'special' INSERT of the form:
 **
 **   "INSERT INTO tbl(tbl) VALUES(<expr>)"
@@ -122594,6 +126033,8 @@ static int fts3SpecialInsert(Fts3Table *p, sqlite3_value *pVal){
     return SQLITE_NOMEM;
   }else if( nVal==8 && 0==sqlite3_strnicmp(zVal, "optimize", 8) ){
     rc = fts3DoOptimize(p, 0);
+  }else if( nVal==7 && 0==sqlite3_strnicmp(zVal, "rebuild", 7) ){
+    rc = fts3DoRebuild(p);
 #ifdef SQLITE_TEST
   }else if( nVal>9 && 0==sqlite3_strnicmp(zVal, "nodesize=", 9) ){
     p->nNodeSize = atoi(&zVal[9]);
@@ -122674,6 +126115,7 @@ SQLITE_PRIVATE int sqlite3Fts3CacheDeferredDoclists(Fts3Cursor *pCsr){
         for(pDef=pCsr->pDeferred; pDef && rc==SQLITE_OK; pDef=pDef->pNext){
           Fts3PhraseToken *pPT = pDef->pToken;
           if( (pDef->iCol>=p->nColumn || pDef->iCol==i)
+           && (pPT->bFirst==0 || iPos==0)
            && (pPT->n==nToken || (pPT->isPrefix && pPT->n<nToken))
            && (0==memcmp(zToken, pPT->z, pPT->n))
           ){
@@ -122765,14 +126207,18 @@ static int fts3DeleteByRowid(
       /* Deleting this row means the whole table is empty. In this case
       ** delete the contents of all three tables and throw away any
       ** data in the pendingTerms hash table.  */
-      rc = fts3DeleteAll(p);
+      rc = fts3DeleteAll(p, 1);
       *pnDoc = *pnDoc - 1;
     }else{
       sqlite3_int64 iRemove = sqlite3_value_int64(pRowid);
       rc = fts3PendingTermsDocid(p, iRemove);
       fts3DeleteTerms(&rc, p, pRowid, aSzDel);
-      fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
-      if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
+      if( p->zContentTbl==0 ){
+        fts3SqlExec(&rc, p, SQL_DELETE_CONTENT, &pRowid);
+        if( sqlite3_changes(p->db) ) *pnDoc = *pnDoc - 1;
+      }else{
+        *pnDoc = *pnDoc - 1;
+      }
       if( p->bHasDocsize ){
         fts3SqlExec(&rc, p, SQL_DELETE_DOCSIZE, &pRowid);
       }
@@ -122795,7 +126241,6 @@ SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(
   Fts3Table *p = (Fts3Table *)pVtab;
   int rc = SQLITE_OK;             /* Return Code */
   int isRemove = 0;               /* True for an UPDATE or DELETE */
-  sqlite3_int64 iRemove = 0;      /* Rowid removed by UPDATE or DELETE */
   u32 *aSzIns = 0;                /* Sizes of inserted documents */
   u32 *aSzDel;                    /* Sizes of deleted documents */
   int nChng = 0;                  /* Net change in number of documents */
@@ -122833,7 +126278,7 @@ SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(
   ** detect the conflict and return SQLITE_CONSTRAINT before beginning to
   ** modify the database file.
   */
-  if( nArg>1 ){
+  if( nArg>1 && p->zContentTbl==0 ){
     /* Find the value object that holds the new rowid value. */
     sqlite3_value *pNewRowid = apVal[3+p->nColumn];
     if( sqlite3_value_type(pNewRowid)==SQLITE_NULL ){
@@ -122878,19 +126323,21 @@ SQLITE_PRIVATE int sqlite3Fts3UpdateMethod(
     assert( sqlite3_value_type(apVal[0])==SQLITE_INTEGER );
     rc = fts3DeleteByRowid(p, apVal[0], &nChng, aSzDel);
     isRemove = 1;
-    iRemove = sqlite3_value_int64(apVal[0]);
   }
   
   /* If this is an INSERT or UPDATE operation, insert the new record. */
   if( nArg>1 && rc==SQLITE_OK ){
     if( bInsertDone==0 ){
       rc = fts3InsertData(p, apVal, pRowid);
-      if( rc==SQLITE_CONSTRAINT ) rc = SQLITE_CORRUPT_VTAB;
+      if( rc==SQLITE_CONSTRAINT && p->zContentTbl==0 ){
+        rc = FTS_CORRUPT_VTAB;
+      }
     }
-    if( rc==SQLITE_OK && (!isRemove || *pRowid!=iRemove) ){
+    if( rc==SQLITE_OK && (!isRemove || *pRowid!=p->iPrevDocid ) ){
       rc = fts3PendingTermsDocid(p, *pRowid);
     }
     if( rc==SQLITE_OK ){
+      assert( p->iPrevDocid==*pRowid );
       rc = fts3InsertTerms(p, apVal, aSzIns);
     }
     if( p->bHasDocsize ){
@@ -122950,6 +126397,8 @@ SQLITE_PRIVATE int sqlite3Fts3Optimize(Fts3Table *p){
 
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 
+/* #include <string.h> */
+/* #include <assert.h> */
 
 /*
 ** Characters that may appear in the second argument to matchinfo().
@@ -123302,6 +126751,7 @@ static int fts3SnippetFindPositions(Fts3Expr *pExpr, int iPhrase, void *ctx){
     int iFirst = 0;
     pPhrase->pList = pCsr;
     fts3GetDeltaPosition(&pCsr, &iFirst);
+    assert( iFirst>=0 );
     pPhrase->pHead = pCsr;
     pPhrase->pTail = pCsr;
     pPhrase->iHead = iFirst;
@@ -123782,7 +127232,7 @@ static int fts3MatchinfoSelectDoctotal(
 
   a = sqlite3_column_blob(pStmt, 0);
   a += sqlite3Fts3GetVarint(a, &nDoc);
-  if( nDoc==0 ) return SQLITE_CORRUPT_VTAB;
+  if( nDoc==0 ) return FTS_CORRUPT_VTAB;
   *pnDoc = (u32)nDoc;
 
   if( paLen ) *paLen = a;
@@ -124343,7 +127793,7 @@ SQLITE_PRIVATE void sqlite3Fts3Offsets(
 
       if( !pTerm ){
         /* All offsets for this column have been gathered. */
-        break;
+        rc = SQLITE_DONE;
       }else{
         assert( iCurrent<=iMinPos );
         if( 0==(0xFE&*pTerm->pList) ){
@@ -124360,8 +127810,8 @@ SQLITE_PRIVATE void sqlite3Fts3Offsets(
               "%d %d %d %d ", iCol, pTerm-sCtx.aTerm, iStart, iEnd-iStart
           );
           rc = fts3StringAppend(&res, aBuffer, -1);
-        }else if( rc==SQLITE_DONE ){
-          rc = SQLITE_CORRUPT_VTAB;
+        }else if( rc==SQLITE_DONE && pTab->zContentTbl==0 ){
+          rc = FTS_CORRUPT_VTAB;
         }
       }
     }
@@ -124537,6 +127987,8 @@ SQLITE_PRIVATE void sqlite3Fts3Matchinfo(
 #else
 #endif
 
+/* #include <string.h> */
+/* #include <assert.h> */
 
 #ifndef SQLITE_AMALGAMATION
 #include "sqlite3rtree.h"
@@ -125700,7 +129152,8 @@ static int rtreeFilter(
         rc = SQLITE_NOMEM;
       }else{
         memset(pCsr->aConstraint, 0, sizeof(RtreeConstraint)*argc);
-        assert( (idxStr==0 && argc==0) || (int)strlen(idxStr)==argc*2 );
+        assert( (idxStr==0 && argc==0)
+                || (idxStr && (int)strlen(idxStr)==argc*2) );
         for(ii=0; ii<argc; ii++){
           RtreeConstraint *p = &pCsr->aConstraint[ii];
           p->op = idxStr[ii*2];
@@ -126001,7 +129454,10 @@ static int ChooseLeaf(
 
     float fMinGrowth = 0.0;
     float fMinArea = 0.0;
+#if VARIANT_RSTARTREE_CHOOSESUBTREE
     float fMinOverlap = 0.0;
+    float overlap;
+#endif
 
     int nCell = NCELL(pNode);
     RtreeCell cell;
@@ -126033,7 +129489,6 @@ static int ChooseLeaf(
       int bBest = 0;
       float growth;
       float area;
-      float overlap = 0.0;
       nodeGetCell(pRtree, pNode, iCell, &cell);
       growth = cellGrowth(pRtree, &cell, pCell);
       area = cellArea(pRtree, &cell);
@@ -126041,6 +129496,8 @@ static int ChooseLeaf(
 #if VARIANT_RSTARTREE_CHOOSESUBTREE
       if( ii==(pRtree->iDepth-1) ){
         overlap = cellOverlapEnlargement(pRtree,&cell,pCell,aCell,nCell,iCell);
+      }else{
+        overlap = 0.0;
       }
       if( (iCell==0) 
        || (overlap<fMinOverlap) 
@@ -126048,6 +129505,7 @@ static int ChooseLeaf(
        || (overlap==fMinOverlap && growth==fMinGrowth && area<fMinArea)
       ){
         bBest = 1;
+        fMinOverlap = overlap;
       }
 #else
       if( iCell==0||growth<fMinGrowth||(growth==fMinGrowth && area<fMinArea) ){
@@ -126055,7 +129513,6 @@ static int ChooseLeaf(
       }
 #endif
       if( bBest ){
-        fMinOverlap = overlap;
         fMinGrowth = growth;
         fMinArea = area;
         iBest = cell.iRowid;
@@ -127751,6 +131208,7 @@ SQLITE_API int sqlite3_extension_init(
 #include <unicode/ustring.h>
 #include <unicode/ucol.h>
 
+/* #include <assert.h> */
 
 #ifndef SQLITE_CORE
   SQLITE_EXTENSION_INIT1
@@ -128230,8 +131688,12 @@ SQLITE_API int sqlite3_extension_init(
 #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3)
 #ifdef SQLITE_ENABLE_ICU
 
+/* #include <assert.h> */
+/* #include <string.h> */
 
 #include <unicode/ubrk.h>
+/* #include <unicode/ucol.h> */
+/* #include <unicode/ustring.h> */
 #include <unicode/utf16.h>
 
 typedef struct IcuTokenizer IcuTokenizer;
diff --git a/libgda/sqlite/sqlite-src/sqlite3.h b/libgda/sqlite/sqlite-src/sqlite3.h
index ed9edbd..efaf3c8 100644
--- a/libgda/sqlite/sqlite-src/sqlite3.h
+++ b/libgda/sqlite/sqlite-src/sqlite3.h
@@ -107,9 +107,9 @@ extern "C" {
 ** [sqlite3_libversion_number()], [sqlite3_sourceid()],
 ** [sqlite_version()] and [sqlite_source_id()].
 */
-#define SQLITE_VERSION        "3.7.7.1"
-#define SQLITE_VERSION_NUMBER 3007007
-#define SQLITE_SOURCE_ID      "2011-06-28 17:39:05 af0d91adf497f5f36ec3813f04235a6e195a605f"
+#define SQLITE_VERSION        "3.7.9"
+#define SQLITE_VERSION_NUMBER 3007009
+#define SQLITE_SOURCE_ID      "2011-11-01 00:52:41 c7c6050ef060877ebe77b41d959e9df13f8c9b5e"
 
 /*
 ** CAPI3REF: Run-Time Library Version Numbers
@@ -741,6 +741,41 @@ struct sqlite3_io_methods {
 ** Applications should not call [sqlite3_file_control()] with this
 ** opcode as doing so may disrupt the operation of the specialized VFSes
 ** that do require it.  
+**
+** ^The [SQLITE_FCNTL_WIN32_AV_RETRY] opcode is used to configure automatic
+** retry counts and intervals for certain disk I/O operations for the
+** windows [VFS] in order to provide robustness against
+** anti-virus programs.  By default, the windows VFS will retry file read,
+** file write, and file delete operations up to 10 times, with a delay
+** of 25 milliseconds before the first retry and with the delay increasing
+** by an additional 25 milliseconds with each subsequent retry.  This
+** opcode allows those two values (10 retries and 25 milliseconds of delay)
+** to be adjusted.  The values are changed for all database connections
+** within the same process.  The argument is a pointer to an array of two
+** integers where the first integer is the new retry count and the second
+** integer is the delay.  If either integer is negative, then the setting
+** is not changed but instead the prior value of that setting is written
+** into the array entry, allowing the current retry settings to be
+** interrogated.  The zDbName parameter is ignored.
+**
+** ^The [SQLITE_FCNTL_PERSIST_WAL] opcode is used to set or query the
+** persistent [WAL | Write AHead Log] setting.  By default, the auxiliary
+** write ahead log and shared memory files used for transaction control
+** are automatically deleted when the latest connection to the database
+** closes.  Setting persistent WAL mode causes those files to persist after
+** close.  Persisting the files is useful when other processes that do not
+** have write permission on the directory containing the database file want
+** to read the database file, as the WAL and shared memory files must exist
+** in order for the database to be readable.  The fourth parameter to
+** [sqlite3_file_control()] for this opcode should be a pointer to an integer.
+** That integer is 0 to disable persistent WAL mode or 1 to enable persistent
+** WAL mode.  If the integer is -1, then it is overwritten with the current
+** WAL persistence setting.
+**
+** ^The [SQLITE_FCNTL_OVERWRITE] opcode is invoked by SQLite after opening
+** a write transaction to indicate that, unless it is rolled back for some
+** reason, the entire database file will be overwritten by the current 
+** transaction. This is used by VACUUM operations.
 */
 #define SQLITE_FCNTL_LOCKSTATE        1
 #define SQLITE_GET_LOCKPROXYFILE      2
@@ -750,7 +785,9 @@ struct sqlite3_io_methods {
 #define SQLITE_FCNTL_CHUNK_SIZE       6
 #define SQLITE_FCNTL_FILE_POINTER     7
 #define SQLITE_FCNTL_SYNC_OMITTED     8
-
+#define SQLITE_FCNTL_WIN32_AV_RETRY   9
+#define SQLITE_FCNTL_PERSIST_WAL     10
+#define SQLITE_FCNTL_OVERWRITE       11
 
 /*
 ** CAPI3REF: Mutex Handle
@@ -1178,16 +1215,10 @@ SQLITE_API int sqlite3_db_config(sqlite3*, int op, ...);
 ** order to verify that SQLite recovers gracefully from such
 ** conditions.
 **
-** The xMalloc and xFree methods must work like the
-** malloc() and free() functions from the standard C library.
-** The xRealloc method must work like realloc() from the standard C library
-** with the exception that if the second argument to xRealloc is zero,
-** xRealloc must be a no-op - it must not perform any allocation or
-** deallocation.  ^SQLite guarantees that the second argument to
+** The xMalloc, xRealloc, and xFree methods must work like the
+** malloc(), realloc() and free() functions from the standard C library.
+** ^SQLite guarantees that the second argument to
 ** xRealloc is always a value returned by a prior call to xRoundup.
-** And so in cases where xRoundup always returns a positive number,
-** xRealloc can perform exactly as the standard library realloc() and
-** still be in compliance with this specification.
 **
 ** xSize should return the allocated size of a memory allocation
 ** previously obtained from xMalloc or xRealloc.  The allocated size
@@ -1373,8 +1404,8 @@ struct sqlite3_mem_methods {
 ** allocator is engaged to handle all of SQLites memory allocation needs.
 ** The first pointer (the memory pointer) must be aligned to an 8-byte
 ** boundary or subsequent behavior of SQLite will be undefined.
-** The minimum allocation size is capped at 2^12. Reasonable values
-** for the minimum allocation size are 2^5 through 2^8.</dd>
+** The minimum allocation size is capped at 2**12. Reasonable values
+** for the minimum allocation size are 2**5 through 2**8.</dd>
 **
 ** [[SQLITE_CONFIG_MUTEX]] <dt>SQLITE_CONFIG_MUTEX</dt>
 ** <dd> ^(This option takes a single argument which is a pointer to an
@@ -2773,7 +2804,8 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal);
 ** that the supplied string is nul-terminated, then there is a small
 ** performance advantage to be gained by passing an nByte parameter that
 ** is equal to the number of bytes in the input string <i>including</i>
-** the nul-terminator bytes.
+** the nul-terminator bytes as this saves SQLite from having to
+** make a copy of the input string.
 **
 ** ^If pzTail is not NULL then *pzTail is made to point to the first byte
 ** past the end of the first SQL statement in zSql.  These routines only
@@ -2824,7 +2856,7 @@ SQLITE_API int sqlite3_limit(sqlite3*, int id, int newVal);
 ** ^The specific value of WHERE-clause [parameter] might influence the 
 ** choice of query plan if the parameter is the left-hand side of a [LIKE]
 ** or [GLOB] operator or if the parameter is compared to an indexed column
-** and the [SQLITE_ENABLE_STAT2] compile-time option is enabled.
+** and the [SQLITE_ENABLE_STAT3] compile-time option is enabled.
 ** the 
 ** </li>
 ** </ol>
@@ -2994,6 +3026,13 @@ typedef struct sqlite3_context sqlite3_context;
 ** number of <u>bytes</u> in the value, not the number of characters.)^
 ** ^If the fourth parameter is negative, the length of the string is
 ** the number of bytes up to the first zero terminator.
+** If a non-negative fourth parameter is provided to sqlite3_bind_text()
+** or sqlite3_bind_text16() then that parameter must be the byte offset
+** where the NUL terminator would occur assuming the string were NUL
+** terminated.  If any NUL characters occur at byte offsets less than 
+** the value of the fourth parameter then the resulting string value will
+** contain embedded NULs.  The result of expressions involving strings
+** with embedded NULs is undefined.
 **
 ** ^The fifth argument to sqlite3_bind_blob(), sqlite3_bind_text(), and
 ** sqlite3_bind_text16() is a destructor used to dispose of the BLOB or
@@ -3327,6 +3366,12 @@ SQLITE_API int sqlite3_step(sqlite3_stmt*);
 ** (via calls to the [sqlite3_column_int | sqlite3_column_*()] of
 ** interfaces) then sqlite3_data_count(P) returns 0.
 ** ^The sqlite3_data_count(P) routine also returns 0 if P is a NULL pointer.
+** ^The sqlite3_data_count(P) routine returns 0 if the previous call to
+** [sqlite3_step](P) returned [SQLITE_DONE].  ^The sqlite3_data_count(P)
+** will return non-zero if the previous call to [sqlite3_step](P) returned
+** [SQLITE_ROW], except in the case of the [PRAGMA incremental_vacuum]
+** where it always returns zero since each step of that multi-step
+** pragma returns 0 columns of data.
 **
 ** See also: [sqlite3_column_count()]
 */
@@ -4006,7 +4051,12 @@ typedef void (*sqlite3_destructor_type)(void*);
 ** ^If the 3rd parameter to the sqlite3_result_text* interfaces
 ** is non-negative, then as many bytes (not characters) of the text
 ** pointed to by the 2nd parameter are taken as the application-defined
-** function result.
+** function result.  If the 3rd parameter is non-negative, then it
+** must be the byte offset into the string where the NUL terminator would
+** appear if the string were NUL terminated.  If any NUL characters occur
+** in the string at a byte offset that is less than the value of the 3rd
+** parameter, then the resulting string will contain embedded NULs and the
+** result of expressions operating on strings with embedded NULs is undefined.
 ** ^If the 4th parameter to the sqlite3_result_text* interfaces
 ** or sqlite3_result_blob is a non-NULL pointer, then SQLite calls that
 ** function as the destructor on the text or BLOB result when it has
@@ -5789,6 +5839,18 @@ SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int r
 ** the database connection.)^
 ** ^The highwater mark associated with SQLITE_DBSTATUS_STMT_USED is always 0.
 ** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_HIT]] ^(<dt>SQLITE_DBSTATUS_CACHE_HIT</dt>
+** <dd>This parameter returns the number of pager cache hits that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_HIT 
+** is always 0.
+** </dd>
+**
+** [[SQLITE_DBSTATUS_CACHE_MISS]] ^(<dt>SQLITE_DBSTATUS_CACHE_MISS</dt>
+** <dd>This parameter returns the number of pager cache misses that have
+** occurred.)^ ^The highwater mark associated with SQLITE_DBSTATUS_CACHE_MISS 
+** is always 0.
+** </dd>
 ** </dl>
 */
 #define SQLITE_DBSTATUS_LOOKASIDE_USED       0
@@ -5798,7 +5860,9 @@ SQLITE_API int sqlite3_db_status(sqlite3*, int op, int *pCur, int *pHiwtr, int r
 #define SQLITE_DBSTATUS_LOOKASIDE_HIT        4
 #define SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE  5
 #define SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL  6
-#define SQLITE_DBSTATUS_MAX                  6   /* Largest defined DBSTATUS */
+#define SQLITE_DBSTATUS_CACHE_HIT            7
+#define SQLITE_DBSTATUS_CACHE_MISS           8
+#define SQLITE_DBSTATUS_MAX                  8   /* Largest defined DBSTATUS */
 
 
 /*
@@ -5852,7 +5916,6 @@ SQLITE_API int sqlite3_stmt_status(sqlite3_stmt*, int op,int resetFlg);
 ** A non-zero value in this counter may indicate an opportunity to
 ** improvement performance by adding permanent indices that do not
 ** need to be reinitialized each time the statement is run.</dd>
-**
 ** </dl>
 */
 #define SQLITE_STMTSTATUS_FULLSCAN_STEP     1
diff --git a/providers/sqlcipher/sqlcipher.patch b/providers/sqlcipher/sqlcipher.patch
index fd56d40..332b6a9 100644
--- a/providers/sqlcipher/sqlcipher.patch
+++ b/providers/sqlcipher/sqlcipher.patch
@@ -1,6 +1,6 @@
---- sqlite3.c.sqlite	2011-08-31 19:26:40.563916786 +0200
-+++ sqlite3.c	2011-08-31 19:23:59.243916716 +0200
-@@ -11847,9 +11847,46 @@
+--- sqlite3.c.sqlite	2012-01-31 11:12:59.360849603 +0100
++++ sqlite3.c	2012-01-31 11:08:52.510576554 +0100
+@@ -11952,9 +11952,47 @@
  #endif /* _SQLITEINT_H_ */
  
  /************** End of sqliteInt.h *******************************************/
@@ -41,6 +41,7 @@
 +/* BEGIN CRYPTO */
 +#ifdef SQLITE_HAS_CODEC
 +
++/* #include <assert.h> */
 +/************** Include btreeInt.h in the middle of crypto.c *****************/
 +/************** Begin file btreeInt.h ****************************************/
  /*
@@ -49,7 +50,7 @@
  **
  ** The author disclaims copyright to this source code.  In place of
  ** a legal notice, here is a blessing:
-@@ -11859,223 +11896,2004 @@
+@@ -11964,313 +12002,2262 @@
  **    May you share freely, never taking more than you give.
  **
  *************************************************************************
@@ -410,7 +411,7 @@
 -   SQLITE_THREADSAFE==1,      /* bFullMutex */
 -   SQLITE_USE_URI,            /* bOpenUri */
 -   0x7ffffffe,                /* mxStrlen */
--   100,                       /* szLookaside */
+-   128,                       /* szLookaside */
 -   500,                       /* nLookaside */
 -   {0,0,0,0,0,0,0,0},         /* m */
 -   {0,0,0,0,0,0,0,0,0},       /* mutex */
@@ -589,7 +590,6 @@
 -**
 -** The author disclaims copyright to this source code.  In place of
 -** a legal notice, here is a blessing:
--**
 +** An instance of this object represents a single database file.
 +** 
 +** A single database file can be in use as the same time by two
@@ -598,16 +598,22 @@
 +** private Btree object for the file and each of those Btrees points
 +** to this one BtShared object.  BtShared.nRef is the number of
 +** connections currently sharing this database file.
-+**
+ **
+-**    May you do good and not evil.
+-**    May you find forgiveness for yourself and forgive others.
+-**    May you share freely, never taking more than you give.
 +** Fields in this structure are accessed under the BtShared.mutex
 +** mutex, except for nRef and pNext which are accessed under the
 +** global SQLITE_MUTEX_STATIC_MASTER mutex.  The pPager field
 +** may not be modified once it is initially set as long as nRef>0.
 +** The pSchema field may be set once under BtShared.mutex and
 +** thereafter is unchanged as long as nRef>0.
-+**
+ **
+-*************************************************************************
 +** isPending:
-+**
+ **
+-** This file implements routines used to report what compile-time options
+-** SQLite was built with.
 +**   If a BtShared client fails to obtain a write-lock on a database
 +**   table (because there exists one or more read-locks on the table),
 +**   the shared-cache enters 'pending-lock' state and isPending is
@@ -623,7 +629,7 @@
 +**   transaction.
 +**
 +**   This feature is included to help prevent writer-starvation.
-+*/
+ */
 +struct BtShared {
 +  Pager *pPager;        /* The page cache */
 +  sqlite3 *db;          /* Database connection currently using this Btree */
@@ -662,7 +668,8 @@
 +#endif
 +  u8 *pTmpSpace;        /* BtShared.pageSize bytes of space for tmp use */
 +};
-+
+ 
+-#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
 +/*
 +** An instance of the following structure is used to hold information
 +** about a cell.  The parseCellPtr() function fills in this structure
@@ -679,7 +686,7 @@
 +  u16 iOverflow; /* Offset to overflow page number.  Zero if no overflow */
 +  u16 nSize;     /* Size of the cell content on the main b-tree page */
 +};
-+
+ 
 +/*
 +** Maximum depth of an SQLite B-Tree structure. Any B-Tree deeper than
 +** this will be declared corrupt. This value is calculated based on a
@@ -690,11 +697,16 @@
 +** assumed that the database is corrupt.
 +*/
 +#define BTCURSOR_MAX_DEPTH 20
-+
-+/*
+ 
+ /*
+-** An array of names of all compile-time options.  This array should 
+-** be sorted A-Z.
 +** A cursor is a pointer to a particular entry within a particular
 +** b-tree within a database file.
-+**
+ **
+-** This array looks large, but in a typical installation actually uses
+-** only a handful of compile-time options, so most times this array is usually
+-** rather short and uses little memory space.
 +** The entry is identified by its MemPage and the index in
 +** MemPage.aCell[] of the entry.
 +**
@@ -704,7 +716,8 @@
 +**
 +** Fields in this structure are accessed under the BtShared.mutex
 +** found at self->pBt->mutex. 
-+*/
+ */
+-static const char * const azCompileOpt[] = {
 +struct BtCursor {
 +  Btree *pBtree;            /* The Btree to which this cursor belongs */
 +  BtShared *pBt;            /* The BtShared this cursor points to */
@@ -728,7 +741,11 @@
 +  u16 aiIdx[BTCURSOR_MAX_DEPTH];        /* Current index in apPage[i] */
 +  MemPage *apPage[BTCURSOR_MAX_DEPTH];  /* Pages from root to current page */
 +};
-+
+ 
+-/* These macros are provided to "stringify" the value of the define
+-** for those options in which the value is meaningful. */
+-#define CTIMEOPT_VAL_(opt) #opt
+-#define CTIMEOPT_VAL(opt) CTIMEOPT_VAL_(opt)
 +/*
 +** Potential values for BtCursor.eState.
 +**
@@ -758,7 +775,24 @@
 +#define CURSOR_VALID             1
 +#define CURSOR_REQUIRESEEK       2
 +#define CURSOR_FAULT             3
-+
+ 
+-#ifdef SQLITE_32BIT_ROWID
+-  "32BIT_ROWID",
+-#endif
+-#ifdef SQLITE_4_BYTE_ALIGNED_MALLOC
+-  "4_BYTE_ALIGNED_MALLOC",
+-#endif
+-#ifdef SQLITE_CASE_SENSITIVE_LIKE
+-  "CASE_SENSITIVE_LIKE",
+-#endif
+-#ifdef SQLITE_CHECK_PAGES
+-  "CHECK_PAGES",
+-#endif
+-#ifdef SQLITE_COVERAGE_TEST
+-  "COVERAGE_TEST",
+-#endif
+-#ifdef SQLITE_DEBUG
+-  "DEBUG",
 +/* 
 +** The database page the PENDING_BYTE occupies. This page is never used.
 +*/
@@ -839,7 +873,9 @@
 +#define ISAUTOVACUUM (pBt->autoVacuum)
 +#else
 +#define ISAUTOVACUUM 0
-+#endif
+ #endif
+-#ifdef SQLITE_DEFAULT_LOCKING_MODE
+-  "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE),
 +
 +
 +/*
@@ -911,7 +947,9 @@
 +
 +#ifndef CIPHER
 +#define CIPHER "aes-256-cbc"
-+#endif
+ #endif
+-#ifdef SQLITE_DISABLE_DIRSYNC
+-  "DISABLE_DIRSYNC",
 +
 +#define CIPHER_DECRYPT 0
 +#define CIPHER_ENCRYPT 1
@@ -922,17 +960,43 @@
 +
 +#ifndef PBKDF2_ITER
 +#define PBKDF2_ITER 4000
-+#endif
+ #endif
+-#ifdef SQLITE_DISABLE_LFS
+-  "DISABLE_LFS",
 +
 +#ifndef DEFAULT_USE_HMAC
 +#define DEFAULT_USE_HMAC 1
-+#endif
+ #endif
+-#ifdef SQLITE_ENABLE_ATOMIC_WRITE
+-  "ENABLE_ATOMIC_WRITE",
++
++/* by default, sqlcipher will use a reduced number of iterations to generate
++   the HMAC key / or transform a raw cipher key 
++   */
++#ifndef FAST_PBKDF2_ITER
++#define FAST_PBKDF2_ITER 2
+ #endif
+-#ifdef SQLITE_ENABLE_CEROD
+-  "ENABLE_CEROD",
++
++/* this is a fixed random array that will be xor'd with the database salt to ensure that the
++   salt passed to the HMAC key derivation function is not the same as that used to derive
++   the encryption key. This can be overridden at compile time but it will make the resulting
++   binary incompatible with the default builds when using HMAC. A future version of SQLcipher
++   will likely allow this to be defined at runtime via pragma */ 
++#ifndef HMAC_SALT_MASK
++#define HMAC_SALT_MASK 0x3a
+ #endif
+-#ifdef SQLITE_ENABLE_COLUMN_METADATA
+-  "ENABLE_COLUMN_METADATA",
 +
 +#ifdef CODEC_DEBUG
 +#define CODEC_TRACE(X)  {printf X;fflush(stdout);}
 +#else
 +#define CODEC_TRACE(X)
-+#endif
+ #endif
+-#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
+-  "ENABLE_EXPENSIVE_ASSERT",
 +
 +
 +/* extensions defined in pragma.c */ 
@@ -997,6 +1061,8 @@
 +int sqlcipher_codec_ctx_set_kdf_iter(codec_ctx *, int, int);
 +void* sqlcipher_codec_ctx_get_kdf_salt(codec_ctx *ctx);
 +
++int sqlcipher_codec_ctx_set_fast_kdf_iter(codec_ctx *, int, int);
++
 +int sqlcipher_codec_ctx_set_cipher(codec_ctx *, const char *, int);
 +
 +void* sqlcipher_codec_ctx_get_data(codec_ctx *);
@@ -1006,8 +1072,12 @@
 +int sqlcipher_codec_ctx_set_use_hmac(codec_ctx *ctx, int use);
 +/* end extensions defined in crypto_impl.c */
 +
-+#endif
-+#endif
+ #endif
+-#ifdef SQLITE_ENABLE_FTS1
+-  "ENABLE_FTS1",
+ #endif
+-#ifdef SQLITE_ENABLE_FTS2
+-  "ENABLE_FTS2",
 +/* END CRYPTO */
 +
 +/************** End of crypto.h **********************************************/
@@ -1020,7 +1090,19 @@
 +  if(pDb->pBt) {
 +    codec_ctx *ctx;
 +    sqlite3pager_get_codec(pDb->pBt->pBt->pPager, (void **) &ctx);
-+    return sqlcipher_codec_ctx_set_kdf_iter(ctx, kdf_iter, for_ctx);
++    if(ctx) return sqlcipher_codec_ctx_set_kdf_iter(ctx, kdf_iter, for_ctx);
++  }
++  return SQLITE_ERROR;
++}
++
++int codec_set_fast_kdf_iter(sqlite3* db, int nDb, int kdf_iter, int for_ctx) {
++  struct Db *pDb = &db->aDb[nDb];
++  CODEC_TRACE(("codec_set_kdf_iter: entered db=%d nDb=%d kdf_iter=%d for_ctx=%d\n", db, nDb, kdf_iter, for_ctx));
++
++  if(pDb->pBt) {
++    codec_ctx *ctx;
++    sqlite3pager_get_codec(pDb->pBt->pBt->pPager, (void **) &ctx);
++    if(ctx) return sqlcipher_codec_ctx_set_fast_kdf_iter(ctx, kdf_iter, for_ctx);
 +  }
 +  return SQLITE_ERROR;
 +}
@@ -1049,14 +1131,14 @@
 +    int rc;
 +    codec_ctx *ctx;
 +    sqlite3pager_get_codec(pDb->pBt->pBt->pPager, (void **) &ctx);
-+
-+    rc = sqlcipher_codec_ctx_set_use_hmac(ctx, use);
-+    if(rc != SQLITE_OK) return rc;
-+
-+    /* since the use of hmac has changed, the page size may also change */
-+    /* Note: before forcing the page size we need to force pageSizeFixed to 0, else  
++    if(ctx) {
++      rc = sqlcipher_codec_ctx_set_use_hmac(ctx, use);
++      if(rc != SQLITE_OK) return rc;
++      /* since the use of hmac has changed, the page size may also change */
++      /* Note: before forcing the page size we need to force pageSizeFixed to 0, else  
 +             sqliteBtreeSetPageSize will block the change  */
-+    return codec_set_btree_to_codec_pagesize(db, pDb, ctx);
++      return codec_set_btree_to_codec_pagesize(db, pDb, ctx);
++    }
 +  }
 +  return SQLITE_ERROR;
 +}
@@ -1070,10 +1152,11 @@
 +    codec_ctx *ctx;
 +    sqlite3pager_get_codec(pDb->pBt->pBt->pPager, (void **) &ctx);
 +
-+    rc = sqlcipher_codec_ctx_set_pagesize(ctx, size);
-+    if(rc != SQLITE_OK) return rc;
-+
-+    return codec_set_btree_to_codec_pagesize(db, pDb, ctx);
++    if(ctx) {
++      rc = sqlcipher_codec_ctx_set_pagesize(ctx, size);
++      if(rc != SQLITE_OK) return rc;
++      return codec_set_btree_to_codec_pagesize(db, pDb, ctx);
++    }
 +  }
 +  return SQLITE_ERROR;
 +}
@@ -1091,7 +1174,7 @@
 +  if(pDb->pBt) {
 +    codec_ctx *ctx;
 +    sqlite3pager_get_codec(pDb->pBt->pBt->pPager, (void **) &ctx);
-+    return sqlcipher_codec_ctx_set_cipher(ctx, cipher_name, for_ctx);
++    if(ctx) return sqlcipher_codec_ctx_set_cipher(ctx, cipher_name, for_ctx);
 +  }
 +  return SQLITE_ERROR;
 +}
@@ -1102,7 +1185,7 @@
 +  if(pDb->pBt) {
 +    codec_ctx *ctx;
 +    sqlite3pager_get_codec(pDb->pBt->pBt->pPager, (void **) &ctx);
-+    return sqlcipher_codec_ctx_set_pass(ctx, zKey, nKey, for_ctx);
++    if(ctx) return sqlcipher_codec_ctx_set_pass(ctx, zKey, nKey, for_ctx);
 +  }
 +  return SQLITE_ERROR;
 +} 
@@ -1123,7 +1206,11 @@
 +  void *kdf_salt = sqlcipher_codec_ctx_get_kdf_salt(ctx);
 +  CODEC_TRACE(("sqlite3Codec: entered pgno=%d, mode=%d, page_sz=%d\n", pgno, mode, page_sz));
 +
-+  sqlcipher_codec_key_derive(ctx); /* call to derive keys if not present yet */
++  /* call to derive keys if not present yet */
++  if((rc = sqlcipher_codec_key_derive(ctx)) != SQLITE_OK) {
++   sqlcipher_codec_ctx_set_error(ctx, rc); 
++   return NULL;
++  }
 +
 +  if(pgno == 1) offset = FILE_HEADER_SZ; /* adjust starting pointers in data page for header offset on first page*/
 +
@@ -1241,7 +1328,7 @@
 +
 +      sqlite3_mutex_enter(db->mutex);
 +
-+      codec_set_pass_key(db, 0, pKey, nKey, 1);
++      codec_set_pass_key(db, 0, pKey, nKey, CIPHER_WRITE_CTX);
 +    
 +      /* do stuff here to rewrite the database 
 +      ** 1. Create a transaction on the database
@@ -1300,14 +1387,61 @@
 +
 +
 +/* END CRYPTO */
-+#endif
+ #endif
+-#ifdef SQLITE_ENABLE_FTS3
+-  "ENABLE_FTS3",
 +
 +/************** End of crypto.c **********************************************/
 +/************** Begin file crypto_impl.c *************************************/
++/* 
++** SQLCipher
++** crypto_impl.c developed by Stephen Lombardo (Zetetic LLC) 
++** sjlombardo at zetetic dot net
++** http://zetetic.net
++** 
++** Copyright (c) 2011, ZETETIC LLC
++** All rights reserved.
++** 
++** Redistribution and use in source and binary forms, with or without
++** modification, are permitted provided that the following conditions are met:
++**     * Redistributions of source code must retain the above copyright
++**       notice, this list of conditions and the following disclaimer.
++**     * Redistributions in binary form must reproduce the above copyright
++**       notice, this list of conditions and the following disclaimer in the
++**       documentation and/or other materials provided with the distribution.
++**     * Neither the name of the ZETETIC LLC nor the
++**       names of its contributors may be used to endorse or promote products
++**       derived from this software without specific prior written permission.
++** 
++** THIS SOFTWARE IS PROVIDED BY ZETETIC LLC ''AS IS'' AND ANY
++** EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++** WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++** DISCLAIMED. IN NO EVENT SHALL ZETETIC LLC BE LIABLE FOR ANY
++** DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++** (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++** LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++** SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++**  
++*/
++/* BEGIN CRYPTO */
++#ifdef SQLITE_HAS_CODEC
++
 +#include <openssl/rand.h>
 +#include <openssl/evp.h>
 +#include <openssl/hmac.h>
-+
++#ifndef OMIT_MEMLOCK
++#if defined(__unix__) || defined(__APPLE__) 
++#include <sys/mman.h>
++#elif defined(_WIN32)
++/* # include <windows.h> */
+ #endif
+-#ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
+-  "ENABLE_FTS3_PARENTHESIS",
+ #endif
+-#ifdef SQLITE_ENABLE_FTS4
+-  "ENABLE_FTS4",
 +
 +/* the default implementation of SQLCipher uses a cipher_ctx
 +   to keep track of read / write state separately. The following
@@ -1315,7 +1449,10 @@
 +typedef struct {
 +  int derive_key;
 +  EVP_CIPHER *evp_cipher;
++  EVP_CIPHER_CTX ectx;
++  HMAC_CTX hctx;
 +  int kdf_iter;
++  int fast_kdf_iter;
 +  int key_sz;
 +  int iv_sz;
 +  int block_sz;
@@ -1333,7 +1470,7 @@
 +int sqlcipher_cipher_ctx_copy(cipher_ctx *, cipher_ctx *);
 +int sqlcipher_cipher_ctx_init(cipher_ctx **);
 +int sqlcipher_cipher_ctx_set_pass(cipher_ctx *, const void *, int);
-+int  sqlcipher_cipher_ctx_key_derive(codec_ctx *, cipher_ctx *);
++int sqlcipher_cipher_ctx_key_derive(codec_ctx *, cipher_ctx *);
 +
 +/* prototype for pager HMAC function */
 +int sqlcipher_page_hmac(cipher_ctx *, Pgno, unsigned char *, int, unsigned char *);
@@ -1342,6 +1479,7 @@
 +  int kdf_salt_sz;
 +  int page_sz;
 +  unsigned char *kdf_salt;
++  unsigned char *hmac_kdf_salt;
 +  unsigned char *buffer;
 +  Btree *pBt;
 +  cipher_ctx *read_ctx;
@@ -1368,23 +1506,61 @@
 +}
 +
 +/* generate a defined number of pseudorandom bytes */
-+int sqlcipher_pseudorandom (void *buffer, int length) {
-+  return RAND_pseudo_bytes(buffer, length);
++int sqlcipher_random (void *buffer, int length) {
++  return RAND_bytes((unsigned char *)buffer, length);
 +}
 +
 +/**
-+  * Free and wipe memory
++  * Free and wipe memory. Uses SQLite's internal sqlite3_free so that memory
++  * can be counted and memory leak detection works in the test suite.
 +  * If ptr is not null memory will be freed. 
 +  * If sz is greater than zero, the memory will be overwritten with zero before it is freed
++  * If sz is > 0, and not compiled with OMIT_MEMLOCK, system will attempt to unlock the
++  * memory segment so it can be paged
 +  */
 +void sqlcipher_free(void *ptr, int sz) {
 +  if(ptr) {
-+    if(sz > 0) memset(ptr, 0, sz); // FIXME - require buffer size
++    if(sz > 0) {
++      memset(ptr, 0, sz);
++#ifndef OMIT_MEMLOCK
++#if defined(__unix__) || defined(__APPLE__) 
++      munlock(ptr, sz);
++#elif defined(_WIN32)
++      VirtualUnlock(ptr, sz);
+ #endif
+-#ifdef SQLITE_ENABLE_ICU
+-  "ENABLE_ICU",
+ #endif
+-#ifdef SQLITE_ENABLE_IOTRACE
+-  "ENABLE_IOTRACE",
++    }
 +    sqlite3_free(ptr);
 +  }
 +}
 +
 +/**
++  * allocate memory. Uses sqlite's internal malloc wrapper so memory can be
++  * reference counted and leak detection works. Unless compiled with OMIT_MEMLOCK
++  * attempts to lock the memory pages so sensitive information won't be swapped
++  */
++void* sqlcipher_malloc(int sz) {
++  void *ptr = sqlite3Malloc(sz);
++#ifndef OMIT_MEMLOCK
++  if(ptr) {
++#if defined(__unix__) || defined(__APPLE__) 
++    mlock(ptr, sz);
++#elif defined(_WIN32)
++    VirtualLock(ptr, sz);
+ #endif
+-#ifdef SQLITE_ENABLE_LOAD_EXTENSION
+-  "ENABLE_LOAD_EXTENSION",
++  }
++#endif
++  return ptr;
++}
++
++
++/**
 +  * Initialize a a new cipher_ctx struct. This function will allocate memory
 +  * for the cipher context and for the key
 +  * 
@@ -1393,12 +1569,12 @@
 +  */
 +int sqlcipher_cipher_ctx_init(cipher_ctx **iCtx) {
 +  cipher_ctx *ctx;
-+  *iCtx = sqlite3Malloc(sizeof(cipher_ctx));
++  *iCtx = (cipher_ctx *) sqlcipher_malloc(sizeof(cipher_ctx));
 +  ctx = *iCtx;
 +  if(ctx == NULL) return SQLITE_NOMEM;
 +  memset(ctx, 0, sizeof(cipher_ctx)); 
-+  ctx->key = sqlite3Malloc(EVP_MAX_KEY_LENGTH);
-+  ctx->hmac_key = sqlite3Malloc(EVP_MAX_KEY_LENGTH);
++  ctx->key = (unsigned char *) sqlcipher_malloc(EVP_MAX_KEY_LENGTH);
++  ctx->hmac_key = (unsigned char *) sqlcipher_malloc(EVP_MAX_KEY_LENGTH);
 +  if(ctx->key == NULL) return SQLITE_NOMEM;
 +  if(ctx->hmac_key == NULL) return SQLITE_NOMEM;
 +  return SQLITE_OK;
@@ -1429,6 +1605,7 @@
 +    c1->evp_cipher == c2->evp_cipher
 +    && c1->iv_sz == c2->iv_sz
 +    && c1->kdf_iter == c2->kdf_iter
++    && c1->fast_kdf_iter == c2->fast_kdf_iter
 +    && c1->key_sz == c2->key_sz
 +    && c1->pass_sz == c2->pass_sz
 +    && (
@@ -1463,7 +1640,7 @@
 +  target->hmac_key = hmac_key; //restore pointer to previously allocated hmac key data
 +  memcpy(target->hmac_key, source->hmac_key, EVP_MAX_KEY_LENGTH);
 +
-+  target->pass = sqlite3Malloc(source->pass_sz);
++  target->pass = sqlcipher_malloc(source->pass_sz);
 +  if(target->pass == NULL) return SQLITE_NOMEM;
 +  memcpy(target->pass, source->pass, source->pass_sz);
 +
@@ -1482,7 +1659,7 @@
 +  sqlcipher_free(ctx->pass, ctx->pass_sz);
 +  ctx->pass_sz = nKey;
 +  if(zKey && nKey) {
-+    ctx->pass = sqlite3Malloc(nKey);
++    ctx->pass = sqlcipher_malloc(nKey);
 +    if(ctx->pass == NULL) return SQLITE_NOMEM;
 +    memcpy(ctx->pass, zKey, nKey);
 +    return SQLITE_OK;
@@ -1492,17 +1669,21 @@
 +
 +int sqlcipher_codec_ctx_set_pass(codec_ctx *ctx, const void *zKey, int nKey, int for_ctx) {
 +  cipher_ctx *c_ctx = for_ctx ? ctx->write_ctx : ctx->read_ctx;
++  int rc;
 +
-+  sqlcipher_cipher_ctx_set_pass(c_ctx, zKey, nKey);
++  if((rc = sqlcipher_cipher_ctx_set_pass(c_ctx, zKey, nKey)) != SQLITE_OK) return rc; 
 +  c_ctx->derive_key = 1;
 +
-+  /* FIXME: return value of copy */
-+  if(for_ctx == 2) sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx); 
++  if(for_ctx == 2)
++    if((rc = sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx)) != SQLITE_OK) 
++      return rc; 
++
 +  return SQLITE_OK;
 +} 
 +
 +int sqlcipher_codec_ctx_set_cipher(codec_ctx *ctx, const char *cipher_name, int for_ctx) {
 +  cipher_ctx *c_ctx = for_ctx ? ctx->write_ctx : ctx->read_ctx;
++  int rc;
 +
 +  c_ctx->evp_cipher = (EVP_CIPHER *) EVP_get_cipherbyname(cipher_name);
 +  c_ctx->key_sz = EVP_CIPHER_key_length(c_ctx->evp_cipher);
@@ -1511,21 +1692,42 @@
 +  c_ctx->hmac_sz = EVP_MD_size(EVP_sha1());
 +  c_ctx->derive_key = 1;
 +
-+  if(for_ctx == 2) sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx); 
++  if(for_ctx == 2)
++    if((rc = sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx)) != SQLITE_OK)
++      return rc; 
 +
 +  return SQLITE_OK;
 +}
 +
 +int sqlcipher_codec_ctx_set_kdf_iter(codec_ctx *ctx, int kdf_iter, int for_ctx) {
 +  cipher_ctx *c_ctx = for_ctx ? ctx->write_ctx : ctx->read_ctx;
++  int rc;
 +
 +  c_ctx->kdf_iter = kdf_iter;
 +  c_ctx->derive_key = 1;
 +
-+  if(for_ctx == 2) sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx); 
++  if(for_ctx == 2)
++    if((rc = sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx)) != SQLITE_OK)
++      return rc; 
++
++  return SQLITE_OK;
++}
++
++int sqlcipher_codec_ctx_set_fast_kdf_iter(codec_ctx *ctx, int fast_kdf_iter, int for_ctx) {
++  cipher_ctx *c_ctx = for_ctx ? ctx->write_ctx : ctx->read_ctx;
++  int rc;
++
++  c_ctx->fast_kdf_iter = fast_kdf_iter;
++  c_ctx->derive_key = 1;
++
++  if(for_ctx == 2)
++    if((rc = sqlcipher_cipher_ctx_copy( for_ctx ? ctx->read_ctx : ctx->write_ctx, c_ctx)) != SQLITE_OK)
++      return rc; 
++
 +  return SQLITE_OK;
 +}
 +
++
 +int sqlcipher_codec_ctx_set_use_hmac(codec_ctx *ctx, int use) {
 +  int reserve = EVP_MAX_IV_LENGTH; /* base reserve size will be IV only */ 
 +
@@ -1578,7 +1780,7 @@
 +  /* pre-allocate a page buffer of PageSize bytes. This will
 +     be used as a persistent buffer for encryption and decryption 
 +     operations to avoid overhead of multiple memory allocations*/
-+  ctx->buffer = sqlite3Malloc(size);
++  ctx->buffer = sqlcipher_malloc(size);
 +  if(ctx->buffer == NULL) return SQLITE_NOMEM;
 +
 +  return SQLITE_OK;
@@ -1587,7 +1789,7 @@
 +int sqlcipher_codec_ctx_init(codec_ctx **iCtx, Db *pDb, Pager *pPager, sqlite3_file *fd, const void *zKey, int nKey) {
 +  int rc;
 +  codec_ctx *ctx;
-+  *iCtx = sqlite3Malloc(sizeof(codec_ctx));
++  *iCtx = sqlcipher_malloc(sizeof(codec_ctx));
 +  ctx = *iCtx;
 +
 +  if(ctx == NULL) return SQLITE_NOMEM;
@@ -1600,9 +1802,16 @@
 +       key derivation function. If we get a short read allocate
 +       a new random salt value */
 +  ctx->kdf_salt_sz = FILE_HEADER_SZ;
-+  ctx->kdf_salt = sqlite3Malloc(ctx->kdf_salt_sz);
++  ctx->kdf_salt = sqlcipher_malloc(ctx->kdf_salt_sz);
 +  if(ctx->kdf_salt == NULL) return SQLITE_NOMEM;
 +
++  /* allocate space for separate hmac salt data. We want the
++     HMAC derivation salt to be different than the encryption
++     key derivation salt */
++  ctx->hmac_kdf_salt = sqlcipher_malloc(ctx->kdf_salt_sz);
++  if(ctx->hmac_kdf_salt == NULL) return SQLITE_NOMEM;
++
++
 +  /*
 +     Always overwrite page size and set to the default because the first page of the database
 +     in encrypted and thus sqlite can't effectively determine the pagesize. this causes an issue in 
@@ -1615,18 +1824,19 @@
 +
 +  if(fd == NULL || sqlite3OsRead(fd, ctx->kdf_salt, FILE_HEADER_SZ, 0) != SQLITE_OK) {
 +    /* if unable to read the bytes, generate random salt */
-+    sqlcipher_pseudorandom(ctx->kdf_salt, FILE_HEADER_SZ);
++    if(sqlcipher_random(ctx->kdf_salt, FILE_HEADER_SZ) != 1) return SQLITE_ERROR;
 +  }
 +
-+  sqlcipher_codec_ctx_set_cipher(ctx, CIPHER, 0);
-+  sqlcipher_codec_ctx_set_kdf_iter(ctx, PBKDF2_ITER, 0);
-+  sqlcipher_codec_ctx_set_pass(ctx, zKey, nKey, 0);
++  if((rc = sqlcipher_codec_ctx_set_cipher(ctx, CIPHER, 0)) != SQLITE_OK) return rc;
++  if((rc = sqlcipher_codec_ctx_set_kdf_iter(ctx, PBKDF2_ITER, 0)) != SQLITE_OK) return rc;
++  if((rc = sqlcipher_codec_ctx_set_fast_kdf_iter(ctx, FAST_PBKDF2_ITER, 0)) != SQLITE_OK) return rc;
++  if((rc = sqlcipher_codec_ctx_set_pass(ctx, zKey, nKey, 0)) != SQLITE_OK) return rc;
 +
 +  /* Use HMAC signatures by default. Note that codec_set_use_hmac will implicity call
 +     codec_set_page_size to set the default */
 +  if((rc = sqlcipher_codec_ctx_set_use_hmac(ctx, DEFAULT_USE_HMAC)) != SQLITE_OK) return rc;
 +
-+  sqlcipher_cipher_ctx_copy(ctx->write_ctx, ctx->read_ctx);
++  if((rc = sqlcipher_cipher_ctx_copy(ctx->write_ctx, ctx->read_ctx)) != SQLITE_OK) return rc;
 +
 +  return SQLITE_OK;
 +}
@@ -1639,6 +1849,7 @@
 +  codec_ctx *ctx = *iCtx;
 +  CODEC_TRACE(("codec_ctx_free: entered iCtx=%d\n", iCtx));
 +  sqlcipher_free(ctx->kdf_salt, ctx->kdf_salt_sz);
++  sqlcipher_free(ctx->hmac_kdf_salt, ctx->kdf_salt_sz);
 +  sqlcipher_free(ctx->buffer, 0);
 +  sqlcipher_cipher_ctx_free(&ctx->read_ctx);
 +  sqlcipher_cipher_ctx_free(&ctx->write_ctx);
@@ -1646,18 +1857,18 @@
 +}
 +
 +int sqlcipher_page_hmac(cipher_ctx *ctx, Pgno pgno, unsigned char *in, int in_sz, unsigned char *out) {
-+  HMAC_CTX hctx;
-+  HMAC_CTX_init(&hctx);
-+  HMAC_Init_ex(&hctx, ctx->hmac_key, ctx->key_sz, EVP_sha1(), NULL); 
++  HMAC_CTX_init(&ctx->hctx);
++  
++  HMAC_Init_ex(&ctx->hctx, ctx->hmac_key, ctx->key_sz, EVP_sha1(), NULL);
 +
 +  /* include the encrypted page data,  initialization vector, and page number in HMAC. This will 
 +     prevent both tampering with the ciphertext, manipulation of the IV, or resequencing otherwise
 +     valid pages out of order in a database */ 
-+  HMAC_Update(&hctx, in, in_sz); 
-+  HMAC_Update(&hctx, (const unsigned char*) &pgno, sizeof(Pgno));
-+  HMAC_Final(&hctx, out, NULL);
-+  HMAC_CTX_cleanup(&hctx);
-+  return SQLITE_OK; /* FIXME: check for errors in HMAC routine to be safe */
++  HMAC_Update(&ctx->hctx, in, in_sz);
++  HMAC_Update(&ctx->hctx, (const unsigned char*) &pgno, sizeof(Pgno));
++  HMAC_Final(&ctx->hctx, out, NULL);
++  HMAC_CTX_cleanup(&ctx->hctx);
++  return SQLITE_OK; 
 +}
 +
 +/*
@@ -1670,7 +1881,6 @@
 + */
 +int sqlcipher_page_cipher(codec_ctx *ctx, int for_ctx, Pgno pgno, int mode, int page_sz, unsigned char *in, unsigned char *out) {
 +  cipher_ctx *c_ctx = for_ctx ? ctx->write_ctx : ctx->read_ctx;
-+  EVP_CIPHER_CTX ectx;
 +  unsigned char *iv_in, *iv_out, *hmac_in, *hmac_out, *out_start;
 +  int tmp_csz, csz, size;
 +
@@ -1695,13 +1905,18 @@
 +  } 
 +
 +  if(mode == CIPHER_ENCRYPT) {
-+    sqlcipher_pseudorandom(iv_out, c_ctx->reserve_sz); /* start at front of the reserve block, write random data to the end */
++    /* start at front of the reserve block, write random data to the end */
++    if(sqlcipher_random(iv_out, c_ctx->reserve_sz) != 1) return SQLITE_ERROR; 
 +  } else { /* CIPHER_DECRYPT */
 +    memcpy(iv_out, iv_in, c_ctx->iv_sz); /* copy the iv from the input to output buffer */
 +  } 
 +
 +  if(c_ctx->use_hmac && (mode == CIPHER_DECRYPT)) {
-+    sqlcipher_page_hmac(c_ctx, pgno, in, size + c_ctx->iv_sz, hmac_out); 
++    if(sqlcipher_page_hmac(c_ctx, pgno, in, size + c_ctx->iv_sz, hmac_out) != SQLITE_OK) {
++      memset(out, 0, page_sz); 
++      CODEC_TRACE(("codec_cipher: hmac operations failed for pgno=%d\n", pgno));
++      return SQLITE_ERROR;
++    }
 +
 +    CODEC_TRACE(("codec_cipher: comparing hmac on in=%d out=%d hmac_sz=%d\n", hmac_in, hmac_out, c_ctx->hmac_sz));
 +    if(sqlcipher_memcmp(hmac_in, hmac_out, c_ctx->hmac_sz) != 0) {
@@ -1714,15 +1929,15 @@
 +    }
 +  } 
 +
-+  EVP_CipherInit(&ectx, c_ctx->evp_cipher, NULL, NULL, mode);
-+  EVP_CIPHER_CTX_set_padding(&ectx, 0);
-+  EVP_CipherInit(&ectx, NULL, c_ctx->key, iv_out, mode);
-+  EVP_CipherUpdate(&ectx, out, &tmp_csz, in, size);
++  EVP_CipherInit(&c_ctx->ectx, c_ctx->evp_cipher, NULL, NULL, mode);
++  EVP_CIPHER_CTX_set_padding(&c_ctx->ectx, 0);
++  EVP_CipherInit(&c_ctx->ectx, NULL, c_ctx->key, iv_out, mode);
++  EVP_CipherUpdate(&c_ctx->ectx, out, &tmp_csz, in, size);
 +  csz = tmp_csz;  
 +  out += tmp_csz;
-+  EVP_CipherFinal(&ectx, out, &tmp_csz);
++  EVP_CipherFinal(&c_ctx->ectx, out, &tmp_csz);
 +  csz += tmp_csz;
-+  EVP_CIPHER_CTX_cleanup(&ectx);
++  EVP_CIPHER_CTX_cleanup(&c_ctx->ectx);
 +  assert(size == csz);
 +
 +  if(c_ctx->use_hmac && (mode == CIPHER_ENCRYPT)) {
@@ -1745,18 +1960,20 @@
 +  */
 +int sqlcipher_cipher_ctx_key_derive(codec_ctx *ctx, cipher_ctx *c_ctx) {
 +  CODEC_TRACE(("codec_key_derive: entered c_ctx->pass=%s, c_ctx->pass_sz=%d \
-+                ctx->kdf_salt=%d ctx->kdf_salt_sz=%d c_ctx->kdf_iter=%d c_ctx->key_sz=%d\n", 
-+                c_ctx->pass, c_ctx->pass_sz, ctx->kdf_salt, ctx->kdf_salt_sz, 
-+                c_ctx->kdf_iter, c_ctx->key_sz));
++                ctx->kdf_salt=%d ctx->kdf_salt_sz=%d c_ctx->kdf_iter=%d \
++                ctx->hmac_kdf_salt=%d, c_ctx->fast_kdf_iter=%d c_ctx->key_sz=%d\n", 
++                c_ctx->pass, c_ctx->pass_sz, ctx->kdf_salt, ctx->kdf_salt_sz, c_ctx->kdf_iter, 
++                ctx->hmac_kdf_salt, c_ctx->fast_kdf_iter, c_ctx->key_sz)); 
++                
 +
 +  if(c_ctx->pass && c_ctx->pass_sz) { // if pass is not null
 +    if (c_ctx->pass_sz == ((c_ctx->key_sz*2)+3) && sqlite3StrNICmp(c_ctx->pass ,"x'", 2) == 0) { 
 +      int n = c_ctx->pass_sz - 3; /* adjust for leading x' and tailing ' */
-+      const char *z = c_ctx->pass + 2; /* adjust lead offset of x' */ 
-+      CODEC_TRACE(("codec_key_derive: deriving key from hex\n")); 
++      const char *z = c_ctx->pass + 2; /* adjust lead offset of x' */
++      CODEC_TRACE(("codec_key_derive: using raw key from hex\n")); 
 +      cipher_hex2bin(z, n, c_ctx->key);
 +    } else { 
-+      CODEC_TRACE(("codec_key_derive: deriving key using PBKDF2\n")); 
++      CODEC_TRACE(("codec_key_derive: deriving key using full PBKDF2 with %d iterations\n", c_ctx->kdf_iter)); 
 +      PKCS5_PBKDF2_HMAC_SHA1( c_ctx->pass, c_ctx->pass_sz, 
 +                              ctx->kdf_salt, ctx->kdf_salt_sz, 
 +                              c_ctx->kdf_iter, c_ctx->key_sz, c_ctx->key);
@@ -1767,10 +1984,23 @@
 +       key for HMAC. In this case, we use the output of the previous KDF as the input to 
 +       this KDF run. This ensures a distinct but predictable HMAC key. */
 +    if(c_ctx->use_hmac) {
-+      CODEC_TRACE(("codec_key_derive: deriving hmac key using PBKDF2\n")); 
++      int i;
++
++      /* start by copying the kdf key into the hmac salt slot
++         then XOR it with the fixed hmac salt defined at compile time
++         this ensures that the salt passed in to derive the hmac key, while 
++         easy to derive and publically known, is not the same as the salt used 
++         to generate the encryption key */ 
++      memcpy(ctx->hmac_kdf_salt, ctx->kdf_salt, ctx->kdf_salt_sz);
++      for(i = 0; i < ctx->kdf_salt_sz; i++) {
++        ctx->hmac_kdf_salt[i] ^= HMAC_SALT_MASK;
++      } 
++
++      CODEC_TRACE(("codec_key_derive: deriving hmac key from encryption key using PBKDF2 with %d iterations\n", 
++        c_ctx->fast_kdf_iter)); 
 +      PKCS5_PBKDF2_HMAC_SHA1( (const char*)c_ctx->key, c_ctx->key_sz, 
-+                              ctx->kdf_salt, ctx->kdf_salt_sz, 
-+                              c_ctx->kdf_iter, c_ctx->key_sz, c_ctx->hmac_key); 
++                              ctx->hmac_kdf_salt, ctx->kdf_salt_sz, 
++                              c_ctx->fast_kdf_iter, c_ctx->key_sz, c_ctx->hmac_key); 
 +    }
 +
 +    c_ctx->derive_key = 0;
@@ -1782,17 +2012,18 @@
 +int sqlcipher_codec_key_derive(codec_ctx *ctx) {
 +  /* derive key on first use if necessary */
 +  if(ctx->read_ctx->derive_key) {
-+    sqlcipher_cipher_ctx_key_derive(ctx, ctx->read_ctx);
++    if(sqlcipher_cipher_ctx_key_derive(ctx, ctx->read_ctx) != SQLITE_OK) return SQLITE_ERROR;
 +  }
 +
 +  if(ctx->write_ctx->derive_key) {
 +    if(sqlcipher_cipher_ctx_cmp(ctx->write_ctx, ctx->read_ctx) == 0) {
-+      sqlcipher_cipher_ctx_copy(ctx->write_ctx, ctx->read_ctx); // the relevant parameters are the same, just copy read key
++      // the relevant parameters are the same, just copy read key
++      if(sqlcipher_cipher_ctx_copy(ctx->write_ctx, ctx->read_ctx) != SQLITE_OK) return SQLITE_ERROR;
 +    } else {
-+      sqlcipher_cipher_ctx_key_derive(ctx, ctx->write_ctx);
++      if(sqlcipher_cipher_ctx_key_derive(ctx, ctx->write_ctx) != SQLITE_OK) return SQLITE_ERROR;
 +    }
 +  }
-+  return SQLITE_OK; /* FIXME set proper return value */
++  return SQLITE_OK; 
 +}
 +
 +int sqlcipher_codec_key_copy(codec_ctx *ctx, int source) {
@@ -2001,6 +2232,7 @@
 +}
 +
 +#endif
++#endif
 +
 +/************** End of crypto_impl.c *****************************************/
 +/************** Begin file global.c ******************************************/
@@ -2148,7 +2380,7 @@
 +   SQLITE_THREADSAFE==1,      /* bFullMutex */
 +   SQLITE_USE_URI,            /* bOpenUri */
 +   0x7ffffffe,                /* mxStrlen */
-+   100,                       /* szLookaside */
++   128,                       /* szLookaside */
 +   500,                       /* nLookaside */
 +   {0,0,0,0,0,0,0,0},         /* m */
 +   {0,0,0,0,0,0,0,0,0},       /* mutex */
@@ -2232,15898 +2464,160 @@
 +** The author disclaims copyright to this source code.  In place of
 +** a legal notice, here is a blessing:
 +**
- **    May you do good and not evil.
- **    May you find forgiveness for yourself and forgive others.
- **    May you share freely, never taking more than you give.
-@@ -37430,9814 +39248,9196 @@
-   u8 subjInMemory;            /* True to use in-memory sub-journals */
-   Pgno dbSize;                /* Number of pages in the database */
-   Pgno dbOrigSize;            /* dbSize before the current transaction */
--  Pgno dbFileSize;            /* Number of pages in the database file */
--  Pgno dbHintSize;            /* Value passed to FCNTL_SIZE_HINT call */
--  int errCode;                /* One of several kinds of errors */
--  int nRec;                   /* Pages journalled since last j-header written */
--  u32 cksumInit;              /* Quasi-random value added to every checksum */
--  u32 nSubRec;                /* Number of records written to sub-journal */
--  Bitvec *pInJournal;         /* One bit for each page in the database file */
--  sqlite3_file *fd;           /* File descriptor for database */
--  sqlite3_file *jfd;          /* File descriptor for main journal */
--  sqlite3_file *sjfd;         /* File descriptor for sub-journal */
--  i64 journalOff;             /* Current write offset in the journal file */
--  i64 journalHdr;             /* Byte offset to previous journal header */
--  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
--  PagerSavepoint *aSavepoint; /* Array of active savepoints */
--  int nSavepoint;             /* Number of elements in aSavepoint[] */
--  char dbFileVers[16];        /* Changes whenever database file changes */
--  /*
--  ** End of the routinely-changing class members
--  ***************************************************************************/
--
--  u16 nExtra;                 /* Add this many bytes to each in-memory page */
--  i16 nReserve;               /* Number of unused bytes at end of each page */
--  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
--  u32 sectorSize;             /* Assumed sector size during rollback */
--  int pageSize;               /* Number of bytes in a page */
--  Pgno mxPgno;                /* Maximum allowed size of the database */
--  i64 journalSizeLimit;       /* Size limit for persistent journal files */
--  char *zFilename;            /* Name of the database file */
--  char *zJournal;             /* Name of the journal file */
--  int (*xBusyHandler)(void*); /* Function to call when busy */
--  void *pBusyHandlerArg;      /* Context argument for xBusyHandler */
--#ifdef SQLITE_TEST
--  int nHit, nMiss;            /* Cache hits and missing */
--  int nRead, nWrite;          /* Database pages read/written */
--#endif
--  void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
--#ifdef SQLITE_HAS_CODEC
--  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
--  void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
--  void (*xCodecFree)(void*);             /* Destructor for the codec */
--  void *pCodec;               /* First argument to xCodec... methods */
--#endif
--  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
--  PCache *pPCache;            /* Pointer to page cache object */
--#ifndef SQLITE_OMIT_WAL
--  Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
--  char *zWal;                 /* File name for write-ahead log */
--#endif
--};
--
--/*
--** The following global variables hold counters used for
--** testing purposes only.  These variables do not exist in
--** a non-testing build.  These variables are not thread-safe.
--*/
--#ifdef SQLITE_TEST
--SQLITE_API int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
--SQLITE_API int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
--SQLITE_API int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
--# define PAGER_INCR(v)  v++
--#else
--# define PAGER_INCR(v)
--#endif
--
--
--
--/*
--** Journal files begin with the following magic string.  The data
--** was obtained from /dev/random.  It is used only as a sanity check.
--**
--** Since version 2.8.0, the journal format contains additional sanity
--** checking information.  If the power fails while the journal is being
--** written, semi-random garbage data might appear in the journal
--** file after power is restored.  If an attempt is then made
--** to roll the journal back, the database could be corrupted.  The additional
--** sanity checking data is an attempt to discover the garbage in the
--** journal and ignore it.
--**
--** The sanity checking information for the new journal format consists
--** of a 32-bit checksum on each page of data.  The checksum covers both
--** the page number and the pPager->pageSize bytes of data for the page.
--** This cksum is initialized to a 32-bit random value that appears in the
--** journal file right after the header.  The random initializer is important,
--** because garbage data that appears at the end of a journal is likely
--** data that was once in other files that have now been deleted.  If the
--** garbage data came from an obsolete journal file, the checksums might
--** be correct.  But by initializing the checksum to random value which
--** is different for every journal, we minimize that risk.
--*/
--static const unsigned char aJournalMagic[] = {
--  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
--};
--
--/*
--** The size of the of each page record in the journal is given by
--** the following macro.
--*/
--#define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)
--
--/*
--** The journal header size for this pager. This is usually the same 
--** size as a single disk sector. See also setSectorSize().
--*/
--#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
--
--/*
--** The macro MEMDB is true if we are dealing with an in-memory database.
--** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
--** the value of MEMDB will be a constant and the compiler will optimize
--** out code that would never execute.
--*/
--#ifdef SQLITE_OMIT_MEMORYDB
--# define MEMDB 0
--#else
--# define MEMDB pPager->memDb
--#endif
--
--/*
--** The maximum legal page number is (2^31 - 1).
--*/
--#define PAGER_MAX_PGNO 2147483647
--
--/*
--** The argument to this macro is a file descriptor (type sqlite3_file*).
--** Return 0 if it is not open, or non-zero (but not 1) if it is.
--**
--** This is so that expressions can be written as:
--**
--**   if( isOpen(pPager->jfd) ){ ...
--**
--** instead of
--**
--**   if( pPager->jfd->pMethods ){ ...
--*/
--#define isOpen(pFd) ((pFd)->pMethods)
--
--/*
--** Return true if this pager uses a write-ahead log instead of the usual
--** rollback journal. Otherwise false.
--*/
--#ifndef SQLITE_OMIT_WAL
--static int pagerUseWal(Pager *pPager){
--  return (pPager->pWal!=0);
--}
--#else
--# define pagerUseWal(x) 0
--# define pagerRollbackWal(x) 0
--# define pagerWalFrames(v,w,x,y,z) 0
--# define pagerOpenWalIfPresent(z) SQLITE_OK
--# define pagerBeginReadTransaction(z) SQLITE_OK
--#endif
--
--#ifndef NDEBUG 
++**    May you do good and not evil.
++**    May you find forgiveness for yourself and forgive others.
++**    May you share freely, never taking more than you give.
++**
++*************************************************************************
++**
++** This file implements routines used to report what compile-time options
++** SQLite was built with.
++*/
++
++#ifndef SQLITE_OMIT_COMPILEOPTION_DIAGS
++
++
++/*
++** An array of names of all compile-time options.  This array should 
++** be sorted A-Z.
++**
++** This array looks large, but in a typical installation actually uses
++** only a handful of compile-time options, so most times this array is usually
++** rather short and uses little memory space.
++*/
++static const char * const azCompileOpt[] = {
++
++/* These macros are provided to "stringify" the value of the define
++** for those options in which the value is meaningful. */
++#define CTIMEOPT_VAL_(opt) #opt
++#define CTIMEOPT_VAL(opt) CTIMEOPT_VAL_(opt)
++
++#ifdef SQLITE_32BIT_ROWID
++  "32BIT_ROWID",
++#endif
++#ifdef SQLITE_4_BYTE_ALIGNED_MALLOC
++  "4_BYTE_ALIGNED_MALLOC",
++#endif
++#ifdef SQLITE_CASE_SENSITIVE_LIKE
++  "CASE_SENSITIVE_LIKE",
++#endif
++#ifdef SQLITE_CHECK_PAGES
++  "CHECK_PAGES",
++#endif
++#ifdef SQLITE_COVERAGE_TEST
++  "COVERAGE_TEST",
++#endif
++#ifdef SQLITE_DEBUG
++  "DEBUG",
++#endif
++#ifdef SQLITE_DEFAULT_LOCKING_MODE
++  "DEFAULT_LOCKING_MODE=" CTIMEOPT_VAL(SQLITE_DEFAULT_LOCKING_MODE),
++#endif
++#ifdef SQLITE_DISABLE_DIRSYNC
++  "DISABLE_DIRSYNC",
++#endif
++#ifdef SQLITE_DISABLE_LFS
++  "DISABLE_LFS",
++#endif
++#ifdef SQLITE_ENABLE_ATOMIC_WRITE
++  "ENABLE_ATOMIC_WRITE",
++#endif
++#ifdef SQLITE_ENABLE_CEROD
++  "ENABLE_CEROD",
++#endif
++#ifdef SQLITE_ENABLE_COLUMN_METADATA
++  "ENABLE_COLUMN_METADATA",
++#endif
++#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
++  "ENABLE_EXPENSIVE_ASSERT",
++#endif
++#ifdef SQLITE_ENABLE_FTS1
++  "ENABLE_FTS1",
++#endif
++#ifdef SQLITE_ENABLE_FTS2
++  "ENABLE_FTS2",
++#endif
++#ifdef SQLITE_ENABLE_FTS3
++  "ENABLE_FTS3",
++#endif
++#ifdef SQLITE_ENABLE_FTS3_PARENTHESIS
++  "ENABLE_FTS3_PARENTHESIS",
++#endif
++#ifdef SQLITE_ENABLE_FTS4
++  "ENABLE_FTS4",
++#endif
++#ifdef SQLITE_ENABLE_ICU
++  "ENABLE_ICU",
++#endif
++#ifdef SQLITE_ENABLE_IOTRACE
++  "ENABLE_IOTRACE",
++#endif
++#ifdef SQLITE_ENABLE_LOAD_EXTENSION
++  "ENABLE_LOAD_EXTENSION",
+ #endif
+ #ifdef SQLITE_ENABLE_LOCKING_STYLE
+   "ENABLE_LOCKING_STYLE=" CTIMEOPT_VAL(SQLITE_ENABLE_LOCKING_STYLE),
+@@ -24516,7 +26503,7 @@
+ #include <sys/time.h>
+ #include <errno.h>
+ #ifndef SQLITE_OMIT_WAL
+-#include <sys/mman.h>
++/* #include <sys/mman.h> */
+ #endif
+ 
+ #if SQLITE_ENABLE_LOCKING_STYLE
+@@ -44185,12 +46172,41 @@
+ 
+ #endif /* SQLITE_OMIT_DISKIO */
+ 
+-/************** End of pager.c ***********************************************/
+-/************** Begin file wal.c *********************************************/
 -/*
--** Usage:
--**
--**   assert( assert_pager_state(pPager) );
+-** 2010 February 1
 -**
--** This function runs many asserts to try to find inconsistencies in
--** the internal state of the Pager object.
--*/
--static int assert_pager_state(Pager *p){
--  Pager *pPager = p;
--
--  /* State must be valid. */
--  assert( p->eState==PAGER_OPEN
--       || p->eState==PAGER_READER
--       || p->eState==PAGER_WRITER_LOCKED
--       || p->eState==PAGER_WRITER_CACHEMOD
--       || p->eState==PAGER_WRITER_DBMOD
--       || p->eState==PAGER_WRITER_FINISHED
--       || p->eState==PAGER_ERROR
--  );
--
--  /* Regardless of the current state, a temp-file connection always behaves
--  ** as if it has an exclusive lock on the database file. It never updates
--  ** the change-counter field, so the changeCountDone flag is always set.
--  */
--  assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK );
--  assert( p->tempFile==0 || pPager->changeCountDone );
--
--  /* If the useJournal flag is clear, the journal-mode must be "OFF". 
--  ** And if the journal-mode is "OFF", the journal file must not be open.
--  */
--  assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal );
--  assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) );
--
--  /* Check that MEMDB implies noSync. And an in-memory journal. Since 
--  ** this means an in-memory pager performs no IO at all, it cannot encounter 
--  ** either SQLITE_IOERR or SQLITE_FULL during rollback or while finalizing 
--  ** a journal file. (although the in-memory journal implementation may 
--  ** return SQLITE_IOERR_NOMEM while the journal file is being written). It 
--  ** is therefore not possible for an in-memory pager to enter the ERROR 
--  ** state.
--  */
--  if( MEMDB ){
--    assert( p->noSync );
--    assert( p->journalMode==PAGER_JOURNALMODE_OFF 
--         || p->journalMode==PAGER_JOURNALMODE_MEMORY 
--    );
--    assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN );
--    assert( pagerUseWal(p)==0 );
--  }
--
--  /* If changeCountDone is set, a RESERVED lock or greater must be held
--  ** on the file.
--  */
--  assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK );
--  assert( p->eLock!=PENDING_LOCK );
--
--  switch( p->eState ){
--    case PAGER_OPEN:
--      assert( !MEMDB );
--      assert( pPager->errCode==SQLITE_OK );
--      assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile );
--      break;
--
--    case PAGER_READER:
--      assert( pPager->errCode==SQLITE_OK );
--      assert( p->eLock!=UNKNOWN_LOCK );
--      assert( p->eLock>=SHARED_LOCK || p->noReadlock );
--      break;
--
--    case PAGER_WRITER_LOCKED:
--      assert( p->eLock!=UNKNOWN_LOCK );
--      assert( pPager->errCode==SQLITE_OK );
--      if( !pagerUseWal(pPager) ){
--        assert( p->eLock>=RESERVED_LOCK );
--      }
--      assert( pPager->dbSize==pPager->dbOrigSize );
--      assert( pPager->dbOrigSize==pPager->dbFileSize );
--      assert( pPager->dbOrigSize==pPager->dbHintSize );
--      assert( pPager->setMaster==0 );
--      break;
--
--    case PAGER_WRITER_CACHEMOD:
--      assert( p->eLock!=UNKNOWN_LOCK );
--      assert( pPager->errCode==SQLITE_OK );
--      if( !pagerUseWal(pPager) ){
--        /* It is possible that if journal_mode=wal here that neither the
--        ** journal file nor the WAL file are open. This happens during
--        ** a rollback transaction that switches from journal_mode=off
--        ** to journal_mode=wal.
--        */
--        assert( p->eLock>=RESERVED_LOCK );
--        assert( isOpen(p->jfd) 
--             || p->journalMode==PAGER_JOURNALMODE_OFF 
--             || p->journalMode==PAGER_JOURNALMODE_WAL 
--        );
--      }
--      assert( pPager->dbOrigSize==pPager->dbFileSize );
--      assert( pPager->dbOrigSize==pPager->dbHintSize );
--      break;
--
--    case PAGER_WRITER_DBMOD:
--      assert( p->eLock==EXCLUSIVE_LOCK );
--      assert( pPager->errCode==SQLITE_OK );
--      assert( !pagerUseWal(pPager) );
--      assert( p->eLock>=EXCLUSIVE_LOCK );
--      assert( isOpen(p->jfd) 
--           || p->journalMode==PAGER_JOURNALMODE_OFF 
--           || p->journalMode==PAGER_JOURNALMODE_WAL 
--      );
--      assert( pPager->dbOrigSize<=pPager->dbHintSize );
--      break;
--
--    case PAGER_WRITER_FINISHED:
--      assert( p->eLock==EXCLUSIVE_LOCK );
--      assert( pPager->errCode==SQLITE_OK );
--      assert( !pagerUseWal(pPager) );
--      assert( isOpen(p->jfd) 
--           || p->journalMode==PAGER_JOURNALMODE_OFF 
--           || p->journalMode==PAGER_JOURNALMODE_WAL 
--      );
--      break;
--
--    case PAGER_ERROR:
--      /* There must be at least one outstanding reference to the pager if
--      ** in ERROR state. Otherwise the pager should have already dropped
--      ** back to OPEN state.
--      */
--      assert( pPager->errCode!=SQLITE_OK );
--      assert( sqlite3PcacheRefCount(pPager->pPCache)>0 );
--      break;
--  }
--
--  return 1;
--}
--#endif /* ifndef NDEBUG */
--
--#ifdef SQLITE_DEBUG 
--/*
--** Return a pointer to a human readable string in a static buffer
--** containing the state of the Pager object passed as an argument. This
--** is intended to be used within debuggers. For example, as an alternative
--** to "print *pPager" in gdb:
--**
--** (gdb) printf "%s", print_pager_state(pPager)
--*/
--static char *print_pager_state(Pager *p){
--  static char zRet[1024];
--
--  sqlite3_snprintf(1024, zRet,
--      "Filename:      %s\n"
--      "State:         %s errCode=%d\n"
--      "Lock:          %s\n"
--      "Locking mode:  locking_mode=%s\n"
--      "Journal mode:  journal_mode=%s\n"
--      "Backing store: tempFile=%d memDb=%d useJournal=%d\n"
--      "Journal:       journalOff=%lld journalHdr=%lld\n"
--      "Size:          dbsize=%d dbOrigSize=%d dbFileSize=%d\n"
--      , p->zFilename
--      , p->eState==PAGER_OPEN            ? "OPEN" :
--        p->eState==PAGER_READER          ? "READER" :
--        p->eState==PAGER_WRITER_LOCKED   ? "WRITER_LOCKED" :
--        p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" :
--        p->eState==PAGER_WRITER_DBMOD    ? "WRITER_DBMOD" :
--        p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" :
--        p->eState==PAGER_ERROR           ? "ERROR" : "?error?"
--      , (int)p->errCode
--      , p->eLock==NO_LOCK         ? "NO_LOCK" :
--        p->eLock==RESERVED_LOCK   ? "RESERVED" :
--        p->eLock==EXCLUSIVE_LOCK  ? "EXCLUSIVE" :
--        p->eLock==SHARED_LOCK     ? "SHARED" :
--        p->eLock==UNKNOWN_LOCK    ? "UNKNOWN" : "?error?"
--      , p->exclusiveMode ? "exclusive" : "normal"
--      , p->journalMode==PAGER_JOURNALMODE_MEMORY   ? "memory" :
--        p->journalMode==PAGER_JOURNALMODE_OFF      ? "off" :
--        p->journalMode==PAGER_JOURNALMODE_DELETE   ? "delete" :
--        p->journalMode==PAGER_JOURNALMODE_PERSIST  ? "persist" :
--        p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" :
--        p->journalMode==PAGER_JOURNALMODE_WAL      ? "wal" : "?error?"
--      , (int)p->tempFile, (int)p->memDb, (int)p->useJournal
--      , p->journalOff, p->journalHdr
--      , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize
--  );
--
--  return zRet;
--}
--#endif
--
--/*
--** Return true if it is necessary to write page *pPg into the sub-journal.
--** A page needs to be written into the sub-journal if there exists one
--** or more open savepoints for which:
--**
--**   * The page-number is less than or equal to PagerSavepoint.nOrig, and
--**   * The bit corresponding to the page-number is not set in
--**     PagerSavepoint.pInSavepoint.
--*/
--static int subjRequiresPage(PgHdr *pPg){
--  Pgno pgno = pPg->pgno;
--  Pager *pPager = pPg->pPager;
--  int i;
--  for(i=0; i<pPager->nSavepoint; i++){
--    PagerSavepoint *p = &pPager->aSavepoint[i];
--    if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){
--      return 1;
--    }
--  }
--  return 0;
--}
--
--/*
--** Return true if the page is already in the journal file.
--*/
--static int pageInJournal(PgHdr *pPg){
--  return sqlite3BitvecTest(pPg->pPager->pInJournal, pPg->pgno);
--}
--
--/*
--** Read a 32-bit integer from the given file descriptor.  Store the integer
--** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
--** error code is something goes wrong.
--**
--** All values are stored on disk as big-endian.
--*/
--static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
--  unsigned char ac[4];
--  int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
--  if( rc==SQLITE_OK ){
--    *pRes = sqlite3Get4byte(ac);
--  }
--  return rc;
--}
--
--/*
--** Write a 32-bit integer into a string buffer in big-endian byte order.
--*/
--#define put32bits(A,B)  sqlite3Put4byte((u8*)A,B)
--
--
--/*
--** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
--** on success or an error code is something goes wrong.
--*/
--static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
--  char ac[4];
--  put32bits(ac, val);
--  return sqlite3OsWrite(fd, ac, 4, offset);
--}
--
--/*
--** Unlock the database file to level eLock, which must be either NO_LOCK
--** or SHARED_LOCK. Regardless of whether or not the call to xUnlock()
--** succeeds, set the Pager.eLock variable to match the (attempted) new lock.
--**
--** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is
--** called, do not modify it. See the comment above the #define of 
--** UNKNOWN_LOCK for an explanation of this.
--*/
--static int pagerUnlockDb(Pager *pPager, int eLock){
--  int rc = SQLITE_OK;
--
--  assert( !pPager->exclusiveMode || pPager->eLock==eLock );
--  assert( eLock==NO_LOCK || eLock==SHARED_LOCK );
--  assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 );
--  if( isOpen(pPager->fd) ){
--    assert( pPager->eLock>=eLock );
--    rc = sqlite3OsUnlock(pPager->fd, eLock);
--    if( pPager->eLock!=UNKNOWN_LOCK ){
--      pPager->eLock = (u8)eLock;
--    }
--    IOTRACE(("UNLOCK %p %d\n", pPager, eLock))
--  }
--  return rc;
--}
--
--/*
--** Lock the database file to level eLock, which must be either SHARED_LOCK,
--** RESERVED_LOCK or EXCLUSIVE_LOCK. If the caller is successful, set the
--** Pager.eLock variable to the new locking state. 
--**
--** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is 
--** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. 
--** See the comment above the #define of UNKNOWN_LOCK for an explanation 
--** of this.
--*/
--static int pagerLockDb(Pager *pPager, int eLock){
--  int rc = SQLITE_OK;
--
--  assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK );
--  if( pPager->eLock<eLock || pPager->eLock==UNKNOWN_LOCK ){
--    rc = sqlite3OsLock(pPager->fd, eLock);
--    if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){
--      pPager->eLock = (u8)eLock;
--      IOTRACE(("LOCK %p %d\n", pPager, eLock))
--    }
--  }
--  return rc;
--}
--
--/*
--** This function determines whether or not the atomic-write optimization
--** can be used with this pager. The optimization can be used if:
--**
--**  (a) the value returned by OsDeviceCharacteristics() indicates that
--**      a database page may be written atomically, and
--**  (b) the value returned by OsSectorSize() is less than or equal
--**      to the page size.
--**
--** The optimization is also always enabled for temporary files. It is
--** an error to call this function if pPager is opened on an in-memory
--** database.
--**
--** If the optimization cannot be used, 0 is returned. If it can be used,
--** then the value returned is the size of the journal file when it
--** contains rollback data for exactly one page.
--*/
--#ifdef SQLITE_ENABLE_ATOMIC_WRITE
--static int jrnlBufferSize(Pager *pPager){
--  assert( !MEMDB );
--  if( !pPager->tempFile ){
--    int dc;                           /* Device characteristics */
--    int nSector;                      /* Sector size */
--    int szPage;                       /* Page size */
--
--    assert( isOpen(pPager->fd) );
--    dc = sqlite3OsDeviceCharacteristics(pPager->fd);
--    nSector = pPager->sectorSize;
--    szPage = pPager->pageSize;
--
--    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
--    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
--    if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){
--      return 0;
--    }
--  }
-+  Pgno dbFileSize;            /* Number of pages in the database file */
-+  Pgno dbHintSize;            /* Value passed to FCNTL_SIZE_HINT call */
-+  int errCode;                /* One of several kinds of errors */
-+  int nRec;                   /* Pages journalled since last j-header written */
-+  u32 cksumInit;              /* Quasi-random value added to every checksum */
-+  u32 nSubRec;                /* Number of records written to sub-journal */
-+  Bitvec *pInJournal;         /* One bit for each page in the database file */
-+  sqlite3_file *fd;           /* File descriptor for database */
-+  sqlite3_file *jfd;          /* File descriptor for main journal */
-+  sqlite3_file *sjfd;         /* File descriptor for sub-journal */
-+  i64 journalOff;             /* Current write offset in the journal file */
-+  i64 journalHdr;             /* Byte offset to previous journal header */
-+  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
-+  PagerSavepoint *aSavepoint; /* Array of active savepoints */
-+  int nSavepoint;             /* Number of elements in aSavepoint[] */
-+  char dbFileVers[16];        /* Changes whenever database file changes */
-+  /*
-+  ** End of the routinely-changing class members
-+  ***************************************************************************/
- 
--  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
--}
-+  u16 nExtra;                 /* Add this many bytes to each in-memory page */
-+  i16 nReserve;               /* Number of unused bytes at end of each page */
-+  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
-+  u32 sectorSize;             /* Assumed sector size during rollback */
-+  int pageSize;               /* Number of bytes in a page */
-+  Pgno mxPgno;                /* Maximum allowed size of the database */
-+  i64 journalSizeLimit;       /* Size limit for persistent journal files */
-+  char *zFilename;            /* Name of the database file */
-+  char *zJournal;             /* Name of the journal file */
-+  int (*xBusyHandler)(void*); /* Function to call when busy */
-+  void *pBusyHandlerArg;      /* Context argument for xBusyHandler */
-+#ifdef SQLITE_TEST
-+  int nHit, nMiss;            /* Cache hits and missing */
-+  int nRead, nWrite;          /* Database pages read/written */
-+#endif
-+  void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
-+#ifdef SQLITE_HAS_CODEC
-+  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
-+  void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
-+  void (*xCodecFree)(void*);             /* Destructor for the codec */
-+  void *pCodec;               /* First argument to xCodec... methods */
-+#endif
-+  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
-+  PCache *pPCache;            /* Pointer to page cache object */
-+#ifndef SQLITE_OMIT_WAL
-+  Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
-+  char *zWal;                 /* File name for write-ahead log */
- #endif
-+};
- 
- /*
--** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
--** on the cache using a hash function.  This is used for testing
--** and debugging only.
--*/
--#ifdef SQLITE_CHECK_PAGES
--/*
--** Return a 32-bit hash of the page data for pPage.
-+** The following global variables hold counters used for
-+** testing purposes only.  These variables do not exist in
-+** a non-testing build.  These variables are not thread-safe.
- */
--static u32 pager_datahash(int nByte, unsigned char *pData){
--  u32 hash = 0;
--  int i;
--  for(i=0; i<nByte; i++){
--    hash = (hash*1039) + pData[i];
--  }
--  return hash;
--}
--static u32 pager_pagehash(PgHdr *pPage){
--  return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData);
--}
--static void pager_set_pagehash(PgHdr *pPage){
--  pPage->pageHash = pager_pagehash(pPage);
--}
-+#ifdef SQLITE_TEST
-+SQLITE_API int sqlite3_pager_readdb_count = 0;    /* Number of full pages read from DB */
-+SQLITE_API int sqlite3_pager_writedb_count = 0;   /* Number of full pages written to DB */
-+SQLITE_API int sqlite3_pager_writej_count = 0;    /* Number of pages written to journal */
-+# define PAGER_INCR(v)  v++
-+#else
-+# define PAGER_INCR(v)
-+#endif
- 
--/*
--** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
--** is defined, and NDEBUG is not defined, an assert() statement checks
--** that the page is either dirty or still matches the calculated page-hash.
--*/
--#define CHECK_PAGE(x) checkPage(x)
--static void checkPage(PgHdr *pPg){
--  Pager *pPager = pPg->pPager;
--  assert( pPager->eState!=PAGER_ERROR );
--  assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
--}
- 
--#else
--#define pager_datahash(X,Y)  0
--#define pager_pagehash(X)  0
--#define pager_set_pagehash(X)
--#define CHECK_PAGE(x)
--#endif  /* SQLITE_CHECK_PAGES */
- 
- /*
--** When this is called the journal file for pager pPager must be open.
--** This function attempts to read a master journal file name from the 
--** end of the file and, if successful, copies it into memory supplied 
--** by the caller. See comments above writeMasterJournal() for the format
--** used to store a master journal file name at the end of a journal file.
--**
--** zMaster must point to a buffer of at least nMaster bytes allocated by
--** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
--** enough space to write the master journal name). If the master journal
--** name in the journal is longer than nMaster bytes (including a
--** nul-terminator), then this is handled as if no master journal name
--** were present in the journal.
--**
--** If a master journal file name is present at the end of the journal
--** file, then it is copied into the buffer pointed to by zMaster. A
--** nul-terminator byte is appended to the buffer following the master
--** journal file name.
-+** Journal files begin with the following magic string.  The data
-+** was obtained from /dev/random.  It is used only as a sanity check.
- **
--** If it is determined that no master journal file name is present 
--** zMaster[0] is set to 0 and SQLITE_OK returned.
-+** Since version 2.8.0, the journal format contains additional sanity
-+** checking information.  If the power fails while the journal is being
-+** written, semi-random garbage data might appear in the journal
-+** file after power is restored.  If an attempt is then made
-+** to roll the journal back, the database could be corrupted.  The additional
-+** sanity checking data is an attempt to discover the garbage in the
-+** journal and ignore it.
- **
--** If an error occurs while reading from the journal file, an SQLite
--** error code is returned.
-+** The sanity checking information for the new journal format consists
-+** of a 32-bit checksum on each page of data.  The checksum covers both
-+** the page number and the pPager->pageSize bytes of data for the page.
-+** This cksum is initialized to a 32-bit random value that appears in the
-+** journal file right after the header.  The random initializer is important,
-+** because garbage data that appears at the end of a journal is likely
-+** data that was once in other files that have now been deleted.  If the
-+** garbage data came from an obsolete journal file, the checksums might
-+** be correct.  But by initializing the checksum to random value which
-+** is different for every journal, we minimize that risk.
- */
--static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){
--  int rc;                    /* Return code */
--  u32 len;                   /* Length in bytes of master journal name */
--  i64 szJ;                   /* Total size in bytes of journal file pJrnl */
--  u32 cksum;                 /* MJ checksum value read from journal */
--  u32 u;                     /* Unsigned loop counter */
--  unsigned char aMagic[8];   /* A buffer to hold the magic header */
--  zMaster[0] = '\0';
--
--  if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ))
--   || szJ<16
--   || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len))
--   || len>=nMaster 
--   || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum))
--   || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8))
--   || memcmp(aMagic, aJournalMagic, 8)
--   || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len))
--  ){
--    return rc;
--  }
--
--  /* See if the checksum matches the master journal name */
--  for(u=0; u<len; u++){
--    cksum -= zMaster[u];
--  }
--  if( cksum ){
--    /* If the checksum doesn't add up, then one or more of the disk sectors
--    ** containing the master journal filename is corrupted. This means
--    ** definitely roll back, so just return SQLITE_OK and report a (nul)
--    ** master-journal filename.
--    */
--    len = 0;
--  }
--  zMaster[len] = '\0';
--   
--  return SQLITE_OK;
--}
-+static const unsigned char aJournalMagic[] = {
-+  0xd9, 0xd5, 0x05, 0xf9, 0x20, 0xa1, 0x63, 0xd7,
-+};
- 
- /*
--** Return the offset of the sector boundary at or immediately 
--** following the value in pPager->journalOff, assuming a sector 
--** size of pPager->sectorSize bytes.
--**
--** i.e for a sector size of 512:
--**
--**   Pager.journalOff          Return value
--**   ---------------------------------------
--**   0                         0
--**   512                       512
--**   100                       512
--**   2000                      2048
--** 
-+** The size of the of each page record in the journal is given by
-+** the following macro.
- */
--static i64 journalHdrOffset(Pager *pPager){
--  i64 offset = 0;
--  i64 c = pPager->journalOff;
--  if( c ){
--    offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
--  }
--  assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
--  assert( offset>=c );
--  assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
--  return offset;
--}
-+#define JOURNAL_PG_SZ(pPager)  ((pPager->pageSize) + 8)
- 
- /*
--** The journal file must be open when this function is called.
--**
--** This function is a no-op if the journal file has not been written to
--** within the current transaction (i.e. if Pager.journalOff==0).
--**
--** If doTruncate is non-zero or the Pager.journalSizeLimit variable is
--** set to 0, then truncate the journal file to zero bytes in size. Otherwise,
--** zero the 28-byte header at the start of the journal file. In either case, 
--** if the pager is not in no-sync mode, sync the journal file immediately 
--** after writing or truncating it.
--**
--** If Pager.journalSizeLimit is set to a positive, non-zero value, and
--** following the truncation or zeroing described above the size of the 
--** journal file in bytes is larger than this value, then truncate the
--** journal file to Pager.journalSizeLimit bytes. The journal file does
--** not need to be synced following this operation.
--**
--** If an IO error occurs, abandon processing and return the IO error code.
--** Otherwise, return SQLITE_OK.
-+** The journal header size for this pager. This is usually the same 
-+** size as a single disk sector. See also setSectorSize().
- */
--static int zeroJournalHdr(Pager *pPager, int doTruncate){
--  int rc = SQLITE_OK;                               /* Return code */
--  assert( isOpen(pPager->jfd) );
--  if( pPager->journalOff ){
--    const i64 iLimit = pPager->journalSizeLimit;    /* Local cache of jsl */
--
--    IOTRACE(("JZEROHDR %p\n", pPager))
--    if( doTruncate || iLimit==0 ){
--      rc = sqlite3OsTruncate(pPager->jfd, 0);
--    }else{
--      static const char zeroHdr[28] = {0};
--      rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
--    }
--    if( rc==SQLITE_OK && !pPager->noSync ){
--      rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags);
--    }
--
--    /* At this point the transaction is committed but the write lock 
--    ** is still held on the file. If there is a size limit configured for 
--    ** the persistent journal and the journal file currently consumes more
--    ** space than that limit allows for, truncate it now. There is no need
--    ** to sync the file following this operation.
--    */
--    if( rc==SQLITE_OK && iLimit>0 ){
--      i64 sz;
--      rc = sqlite3OsFileSize(pPager->jfd, &sz);
--      if( rc==SQLITE_OK && sz>iLimit ){
--        rc = sqlite3OsTruncate(pPager->jfd, iLimit);
--      }
--    }
--  }
--  return rc;
--}
-+#define JOURNAL_HDR_SZ(pPager) (pPager->sectorSize)
- 
- /*
--** The journal file must be open when this routine is called. A journal
--** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
--** current location.
--**
--** The format for the journal header is as follows:
--** - 8 bytes: Magic identifying journal format.
--** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
--** - 4 bytes: Random number used for page hash.
--** - 4 bytes: Initial database page count.
--** - 4 bytes: Sector size used by the process that wrote this journal.
--** - 4 bytes: Database page size.
--** 
--** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
-+** The macro MEMDB is true if we are dealing with an in-memory database.
-+** We do this as a macro so that if the SQLITE_OMIT_MEMORYDB macro is set,
-+** the value of MEMDB will be a constant and the compiler will optimize
-+** out code that would never execute.
- */
--static int writeJournalHdr(Pager *pPager){
--  int rc = SQLITE_OK;                 /* Return code */
--  char *zHeader = pPager->pTmpSpace;  /* Temporary space used to build header */
--  u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */
--  u32 nWrite;                         /* Bytes of header sector written */
--  int ii;                             /* Loop counter */
--
--  assert( isOpen(pPager->jfd) );      /* Journal file must be open. */
--
--  if( nHeader>JOURNAL_HDR_SZ(pPager) ){
--    nHeader = JOURNAL_HDR_SZ(pPager);
--  }
--
--  /* If there are active savepoints and any of them were created 
--  ** since the most recent journal header was written, update the 
--  ** PagerSavepoint.iHdrOffset fields now.
--  */
--  for(ii=0; ii<pPager->nSavepoint; ii++){
--    if( pPager->aSavepoint[ii].iHdrOffset==0 ){
--      pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff;
--    }
--  }
--
--  pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager);
--
--  /* 
--  ** Write the nRec Field - the number of page records that follow this
--  ** journal header. Normally, zero is written to this value at this time.
--  ** After the records are added to the journal (and the journal synced, 
--  ** if in full-sync mode), the zero is overwritten with the true number
--  ** of records (see syncJournal()).
--  **
--  ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
--  ** reading the journal this value tells SQLite to assume that the
--  ** rest of the journal file contains valid page records. This assumption
--  ** is dangerous, as if a failure occurred whilst writing to the journal
--  ** file it may contain some garbage data. There are two scenarios
--  ** where this risk can be ignored:
--  **
--  **   * When the pager is in no-sync mode. Corruption can follow a
--  **     power failure in this case anyway.
--  **
--  **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
--  **     that garbage data is never appended to the journal file.
--  */
--  assert( isOpen(pPager->fd) || pPager->noSync );
--  if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY)
--   || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
--  ){
--    memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
--    put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
--  }else{
--    memset(zHeader, 0, sizeof(aJournalMagic)+4);
--  }
--
--  /* The random check-hash initialiser */ 
--  sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
--  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
--  /* The initial database size */
--  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize);
--  /* The assumed sector size for this process */
--  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
--
--  /* The page size */
--  put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
--
--  /* Initializing the tail of the buffer is not necessary.  Everything
--  ** works find if the following memset() is omitted.  But initializing
--  ** the memory prevents valgrind from complaining, so we are willing to
--  ** take the performance hit.
--  */
--  memset(&zHeader[sizeof(aJournalMagic)+20], 0,
--         nHeader-(sizeof(aJournalMagic)+20));
--
--  /* In theory, it is only necessary to write the 28 bytes that the 
--  ** journal header consumes to the journal file here. Then increment the 
--  ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next 
--  ** record is written to the following sector (leaving a gap in the file
--  ** that will be implicitly filled in by the OS).
--  **
--  ** However it has been discovered that on some systems this pattern can 
--  ** be significantly slower than contiguously writing data to the file,
--  ** even if that means explicitly writing data to the block of 
--  ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what
--  ** is done. 
--  **
--  ** The loop is required here in case the sector-size is larger than the 
--  ** database page size. Since the zHeader buffer is only Pager.pageSize
--  ** bytes in size, more than one call to sqlite3OsWrite() may be required
--  ** to populate the entire journal header sector.
--  */ 
--  for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
--    IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
--    rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
--    assert( pPager->journalHdr <= pPager->journalOff );
--    pPager->journalOff += nHeader;
--  }
-+#ifdef SQLITE_OMIT_MEMORYDB
-+# define MEMDB 0
-+#else
-+# define MEMDB pPager->memDb
-+#endif
- 
--  return rc;
--}
-+/*
-+** The maximum legal page number is (2^31 - 1).
-+*/
-+#define PAGER_MAX_PGNO 2147483647
- 
- /*
--** The journal file must be open when this is called. A journal header file
--** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
--** file. The current location in the journal file is given by
--** pPager->journalOff. See comments above function writeJournalHdr() for
--** a description of the journal header format.
-+** The argument to this macro is a file descriptor (type sqlite3_file*).
-+** Return 0 if it is not open, or non-zero (but not 1) if it is.
- **
--** If the header is read successfully, *pNRec is set to the number of
--** page records following this header and *pDbSize is set to the size of the
--** database before the transaction began, in pages. Also, pPager->cksumInit
--** is set to the value read from the journal header. SQLITE_OK is returned
--** in this case.
-+** This is so that expressions can be written as:
- **
--** If the journal header file appears to be corrupted, SQLITE_DONE is
--** returned and *pNRec and *PDbSize are undefined.  If JOURNAL_HDR_SZ bytes
--** cannot be read from the journal file an error code is returned.
-+**   if( isOpen(pPager->jfd) ){ ...
-+**
-+** instead of
-+**
-+**   if( pPager->jfd->pMethods ){ ...
- */
--static int readJournalHdr(
--  Pager *pPager,               /* Pager object */
--  int isHot,
--  i64 journalSize,             /* Size of the open journal file in bytes */
--  u32 *pNRec,                  /* OUT: Value read from the nRec field */
--  u32 *pDbSize                 /* OUT: Value of original database size field */
--){
--  int rc;                      /* Return code */
--  unsigned char aMagic[8];     /* A buffer to hold the magic header */
--  i64 iHdrOff;                 /* Offset of journal header being read */
--
--  assert( isOpen(pPager->jfd) );      /* Journal file must be open. */
--
--  /* Advance Pager.journalOff to the start of the next sector. If the
--  ** journal file is too small for there to be a header stored at this
--  ** point, return SQLITE_DONE.
--  */
--  pPager->journalOff = journalHdrOffset(pPager);
--  if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
--    return SQLITE_DONE;
--  }
--  iHdrOff = pPager->journalOff;
--
--  /* Read in the first 8 bytes of the journal header. If they do not match
--  ** the  magic string found at the start of each journal header, return
--  ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise,
--  ** proceed.
--  */
--  if( isHot || iHdrOff!=pPager->journalHdr ){
--    rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff);
--    if( rc ){
--      return rc;
--    }
--    if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
--      return SQLITE_DONE;
--    }
--  }
--
--  /* Read the first three 32-bit fields of the journal header: The nRec
--  ** field, the checksum-initializer and the database size at the start
--  ** of the transaction. Return an error code if anything goes wrong.
--  */
--  if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec))
--   || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit))
--   || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize))
--  ){
--    return rc;
--  }
--
--  if( pPager->journalOff==0 ){
--    u32 iPageSize;               /* Page-size field of journal header */
--    u32 iSectorSize;             /* Sector-size field of journal header */
--
--    /* Read the page-size and sector-size journal header fields. */
--    if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize))
--     || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize))
--    ){
--      return rc;
--    }
--
--    /* Versions of SQLite prior to 3.5.8 set the page-size field of the
--    ** journal header to zero. In this case, assume that the Pager.pageSize
--    ** variable is already set to the correct page size.
--    */
--    if( iPageSize==0 ){
--      iPageSize = pPager->pageSize;
--    }
--
--    /* Check that the values read from the page-size and sector-size fields
--    ** are within range. To be 'in range', both values need to be a power
--    ** of two greater than or equal to 512 or 32, and not greater than their 
--    ** respective compile time maximum limits.
--    */
--    if( iPageSize<512                  || iSectorSize<32
--     || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE
--     || ((iPageSize-1)&iPageSize)!=0   || ((iSectorSize-1)&iSectorSize)!=0 
--    ){
--      /* If the either the page-size or sector-size in the journal-header is 
--      ** invalid, then the process that wrote the journal-header must have 
--      ** crashed before the header was synced. In this case stop reading 
--      ** the journal file here.
--      */
--      return SQLITE_DONE;
--    }
--
--    /* Update the page-size to match the value read from the journal. 
--    ** Use a testcase() macro to make sure that malloc failure within 
--    ** PagerSetPagesize() is tested.
--    */
--    rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1);
--    testcase( rc!=SQLITE_OK );
--
--    /* Update the assumed sector-size to match the value used by 
--    ** the process that created this journal. If this journal was
--    ** created by a process other than this one, then this routine
--    ** is being called from within pager_playback(). The local value
--    ** of Pager.sectorSize is restored at the end of that routine.
--    */
--    pPager->sectorSize = iSectorSize;
--  }
-+#define isOpen(pFd) ((pFd)->pMethods)
- 
--  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
--  return rc;
-+/*
-+** Return true if this pager uses a write-ahead log instead of the usual
-+** rollback journal. Otherwise false.
-+*/
-+#ifndef SQLITE_OMIT_WAL
-+static int pagerUseWal(Pager *pPager){
-+  return (pPager->pWal!=0);
- }
-+#else
-+# define pagerUseWal(x) 0
-+# define pagerRollbackWal(x) 0
-+# define pagerWalFrames(v,w,x,y,z) 0
-+# define pagerOpenWalIfPresent(z) SQLITE_OK
-+# define pagerBeginReadTransaction(z) SQLITE_OK
-+#endif
- 
--
-+#ifndef NDEBUG 
- /*
--** Write the supplied master journal name into the journal file for pager
--** pPager at the current location. The master journal name must be the last
--** thing written to a journal file. If the pager is in full-sync mode, the
--** journal file descriptor is advanced to the next sector boundary before
--** anything is written. The format is:
--**
--**   + 4 bytes: PAGER_MJ_PGNO.
--**   + N bytes: Master journal filename in utf-8.
--**   + 4 bytes: N (length of master journal name in bytes, no nul-terminator).
--**   + 4 bytes: Master journal name checksum.
--**   + 8 bytes: aJournalMagic[].
-+** Usage:
- **
--** The master journal page checksum is the sum of the bytes in the master
--** journal name, where each byte is interpreted as a signed 8-bit integer.
-+**   assert( assert_pager_state(pPager) );
- **
--** If zMaster is a NULL pointer (occurs for a single database transaction), 
--** this call is a no-op.
-+** This function runs many asserts to try to find inconsistencies in
-+** the internal state of the Pager object.
- */
--static int writeMasterJournal(Pager *pPager, const char *zMaster){
--  int rc;                          /* Return code */
--  int nMaster;                     /* Length of string zMaster */
--  i64 iHdrOff;                     /* Offset of header in journal file */
--  i64 jrnlSize;                    /* Size of journal file on disk */
--  u32 cksum = 0;                   /* Checksum of string zMaster */
-+static int assert_pager_state(Pager *p){
-+  Pager *pPager = p;
- 
--  assert( pPager->setMaster==0 );
--  assert( !pagerUseWal(pPager) );
-+  /* State must be valid. */
-+  assert( p->eState==PAGER_OPEN
-+       || p->eState==PAGER_READER
-+       || p->eState==PAGER_WRITER_LOCKED
-+       || p->eState==PAGER_WRITER_CACHEMOD
-+       || p->eState==PAGER_WRITER_DBMOD
-+       || p->eState==PAGER_WRITER_FINISHED
-+       || p->eState==PAGER_ERROR
-+  );
- 
--  if( !zMaster 
--   || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
--   || pPager->journalMode==PAGER_JOURNALMODE_OFF 
--  ){
--    return SQLITE_OK;
--  }
--  pPager->setMaster = 1;
--  assert( isOpen(pPager->jfd) );
--  assert( pPager->journalHdr <= pPager->journalOff );
-+  /* Regardless of the current state, a temp-file connection always behaves
-+  ** as if it has an exclusive lock on the database file. It never updates
-+  ** the change-counter field, so the changeCountDone flag is always set.
-+  */
-+  assert( p->tempFile==0 || p->eLock==EXCLUSIVE_LOCK );
-+  assert( p->tempFile==0 || pPager->changeCountDone );
- 
--  /* Calculate the length in bytes and the checksum of zMaster */
--  for(nMaster=0; zMaster[nMaster]; nMaster++){
--    cksum += zMaster[nMaster];
--  }
-+  /* If the useJournal flag is clear, the journal-mode must be "OFF". 
-+  ** And if the journal-mode is "OFF", the journal file must not be open.
-+  */
-+  assert( p->journalMode==PAGER_JOURNALMODE_OFF || p->useJournal );
-+  assert( p->journalMode!=PAGER_JOURNALMODE_OFF || !isOpen(p->jfd) );
- 
--  /* If in full-sync mode, advance to the next disk sector before writing
--  ** the master journal name. This is in case the previous page written to
--  ** the journal has already been synced.
-+  /* Check that MEMDB implies noSync. And an in-memory journal. Since 
-+  ** this means an in-memory pager performs no IO at all, it cannot encounter 
-+  ** either SQLITE_IOERR or SQLITE_FULL during rollback or while finalizing 
-+  ** a journal file. (although the in-memory journal implementation may 
-+  ** return SQLITE_IOERR_NOMEM while the journal file is being written). It 
-+  ** is therefore not possible for an in-memory pager to enter the ERROR 
-+  ** state.
-   */
--  if( pPager->fullSync ){
--    pPager->journalOff = journalHdrOffset(pPager);
-+  if( MEMDB ){
-+    assert( p->noSync );
-+    assert( p->journalMode==PAGER_JOURNALMODE_OFF 
-+         || p->journalMode==PAGER_JOURNALMODE_MEMORY 
-+    );
-+    assert( p->eState!=PAGER_ERROR && p->eState!=PAGER_OPEN );
-+    assert( pagerUseWal(p)==0 );
-   }
--  iHdrOff = pPager->journalOff;
- 
--  /* Write the master journal data to the end of the journal file. If
--  ** an error occurs, return the error code to the caller.
-+  /* If changeCountDone is set, a RESERVED lock or greater must be held
-+  ** on the file.
-   */
--  if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager))))
--   || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4)))
--   || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster)))
--   || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum)))
--   || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8)))
--  ){
--    return rc;
--  }
--  pPager->journalOff += (nMaster+20);
-+  assert( pPager->changeCountDone==0 || pPager->eLock>=RESERVED_LOCK );
-+  assert( p->eLock!=PENDING_LOCK );
- 
--  /* If the pager is in peristent-journal mode, then the physical 
--  ** journal-file may extend past the end of the master-journal name
--  ** and 8 bytes of magic data just written to the file. This is 
--  ** dangerous because the code to rollback a hot-journal file
--  ** will not be able to find the master-journal name to determine 
--  ** whether or not the journal is hot. 
--  **
--  ** Easiest thing to do in this scenario is to truncate the journal 
--  ** file to the required size.
--  */ 
--  if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))
--   && jrnlSize>pPager->journalOff
--  ){
--    rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff);
-+  switch( p->eState ){
-+    case PAGER_OPEN:
-+      assert( !MEMDB );
-+      assert( pPager->errCode==SQLITE_OK );
-+      assert( sqlite3PcacheRefCount(pPager->pPCache)==0 || pPager->tempFile );
-+      break;
-+
-+    case PAGER_READER:
-+      assert( pPager->errCode==SQLITE_OK );
-+      assert( p->eLock!=UNKNOWN_LOCK );
-+      assert( p->eLock>=SHARED_LOCK || p->noReadlock );
-+      break;
-+
-+    case PAGER_WRITER_LOCKED:
-+      assert( p->eLock!=UNKNOWN_LOCK );
-+      assert( pPager->errCode==SQLITE_OK );
-+      if( !pagerUseWal(pPager) ){
-+        assert( p->eLock>=RESERVED_LOCK );
-+      }
-+      assert( pPager->dbSize==pPager->dbOrigSize );
-+      assert( pPager->dbOrigSize==pPager->dbFileSize );
-+      assert( pPager->dbOrigSize==pPager->dbHintSize );
-+      assert( pPager->setMaster==0 );
-+      break;
-+
-+    case PAGER_WRITER_CACHEMOD:
-+      assert( p->eLock!=UNKNOWN_LOCK );
-+      assert( pPager->errCode==SQLITE_OK );
-+      if( !pagerUseWal(pPager) ){
-+        /* It is possible that if journal_mode=wal here that neither the
-+        ** journal file nor the WAL file are open. This happens during
-+        ** a rollback transaction that switches from journal_mode=off
-+        ** to journal_mode=wal.
-+        */
-+        assert( p->eLock>=RESERVED_LOCK );
-+        assert( isOpen(p->jfd) 
-+             || p->journalMode==PAGER_JOURNALMODE_OFF 
-+             || p->journalMode==PAGER_JOURNALMODE_WAL 
-+        );
-+      }
-+      assert( pPager->dbOrigSize==pPager->dbFileSize );
-+      assert( pPager->dbOrigSize==pPager->dbHintSize );
-+      break;
-+
-+    case PAGER_WRITER_DBMOD:
-+      assert( p->eLock==EXCLUSIVE_LOCK );
-+      assert( pPager->errCode==SQLITE_OK );
-+      assert( !pagerUseWal(pPager) );
-+      assert( p->eLock>=EXCLUSIVE_LOCK );
-+      assert( isOpen(p->jfd) 
-+           || p->journalMode==PAGER_JOURNALMODE_OFF 
-+           || p->journalMode==PAGER_JOURNALMODE_WAL 
-+      );
-+      assert( pPager->dbOrigSize<=pPager->dbHintSize );
-+      break;
-+
-+    case PAGER_WRITER_FINISHED:
-+      assert( p->eLock==EXCLUSIVE_LOCK );
-+      assert( pPager->errCode==SQLITE_OK );
-+      assert( !pagerUseWal(pPager) );
-+      assert( isOpen(p->jfd) 
-+           || p->journalMode==PAGER_JOURNALMODE_OFF 
-+           || p->journalMode==PAGER_JOURNALMODE_WAL 
-+      );
-+      break;
-+
-+    case PAGER_ERROR:
-+      /* There must be at least one outstanding reference to the pager if
-+      ** in ERROR state. Otherwise the pager should have already dropped
-+      ** back to OPEN state.
-+      */
-+      assert( pPager->errCode!=SQLITE_OK );
-+      assert( sqlite3PcacheRefCount(pPager->pPCache)>0 );
-+      break;
-   }
--  return rc;
-+
-+  return 1;
- }
-+#endif /* ifndef NDEBUG */
- 
-+#ifdef SQLITE_DEBUG 
- /*
--** Find a page in the hash table given its page number. Return
--** a pointer to the page or NULL if the requested page is not 
--** already in memory.
-+** Return a pointer to a human readable string in a static buffer
-+** containing the state of the Pager object passed as an argument. This
-+** is intended to be used within debuggers. For example, as an alternative
-+** to "print *pPager" in gdb:
-+**
-+** (gdb) printf "%s", print_pager_state(pPager)
- */
--static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
--  PgHdr *p;                         /* Return value */
-+static char *print_pager_state(Pager *p){
-+  static char zRet[1024];
- 
--  /* It is not possible for a call to PcacheFetch() with createFlag==0 to
--  ** fail, since no attempt to allocate dynamic memory will be made.
--  */
--  (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
--  return p;
-+  sqlite3_snprintf(1024, zRet,
-+      "Filename:      %s\n"
-+      "State:         %s errCode=%d\n"
-+      "Lock:          %s\n"
-+      "Locking mode:  locking_mode=%s\n"
-+      "Journal mode:  journal_mode=%s\n"
-+      "Backing store: tempFile=%d memDb=%d useJournal=%d\n"
-+      "Journal:       journalOff=%lld journalHdr=%lld\n"
-+      "Size:          dbsize=%d dbOrigSize=%d dbFileSize=%d\n"
-+      , p->zFilename
-+      , p->eState==PAGER_OPEN            ? "OPEN" :
-+        p->eState==PAGER_READER          ? "READER" :
-+        p->eState==PAGER_WRITER_LOCKED   ? "WRITER_LOCKED" :
-+        p->eState==PAGER_WRITER_CACHEMOD ? "WRITER_CACHEMOD" :
-+        p->eState==PAGER_WRITER_DBMOD    ? "WRITER_DBMOD" :
-+        p->eState==PAGER_WRITER_FINISHED ? "WRITER_FINISHED" :
-+        p->eState==PAGER_ERROR           ? "ERROR" : "?error?"
-+      , (int)p->errCode
-+      , p->eLock==NO_LOCK         ? "NO_LOCK" :
-+        p->eLock==RESERVED_LOCK   ? "RESERVED" :
-+        p->eLock==EXCLUSIVE_LOCK  ? "EXCLUSIVE" :
-+        p->eLock==SHARED_LOCK     ? "SHARED" :
-+        p->eLock==UNKNOWN_LOCK    ? "UNKNOWN" : "?error?"
-+      , p->exclusiveMode ? "exclusive" : "normal"
-+      , p->journalMode==PAGER_JOURNALMODE_MEMORY   ? "memory" :
-+        p->journalMode==PAGER_JOURNALMODE_OFF      ? "off" :
-+        p->journalMode==PAGER_JOURNALMODE_DELETE   ? "delete" :
-+        p->journalMode==PAGER_JOURNALMODE_PERSIST  ? "persist" :
-+        p->journalMode==PAGER_JOURNALMODE_TRUNCATE ? "truncate" :
-+        p->journalMode==PAGER_JOURNALMODE_WAL      ? "wal" : "?error?"
-+      , (int)p->tempFile, (int)p->memDb, (int)p->useJournal
-+      , p->journalOff, p->journalHdr
-+      , (int)p->dbSize, (int)p->dbOrigSize, (int)p->dbFileSize
-+  );
-+
-+  return zRet;
- }
-+#endif
- 
- /*
--** Discard the entire contents of the in-memory page-cache.
-+** Return true if it is necessary to write page *pPg into the sub-journal.
-+** A page needs to be written into the sub-journal if there exists one
-+** or more open savepoints for which:
-+**
-+**   * The page-number is less than or equal to PagerSavepoint.nOrig, and
-+**   * The bit corresponding to the page-number is not set in
-+**     PagerSavepoint.pInSavepoint.
- */
--static void pager_reset(Pager *pPager){
--  sqlite3BackupRestart(pPager->pBackup);
--  sqlite3PcacheClear(pPager->pPCache);
-+static int subjRequiresPage(PgHdr *pPg){
-+  Pgno pgno = pPg->pgno;
-+  Pager *pPager = pPg->pPager;
-+  int i;
-+  for(i=0; i<pPager->nSavepoint; i++){
-+    PagerSavepoint *p = &pPager->aSavepoint[i];
-+    if( p->nOrig>=pgno && 0==sqlite3BitvecTest(p->pInSavepoint, pgno) ){
-+      return 1;
-+    }
-+  }
-+  return 0;
- }
- 
- /*
--** Free all structures in the Pager.aSavepoint[] array and set both
--** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal
--** if it is open and the pager is not in exclusive mode.
-+** Return true if the page is already in the journal file.
- */
--static void releaseAllSavepoints(Pager *pPager){
--  int ii;               /* Iterator for looping through Pager.aSavepoint */
--  for(ii=0; ii<pPager->nSavepoint; ii++){
--    sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
--  }
--  if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){
--    sqlite3OsClose(pPager->sjfd);
--  }
--  sqlite3_free(pPager->aSavepoint);
--  pPager->aSavepoint = 0;
--  pPager->nSavepoint = 0;
--  pPager->nSubRec = 0;
-+static int pageInJournal(PgHdr *pPg){
-+  return sqlite3BitvecTest(pPg->pPager->pInJournal, pPg->pgno);
- }
- 
- /*
--** Set the bit number pgno in the PagerSavepoint.pInSavepoint 
--** bitvecs of all open savepoints. Return SQLITE_OK if successful
--** or SQLITE_NOMEM if a malloc failure occurs.
-+** Read a 32-bit integer from the given file descriptor.  Store the integer
-+** that is read in *pRes.  Return SQLITE_OK if everything worked, or an
-+** error code if something goes wrong.
-+**
-+** All values are stored on disk as big-endian.
- */
--static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){
--  int ii;                   /* Loop counter */
--  int rc = SQLITE_OK;       /* Result code */
--
--  for(ii=0; ii<pPager->nSavepoint; ii++){
--    PagerSavepoint *p = &pPager->aSavepoint[ii];
--    if( pgno<=p->nOrig ){
--      rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
--      testcase( rc==SQLITE_NOMEM );
--      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
--    }
-+static int read32bits(sqlite3_file *fd, i64 offset, u32 *pRes){
-+  unsigned char ac[4];
-+  int rc = sqlite3OsRead(fd, ac, sizeof(ac), offset);
-+  if( rc==SQLITE_OK ){
-+    *pRes = sqlite3Get4byte(ac);
-   }
-   return rc;
- }
- 
- /*
--** This function is a no-op if the pager is in exclusive mode and not
--** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN
--** state.
--**
--** If the pager is not in exclusive-access mode, the database file is
--** completely unlocked. If the file is unlocked and the file-system does
--** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is
--** closed (if it is open).
--**
--** If the pager is in ERROR state when this function is called, the 
--** contents of the pager cache are discarded before switching back to 
--** the OPEN state. Regardless of whether the pager is in exclusive-mode
--** or not, any journal file left in the file-system will be treated
--** as a hot-journal and rolled back the next time a read-transaction
--** is opened (by this or by any other connection).
-+** Write a 32-bit integer into a string buffer in big-endian byte order.
- */
--static void pager_unlock(Pager *pPager){
--
--  assert( pPager->eState==PAGER_READER 
--       || pPager->eState==PAGER_OPEN 
--       || pPager->eState==PAGER_ERROR 
--  );
-+#define put32bits(A,B)  sqlite3Put4byte((u8*)A,B)
- 
--  sqlite3BitvecDestroy(pPager->pInJournal);
--  pPager->pInJournal = 0;
--  releaseAllSavepoints(pPager);
- 
--  if( pagerUseWal(pPager) ){
--    assert( !isOpen(pPager->jfd) );
--    sqlite3WalEndReadTransaction(pPager->pWal);
--    pPager->eState = PAGER_OPEN;
--  }else if( !pPager->exclusiveMode ){
--    int rc;                       /* Error code returned by pagerUnlockDb() */
--    int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0;
-+/*
-+** Write a 32-bit integer into the given file descriptor.  Return SQLITE_OK
-+** on success or an error code if something goes wrong.
-+*/
-+static int write32bits(sqlite3_file *fd, i64 offset, u32 val){
-+  char ac[4];
-+  put32bits(ac, val);
-+  return sqlite3OsWrite(fd, ac, 4, offset);
-+}
- 
--    /* If the operating system supports deletion of open files, then
--    ** close the journal file when dropping the database lock.  Otherwise
--    ** another connection with journal_mode=delete might delete the file
--    ** out from under us.
--    */
--    assert( (PAGER_JOURNALMODE_MEMORY   & 5)!=1 );
--    assert( (PAGER_JOURNALMODE_OFF      & 5)!=1 );
--    assert( (PAGER_JOURNALMODE_WAL      & 5)!=1 );
--    assert( (PAGER_JOURNALMODE_DELETE   & 5)!=1 );
--    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
--    assert( (PAGER_JOURNALMODE_PERSIST  & 5)==1 );
--    if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN)
--     || 1!=(pPager->journalMode & 5)
--    ){
--      sqlite3OsClose(pPager->jfd);
--    }
-+/*
-+** Unlock the database file to level eLock, which must be either NO_LOCK
-+** or SHARED_LOCK. Regardless of whether or not the call to xUnlock()
-+** succeeds, set the Pager.eLock variable to match the (attempted) new lock.
-+**
-+** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is
-+** called, do not modify it. See the comment above the #define of 
-+** UNKNOWN_LOCK for an explanation of this.
-+*/
-+static int pagerUnlockDb(Pager *pPager, int eLock){
-+  int rc = SQLITE_OK;
- 
--    /* If the pager is in the ERROR state and the call to unlock the database
--    ** file fails, set the current lock to UNKNOWN_LOCK. See the comment
--    ** above the #define for UNKNOWN_LOCK for an explanation of why this
--    ** is necessary.
--    */
--    rc = pagerUnlockDb(pPager, NO_LOCK);
--    if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){
--      pPager->eLock = UNKNOWN_LOCK;
-+  assert( !pPager->exclusiveMode || pPager->eLock==eLock );
-+  assert( eLock==NO_LOCK || eLock==SHARED_LOCK );
-+  assert( eLock!=NO_LOCK || pagerUseWal(pPager)==0 );
-+  if( isOpen(pPager->fd) ){
-+    assert( pPager->eLock>=eLock );
-+    rc = sqlite3OsUnlock(pPager->fd, eLock);
-+    if( pPager->eLock!=UNKNOWN_LOCK ){
-+      pPager->eLock = (u8)eLock;
-     }
--
--    /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here
--    ** without clearing the error code. This is intentional - the error
--    ** code is cleared and the cache reset in the block below.
--    */
--    assert( pPager->errCode || pPager->eState!=PAGER_ERROR );
--    pPager->changeCountDone = 0;
--    pPager->eState = PAGER_OPEN;
--  }
--
--  /* If Pager.errCode is set, the contents of the pager cache cannot be
--  ** trusted. Now that there are no outstanding references to the pager,
--  ** it can safely move back to PAGER_OPEN state. This happens in both
--  ** normal and exclusive-locking mode.
--  */
--  if( pPager->errCode ){
--    assert( !MEMDB );
--    pager_reset(pPager);
--    pPager->changeCountDone = pPager->tempFile;
--    pPager->eState = PAGER_OPEN;
--    pPager->errCode = SQLITE_OK;
-+    IOTRACE(("UNLOCK %p %d\n", pPager, eLock))
-   }
--
--  pPager->journalOff = 0;
--  pPager->journalHdr = 0;
--  pPager->setMaster = 0;
-+  return rc;
- }
- 
- /*
--** This function is called whenever an IOERR or FULL error that requires
--** the pager to transition into the ERROR state may have occurred.
--** The first argument is a pointer to the pager structure, the second 
--** the error-code about to be returned by a pager API function. The 
--** value returned is a copy of the second argument to this function. 
--**
--** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the
--** IOERR sub-codes, the pager enters the ERROR state and the error code
--** is stored in Pager.errCode. While the pager remains in the ERROR state,
--** all major API calls on the Pager will immediately return Pager.errCode.
-+** Lock the database file to level eLock, which must be either SHARED_LOCK,
-+** RESERVED_LOCK or EXCLUSIVE_LOCK. If the call is successful, set the
-+** Pager.eLock variable to the new locking state. 
- **
--** The ERROR state indicates that the contents of the pager-cache 
--** cannot be trusted. This state can be cleared by completely discarding 
--** the contents of the pager-cache. If a transaction was active when
--** the persistent error occurred, then the rollback journal may need
--** to be replayed to restore the contents of the database file (as if
--** it were a hot-journal).
-+** Except, if Pager.eLock is set to UNKNOWN_LOCK when this function is 
-+** called, do not modify it unless the new locking state is EXCLUSIVE_LOCK. 
-+** See the comment above the #define of UNKNOWN_LOCK for an explanation 
-+** of this.
- */
--static int pager_error(Pager *pPager, int rc){
--  int rc2 = rc & 0xff;
--  assert( rc==SQLITE_OK || !MEMDB );
--  assert(
--       pPager->errCode==SQLITE_FULL ||
--       pPager->errCode==SQLITE_OK ||
--       (pPager->errCode & 0xff)==SQLITE_IOERR
--  );
--  if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){
--    pPager->errCode = rc;
--    pPager->eState = PAGER_ERROR;
-+static int pagerLockDb(Pager *pPager, int eLock){
-+  int rc = SQLITE_OK;
-+
-+  assert( eLock==SHARED_LOCK || eLock==RESERVED_LOCK || eLock==EXCLUSIVE_LOCK );
-+  if( pPager->eLock<eLock || pPager->eLock==UNKNOWN_LOCK ){
-+    rc = sqlite3OsLock(pPager->fd, eLock);
-+    if( rc==SQLITE_OK && (pPager->eLock!=UNKNOWN_LOCK||eLock==EXCLUSIVE_LOCK) ){
-+      pPager->eLock = (u8)eLock;
-+      IOTRACE(("LOCK %p %d\n", pPager, eLock))
-+    }
-   }
-   return rc;
- }
- 
--/*
--** This routine ends a transaction. A transaction is usually ended by 
--** either a COMMIT or a ROLLBACK operation. This routine may be called 
--** after rollback of a hot-journal, or if an error occurs while opening
--** the journal file or writing the very first journal-header of a
--** database transaction.
--** 
--** This routine is never called in PAGER_ERROR state. If it is called
--** in PAGER_NONE or PAGER_SHARED state and the lock held is less
--** exclusive than a RESERVED lock, it is a no-op.
--**
--** Otherwise, any active savepoints are released.
--**
--** If the journal file is open, then it is "finalized". Once a journal 
--** file has been finalized it is not possible to use it to roll back a 
--** transaction. Nor will it be considered to be a hot-journal by this
--** or any other database connection. Exactly how a journal is finalized
--** depends on whether or not the pager is running in exclusive mode and
--** the current journal-mode (Pager.journalMode value), as follows:
--**
--**   journalMode==MEMORY
--**     Journal file descriptor is simply closed. This destroys an 
--**     in-memory journal.
--**
--**   journalMode==TRUNCATE
--**     Journal file is truncated to zero bytes in size.
--**
--**   journalMode==PERSIST
--**     The first 28 bytes of the journal file are zeroed. This invalidates
--**     the first journal header in the file, and hence the entire journal
--**     file. An invalid journal file cannot be rolled back.
--**
--**   journalMode==DELETE
--**     The journal file is closed and deleted using sqlite3OsDelete().
-+/*
-+** This function determines whether or not the atomic-write optimization
-+** can be used with this pager. The optimization can be used if:
- **
--**     If the pager is running in exclusive mode, this method of finalizing
--**     the journal file is never used. Instead, if the journalMode is
--**     DELETE and the pager is in exclusive mode, the method described under
--**     journalMode==PERSIST is used instead.
-+**  (a) the value returned by OsDeviceCharacteristics() indicates that
-+**      a database page may be written atomically, and
-+**  (b) the value returned by OsSectorSize() is less than or equal
-+**      to the page size.
- **
--** After the journal is finalized, the pager moves to PAGER_READER state.
--** If running in non-exclusive rollback mode, the lock on the file is 
--** downgraded to a SHARED_LOCK.
-+** The optimization is also always enabled for temporary files. It is
-+** an error to call this function if pPager is opened on an in-memory
-+** database.
- **
--** SQLITE_OK is returned if no error occurs. If an error occurs during
--** any of the IO operations to finalize the journal file or unlock the
--** database then the IO error code is returned to the user. If the 
--** operation to finalize the journal file fails, then the code still
--** tries to unlock the database file if not in exclusive mode. If the
--** unlock operation fails as well, then the first error code related
--** to the first error encountered (the journal finalization one) is
--** returned.
-+** If the optimization cannot be used, 0 is returned. If it can be used,
-+** then the value returned is the size of the journal file when it
-+** contains rollback data for exactly one page.
- */
--static int pager_end_transaction(Pager *pPager, int hasMaster){
--  int rc = SQLITE_OK;      /* Error code from journal finalization operation */
--  int rc2 = SQLITE_OK;     /* Error code from db file unlock operation */
--
--  /* Do nothing if the pager does not have an open write transaction
--  ** or at least a RESERVED lock. This function may be called when there
--  ** is no write-transaction active but a RESERVED or greater lock is
--  ** held under two circumstances:
--  **
--  **   1. After a successful hot-journal rollback, it is called with
--  **      eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK.
--  **
--  **   2. If a connection with locking_mode=exclusive holding an EXCLUSIVE 
--  **      lock switches back to locking_mode=normal and then executes a
--  **      read-transaction, this function is called with eState==PAGER_READER 
--  **      and eLock==EXCLUSIVE_LOCK when the read-transaction is closed.
--  */
--  assert( assert_pager_state(pPager) );
--  assert( pPager->eState!=PAGER_ERROR );
--  if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
--    return SQLITE_OK;
--  }
-+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
-+static int jrnlBufferSize(Pager *pPager){
-+  assert( !MEMDB );
-+  if( !pPager->tempFile ){
-+    int dc;                           /* Device characteristics */
-+    int nSector;                      /* Sector size */
-+    int szPage;                       /* Page size */
- 
--  releaseAllSavepoints(pPager);
--  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
--  if( isOpen(pPager->jfd) ){
--    assert( !pagerUseWal(pPager) );
-+    assert( isOpen(pPager->fd) );
-+    dc = sqlite3OsDeviceCharacteristics(pPager->fd);
-+    nSector = pPager->sectorSize;
-+    szPage = pPager->pageSize;
- 
--    /* Finalize the journal file. */
--    if( sqlite3IsMemJournal(pPager->jfd) ){
--      assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
--      sqlite3OsClose(pPager->jfd);
--    }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
--      if( pPager->journalOff==0 ){
--        rc = SQLITE_OK;
--      }else{
--        rc = sqlite3OsTruncate(pPager->jfd, 0);
--      }
--      pPager->journalOff = 0;
--    }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST
--      || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL)
--    ){
--      rc = zeroJournalHdr(pPager, hasMaster);
--      pPager->journalOff = 0;
--    }else{
--      /* This branch may be executed with Pager.journalMode==MEMORY if
--      ** a hot-journal was just rolled back. In this case the journal
--      ** file should be closed and deleted. If this connection writes to
--      ** the database file, it will do so using an in-memory journal. 
--      */
--      assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE 
--           || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
--           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
--      );
--      sqlite3OsClose(pPager->jfd);
--      if( !pPager->tempFile ){
--        rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
--      }
-+    assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
-+    assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
-+    if( 0==(dc&(SQLITE_IOCAP_ATOMIC|(szPage>>8)) || nSector>szPage) ){
-+      return 0;
-     }
-   }
- 
--#ifdef SQLITE_CHECK_PAGES
--  sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash);
--  if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){
--    PgHdr *p = pager_lookup(pPager, 1);
--    if( p ){
--      p->pageHash = 0;
--      sqlite3PagerUnref(p);
--    }
--  }
-+  return JOURNAL_HDR_SZ(pPager) + JOURNAL_PG_SZ(pPager);
-+}
- #endif
- 
--  sqlite3BitvecDestroy(pPager->pInJournal);
--  pPager->pInJournal = 0;
--  pPager->nRec = 0;
--  sqlite3PcacheCleanAll(pPager->pPCache);
--  sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
--
--  if( pagerUseWal(pPager) ){
--    /* Drop the WAL write-lock, if any. Also, if the connection was in 
--    ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE 
--    ** lock held on the database file.
--    */
--    rc2 = sqlite3WalEndWriteTransaction(pPager->pWal);
--    assert( rc2==SQLITE_OK );
--  }
--  if( !pPager->exclusiveMode 
--   && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0))
--  ){
--    rc2 = pagerUnlockDb(pPager, SHARED_LOCK);
--    pPager->changeCountDone = 0;
-+/*
-+** If SQLITE_CHECK_PAGES is defined then we do some sanity checking
-+** on the cache using a hash function.  This is used for testing
-+** and debugging only.
-+*/
-+#ifdef SQLITE_CHECK_PAGES
-+/*
-+** Return a 32-bit hash of the page data for pPage.
-+*/
-+static u32 pager_datahash(int nByte, unsigned char *pData){
-+  u32 hash = 0;
-+  int i;
-+  for(i=0; i<nByte; i++){
-+    hash = (hash*1039) + pData[i];
-   }
--  pPager->eState = PAGER_READER;
--  pPager->setMaster = 0;
--
--  return (rc==SQLITE_OK?rc2:rc);
-+  return hash;
-+}
-+static u32 pager_pagehash(PgHdr *pPage){
-+  return pager_datahash(pPage->pPager->pageSize, (unsigned char *)pPage->pData);
-+}
-+static void pager_set_pagehash(PgHdr *pPage){
-+  pPage->pageHash = pager_pagehash(pPage);
- }
- 
- /*
--** Execute a rollback if a transaction is active and unlock the 
--** database file. 
--**
--** If the pager has already entered the ERROR state, do not attempt 
--** the rollback at this time. Instead, pager_unlock() is called. The
--** call to pager_unlock() will discard all in-memory pages, unlock
--** the database file and move the pager back to OPEN state. If this 
--** means that there is a hot-journal left in the file-system, the next 
--** connection to obtain a shared lock on the pager (which may be this one) 
--** will roll it back.
--**
--** If the pager has not already entered the ERROR state, but an IO or
--** malloc error occurs during a rollback, then this will itself cause 
--** the pager to enter the ERROR state. Which will be cleared by the
--** call to pager_unlock(), as described above.
-+** The CHECK_PAGE macro takes a PgHdr* as an argument. If SQLITE_CHECK_PAGES
-+** is defined, and NDEBUG is not defined, an assert() statement checks
-+** that the page is either dirty or still matches the calculated page-hash.
- */
--static void pagerUnlockAndRollback(Pager *pPager){
--  if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){
--    assert( assert_pager_state(pPager) );
--    if( pPager->eState>=PAGER_WRITER_LOCKED ){
--      sqlite3BeginBenignMalloc();
--      sqlite3PagerRollback(pPager);
--      sqlite3EndBenignMalloc();
--    }else if( !pPager->exclusiveMode ){
--      assert( pPager->eState==PAGER_READER );
--      pager_end_transaction(pPager, 0);
--    }
--  }
--  pager_unlock(pPager);
-+#define CHECK_PAGE(x) checkPage(x)
-+static void checkPage(PgHdr *pPg){
-+  Pager *pPager = pPg->pPager;
-+  assert( pPager->eState!=PAGER_ERROR );
-+  assert( (pPg->flags&PGHDR_DIRTY) || pPg->pageHash==pager_pagehash(pPg) );
- }
- 
-+#else
-+#define pager_datahash(X,Y)  0
-+#define pager_pagehash(X)  0
-+#define pager_set_pagehash(X)
-+#define CHECK_PAGE(x)
-+#endif  /* SQLITE_CHECK_PAGES */
-+
- /*
--** Parameter aData must point to a buffer of pPager->pageSize bytes
--** of data. Compute and return a checksum based on the contents of the 
--** page of data and the current value of pPager->cksumInit.
-+** When this is called the journal file for pager pPager must be open.
-+** This function attempts to read a master journal file name from the 
-+** end of the file and, if successful, copies it into memory supplied 
-+** by the caller. See comments above writeMasterJournal() for the format
-+** used to store a master journal file name at the end of a journal file.
- **
--** This is not a real checksum. It is really just the sum of the 
--** random initial value (pPager->cksumInit) and every 200th byte
--** of the page data, starting with byte offset (pPager->pageSize%200).
--** Each byte is interpreted as an 8-bit unsigned integer.
-+** zMaster must point to a buffer of at least nMaster bytes allocated by
-+** the caller. This should be sqlite3_vfs.mxPathname+1 (to ensure there is
-+** enough space to write the master journal name). If the master journal
-+** name in the journal is longer than nMaster bytes (including a
-+** nul-terminator), then this is handled as if no master journal name
-+** were present in the journal.
- **
--** Changing the formula used to compute this checksum results in an
--** incompatible journal file format.
-+** If a master journal file name is present at the end of the journal
-+** file, then it is copied into the buffer pointed to by zMaster. A
-+** nul-terminator byte is appended to the buffer following the master
-+** journal file name.
- **
--** If journal corruption occurs due to a power failure, the most likely 
--** scenario is that one end or the other of the record will be changed. 
--** It is much less likely that the two ends of the journal record will be
--** correct and the middle be corrupt.  Thus, this "checksum" scheme,
--** though fast and simple, catches the most likely kind of corruption.
-+** If it is determined that no master journal file name is present 
-+** zMaster[0] is set to 0 and SQLITE_OK returned.
-+**
-+** If an error occurs while reading from the journal file, an SQLite
-+** error code is returned.
- */
--static u32 pager_cksum(Pager *pPager, const u8 *aData){
--  u32 cksum = pPager->cksumInit;         /* Checksum value to return */
--  int i = pPager->pageSize-200;          /* Loop counter */
--  while( i>0 ){
--    cksum += aData[i];
--    i -= 200;
-+static int readMasterJournal(sqlite3_file *pJrnl, char *zMaster, u32 nMaster){
-+  int rc;                    /* Return code */
-+  u32 len;                   /* Length in bytes of master journal name */
-+  i64 szJ;                   /* Total size in bytes of journal file pJrnl */
-+  u32 cksum;                 /* MJ checksum value read from journal */
-+  u32 u;                     /* Unsigned loop counter */
-+  unsigned char aMagic[8];   /* A buffer to hold the magic header */
-+  zMaster[0] = '\0';
-+
-+  if( SQLITE_OK!=(rc = sqlite3OsFileSize(pJrnl, &szJ))
-+   || szJ<16
-+   || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-16, &len))
-+   || len>=nMaster 
-+   || SQLITE_OK!=(rc = read32bits(pJrnl, szJ-12, &cksum))
-+   || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, aMagic, 8, szJ-8))
-+   || memcmp(aMagic, aJournalMagic, 8)
-+   || SQLITE_OK!=(rc = sqlite3OsRead(pJrnl, zMaster, len, szJ-16-len))
-+  ){
-+    return rc;
-   }
--  return cksum;
-+
-+  /* See if the checksum matches the master journal name */
-+  for(u=0; u<len; u++){
-+    cksum -= zMaster[u];
-+  }
-+  if( cksum ){
-+    /* If the checksum doesn't add up, then one or more of the disk sectors
-+    ** containing the master journal filename is corrupted. This means
-+    ** definitely roll back, so just return SQLITE_OK and report a (nul)
-+    ** master-journal filename.
-+    */
-+    len = 0;
-+  }
-+  zMaster[len] = '\0';
-+   
-+  return SQLITE_OK;
- }
- 
- /*
--** Report the current page size and number of reserved bytes back
--** to the codec.
-+** Return the offset of the sector boundary at or immediately 
-+** following the value in pPager->journalOff, assuming a sector 
-+** size of pPager->sectorSize bytes.
-+**
-+** i.e. for a sector size of 512:
-+**
-+**   Pager.journalOff          Return value
-+**   ---------------------------------------
-+**   0                         0
-+**   512                       512
-+**   100                       512
-+**   2000                      2048
-+** 
- */
--#ifdef SQLITE_HAS_CODEC
--static void pagerReportSize(Pager *pPager){
--  if( pPager->xCodecSizeChng ){
--    pPager->xCodecSizeChng(pPager->pCodec, pPager->pageSize,
--                           (int)pPager->nReserve);
-+static i64 journalHdrOffset(Pager *pPager){
-+  i64 offset = 0;
-+  i64 c = pPager->journalOff;
-+  if( c ){
-+    offset = ((c-1)/JOURNAL_HDR_SZ(pPager) + 1) * JOURNAL_HDR_SZ(pPager);
-   }
-+  assert( offset%JOURNAL_HDR_SZ(pPager)==0 );
-+  assert( offset>=c );
-+  assert( (offset-c)<JOURNAL_HDR_SZ(pPager) );
-+  return offset;
- }
--#else
--# define pagerReportSize(X)     /* No-op if we do not support a codec */
--#endif
- 
- /*
--** Read a single page from either the journal file (if isMainJrnl==1) or
--** from the sub-journal (if isMainJrnl==0) and playback that page.
--** The page begins at offset *pOffset into the file. The *pOffset
--** value is increased to the start of the next page in the journal.
--**
--** The main rollback journal uses checksums - the statement journal does 
--** not.
-+** The journal file must be open when this function is called.
- **
--** If the page number of the page record read from the (sub-)journal file
--** is greater than the current value of Pager.dbSize, then playback is
--** skipped and SQLITE_OK is returned.
-+** This function is a no-op if the journal file has not been written to
-+** within the current transaction (i.e. if Pager.journalOff==0).
- **
--** If pDone is not NULL, then it is a record of pages that have already
--** been played back.  If the page at *pOffset has already been played back
--** (if the corresponding pDone bit is set) then skip the playback.
--** Make sure the pDone bit corresponding to the *pOffset page is set
--** prior to returning.
-+** If doTruncate is non-zero or the Pager.journalSizeLimit variable is
-+** set to 0, then truncate the journal file to zero bytes in size. Otherwise,
-+** zero the 28-byte header at the start of the journal file. In either case, 
-+** if the pager is not in no-sync mode, sync the journal file immediately 
-+** after writing or truncating it.
- **
--** If the page record is successfully read from the (sub-)journal file
--** and played back, then SQLITE_OK is returned. If an IO error occurs
--** while reading the record from the (sub-)journal file or while writing
--** to the database file, then the IO error code is returned. If data
--** is successfully read from the (sub-)journal file but appears to be
--** corrupted, SQLITE_DONE is returned. Data is considered corrupted in
--** two circumstances:
--** 
--**   * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or
--**   * If the record is being rolled back from the main journal file
--**     and the checksum field does not match the record content.
-+** If Pager.journalSizeLimit is set to a positive, non-zero value, and
-+** following the truncation or zeroing described above the size of the 
-+** journal file in bytes is larger than this value, then truncate the
-+** journal file to Pager.journalSizeLimit bytes. The journal file does
-+** not need to be synced following this operation.
- **
--** Neither of these two scenarios is possible during a savepoint rollback.
-+** If an IO error occurs, abandon processing and return the IO error code.
-+** Otherwise, return SQLITE_OK.
-+*/
-+static int zeroJournalHdr(Pager *pPager, int doTruncate){
-+  int rc = SQLITE_OK;                               /* Return code */
-+  assert( isOpen(pPager->jfd) );
-+  if( pPager->journalOff ){
-+    const i64 iLimit = pPager->journalSizeLimit;    /* Local cache of jsl */
-+
-+    IOTRACE(("JZEROHDR %p\n", pPager))
-+    if( doTruncate || iLimit==0 ){
-+      rc = sqlite3OsTruncate(pPager->jfd, 0);
-+    }else{
-+      static const char zeroHdr[28] = {0};
-+      rc = sqlite3OsWrite(pPager->jfd, zeroHdr, sizeof(zeroHdr), 0);
-+    }
-+    if( rc==SQLITE_OK && !pPager->noSync ){
-+      rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_DATAONLY|pPager->syncFlags);
-+    }
-+
-+    /* At this point the transaction is committed but the write lock 
-+    ** is still held on the file. If there is a size limit configured for 
-+    ** the persistent journal and the journal file currently consumes more
-+    ** space than that limit allows for, truncate it now. There is no need
-+    ** to sync the file following this operation.
-+    */
-+    if( rc==SQLITE_OK && iLimit>0 ){
-+      i64 sz;
-+      rc = sqlite3OsFileSize(pPager->jfd, &sz);
-+      if( rc==SQLITE_OK && sz>iLimit ){
-+        rc = sqlite3OsTruncate(pPager->jfd, iLimit);
-+      }
-+    }
-+  }
-+  return rc;
-+}
-+
-+/*
-+** The journal file must be open when this routine is called. A journal
-+** header (JOURNAL_HDR_SZ bytes) is written into the journal file at the
-+** current location.
- **
--** If this is a savepoint rollback, then memory may have to be dynamically
--** allocated by this function. If this is the case and an allocation fails,
--** SQLITE_NOMEM is returned.
-+** The format for the journal header is as follows:
-+** - 8 bytes: Magic identifying journal format.
-+** - 4 bytes: Number of records in journal, or -1 no-sync mode is on.
-+** - 4 bytes: Random number used for page hash.
-+** - 4 bytes: Initial database page count.
-+** - 4 bytes: Sector size used by the process that wrote this journal.
-+** - 4 bytes: Database page size.
-+** 
-+** Followed by (JOURNAL_HDR_SZ - 28) bytes of unused space.
- */
--static int pager_playback_one_page(
--  Pager *pPager,                /* The pager being played back */
--  i64 *pOffset,                 /* Offset of record to playback */
--  Bitvec *pDone,                /* Bitvec of pages already played back */
--  int isMainJrnl,               /* 1 -> main journal. 0 -> sub-journal. */
--  int isSavepnt                 /* True for a savepoint rollback */
--){
--  int rc;
--  PgHdr *pPg;                   /* An existing page in the cache */
--  Pgno pgno;                    /* The page number of a page in journal */
--  u32 cksum;                    /* Checksum used for sanity checking */
--  char *aData;                  /* Temporary storage for the page */
--  sqlite3_file *jfd;            /* The file descriptor for the journal file */
--  int isSynced;                 /* True if journal page is synced */
-+static int writeJournalHdr(Pager *pPager){
-+  int rc = SQLITE_OK;                 /* Return code */
-+  char *zHeader = pPager->pTmpSpace;  /* Temporary space used to build header */
-+  u32 nHeader = (u32)pPager->pageSize;/* Size of buffer pointed to by zHeader */
-+  u32 nWrite;                         /* Bytes of header sector written */
-+  int ii;                             /* Loop counter */
- 
--  assert( (isMainJrnl&~1)==0 );      /* isMainJrnl is 0 or 1 */
--  assert( (isSavepnt&~1)==0 );       /* isSavepnt is 0 or 1 */
--  assert( isMainJrnl || pDone );     /* pDone always used on sub-journals */
--  assert( isSavepnt || pDone==0 );   /* pDone never used on non-savepoint */
-+  assert( isOpen(pPager->jfd) );      /* Journal file must be open. */
-+
-+  if( nHeader>JOURNAL_HDR_SZ(pPager) ){
-+    nHeader = JOURNAL_HDR_SZ(pPager);
-+  }
-+
-+  /* If there are active savepoints and any of them were created 
-+  ** since the most recent journal header was written, update the 
-+  ** PagerSavepoint.iHdrOffset fields now.
-+  */
-+  for(ii=0; ii<pPager->nSavepoint; ii++){
-+    if( pPager->aSavepoint[ii].iHdrOffset==0 ){
-+      pPager->aSavepoint[ii].iHdrOffset = pPager->journalOff;
-+    }
-+  }
-+
-+  pPager->journalHdr = pPager->journalOff = journalHdrOffset(pPager);
-+
-+  /* 
-+  ** Write the nRec Field - the number of page records that follow this
-+  ** journal header. Normally, zero is written to this value at this time.
-+  ** After the records are added to the journal (and the journal synced, 
-+  ** if in full-sync mode), the zero is overwritten with the true number
-+  ** of records (see syncJournal()).
-+  **
-+  ** A faster alternative is to write 0xFFFFFFFF to the nRec field. When
-+  ** reading the journal this value tells SQLite to assume that the
-+  ** rest of the journal file contains valid page records. This assumption
-+  ** is dangerous, as if a failure occurred whilst writing to the journal
-+  ** file it may contain some garbage data. There are two scenarios
-+  ** where this risk can be ignored:
-+  **
-+  **   * When the pager is in no-sync mode. Corruption can follow a
-+  **     power failure in this case anyway.
-+  **
-+  **   * When the SQLITE_IOCAP_SAFE_APPEND flag is set. This guarantees
-+  **     that garbage data is never appended to the journal file.
-+  */
-+  assert( isOpen(pPager->fd) || pPager->noSync );
-+  if( pPager->noSync || (pPager->journalMode==PAGER_JOURNALMODE_MEMORY)
-+   || (sqlite3OsDeviceCharacteristics(pPager->fd)&SQLITE_IOCAP_SAFE_APPEND) 
-+  ){
-+    memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
-+    put32bits(&zHeader[sizeof(aJournalMagic)], 0xffffffff);
-+  }else{
-+    memset(zHeader, 0, sizeof(aJournalMagic)+4);
-+  }
-+
-+  /* The random check-hash initialiser */ 
-+  sqlite3_randomness(sizeof(pPager->cksumInit), &pPager->cksumInit);
-+  put32bits(&zHeader[sizeof(aJournalMagic)+4], pPager->cksumInit);
-+  /* The initial database size */
-+  put32bits(&zHeader[sizeof(aJournalMagic)+8], pPager->dbOrigSize);
-+  /* The assumed sector size for this process */
-+  put32bits(&zHeader[sizeof(aJournalMagic)+12], pPager->sectorSize);
-+
-+  /* The page size */
-+  put32bits(&zHeader[sizeof(aJournalMagic)+16], pPager->pageSize);
-+
-+  /* Initializing the tail of the buffer is not necessary.  Everything
-+  ** works find if the following memset() is omitted.  But initializing
-+  ** the memory prevents valgrind from complaining, so we are willing to
-+  ** take the performance hit.
-+  */
-+  memset(&zHeader[sizeof(aJournalMagic)+20], 0,
-+         nHeader-(sizeof(aJournalMagic)+20));
-+
-+  /* In theory, it is only necessary to write the 28 bytes that the 
-+  ** journal header consumes to the journal file here. Then increment the 
-+  ** Pager.journalOff variable by JOURNAL_HDR_SZ so that the next 
-+  ** record is written to the following sector (leaving a gap in the file
-+  ** that will be implicitly filled in by the OS).
-+  **
-+  ** However it has been discovered that on some systems this pattern can 
-+  ** be significantly slower than contiguously writing data to the file,
-+  ** even if that means explicitly writing data to the block of 
-+  ** (JOURNAL_HDR_SZ - 28) bytes that will not be used. So that is what
-+  ** is done. 
-+  **
-+  ** The loop is required here in case the sector-size is larger than the 
-+  ** database page size. Since the zHeader buffer is only Pager.pageSize
-+  ** bytes in size, more than one call to sqlite3OsWrite() may be required
-+  ** to populate the entire journal header sector.
-+  */ 
-+  for(nWrite=0; rc==SQLITE_OK&&nWrite<JOURNAL_HDR_SZ(pPager); nWrite+=nHeader){
-+    IOTRACE(("JHDR %p %lld %d\n", pPager, pPager->journalHdr, nHeader))
-+    rc = sqlite3OsWrite(pPager->jfd, zHeader, nHeader, pPager->journalOff);
-+    assert( pPager->journalHdr <= pPager->journalOff );
-+    pPager->journalOff += nHeader;
-+  }
- 
--  aData = pPager->pTmpSpace;
--  assert( aData );         /* Temp storage must have already been allocated */
--  assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) );
-+  return rc;
-+}
- 
--  /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction 
--  ** or savepoint rollback done at the request of the caller) or this is
--  ** a hot-journal rollback. If it is a hot-journal rollback, the pager
--  ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback
--  ** only reads from the main journal, not the sub-journal.
--  */
--  assert( pPager->eState>=PAGER_WRITER_CACHEMOD
--       || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK)
--  );
--  assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl );
-+/*
-+** The journal file must be open when this is called. A journal header file
-+** (JOURNAL_HDR_SZ bytes) is read from the current location in the journal
-+** file. The current location in the journal file is given by
-+** pPager->journalOff. See comments above function writeJournalHdr() for
-+** a description of the journal header format.
-+**
-+** If the header is read successfully, *pNRec is set to the number of
-+** page records following this header and *pDbSize is set to the size of the
-+** database before the transaction began, in pages. Also, pPager->cksumInit
-+** is set to the value read from the journal header. SQLITE_OK is returned
-+** in this case.
-+**
-+** If the journal header file appears to be corrupted, SQLITE_DONE is
-+** returned and *pNRec and *PDbSize are undefined.  If JOURNAL_HDR_SZ bytes
-+** cannot be read from the journal file an error code is returned.
-+*/
-+static int readJournalHdr(
-+  Pager *pPager,               /* Pager object */
-+  int isHot,
-+  i64 journalSize,             /* Size of the open journal file in bytes */
-+  u32 *pNRec,                  /* OUT: Value read from the nRec field */
-+  u32 *pDbSize                 /* OUT: Value of original database size field */
-+){
-+  int rc;                      /* Return code */
-+  unsigned char aMagic[8];     /* A buffer to hold the magic header */
-+  i64 iHdrOff;                 /* Offset of journal header being read */
- 
--  /* Read the page number and page data from the journal or sub-journal
--  ** file. Return an error code to the caller if an IO error occurs.
--  */
--  jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;
--  rc = read32bits(jfd, *pOffset, &pgno);
--  if( rc!=SQLITE_OK ) return rc;
--  rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4);
--  if( rc!=SQLITE_OK ) return rc;
--  *pOffset += pPager->pageSize + 4 + isMainJrnl*4;
-+  assert( isOpen(pPager->jfd) );      /* Journal file must be open. */
- 
--  /* Sanity checking on the page.  This is more important that I originally
--  ** thought.  If a power failure occurs while the journal is being written,
--  ** it could cause invalid data to be written into the journal.  We need to
--  ** detect this invalid data (with high probability) and ignore it.
-+  /* Advance Pager.journalOff to the start of the next sector. If the
-+  ** journal file is too small for there to be a header stored at this
-+  ** point, return SQLITE_DONE.
-   */
--  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
--    assert( !isSavepnt );
-+  pPager->journalOff = journalHdrOffset(pPager);
-+  if( pPager->journalOff+JOURNAL_HDR_SZ(pPager) > journalSize ){
-     return SQLITE_DONE;
-   }
--  if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
--    return SQLITE_OK;
--  }
--  if( isMainJrnl ){
--    rc = read32bits(jfd, (*pOffset)-4, &cksum);
--    if( rc ) return rc;
--    if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){
-+  iHdrOff = pPager->journalOff;
-+
-+  /* Read in the first 8 bytes of the journal header. If they do not match
-+  ** the  magic string found at the start of each journal header, return
-+  ** SQLITE_DONE. If an IO error occurs, return an error code. Otherwise,
-+  ** proceed.
-+  */
-+  if( isHot || iHdrOff!=pPager->journalHdr ){
-+    rc = sqlite3OsRead(pPager->jfd, aMagic, sizeof(aMagic), iHdrOff);
-+    if( rc ){
-+      return rc;
-+    }
-+    if( memcmp(aMagic, aJournalMagic, sizeof(aMagic))!=0 ){
-       return SQLITE_DONE;
-     }
-   }
- 
--  /* If this page has already been played by before during the current
--  ** rollback, then don't bother to play it back again.
-+  /* Read the first three 32-bit fields of the journal header: The nRec
-+  ** field, the checksum-initializer and the database size at the start
-+  ** of the transaction. Return an error code if anything goes wrong.
-   */
--  if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){
-+  if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+8, pNRec))
-+   || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+12, &pPager->cksumInit))
-+   || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+16, pDbSize))
-+  ){
-     return rc;
-   }
- 
--  /* When playing back page 1, restore the nReserve setting
--  */
--  if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){
--    pPager->nReserve = ((u8*)aData)[20];
--    pagerReportSize(pPager);
--  }
-+  if( pPager->journalOff==0 ){
-+    u32 iPageSize;               /* Page-size field of journal header */
-+    u32 iSectorSize;             /* Sector-size field of journal header */
- 
--  /* If the pager is in CACHEMOD state, then there must be a copy of this
--  ** page in the pager cache. In this case just update the pager cache,
--  ** not the database file. The page is left marked dirty in this case.
--  **
--  ** An exception to the above rule: If the database is in no-sync mode
--  ** and a page is moved during an incremental vacuum then the page may
--  ** not be in the pager cache. Later: if a malloc() or IO error occurs
--  ** during a Movepage() call, then the page may not be in the cache
--  ** either. So the condition described in the above paragraph is not
--  ** assert()able.
--  **
--  ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the
--  ** pager cache if it exists and the main file. The page is then marked 
--  ** not dirty. Since this code is only executed in PAGER_OPEN state for
--  ** a hot-journal rollback, it is guaranteed that the page-cache is empty
--  ** if the pager is in OPEN state.
--  **
--  ** Ticket #1171:  The statement journal might contain page content that is
--  ** different from the page content at the start of the transaction.
--  ** This occurs when a page is changed prior to the start of a statement
--  ** then changed again within the statement.  When rolling back such a
--  ** statement we must not write to the original database unless we know
--  ** for certain that original page contents are synced into the main rollback
--  ** journal.  Otherwise, a power loss might leave modified data in the
--  ** database file without an entry in the rollback journal that can
--  ** restore the database to its original form.  Two conditions must be
--  ** met before writing to the database files. (1) the database must be
--  ** locked.  (2) we know that the original page content is fully synced
--  ** in the main journal either because the page is not in cache or else
--  ** the page is marked as needSync==0.
--  **
--  ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
--  ** is possible to fail a statement on a database that does not yet exist.
--  ** Do not attempt to write if database file has never been opened.
--  */
--  if( pagerUseWal(pPager) ){
--    pPg = 0;
--  }else{
--    pPg = pager_lookup(pPager, pgno);
--  }
--  assert( pPg || !MEMDB );
--  assert( pPager->eState!=PAGER_OPEN || pPg==0 );
--  PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n",
--           PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData),
--           (isMainJrnl?"main-journal":"sub-journal")
--  ));
--  if( isMainJrnl ){
--    isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr);
--  }else{
--    isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC));
--  }
--  if( isOpen(pPager->fd)
--   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
--   && isSynced
--  ){
--    i64 ofst = (pgno-1)*(i64)pPager->pageSize;
--    testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
--    assert( !pagerUseWal(pPager) );
--    rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst);
--    if( pgno>pPager->dbFileSize ){
--      pPager->dbFileSize = pgno;
--    }
--    if( pPager->pBackup ){
--      CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM);
--      sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData);
--      CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData);
-+    /* Read the page-size and sector-size journal header fields. */
-+    if( SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+20, &iSectorSize))
-+     || SQLITE_OK!=(rc = read32bits(pPager->jfd, iHdrOff+24, &iPageSize))
-+    ){
-+      return rc;
-     }
--  }else if( !isMainJrnl && pPg==0 ){
--    /* If this is a rollback of a savepoint and data was not written to
--    ** the database and the page is not in-memory, there is a potential
--    ** problem. When the page is next fetched by the b-tree layer, it 
--    ** will be read from the database file, which may or may not be 
--    ** current. 
--    **
--    ** There are a couple of different ways this can happen. All are quite
--    ** obscure. When running in synchronous mode, this can only happen 
--    ** if the page is on the free-list at the start of the transaction, then
--    ** populated, then moved using sqlite3PagerMovepage().
--    **
--    ** The solution is to add an in-memory page to the cache containing
--    ** the data just read from the sub-journal. Mark the page as dirty 
--    ** and if the pager requires a journal-sync, then mark the page as 
--    ** requiring a journal-sync before it is written.
-+
-+    /* Versions of SQLite prior to 3.5.8 set the page-size field of the
-+    ** journal header to zero. In this case, assume that the Pager.pageSize
-+    ** variable is already set to the correct page size.
-     */
--    assert( isSavepnt );
--    assert( pPager->doNotSpill==0 );
--    pPager->doNotSpill++;
--    rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1);
--    assert( pPager->doNotSpill==1 );
--    pPager->doNotSpill--;
--    if( rc!=SQLITE_OK ) return rc;
--    pPg->flags &= ~PGHDR_NEED_READ;
--    sqlite3PcacheMakeDirty(pPg);
--  }
--  if( pPg ){
--    /* No page should ever be explicitly rolled back that is in use, except
--    ** for page 1 which is held in use in order to keep the lock on the
--    ** database active. However such a page may be rolled back as a result
--    ** of an internal error resulting in an automatic call to
--    ** sqlite3PagerRollback().
-+    if( iPageSize==0 ){
-+      iPageSize = pPager->pageSize;
-+    }
-+
-+    /* Check that the values read from the page-size and sector-size fields
-+    ** are within range. To be 'in range', both values need to be a power
-+    ** of two greater than or equal to 512 or 32, and not greater than their 
-+    ** respective compile time maximum limits.
-     */
--    void *pData;
--    pData = pPg->pData;
--    memcpy(pData, (u8*)aData, pPager->pageSize);
--    pPager->xReiniter(pPg);
--    if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){
--      /* If the contents of this page were just restored from the main 
--      ** journal file, then its content must be as they were when the 
--      ** transaction was first opened. In this case we can mark the page
--      ** as clean, since there will be no need to write it out to the
--      ** database.
--      **
--      ** There is one exception to this rule. If the page is being rolled
--      ** back as part of a savepoint (or statement) rollback from an 
--      ** unsynced portion of the main journal file, then it is not safe
--      ** to mark the page as clean. This is because marking the page as
--      ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is
--      ** already in the journal file (recorded in Pager.pInJournal) and
--      ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to
--      ** again within this transaction, it will be marked as dirty but
--      ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially
--      ** be written out into the database file before its journal file
--      ** segment is synced. If a crash occurs during or following this,
--      ** database corruption may ensue.
-+    if( iPageSize<512                  || iSectorSize<32
-+     || iPageSize>SQLITE_MAX_PAGE_SIZE || iSectorSize>MAX_SECTOR_SIZE
-+     || ((iPageSize-1)&iPageSize)!=0   || ((iSectorSize-1)&iSectorSize)!=0 
-+    ){
-+      /* If the either the page-size or sector-size in the journal-header is 
-+      ** invalid, then the process that wrote the journal-header must have 
-+      ** crashed before the header was synced. In this case stop reading 
-+      ** the journal file here.
-       */
--      assert( !pagerUseWal(pPager) );
--      sqlite3PcacheMakeClean(pPg);
-+      return SQLITE_DONE;
-     }
--    pager_set_pagehash(pPg);
- 
--    /* If this was page 1, then restore the value of Pager.dbFileVers.
--    ** Do this before any decoding. */
--    if( pgno==1 ){
--      memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
--    }
-+    /* Update the page-size to match the value read from the journal. 
-+    ** Use a testcase() macro to make sure that malloc failure within 
-+    ** PagerSetPagesize() is tested.
-+    */
-+    rc = sqlite3PagerSetPagesize(pPager, &iPageSize, -1);
-+    testcase( rc!=SQLITE_OK );
- 
--    /* Decode the page just read from disk */
--    CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM);
--    sqlite3PcacheRelease(pPg);
-+    /* Update the assumed sector-size to match the value used by 
-+    ** the process that created this journal. If this journal was
-+    ** created by a process other than this one, then this routine
-+    ** is being called from within pager_playback(). The local value
-+    ** of Pager.sectorSize is restored at the end of that routine.
-+    */
-+    pPager->sectorSize = iSectorSize;
-   }
-+
-+  pPager->journalOff += JOURNAL_HDR_SZ(pPager);
-   return rc;
- }
- 
-+
- /*
--** Parameter zMaster is the name of a master journal file. A single journal
--** file that referred to the master journal file has just been rolled back.
--** This routine checks if it is possible to delete the master journal file,
--** and does so if it is.
--**
--** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
--** available for use within this function.
--**
--** When a master journal file is created, it is populated with the names 
--** of all of its child journals, one after another, formatted as utf-8 
--** encoded text. The end of each child journal file is marked with a 
--** nul-terminator byte (0x00). i.e. the entire contents of a master journal
--** file for a transaction involving two databases might be:
--**
--**   "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00"
--**
--** A master journal file may only be deleted once all of its child 
--** journals have been rolled back.
--**
--** This function reads the contents of the master-journal file into 
--** memory and loops through each of the child journal names. For
--** each child journal, it checks if:
--**
--**   * if the child journal exists, and if so
--**   * if the child journal contains a reference to master journal 
--**     file zMaster
-+** Write the supplied master journal name into the journal file for pager
-+** pPager at the current location. The master journal name must be the last
-+** thing written to a journal file. If the pager is in full-sync mode, the
-+** journal file descriptor is advanced to the next sector boundary before
-+** anything is written. The format is:
- **
--** If a child journal can be found that matches both of the criteria
--** above, this function returns without doing anything. Otherwise, if
--** no such child journal can be found, file zMaster is deleted from
--** the file-system using sqlite3OsDelete().
-+**   + 4 bytes: PAGER_MJ_PGNO.
-+**   + N bytes: Master journal filename in utf-8.
-+**   + 4 bytes: N (length of master journal name in bytes, no nul-terminator).
-+**   + 4 bytes: Master journal name checksum.
-+**   + 8 bytes: aJournalMagic[].
- **
--** If an IO error within this function, an error code is returned. This
--** function allocates memory by calling sqlite3Malloc(). If an allocation
--** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors 
--** occur, SQLITE_OK is returned.
-+** The master journal page checksum is the sum of the bytes in the master
-+** journal name, where each byte is interpreted as a signed 8-bit integer.
- **
--** TODO: This function allocates a single block of memory to load
--** the entire contents of the master journal file. This could be
--** a couple of kilobytes or so - potentially larger than the page 
--** size.
-+** If zMaster is a NULL pointer (occurs for a single database transaction), 
-+** this call is a no-op.
- */
--static int pager_delmaster(Pager *pPager, const char *zMaster){
--  sqlite3_vfs *pVfs = pPager->pVfs;
--  int rc;                   /* Return code */
--  sqlite3_file *pMaster;    /* Malloc'd master-journal file descriptor */
--  sqlite3_file *pJournal;   /* Malloc'd child-journal file descriptor */
--  char *zMasterJournal = 0; /* Contents of master journal file */
--  i64 nMasterJournal;       /* Size of master journal file */
--  char *zJournal;           /* Pointer to one journal within MJ file */
--  char *zMasterPtr;         /* Space to hold MJ filename from a journal file */
--  int nMasterPtr;           /* Amount of space allocated to zMasterPtr[] */
-+static int writeMasterJournal(Pager *pPager, const char *zMaster){
-+  int rc;                          /* Return code */
-+  int nMaster;                     /* Length of string zMaster */
-+  i64 iHdrOff;                     /* Offset of header in journal file */
-+  i64 jrnlSize;                    /* Size of journal file on disk */
-+  u32 cksum = 0;                   /* Checksum of string zMaster */
- 
--  /* Allocate space for both the pJournal and pMaster file descriptors.
--  ** If successful, open the master journal file for reading.
--  */
--  pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2);
--  pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
--  if( !pMaster ){
--    rc = SQLITE_NOMEM;
--  }else{
--    const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
--    rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
--  }
--  if( rc!=SQLITE_OK ) goto delmaster_out;
-+  assert( pPager->setMaster==0 );
-+  assert( !pagerUseWal(pPager) );
- 
--  /* Load the entire master journal file into space obtained from
--  ** sqlite3_malloc() and pointed to by zMasterJournal.   Also obtain
--  ** sufficient space (in zMasterPtr) to hold the names of master
--  ** journal files extracted from regular rollback-journals.
--  */
--  rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
--  if( rc!=SQLITE_OK ) goto delmaster_out;
--  nMasterPtr = pVfs->mxPathname+1;
--  zMasterJournal = sqlite3Malloc((int)nMasterJournal + nMasterPtr + 1);
--  if( !zMasterJournal ){
--    rc = SQLITE_NOMEM;
--    goto delmaster_out;
-+  if( !zMaster 
-+   || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
-+   || pPager->journalMode==PAGER_JOURNALMODE_OFF 
-+  ){
-+    return SQLITE_OK;
-   }
--  zMasterPtr = &zMasterJournal[nMasterJournal+1];
--  rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0);
--  if( rc!=SQLITE_OK ) goto delmaster_out;
--  zMasterJournal[nMasterJournal] = 0;
-+  pPager->setMaster = 1;
-+  assert( isOpen(pPager->jfd) );
-+  assert( pPager->journalHdr <= pPager->journalOff );
- 
--  zJournal = zMasterJournal;
--  while( (zJournal-zMasterJournal)<nMasterJournal ){
--    int exists;
--    rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
--    if( rc!=SQLITE_OK ){
--      goto delmaster_out;
--    }
--    if( exists ){
--      /* One of the journals pointed to by the master journal exists.
--      ** Open it and check if it points at the master journal. If
--      ** so, return without deleting the master journal file.
--      */
--      int c;
--      int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
--      rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
--      if( rc!=SQLITE_OK ){
--        goto delmaster_out;
--      }
-+  /* Calculate the length in bytes and the checksum of zMaster */
-+  for(nMaster=0; zMaster[nMaster]; nMaster++){
-+    cksum += zMaster[nMaster];
-+  }
- 
--      rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
--      sqlite3OsClose(pJournal);
--      if( rc!=SQLITE_OK ){
--        goto delmaster_out;
--      }
-+  /* If in full-sync mode, advance to the next disk sector before writing
-+  ** the master journal name. This is in case the previous page written to
-+  ** the journal has already been synced.
-+  */
-+  if( pPager->fullSync ){
-+    pPager->journalOff = journalHdrOffset(pPager);
-+  }
-+  iHdrOff = pPager->journalOff;
- 
--      c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
--      if( c ){
--        /* We have a match. Do not delete the master journal file. */
--        goto delmaster_out;
--      }
--    }
--    zJournal += (sqlite3Strlen30(zJournal)+1);
-+  /* Write the master journal data to the end of the journal file. If
-+  ** an error occurs, return the error code to the caller.
-+  */
-+  if( (0 != (rc = write32bits(pPager->jfd, iHdrOff, PAGER_MJ_PGNO(pPager))))
-+   || (0 != (rc = sqlite3OsWrite(pPager->jfd, zMaster, nMaster, iHdrOff+4)))
-+   || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster, nMaster)))
-+   || (0 != (rc = write32bits(pPager->jfd, iHdrOff+4+nMaster+4, cksum)))
-+   || (0 != (rc = sqlite3OsWrite(pPager->jfd, aJournalMagic, 8, iHdrOff+4+nMaster+8)))
-+  ){
-+    return rc;
-   }
-- 
--  sqlite3OsClose(pMaster);
--  rc = sqlite3OsDelete(pVfs, zMaster, 0);
-+  pPager->journalOff += (nMaster+20);
- 
--delmaster_out:
--  sqlite3_free(zMasterJournal);
--  if( pMaster ){
--    sqlite3OsClose(pMaster);
--    assert( !isOpen(pJournal) );
--    sqlite3_free(pMaster);
-+  /* If the pager is in peristent-journal mode, then the physical 
-+  ** journal-file may extend past the end of the master-journal name
-+  ** and 8 bytes of magic data just written to the file. This is 
-+  ** dangerous because the code to rollback a hot-journal file
-+  ** will not be able to find the master-journal name to determine 
-+  ** whether or not the journal is hot. 
-+  **
-+  ** Easiest thing to do in this scenario is to truncate the journal 
-+  ** file to the required size.
-+  */ 
-+  if( SQLITE_OK==(rc = sqlite3OsFileSize(pPager->jfd, &jrnlSize))
-+   && jrnlSize>pPager->journalOff
-+  ){
-+    rc = sqlite3OsTruncate(pPager->jfd, pPager->journalOff);
-   }
-   return rc;
- }
- 
--
- /*
--** This function is used to change the actual size of the database 
--** file in the file-system. This only happens when committing a transaction,
--** or rolling back a transaction (including rolling back a hot-journal).
--**
--** If the main database file is not open, or the pager is not in either
--** DBMOD or OPEN state, this function is a no-op. Otherwise, the size 
--** of the file is changed to nPage pages (nPage*pPager->pageSize bytes). 
--** If the file on disk is currently larger than nPage pages, then use the VFS
--** xTruncate() method to truncate it.
--**
--** Or, it might might be the case that the file on disk is smaller than 
--** nPage pages. Some operating system implementations can get confused if 
--** you try to truncate a file to some size that is larger than it 
--** currently is, so detect this case and write a single zero byte to 
--** the end of the new file instead.
--**
--** If successful, return SQLITE_OK. If an IO error occurs while modifying
--** the database file, return the error code to the caller.
-+** Find a page in the hash table given its page number. Return
-+** a pointer to the page or NULL if the requested page is not 
-+** already in memory.
- */
--static int pager_truncate(Pager *pPager, Pgno nPage){
--  int rc = SQLITE_OK;
--  assert( pPager->eState!=PAGER_ERROR );
--  assert( pPager->eState!=PAGER_READER );
--  
--  if( isOpen(pPager->fd) 
--   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) 
--  ){
--    i64 currentSize, newSize;
--    int szPage = pPager->pageSize;
--    assert( pPager->eLock==EXCLUSIVE_LOCK );
--    /* TODO: Is it safe to use Pager.dbFileSize here? */
--    rc = sqlite3OsFileSize(pPager->fd, &currentSize);
--    newSize = szPage*(i64)nPage;
--    if( rc==SQLITE_OK && currentSize!=newSize ){
--      if( currentSize>newSize ){
--        rc = sqlite3OsTruncate(pPager->fd, newSize);
--      }else{
--        char *pTmp = pPager->pTmpSpace;
--        memset(pTmp, 0, szPage);
--        testcase( (newSize-szPage) <  currentSize );
--        testcase( (newSize-szPage) == currentSize );
--        testcase( (newSize-szPage) >  currentSize );
--        rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
--      }
--      if( rc==SQLITE_OK ){
--        pPager->dbFileSize = nPage;
--      }
--    }
--  }
--  return rc;
-+static PgHdr *pager_lookup(Pager *pPager, Pgno pgno){
-+  PgHdr *p;                         /* Return value */
-+
-+  /* It is not possible for a call to PcacheFetch() with createFlag==0 to
-+  ** fail, since no attempt to allocate dynamic memory will be made.
-+  */
-+  (void)sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &p);
-+  return p;
- }
- 
- /*
--** Set the value of the Pager.sectorSize variable for the given
--** pager based on the value returned by the xSectorSize method
--** of the open database file. The sector size will be used 
--** to determine the size and alignment of journal header and 
--** master journal pointers within created journal files.
--**
--** For temporary files the effective sector size is always 512 bytes.
--**
--** Otherwise, for non-temporary files, the effective sector size is
--** the value returned by the xSectorSize() method rounded up to 32 if
--** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it
--** is greater than MAX_SECTOR_SIZE.
-+** Discard the entire contents of the in-memory page-cache.
- */
--static void setSectorSize(Pager *pPager){
--  assert( isOpen(pPager->fd) || pPager->tempFile );
-+static void pager_reset(Pager *pPager){
-+  sqlite3BackupRestart(pPager->pBackup);
-+  sqlite3PcacheClear(pPager->pPCache);
-+}
- 
--  if( !pPager->tempFile ){
--    /* Sector size doesn't matter for temporary files. Also, the file
--    ** may not have been opened yet, in which case the OsSectorSize()
--    ** call will segfault.
--    */
--    pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
-+/*
-+** Free all structures in the Pager.aSavepoint[] array and set both
-+** Pager.aSavepoint and Pager.nSavepoint to zero. Close the sub-journal
-+** if it is open and the pager is not in exclusive mode.
-+*/
-+static void releaseAllSavepoints(Pager *pPager){
-+  int ii;               /* Iterator for looping through Pager.aSavepoint */
-+  for(ii=0; ii<pPager->nSavepoint; ii++){
-+    sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
-   }
--  if( pPager->sectorSize<32 ){
--    pPager->sectorSize = 512;
-+  if( !pPager->exclusiveMode || sqlite3IsMemJournal(pPager->sjfd) ){
-+    sqlite3OsClose(pPager->sjfd);
-   }
--  if( pPager->sectorSize>MAX_SECTOR_SIZE ){
--    assert( MAX_SECTOR_SIZE>=512 );
--    pPager->sectorSize = MAX_SECTOR_SIZE;
-+  sqlite3_free(pPager->aSavepoint);
-+  pPager->aSavepoint = 0;
-+  pPager->nSavepoint = 0;
-+  pPager->nSubRec = 0;
-+}
-+
-+/*
-+** Set the bit number pgno in the PagerSavepoint.pInSavepoint 
-+** bitvecs of all open savepoints. Return SQLITE_OK if successful
-+** or SQLITE_NOMEM if a malloc failure occurs.
-+*/
-+static int addToSavepointBitvecs(Pager *pPager, Pgno pgno){
-+  int ii;                   /* Loop counter */
-+  int rc = SQLITE_OK;       /* Result code */
-+
-+  for(ii=0; ii<pPager->nSavepoint; ii++){
-+    PagerSavepoint *p = &pPager->aSavepoint[ii];
-+    if( pgno<=p->nOrig ){
-+      rc |= sqlite3BitvecSet(p->pInSavepoint, pgno);
-+      testcase( rc==SQLITE_NOMEM );
-+      assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
-+    }
-   }
-+  return rc;
- }
- 
- /*
--** Playback the journal and thus restore the database file to
--** the state it was in before we started making changes.  
--**
--** The journal file format is as follows: 
--**
--**  (1)  8 byte prefix.  A copy of aJournalMagic[].
--**  (2)  4 byte big-endian integer which is the number of valid page records
--**       in the journal.  If this value is 0xffffffff, then compute the
--**       number of page records from the journal size.
--**  (3)  4 byte big-endian integer which is the initial value for the 
--**       sanity checksum.
--**  (4)  4 byte integer which is the number of pages to truncate the
--**       database to during a rollback.
--**  (5)  4 byte big-endian integer which is the sector size.  The header
--**       is this many bytes in size.
--**  (6)  4 byte big-endian integer which is the page size.
--**  (7)  zero padding out to the next sector size.
--**  (8)  Zero or more pages instances, each as follows:
--**        +  4 byte page number.
--**        +  pPager->pageSize bytes of data.
--**        +  4 byte checksum
--**
--** When we speak of the journal header, we mean the first 7 items above.
--** Each entry in the journal is an instance of the 8th item.
--**
--** Call the value from the second bullet "nRec".  nRec is the number of
--** valid page entries in the journal.  In most cases, you can compute the
--** value of nRec from the size of the journal file.  But if a power
--** failure occurred while the journal was being written, it could be the
--** case that the size of the journal file had already been increased but
--** the extra entries had not yet made it safely to disk.  In such a case,
--** the value of nRec computed from the file size would be too large.  For
--** that reason, we always use the nRec value in the header.
--**
--** If the nRec value is 0xffffffff it means that nRec should be computed
--** from the file size.  This value is used when the user selects the
--** no-sync option for the journal.  A power failure could lead to corruption
--** in this case.  But for things like temporary table (which will be
--** deleted when the power is restored) we don't care.  
--**
--** If the file opened as the journal file is not a well-formed
--** journal file then all pages up to the first corrupted page are rolled
--** back (or no pages if the journal header is corrupted). The journal file
--** is then deleted and SQLITE_OK returned, just as if no corruption had
--** been encountered.
-+** This function is a no-op if the pager is in exclusive mode and not
-+** in the ERROR state. Otherwise, it switches the pager to PAGER_OPEN
-+** state.
- **
--** If an I/O or malloc() error occurs, the journal-file is not deleted
--** and an error code is returned.
-+** If the pager is not in exclusive-access mode, the database file is
-+** completely unlocked. If the file is unlocked and the file-system does
-+** not exhibit the UNDELETABLE_WHEN_OPEN property, the journal file is
-+** closed (if it is open).
- **
--** The isHot parameter indicates that we are trying to rollback a journal
--** that might be a hot journal.  Or, it could be that the journal is 
--** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE.
--** If the journal really is hot, reset the pager cache prior to rolling
--** back any content.  If the journal is merely persistent, no reset is
--** needed.
-+** If the pager is in ERROR state when this function is called, the 
-+** contents of the pager cache are discarded before switching back to 
-+** the OPEN state. Regardless of whether the pager is in exclusive-mode
-+** or not, any journal file left in the file-system will be treated
-+** as a hot-journal and rolled back the next time a read-transaction
-+** is opened (by this or by any other connection).
- */
--static int pager_playback(Pager *pPager, int isHot){
--  sqlite3_vfs *pVfs = pPager->pVfs;
--  i64 szJ;                 /* Size of the journal file in bytes */
--  u32 nRec;                /* Number of Records in the journal */
--  u32 u;                   /* Unsigned loop counter */
--  Pgno mxPg = 0;           /* Size of the original file in pages */
--  int rc;                  /* Result code of a subroutine */
--  int res = 1;             /* Value returned by sqlite3OsAccess() */
--  char *zMaster = 0;       /* Name of master journal file if any */
--  int needPagerReset;      /* True to reset page prior to first page rollback */
--
--  /* Figure out how many records are in the journal.  Abort early if
--  ** the journal is empty.
--  */
--  assert( isOpen(pPager->jfd) );
--  rc = sqlite3OsFileSize(pPager->jfd, &szJ);
--  if( rc!=SQLITE_OK ){
--    goto end_playback;
--  }
-+static void pager_unlock(Pager *pPager){
- 
--  /* Read the master journal name from the journal, if it is present.
--  ** If a master journal file name is specified, but the file is not
--  ** present on disk, then the journal is not hot and does not need to be
--  ** played back.
--  **
--  ** TODO: Technically the following is an error because it assumes that
--  ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that
--  ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c,
--  **  mxPathname is 512, which is the same as the minimum allowable value
--  ** for pageSize.
--  */
--  zMaster = pPager->pTmpSpace;
--  rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
--  if( rc==SQLITE_OK && zMaster[0] ){
--    rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
--  }
--  zMaster = 0;
--  if( rc!=SQLITE_OK || !res ){
--    goto end_playback;
--  }
--  pPager->journalOff = 0;
--  needPagerReset = isHot;
-+  assert( pPager->eState==PAGER_READER 
-+       || pPager->eState==PAGER_OPEN 
-+       || pPager->eState==PAGER_ERROR 
-+  );
- 
--  /* This loop terminates either when a readJournalHdr() or 
--  ** pager_playback_one_page() call returns SQLITE_DONE or an IO error 
--  ** occurs. 
--  */
--  while( 1 ){
--    /* Read the next journal header from the journal file.  If there are
--    ** not enough bytes left in the journal file for a complete header, or
--    ** it is corrupted, then a process must have failed while writing it.
--    ** This indicates nothing more needs to be rolled back.
--    */
--    rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg);
--    if( rc!=SQLITE_OK ){ 
--      if( rc==SQLITE_DONE ){
--        rc = SQLITE_OK;
--      }
--      goto end_playback;
--    }
-+  sqlite3BitvecDestroy(pPager->pInJournal);
-+  pPager->pInJournal = 0;
-+  releaseAllSavepoints(pPager);
- 
--    /* If nRec is 0xffffffff, then this journal was created by a process
--    ** working in no-sync mode. This means that the rest of the journal
--    ** file consists of pages, there are no more journal headers. Compute
--    ** the value of nRec based on this assumption.
--    */
--    if( nRec==0xffffffff ){
--      assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
--      nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager));
--    }
-+  if( pagerUseWal(pPager) ){
-+    assert( !isOpen(pPager->jfd) );
-+    sqlite3WalEndReadTransaction(pPager->pWal);
-+    pPager->eState = PAGER_OPEN;
-+  }else if( !pPager->exclusiveMode ){
-+    int rc;                       /* Error code returned by pagerUnlockDb() */
-+    int iDc = isOpen(pPager->fd)?sqlite3OsDeviceCharacteristics(pPager->fd):0;
- 
--    /* If nRec is 0 and this rollback is of a transaction created by this
--    ** process and if this is the final header in the journal, then it means
--    ** that this part of the journal was being filled but has not yet been
--    ** synced to disk.  Compute the number of pages based on the remaining
--    ** size of the file.
--    **
--    ** The third term of the test was added to fix ticket #2565.
--    ** When rolling back a hot journal, nRec==0 always means that the next
--    ** chunk of the journal contains zero pages to be rolled back.  But
--    ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
--    ** the journal, it means that the journal might contain additional
--    ** pages that need to be rolled back and that the number of pages 
--    ** should be computed based on the journal file size.
-+    /* If the operating system supports deletion of open files, then
-+    ** close the journal file when dropping the database lock.  Otherwise
-+    ** another connection with journal_mode=delete might delete the file
-+    ** out from under us.
-     */
--    if( nRec==0 && !isHot &&
--        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
--      nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
-+    assert( (PAGER_JOURNALMODE_MEMORY   & 5)!=1 );
-+    assert( (PAGER_JOURNALMODE_OFF      & 5)!=1 );
-+    assert( (PAGER_JOURNALMODE_WAL      & 5)!=1 );
-+    assert( (PAGER_JOURNALMODE_DELETE   & 5)!=1 );
-+    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
-+    assert( (PAGER_JOURNALMODE_PERSIST  & 5)==1 );
-+    if( 0==(iDc & SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN)
-+     || 1!=(pPager->journalMode & 5)
-+    ){
-+      sqlite3OsClose(pPager->jfd);
-     }
- 
--    /* If this is the first header read from the journal, truncate the
--    ** database file back to its original size.
-+    /* If the pager is in the ERROR state and the call to unlock the database
-+    ** file fails, set the current lock to UNKNOWN_LOCK. See the comment
-+    ** above the #define for UNKNOWN_LOCK for an explanation of why this
-+    ** is necessary.
-     */
--    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
--      rc = pager_truncate(pPager, mxPg);
--      if( rc!=SQLITE_OK ){
--        goto end_playback;
--      }
--      pPager->dbSize = mxPg;
-+    rc = pagerUnlockDb(pPager, NO_LOCK);
-+    if( rc!=SQLITE_OK && pPager->eState==PAGER_ERROR ){
-+      pPager->eLock = UNKNOWN_LOCK;
-     }
- 
--    /* Copy original pages out of the journal and back into the 
--    ** database file and/or page cache.
-+    /* The pager state may be changed from PAGER_ERROR to PAGER_OPEN here
-+    ** without clearing the error code. This is intentional - the error
-+    ** code is cleared and the cache reset in the block below.
-     */
--    for(u=0; u<nRec; u++){
--      if( needPagerReset ){
--        pager_reset(pPager);
--        needPagerReset = 0;
--      }
--      rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0);
--      if( rc!=SQLITE_OK ){
--        if( rc==SQLITE_DONE ){
--          rc = SQLITE_OK;
--          pPager->journalOff = szJ;
--          break;
--        }else if( rc==SQLITE_IOERR_SHORT_READ ){
--          /* If the journal has been truncated, simply stop reading and
--          ** processing the journal. This might happen if the journal was
--          ** not completely written and synced prior to a crash.  In that
--          ** case, the database should have never been written in the
--          ** first place so it is OK to simply abandon the rollback. */
--          rc = SQLITE_OK;
--          goto end_playback;
--        }else{
--          /* If we are unable to rollback, quit and return the error
--          ** code.  This will cause the pager to enter the error state
--          ** so that no further harm will be done.  Perhaps the next
--          ** process to come along will be able to rollback the database.
--          */
--          goto end_playback;
--        }
--      }
--    }
-+    assert( pPager->errCode || pPager->eState!=PAGER_ERROR );
-+    pPager->changeCountDone = 0;
-+    pPager->eState = PAGER_OPEN;
-   }
--  /*NOTREACHED*/
--  assert( 0 );
- 
--end_playback:
--  /* Following a rollback, the database file should be back in its original
--  ** state prior to the start of the transaction, so invoke the
--  ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the
--  ** assertion that the transaction counter was modified.
-+  /* If Pager.errCode is set, the contents of the pager cache cannot be
-+  ** trusted. Now that there are no outstanding references to the pager,
-+  ** it can safely move back to PAGER_OPEN state. This happens in both
-+  ** normal and exclusive-locking mode.
-   */
--  assert(
--    pPager->fd->pMethods==0 ||
--    sqlite3OsFileControl(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0)>=SQLITE_OK
--  );
-+  if( pPager->errCode ){
-+    assert( !MEMDB );
-+    pager_reset(pPager);
-+    pPager->changeCountDone = pPager->tempFile;
-+    pPager->eState = PAGER_OPEN;
-+    pPager->errCode = SQLITE_OK;
-+  }
- 
--  /* If this playback is happening automatically as a result of an IO or 
--  ** malloc error that occurred after the change-counter was updated but 
--  ** before the transaction was committed, then the change-counter 
--  ** modification may just have been reverted. If this happens in exclusive 
--  ** mode, then subsequent transactions performed by the connection will not
--  ** update the change-counter at all. This may lead to cache inconsistency
--  ** problems for other processes at some point in the future. So, just
--  ** in case this has happened, clear the changeCountDone flag now.
--  */
--  pPager->changeCountDone = pPager->tempFile;
-+  pPager->journalOff = 0;
-+  pPager->journalHdr = 0;
-+  pPager->setMaster = 0;
-+}
- 
--  if( rc==SQLITE_OK ){
--    zMaster = pPager->pTmpSpace;
--    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
--    testcase( rc!=SQLITE_OK );
--  }
--  if( rc==SQLITE_OK
--   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
--  ){
--    rc = sqlite3PagerSync(pPager);
--  }
--  if( rc==SQLITE_OK ){
--    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
--    testcase( rc!=SQLITE_OK );
--  }
--  if( rc==SQLITE_OK && zMaster[0] && res ){
--    /* If there was a master journal and this routine will return success,
--    ** see if it is possible to delete the master journal.
--    */
--    rc = pager_delmaster(pPager, zMaster);
--    testcase( rc!=SQLITE_OK );
-+/*
-+** This function is called whenever an IOERR or FULL error that requires
-+** the pager to transition into the ERROR state may have occurred.
-+** The first argument is a pointer to the pager structure, the second 
-+** the error-code about to be returned by a pager API function. The 
-+** value returned is a copy of the second argument to this function. 
-+**
-+** If the second argument is SQLITE_FULL, SQLITE_IOERR or one of the
-+** IOERR sub-codes, the pager enters the ERROR state and the error code
-+** is stored in Pager.errCode. While the pager remains in the ERROR state,
-+** all major API calls on the Pager will immediately return Pager.errCode.
-+**
-+** The ERROR state indicates that the contents of the pager-cache 
-+** cannot be trusted. This state can be cleared by completely discarding 
-+** the contents of the pager-cache. If a transaction was active when
-+** the persistent error occurred, then the rollback journal may need
-+** to be replayed to restore the contents of the database file (as if
-+** it were a hot-journal).
-+*/
-+static int pager_error(Pager *pPager, int rc){
-+  int rc2 = rc & 0xff;
-+  assert( rc==SQLITE_OK || !MEMDB );
-+  assert(
-+       pPager->errCode==SQLITE_FULL ||
-+       pPager->errCode==SQLITE_OK ||
-+       (pPager->errCode & 0xff)==SQLITE_IOERR
-+  );
-+  if( rc2==SQLITE_FULL || rc2==SQLITE_IOERR ){
-+    pPager->errCode = rc;
-+    pPager->eState = PAGER_ERROR;
-   }
--
--  /* The Pager.sectorSize variable may have been updated while rolling
--  ** back a journal created by a process with a different sector size
--  ** value. Reset it to the correct value for this process.
--  */
--  setSectorSize(pPager);
-   return rc;
- }
- 
--
- /*
--** Read the content for page pPg out of the database file and into 
--** pPg->pData. A shared lock or greater must be held on the database
--** file before this function is called.
-+** This routine ends a transaction. A transaction is usually ended by 
-+** either a COMMIT or a ROLLBACK operation. This routine may be called 
-+** after rollback of a hot-journal, or if an error occurs while opening
-+** the journal file or writing the very first journal-header of a
-+** database transaction.
-+** 
-+** This routine is never called in PAGER_ERROR state. If it is called
-+** in PAGER_NONE or PAGER_SHARED state and the lock held is less
-+** exclusive than a RESERVED lock, it is a no-op.
- **
--** If page 1 is read, then the value of Pager.dbFileVers[] is set to
--** the value read from the database file.
-+** Otherwise, any active savepoints are released.
- **
--** If an IO error occurs, then the IO error is returned to the caller.
--** Otherwise, SQLITE_OK is returned.
-+** If the journal file is open, then it is "finalized". Once a journal 
-+** file has been finalized it is not possible to use it to roll back a 
-+** transaction. Nor will it be considered to be a hot-journal by this
-+** or any other database connection. Exactly how a journal is finalized
-+** depends on whether or not the pager is running in exclusive mode and
-+** the current journal-mode (Pager.journalMode value), as follows:
-+**
-+**   journalMode==MEMORY
-+**     Journal file descriptor is simply closed. This destroys an 
-+**     in-memory journal.
-+**
-+**   journalMode==TRUNCATE
-+**     Journal file is truncated to zero bytes in size.
-+**
-+**   journalMode==PERSIST
-+**     The first 28 bytes of the journal file are zeroed. This invalidates
-+**     the first journal header in the file, and hence the entire journal
-+**     file. An invalid journal file cannot be rolled back.
-+**
-+**   journalMode==DELETE
-+**     The journal file is closed and deleted using sqlite3OsDelete().
-+**
-+**     If the pager is running in exclusive mode, this method of finalizing
-+**     the journal file is never used. Instead, if the journalMode is
-+**     DELETE and the pager is in exclusive mode, the method described under
-+**     journalMode==PERSIST is used instead.
-+**
-+** After the journal is finalized, the pager moves to PAGER_READER state.
-+** If running in non-exclusive rollback mode, the lock on the file is 
-+** downgraded to a SHARED_LOCK.
-+**
-+** SQLITE_OK is returned if no error occurs. If an error occurs during
-+** any of the IO operations to finalize the journal file or unlock the
-+** database then the IO error code is returned to the user. If the 
-+** operation to finalize the journal file fails, then the code still
-+** tries to unlock the database file if not in exclusive mode. If the
-+** unlock operation fails as well, then the error code related
-+** to the first error encountered (the journal finalization one) is
-+** returned.
- */
--static int readDbPage(PgHdr *pPg){
--  Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
--  Pgno pgno = pPg->pgno;       /* Page number to read */
--  int rc = SQLITE_OK;          /* Return code */
--  int isInWal = 0;             /* True if page is in log file */
--  int pgsz = pPager->pageSize; /* Number of bytes to read */
--
--  assert( pPager->eState>=PAGER_READER && !MEMDB );
--  assert( isOpen(pPager->fd) );
-+static int pager_end_transaction(Pager *pPager, int hasMaster){
-+  int rc = SQLITE_OK;      /* Error code from journal finalization operation */
-+  int rc2 = SQLITE_OK;     /* Error code from db file unlock operation */
- 
--  if( NEVER(!isOpen(pPager->fd)) ){
--    assert( pPager->tempFile );
--    memset(pPg->pData, 0, pPager->pageSize);
-+  /* Do nothing if the pager does not have an open write transaction
-+  ** or at least a RESERVED lock. This function may be called when there
-+  ** is no write-transaction active but a RESERVED or greater lock is
-+  ** held under two circumstances:
-+  **
-+  **   1. After a successful hot-journal rollback, it is called with
-+  **      eState==PAGER_NONE and eLock==EXCLUSIVE_LOCK.
-+  **
-+  **   2. If a connection with locking_mode=exclusive holding an EXCLUSIVE 
-+  **      lock switches back to locking_mode=normal and then executes a
-+  **      read-transaction, this function is called with eState==PAGER_READER 
-+  **      and eLock==EXCLUSIVE_LOCK when the read-transaction is closed.
-+  */
-+  assert( assert_pager_state(pPager) );
-+  assert( pPager->eState!=PAGER_ERROR );
-+  if( pPager->eState<PAGER_WRITER_LOCKED && pPager->eLock<RESERVED_LOCK ){
-     return SQLITE_OK;
-   }
- 
--  if( pagerUseWal(pPager) ){
--    /* Try to pull the page from the write-ahead log. */
--    rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData);
--  }
--  if( rc==SQLITE_OK && !isInWal ){
--    i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
--    rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
--    if( rc==SQLITE_IOERR_SHORT_READ ){
--      rc = SQLITE_OK;
-+  releaseAllSavepoints(pPager);
-+  assert( isOpen(pPager->jfd) || pPager->pInJournal==0 );
-+  if( isOpen(pPager->jfd) ){
-+    assert( !pagerUseWal(pPager) );
-+
-+    /* Finalize the journal file. */
-+    if( sqlite3IsMemJournal(pPager->jfd) ){
-+      assert( pPager->journalMode==PAGER_JOURNALMODE_MEMORY );
-+      sqlite3OsClose(pPager->jfd);
-+    }else if( pPager->journalMode==PAGER_JOURNALMODE_TRUNCATE ){
-+      if( pPager->journalOff==0 ){
-+        rc = SQLITE_OK;
-+      }else{
-+        rc = sqlite3OsTruncate(pPager->jfd, 0);
-+      }
-+      pPager->journalOff = 0;
-+    }else if( pPager->journalMode==PAGER_JOURNALMODE_PERSIST
-+      || (pPager->exclusiveMode && pPager->journalMode!=PAGER_JOURNALMODE_WAL)
-+    ){
-+      rc = zeroJournalHdr(pPager, hasMaster);
-+      pPager->journalOff = 0;
-+    }else{
-+      /* This branch may be executed with Pager.journalMode==MEMORY if
-+      ** a hot-journal was just rolled back. In this case the journal
-+      ** file should be closed and deleted. If this connection writes to
-+      ** the database file, it will do so using an in-memory journal. 
-+      */
-+      assert( pPager->journalMode==PAGER_JOURNALMODE_DELETE 
-+           || pPager->journalMode==PAGER_JOURNALMODE_MEMORY 
-+           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
-+      );
-+      sqlite3OsClose(pPager->jfd);
-+      if( !pPager->tempFile ){
-+        rc = sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
-+      }
-     }
-   }
- 
--  if( pgno==1 ){
--    if( rc ){
--      /* If the read is unsuccessful, set the dbFileVers[] to something
--      ** that will never be a valid file version.  dbFileVers[] is a copy
--      ** of bytes 24..39 of the database.  Bytes 28..31 should always be
--      ** zero or the size of the database in pages. Bytes 32..35 and 36..39
--      ** should be page numbers which are never 0xffffffff.  So filling
--      ** pPager->dbFileVers[] with all 0xff bytes should suffice.
--      **
--      ** For an encrypted database, the situation is more complex:  bytes
--      ** 24..39 of the database are white noise.  But the probability of
--      ** white noise equaling 16 bytes of 0xff is vanishingly small so
--      ** we should still be ok.
--      */
--      memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers));
--    }else{
--      u8 *dbFileVers = &((u8*)pPg->pData)[24];
--      memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers));
-+#ifdef SQLITE_CHECK_PAGES
-+  sqlite3PcacheIterateDirty(pPager->pPCache, pager_set_pagehash);
-+  if( pPager->dbSize==0 && sqlite3PcacheRefCount(pPager->pPCache)>0 ){
-+    PgHdr *p = pager_lookup(pPager, 1);
-+    if( p ){
-+      p->pageHash = 0;
-+      sqlite3PagerUnref(p);
-     }
-   }
--  CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);
-+#endif
- 
--  PAGER_INCR(sqlite3_pager_readdb_count);
--  PAGER_INCR(pPager->nRead);
--  IOTRACE(("PGIN %p %d\n", pPager, pgno));
--  PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
--               PAGERID(pPager), pgno, pager_pagehash(pPg)));
-+  sqlite3BitvecDestroy(pPager->pInJournal);
-+  pPager->pInJournal = 0;
-+  pPager->nRec = 0;
-+  sqlite3PcacheCleanAll(pPager->pPCache);
-+  sqlite3PcacheTruncate(pPager->pPCache, pPager->dbSize);
- 
--  return rc;
-+  if( pagerUseWal(pPager) ){
-+    /* Drop the WAL write-lock, if any. Also, if the connection was in 
-+    ** locking_mode=exclusive mode but is no longer, drop the EXCLUSIVE 
-+    ** lock held on the database file.
-+    */
-+    rc2 = sqlite3WalEndWriteTransaction(pPager->pWal);
-+    assert( rc2==SQLITE_OK );
-+  }
-+  if( !pPager->exclusiveMode 
-+   && (!pagerUseWal(pPager) || sqlite3WalExclusiveMode(pPager->pWal, 0))
-+  ){
-+    rc2 = pagerUnlockDb(pPager, SHARED_LOCK);
-+    pPager->changeCountDone = 0;
-+  }
-+  pPager->eState = PAGER_READER;
-+  pPager->setMaster = 0;
-+
-+  return (rc==SQLITE_OK?rc2:rc);
- }
- 
- /*
--** Update the value of the change-counter at offsets 24 and 92 in
--** the header and the sqlite version number at offset 96.
-+** Execute a rollback if a transaction is active and unlock the 
-+** database file. 
- **
--** This is an unconditional update.  See also the pager_incr_changecounter()
--** routine which only updates the change-counter if the update is actually
--** needed, as determined by the pPager->changeCountDone state variable.
-+** If the pager has already entered the ERROR state, do not attempt 
-+** the rollback at this time. Instead, pager_unlock() is called. The
-+** call to pager_unlock() will discard all in-memory pages, unlock
-+** the database file and move the pager back to OPEN state. If this 
-+** means that there is a hot-journal left in the file-system, the next 
-+** connection to obtain a shared lock on the pager (which may be this one) 
-+** will roll it back.
-+**
-+** If the pager has not already entered the ERROR state, but an IO or
-+** malloc error occurs during a rollback, then this will itself cause 
-+** the pager to enter the ERROR state. Which will be cleared by the
-+** call to pager_unlock(), as described above.
- */
--static void pager_write_changecounter(PgHdr *pPg){
--  u32 change_counter;
--
--  /* Increment the value just read and write it back to byte 24. */
--  change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1;
--  put32bits(((char*)pPg->pData)+24, change_counter);
--
--  /* Also store the SQLite version number in bytes 96..99 and in
--  ** bytes 92..95 store the change counter for which the version number
--  ** is valid. */
--  put32bits(((char*)pPg->pData)+92, change_counter);
--  put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER);
-+static void pagerUnlockAndRollback(Pager *pPager){
-+  if( pPager->eState!=PAGER_ERROR && pPager->eState!=PAGER_OPEN ){
-+    assert( assert_pager_state(pPager) );
-+    if( pPager->eState>=PAGER_WRITER_LOCKED ){
-+      sqlite3BeginBenignMalloc();
-+      sqlite3PagerRollback(pPager);
-+      sqlite3EndBenignMalloc();
-+    }else if( !pPager->exclusiveMode ){
-+      assert( pPager->eState==PAGER_READER );
-+      pager_end_transaction(pPager, 0);
-+    }
-+  }
-+  pager_unlock(pPager);
- }
- 
--#ifndef SQLITE_OMIT_WAL
- /*
--** This function is invoked once for each page that has already been 
--** written into the log file when a WAL transaction is rolled back.
--** Parameter iPg is the page number of said page. The pCtx argument 
--** is actually a pointer to the Pager structure.
-+** Parameter aData must point to a buffer of pPager->pageSize bytes
-+** of data. Compute and return a checksum based on the contents of the 
-+** page of data and the current value of pPager->cksumInit.
- **
--** If page iPg is present in the cache, and has no outstanding references,
--** it is discarded. Otherwise, if there are one or more outstanding
--** references, the page content is reloaded from the database. If the
--** attempt to reload content from the database is required and fails, 
--** return an SQLite error code. Otherwise, SQLITE_OK.
-+** This is not a real checksum. It is really just the sum of the 
-+** random initial value (pPager->cksumInit) and every 200th byte
-+** of the page data, starting with byte offset (pPager->pageSize%200).
-+** Each byte is interpreted as an 8-bit unsigned integer.
-+**
-+** Changing the formula used to compute this checksum results in an
-+** incompatible journal file format.
-+**
-+** If journal corruption occurs due to a power failure, the most likely 
-+** scenario is that one end or the other of the record will be changed. 
-+** It is much less likely that the two ends of the journal record will be
-+** correct and the middle be corrupt.  Thus, this "checksum" scheme,
-+** though fast and simple, catches the most likely kind of corruption.
- */
--static int pagerUndoCallback(void *pCtx, Pgno iPg){
--  int rc = SQLITE_OK;
--  Pager *pPager = (Pager *)pCtx;
--  PgHdr *pPg;
--
--  pPg = sqlite3PagerLookup(pPager, iPg);
--  if( pPg ){
--    if( sqlite3PcachePageRefcount(pPg)==1 ){
--      sqlite3PcacheDrop(pPg);
--    }else{
--      rc = readDbPage(pPg);
--      if( rc==SQLITE_OK ){
--        pPager->xReiniter(pPg);
--      }
--      sqlite3PagerUnref(pPg);
--    }
-+static u32 pager_cksum(Pager *pPager, const u8 *aData){
-+  u32 cksum = pPager->cksumInit;         /* Checksum value to return */
-+  int i = pPager->pageSize-200;          /* Loop counter */
-+  while( i>0 ){
-+    cksum += aData[i];
-+    i -= 200;
-   }
--
--  /* Normally, if a transaction is rolled back, any backup processes are
--  ** updated as data is copied out of the rollback journal and into the
--  ** database. This is not generally possible with a WAL database, as
--  ** rollback involves simply truncating the log file. Therefore, if one
--  ** or more frames have already been written to the log (and therefore 
--  ** also copied into the backup databases) as part of this transaction,
--  ** the backups must be restarted.
--  */
--  sqlite3BackupRestart(pPager->pBackup);
--
--  return rc;
-+  return cksum;
- }
- 
- /*
--** This function is called to rollback a transaction on a WAL database.
-+** Report the current page size and number of reserved bytes back
-+** to the codec.
- */
--static int pagerRollbackWal(Pager *pPager){
--  int rc;                         /* Return Code */
--  PgHdr *pList;                   /* List of dirty pages to revert */
--
--  /* For all pages in the cache that are currently dirty or have already
--  ** been written (but not committed) to the log file, do one of the 
--  ** following:
--  **
--  **   + Discard the cached page (if refcount==0), or
--  **   + Reload page content from the database (if refcount>0).
--  */
--  pPager->dbSize = pPager->dbOrigSize;
--  rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager);
--  pList = sqlite3PcacheDirtyList(pPager->pPCache);
--  while( pList && rc==SQLITE_OK ){
--    PgHdr *pNext = pList->pDirty;
--    rc = pagerUndoCallback((void *)pPager, pList->pgno);
--    pList = pNext;
-+#ifdef SQLITE_HAS_CODEC
-+static void pagerReportSize(Pager *pPager){
-+  if( pPager->xCodecSizeChng ){
-+    pPager->xCodecSizeChng(pPager->pCodec, pPager->pageSize,
-+                           (int)pPager->nReserve);
-   }
--
--  return rc;
- }
-+#else
-+# define pagerReportSize(X)     /* No-op if we do not support a codec */
-+#endif
- 
- /*
--** This function is a wrapper around sqlite3WalFrames(). As well as logging
--** the contents of the list of pages headed by pList (connected by pDirty),
--** this function notifies any active backup processes that the pages have
--** changed. 
-+** Read a single page from either the journal file (if isMainJrnl==1) or
-+** from the sub-journal (if isMainJrnl==0) and playback that page.
-+** The page begins at offset *pOffset into the file. The *pOffset
-+** value is increased to the start of the next page in the journal.
- **
--** The list of pages passed into this routine is always sorted by page number.
--** Hence, if page 1 appears anywhere on the list, it will be the first page.
--*/ 
--static int pagerWalFrames(
--  Pager *pPager,                  /* Pager object */
--  PgHdr *pList,                   /* List of frames to log */
--  Pgno nTruncate,                 /* Database size after this commit */
--  int isCommit,                   /* True if this is a commit */
--  int syncFlags                   /* Flags to pass to OsSync() (or 0) */
-+** The main rollback journal uses checksums - the statement journal does 
-+** not.
-+**
-+** If the page number of the page record read from the (sub-)journal file
-+** is greater than the current value of Pager.dbSize, then playback is
-+** skipped and SQLITE_OK is returned.
-+**
-+** If pDone is not NULL, then it is a record of pages that have already
-+** been played back.  If the page at *pOffset has already been played back
-+** (if the corresponding pDone bit is set) then skip the playback.
-+** Make sure the pDone bit corresponding to the *pOffset page is set
-+** prior to returning.
-+**
-+** If the page record is successfully read from the (sub-)journal file
-+** and played back, then SQLITE_OK is returned. If an IO error occurs
-+** while reading the record from the (sub-)journal file or while writing
-+** to the database file, then the IO error code is returned. If data
-+** is successfully read from the (sub-)journal file but appears to be
-+** corrupted, SQLITE_DONE is returned. Data is considered corrupted in
-+** two circumstances:
-+** 
-+**   * If the record page-number is illegal (0 or PAGER_MJ_PGNO), or
-+**   * If the record is being rolled back from the main journal file
-+**     and the checksum field does not match the record content.
-+**
-+** Neither of these two scenarios are possible during a savepoint rollback.
-+**
-+** If this is a savepoint rollback, then memory may have to be dynamically
-+** allocated by this function. If this is the case and an allocation fails,
-+** SQLITE_NOMEM is returned.
-+*/
-+static int pager_playback_one_page(
-+  Pager *pPager,                /* The pager being played back */
-+  i64 *pOffset,                 /* Offset of record to playback */
-+  Bitvec *pDone,                /* Bitvec of pages already played back */
-+  int isMainJrnl,               /* 1 -> main journal. 0 -> sub-journal. */
-+  int isSavepnt                 /* True for a savepoint rollback */
- ){
--  int rc;                         /* Return code */
--#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
--  PgHdr *p;                       /* For looping over pages */
--#endif
-+  int rc;
-+  PgHdr *pPg;                   /* An existing page in the cache */
-+  Pgno pgno;                    /* The page number of a page in journal */
-+  u32 cksum;                    /* Checksum used for sanity checking */
-+  char *aData;                  /* Temporary storage for the page */
-+  sqlite3_file *jfd;            /* The file descriptor for the journal file */
-+  int isSynced;                 /* True if journal page is synced */
- 
--  assert( pPager->pWal );
--#ifdef SQLITE_DEBUG
--  /* Verify that the page list is in accending order */
--  for(p=pList; p && p->pDirty; p=p->pDirty){
--    assert( p->pgno < p->pDirty->pgno );
--  }
--#endif
-+  assert( (isMainJrnl&~1)==0 );      /* isMainJrnl is 0 or 1 */
-+  assert( (isSavepnt&~1)==0 );       /* isSavepnt is 0 or 1 */
-+  assert( isMainJrnl || pDone );     /* pDone always used on sub-journals */
-+  assert( isSavepnt || pDone==0 );   /* pDone never used on non-savepoint */
- 
--  if( isCommit ){
--    /* If a WAL transaction is being committed, there is no point in writing
--    ** any pages with page numbers greater than nTruncate into the WAL file.
--    ** They will never be read by any client. So remove them from the pDirty
--    ** list here. */
--    PgHdr *p;
--    PgHdr **ppNext = &pList;
--    for(p=pList; (*ppNext = p); p=p->pDirty){
--      if( p->pgno<=nTruncate ) ppNext = &p->pDirty;
--    }
--    assert( pList );
--  }
-+  aData = pPager->pTmpSpace;
-+  assert( aData );         /* Temp storage must have already been allocated */
-+  assert( pagerUseWal(pPager)==0 || (!isMainJrnl && isSavepnt) );
- 
--  if( pList->pgno==1 ) pager_write_changecounter(pList);
--  rc = sqlite3WalFrames(pPager->pWal, 
--      pPager->pageSize, pList, nTruncate, isCommit, syncFlags
-+  /* Either the state is greater than PAGER_WRITER_CACHEMOD (a transaction 
-+  ** or savepoint rollback done at the request of the caller) or this is
-+  ** a hot-journal rollback. If it is a hot-journal rollback, the pager
-+  ** is in state OPEN and holds an EXCLUSIVE lock. Hot-journal rollback
-+  ** only reads from the main journal, not the sub-journal.
-+  */
-+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD
-+       || (pPager->eState==PAGER_OPEN && pPager->eLock==EXCLUSIVE_LOCK)
-   );
--  if( rc==SQLITE_OK && pPager->pBackup ){
--    PgHdr *p;
--    for(p=pList; p; p=p->pDirty){
--      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
--    }
--  }
--
--#ifdef SQLITE_CHECK_PAGES
--  pList = sqlite3PcacheDirtyList(pPager->pPCache);
--  for(p=pList; p; p=p->pDirty){
--    pager_set_pagehash(p);
--  }
--#endif
--
--  return rc;
--}
--
--/*
--** Begin a read transaction on the WAL.
--**
--** This routine used to be called "pagerOpenSnapshot()" because it essentially
--** makes a snapshot of the database at the current point in time and preserves
--** that snapshot for use by the reader in spite of concurrently changes by
--** other writers or checkpointers.
--*/
--static int pagerBeginReadTransaction(Pager *pPager){
--  int rc;                         /* Return code */
--  int changed = 0;                /* True if cache must be reset */
--
--  assert( pagerUseWal(pPager) );
--  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
-+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD || isMainJrnl );
- 
--  /* sqlite3WalEndReadTransaction() was not called for the previous
--  ** transaction in locking_mode=EXCLUSIVE.  So call it now.  If we
--  ** are in locking_mode=NORMAL and EndRead() was previously called,
--  ** the duplicate call is harmless.
-+  /* Read the page number and page data from the journal or sub-journal
-+  ** file. Return an error code to the caller if an IO error occurs.
-   */
--  sqlite3WalEndReadTransaction(pPager->pWal);
-+  jfd = isMainJrnl ? pPager->jfd : pPager->sjfd;
-+  rc = read32bits(jfd, *pOffset, &pgno);
-+  if( rc!=SQLITE_OK ) return rc;
-+  rc = sqlite3OsRead(jfd, (u8*)aData, pPager->pageSize, (*pOffset)+4);
-+  if( rc!=SQLITE_OK ) return rc;
-+  *pOffset += pPager->pageSize + 4 + isMainJrnl*4;
- 
--  rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
--  if( rc!=SQLITE_OK || changed ){
--    pager_reset(pPager);
-+  /* Sanity checking on the page.  This is more important that I originally
-+  ** thought.  If a power failure occurs while the journal is being written,
-+  ** it could cause invalid data to be written into the journal.  We need to
-+  ** detect this invalid data (with high probability) and ignore it.
-+  */
-+  if( pgno==0 || pgno==PAGER_MJ_PGNO(pPager) ){
-+    assert( !isSavepnt );
-+    return SQLITE_DONE;
-+  }
-+  if( pgno>(Pgno)pPager->dbSize || sqlite3BitvecTest(pDone, pgno) ){
-+    return SQLITE_OK;
-+  }
-+  if( isMainJrnl ){
-+    rc = read32bits(jfd, (*pOffset)-4, &cksum);
-+    if( rc ) return rc;
-+    if( !isSavepnt && pager_cksum(pPager, (u8*)aData)!=cksum ){
-+      return SQLITE_DONE;
-+    }
-   }
- 
--  return rc;
--}
--#endif
--
--/*
--** This function is called as part of the transition from PAGER_OPEN
--** to PAGER_READER state to determine the size of the database file
--** in pages (assuming the page size currently stored in Pager.pageSize).
--**
--** If no error occurs, SQLITE_OK is returned and the size of the database
--** in pages is stored in *pnPage. Otherwise, an error code (perhaps
--** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified.
--*/
--static int pagerPagecount(Pager *pPager, Pgno *pnPage){
--  Pgno nPage;                     /* Value to return via *pnPage */
-+  /* If this page has already been played by before during the current
-+  ** rollback, then don't bother to play it back again.
-+  */
-+  if( pDone && (rc = sqlite3BitvecSet(pDone, pgno))!=SQLITE_OK ){
-+    return rc;
-+  }
- 
--  /* Query the WAL sub-system for the database size. The WalDbsize()
--  ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or
--  ** if the database size is not available. The database size is not
--  ** available from the WAL sub-system if the log file is empty or
--  ** contains no valid committed transactions.
-+  /* When playing back page 1, restore the nReserve setting
-   */
--  assert( pPager->eState==PAGER_OPEN );
--  assert( pPager->eLock>=SHARED_LOCK || pPager->noReadlock );
--  nPage = sqlite3WalDbsize(pPager->pWal);
-+  if( pgno==1 && pPager->nReserve!=((u8*)aData)[20] ){
-+    pPager->nReserve = ((u8*)aData)[20];
-+    pagerReportSize(pPager);
-+  }
- 
--  /* If the database size was not available from the WAL sub-system,
--  ** determine it based on the size of the database file. If the size
--  ** of the database file is not an integer multiple of the page-size,
--  ** round down to the nearest page. Except, any file larger than 0
--  ** bytes in size is considered to contain at least one page.
-+  /* If the pager is in CACHEMOD state, then there must be a copy of this
-+  ** page in the pager cache. In this case just update the pager cache,
-+  ** not the database file. The page is left marked dirty in this case.
-+  **
-+  ** An exception to the above rule: If the database is in no-sync mode
-+  ** and a page is moved during an incremental vacuum then the page may
-+  ** not be in the pager cache. Later: if a malloc() or IO error occurs
-+  ** during a Movepage() call, then the page may not be in the cache
-+  ** either. So the condition described in the above paragraph is not
-+  ** assert()able.
-+  **
-+  ** If in WRITER_DBMOD, WRITER_FINISHED or OPEN state, then we update the
-+  ** pager cache if it exists and the main file. The page is then marked 
-+  ** not dirty. Since this code is only executed in PAGER_OPEN state for
-+  ** a hot-journal rollback, it is guaranteed that the page-cache is empty
-+  ** if the pager is in OPEN state.
-+  **
-+  ** Ticket #1171:  The statement journal might contain page content that is
-+  ** different from the page content at the start of the transaction.
-+  ** This occurs when a page is changed prior to the start of a statement
-+  ** then changed again within the statement.  When rolling back such a
-+  ** statement we must not write to the original database unless we know
-+  ** for certain that original page contents are synced into the main rollback
-+  ** journal.  Otherwise, a power loss might leave modified data in the
-+  ** database file without an entry in the rollback journal that can
-+  ** restore the database to its original form.  Two conditions must be
-+  ** met before writing to the database files. (1) the database must be
-+  ** locked.  (2) we know that the original page content is fully synced
-+  ** in the main journal either because the page is not in cache or else
-+  ** the page is marked as needSync==0.
-+  **
-+  ** 2008-04-14:  When attempting to vacuum a corrupt database file, it
-+  ** is possible to fail a statement on a database that does not yet exist.
-+  ** Do not attempt to write if database file has never been opened.
-   */
--  if( nPage==0 ){
--    i64 n = 0;                    /* Size of db file in bytes */
--    assert( isOpen(pPager->fd) || pPager->tempFile );
--    if( isOpen(pPager->fd) ){
--      int rc = sqlite3OsFileSize(pPager->fd, &n);
--      if( rc!=SQLITE_OK ){
--        return rc;
--      }
-+  if( pagerUseWal(pPager) ){
-+    pPg = 0;
-+  }else{
-+    pPg = pager_lookup(pPager, pgno);
-+  }
-+  assert( pPg || !MEMDB );
-+  assert( pPager->eState!=PAGER_OPEN || pPg==0 );
-+  PAGERTRACE(("PLAYBACK %d page %d hash(%08x) %s\n",
-+           PAGERID(pPager), pgno, pager_datahash(pPager->pageSize, (u8*)aData),
-+           (isMainJrnl?"main-journal":"sub-journal")
-+  ));
-+  if( isMainJrnl ){
-+    isSynced = pPager->noSync || (*pOffset <= pPager->journalHdr);
-+  }else{
-+    isSynced = (pPg==0 || 0==(pPg->flags & PGHDR_NEED_SYNC));
-+  }
-+  if( isOpen(pPager->fd)
-+   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
-+   && isSynced
-+  ){
-+    i64 ofst = (pgno-1)*(i64)pPager->pageSize;
-+    testcase( !isSavepnt && pPg!=0 && (pPg->flags&PGHDR_NEED_SYNC)!=0 );
-+    assert( !pagerUseWal(pPager) );
-+    rc = sqlite3OsWrite(pPager->fd, (u8*)aData, pPager->pageSize, ofst);
-+    if( pgno>pPager->dbFileSize ){
-+      pPager->dbFileSize = pgno;
-+    }
-+    if( pPager->pBackup ){
-+      CODEC1(pPager, aData, pgno, 3, rc=SQLITE_NOMEM);
-+      sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)aData);
-+      CODEC2(pPager, aData, pgno, 7, rc=SQLITE_NOMEM, aData);
-+    }
-+  }else if( !isMainJrnl && pPg==0 ){
-+    /* If this is a rollback of a savepoint and data was not written to
-+    ** the database and the page is not in-memory, there is a potential
-+    ** problem. When the page is next fetched by the b-tree layer, it 
-+    ** will be read from the database file, which may or may not be 
-+    ** current. 
-+    **
-+    ** There are a couple of different ways this can happen. All are quite
-+    ** obscure. When running in synchronous mode, this can only happen 
-+    ** if the page is on the free-list at the start of the transaction, then
-+    ** populated, then moved using sqlite3PagerMovepage().
-+    **
-+    ** The solution is to add an in-memory page to the cache containing
-+    ** the data just read from the sub-journal. Mark the page as dirty 
-+    ** and if the pager requires a journal-sync, then mark the page as 
-+    ** requiring a journal-sync before it is written.
-+    */
-+    assert( isSavepnt );
-+    assert( pPager->doNotSpill==0 );
-+    pPager->doNotSpill++;
-+    rc = sqlite3PagerAcquire(pPager, pgno, &pPg, 1);
-+    assert( pPager->doNotSpill==1 );
-+    pPager->doNotSpill--;
-+    if( rc!=SQLITE_OK ) return rc;
-+    pPg->flags &= ~PGHDR_NEED_READ;
-+    sqlite3PcacheMakeDirty(pPg);
-+  }
-+  if( pPg ){
-+    /* No page should ever be explicitly rolled back that is in use, except
-+    ** for page 1 which is held in use in order to keep the lock on the
-+    ** database active. However such a page may be rolled back as a result
-+    ** of an internal error resulting in an automatic call to
-+    ** sqlite3PagerRollback().
-+    */
-+    void *pData;
-+    pData = pPg->pData;
-+    memcpy(pData, (u8*)aData, pPager->pageSize);
-+    pPager->xReiniter(pPg);
-+    if( isMainJrnl && (!isSavepnt || *pOffset<=pPager->journalHdr) ){
-+      /* If the contents of this page were just restored from the main 
-+      ** journal file, then its content must be as they were when the 
-+      ** transaction was first opened. In this case we can mark the page
-+      ** as clean, since there will be no need to write it out to the
-+      ** database.
-+      **
-+      ** There is one exception to this rule. If the page is being rolled
-+      ** back as part of a savepoint (or statement) rollback from an 
-+      ** unsynced portion of the main journal file, then it is not safe
-+      ** to mark the page as clean. This is because marking the page as
-+      ** clean will clear the PGHDR_NEED_SYNC flag. Since the page is
-+      ** already in the journal file (recorded in Pager.pInJournal) and
-+      ** the PGHDR_NEED_SYNC flag is cleared, if the page is written to
-+      ** again within this transaction, it will be marked as dirty but
-+      ** the PGHDR_NEED_SYNC flag will not be set. It could then potentially
-+      ** be written out into the database file before its journal file
-+      ** segment is synced. If a crash occurs during or following this,
-+      ** database corruption may ensue.
-+      */
-+      assert( !pagerUseWal(pPager) );
-+      sqlite3PcacheMakeClean(pPg);
-     }
--    nPage = (Pgno)(n / pPager->pageSize);
--    if( nPage==0 && n>0 ){
--      nPage = 1;
-+    pager_set_pagehash(pPg);
-+
-+    /* If this was page 1, then restore the value of Pager.dbFileVers.
-+    ** Do this before any decoding. */
-+    if( pgno==1 ){
-+      memcpy(&pPager->dbFileVers, &((u8*)pData)[24],sizeof(pPager->dbFileVers));
-     }
--  }
- 
--  /* If the current number of pages in the file is greater than the
--  ** configured maximum pager number, increase the allowed limit so
--  ** that the file can be read.
--  */
--  if( nPage>pPager->mxPgno ){
--    pPager->mxPgno = (Pgno)nPage;
-+    /* Decode the page just read from disk */
-+    CODEC1(pPager, pData, pPg->pgno, 3, rc=SQLITE_NOMEM);
-+    sqlite3PcacheRelease(pPg);
-   }
--
--  *pnPage = nPage;
--  return SQLITE_OK;
-+  return rc;
- }
- 
--#ifndef SQLITE_OMIT_WAL
- /*
--** Check if the *-wal file that corresponds to the database opened by pPager
--** exists if the database is not empy, or verify that the *-wal file does
--** not exist (by deleting it) if the database file is empty.
--**
--** If the database is not empty and the *-wal file exists, open the pager
--** in WAL mode.  If the database is empty or if no *-wal file exists and
--** if no error occurs, make sure Pager.journalMode is not set to
--** PAGER_JOURNALMODE_WAL.
-+** Parameter zMaster is the name of a master journal file. A single journal
-+** file that referred to the master journal file has just been rolled back.
-+** This routine checks if it is possible to delete the master journal file,
-+** and does so if it is.
- **
--** Return SQLITE_OK or an error code.
-+** Argument zMaster may point to Pager.pTmpSpace. So that buffer is not 
-+** available for use within this function.
- **
--** The caller must hold a SHARED lock on the database file to call this
--** function. Because an EXCLUSIVE lock on the db file is required to delete 
--** a WAL on a none-empty database, this ensures there is no race condition 
--** between the xAccess() below and an xDelete() being executed by some 
--** other connection.
--*/
--static int pagerOpenWalIfPresent(Pager *pPager){
--  int rc = SQLITE_OK;
--  assert( pPager->eState==PAGER_OPEN );
--  assert( pPager->eLock>=SHARED_LOCK || pPager->noReadlock );
--
--  if( !pPager->tempFile ){
--    int isWal;                    /* True if WAL file exists */
--    Pgno nPage;                   /* Size of the database file */
--
--    rc = pagerPagecount(pPager, &nPage);
--    if( rc ) return rc;
--    if( nPage==0 ){
--      rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0);
--      isWal = 0;
--    }else{
--      rc = sqlite3OsAccess(
--          pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal
--      );
--    }
--    if( rc==SQLITE_OK ){
--      if( isWal ){
--        testcase( sqlite3PcachePagecount(pPager->pPCache)==0 );
--        rc = sqlite3PagerOpenWal(pPager, 0);
--      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
--        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
--      }
--    }
--  }
--  return rc;
--}
--#endif
--
--/*
--** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
--** the entire master journal file. The case pSavepoint==NULL occurs when 
--** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction 
--** savepoint.
-+** When a master journal file is created, it is populated with the names 
-+** of all of its child journals, one after another, formatted as utf-8 
-+** encoded text. The end of each child journal file is marked with a 
-+** nul-terminator byte (0x00). i.e. the entire contents of a master journal
-+** file for a transaction involving two databases might be:
- **
--** When pSavepoint is not NULL (meaning a non-transaction savepoint is 
--** being rolled back), then the rollback consists of up to three stages,
--** performed in the order specified:
-+**   "/home/bill/a.db-journal\x00/home/bill/b.db-journal\x00"
- **
--**   * Pages are played back from the main journal starting at byte
--**     offset PagerSavepoint.iOffset and continuing to 
--**     PagerSavepoint.iHdrOffset, or to the end of the main journal
--**     file if PagerSavepoint.iHdrOffset is zero.
-+** A master journal file may only be deleted once all of its child 
-+** journals have been rolled back.
- **
--**   * If PagerSavepoint.iHdrOffset is not zero, then pages are played
--**     back starting from the journal header immediately following 
--**     PagerSavepoint.iHdrOffset to the end of the main journal file.
-+** This function reads the contents of the master-journal file into 
-+** memory and loops through each of the child journal names. For
-+** each child journal, it checks if:
- **
--**   * Pages are then played back from the sub-journal file, starting
--**     with the PagerSavepoint.iSubRec and continuing to the end of
--**     the journal file.
-+**   * if the child journal exists, and if so
-+**   * if the child journal contains a reference to master journal 
-+**     file zMaster
- **
--** Throughout the rollback process, each time a page is rolled back, the
--** corresponding bit is set in a bitvec structure (variable pDone in the
--** implementation below). This is used to ensure that a page is only
--** rolled back the first time it is encountered in either journal.
-+** If a child journal can be found that matches both of the criteria
-+** above, this function returns without doing anything. Otherwise, if
-+** no such child journal can be found, file zMaster is deleted from
-+** the file-system using sqlite3OsDelete().
- **
--** If pSavepoint is NULL, then pages are only played back from the main
--** journal file. There is no need for a bitvec in this case.
-+** If an IO error within this function, an error code is returned. This
-+** function allocates memory by calling sqlite3Malloc(). If an allocation
-+** fails, SQLITE_NOMEM is returned. Otherwise, if no IO or malloc errors 
-+** occur, SQLITE_OK is returned.
- **
--** In either case, before playback commences the Pager.dbSize variable
--** is reset to the value that it held at the start of the savepoint 
--** (or transaction). No page with a page-number greater than this value
--** is played back. If one is encountered it is simply skipped.
-+** TODO: This function allocates a single block of memory to load
-+** the entire contents of the master journal file. This could be
-+** a couple of kilobytes or so - potentially larger than the page 
-+** size.
- */
--static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
--  i64 szJ;                 /* Effective size of the main journal */
--  i64 iHdrOff;             /* End of first segment of main-journal records */
--  int rc = SQLITE_OK;      /* Return code */
--  Bitvec *pDone = 0;       /* Bitvec to ensure pages played back only once */
--
--  assert( pPager->eState!=PAGER_ERROR );
--  assert( pPager->eState>=PAGER_WRITER_LOCKED );
--
--  /* Allocate a bitvec to use to store the set of pages rolled back */
--  if( pSavepoint ){
--    pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
--    if( !pDone ){
--      return SQLITE_NOMEM;
--    }
--  }
--
--  /* Set the database size back to the value it was before the savepoint 
--  ** being reverted was opened.
--  */
--  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
--  pPager->changeCountDone = pPager->tempFile;
--
--  if( !pSavepoint && pagerUseWal(pPager) ){
--    return pagerRollbackWal(pPager);
--  }
--
--  /* Use pPager->journalOff as the effective size of the main rollback
--  ** journal.  The actual file might be larger than this in
--  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything
--  ** past pPager->journalOff is off-limits to us.
--  */
--  szJ = pPager->journalOff;
--  assert( pagerUseWal(pPager)==0 || szJ==0 );
-+static int pager_delmaster(Pager *pPager, const char *zMaster){
-+  sqlite3_vfs *pVfs = pPager->pVfs;
-+  int rc;                   /* Return code */
-+  sqlite3_file *pMaster;    /* Malloc'd master-journal file descriptor */
-+  sqlite3_file *pJournal;   /* Malloc'd child-journal file descriptor */
-+  char *zMasterJournal = 0; /* Contents of master journal file */
-+  i64 nMasterJournal;       /* Size of master journal file */
-+  char *zJournal;           /* Pointer to one journal within MJ file */
-+  char *zMasterPtr;         /* Space to hold MJ filename from a journal file */
-+  int nMasterPtr;           /* Amount of space allocated to zMasterPtr[] */
- 
--  /* Begin by rolling back records from the main journal starting at
--  ** PagerSavepoint.iOffset and continuing to the next journal header.
--  ** There might be records in the main journal that have a page number
--  ** greater than the current database size (pPager->dbSize) but those
--  ** will be skipped automatically.  Pages are added to pDone as they
--  ** are played back.
-+  /* Allocate space for both the pJournal and pMaster file descriptors.
-+  ** If successful, open the master journal file for reading.
-   */
--  if( pSavepoint && !pagerUseWal(pPager) ){
--    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
--    pPager->journalOff = pSavepoint->iOffset;
--    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
--      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
--    }
--    assert( rc!=SQLITE_DONE );
-+  pMaster = (sqlite3_file *)sqlite3MallocZero(pVfs->szOsFile * 2);
-+  pJournal = (sqlite3_file *)(((u8 *)pMaster) + pVfs->szOsFile);
-+  if( !pMaster ){
-+    rc = SQLITE_NOMEM;
-   }else{
--    pPager->journalOff = 0;
-+    const int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MASTER_JOURNAL);
-+    rc = sqlite3OsOpen(pVfs, zMaster, pMaster, flags, 0);
-   }
-+  if( rc!=SQLITE_OK ) goto delmaster_out;
- 
--  /* Continue rolling back records out of the main journal starting at
--  ** the first journal header seen and continuing until the effective end
--  ** of the main journal file.  Continue to skip out-of-range pages and
--  ** continue adding pages rolled back to pDone.
-+  /* Load the entire master journal file into space obtained from
-+  ** sqlite3_malloc() and pointed to by zMasterJournal.   Also obtain
-+  ** sufficient space (in zMasterPtr) to hold the names of master
-+  ** journal files extracted from regular rollback-journals.
-   */
--  while( rc==SQLITE_OK && pPager->journalOff<szJ ){
--    u32 ii;            /* Loop counter */
--    u32 nJRec = 0;     /* Number of Journal Records */
--    u32 dummy;
--    rc = readJournalHdr(pPager, 0, szJ, &nJRec, &dummy);
--    assert( rc!=SQLITE_DONE );
-+  rc = sqlite3OsFileSize(pMaster, &nMasterJournal);
-+  if( rc!=SQLITE_OK ) goto delmaster_out;
-+  nMasterPtr = pVfs->mxPathname+1;
-+  zMasterJournal = sqlite3Malloc((int)nMasterJournal + nMasterPtr + 1);
-+  if( !zMasterJournal ){
-+    rc = SQLITE_NOMEM;
-+    goto delmaster_out;
-+  }
-+  zMasterPtr = &zMasterJournal[nMasterJournal+1];
-+  rc = sqlite3OsRead(pMaster, zMasterJournal, (int)nMasterJournal, 0);
-+  if( rc!=SQLITE_OK ) goto delmaster_out;
-+  zMasterJournal[nMasterJournal] = 0;
- 
--    /*
--    ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
--    ** test is related to ticket #2565.  See the discussion in the
--    ** pager_playback() function for additional information.
--    */
--    if( nJRec==0 
--     && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
--    ){
--      nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager));
--    }
--    for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
--      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
-+  zJournal = zMasterJournal;
-+  while( (zJournal-zMasterJournal)<nMasterJournal ){
-+    int exists;
-+    rc = sqlite3OsAccess(pVfs, zJournal, SQLITE_ACCESS_EXISTS, &exists);
-+    if( rc!=SQLITE_OK ){
-+      goto delmaster_out;
-     }
--    assert( rc!=SQLITE_DONE );
--  }
--  assert( rc!=SQLITE_OK || pPager->journalOff>=szJ );
-+    if( exists ){
-+      /* One of the journals pointed to by the master journal exists.
-+      ** Open it and check if it points at the master journal. If
-+      ** so, return without deleting the master journal file.
-+      */
-+      int c;
-+      int flags = (SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL);
-+      rc = sqlite3OsOpen(pVfs, zJournal, pJournal, flags, 0);
-+      if( rc!=SQLITE_OK ){
-+        goto delmaster_out;
-+      }
- 
--  /* Finally,  rollback pages from the sub-journal.  Page that were
--  ** previously rolled back out of the main journal (and are hence in pDone)
--  ** will be skipped.  Out-of-range pages are also skipped.
--  */
--  if( pSavepoint ){
--    u32 ii;            /* Loop counter */
--    i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
-+      rc = readMasterJournal(pJournal, zMasterPtr, nMasterPtr);
-+      sqlite3OsClose(pJournal);
-+      if( rc!=SQLITE_OK ){
-+        goto delmaster_out;
-+      }
- 
--    if( pagerUseWal(pPager) ){
--      rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData);
--    }
--    for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
--      assert( offset==ii*(4+pPager->pageSize) );
--      rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1);
-+      c = zMasterPtr[0]!=0 && strcmp(zMasterPtr, zMaster)==0;
-+      if( c ){
-+        /* We have a match. Do not delete the master journal file. */
-+        goto delmaster_out;
-+      }
-     }
--    assert( rc!=SQLITE_DONE );
-+    zJournal += (sqlite3Strlen30(zJournal)+1);
-   }
-+ 
-+  sqlite3OsClose(pMaster);
-+  rc = sqlite3OsDelete(pVfs, zMaster, 0);
- 
--  sqlite3BitvecDestroy(pDone);
--  if( rc==SQLITE_OK ){
--    pPager->journalOff = szJ;
-+delmaster_out:
-+  sqlite3_free(zMasterJournal);
-+  if( pMaster ){
-+    sqlite3OsClose(pMaster);
-+    assert( !isOpen(pJournal) );
-+    sqlite3_free(pMaster);
-   }
--
-   return rc;
- }
- 
--/*
--** Change the maximum number of in-memory pages that are allowed.
--*/
--SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
--  sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
--}
- 
- /*
--** Adjust the robustness of the database to damage due to OS crashes
--** or power failures by changing the number of sync()s when writing
--** the rollback journal.  There are three levels:
--**
--**    OFF       sqlite3OsSync() is never called.  This is the default
--**              for temporary and transient files.
--**
--**    NORMAL    The journal is synced once before writes begin on the
--**              database.  This is normally adequate protection, but
--**              it is theoretically possible, though very unlikely,
--**              that an inopportune power failure could leave the journal
--**              in a state which would cause damage to the database
--**              when it is rolled back.
--**
--**    FULL      The journal is synced twice before writes begin on the
--**              database (with some additional information - the nRec field
--**              of the journal header - being written in between the two
--**              syncs).  If we assume that writing a
--**              single disk sector is atomic, then this mode provides
--**              assurance that the journal will not be corrupted to the
--**              point of causing damage to the database during rollback.
-+** This function is used to change the actual size of the database 
-+** file in the file-system. This only happens when committing a transaction,
-+** or rolling back a transaction (including rolling back a hot-journal).
- **
--** The above is for a rollback-journal mode.  For WAL mode, OFF continues
--** to mean that no syncs ever occur.  NORMAL means that the WAL is synced
--** prior to the start of checkpoint and that the database file is synced
--** at the conclusion of the checkpoint if the entire content of the WAL
--** was written back into the database.  But no sync operations occur for
--** an ordinary commit in NORMAL mode with WAL.  FULL means that the WAL
--** file is synced following each commit operation, in addition to the
--** syncs associated with NORMAL.
-+** If the main database file is not open, or the pager is not in either
-+** DBMOD or OPEN state, this function is a no-op. Otherwise, the size 
-+** of the file is changed to nPage pages (nPage*pPager->pageSize bytes). 
-+** If the file on disk is currently larger than nPage pages, then use the VFS
-+** xTruncate() method to truncate it.
- **
--** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL.  The
--** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync
--** using fcntl(F_FULLFSYNC).  SQLITE_SYNC_NORMAL means to do an
--** ordinary fsync() call.  There is no difference between SQLITE_SYNC_FULL
--** and SQLITE_SYNC_NORMAL on platforms other than MacOSX.  But the
--** synchronous=FULL versus synchronous=NORMAL setting determines when
--** the xSync primitive is called and is relevant to all platforms.
-+** Or, it might be the case that the file on disk is smaller than 
-+** nPage pages. Some operating system implementations can get confused if 
-+** you try to truncate a file to some size that is larger than it 
-+** currently is, so detect this case and write a single zero byte to 
-+** the end of the new file instead.
- **
--** Numeric values associated with these states are OFF==1, NORMAL=2,
--** and FULL=3.
-+** If successful, return SQLITE_OK. If an IO error occurs while modifying
-+** the database file, return the error code to the caller.
- */
--#ifndef SQLITE_OMIT_PAGER_PRAGMAS
--SQLITE_PRIVATE void sqlite3PagerSetSafetyLevel(
--  Pager *pPager,        /* The pager to set safety level for */
--  int level,            /* PRAGMA synchronous.  1=OFF, 2=NORMAL, 3=FULL */  
--  int bFullFsync,       /* PRAGMA fullfsync */
--  int bCkptFullFsync    /* PRAGMA checkpoint_fullfsync */
--){
--  assert( level>=1 && level<=3 );
--  pPager->noSync =  (level==1 || pPager->tempFile) ?1:0;
--  pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0;
--  if( pPager->noSync ){
--    pPager->syncFlags = 0;
--    pPager->ckptSyncFlags = 0;
--  }else if( bFullFsync ){
--    pPager->syncFlags = SQLITE_SYNC_FULL;
--    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
--  }else if( bCkptFullFsync ){
--    pPager->syncFlags = SQLITE_SYNC_NORMAL;
--    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
--  }else{
--    pPager->syncFlags = SQLITE_SYNC_NORMAL;
--    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
-+static int pager_truncate(Pager *pPager, Pgno nPage){
-+  int rc = SQLITE_OK;
-+  assert( pPager->eState!=PAGER_ERROR );
-+  assert( pPager->eState!=PAGER_READER );
-+  
-+  if( isOpen(pPager->fd) 
-+   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN) 
-+  ){
-+    i64 currentSize, newSize;
-+    int szPage = pPager->pageSize;
-+    assert( pPager->eLock==EXCLUSIVE_LOCK );
-+    /* TODO: Is it safe to use Pager.dbFileSize here? */
-+    rc = sqlite3OsFileSize(pPager->fd, &currentSize);
-+    newSize = szPage*(i64)nPage;
-+    if( rc==SQLITE_OK && currentSize!=newSize ){
-+      if( currentSize>newSize ){
-+        rc = sqlite3OsTruncate(pPager->fd, newSize);
-+      }else{
-+        char *pTmp = pPager->pTmpSpace;
-+        memset(pTmp, 0, szPage);
-+        testcase( (newSize-szPage) <  currentSize );
-+        testcase( (newSize-szPage) == currentSize );
-+        testcase( (newSize-szPage) >  currentSize );
-+        rc = sqlite3OsWrite(pPager->fd, pTmp, szPage, newSize-szPage);
-+      }
-+      if( rc==SQLITE_OK ){
-+        pPager->dbFileSize = nPage;
-+      }
-+    }
-   }
--}
--#endif
--
--/*
--** The following global variable is incremented whenever the library
--** attempts to open a temporary file.  This information is used for
--** testing and analysis only.  
--*/
--#ifdef SQLITE_TEST
--SQLITE_API int sqlite3_opentemp_count = 0;
--#endif
--
--/*
--** Open a temporary file.
--**
--** Write the file descriptor into *pFile. Return SQLITE_OK on success 
--** or some other error code if we fail. The OS will automatically 
--** delete the temporary file when it is closed.
--**
--** The flags passed to the VFS layer xOpen() call are those specified
--** by parameter vfsFlags ORed with the following:
--**
--**     SQLITE_OPEN_READWRITE
--**     SQLITE_OPEN_CREATE
--**     SQLITE_OPEN_EXCLUSIVE
--**     SQLITE_OPEN_DELETEONCLOSE
--*/
--static int pagerOpentemp(
--  Pager *pPager,        /* The pager object */
--  sqlite3_file *pFile,  /* Write the file descriptor here */
--  int vfsFlags          /* Flags passed through to the VFS */
--){
--  int rc;               /* Return code */
--
--#ifdef SQLITE_TEST
--  sqlite3_opentemp_count++;  /* Used for testing and analysis only */
--#endif
--
--  vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
--            SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
--  rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
--  assert( rc!=SQLITE_OK || isOpen(pFile) );
-   return rc;
- }
- 
- /*
--** Set the busy handler function.
--**
--** The pager invokes the busy-handler if sqlite3OsLock() returns 
--** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock,
--** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE 
--** lock. It does *not* invoke the busy handler when upgrading from
--** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE
--** (which occurs during hot-journal rollback). Summary:
-+** Set the value of the Pager.sectorSize variable for the given
-+** pager based on the value returned by the xSectorSize method
-+** of the open database file. The sector size will be used 
-+** to determine the size and alignment of journal header and 
-+** master journal pointers within created journal files.
- **
--**   Transition                        | Invokes xBusyHandler
--**   --------------------------------------------------------
--**   NO_LOCK       -> SHARED_LOCK      | Yes
--**   SHARED_LOCK   -> RESERVED_LOCK    | No
--**   SHARED_LOCK   -> EXCLUSIVE_LOCK   | No
--**   RESERVED_LOCK -> EXCLUSIVE_LOCK   | Yes
-+** For temporary files the effective sector size is always 512 bytes.
- **
--** If the busy-handler callback returns non-zero, the lock is 
--** retried. If it returns zero, then the SQLITE_BUSY error is
--** returned to the caller of the pager API function.
--*/
--SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(
--  Pager *pPager,                       /* Pager object */
--  int (*xBusyHandler)(void *),         /* Pointer to busy-handler function */
--  void *pBusyHandlerArg                /* Argument to pass to xBusyHandler */
--){  
--  pPager->xBusyHandler = xBusyHandler;
--  pPager->pBusyHandlerArg = pBusyHandlerArg;
-+** Otherwise, for non-temporary files, the effective sector size is
-+** the value returned by the xSectorSize() method, replaced by 512 if
-+** it is less than 32, or rounded down to MAX_SECTOR_SIZE if it
-+** is greater than MAX_SECTOR_SIZE.
-+*/
-+static void setSectorSize(Pager *pPager){
-+  assert( isOpen(pPager->fd) || pPager->tempFile );
-+
-+  if( !pPager->tempFile ){
-+    /* Sector size doesn't matter for temporary files. Also, the file
-+    ** may not have been opened yet, in which case the OsSectorSize()
-+    ** call will segfault.
-+    */
-+    pPager->sectorSize = sqlite3OsSectorSize(pPager->fd);
-+  }
-+  if( pPager->sectorSize<32 ){
-+    pPager->sectorSize = 512;
-+  }
-+  if( pPager->sectorSize>MAX_SECTOR_SIZE ){
-+    assert( MAX_SECTOR_SIZE>=512 );
-+    pPager->sectorSize = MAX_SECTOR_SIZE;
-+  }
- }
- 
- /*
--** Change the page size used by the Pager object. The new page size 
--** is passed in *pPageSize.
-+** Playback the journal and thus restore the database file to
-+** the state it was in before we started making changes.  
- **
--** If the pager is in the error state when this function is called, it
--** is a no-op. The value returned is the error state error code (i.e. 
--** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL).
-+** The journal file format is as follows: 
- **
--** Otherwise, if all of the following are true:
-+**  (1)  8 byte prefix.  A copy of aJournalMagic[].
-+**  (2)  4 byte big-endian integer which is the number of valid page records
-+**       in the journal.  If this value is 0xffffffff, then compute the
-+**       number of page records from the journal size.
-+**  (3)  4 byte big-endian integer which is the initial value for the 
-+**       sanity checksum.
-+**  (4)  4 byte integer which is the number of pages to truncate the
-+**       database to during a rollback.
-+**  (5)  4 byte big-endian integer which is the sector size.  The header
-+**       is this many bytes in size.
-+**  (6)  4 byte big-endian integer which is the page size.
-+**  (7)  zero padding out to the next sector size.
-+**  (8)  Zero or more pages instances, each as follows:
-+**        +  4 byte page number.
-+**        +  pPager->pageSize bytes of data.
-+**        +  4 byte checksum
- **
--**   * the new page size (value of *pPageSize) is valid (a power 
--**     of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and
-+** When we speak of the journal header, we mean the first 7 items above.
-+** Each entry in the journal is an instance of the 8th item.
- **
--**   * there are no outstanding page references, and
-+** Call the value from the second bullet "nRec".  nRec is the number of
-+** valid page entries in the journal.  In most cases, you can compute the
-+** value of nRec from the size of the journal file.  But if a power
-+** failure occurred while the journal was being written, it could be the
-+** case that the size of the journal file had already been increased but
-+** the extra entries had not yet made it safely to disk.  In such a case,
-+** the value of nRec computed from the file size would be too large.  For
-+** that reason, we always use the nRec value in the header.
- **
--**   * the database is either not an in-memory database or it is
--**     an in-memory database that currently consists of zero pages.
-+** If the nRec value is 0xffffffff it means that nRec should be computed
-+** from the file size.  This value is used when the user selects the
-+** no-sync option for the journal.  A power failure could lead to corruption
-+** in this case.  But for things like temporary tables (which will be
-+** deleted when the power is restored) we don't care.  
- **
--** then the pager object page size is set to *pPageSize.
-+** If the file opened as the journal file is not a well-formed
-+** journal file then all pages up to the first corrupted page are rolled
-+** back (or no pages if the journal header is corrupted). The journal file
-+** is then deleted and SQLITE_OK returned, just as if no corruption had
-+** been encountered.
- **
--** If the page size is changed, then this function uses sqlite3PagerMalloc() 
--** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt 
--** fails, SQLITE_NOMEM is returned and the page size remains unchanged. 
--** In all other cases, SQLITE_OK is returned.
-+** If an I/O or malloc() error occurs, the journal-file is not deleted
-+** and an error code is returned.
- **
--** If the page size is not changed, either because one of the enumerated
--** conditions above is not true, the pager was in error state when this
--** function was called, or because the memory allocation attempt failed, 
--** then *pPageSize is set to the old, retained page size before returning.
-+** The isHot parameter indicates that we are trying to rollback a journal
-+** that might be a hot journal.  Or, it could be that the journal is 
-+** preserved because of JOURNALMODE_PERSIST or JOURNALMODE_TRUNCATE.
-+** If the journal really is hot, reset the pager cache prior to rolling
-+** back any content.  If the journal is merely persistent, no reset is
-+** needed.
- */
--SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){
--  int rc = SQLITE_OK;
-+static int pager_playback(Pager *pPager, int isHot){
-+  sqlite3_vfs *pVfs = pPager->pVfs;
-+  i64 szJ;                 /* Size of the journal file in bytes */
-+  u32 nRec;                /* Number of Records in the journal */
-+  u32 u;                   /* Unsigned loop counter */
-+  Pgno mxPg = 0;           /* Size of the original file in pages */
-+  int rc;                  /* Result code of a subroutine */
-+  int res = 1;             /* Value returned by sqlite3OsAccess() */
-+  char *zMaster = 0;       /* Name of master journal file if any */
-+  int needPagerReset;      /* True to reset page prior to first page rollback */
- 
--  /* It is not possible to do a full assert_pager_state() here, as this
--  ** function may be called from within PagerOpen(), before the state
--  ** of the Pager object is internally consistent.
-+  /* Figure out how many records are in the journal.  Abort early if
-+  ** the journal is empty.
-+  */
-+  assert( isOpen(pPager->jfd) );
-+  rc = sqlite3OsFileSize(pPager->jfd, &szJ);
-+  if( rc!=SQLITE_OK ){
-+    goto end_playback;
-+  }
-+
-+  /* Read the master journal name from the journal, if it is present.
-+  ** If a master journal file name is specified, but the file is not
-+  ** present on disk, then the journal is not hot and does not need to be
-+  ** played back.
-   **
--  ** At one point this function returned an error if the pager was in 
--  ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that
--  ** there is at least one outstanding page reference, this function
--  ** is a no-op for that case anyhow.
-+  ** TODO: Technically the following is an error because it assumes that
-+  ** buffer Pager.pTmpSpace is (mxPathname+1) bytes or larger. i.e. that
-+  ** (pPager->pageSize >= pPager->pVfs->mxPathname+1). Using os_unix.c,
-+  **  mxPathname is 512, which is the same as the minimum allowable value
-+  ** for pageSize.
-+  */
-+  zMaster = pPager->pTmpSpace;
-+  rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
-+  if( rc==SQLITE_OK && zMaster[0] ){
-+    rc = sqlite3OsAccess(pVfs, zMaster, SQLITE_ACCESS_EXISTS, &res);
-+  }
-+  zMaster = 0;
-+  if( rc!=SQLITE_OK || !res ){
-+    goto end_playback;
-+  }
-+  pPager->journalOff = 0;
-+  needPagerReset = isHot;
-+
-+  /* This loop terminates either when a readJournalHdr() or 
-+  ** pager_playback_one_page() call returns SQLITE_DONE or an IO error 
-+  ** occurs. 
-   */
-+  while( 1 ){
-+    /* Read the next journal header from the journal file.  If there are
-+    ** not enough bytes left in the journal file for a complete header, or
-+    ** it is corrupted, then a process must have failed while writing it.
-+    ** This indicates nothing more needs to be rolled back.
-+    */
-+    rc = readJournalHdr(pPager, isHot, szJ, &nRec, &mxPg);
-+    if( rc!=SQLITE_OK ){ 
-+      if( rc==SQLITE_DONE ){
-+        rc = SQLITE_OK;
-+      }
-+      goto end_playback;
-+    }
- 
--  u32 pageSize = *pPageSize;
--  assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
--  if( (pPager->memDb==0 || pPager->dbSize==0)
--   && sqlite3PcacheRefCount(pPager->pPCache)==0 
--   && pageSize && pageSize!=(u32)pPager->pageSize 
--  ){
--    char *pNew = NULL;             /* New temp space */
--    i64 nByte = 0;
-+    /* If nRec is 0xffffffff, then this journal was created by a process
-+    ** working in no-sync mode. This means that the rest of the journal
-+    ** file consists of pages, there are no more journal headers. Compute
-+    ** the value of nRec based on this assumption.
-+    */
-+    if( nRec==0xffffffff ){
-+      assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) );
-+      nRec = (int)((szJ - JOURNAL_HDR_SZ(pPager))/JOURNAL_PG_SZ(pPager));
-+    }
- 
--    if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){
--      rc = sqlite3OsFileSize(pPager->fd, &nByte);
-+    /* If nRec is 0 and this rollback is of a transaction created by this
-+    ** process and if this is the final header in the journal, then it means
-+    ** that this part of the journal was being filled but has not yet been
-+    ** synced to disk.  Compute the number of pages based on the remaining
-+    ** size of the file.
-+    **
-+    ** The third term of the test was added to fix ticket #2565.
-+    ** When rolling back a hot journal, nRec==0 always means that the next
-+    ** chunk of the journal contains zero pages to be rolled back.  But
-+    ** when doing a ROLLBACK and the nRec==0 chunk is the last chunk in
-+    ** the journal, it means that the journal might contain additional
-+    ** pages that need to be rolled back and that the number of pages 
-+    ** should be computed based on the journal file size.
-+    */
-+    if( nRec==0 && !isHot &&
-+        pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff ){
-+      nRec = (int)((szJ - pPager->journalOff) / JOURNAL_PG_SZ(pPager));
-     }
--    if( rc==SQLITE_OK ){
--      pNew = (char *)sqlite3PageMalloc(pageSize);
--      if( !pNew ) rc = SQLITE_NOMEM;
-+
-+    /* If this is the first header read from the journal, truncate the
-+    ** database file back to its original size.
-+    */
-+    if( pPager->journalOff==JOURNAL_HDR_SZ(pPager) ){
-+      rc = pager_truncate(pPager, mxPg);
-+      if( rc!=SQLITE_OK ){
-+        goto end_playback;
-+      }
-+      pPager->dbSize = mxPg;
-     }
- 
--    if( rc==SQLITE_OK ){
--      pager_reset(pPager);
--      pPager->dbSize = (Pgno)(nByte/pageSize);
--      pPager->pageSize = pageSize;
--      sqlite3PageFree(pPager->pTmpSpace);
--      pPager->pTmpSpace = pNew;
--      sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
-+    /* Copy original pages out of the journal and back into the 
-+    ** database file and/or page cache.
-+    */
-+    for(u=0; u<nRec; u++){
-+      if( needPagerReset ){
-+        pager_reset(pPager);
-+        needPagerReset = 0;
-+      }
-+      rc = pager_playback_one_page(pPager,&pPager->journalOff,0,1,0);
-+      if( rc!=SQLITE_OK ){
-+        if( rc==SQLITE_DONE ){
-+          rc = SQLITE_OK;
-+          pPager->journalOff = szJ;
-+          break;
-+        }else if( rc==SQLITE_IOERR_SHORT_READ ){
-+          /* If the journal has been truncated, simply stop reading and
-+          ** processing the journal. This might happen if the journal was
-+          ** not completely written and synced prior to a crash.  In that
-+          ** case, the database should have never been written in the
-+          ** first place so it is OK to simply abandon the rollback. */
-+          rc = SQLITE_OK;
-+          goto end_playback;
-+        }else{
-+          /* If we are unable to rollback, quit and return the error
-+          ** code.  This will cause the pager to enter the error state
-+          ** so that no further harm will be done.  Perhaps the next
-+          ** process to come along will be able to rollback the database.
-+          */
-+          goto end_playback;
-+        }
-+      }
-     }
-   }
-+  /*NOTREACHED*/
-+  assert( 0 );
-+
-+end_playback:
-+  /* Following a rollback, the database file should be back in its original
-+  ** state prior to the start of the transaction, so invoke the
-+  ** SQLITE_FCNTL_DB_UNCHANGED file-control method to disable the
-+  ** assertion that the transaction counter was modified.
-+  */
-+  assert(
-+    pPager->fd->pMethods==0 ||
-+    sqlite3OsFileControl(pPager->fd,SQLITE_FCNTL_DB_UNCHANGED,0)>=SQLITE_OK
-+  );
-+
-+  /* If this playback is happening automatically as a result of an IO or 
-+  ** malloc error that occurred after the change-counter was updated but 
-+  ** before the transaction was committed, then the change-counter 
-+  ** modification may just have been reverted. If this happens in exclusive 
-+  ** mode, then subsequent transactions performed by the connection will not
-+  ** update the change-counter at all. This may lead to cache inconsistency
-+  ** problems for other processes at some point in the future. So, just
-+  ** in case this has happened, clear the changeCountDone flag now.
-+  */
-+  pPager->changeCountDone = pPager->tempFile;
- 
--  *pPageSize = pPager->pageSize;
-   if( rc==SQLITE_OK ){
--    if( nReserve<0 ) nReserve = pPager->nReserve;
--    assert( nReserve>=0 && nReserve<1000 );
--    pPager->nReserve = (i16)nReserve;
--    pagerReportSize(pPager);
-+    zMaster = pPager->pTmpSpace;
-+    rc = readMasterJournal(pPager->jfd, zMaster, pPager->pVfs->mxPathname+1);
-+    testcase( rc!=SQLITE_OK );
-+  }
-+  if( rc==SQLITE_OK
-+   && (pPager->eState>=PAGER_WRITER_DBMOD || pPager->eState==PAGER_OPEN)
-+  ){
-+    rc = sqlite3PagerSync(pPager);
-+  }
-+  if( rc==SQLITE_OK ){
-+    rc = pager_end_transaction(pPager, zMaster[0]!='\0');
-+    testcase( rc!=SQLITE_OK );
-+  }
-+  if( rc==SQLITE_OK && zMaster[0] && res ){
-+    /* If there was a master journal and this routine will return success,
-+    ** see if it is possible to delete the master journal.
-+    */
-+    rc = pager_delmaster(pPager, zMaster);
-+    testcase( rc!=SQLITE_OK );
-   }
-+
-+  /* The Pager.sectorSize variable may have been updated while rolling
-+  ** back a journal created by a process with a different sector size
-+  ** value. Reset it to the correct value for this process.
-+  */
-+  setSectorSize(pPager);
-   return rc;
- }
- 
--/*
--** Return a pointer to the "temporary page" buffer held internally
--** by the pager.  This is a buffer that is big enough to hold the
--** entire content of a database page.  This buffer is used internally
--** during rollback and will be overwritten whenever a rollback
--** occurs.  But other modules are free to use it too, as long as
--** no rollbacks are happening.
--*/
--SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager *pPager){
--  return pPager->pTmpSpace;
--}
- 
- /*
--** Attempt to set the maximum database page count if mxPage is positive. 
--** Make no changes if mxPage is zero or negative.  And never reduce the
--** maximum page count below the current size of the database.
-+** Read the content for page pPg out of the database file and into 
-+** pPg->pData. A shared lock or greater must be held on the database
-+** file before this function is called.
- **
--** Regardless of mxPage, return the current maximum page count.
--*/
--SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
--  if( mxPage>0 ){
--    pPager->mxPgno = mxPage;
--  }
--  assert( pPager->eState!=PAGER_OPEN );      /* Called only by OP_MaxPgcnt */
--  assert( pPager->mxPgno>=pPager->dbSize );  /* OP_MaxPgcnt enforces this */
--  return pPager->mxPgno;
--}
--
--/*
--** The following set of routines are used to disable the simulated
--** I/O error mechanism.  These routines are used to avoid simulated
--** errors in places where we do not care about errors.
-+** If page 1 is read, then the value of Pager.dbFileVers[] is set to
-+** the value read from the database file.
- **
--** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
--** and generate no code.
-+** If an IO error occurs, then the IO error is returned to the caller.
-+** Otherwise, SQLITE_OK is returned.
- */
--#ifdef SQLITE_TEST
--SQLITE_API extern int sqlite3_io_error_pending;
--SQLITE_API extern int sqlite3_io_error_hit;
--static int saved_cnt;
--void disable_simulated_io_errors(void){
--  saved_cnt = sqlite3_io_error_pending;
--  sqlite3_io_error_pending = -1;
--}
--void enable_simulated_io_errors(void){
--  sqlite3_io_error_pending = saved_cnt;
--}
--#else
--# define disable_simulated_io_errors()
--# define enable_simulated_io_errors()
--#endif
-+static int readDbPage(PgHdr *pPg){
-+  Pager *pPager = pPg->pPager; /* Pager object associated with page pPg */
-+  Pgno pgno = pPg->pgno;       /* Page number to read */
-+  int rc = SQLITE_OK;          /* Return code */
-+  int isInWal = 0;             /* True if page is in log file */
-+  int pgsz = pPager->pageSize; /* Number of bytes to read */
- 
--/*
--** Read the first N bytes from the beginning of the file into memory
--** that pDest points to. 
--**
--** If the pager was opened on a transient file (zFilename==""), or
--** opened on a file less than N bytes in size, the output buffer is
--** zeroed and SQLITE_OK returned. The rationale for this is that this 
--** function is used to read database headers, and a new transient or
--** zero sized database has a header that consists entirely of zeroes.
--**
--** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered,
--** the error code is returned to the caller and the contents of the
--** output buffer undefined.
--*/
--SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
--  int rc = SQLITE_OK;
--  memset(pDest, 0, N);
--  assert( isOpen(pPager->fd) || pPager->tempFile );
-+  assert( pPager->eState>=PAGER_READER && !MEMDB );
-+  assert( isOpen(pPager->fd) );
- 
--  /* This routine is only called by btree immediately after creating
--  ** the Pager object.  There has not been an opportunity to transition
--  ** to WAL mode yet.
--  */
--  assert( !pagerUseWal(pPager) );
-+  if( NEVER(!isOpen(pPager->fd)) ){
-+    assert( pPager->tempFile );
-+    memset(pPg->pData, 0, pPager->pageSize);
-+    return SQLITE_OK;
-+  }
- 
--  if( isOpen(pPager->fd) ){
--    IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
--    rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
-+  if( pagerUseWal(pPager) ){
-+    /* Try to pull the page from the write-ahead log. */
-+    rc = sqlite3WalRead(pPager->pWal, pgno, &isInWal, pgsz, pPg->pData);
-+  }
-+  if( rc==SQLITE_OK && !isInWal ){
-+    i64 iOffset = (pgno-1)*(i64)pPager->pageSize;
-+    rc = sqlite3OsRead(pPager->fd, pPg->pData, pgsz, iOffset);
-     if( rc==SQLITE_IOERR_SHORT_READ ){
-       rc = SQLITE_OK;
-     }
-   }
--  return rc;
--}
--
--/*
--** This function may only be called when a read-transaction is open on
--** the pager. It returns the total number of pages in the database.
--**
--** However, if the file is between 1 and <page-size> bytes in size, then 
--** this is considered a 1 page file.
--*/
--SQLITE_PRIVATE void sqlite3PagerPagecount(Pager *pPager, int *pnPage){
--  assert( pPager->eState>=PAGER_READER );
--  assert( pPager->eState!=PAGER_WRITER_FINISHED );
--  *pnPage = (int)pPager->dbSize;
--}
--
--
--/*
--** Try to obtain a lock of type locktype on the database file. If
--** a similar or greater lock is already held, this function is a no-op
--** (returning SQLITE_OK immediately).
--**
--** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke 
--** the busy callback if the lock is currently not available. Repeat 
--** until the busy callback returns false or until the attempt to 
--** obtain the lock succeeds.
--**
--** Return SQLITE_OK on success and an error code if we cannot obtain
--** the lock. If the lock is obtained successfully, set the Pager.state 
--** variable to locktype before returning.
--*/
--static int pager_wait_on_lock(Pager *pPager, int locktype){
--  int rc;                              /* Return code */
--
--  /* Check that this is either a no-op (because the requested lock is 
--  ** already held), or one of the transitions that the busy-handler
--  ** may be invoked during, according to the comment above
--  ** sqlite3PagerSetBusyhandler().
--  */
--  assert( (pPager->eLock>=locktype)
--       || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK)
--       || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK)
--  );
--
--  do {
--    rc = pagerLockDb(pPager, locktype);
--  }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) );
--  return rc;
--}
- 
--/*
--** Function assertTruncateConstraint(pPager) checks that one of the 
--** following is true for all dirty pages currently in the page-cache:
--**
--**   a) The page number is less than or equal to the size of the 
--**      current database image, in pages, OR
--**
--**   b) if the page content were written at this time, it would not
--**      be necessary to write the current content out to the sub-journal
--**      (as determined by function subjRequiresPage()).
--**
--** If the condition asserted by this function were not true, and the
--** dirty page were to be discarded from the cache via the pagerStress()
--** routine, pagerStress() would not write the current page content to
--** the database file. If a savepoint transaction were rolled back after
--** this happened, the correct behaviour would be to restore the current
--** content of the page. However, since this content is not present in either
--** the database file or the portion of the rollback journal and 
--** sub-journal rolled back the content could not be restored and the
--** database image would become corrupt. It is therefore fortunate that 
--** this circumstance cannot arise.
--*/
--#if defined(SQLITE_DEBUG)
--static void assertTruncateConstraintCb(PgHdr *pPg){
--  assert( pPg->flags&PGHDR_DIRTY );
--  assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize );
--}
--static void assertTruncateConstraint(Pager *pPager){
--  sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb);
--}
--#else
--# define assertTruncateConstraint(pPager)
--#endif
-+  if( pgno==1 ){
-+    if( rc ){
-+      /* If the read is unsuccessful, set the dbFileVers[] to something
-+      ** that will never be a valid file version.  dbFileVers[] is a copy
-+      ** of bytes 24..39 of the database.  Bytes 28..31 should always be
-+      ** zero or the size of the database in page. Bytes 32..35 and 35..39
-+      ** should be page numbers which are never 0xffffffff.  So filling
-+      ** pPager->dbFileVers[] with all 0xff bytes should suffice.
-+      **
-+      ** For an encrypted database, the situation is more complex:  bytes
-+      ** 24..39 of the database are white noise.  But the probability of
-+      ** white noising equaling 16 bytes of 0xff is vanishingly small so
-+      ** we should still be ok.
-+      */
-+      memset(pPager->dbFileVers, 0xff, sizeof(pPager->dbFileVers));
-+    }else{
-+      u8 *dbFileVers = &((u8*)pPg->pData)[24];
-+      memcpy(&pPager->dbFileVers, dbFileVers, sizeof(pPager->dbFileVers));
-+    }
-+  }
-+  CODEC1(pPager, pPg->pData, pgno, 3, rc = SQLITE_NOMEM);
- 
--/*
--** Truncate the in-memory database file image to nPage pages. This 
--** function does not actually modify the database file on disk. It 
--** just sets the internal state of the pager object so that the 
--** truncation will be done when the current transaction is committed.
--*/
--SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
--  assert( pPager->dbSize>=nPage );
--  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
--  pPager->dbSize = nPage;
--  assertTruncateConstraint(pPager);
--}
-+  PAGER_INCR(sqlite3_pager_readdb_count);
-+  PAGER_INCR(pPager->nRead);
-+  IOTRACE(("PGIN %p %d\n", pPager, pgno));
-+  PAGERTRACE(("FETCH %d page %d hash(%08x)\n",
-+               PAGERID(pPager), pgno, pager_pagehash(pPg)));
- 
-+  return rc;
-+}
- 
- /*
--** This function is called before attempting a hot-journal rollback. It
--** syncs the journal file to disk, then sets pPager->journalHdr to the
--** size of the journal file so that the pager_playback() routine knows
--** that the entire journal file has been synced.
--**
--** Syncing a hot-journal to disk before attempting to roll it back ensures 
--** that if a power-failure occurs during the rollback, the process that
--** attempts rollback following system recovery sees the same journal
--** content as this process.
-+** Update the value of the change-counter at offsets 24 and 92 in
-+** the header and the sqlite version number at offset 96.
- **
--** If everything goes as planned, SQLITE_OK is returned. Otherwise, 
--** an SQLite error code.
-+** This is an unconditional update.  See also the pager_incr_changecounter()
-+** routine which only updates the change-counter if the update is actually
-+** needed, as determined by the pPager->changeCountDone state variable.
- */
--static int pagerSyncHotJournal(Pager *pPager){
--  int rc = SQLITE_OK;
--  if( !pPager->noSync ){
--    rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL);
--  }
--  if( rc==SQLITE_OK ){
--    rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
--  }
--  return rc;
-+static void pager_write_changecounter(PgHdr *pPg){
-+  u32 change_counter;
-+
-+  /* Increment the value just read and write it back to byte 24. */
-+  change_counter = sqlite3Get4byte((u8*)pPg->pPager->dbFileVers)+1;
-+  put32bits(((char*)pPg->pData)+24, change_counter);
-+
-+  /* Also store the SQLite version number in bytes 96..99 and in
-+  ** bytes 92..95 store the change counter for which the version number
-+  ** is valid. */
-+  put32bits(((char*)pPg->pData)+92, change_counter);
-+  put32bits(((char*)pPg->pData)+96, SQLITE_VERSION_NUMBER);
- }
- 
-+#ifndef SQLITE_OMIT_WAL
- /*
--** Shutdown the page cache.  Free all memory and close all files.
--**
--** If a transaction was in progress when this routine is called, that
--** transaction is rolled back.  All outstanding pages are invalidated
--** and their memory is freed.  Any attempt to use a page associated
--** with this page cache after this function returns will likely
--** result in a coredump.
-+** This function is invoked once for each page that has already been 
-+** written into the log file when a WAL transaction is rolled back.
-+** Parameter iPg is the page number of said page. The pCtx argument 
-+** is actually a pointer to the Pager structure.
- **
--** This function always succeeds. If a transaction is active an attempt
--** is made to roll it back. If an error occurs during the rollback 
--** a hot journal may be left in the filesystem but no error is returned
--** to the caller.
-+** If page iPg is present in the cache, and has no outstanding references,
-+** it is discarded. Otherwise, if there are one or more outstanding
-+** references, the page content is reloaded from the database. If the
-+** attempt to reload content from the database is required and fails, 
-+** return an SQLite error code. Otherwise, SQLITE_OK.
- */
--SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){
--  u8 *pTmp = (u8 *)pPager->pTmpSpace;
-+static int pagerUndoCallback(void *pCtx, Pgno iPg){
-+  int rc = SQLITE_OK;
-+  Pager *pPager = (Pager *)pCtx;
-+  PgHdr *pPg;
- 
--  disable_simulated_io_errors();
--  sqlite3BeginBenignMalloc();
--  /* pPager->errCode = 0; */
--  pPager->exclusiveMode = 0;
--#ifndef SQLITE_OMIT_WAL
--  sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp);
--  pPager->pWal = 0;
--#endif
--  pager_reset(pPager);
--  if( MEMDB ){
--    pager_unlock(pPager);
--  }else{
--    /* If it is open, sync the journal file before calling UnlockAndRollback.
--    ** If this is not done, then an unsynced portion of the open journal 
--    ** file may be played back into the database. If a power failure occurs 
--    ** while this is happening, the database could become corrupt.
--    **
--    ** If an error occurs while trying to sync the journal, shift the pager
--    ** into the ERROR state. This causes UnlockAndRollback to unlock the
--    ** database and close the journal file without attempting to roll it
--    ** back or finalize it. The next database user will have to do hot-journal
--    ** rollback before accessing the database file.
--    */
--    if( isOpen(pPager->jfd) ){
--      pager_error(pPager, pagerSyncHotJournal(pPager));
-+  pPg = sqlite3PagerLookup(pPager, iPg);
-+  if( pPg ){
-+    if( sqlite3PcachePageRefcount(pPg)==1 ){
-+      sqlite3PcacheDrop(pPg);
-+    }else{
-+      rc = readDbPage(pPg);
-+      if( rc==SQLITE_OK ){
-+        pPager->xReiniter(pPg);
-+      }
-+      sqlite3PagerUnref(pPg);
-     }
--    pagerUnlockAndRollback(pPager);
-   }
--  sqlite3EndBenignMalloc();
--  enable_simulated_io_errors();
--  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
--  IOTRACE(("CLOSE %p\n", pPager))
--  sqlite3OsClose(pPager->jfd);
--  sqlite3OsClose(pPager->fd);
--  sqlite3PageFree(pTmp);
--  sqlite3PcacheClose(pPager->pPCache);
- 
--#ifdef SQLITE_HAS_CODEC
--  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
--#endif
--
--  assert( !pPager->aSavepoint && !pPager->pInJournal );
--  assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) );
-+  /* Normally, if a transaction is rolled back, any backup processes are
-+  ** updated as data is copied out of the rollback journal and into the
-+  ** database. This is not generally possible with a WAL database, as
-+  ** rollback involves simply truncating the log file. Therefore, if one
-+  ** or more frames have already been written to the log (and therefore 
-+  ** also copied into the backup databases) as part of this transaction,
-+  ** the backups must be restarted.
-+  */
-+  sqlite3BackupRestart(pPager->pBackup);
- 
--  sqlite3_free(pPager);
--  return SQLITE_OK;
-+  return rc;
- }
- 
--#if !defined(NDEBUG) || defined(SQLITE_TEST)
- /*
--** Return the page number for page pPg.
-+** This function is called to rollback a transaction on a WAL database.
- */
--SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage *pPg){
--  return pPg->pgno;
--}
--#endif
-+static int pagerRollbackWal(Pager *pPager){
-+  int rc;                         /* Return Code */
-+  PgHdr *pList;                   /* List of dirty pages to revert */
- 
--/*
--** Increment the reference count for page pPg.
--*/
--SQLITE_PRIVATE void sqlite3PagerRef(DbPage *pPg){
--  sqlite3PcacheRef(pPg);
-+  /* For all pages in the cache that are currently dirty or have already
-+  ** been written (but not committed) to the log file, do one of the 
-+  ** following:
-+  **
-+  **   + Discard the cached page (if refcount==0), or
-+  **   + Reload page content from the database (if refcount>0).
-+  */
-+  pPager->dbSize = pPager->dbOrigSize;
-+  rc = sqlite3WalUndo(pPager->pWal, pagerUndoCallback, (void *)pPager);
-+  pList = sqlite3PcacheDirtyList(pPager->pPCache);
-+  while( pList && rc==SQLITE_OK ){
-+    PgHdr *pNext = pList->pDirty;
-+    rc = pagerUndoCallback((void *)pPager, pList->pgno);
-+    pList = pNext;
-+  }
-+
-+  return rc;
- }
- 
- /*
--** Sync the journal. In other words, make sure all the pages that have
--** been written to the journal have actually reached the surface of the
--** disk and can be restored in the event of a hot-journal rollback.
--**
--** If the Pager.noSync flag is set, then this function is a no-op.
--** Otherwise, the actions required depend on the journal-mode and the 
--** device characteristics of the the file-system, as follows:
--**
--**   * If the journal file is an in-memory journal file, no action need
--**     be taken.
--**
--**   * Otherwise, if the device does not support the SAFE_APPEND property,
--**     then the nRec field of the most recently written journal header
--**     is updated to contain the number of journal records that have
--**     been written following it. If the pager is operating in full-sync
--**     mode, then the journal file is synced before this field is updated.
--**
--**   * If the device does not support the SEQUENTIAL property, then 
--**     journal file is synced.
--**
--** Or, in pseudo-code:
--**
--**   if( NOT <in-memory journal> ){
--**     if( NOT SAFE_APPEND ){
--**       if( <full-sync mode> ) xSync(<journal file>);
--**       <update nRec field>
--**     } 
--**     if( NOT SEQUENTIAL ) xSync(<journal file>);
--**   }
-+** This function is a wrapper around sqlite3WalFrames(). As well as logging
-+** the contents of the list of pages headed by pList (connected by pDirty),
-+** this function notifies any active backup processes that the pages have
-+** changed. 
- **
--** If successful, this routine clears the PGHDR_NEED_SYNC flag of every 
--** page currently held in memory before returning SQLITE_OK. If an IO
--** error is encountered, then the IO error code is returned to the caller.
--*/
--static int syncJournal(Pager *pPager, int newHdr){
-+** The list of pages passed into this routine is always sorted by page number.
-+** Hence, if page 1 appears anywhere on the list, it will be the first page.
-+*/ 
-+static int pagerWalFrames(
-+  Pager *pPager,                  /* Pager object */
-+  PgHdr *pList,                   /* List of frames to log */
-+  Pgno nTruncate,                 /* Database size after this commit */
-+  int isCommit,                   /* True if this is a commit */
-+  int syncFlags                   /* Flags to pass to OsSync() (or 0) */
-+){
-   int rc;                         /* Return code */
-+#if defined(SQLITE_DEBUG) || defined(SQLITE_CHECK_PAGES)
-+  PgHdr *p;                       /* For looping over pages */
-+#endif
- 
--  assert( pPager->eState==PAGER_WRITER_CACHEMOD
--       || pPager->eState==PAGER_WRITER_DBMOD
--  );
--  assert( assert_pager_state(pPager) );
--  assert( !pagerUseWal(pPager) );
--
--  rc = sqlite3PagerExclusiveLock(pPager);
--  if( rc!=SQLITE_OK ) return rc;
--
--  if( !pPager->noSync ){
--    assert( !pPager->tempFile );
--    if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){
--      const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
--      assert( isOpen(pPager->jfd) );
--
--      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
--        /* This block deals with an obscure problem. If the last connection
--        ** that wrote to this database was operating in persistent-journal
--        ** mode, then the journal file may at this point actually be larger
--        ** than Pager.journalOff bytes. If the next thing in the journal
--        ** file happens to be a journal-header (written as part of the
--        ** previous connection's transaction), and a crash or power-failure 
--        ** occurs after nRec is updated but before this connection writes 
--        ** anything else to the journal file (or commits/rolls back its 
--        ** transaction), then SQLite may become confused when doing the 
--        ** hot-journal rollback following recovery. It may roll back all
--        ** of this connections data, then proceed to rolling back the old,
--        ** out-of-date data that follows it. Database corruption.
--        **
--        ** To work around this, if the journal file does appear to contain
--        ** a valid header following Pager.journalOff, then write a 0x00
--        ** byte to the start of it to prevent it from being recognized.
--        **
--        ** Variable iNextHdrOffset is set to the offset at which this
--        ** problematic header will occur, if it exists. aMagic is used 
--        ** as a temporary buffer to inspect the first couple of bytes of
--        ** the potential journal header.
--        */
--        i64 iNextHdrOffset;
--        u8 aMagic[8];
--        u8 zHeader[sizeof(aJournalMagic)+4];
--
--        memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
--        put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec);
--
--        iNextHdrOffset = journalHdrOffset(pPager);
--        rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset);
--        if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){
--          static const u8 zerobyte = 0;
--          rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset);
--        }
--        if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
--          return rc;
--        }
-+  assert( pPager->pWal );
-+#ifdef SQLITE_DEBUG
-+  /* Verify that the page list is in accending order */
-+  for(p=pList; p && p->pDirty; p=p->pDirty){
-+    assert( p->pgno < p->pDirty->pgno );
-+  }
-+#endif
- 
--        /* Write the nRec value into the journal file header. If in
--        ** full-synchronous mode, sync the journal first. This ensures that
--        ** all data has really hit the disk before nRec is updated to mark
--        ** it as a candidate for rollback.
--        **
--        ** This is not required if the persistent media supports the
--        ** SAFE_APPEND property. Because in this case it is not possible 
--        ** for garbage data to be appended to the file, the nRec field
--        ** is populated with 0xFFFFFFFF when the journal header is written
--        ** and never needs to be updated.
--        */
--        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
--          PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
--          IOTRACE(("JSYNC %p\n", pPager))
--          rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags);
--          if( rc!=SQLITE_OK ) return rc;
--        }
--        IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr));
--        rc = sqlite3OsWrite(
--            pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr
--        );
--        if( rc!=SQLITE_OK ) return rc;
--      }
--      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
--        PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
--        IOTRACE(("JSYNC %p\n", pPager))
--        rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| 
--          (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
--        );
--        if( rc!=SQLITE_OK ) return rc;
--      }
-+  if( isCommit ){
-+    /* If a WAL transaction is being committed, there is no point in writing
-+    ** any pages with page numbers greater than nTruncate into the WAL file.
-+    ** They will never be read by any client. So remove them from the pDirty
-+    ** list here. */
-+    PgHdr *p;
-+    PgHdr **ppNext = &pList;
-+    for(p=pList; (*ppNext = p); p=p->pDirty){
-+      if( p->pgno<=nTruncate ) ppNext = &p->pDirty;
-+    }
-+    assert( pList );
-+  }
- 
--      pPager->journalHdr = pPager->journalOff;
--      if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
--        pPager->nRec = 0;
--        rc = writeJournalHdr(pPager);
--        if( rc!=SQLITE_OK ) return rc;
--      }
--    }else{
--      pPager->journalHdr = pPager->journalOff;
-+  if( pList->pgno==1 ) pager_write_changecounter(pList);
-+  rc = sqlite3WalFrames(pPager->pWal, 
-+      pPager->pageSize, pList, nTruncate, isCommit, syncFlags
-+  );
-+  if( rc==SQLITE_OK && pPager->pBackup ){
-+    PgHdr *p;
-+    for(p=pList; p; p=p->pDirty){
-+      sqlite3BackupUpdate(pPager->pBackup, p->pgno, (u8 *)p->pData);
-     }
-   }
- 
--  /* Unless the pager is in noSync mode, the journal file was just 
--  ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on 
--  ** all pages.
--  */
--  sqlite3PcacheClearSyncFlags(pPager->pPCache);
--  pPager->eState = PAGER_WRITER_DBMOD;
--  assert( assert_pager_state(pPager) );
--  return SQLITE_OK;
-+#ifdef SQLITE_CHECK_PAGES
-+  pList = sqlite3PcacheDirtyList(pPager->pPCache);
-+  for(p=pList; p; p=p->pDirty){
-+    pager_set_pagehash(p);
-+  }
-+#endif
-+
-+  return rc;
- }
- 
- /*
--** The argument is the first in a linked list of dirty pages connected
--** by the PgHdr.pDirty pointer. This function writes each one of the
--** in-memory pages in the list to the database file. The argument may
--** be NULL, representing an empty list. In this case this function is
--** a no-op.
--**
--** The pager must hold at least a RESERVED lock when this function
--** is called. Before writing anything to the database file, this lock
--** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained,
--** SQLITE_BUSY is returned and no data is written to the database file.
--** 
--** If the pager is a temp-file pager and the actual file-system file
--** is not yet open, it is created and opened before any data is 
--** written out.
--**
--** Once the lock has been upgraded and, if necessary, the file opened,
--** the pages are written out to the database file in list order. Writing
--** a page is skipped if it meets either of the following criteria:
--**
--**   * The page number is greater than Pager.dbSize, or
--**   * The PGHDR_DONT_WRITE flag is set on the page.
--**
--** If writing out a page causes the database file to grow, Pager.dbFileSize
--** is updated accordingly. If page 1 is written out, then the value cached
--** in Pager.dbFileVers[] is updated to match the new value stored in
--** the database file.
-+** Begin a read transaction on the WAL.
- **
--** If everything is successful, SQLITE_OK is returned. If an IO error 
--** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot
--** be obtained, SQLITE_BUSY is returned.
-+** This routine used to be called "pagerOpenSnapshot()" because it essentially
-+** makes a snapshot of the database at the current point in time and preserves
-+** that snapshot for use by the reader in spite of concurrently changes by
-+** other writers or checkpointers.
- */
--static int pager_write_pagelist(Pager *pPager, PgHdr *pList){
--  int rc = SQLITE_OK;                  /* Return code */
--
--  /* This function is only called for rollback pagers in WRITER_DBMOD state. */
--  assert( !pagerUseWal(pPager) );
--  assert( pPager->eState==PAGER_WRITER_DBMOD );
--  assert( pPager->eLock==EXCLUSIVE_LOCK );
-+static int pagerBeginReadTransaction(Pager *pPager){
-+  int rc;                         /* Return code */
-+  int changed = 0;                /* True if cache must be reset */
- 
--  /* If the file is a temp-file has not yet been opened, open it now. It
--  ** is not possible for rc to be other than SQLITE_OK if this branch
--  ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
--  */
--  if( !isOpen(pPager->fd) ){
--    assert( pPager->tempFile && rc==SQLITE_OK );
--    rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
--  }
-+  assert( pagerUseWal(pPager) );
-+  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
- 
--  /* Before the first write, give the VFS a hint of what the final
--  ** file size will be.
-+  /* sqlite3WalEndReadTransaction() was not called for the previous
-+  ** transaction in locking_mode=EXCLUSIVE.  So call it now.  If we
-+  ** are in locking_mode=NORMAL and EndRead() was previously called,
-+  ** the duplicate call is harmless.
-   */
--  assert( rc!=SQLITE_OK || isOpen(pPager->fd) );
--  if( rc==SQLITE_OK && pPager->dbSize>pPager->dbHintSize ){
--    sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize;
--    sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile);
--    pPager->dbHintSize = pPager->dbSize;
--  }
--
--  while( rc==SQLITE_OK && pList ){
--    Pgno pgno = pList->pgno;
--
--    /* If there are dirty pages in the page cache with page numbers greater
--    ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to
--    ** make the file smaller (presumably by auto-vacuum code). Do not write
--    ** any such pages to the file.
--    **
--    ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag
--    ** set (set by sqlite3PagerDontWrite()).
--    */
--    if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
--      i64 offset = (pgno-1)*(i64)pPager->pageSize;   /* Offset to write */
--      char *pData;                                   /* Data to write */    
--
--      assert( (pList->flags&PGHDR_NEED_SYNC)==0 );
--      if( pList->pgno==1 ) pager_write_changecounter(pList);
--
--      /* Encode the database */
--      CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData);
--
--      /* Write out the page data. */
--      rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
--
--      /* If page 1 was just written, update Pager.dbFileVers to match
--      ** the value now stored in the database file. If writing this 
--      ** page caused the database file to grow, update dbFileSize. 
--      */
--      if( pgno==1 ){
--        memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
--      }
--      if( pgno>pPager->dbFileSize ){
--        pPager->dbFileSize = pgno;
--      }
--
--      /* Update any backup objects copying the contents of this pager. */
--      sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData);
-+  sqlite3WalEndReadTransaction(pPager->pWal);
- 
--      PAGERTRACE(("STORE %d page %d hash(%08x)\n",
--                   PAGERID(pPager), pgno, pager_pagehash(pList)));
--      IOTRACE(("PGOUT %p %d\n", pPager, pgno));
--      PAGER_INCR(sqlite3_pager_writedb_count);
--      PAGER_INCR(pPager->nWrite);
--    }else{
--      PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno));
--    }
--    pager_set_pagehash(pList);
--    pList = pList->pDirty;
-+  rc = sqlite3WalBeginReadTransaction(pPager->pWal, &changed);
-+  if( rc!=SQLITE_OK || changed ){
-+    pager_reset(pPager);
-   }
- 
-   return rc;
- }
-+#endif
- 
- /*
--** Ensure that the sub-journal file is open. If it is already open, this 
--** function is a no-op.
-+** This function is called as part of the transition from PAGER_OPEN
-+** to PAGER_READER state to determine the size of the database file
-+** in pages (assuming the page size currently stored in Pager.pageSize).
- **
--** SQLITE_OK is returned if everything goes according to plan. An 
--** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() 
--** fails.
-+** If no error occurs, SQLITE_OK is returned and the size of the database
-+** in pages is stored in *pnPage. Otherwise, an error code (perhaps
-+** SQLITE_IOERR_FSTAT) is returned and *pnPage is left unmodified.
- */
--static int openSubJournal(Pager *pPager){
--  int rc = SQLITE_OK;
--  if( !isOpen(pPager->sjfd) ){
--    if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
--      sqlite3MemJournalOpen(pPager->sjfd);
--    }else{
--      rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
-+static int pagerPagecount(Pager *pPager, Pgno *pnPage){
-+  Pgno nPage;                     /* Value to return via *pnPage */
-+
-+  /* Query the WAL sub-system for the database size. The WalDbsize()
-+  ** function returns zero if the WAL is not open (i.e. Pager.pWal==0), or
-+  ** if the database size is not available. The database size is not
-+  ** available from the WAL sub-system if the log file is empty or
-+  ** contains no valid committed transactions.
-+  */
-+  assert( pPager->eState==PAGER_OPEN );
-+  assert( pPager->eLock>=SHARED_LOCK || pPager->noReadlock );
-+  nPage = sqlite3WalDbsize(pPager->pWal);
-+
-+  /* If the database size was not available from the WAL sub-system,
-+  ** determine it based on the size of the database file. If the size
-+  ** of the database file is not an integer multiple of the page-size,
-+  ** round down to the nearest page. Except, any file larger than 0
-+  ** bytes in size is considered to contain at least one page.
-+  */
-+  if( nPage==0 ){
-+    i64 n = 0;                    /* Size of db file in bytes */
-+    assert( isOpen(pPager->fd) || pPager->tempFile );
-+    if( isOpen(pPager->fd) ){
-+      int rc = sqlite3OsFileSize(pPager->fd, &n);
-+      if( rc!=SQLITE_OK ){
-+        return rc;
-+      }
-+    }
-+    nPage = (Pgno)(n / pPager->pageSize);
-+    if( nPage==0 && n>0 ){
-+      nPage = 1;
-     }
-   }
--  return rc;
-+
-+  /* If the current number of pages in the file is greater than the
-+  ** configured maximum pager number, increase the allowed limit so
-+  ** that the file can be read.
-+  */
-+  if( nPage>pPager->mxPgno ){
-+    pPager->mxPgno = (Pgno)nPage;
-+  }
-+
-+  *pnPage = nPage;
-+  return SQLITE_OK;
- }
- 
-+#ifndef SQLITE_OMIT_WAL
- /*
--** Append a record of the current state of page pPg to the sub-journal. 
--** It is the callers responsibility to use subjRequiresPage() to check 
--** that it is really required before calling this function.
-+** Check if the *-wal file that corresponds to the database opened by pPager
-+** exists if the database is not empy, or verify that the *-wal file does
-+** not exist (by deleting it) if the database file is empty.
- **
--** If successful, set the bit corresponding to pPg->pgno in the bitvecs
--** for all open savepoints before returning.
-+** If the database is not empty and the *-wal file exists, open the pager
-+** in WAL mode.  If the database is empty or if no *-wal file exists and
-+** if no error occurs, make sure Pager.journalMode is not set to
-+** PAGER_JOURNALMODE_WAL.
- **
--** This function returns SQLITE_OK if everything is successful, an IO
--** error code if the attempt to write to the sub-journal fails, or 
--** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint
--** bitvec.
-+** Return SQLITE_OK or an error code.
-+**
-+** The caller must hold a SHARED lock on the database file to call this
-+** function. Because an EXCLUSIVE lock on the db file is required to delete 
-+** a WAL on a none-empty database, this ensures there is no race condition 
-+** between the xAccess() below and an xDelete() being executed by some 
-+** other connection.
- */
--static int subjournalPage(PgHdr *pPg){
-+static int pagerOpenWalIfPresent(Pager *pPager){
-   int rc = SQLITE_OK;
--  Pager *pPager = pPg->pPager;
--  if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
-+  assert( pPager->eState==PAGER_OPEN );
-+  assert( pPager->eLock>=SHARED_LOCK || pPager->noReadlock );
- 
--    /* Open the sub-journal, if it has not already been opened */
--    assert( pPager->useJournal );
--    assert( isOpen(pPager->jfd) || pagerUseWal(pPager) );
--    assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 );
--    assert( pagerUseWal(pPager) 
--         || pageInJournal(pPg) 
--         || pPg->pgno>pPager->dbOrigSize 
--    );
--    rc = openSubJournal(pPager);
-+  if( !pPager->tempFile ){
-+    int isWal;                    /* True if WAL file exists */
-+    Pgno nPage;                   /* Size of the database file */
- 
--    /* If the sub-journal was opened successfully (or was already open),
--    ** write the journal record into the file.  */
-+    rc = pagerPagecount(pPager, &nPage);
-+    if( rc ) return rc;
-+    if( nPage==0 ){
-+      rc = sqlite3OsDelete(pPager->pVfs, pPager->zWal, 0);
-+      isWal = 0;
-+    }else{
-+      rc = sqlite3OsAccess(
-+          pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &isWal
-+      );
-+    }
-     if( rc==SQLITE_OK ){
--      void *pData = pPg->pData;
--      i64 offset = pPager->nSubRec*(4+pPager->pageSize);
--      char *pData2;
--  
--      CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
--      PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
--      rc = write32bits(pPager->sjfd, offset, pPg->pgno);
--      if( rc==SQLITE_OK ){
--        rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
-+      if( isWal ){
-+        testcase( sqlite3PcachePagecount(pPager->pPCache)==0 );
-+        rc = sqlite3PagerOpenWal(pPager, 0);
-+      }else if( pPager->journalMode==PAGER_JOURNALMODE_WAL ){
-+        pPager->journalMode = PAGER_JOURNALMODE_DELETE;
-       }
-     }
-   }
--  if( rc==SQLITE_OK ){
--    pPager->nSubRec++;
--    assert( pPager->nSavepoint>0 );
--    rc = addToSavepointBitvecs(pPager, pPg->pgno);
--  }
-   return rc;
- }
-+#endif
- 
- /*
--** This function is called by the pcache layer when it has reached some
--** soft memory limit. The first argument is a pointer to a Pager object
--** (cast as a void*). The pager is always 'purgeable' (not an in-memory
--** database). The second argument is a reference to a page that is 
--** currently dirty but has no outstanding references. The page
--** is always associated with the Pager object passed as the first 
--** argument.
-+** Playback savepoint pSavepoint. Or, if pSavepoint==NULL, then playback
-+** the entire master journal file. The case pSavepoint==NULL occurs when 
-+** a ROLLBACK TO command is invoked on a SAVEPOINT that is a transaction 
-+** savepoint.
- **
--** The job of this function is to make pPg clean by writing its contents
--** out to the database file, if possible. This may involve syncing the
--** journal file. 
-+** When pSavepoint is not NULL (meaning a non-transaction savepoint is 
-+** being rolled back), then the rollback consists of up to three stages,
-+** performed in the order specified:
- **
--** If successful, sqlite3PcacheMakeClean() is called on the page and
--** SQLITE_OK returned. If an IO error occurs while trying to make the
--** page clean, the IO error code is returned. If the page cannot be
--** made clean for some other reason, but no error occurs, then SQLITE_OK
--** is returned by sqlite3PcacheMakeClean() is not called.
-+**   * Pages are played back from the main journal starting at byte
-+**     offset PagerSavepoint.iOffset and continuing to 
-+**     PagerSavepoint.iHdrOffset, or to the end of the main journal
-+**     file if PagerSavepoint.iHdrOffset is zero.
-+**
-+**   * If PagerSavepoint.iHdrOffset is not zero, then pages are played
-+**     back starting from the journal header immediately following 
-+**     PagerSavepoint.iHdrOffset to the end of the main journal file.
-+**
-+**   * Pages are then played back from the sub-journal file, starting
-+**     with the PagerSavepoint.iSubRec and continuing to the end of
-+**     the journal file.
-+**
-+** Throughout the rollback process, each time a page is rolled back, the
-+** corresponding bit is set in a bitvec structure (variable pDone in the
-+** implementation below). This is used to ensure that a page is only
-+** rolled back the first time it is encountered in either journal.
-+**
-+** If pSavepoint is NULL, then pages are only played back from the main
-+** journal file. There is no need for a bitvec in this case.
-+**
-+** In either case, before playback commences the Pager.dbSize variable
-+** is reset to the value that it held at the start of the savepoint 
-+** (or transaction). No page with a page-number greater than this value
-+** is played back. If one is encountered it is simply skipped.
- */
--static int pagerStress(void *p, PgHdr *pPg){
--  Pager *pPager = (Pager *)p;
--  int rc = SQLITE_OK;
-+static int pagerPlaybackSavepoint(Pager *pPager, PagerSavepoint *pSavepoint){
-+  i64 szJ;                 /* Effective size of the main journal */
-+  i64 iHdrOff;             /* End of first segment of main-journal records */
-+  int rc = SQLITE_OK;      /* Return code */
-+  Bitvec *pDone = 0;       /* Bitvec to ensure pages played back only once */
- 
--  assert( pPg->pPager==pPager );
--  assert( pPg->flags&PGHDR_DIRTY );
-+  assert( pPager->eState!=PAGER_ERROR );
-+  assert( pPager->eState>=PAGER_WRITER_LOCKED );
- 
--  /* The doNotSyncSpill flag is set during times when doing a sync of
--  ** journal (and adding a new header) is not allowed.  This occurs
--  ** during calls to sqlite3PagerWrite() while trying to journal multiple
--  ** pages belonging to the same sector.
--  **
--  ** The doNotSpill flag inhibits all cache spilling regardless of whether
--  ** or not a sync is required.  This is set during a rollback.
--  **
--  ** Spilling is also prohibited when in an error state since that could
--  ** lead to database corruption.   In the current implementaton it 
--  ** is impossible for sqlite3PCacheFetch() to be called with createFlag==1
--  ** while in the error state, hence it is impossible for this routine to
--  ** be called in the error state.  Nevertheless, we include a NEVER()
--  ** test for the error state as a safeguard against future changes.
-+  /* Allocate a bitvec to use to store the set of pages rolled back */
-+  if( pSavepoint ){
-+    pDone = sqlite3BitvecCreate(pSavepoint->nOrig);
-+    if( !pDone ){
-+      return SQLITE_NOMEM;
-+    }
-+  }
-+
-+  /* Set the database size back to the value it was before the savepoint 
-+  ** being reverted was opened.
-   */
--  if( NEVER(pPager->errCode) ) return SQLITE_OK;
--  if( pPager->doNotSpill ) return SQLITE_OK;
--  if( pPager->doNotSyncSpill && (pPg->flags & PGHDR_NEED_SYNC)!=0 ){
--    return SQLITE_OK;
-+  pPager->dbSize = pSavepoint ? pSavepoint->nOrig : pPager->dbOrigSize;
-+  pPager->changeCountDone = pPager->tempFile;
-+
-+  if( !pSavepoint && pagerUseWal(pPager) ){
-+    return pagerRollbackWal(pPager);
-   }
- 
--  pPg->pDirty = 0;
--  if( pagerUseWal(pPager) ){
--    /* Write a single frame for this page to the log. */
--    if( subjRequiresPage(pPg) ){ 
--      rc = subjournalPage(pPg); 
--    }
--    if( rc==SQLITE_OK ){
--      rc = pagerWalFrames(pPager, pPg, 0, 0, 0);
-+  /* Use pPager->journalOff as the effective size of the main rollback
-+  ** journal.  The actual file might be larger than this in
-+  ** PAGER_JOURNALMODE_TRUNCATE or PAGER_JOURNALMODE_PERSIST.  But anything
-+  ** past pPager->journalOff is off-limits to us.
-+  */
-+  szJ = pPager->journalOff;
-+  assert( pagerUseWal(pPager)==0 || szJ==0 );
-+
-+  /* Begin by rolling back records from the main journal starting at
-+  ** PagerSavepoint.iOffset and continuing to the next journal header.
-+  ** There might be records in the main journal that have a page number
-+  ** greater than the current database size (pPager->dbSize) but those
-+  ** will be skipped automatically.  Pages are added to pDone as they
-+  ** are played back.
-+  */
-+  if( pSavepoint && !pagerUseWal(pPager) ){
-+    iHdrOff = pSavepoint->iHdrOffset ? pSavepoint->iHdrOffset : szJ;
-+    pPager->journalOff = pSavepoint->iOffset;
-+    while( rc==SQLITE_OK && pPager->journalOff<iHdrOff ){
-+      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
-     }
-+    assert( rc!=SQLITE_DONE );
-   }else{
--  
--    /* Sync the journal file if required. */
--    if( pPg->flags&PGHDR_NEED_SYNC 
--     || pPager->eState==PAGER_WRITER_CACHEMOD
-+    pPager->journalOff = 0;
-+  }
-+
-+  /* Continue rolling back records out of the main journal starting at
-+  ** the first journal header seen and continuing until the effective end
-+  ** of the main journal file.  Continue to skip out-of-range pages and
-+  ** continue adding pages rolled back to pDone.
-+  */
-+  while( rc==SQLITE_OK && pPager->journalOff<szJ ){
-+    u32 ii;            /* Loop counter */
-+    u32 nJRec = 0;     /* Number of Journal Records */
-+    u32 dummy;
-+    rc = readJournalHdr(pPager, 0, szJ, &nJRec, &dummy);
-+    assert( rc!=SQLITE_DONE );
-+
-+    /*
-+    ** The "pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff"
-+    ** test is related to ticket #2565.  See the discussion in the
-+    ** pager_playback() function for additional information.
-+    */
-+    if( nJRec==0 
-+     && pPager->journalHdr+JOURNAL_HDR_SZ(pPager)==pPager->journalOff
-     ){
--      rc = syncJournal(pPager, 1);
-+      nJRec = (u32)((szJ - pPager->journalOff)/JOURNAL_PG_SZ(pPager));
-     }
--  
--    /* If the page number of this page is larger than the current size of
--    ** the database image, it may need to be written to the sub-journal.
--    ** This is because the call to pager_write_pagelist() below will not
--    ** actually write data to the file in this case.
--    **
--    ** Consider the following sequence of events:
--    **
--    **   BEGIN;
--    **     <journal page X>
--    **     <modify page X>
--    **     SAVEPOINT sp;
--    **       <shrink database file to Y pages>
--    **       pagerStress(page X)
--    **     ROLLBACK TO sp;
--    **
--    ** If (X>Y), then when pagerStress is called page X will not be written
--    ** out to the database file, but will be dropped from the cache. Then,
--    ** following the "ROLLBACK TO sp" statement, reading page X will read
--    ** data from the database file. This will be the copy of page X as it
--    ** was when the transaction started, not as it was when "SAVEPOINT sp"
--    ** was executed.
--    **
--    ** The solution is to write the current data for page X into the 
--    ** sub-journal file now (if it is not already there), so that it will
--    ** be restored to its current value when the "ROLLBACK TO sp" is 
--    ** executed.
--    */
--    if( NEVER(
--        rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
--    ) ){
--      rc = subjournalPage(pPg);
-+    for(ii=0; rc==SQLITE_OK && ii<nJRec && pPager->journalOff<szJ; ii++){
-+      rc = pager_playback_one_page(pPager, &pPager->journalOff, pDone, 1, 1);
-     }
--  
--    /* Write the contents of the page out to the database file. */
--    if( rc==SQLITE_OK ){
--      assert( (pPg->flags&PGHDR_NEED_SYNC)==0 );
--      rc = pager_write_pagelist(pPager, pPg);
-+    assert( rc!=SQLITE_DONE );
-+  }
-+  assert( rc!=SQLITE_OK || pPager->journalOff>=szJ );
-+
-+  /* Finally,  rollback pages from the sub-journal.  Page that were
-+  ** previously rolled back out of the main journal (and are hence in pDone)
-+  ** will be skipped.  Out-of-range pages are also skipped.
-+  */
-+  if( pSavepoint ){
-+    u32 ii;            /* Loop counter */
-+    i64 offset = pSavepoint->iSubRec*(4+pPager->pageSize);
-+
-+    if( pagerUseWal(pPager) ){
-+      rc = sqlite3WalSavepointUndo(pPager->pWal, pSavepoint->aWalData);
-+    }
-+    for(ii=pSavepoint->iSubRec; rc==SQLITE_OK && ii<pPager->nSubRec; ii++){
-+      assert( offset==ii*(4+pPager->pageSize) );
-+      rc = pager_playback_one_page(pPager, &offset, pDone, 0, 1);
-     }
-+    assert( rc!=SQLITE_DONE );
-   }
- 
--  /* Mark the page as clean. */
-+  sqlite3BitvecDestroy(pDone);
-   if( rc==SQLITE_OK ){
--    PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
--    sqlite3PcacheMakeClean(pPg);
-+    pPager->journalOff = szJ;
-   }
- 
--  return pager_error(pPager, rc); 
-+  return rc;
- }
- 
-+/*
-+** Change the maximum number of in-memory pages that are allowed.
-+*/
-+SQLITE_PRIVATE void sqlite3PagerSetCachesize(Pager *pPager, int mxPage){
-+  sqlite3PcacheSetCachesize(pPager->pPCache, mxPage);
-+}
- 
- /*
--** Allocate and initialize a new Pager object and put a pointer to it
--** in *ppPager. The pager should eventually be freed by passing it
--** to sqlite3PagerClose().
-+** Adjust the robustness of the database to damage due to OS crashes
-+** or power failures by changing the number of syncs()s when writing
-+** the rollback journal.  There are three levels:
- **
--** The zFilename argument is the path to the database file to open.
--** If zFilename is NULL then a randomly-named temporary file is created
--** and used as the file to be cached. Temporary files are be deleted
--** automatically when they are closed. If zFilename is ":memory:" then 
--** all information is held in cache. It is never written to disk. 
--** This can be used to implement an in-memory database.
-+**    OFF       sqlite3OsSync() is never called.  This is the default
-+**              for temporary and transient files.
- **
--** The nExtra parameter specifies the number of bytes of space allocated
--** along with each page reference. This space is available to the user
--** via the sqlite3PagerGetExtra() API.
-+**    NORMAL    The journal is synced once before writes begin on the
-+**              database.  This is normally adequate protection, but
-+**              it is theoretically possible, though very unlikely,
-+**              that an inopertune power failure could leave the journal
-+**              in a state which would cause damage to the database
-+**              when it is rolled back.
- **
--** The flags argument is used to specify properties that affect the
--** operation of the pager. It should be passed some bitwise combination
--** of the PAGER_OMIT_JOURNAL and PAGER_NO_READLOCK flags.
-+**    FULL      The journal is synced twice before writes begin on the
-+**              database (with some additional information - the nRec field
-+**              of the journal header - being written in between the two
-+**              syncs).  If we assume that writing a
-+**              single disk sector is atomic, then this mode provides
-+**              assurance that the journal will not be corrupted to the
-+**              point of causing damage to the database during rollback.
- **
--** The vfsFlags parameter is a bitmask to pass to the flags parameter
--** of the xOpen() method of the supplied VFS when opening files. 
-+** The above is for a rollback-journal mode.  For WAL mode, OFF continues
-+** to mean that no syncs ever occur.  NORMAL means that the WAL is synced
-+** prior to the start of checkpoint and that the database file is synced
-+** at the conclusion of the checkpoint if the entire content of the WAL
-+** was written back into the database.  But no sync operations occur for
-+** an ordinary commit in NORMAL mode with WAL.  FULL means that the WAL
-+** file is synced following each commit operation, in addition to the
-+** syncs associated with NORMAL.
- **
--** If the pager object is allocated and the specified file opened 
--** successfully, SQLITE_OK is returned and *ppPager set to point to
--** the new pager object. If an error occurs, *ppPager is set to NULL
--** and error code returned. This function may return SQLITE_NOMEM
--** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or 
--** various SQLITE_IO_XXX errors.
-+** Do not confuse synchronous=FULL with SQLITE_SYNC_FULL.  The
-+** SQLITE_SYNC_FULL macro means to use the MacOSX-style full-fsync
-+** using fcntl(F_FULLFSYNC).  SQLITE_SYNC_NORMAL means to do an
-+** ordinary fsync() call.  There is no difference between SQLITE_SYNC_FULL
-+** and SQLITE_SYNC_NORMAL on platforms other than MacOSX.  But the
-+** synchronous=FULL versus synchronous=NORMAL setting determines when
-+** the xSync primitive is called and is relevant to all platforms.
-+**
-+** Numeric values associated with these states are OFF==1, NORMAL=2,
-+** and FULL=3.
- */
--SQLITE_PRIVATE int sqlite3PagerOpen(
--  sqlite3_vfs *pVfs,       /* The virtual file system to use */
--  Pager **ppPager,         /* OUT: Return the Pager structure here */
--  const char *zFilename,   /* Name of the database file to open */
--  int nExtra,              /* Extra bytes append to each in-memory page */
--  int flags,               /* flags controlling this file */
--  int vfsFlags,            /* flags passed through to sqlite3_vfs.xOpen() */
--  void (*xReinit)(DbPage*) /* Function to reinitialize pages */
-+#ifndef SQLITE_OMIT_PAGER_PRAGMAS
-+SQLITE_PRIVATE void sqlite3PagerSetSafetyLevel(
-+  Pager *pPager,        /* The pager to set safety level for */
-+  int level,            /* PRAGMA synchronous.  1=OFF, 2=NORMAL, 3=FULL */  
-+  int bFullFsync,       /* PRAGMA fullfsync */
-+  int bCkptFullFsync    /* PRAGMA checkpoint_fullfsync */
- ){
--  u8 *pPtr;
--  Pager *pPager = 0;       /* Pager object to allocate and return */
--  int rc = SQLITE_OK;      /* Return code */
--  int tempFile = 0;        /* True for temp files (incl. in-memory files) */
--  int memDb = 0;           /* True if this is an in-memory file */
--  int readOnly = 0;        /* True if this is a read-only file */
--  int journalFileSize;     /* Bytes to allocate for each journal fd */
--  char *zPathname = 0;     /* Full path to database file */
--  int nPathname = 0;       /* Number of bytes in zPathname */
--  int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */
--  int noReadlock = (flags & PAGER_NO_READLOCK)!=0;  /* True to omit read-lock */
--  int pcacheSize = sqlite3PcacheSize();       /* Bytes to allocate for PCache */
--  u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;  /* Default page size */
--  const char *zUri = 0;    /* URI args to copy */
--  int nUri = 0;            /* Number of bytes of URI args at *zUri */
-+  assert( level>=1 && level<=3 );
-+  pPager->noSync =  (level==1 || pPager->tempFile) ?1:0;
-+  pPager->fullSync = (level==3 && !pPager->tempFile) ?1:0;
-+  if( pPager->noSync ){
-+    pPager->syncFlags = 0;
-+    pPager->ckptSyncFlags = 0;
-+  }else if( bFullFsync ){
-+    pPager->syncFlags = SQLITE_SYNC_FULL;
-+    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
-+  }else if( bCkptFullFsync ){
-+    pPager->syncFlags = SQLITE_SYNC_NORMAL;
-+    pPager->ckptSyncFlags = SQLITE_SYNC_FULL;
-+  }else{
-+    pPager->syncFlags = SQLITE_SYNC_NORMAL;
-+    pPager->ckptSyncFlags = SQLITE_SYNC_NORMAL;
-+  }
-+}
-+#endif
-+
-+/*
-+** The following global variable is incremented whenever the library
-+** attempts to open a temporary file.  This information is used for
-+** testing and analysis only.  
-+*/
-+#ifdef SQLITE_TEST
-+SQLITE_API int sqlite3_opentemp_count = 0;
-+#endif
-+
-+/*
-+** Open a temporary file.
-+**
-+** Write the file descriptor into *pFile. Return SQLITE_OK on success 
-+** or some other error code if we fail. The OS will automatically 
-+** delete the temporary file when it is closed.
-+**
-+** The flags passed to the VFS layer xOpen() call are those specified
-+** by parameter vfsFlags ORed with the following:
-+**
-+**     SQLITE_OPEN_READWRITE
-+**     SQLITE_OPEN_CREATE
-+**     SQLITE_OPEN_EXCLUSIVE
-+**     SQLITE_OPEN_DELETEONCLOSE
-+*/
-+static int pagerOpentemp(
-+  Pager *pPager,        /* The pager object */
-+  sqlite3_file *pFile,  /* Write the file descriptor here */
-+  int vfsFlags          /* Flags passed through to the VFS */
-+){
-+  int rc;               /* Return code */
-+
-+#ifdef SQLITE_TEST
-+  sqlite3_opentemp_count++;  /* Used for testing and analysis only */
-+#endif
-+
-+  vfsFlags |=  SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE |
-+            SQLITE_OPEN_EXCLUSIVE | SQLITE_OPEN_DELETEONCLOSE;
-+  rc = sqlite3OsOpen(pPager->pVfs, 0, pFile, vfsFlags, 0);
-+  assert( rc!=SQLITE_OK || isOpen(pFile) );
-+  return rc;
-+}
-+
-+/*
-+** Set the busy handler function.
-+**
-+** The pager invokes the busy-handler if sqlite3OsLock() returns 
-+** SQLITE_BUSY when trying to upgrade from no-lock to a SHARED lock,
-+** or when trying to upgrade from a RESERVED lock to an EXCLUSIVE 
-+** lock. It does *not* invoke the busy handler when upgrading from
-+** SHARED to RESERVED, or when upgrading from SHARED to EXCLUSIVE
-+** (which occurs during hot-journal rollback). Summary:
-+**
-+**   Transition                        | Invokes xBusyHandler
-+**   --------------------------------------------------------
-+**   NO_LOCK       -> SHARED_LOCK      | Yes
-+**   SHARED_LOCK   -> RESERVED_LOCK    | No
-+**   SHARED_LOCK   -> EXCLUSIVE_LOCK   | No
-+**   RESERVED_LOCK -> EXCLUSIVE_LOCK   | Yes
-+**
-+** If the busy-handler callback returns non-zero, the lock is 
-+** retried. If it returns zero, then the SQLITE_BUSY error is
-+** returned to the caller of the pager API function.
-+*/
-+SQLITE_PRIVATE void sqlite3PagerSetBusyhandler(
-+  Pager *pPager,                       /* Pager object */
-+  int (*xBusyHandler)(void *),         /* Pointer to busy-handler function */
-+  void *pBusyHandlerArg                /* Argument to pass to xBusyHandler */
-+){  
-+  pPager->xBusyHandler = xBusyHandler;
-+  pPager->pBusyHandlerArg = pBusyHandlerArg;
-+}
-+
-+/*
-+** Change the page size used by the Pager object. The new page size 
-+** is passed in *pPageSize.
-+**
-+** If the pager is in the error state when this function is called, it
-+** is a no-op. The value returned is the error state error code (i.e. 
-+** one of SQLITE_IOERR, an SQLITE_IOERR_xxx sub-code or SQLITE_FULL).
-+**
-+** Otherwise, if all of the following are true:
-+**
-+**   * the new page size (value of *pPageSize) is valid (a power 
-+**     of two between 512 and SQLITE_MAX_PAGE_SIZE, inclusive), and
-+**
-+**   * there are no outstanding page references, and
-+**
-+**   * the database is either not an in-memory database or it is
-+**     an in-memory database that currently consists of zero pages.
-+**
-+** then the pager object page size is set to *pPageSize.
-+**
-+** If the page size is changed, then this function uses sqlite3PagerMalloc() 
-+** to obtain a new Pager.pTmpSpace buffer. If this allocation attempt 
-+** fails, SQLITE_NOMEM is returned and the page size remains unchanged. 
-+** In all other cases, SQLITE_OK is returned.
-+**
-+** If the page size is not changed, either because one of the enumerated
-+** conditions above is not true, the pager was in error state when this
-+** function was called, or because the memory allocation attempt failed, 
-+** then *pPageSize is set to the old, retained page size before returning.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerSetPagesize(Pager *pPager, u32 *pPageSize, int nReserve){
-+  int rc = SQLITE_OK;
- 
--  /* Figure out how much space is required for each journal file-handle
--  ** (there are two of them, the main journal and the sub-journal). This
--  ** is the maximum space required for an in-memory journal file handle 
--  ** and a regular journal file-handle. Note that a "regular journal-handle"
--  ** may be a wrapper capable of caching the first portion of the journal
--  ** file in memory to implement the atomic-write optimization (see 
--  ** source file journal.c).
-+  /* It is not possible to do a full assert_pager_state() here, as this
-+  ** function may be called from within PagerOpen(), before the state
-+  ** of the Pager object is internally consistent.
-+  **
-+  ** At one point this function returned an error if the pager was in 
-+  ** PAGER_ERROR state. But since PAGER_ERROR state guarantees that
-+  ** there is at least one outstanding page reference, this function
-+  ** is a no-op for that case anyhow.
-   */
--  if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){
--    journalFileSize = ROUND8(sqlite3JournalSize(pVfs));
--  }else{
--    journalFileSize = ROUND8(sqlite3MemJournalSize());
--  }
--
--  /* Set the output variable to NULL in case an error occurs. */
--  *ppPager = 0;
- 
--#ifndef SQLITE_OMIT_MEMORYDB
--  if( flags & PAGER_MEMORY ){
--    memDb = 1;
--    zFilename = 0;
--  }
--#endif
-+  u32 pageSize = *pPageSize;
-+  assert( pageSize==0 || (pageSize>=512 && pageSize<=SQLITE_MAX_PAGE_SIZE) );
-+  if( (pPager->memDb==0 || pPager->dbSize==0)
-+   && sqlite3PcacheRefCount(pPager->pPCache)==0 
-+   && pageSize && pageSize!=(u32)pPager->pageSize 
-+  ){
-+    char *pNew = NULL;             /* New temp space */
-+    i64 nByte = 0;
- 
--  /* Compute and store the full pathname in an allocated buffer pointed
--  ** to by zPathname, length nPathname. Or, if this is a temporary file,
--  ** leave both nPathname and zPathname set to 0.
--  */
--  if( zFilename && zFilename[0] ){
--    const char *z;
--    nPathname = pVfs->mxPathname+1;
--    zPathname = sqlite3Malloc(nPathname*2);
--    if( zPathname==0 ){
--      return SQLITE_NOMEM;
--    }
--    zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */
--    rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
--    nPathname = sqlite3Strlen30(zPathname);
--    z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1];
--    while( *z ){
--      z += sqlite3Strlen30(z)+1;
--      z += sqlite3Strlen30(z)+1;
-+    if( pPager->eState>PAGER_OPEN && isOpen(pPager->fd) ){
-+      rc = sqlite3OsFileSize(pPager->fd, &nByte);
-     }
--    nUri = &z[1] - zUri;
--    if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){
--      /* This branch is taken when the journal path required by
--      ** the database being opened will be more than pVfs->mxPathname
--      ** bytes in length. This means the database cannot be opened,
--      ** as it will not be possible to open the journal file or even
--      ** check for a hot-journal before reading.
--      */
--      rc = SQLITE_CANTOPEN_BKPT;
-+    if( rc==SQLITE_OK ){
-+      pNew = (char *)sqlite3PageMalloc(pageSize);
-+      if( !pNew ) rc = SQLITE_NOMEM;
-     }
--    if( rc!=SQLITE_OK ){
--      sqlite3_free(zPathname);
--      return rc;
-+
-+    if( rc==SQLITE_OK ){
-+      pager_reset(pPager);
-+      pPager->dbSize = (Pgno)(nByte/pageSize);
-+      pPager->pageSize = pageSize;
-+      sqlite3PageFree(pPager->pTmpSpace);
-+      pPager->pTmpSpace = pNew;
-+      sqlite3PcacheSetPageSize(pPager->pPCache, pageSize);
-     }
-   }
- 
--  /* Allocate memory for the Pager structure, PCache object, the
--  ** three file descriptors, the database file name and the journal 
--  ** file name. The layout in memory is as follows:
--  **
--  **     Pager object                    (sizeof(Pager) bytes)
--  **     PCache object                   (sqlite3PcacheSize() bytes)
--  **     Database file handle            (pVfs->szOsFile bytes)
--  **     Sub-journal file handle         (journalFileSize bytes)
--  **     Main journal file handle        (journalFileSize bytes)
--  **     Database file name              (nPathname+1 bytes)
--  **     Journal file name               (nPathname+8+1 bytes)
--  */
--  pPtr = (u8 *)sqlite3MallocZero(
--    ROUND8(sizeof(*pPager)) +      /* Pager structure */
--    ROUND8(pcacheSize) +           /* PCache object */
--    ROUND8(pVfs->szOsFile) +       /* The main db file */
--    journalFileSize * 2 +          /* The two journal files */ 
--    nPathname + 1 + nUri +         /* zFilename */
--    nPathname + 8 + 1              /* zJournal */
--#ifndef SQLITE_OMIT_WAL
--    + nPathname + 4 + 1              /* zWal */
--#endif
--  );
--  assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) );
--  if( !pPtr ){
--    sqlite3_free(zPathname);
--    return SQLITE_NOMEM;
-+  *pPageSize = pPager->pageSize;
-+  if( rc==SQLITE_OK ){
-+    if( nReserve<0 ) nReserve = pPager->nReserve;
-+    assert( nReserve>=0 && nReserve<1000 );
-+    pPager->nReserve = (i16)nReserve;
-+    pagerReportSize(pPager);
-   }
--  pPager =              (Pager*)(pPtr);
--  pPager->pPCache =    (PCache*)(pPtr += ROUND8(sizeof(*pPager)));
--  pPager->fd =   (sqlite3_file*)(pPtr += ROUND8(pcacheSize));
--  pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile));
--  pPager->jfd =  (sqlite3_file*)(pPtr += journalFileSize);
--  pPager->zFilename =    (char*)(pPtr += journalFileSize);
--  assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) );
-+  return rc;
-+}
- 
--  /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */
--  if( zPathname ){
--    assert( nPathname>0 );
--    pPager->zJournal =   (char*)(pPtr += nPathname + 1 + nUri);
--    memcpy(pPager->zFilename, zPathname, nPathname);
--    memcpy(&pPager->zFilename[nPathname+1], zUri, nUri);
--    memcpy(pPager->zJournal, zPathname, nPathname);
--    memcpy(&pPager->zJournal[nPathname], "-journal", 8);
--    sqlite3FileSuffix3(pPager->zFilename, pPager->zJournal);
--#ifndef SQLITE_OMIT_WAL
--    pPager->zWal = &pPager->zJournal[nPathname+8+1];
--    memcpy(pPager->zWal, zPathname, nPathname);
--    memcpy(&pPager->zWal[nPathname], "-wal", 4);
--    sqlite3FileSuffix3(pPager->zFilename, pPager->zWal);
--#endif
--    sqlite3_free(zPathname);
--  }
--  pPager->pVfs = pVfs;
--  pPager->vfsFlags = vfsFlags;
-+/*
-+** Return a pointer to the "temporary page" buffer held internally
-+** by the pager.  This is a buffer that is big enough to hold the
-+** entire content of a database page.  This buffer is used internally
-+** during rollback and will be overwritten whenever a rollback
-+** occurs.  But other modules are free to use it too, as long as
-+** no rollbacks are happening.
-+*/
-+SQLITE_PRIVATE void *sqlite3PagerTempSpace(Pager *pPager){
-+  return pPager->pTmpSpace;
-+}
- 
--  /* Open the pager file.
--  */
--  if( zFilename && zFilename[0] ){
--    int fout = 0;                    /* VFS flags returned by xOpen() */
--    rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout);
--    assert( !memDb );
--    readOnly = (fout&SQLITE_OPEN_READONLY);
-+/*
-+** Attempt to set the maximum database page count if mxPage is positive. 
-+** Make no changes if mxPage is zero or negative.  And never reduce the
-+** maximum page count below the current size of the database.
-+**
-+** Regardless of mxPage, return the current maximum page count.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerMaxPageCount(Pager *pPager, int mxPage){
-+  if( mxPage>0 ){
-+    pPager->mxPgno = mxPage;
-+  }
-+  assert( pPager->eState!=PAGER_OPEN );      /* Called only by OP_MaxPgcnt */
-+  assert( pPager->mxPgno>=pPager->dbSize );  /* OP_MaxPgcnt enforces this */
-+  return pPager->mxPgno;
-+}
- 
--    /* If the file was successfully opened for read/write access,
--    ** choose a default page size in case we have to create the
--    ** database file. The default page size is the maximum of:
--    **
--    **    + SQLITE_DEFAULT_PAGE_SIZE,
--    **    + The value returned by sqlite3OsSectorSize()
--    **    + The largest page size that can be written atomically.
--    */
--    if( rc==SQLITE_OK && !readOnly ){
--      setSectorSize(pPager);
--      assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE);
--      if( szPageDflt<pPager->sectorSize ){
--        if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
--          szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
--        }else{
--          szPageDflt = (u32)pPager->sectorSize;
--        }
--      }
--#ifdef SQLITE_ENABLE_ATOMIC_WRITE
--      {
--        int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
--        int ii;
--        assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
--        assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
--        assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
--        for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
--          if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){
--            szPageDflt = ii;
--          }
--        }
--      }
-+/*
-+** The following set of routines are used to disable the simulated
-+** I/O error mechanism.  These routines are used to avoid simulated
-+** errors in places where we do not care about errors.
-+**
-+** Unless -DSQLITE_TEST=1 is used, these routines are all no-ops
-+** and generate no code.
-+*/
-+#ifdef SQLITE_TEST
-+SQLITE_API extern int sqlite3_io_error_pending;
-+SQLITE_API extern int sqlite3_io_error_hit;
-+static int saved_cnt;
-+void disable_simulated_io_errors(void){
-+  saved_cnt = sqlite3_io_error_pending;
-+  sqlite3_io_error_pending = -1;
-+}
-+void enable_simulated_io_errors(void){
-+  sqlite3_io_error_pending = saved_cnt;
-+}
-+#else
-+# define disable_simulated_io_errors()
-+# define enable_simulated_io_errors()
- #endif
--    }
--  }else{
--    /* If a temporary file is requested, it is not opened immediately.
--    ** In this case we accept the default page size and delay actually
--    ** opening the file until the first call to OsWrite().
--    **
--    ** This branch is also run for an in-memory database. An in-memory
--    ** database is the same as a temp-file that is never written out to
--    ** disk and uses an in-memory rollback journal.
--    */ 
--    tempFile = 1;
--    pPager->eState = PAGER_READER;
--    pPager->eLock = EXCLUSIVE_LOCK;
--    readOnly = (vfsFlags&SQLITE_OPEN_READONLY);
--  }
- 
--  /* The following call to PagerSetPagesize() serves to set the value of 
--  ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer.
--  */
--  if( rc==SQLITE_OK ){
--    assert( pPager->memDb==0 );
--    rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
--    testcase( rc!=SQLITE_OK );
--  }
-+/*
-+** Read the first N bytes from the beginning of the file into memory
-+** that pDest points to. 
-+**
-+** If the pager was opened on a transient file (zFilename==""), or
-+** opened on a file less than N bytes in size, the output buffer is
-+** zeroed and SQLITE_OK returned. The rationale for this is that this 
-+** function is used to read database headers, and a new transient or
-+** zero sized database has a header than consists entirely of zeroes.
-+**
-+** If any IO error apart from SQLITE_IOERR_SHORT_READ is encountered,
-+** the error code is returned to the caller and the contents of the
-+** output buffer undefined.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerReadFileheader(Pager *pPager, int N, unsigned char *pDest){
-+  int rc = SQLITE_OK;
-+  memset(pDest, 0, N);
-+  assert( isOpen(pPager->fd) || pPager->tempFile );
- 
--  /* If an error occurred in either of the blocks above, free the 
--  ** Pager structure and close the file.
-+  /* This routine is only called by btree immediately after creating
-+  ** the Pager object.  There has not been an opportunity to transition
-+  ** to WAL mode yet.
-   */
--  if( rc!=SQLITE_OK ){
--    assert( !pPager->pTmpSpace );
--    sqlite3OsClose(pPager->fd);
--    sqlite3_free(pPager);
--    return rc;
--  }
--
--  /* Initialize the PCache object. */
--  assert( nExtra<1000 );
--  nExtra = ROUND8(nExtra);
--  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
--                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
--
--  PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
--  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
-+  assert( !pagerUseWal(pPager) );
- 
--  pPager->useJournal = (u8)useJournal;
--  pPager->noReadlock = (noReadlock && readOnly) ?1:0;
--  /* pPager->stmtOpen = 0; */
--  /* pPager->stmtInUse = 0; */
--  /* pPager->nRef = 0; */
--  /* pPager->stmtSize = 0; */
--  /* pPager->stmtJSize = 0; */
--  /* pPager->nPage = 0; */
--  pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
--  /* pPager->state = PAGER_UNLOCK; */
--#if 0
--  assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
--#endif
--  /* pPager->errMask = 0; */
--  pPager->tempFile = (u8)tempFile;
--  assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
--          || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
--  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
--  pPager->exclusiveMode = (u8)tempFile; 
--  pPager->changeCountDone = pPager->tempFile;
--  pPager->memDb = (u8)memDb;
--  pPager->readOnly = (u8)readOnly;
--  assert( useJournal || pPager->tempFile );
--  pPager->noSync = pPager->tempFile;
--  pPager->fullSync = pPager->noSync ?0:1;
--  pPager->syncFlags = pPager->noSync ? 0 : SQLITE_SYNC_NORMAL;
--  pPager->ckptSyncFlags = pPager->syncFlags;
--  /* pPager->pFirst = 0; */
--  /* pPager->pFirstSynced = 0; */
--  /* pPager->pLast = 0; */
--  pPager->nExtra = (u16)nExtra;
--  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
--  assert( isOpen(pPager->fd) || tempFile );
--  setSectorSize(pPager);
--  if( !useJournal ){
--    pPager->journalMode = PAGER_JOURNALMODE_OFF;
--  }else if( memDb ){
--    pPager->journalMode = PAGER_JOURNALMODE_MEMORY;
-+  if( isOpen(pPager->fd) ){
-+    IOTRACE(("DBHDR %p 0 %d\n", pPager, N))
-+    rc = sqlite3OsRead(pPager->fd, pDest, N, 0);
-+    if( rc==SQLITE_IOERR_SHORT_READ ){
-+      rc = SQLITE_OK;
-+    }
-   }
--  /* pPager->xBusyHandler = 0; */
--  /* pPager->pBusyHandlerArg = 0; */
--  pPager->xReiniter = xReinit;
--  /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
--
--  *ppPager = pPager;
--  return SQLITE_OK;
-+  return rc;
- }
- 
-+/*
-+** This function may only be called when a read-transaction is open on
-+** the pager. It returns the total number of pages in the database.
-+**
-+** However, if the file is between 1 and <page-size> bytes in size, then 
-+** this is considered a 1 page file.
-+*/
-+SQLITE_PRIVATE void sqlite3PagerPagecount(Pager *pPager, int *pnPage){
-+  assert( pPager->eState>=PAGER_READER );
-+  assert( pPager->eState!=PAGER_WRITER_FINISHED );
-+  *pnPage = (int)pPager->dbSize;
-+}
- 
- 
- /*
--** This function is called after transitioning from PAGER_UNLOCK to
--** PAGER_SHARED state. It tests if there is a hot journal present in
--** the file-system for the given pager. A hot journal is one that 
--** needs to be played back. According to this function, a hot-journal
--** file exists if the following criteria are met:
-+** Try to obtain a lock of type locktype on the database file. If
-+** a similar or greater lock is already held, this function is a no-op
-+** (returning SQLITE_OK immediately).
- **
--**   * The journal file exists in the file system, and
--**   * No process holds a RESERVED or greater lock on the database file, and
--**   * The database file itself is greater than 0 bytes in size, and
--**   * The first byte of the journal file exists and is not 0x00.
-+** Otherwise, attempt to obtain the lock using sqlite3OsLock(). Invoke 
-+** the busy callback if the lock is currently not available. Repeat 
-+** until the busy callback returns false or until the attempt to 
-+** obtain the lock succeeds.
- **
--** If the current size of the database file is 0 but a journal file
--** exists, that is probably an old journal left over from a prior
--** database with the same name. In this case the journal file is
--** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK
--** is returned.
-+** Return SQLITE_OK on success and an error code if we cannot obtain
-+** the lock. If the lock is obtained successfully, set the Pager.state 
-+** variable to locktype before returning.
-+*/
-+static int pager_wait_on_lock(Pager *pPager, int locktype){
-+  int rc;                              /* Return code */
-+
-+  /* Check that this is either a no-op (because the requested lock is 
-+  ** already held), or one of the transitions that the busy-handler
-+  ** may be invoked during, according to the comment above
-+  ** sqlite3PagerSetBusyhandler().
-+  */
-+  assert( (pPager->eLock>=locktype)
-+       || (pPager->eLock==NO_LOCK && locktype==SHARED_LOCK)
-+       || (pPager->eLock==RESERVED_LOCK && locktype==EXCLUSIVE_LOCK)
-+  );
-+
-+  do {
-+    rc = pagerLockDb(pPager, locktype);
-+  }while( rc==SQLITE_BUSY && pPager->xBusyHandler(pPager->pBusyHandlerArg) );
-+  return rc;
-+}
-+
-+/*
-+** Function assertTruncateConstraint(pPager) checks that one of the 
-+** following is true for all dirty pages currently in the page-cache:
- **
--** This routine does not check if there is a master journal filename
--** at the end of the file. If there is, and that master journal file
--** does not exist, then the journal file is not really hot. In this
--** case this routine will return a false-positive. The pager_playback()
--** routine will discover that the journal file is not really hot and 
--** will not roll it back. 
-+**   a) The page number is less than or equal to the size of the 
-+**      current database image, in pages, OR
- **
--** If a hot-journal file is found to exist, *pExists is set to 1 and 
--** SQLITE_OK returned. If no hot-journal file is present, *pExists is
--** set to 0 and SQLITE_OK returned. If an IO error occurs while trying
--** to determine whether or not a hot-journal file exists, the IO error
--** code is returned and the value of *pExists is undefined.
-+**   b) if the page content were written at this time, it would not
-+**      be necessary to write the current content out to the sub-journal
-+**      (as determined by function subjRequiresPage()).
-+**
-+** If the condition asserted by this function were not true, and the
-+** dirty page were to be discarded from the cache via the pagerStress()
-+** routine, pagerStress() would not write the current page content to
-+** the database file. If a savepoint transaction were rolled back after
-+** this happened, the correct behaviour would be to restore the current
-+** content of the page. However, since this content is not present in either
-+** the database file or the portion of the rollback journal and 
-+** sub-journal rolled back the content could not be restored and the
-+** database image would become corrupt. It is therefore fortunate that 
-+** this circumstance cannot arise.
- */
--static int hasHotJournal(Pager *pPager, int *pExists){
--  sqlite3_vfs * const pVfs = pPager->pVfs;
--  int rc = SQLITE_OK;           /* Return code */
--  int exists = 1;               /* True if a journal file is present */
--  int jrnlOpen = !!isOpen(pPager->jfd);
-+#if defined(SQLITE_DEBUG)
-+static void assertTruncateConstraintCb(PgHdr *pPg){
-+  assert( pPg->flags&PGHDR_DIRTY );
-+  assert( !subjRequiresPage(pPg) || pPg->pgno<=pPg->pPager->dbSize );
-+}
-+static void assertTruncateConstraint(Pager *pPager){
-+  sqlite3PcacheIterateDirty(pPager->pPCache, assertTruncateConstraintCb);
-+}
-+#else
-+# define assertTruncateConstraint(pPager)
-+#endif
- 
--  assert( pPager->useJournal );
--  assert( isOpen(pPager->fd) );
--  assert( pPager->eState==PAGER_OPEN );
-+/*
-+** Truncate the in-memory database file image to nPage pages. This 
-+** function does not actually modify the database file on disk. It 
-+** just sets the internal state of the pager object so that the 
-+** truncation will be done when the current transaction is committed.
-+*/
-+SQLITE_PRIVATE void sqlite3PagerTruncateImage(Pager *pPager, Pgno nPage){
-+  assert( pPager->dbSize>=nPage );
-+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
-+  pPager->dbSize = nPage;
-+  assertTruncateConstraint(pPager);
-+}
- 
--  assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) &
--    SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
--  ));
- 
--  *pExists = 0;
--  if( !jrnlOpen ){
--    rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
-+/*
-+** This function is called before attempting a hot-journal rollback. It
-+** syncs the journal file to disk, then sets pPager->journalHdr to the
-+** size of the journal file so that the pager_playback() routine knows
-+** that the entire journal file has been synced.
-+**
-+** Syncing a hot-journal to disk before attempting to roll it back ensures 
-+** that if a power-failure occurs during the rollback, the process that
-+** attempts rollback following system recovery sees the same journal
-+** content as this process.
-+**
-+** If everything goes as planned, SQLITE_OK is returned. Otherwise, 
-+** an SQLite error code.
-+*/
-+static int pagerSyncHotJournal(Pager *pPager){
-+  int rc = SQLITE_OK;
-+  if( !pPager->noSync ){
-+    rc = sqlite3OsSync(pPager->jfd, SQLITE_SYNC_NORMAL);
-   }
--  if( rc==SQLITE_OK && exists ){
--    int locked = 0;             /* True if some process holds a RESERVED lock */
-+  if( rc==SQLITE_OK ){
-+    rc = sqlite3OsFileSize(pPager->jfd, &pPager->journalHdr);
-+  }
-+  return rc;
-+}
-+
-+/*
-+** Shutdown the page cache.  Free all memory and close all files.
-+**
-+** If a transaction was in progress when this routine is called, that
-+** transaction is rolled back.  All outstanding pages are invalidated
-+** and their memory is freed.  Any attempt to use a page associated
-+** with this page cache after this function returns will likely
-+** result in a coredump.
-+**
-+** This function always succeeds. If a transaction is active an attempt
-+** is made to roll it back. If an error occurs during the rollback 
-+** a hot journal may be left in the filesystem but no error is returned
-+** to the caller.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerClose(Pager *pPager){
-+  u8 *pTmp = (u8 *)pPager->pTmpSpace;
- 
--    /* Race condition here:  Another process might have been holding the
--    ** RESERVED lock and have a journal open at the sqlite3OsAccess() 
--    ** call above, but then delete the journal and drop the lock before
--    ** we get to the following sqlite3OsCheckReservedLock() call.  If that
--    ** is the case, this routine might think there is a hot journal when
--    ** in fact there is none.  This results in a false-positive which will
--    ** be dealt with by the playback routine.  Ticket #3883.
-+  disable_simulated_io_errors();
-+  sqlite3BeginBenignMalloc();
-+  /* pPager->errCode = 0; */
-+  pPager->exclusiveMode = 0;
-+#ifndef SQLITE_OMIT_WAL
-+  sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags, pPager->pageSize, pTmp);
-+  pPager->pWal = 0;
-+#endif
-+  pager_reset(pPager);
-+  if( MEMDB ){
-+    pager_unlock(pPager);
-+  }else{
-+    /* If it is open, sync the journal file before calling UnlockAndRollback.
-+    ** If this is not done, then an unsynced portion of the open journal 
-+    ** file may be played back into the database. If a power failure occurs 
-+    ** while this is happening, the database could become corrupt.
-+    **
-+    ** If an error occurs while trying to sync the journal, shift the pager
-+    ** into the ERROR state. This causes UnlockAndRollback to unlock the
-+    ** database and close the journal file without attempting to roll it
-+    ** back or finalize it. The next database user will have to do hot-journal
-+    ** rollback before accessing the database file.
-     */
--    rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
--    if( rc==SQLITE_OK && !locked ){
--      Pgno nPage;                 /* Number of pages in database file */
--
--      /* Check the size of the database file. If it consists of 0 pages,
--      ** then delete the journal file. See the header comment above for 
--      ** the reasoning here.  Delete the obsolete journal file under
--      ** a RESERVED lock to avoid race conditions and to avoid violating
--      ** [H33020].
--      */
--      rc = pagerPagecount(pPager, &nPage);
--      if( rc==SQLITE_OK ){
--        if( nPage==0 ){
--          sqlite3BeginBenignMalloc();
--          if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){
--            sqlite3OsDelete(pVfs, pPager->zJournal, 0);
--            if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK);
--          }
--          sqlite3EndBenignMalloc();
--        }else{
--          /* The journal file exists and no other connection has a reserved
--          ** or greater lock on the database file. Now check that there is
--          ** at least one non-zero byte at the start of the journal file.
--          ** If there is, then we consider this journal to be hot. If not, 
--          ** it can be ignored.
--          */
--          if( !jrnlOpen ){
--            int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL;
--            rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f);
--          }
--          if( rc==SQLITE_OK ){
--            u8 first = 0;
--            rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0);
--            if( rc==SQLITE_IOERR_SHORT_READ ){
--              rc = SQLITE_OK;
--            }
--            if( !jrnlOpen ){
--              sqlite3OsClose(pPager->jfd);
--            }
--            *pExists = (first!=0);
--          }else if( rc==SQLITE_CANTOPEN ){
--            /* If we cannot open the rollback journal file in order to see if
--            ** it has a zero header, that might be due to an I/O error, or
--            ** it might be due to the race condition described above and in
--            ** ticket #3883.  Either way, assume that the journal is hot.
--            ** This might be a false positive.  But if it is, then the
--            ** automatic journal playback and recovery mechanism will deal
--            ** with it under an EXCLUSIVE lock where we do not need to
--            ** worry so much with race conditions.
--            */
--            *pExists = 1;
--            rc = SQLITE_OK;
--          }
--        }
--      }
-+    if( isOpen(pPager->jfd) ){
-+      pager_error(pPager, pagerSyncHotJournal(pPager));
-     }
-+    pagerUnlockAndRollback(pPager);
-   }
-+  sqlite3EndBenignMalloc();
-+  enable_simulated_io_errors();
-+  PAGERTRACE(("CLOSE %d\n", PAGERID(pPager)));
-+  IOTRACE(("CLOSE %p\n", pPager))
-+  sqlite3OsClose(pPager->jfd);
-+  sqlite3OsClose(pPager->fd);
-+  sqlite3PageFree(pTmp);
-+  sqlite3PcacheClose(pPager->pPCache);
- 
--  return rc;
-+#ifdef SQLITE_HAS_CODEC
-+  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
-+#endif
-+
-+  assert( !pPager->aSavepoint && !pPager->pInJournal );
-+  assert( !isOpen(pPager->jfd) && !isOpen(pPager->sjfd) );
-+
-+  sqlite3_free(pPager);
-+  return SQLITE_OK;
- }
- 
-+#if !defined(NDEBUG) || defined(SQLITE_TEST)
- /*
--** This function is called to obtain a shared lock on the database file.
--** It is illegal to call sqlite3PagerAcquire() until after this function
--** has been successfully called. If a shared-lock is already held when
--** this function is called, it is a no-op.
-+** Return the page number for page pPg.
-+*/
-+SQLITE_PRIVATE Pgno sqlite3PagerPagenumber(DbPage *pPg){
-+  return pPg->pgno;
-+}
-+#endif
-+
-+/*
-+** Increment the reference count for page pPg.
-+*/
-+SQLITE_PRIVATE void sqlite3PagerRef(DbPage *pPg){
-+  sqlite3PcacheRef(pPg);
-+}
-+
-+/*
-+** Sync the journal. In other words, make sure all the pages that have
-+** been written to the journal have actually reached the surface of the
-+** disk and can be restored in the event of a hot-journal rollback.
- **
--** The following operations are also performed by this function.
-+** If the Pager.noSync flag is set, then this function is a no-op.
-+** Otherwise, the actions required depend on the journal-mode and the 
-+** device characteristics of the file-system, as follows:
- **
--**   1) If the pager is currently in PAGER_OPEN state (no lock held
--**      on the database file), then an attempt is made to obtain a
--**      SHARED lock on the database file. Immediately after obtaining
--**      the SHARED lock, the file-system is checked for a hot-journal,
--**      which is played back if present. Following any hot-journal 
--**      rollback, the contents of the cache are validated by checking
--**      the 'change-counter' field of the database file header and
--**      discarded if they are found to be invalid.
-+**   * If the journal file is an in-memory journal file, no action need
-+**     be taken.
- **
--**   2) If the pager is running in exclusive-mode, and there are currently
--**      no outstanding references to any pages, and is in the error state,
--**      then an attempt is made to clear the error state by discarding
--**      the contents of the page cache and rolling back any open journal
--**      file.
-+**   * Otherwise, if the device does not support the SAFE_APPEND property,
-+**     then the nRec field of the most recently written journal header
-+**     is updated to contain the number of journal records that have
-+**     been written following it. If the pager is operating in full-sync
-+**     mode, then the journal file is synced before this field is updated.
- **
--** If everything is successful, SQLITE_OK is returned. If an IO error 
--** occurs while locking the database, checking for a hot-journal file or 
--** rolling back a journal file, the IO error code is returned.
-+**   * If the device does not support the SEQUENTIAL property, then 
-+**     the journal file is synced.
-+**
-+** Or, in pseudo-code:
-+**
-+**   if( NOT <in-memory journal> ){
-+**     if( NOT SAFE_APPEND ){
-+**       if( <full-sync mode> ) xSync(<journal file>);
-+**       <update nRec field>
-+**     } 
-+**     if( NOT SEQUENTIAL ) xSync(<journal file>);
-+**   }
-+**
-+** If successful, this routine clears the PGHDR_NEED_SYNC flag of every 
-+** page currently held in memory before returning SQLITE_OK. If an IO
-+** error is encountered, then the IO error code is returned to the caller.
- */
--SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){
--  int rc = SQLITE_OK;                /* Return code */
-+static int syncJournal(Pager *pPager, int newHdr){
-+  int rc;                         /* Return code */
- 
--  /* This routine is only called from b-tree and only when there are no
--  ** outstanding pages. This implies that the pager state should either
--  ** be OPEN or READER. READER is only possible if the pager is or was in 
--  ** exclusive access mode.
--  */
--  assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
-+  assert( pPager->eState==PAGER_WRITER_CACHEMOD
-+       || pPager->eState==PAGER_WRITER_DBMOD
-+  );
-   assert( assert_pager_state(pPager) );
--  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
--  if( NEVER(MEMDB && pPager->errCode) ){ return pPager->errCode; }
-+  assert( !pagerUseWal(pPager) );
- 
--  if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){
--    int bHotJournal = 1;          /* True if there exists a hot journal-file */
-+  rc = sqlite3PagerExclusiveLock(pPager);
-+  if( rc!=SQLITE_OK ) return rc;
- 
--    assert( !MEMDB );
--    assert( pPager->noReadlock==0 || pPager->readOnly );
-+  if( !pPager->noSync ){
-+    assert( !pPager->tempFile );
-+    if( isOpen(pPager->jfd) && pPager->journalMode!=PAGER_JOURNALMODE_MEMORY ){
-+      const int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
-+      assert( isOpen(pPager->jfd) );
- 
--    if( pPager->noReadlock==0 ){
--      rc = pager_wait_on_lock(pPager, SHARED_LOCK);
--      if( rc!=SQLITE_OK ){
--        assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK );
--        goto failed;
--      }
--    }
-+      if( 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
-+        /* This block deals with an obscure problem. If the last connection
-+        ** that wrote to this database was operating in persistent-journal
-+        ** mode, then the journal file may at this point actually be larger
-+        ** than Pager.journalOff bytes. If the next thing in the journal
-+        ** file happens to be a journal-header (written as part of the
-+        ** previous connection's transaction), and a crash or power-failure 
-+        ** occurs after nRec is updated but before this connection writes 
-+        ** anything else to the journal file (or commits/rolls back its 
-+        ** transaction), then SQLite may become confused when doing the 
-+        ** hot-journal rollback following recovery. It may roll back all
-+        ** of this connection's data, then proceed to rolling back the old,
-+        ** out-of-date data that follows it. Database corruption.
-+        **
-+        ** To work around this, if the journal file does appear to contain
-+        ** a valid header following Pager.journalOff, then write a 0x00
-+        ** byte to the start of it to prevent it from being recognized.
-+        **
-+        ** Variable iNextHdrOffset is set to the offset at which this
-+        ** problematic header will occur, if it exists. aMagic is used 
-+        ** as a temporary buffer to inspect the first couple of bytes of
-+        ** the potential journal header.
-+        */
-+        i64 iNextHdrOffset;
-+        u8 aMagic[8];
-+        u8 zHeader[sizeof(aJournalMagic)+4];
- 
--    /* If a journal file exists, and there is no RESERVED lock on the
--    ** database file, then it either needs to be played back or deleted.
--    */
--    if( pPager->eLock<=SHARED_LOCK ){
--      rc = hasHotJournal(pPager, &bHotJournal);
--    }
--    if( rc!=SQLITE_OK ){
--      goto failed;
--    }
--    if( bHotJournal ){
--      /* Get an EXCLUSIVE lock on the database file. At this point it is
--      ** important that a RESERVED lock is not obtained on the way to the
--      ** EXCLUSIVE lock. If it were, another process might open the
--      ** database file, detect the RESERVED lock, and conclude that the
--      ** database is safe to read while this process is still rolling the 
--      ** hot-journal back.
--      ** 
--      ** Because the intermediate RESERVED lock is not requested, any
--      ** other process attempting to access the database file will get to 
--      ** this point in the code and fail to obtain its own EXCLUSIVE lock 
--      ** on the database file.
--      **
--      ** Unless the pager is in locking_mode=exclusive mode, the lock is
--      ** downgraded to SHARED_LOCK before this function returns.
--      */
--      rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
--      if( rc!=SQLITE_OK ){
--        goto failed;
--      }
-- 
--      /* If it is not already open and the file exists on disk, open the 
--      ** journal for read/write access. Write access is required because 
--      ** in exclusive-access mode the file descriptor will be kept open 
--      ** and possibly used for a transaction later on. Also, write-access 
--      ** is usually required to finalize the journal in journal_mode=persist 
--      ** mode (and also for journal_mode=truncate on some systems).
--      **
--      ** If the journal does not exist, it usually means that some 
--      ** other connection managed to get in and roll it back before 
--      ** this connection obtained the exclusive lock above. Or, it 
--      ** may mean that the pager was in the error-state when this
--      ** function was called and the journal file does not exist.
--      */
--      if( !isOpen(pPager->jfd) ){
--        sqlite3_vfs * const pVfs = pPager->pVfs;
--        int bExists;              /* True if journal file exists */
--        rc = sqlite3OsAccess(
--            pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists);
--        if( rc==SQLITE_OK && bExists ){
--          int fout = 0;
--          int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
--          assert( !pPager->tempFile );
--          rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
--          assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
--          if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){
--            rc = SQLITE_CANTOPEN_BKPT;
--            sqlite3OsClose(pPager->jfd);
--          }
-+        memcpy(zHeader, aJournalMagic, sizeof(aJournalMagic));
-+        put32bits(&zHeader[sizeof(aJournalMagic)], pPager->nRec);
-+
-+        iNextHdrOffset = journalHdrOffset(pPager);
-+        rc = sqlite3OsRead(pPager->jfd, aMagic, 8, iNextHdrOffset);
-+        if( rc==SQLITE_OK && 0==memcmp(aMagic, aJournalMagic, 8) ){
-+          static const u8 zerobyte = 0;
-+          rc = sqlite3OsWrite(pPager->jfd, &zerobyte, 1, iNextHdrOffset);
-         }
--      }
-- 
--      /* Playback and delete the journal.  Drop the database write
--      ** lock and reacquire the read lock. Purge the cache before
--      ** playing back the hot-journal so that we don't end up with
--      ** an inconsistent cache.  Sync the hot journal before playing
--      ** it back since the process that crashed and left the hot journal
--      ** probably did not sync it and we are required to always sync
--      ** the journal before playing it back.
--      */
--      if( isOpen(pPager->jfd) ){
--        assert( rc==SQLITE_OK );
--        rc = pagerSyncHotJournal(pPager);
--        if( rc==SQLITE_OK ){
--          rc = pager_playback(pPager, 1);
--          pPager->eState = PAGER_OPEN;
-+        if( rc!=SQLITE_OK && rc!=SQLITE_IOERR_SHORT_READ ){
-+          return rc;
-         }
--      }else if( !pPager->exclusiveMode ){
--        pagerUnlockDb(pPager, SHARED_LOCK);
--      }
- 
--      if( rc!=SQLITE_OK ){
--        /* This branch is taken if an error occurs while trying to open
--        ** or roll back a hot-journal while holding an EXCLUSIVE lock. The
--        ** pager_unlock() routine will be called before returning to unlock
--        ** the file. If the unlock attempt fails, then Pager.eLock must be
--        ** set to UNKNOWN_LOCK (see the comment above the #define for 
--        ** UNKNOWN_LOCK above for an explanation). 
-+        /* Write the nRec value into the journal file header. If in
-+        ** full-synchronous mode, sync the journal first. This ensures that
-+        ** all data has really hit the disk before nRec is updated to mark
-+        ** it as a candidate for rollback.
-         **
--        ** In order to get pager_unlock() to do this, set Pager.eState to
--        ** PAGER_ERROR now. This is not actually counted as a transition
--        ** to ERROR state in the state diagram at the top of this file,
--        ** since we know that the same call to pager_unlock() will very
--        ** shortly transition the pager object to the OPEN state. Calling
--        ** assert_pager_state() would fail now, as it should not be possible
--        ** to be in ERROR state when there are zero outstanding page 
--        ** references.
-+        ** This is not required if the persistent media supports the
-+        ** SAFE_APPEND property. Because in this case it is not possible 
-+        ** for garbage data to be appended to the file, the nRec field
-+        ** is populated with 0xFFFFFFFF when the journal header is written
-+        ** and never needs to be updated.
-         */
--        pager_error(pPager, rc);
--        goto failed;
--      }
--
--      assert( pPager->eState==PAGER_OPEN );
--      assert( (pPager->eLock==SHARED_LOCK)
--           || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
--      );
--    }
--
--    if( !pPager->tempFile 
--     && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) 
--    ){
--      /* The shared-lock has just been acquired on the database file
--      ** and there are already pages in the cache (from a previous
--      ** read or write transaction).  Check to see if the database
--      ** has been modified.  If the database has changed, flush the
--      ** cache.
--      **
--      ** Database changes are detected by looking at 15 bytes beginning
--      ** at offset 24 into the file.  The first 4 of these 16 bytes are
--      ** a 32-bit counter that is incremented with each change.  The
--      ** other bytes change randomly with each file change when
--      ** a codec is in use.
--      ** 
--      ** There is a vanishingly small chance that a change will not be 
--      ** detected.  The chance of an undetected change is so small that
--      ** it can be neglected.
--      */
--      Pgno nPage = 0;
--      char dbFileVers[sizeof(pPager->dbFileVers)];
--
--      rc = pagerPagecount(pPager, &nPage);
--      if( rc ) goto failed;
--
--      if( nPage>0 ){
--        IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
--        rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
--        if( rc!=SQLITE_OK ){
--          goto failed;
-+        if( pPager->fullSync && 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
-+          PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
-+          IOTRACE(("JSYNC %p\n", pPager))
-+          rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags);
-+          if( rc!=SQLITE_OK ) return rc;
-         }
--      }else{
--        memset(dbFileVers, 0, sizeof(dbFileVers));
-+        IOTRACE(("JHDR %p %lld\n", pPager, pPager->journalHdr));
-+        rc = sqlite3OsWrite(
-+            pPager->jfd, zHeader, sizeof(zHeader), pPager->journalHdr
-+        );
-+        if( rc!=SQLITE_OK ) return rc;
-+      }
-+      if( 0==(iDc&SQLITE_IOCAP_SEQUENTIAL) ){
-+        PAGERTRACE(("SYNC journal of %d\n", PAGERID(pPager)));
-+        IOTRACE(("JSYNC %p\n", pPager))
-+        rc = sqlite3OsSync(pPager->jfd, pPager->syncFlags| 
-+          (pPager->syncFlags==SQLITE_SYNC_FULL?SQLITE_SYNC_DATAONLY:0)
-+        );
-+        if( rc!=SQLITE_OK ) return rc;
-       }
- 
--      if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
--        pager_reset(pPager);
-+      pPager->journalHdr = pPager->journalOff;
-+      if( newHdr && 0==(iDc&SQLITE_IOCAP_SAFE_APPEND) ){
-+        pPager->nRec = 0;
-+        rc = writeJournalHdr(pPager);
-+        if( rc!=SQLITE_OK ) return rc;
-       }
-+    }else{
-+      pPager->journalHdr = pPager->journalOff;
-     }
--
--    /* If there is a WAL file in the file-system, open this database in WAL
--    ** mode. Otherwise, the following function call is a no-op.
--    */
--    rc = pagerOpenWalIfPresent(pPager);
--#ifndef SQLITE_OMIT_WAL
--    assert( pPager->pWal==0 || rc==SQLITE_OK );
--#endif
--  }
--
--  if( pagerUseWal(pPager) ){
--    assert( rc==SQLITE_OK );
--    rc = pagerBeginReadTransaction(pPager);
--  }
--
--  if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
--    rc = pagerPagecount(pPager, &pPager->dbSize);
-   }
- 
-- failed:
--  if( rc!=SQLITE_OK ){
--    assert( !MEMDB );
--    pager_unlock(pPager);
--    assert( pPager->eState==PAGER_OPEN );
--  }else{
--    pPager->eState = PAGER_READER;
--  }
--  return rc;
--}
--
--/*
--** If the reference count has reached zero, rollback any active
--** transaction and unlock the pager.
--**
--** Except, in locking_mode=EXCLUSIVE when there is nothing in
--** the rollback journal, the unlock is not performed and there is
--** nothing to rollback, so this routine is a no-op.
--*/ 
--static void pagerUnlockIfUnused(Pager *pPager){
--  if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
--    pagerUnlockAndRollback(pPager);
--  }
-+  /* Unless the pager is in noSync mode, the journal file was just 
-+  ** successfully synced. Either way, clear the PGHDR_NEED_SYNC flag on 
-+  ** all pages.
-+  */
-+  sqlite3PcacheClearSyncFlags(pPager->pPCache);
-+  pPager->eState = PAGER_WRITER_DBMOD;
-+  assert( assert_pager_state(pPager) );
-+  return SQLITE_OK;
- }
- 
- /*
--** Acquire a reference to page number pgno in pager pPager (a page
--** reference has type DbPage*). If the requested reference is 
--** successfully obtained, it is copied to *ppPage and SQLITE_OK returned.
--**
--** If the requested page is already in the cache, it is returned. 
--** Otherwise, a new page object is allocated and populated with data
--** read from the database file. In some cases, the pcache module may
--** choose not to allocate a new page object and may reuse an existing
--** object with no outstanding references.
--**
--** The extra data appended to a page is always initialized to zeros the 
--** first time a page is loaded into memory. If the page requested is 
--** already in the cache when this function is called, then the extra
--** data is left as it was when the page object was last used.
--**
--** If the database image is smaller than the requested page or if a 
--** non-zero value is passed as the noContent parameter and the 
--** requested page is not already stored in the cache, then no 
--** actual disk read occurs. In this case the memory image of the 
--** page is initialized to all zeros. 
--**
--** If noContent is true, it means that we do not care about the contents
--** of the page. This occurs in two seperate scenarios:
--**
--**   a) When reading a free-list leaf page from the database, and
-+** The argument is the first in a linked list of dirty pages connected
-+** by the PgHdr.pDirty pointer. This function writes each one of the
-+** in-memory pages in the list to the database file. The argument may
-+** be NULL, representing an empty list. In this case this function is
-+** a no-op.
- **
--**   b) When a savepoint is being rolled back and we need to load
--**      a new page into the cache to be filled with the data read
--**      from the savepoint journal.
-+** The pager must hold at least a RESERVED lock when this function
-+** is called. Before writing anything to the database file, this lock
-+** is upgraded to an EXCLUSIVE lock. If the lock cannot be obtained,
-+** SQLITE_BUSY is returned and no data is written to the database file.
-+** 
-+** If the pager is a temp-file pager and the actual file-system file
-+** is not yet open, it is created and opened before any data is 
-+** written out.
- **
--** If noContent is true, then the data returned is zeroed instead of
--** being read from the database. Additionally, the bits corresponding
--** to pgno in Pager.pInJournal (bitvec of pages already written to the
--** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open
--** savepoints are set. This means if the page is made writable at any
--** point in the future, using a call to sqlite3PagerWrite(), its contents
--** will not be journaled. This saves IO.
-+** Once the lock has been upgraded and, if necessary, the file opened,
-+** the pages are written out to the database file in list order. Writing
-+** a page is skipped if it meets either of the following criteria:
- **
--** The acquisition might fail for several reasons.  In all cases,
--** an appropriate error code is returned and *ppPage is set to NULL.
-+**   * The page number is greater than Pager.dbSize, or
-+**   * The PGHDR_DONT_WRITE flag is set on the page.
- **
--** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
--** to find a page in the in-memory cache first.  If the page is not already
--** in memory, this routine goes to disk to read it in whereas Lookup()
--** just returns 0.  This routine acquires a read-lock the first time it
--** has to go to disk, and could also playback an old journal if necessary.
--** Since Lookup() never goes to disk, it never has to deal with locks
--** or journal files.
-+** If writing out a page causes the database file to grow, Pager.dbFileSize
-+** is updated accordingly. If page 1 is written out, then the value cached
-+** in Pager.dbFileVers[] is updated to match the new value stored in
-+** the database file.
-+**
-+** If everything is successful, SQLITE_OK is returned. If an IO error 
-+** occurs, an IO error code is returned. Or, if the EXCLUSIVE lock cannot
-+** be obtained, SQLITE_BUSY is returned.
- */
--SQLITE_PRIVATE int sqlite3PagerAcquire(
--  Pager *pPager,      /* The pager open on the database file */
--  Pgno pgno,          /* Page number to fetch */
--  DbPage **ppPage,    /* Write a pointer to the page here */
--  int noContent       /* Do not bother reading content from disk if true */
--){
--  int rc;
--  PgHdr *pPg;
-+static int pager_write_pagelist(Pager *pPager, PgHdr *pList){
-+  int rc = SQLITE_OK;                  /* Return code */
- 
--  assert( pPager->eState>=PAGER_READER );
--  assert( assert_pager_state(pPager) );
-+  /* This function is only called for rollback pagers in WRITER_DBMOD state. */
-+  assert( !pagerUseWal(pPager) );
-+  assert( pPager->eState==PAGER_WRITER_DBMOD );
-+  assert( pPager->eLock==EXCLUSIVE_LOCK );
- 
--  if( pgno==0 ){
--    return SQLITE_CORRUPT_BKPT;
-+  /* If the file is a temp-file that has not yet been opened, open it now. It
-+  ** is not possible for rc to be other than SQLITE_OK if this branch
-+  ** is taken, as pager_wait_on_lock() is a no-op for temp-files.
-+  */
-+  if( !isOpen(pPager->fd) ){
-+    assert( pPager->tempFile && rc==SQLITE_OK );
-+    rc = pagerOpentemp(pPager, pPager->fd, pPager->vfsFlags);
-   }
- 
--  /* If the pager is in the error state, return an error immediately. 
--  ** Otherwise, request the page from the PCache layer. */
--  if( pPager->errCode!=SQLITE_OK ){
--    rc = pPager->errCode;
--  }else{
--    rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
-+  /* Before the first write, give the VFS a hint of what the final
-+  ** file size will be.
-+  */
-+  assert( rc!=SQLITE_OK || isOpen(pPager->fd) );
-+  if( rc==SQLITE_OK && pPager->dbSize>pPager->dbHintSize ){
-+    sqlite3_int64 szFile = pPager->pageSize * (sqlite3_int64)pPager->dbSize;
-+    sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SIZE_HINT, &szFile);
-+    pPager->dbHintSize = pPager->dbSize;
-   }
- 
--  if( rc!=SQLITE_OK ){
--    /* Either the call to sqlite3PcacheFetch() returned an error or the
--    ** pager was already in the error-state when this function was called.
--    ** Set pPg to 0 and jump to the exception handler.  */
--    pPg = 0;
--    goto pager_acquire_err;
--  }
--  assert( (*ppPage)->pgno==pgno );
--  assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 );
-+  while( rc==SQLITE_OK && pList ){
-+    Pgno pgno = pList->pgno;
- 
--  if( (*ppPage)->pPager && !noContent ){
--    /* In this case the pcache already contains an initialized copy of
--    ** the page. Return without further ado.  */
--    assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
--    PAGER_INCR(pPager->nHit);
--    return SQLITE_OK;
-+    /* If there are dirty pages in the page cache with page numbers greater
-+    ** than Pager.dbSize, this means sqlite3PagerTruncateImage() was called to
-+    ** make the file smaller (presumably by auto-vacuum code). Do not write
-+    ** any such pages to the file.
-+    **
-+    ** Also, do not write out any page that has the PGHDR_DONT_WRITE flag
-+    ** set (set by sqlite3PagerDontWrite()).
-+    */
-+    if( pgno<=pPager->dbSize && 0==(pList->flags&PGHDR_DONT_WRITE) ){
-+      i64 offset = (pgno-1)*(i64)pPager->pageSize;   /* Offset to write */
-+      char *pData;                                   /* Data to write */    
- 
--  }else{
--    /* The pager cache has created a new page. Its content needs to 
--    ** be initialized.  */
-+      assert( (pList->flags&PGHDR_NEED_SYNC)==0 );
-+      if( pList->pgno==1 ) pager_write_changecounter(pList);
- 
--    PAGER_INCR(pPager->nMiss);
--    pPg = *ppPage;
--    pPg->pPager = pPager;
-+      /* Encode the database */
-+      CODEC2(pPager, pList->pData, pgno, 6, return SQLITE_NOMEM, pData);
- 
--    /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
--    ** number greater than this, or the unused locking-page, is requested. */
--    if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){
--      rc = SQLITE_CORRUPT_BKPT;
--      goto pager_acquire_err;
--    }
-+      /* Write out the page data. */
-+      rc = sqlite3OsWrite(pPager->fd, pData, pPager->pageSize, offset);
- 
--    if( MEMDB || pPager->dbSize<pgno || noContent || !isOpen(pPager->fd) ){
--      if( pgno>pPager->mxPgno ){
--        rc = SQLITE_FULL;
--        goto pager_acquire_err;
--      }
--      if( noContent ){
--        /* Failure to set the bits in the InJournal bit-vectors is benign.
--        ** It merely means that we might do some extra work to journal a 
--        ** page that does not need to be journaled.  Nevertheless, be sure 
--        ** to test the case where a malloc error occurs while trying to set 
--        ** a bit in a bit vector.
--        */
--        sqlite3BeginBenignMalloc();
--        if( pgno<=pPager->dbOrigSize ){
--          TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno);
--          testcase( rc==SQLITE_NOMEM );
--        }
--        TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno);
--        testcase( rc==SQLITE_NOMEM );
--        sqlite3EndBenignMalloc();
-+      /* If page 1 was just written, update Pager.dbFileVers to match
-+      ** the value now stored in the database file. If writing this 
-+      ** page caused the database file to grow, update dbFileSize. 
-+      */
-+      if( pgno==1 ){
-+        memcpy(&pPager->dbFileVers, &pData[24], sizeof(pPager->dbFileVers));
-       }
--      memset(pPg->pData, 0, pPager->pageSize);
--      IOTRACE(("ZERO %p %d\n", pPager, pgno));
--    }else{
--      assert( pPg->pPager==pPager );
--      rc = readDbPage(pPg);
--      if( rc!=SQLITE_OK ){
--        goto pager_acquire_err;
-+      if( pgno>pPager->dbFileSize ){
-+        pPager->dbFileSize = pgno;
-       }
--    }
--    pager_set_pagehash(pPg);
--  }
- 
--  return SQLITE_OK;
-+      /* Update any backup objects copying the contents of this pager. */
-+      sqlite3BackupUpdate(pPager->pBackup, pgno, (u8*)pList->pData);
- 
--pager_acquire_err:
--  assert( rc!=SQLITE_OK );
--  if( pPg ){
--    sqlite3PcacheDrop(pPg);
-+      PAGERTRACE(("STORE %d page %d hash(%08x)\n",
-+                   PAGERID(pPager), pgno, pager_pagehash(pList)));
-+      IOTRACE(("PGOUT %p %d\n", pPager, pgno));
-+      PAGER_INCR(sqlite3_pager_writedb_count);
-+      PAGER_INCR(pPager->nWrite);
-+    }else{
-+      PAGERTRACE(("NOSTORE %d page %d\n", PAGERID(pPager), pgno));
-+    }
-+    pager_set_pagehash(pList);
-+    pList = pList->pDirty;
-   }
--  pagerUnlockIfUnused(pPager);
- 
--  *ppPage = 0;
-   return rc;
- }
- 
- /*
--** Acquire a page if it is already in the in-memory cache.  Do
--** not read the page from disk.  Return a pointer to the page,
--** or 0 if the page is not in cache. 
--**
--** See also sqlite3PagerGet().  The difference between this routine
--** and sqlite3PagerGet() is that _get() will go to the disk and read
--** in the page if the page is not already in cache.  This routine
--** returns NULL if the page is not in cache or if a disk I/O error 
--** has ever happened.
--*/
--SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
--  PgHdr *pPg = 0;
--  assert( pPager!=0 );
--  assert( pgno!=0 );
--  assert( pPager->pPCache!=0 );
--  assert( pPager->eState>=PAGER_READER && pPager->eState!=PAGER_ERROR );
--  sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
--  return pPg;
--}
--
--/*
--** Release a page reference.
-+** Ensure that the sub-journal file is open. If it is already open, this 
-+** function is a no-op.
- **
--** If the number of references to the page drop to zero, then the
--** page is added to the LRU list.  When all references to all pages
--** are released, a rollback occurs and the lock on the database is
--** removed.
-+** SQLITE_OK is returned if everything goes according to plan. An 
-+** SQLITE_IOERR_XXX error code is returned if a call to sqlite3OsOpen() 
-+** fails.
- */
--SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){
--  if( pPg ){
--    Pager *pPager = pPg->pPager;
--    sqlite3PcacheRelease(pPg);
--    pagerUnlockIfUnused(pPager);
-+static int openSubJournal(Pager *pPager){
-+  int rc = SQLITE_OK;
-+  if( !isOpen(pPager->sjfd) ){
-+    if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY || pPager->subjInMemory ){
-+      sqlite3MemJournalOpen(pPager->sjfd);
-+    }else{
-+      rc = pagerOpentemp(pPager, pPager->sjfd, SQLITE_OPEN_SUBJOURNAL);
-+    }
-   }
-+  return rc;
- }
- 
- /*
--** This function is called at the start of every write transaction.
--** There must already be a RESERVED or EXCLUSIVE lock on the database 
--** file when this routine is called.
--**
--** Open the journal file for pager pPager and write a journal header
--** to the start of it. If there are active savepoints, open the sub-journal
--** as well. This function is only used when the journal file is being 
--** opened to write a rollback log for a transaction. It is not used 
--** when opening a hot journal file to roll it back.
--**
--** If the journal file is already open (as it may be in exclusive mode),
--** then this function just writes a journal header to the start of the
--** already open file. 
-+** Append a record of the current state of page pPg to the sub-journal. 
-+** It is the caller's responsibility to use subjRequiresPage() to check 
-+** that it is really required before calling this function.
- **
--** Whether or not the journal file is opened by this function, the
--** Pager.pInJournal bitvec structure is allocated.
-+** If successful, set the bit corresponding to pPg->pgno in the bitvecs
-+** for all open savepoints before returning.
- **
--** Return SQLITE_OK if everything is successful. Otherwise, return 
--** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or 
--** an IO error code if opening or writing the journal file fails.
-+** This function returns SQLITE_OK if everything is successful, an IO
-+** error code if the attempt to write to the sub-journal fails, or 
-+** SQLITE_NOMEM if a malloc fails while setting a bit in a savepoint
-+** bitvec.
- */
--static int pager_open_journal(Pager *pPager){
--  int rc = SQLITE_OK;                        /* Return code */
--  sqlite3_vfs * const pVfs = pPager->pVfs;   /* Local cache of vfs pointer */
-+static int subjournalPage(PgHdr *pPg){
-+  int rc = SQLITE_OK;
-+  Pager *pPager = pPg->pPager;
-+  if( pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
- 
--  assert( pPager->eState==PAGER_WRITER_LOCKED );
--  assert( assert_pager_state(pPager) );
--  assert( pPager->pInJournal==0 );
--  
--  /* If already in the error state, this function is a no-op.  But on
--  ** the other hand, this routine is never called if we are already in
--  ** an error state. */
--  if( NEVER(pPager->errCode) ) return pPager->errCode;
-+    /* Open the sub-journal, if it has not already been opened */
-+    assert( pPager->useJournal );
-+    assert( isOpen(pPager->jfd) || pagerUseWal(pPager) );
-+    assert( isOpen(pPager->sjfd) || pPager->nSubRec==0 );
-+    assert( pagerUseWal(pPager) 
-+         || pageInJournal(pPg) 
-+         || pPg->pgno>pPager->dbOrigSize 
-+    );
-+    rc = openSubJournal(pPager);
- 
--  if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
--    pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
--    if( pPager->pInJournal==0 ){
--      return SQLITE_NOMEM;
--    }
-+    /* If the sub-journal was opened successfully (or was already open),
-+    ** write the journal record into the file.  */
-+    if( rc==SQLITE_OK ){
-+      void *pData = pPg->pData;
-+      i64 offset = pPager->nSubRec*(4+pPager->pageSize);
-+      char *pData2;
-   
--    /* Open the journal file if it is not already open. */
--    if( !isOpen(pPager->jfd) ){
--      if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
--        sqlite3MemJournalOpen(pPager->jfd);
--      }else{
--        const int flags =                   /* VFS flags to open journal file */
--          SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
--          (pPager->tempFile ? 
--            (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL):
--            (SQLITE_OPEN_MAIN_JOURNAL)
--          );
--  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
--        rc = sqlite3JournalOpen(
--            pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
--        );
--  #else
--        rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
--  #endif
-+      CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
-+      PAGERTRACE(("STMT-JOURNAL %d page %d\n", PAGERID(pPager), pPg->pgno));
-+      rc = write32bits(pPager->sjfd, offset, pPg->pgno);
-+      if( rc==SQLITE_OK ){
-+        rc = sqlite3OsWrite(pPager->sjfd, pData2, pPager->pageSize, offset+4);
-       }
--      assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
--    }
--  
--  
--    /* Write the first journal header to the journal file and open 
--    ** the sub-journal if necessary.
--    */
--    if( rc==SQLITE_OK ){
--      /* TODO: Check if all of these are really required. */
--      pPager->nRec = 0;
--      pPager->journalOff = 0;
--      pPager->setMaster = 0;
--      pPager->journalHdr = 0;
--      rc = writeJournalHdr(pPager);
-     }
-   }
--
--  if( rc!=SQLITE_OK ){
--    sqlite3BitvecDestroy(pPager->pInJournal);
--    pPager->pInJournal = 0;
--  }else{
--    assert( pPager->eState==PAGER_WRITER_LOCKED );
--    pPager->eState = PAGER_WRITER_CACHEMOD;
-+  if( rc==SQLITE_OK ){
-+    pPager->nSubRec++;
-+    assert( pPager->nSavepoint>0 );
-+    rc = addToSavepointBitvecs(pPager, pPg->pgno);
-   }
--
-   return rc;
- }
- 
- /*
--** Begin a write-transaction on the specified pager object. If a 
--** write-transaction has already been opened, this function is a no-op.
-+** This function is called by the pcache layer when it has reached some
-+** soft memory limit. The first argument is a pointer to a Pager object
-+** (cast as a void*). The pager is always 'purgeable' (not an in-memory
-+** database). The second argument is a reference to a page that is 
-+** currently dirty but has no outstanding references. The page
-+** is always associated with the Pager object passed as the first 
-+** argument.
- **
--** If the exFlag argument is false, then acquire at least a RESERVED
--** lock on the database file. If exFlag is true, then acquire at least
--** an EXCLUSIVE lock. If such a lock is already held, no locking 
--** functions need be called.
-+** The job of this function is to make pPg clean by writing its contents
-+** out to the database file, if possible. This may involve syncing the
-+** journal file. 
- **
--** If the subjInMemory argument is non-zero, then any sub-journal opened
--** within this transaction will be opened as an in-memory file. This
--** has no effect if the sub-journal is already opened (as it may be when
--** running in exclusive mode) or if the transaction does not require a
--** sub-journal. If the subjInMemory argument is zero, then any required
--** sub-journal is implemented in-memory if pPager is an in-memory database, 
--** or using a temporary file otherwise.
--*/
--SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
--  int rc = SQLITE_OK;
--
--  if( pPager->errCode ) return pPager->errCode;
--  assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
--  pPager->subjInMemory = (u8)subjInMemory;
--
--  if( ALWAYS(pPager->eState==PAGER_READER) ){
--    assert( pPager->pInJournal==0 );
--
--    if( pagerUseWal(pPager) ){
--      /* If the pager is configured to use locking_mode=exclusive, and an
--      ** exclusive lock on the database is not already held, obtain it now.
--      */
--      if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){
--        rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
--        if( rc!=SQLITE_OK ){
--          return rc;
--        }
--        sqlite3WalExclusiveMode(pPager->pWal, 1);
--      }
--
--      /* Grab the write lock on the log file. If successful, upgrade to
--      ** PAGER_RESERVED state. Otherwise, return an error code to the caller.
--      ** The busy-handler is not invoked if another connection already
--      ** holds the write-lock. If possible, the upper layer will call it.
--      */
--      rc = sqlite3WalBeginWriteTransaction(pPager->pWal);
--    }else{
--      /* Obtain a RESERVED lock on the database file. If the exFlag parameter
--      ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
--      ** busy-handler callback can be used when upgrading to the EXCLUSIVE
--      ** lock, but not when obtaining the RESERVED lock.
--      */
--      rc = pagerLockDb(pPager, RESERVED_LOCK);
--      if( rc==SQLITE_OK && exFlag ){
--        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
--      }
--    }
--
--    if( rc==SQLITE_OK ){
--      /* Change to WRITER_LOCKED state.
--      **
--      ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD
--      ** when it has an open transaction, but never to DBMOD or FINISHED.
--      ** This is because in those states the code to roll back savepoint 
--      ** transactions may copy data from the sub-journal into the database 
--      ** file as well as into the page cache. Which would be incorrect in 
--      ** WAL mode.
--      */
--      pPager->eState = PAGER_WRITER_LOCKED;
--      pPager->dbHintSize = pPager->dbSize;
--      pPager->dbFileSize = pPager->dbSize;
--      pPager->dbOrigSize = pPager->dbSize;
--      pPager->journalOff = 0;
--    }
--
--    assert( rc==SQLITE_OK || pPager->eState==PAGER_READER );
--    assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED );
--    assert( assert_pager_state(pPager) );
--  }
--
--  PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
--  return rc;
--}
--
--/*
--** Mark a single data page as writeable. The page is written into the 
--** main journal or sub-journal as required. If the page is written into
--** one of the journals, the corresponding bit is set in the 
--** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
--** of any open savepoints as appropriate.
-+** If successful, sqlite3PcacheMakeClean() is called on the page and
-+** SQLITE_OK returned. If an IO error occurs while trying to make the
-+** page clean, the IO error code is returned. If the page cannot be
-+** made clean for some other reason, but no error occurs, then SQLITE_OK
-+** is returned but sqlite3PcacheMakeClean() is not called.
- */
--static int pager_write(PgHdr *pPg){
--  void *pData = pPg->pData;
--  Pager *pPager = pPg->pPager;
-+static int pagerStress(void *p, PgHdr *pPg){
-+  Pager *pPager = (Pager *)p;
-   int rc = SQLITE_OK;
- 
--  /* This routine is not called unless a write-transaction has already 
--  ** been started. The journal file may or may not be open at this point.
--  ** It is never called in the ERROR state.
--  */
--  assert( pPager->eState==PAGER_WRITER_LOCKED
--       || pPager->eState==PAGER_WRITER_CACHEMOD
--       || pPager->eState==PAGER_WRITER_DBMOD
--  );
--  assert( assert_pager_state(pPager) );
--
--  /* If an error has been previously detected, report the same error
--  ** again. This should not happen, but the check provides robustness. */
--  if( NEVER(pPager->errCode) )  return pPager->errCode;
--
--  /* Higher-level routines never call this function if database is not
--  ** writable.  But check anyway, just for robustness. */
--  if( NEVER(pPager->readOnly) ) return SQLITE_PERM;
--
--  CHECK_PAGE(pPg);
--
--  /* The journal file needs to be opened. Higher level routines have already
--  ** obtained the necessary locks to begin the write-transaction, but the
--  ** rollback journal might not yet be open. Open it now if this is the case.
--  **
--  ** This is done before calling sqlite3PcacheMakeDirty() on the page. 
--  ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
--  ** an error might occur and the pager would end up in WRITER_LOCKED state
--  ** with pages marked as dirty in the cache.
--  */
--  if( pPager->eState==PAGER_WRITER_LOCKED ){
--    rc = pager_open_journal(pPager);
--    if( rc!=SQLITE_OK ) return rc;
--  }
--  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
--  assert( assert_pager_state(pPager) );
--
--  /* Mark the page as dirty.  If the page has already been written
--  ** to the journal then we can return right away.
--  */
--  sqlite3PcacheMakeDirty(pPg);
--  if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
--    assert( !pagerUseWal(pPager) );
--  }else{
--  
--    /* The transaction journal now exists and we have a RESERVED or an
--    ** EXCLUSIVE lock on the main database file.  Write the current page to
--    ** the transaction journal if it is not there already.
--    */
--    if( !pageInJournal(pPg) && !pagerUseWal(pPager) ){
--      assert( pagerUseWal(pPager)==0 );
--      if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){
--        u32 cksum;
--        char *pData2;
--        i64 iOff = pPager->journalOff;
--
--        /* We should never write to the journal file the page that
--        ** contains the database locks.  The following assert verifies
--        ** that we do not. */
--        assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
--
--        assert( pPager->journalHdr<=pPager->journalOff );
--        CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
--        cksum = pager_cksum(pPager, (u8*)pData2);
--
--        /* Even if an IO or diskfull error occurs while journalling the
--        ** page in the block above, set the need-sync flag for the page.
--        ** Otherwise, when the transaction is rolled back, the logic in
--        ** playback_one_page() will think that the page needs to be restored
--        ** in the database file. And if an IO error occurs while doing so,
--        ** then corruption may follow.
--        */
--        pPg->flags |= PGHDR_NEED_SYNC;
--
--        rc = write32bits(pPager->jfd, iOff, pPg->pgno);
--        if( rc!=SQLITE_OK ) return rc;
--        rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4);
--        if( rc!=SQLITE_OK ) return rc;
--        rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum);
--        if( rc!=SQLITE_OK ) return rc;
--
--        IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
--                 pPager->journalOff, pPager->pageSize));
--        PAGER_INCR(sqlite3_pager_writej_count);
--        PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n",
--             PAGERID(pPager), pPg->pgno, 
--             ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg)));
-+  assert( pPg->pPager==pPager );
-+  assert( pPg->flags&PGHDR_DIRTY );
- 
--        pPager->journalOff += 8 + pPager->pageSize;
--        pPager->nRec++;
--        assert( pPager->pInJournal!=0 );
--        rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
--        testcase( rc==SQLITE_NOMEM );
--        assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
--        rc |= addToSavepointBitvecs(pPager, pPg->pgno);
--        if( rc!=SQLITE_OK ){
--          assert( rc==SQLITE_NOMEM );
--          return rc;
--        }
--      }else{
--        if( pPager->eState!=PAGER_WRITER_DBMOD ){
--          pPg->flags |= PGHDR_NEED_SYNC;
--        }
--        PAGERTRACE(("APPEND %d page %d needSync=%d\n",
--                PAGERID(pPager), pPg->pgno,
--               ((pPg->flags&PGHDR_NEED_SYNC)?1:0)));
--      }
-+  /* The doNotSyncSpill flag is set during times when doing a sync of
-+  ** journal (and adding a new header) is not allowed.  This occurs
-+  ** during calls to sqlite3PagerWrite() while trying to journal multiple
-+  ** pages belonging to the same sector.
-+  **
-+  ** The doNotSpill flag inhibits all cache spilling regardless of whether
-+  ** or not a sync is required.  This is set during a rollback.
-+  **
-+  ** Spilling is also prohibited when in an error state since that could
-+  ** lead to database corruption.   In the current implementation it 
-+  ** is impossible for sqlite3PCacheFetch() to be called with createFlag==1
-+  ** while in the error state, hence it is impossible for this routine to
-+  ** be called in the error state.  Nevertheless, we include a NEVER()
-+  ** test for the error state as a safeguard against future changes.
-+  */
-+  if( NEVER(pPager->errCode) ) return SQLITE_OK;
-+  if( pPager->doNotSpill ) return SQLITE_OK;
-+  if( pPager->doNotSyncSpill && (pPg->flags & PGHDR_NEED_SYNC)!=0 ){
-+    return SQLITE_OK;
-+  }
-+
-+  pPg->pDirty = 0;
-+  if( pagerUseWal(pPager) ){
-+    /* Write a single frame for this page to the log. */
-+    if( subjRequiresPage(pPg) ){ 
-+      rc = subjournalPage(pPg); 
-+    }
-+    if( rc==SQLITE_OK ){
-+      rc = pagerWalFrames(pPager, pPg, 0, 0, 0);
-     }
-+  }else{
-   
--    /* If the statement journal is open and the page is not in it,
--    ** then write the current page to the statement journal.  Note that
--    ** the statement journal format differs from the standard journal format
--    ** in that it omits the checksums and the header.
-+    /* Sync the journal file if required. */
-+    if( pPg->flags&PGHDR_NEED_SYNC 
-+     || pPager->eState==PAGER_WRITER_CACHEMOD
-+    ){
-+      rc = syncJournal(pPager, 1);
-+    }
-+  
-+    /* If the page number of this page is larger than the current size of
-+    ** the database image, it may need to be written to the sub-journal.
-+    ** This is because the call to pager_write_pagelist() below will not
-+    ** actually write data to the file in this case.
-+    **
-+    ** Consider the following sequence of events:
-+    **
-+    **   BEGIN;
-+    **     <journal page X>
-+    **     <modify page X>
-+    **     SAVEPOINT sp;
-+    **       <shrink database file to Y pages>
-+    **       pagerStress(page X)
-+    **     ROLLBACK TO sp;
-+    **
-+    ** If (X>Y), then when pagerStress is called page X will not be written
-+    ** out to the database file, but will be dropped from the cache. Then,
-+    ** following the "ROLLBACK TO sp" statement, reading page X will read
-+    ** data from the database file. This will be the copy of page X as it
-+    ** was when the transaction started, not as it was when "SAVEPOINT sp"
-+    ** was executed.
-+    **
-+    ** The solution is to write the current data for page X into the 
-+    ** sub-journal file now (if it is not already there), so that it will
-+    ** be restored to its current value when the "ROLLBACK TO sp" is 
-+    ** executed.
-     */
--    if( subjRequiresPage(pPg) ){
-+    if( NEVER(
-+        rc==SQLITE_OK && pPg->pgno>pPager->dbSize && subjRequiresPage(pPg)
-+    ) ){
-       rc = subjournalPage(pPg);
-     }
-+  
-+    /* Write the contents of the page out to the database file. */
-+    if( rc==SQLITE_OK ){
-+      assert( (pPg->flags&PGHDR_NEED_SYNC)==0 );
-+      rc = pager_write_pagelist(pPager, pPg);
-+    }
-   }
- 
--  /* Update the database size and return.
--  */
--  if( pPager->dbSize<pPg->pgno ){
--    pPager->dbSize = pPg->pgno;
-+  /* Mark the page as clean. */
-+  if( rc==SQLITE_OK ){
-+    PAGERTRACE(("STRESS %d page %d\n", PAGERID(pPager), pPg->pgno));
-+    sqlite3PcacheMakeClean(pPg);
-   }
--  return rc;
-+
-+  return pager_error(pPager, rc); 
- }
- 
-+
- /*
--** Mark a data page as writeable. This routine must be called before 
--** making changes to a page. The caller must check the return value 
--** of this function and be careful not to change any page data unless 
--** this routine returns SQLITE_OK.
-+** Allocate and initialize a new Pager object and put a pointer to it
-+** in *ppPager. The pager should eventually be freed by passing it
-+** to sqlite3PagerClose().
- **
--** The difference between this function and pager_write() is that this
--** function also deals with the special case where 2 or more pages
--** fit on a single disk sector. In this case all co-resident pages
--** must have been written to the journal file before returning.
-+** The zFilename argument is the path to the database file to open.
-+** If zFilename is NULL then a randomly-named temporary file is created
-+** and used as the file to be cached. Temporary files are deleted
-+** automatically when they are closed. If zFilename is ":memory:" then 
-+** all information is held in cache. It is never written to disk. 
-+** This can be used to implement an in-memory database.
- **
--** If an error occurs, SQLITE_NOMEM or an IO error code is returned
--** as appropriate. Otherwise, SQLITE_OK.
-+** The nExtra parameter specifies the number of bytes of space allocated
-+** along with each page reference. This space is available to the user
-+** via the sqlite3PagerGetExtra() API.
-+**
-+** The flags argument is used to specify properties that affect the
-+** operation of the pager. It should be passed some bitwise combination
-+** of the PAGER_OMIT_JOURNAL and PAGER_NO_READLOCK flags.
-+**
-+** The vfsFlags parameter is a bitmask to pass to the flags parameter
-+** of the xOpen() method of the supplied VFS when opening files. 
-+**
-+** If the pager object is allocated and the specified file opened 
-+** successfully, SQLITE_OK is returned and *ppPager set to point to
-+** the new pager object. If an error occurs, *ppPager is set to NULL
-+** and error code returned. This function may return SQLITE_NOMEM
-+** (sqlite3Malloc() is used to allocate memory), SQLITE_CANTOPEN or 
-+** various SQLITE_IOERR_XXX errors.
- */
--SQLITE_PRIVATE int sqlite3PagerWrite(DbPage *pDbPage){
--  int rc = SQLITE_OK;
-+SQLITE_PRIVATE int sqlite3PagerOpen(
-+  sqlite3_vfs *pVfs,       /* The virtual file system to use */
-+  Pager **ppPager,         /* OUT: Return the Pager structure here */
-+  const char *zFilename,   /* Name of the database file to open */
-+  int nExtra,              /* Extra bytes append to each in-memory page */
-+  int flags,               /* flags controlling this file */
-+  int vfsFlags,            /* flags passed through to sqlite3_vfs.xOpen() */
-+  void (*xReinit)(DbPage*) /* Function to reinitialize pages */
-+){
-+  u8 *pPtr;
-+  Pager *pPager = 0;       /* Pager object to allocate and return */
-+  int rc = SQLITE_OK;      /* Return code */
-+  int tempFile = 0;        /* True for temp files (incl. in-memory files) */
-+  int memDb = 0;           /* True if this is an in-memory file */
-+  int readOnly = 0;        /* True if this is a read-only file */
-+  int journalFileSize;     /* Bytes to allocate for each journal fd */
-+  char *zPathname = 0;     /* Full path to database file */
-+  int nPathname = 0;       /* Number of bytes in zPathname */
-+  int useJournal = (flags & PAGER_OMIT_JOURNAL)==0; /* False to omit journal */
-+  int noReadlock = (flags & PAGER_NO_READLOCK)!=0;  /* True to omit read-lock */
-+  int pcacheSize = sqlite3PcacheSize();       /* Bytes to allocate for PCache */
-+  u32 szPageDflt = SQLITE_DEFAULT_PAGE_SIZE;  /* Default page size */
-+  const char *zUri = 0;    /* URI args to copy */
-+  int nUri = 0;            /* Number of bytes of URI args at *zUri */
- 
--  PgHdr *pPg = pDbPage;
--  Pager *pPager = pPg->pPager;
--  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
-+  /* Figure out how much space is required for each journal file-handle
-+  ** (there are two of them, the main journal and the sub-journal). This
-+  ** is the maximum space required for an in-memory journal file handle 
-+  ** and a regular journal file-handle. Note that a "regular journal-handle"
-+  ** may be a wrapper capable of caching the first portion of the journal
-+  ** file in memory to implement the atomic-write optimization (see 
-+  ** source file journal.c).
-+  */
-+  if( sqlite3JournalSize(pVfs)>sqlite3MemJournalSize() ){
-+    journalFileSize = ROUND8(sqlite3JournalSize(pVfs));
-+  }else{
-+    journalFileSize = ROUND8(sqlite3MemJournalSize());
-+  }
- 
--  assert( pPager->eState>=PAGER_WRITER_LOCKED );
--  assert( pPager->eState!=PAGER_ERROR );
--  assert( assert_pager_state(pPager) );
-+  /* Set the output variable to NULL in case an error occurs. */
-+  *ppPager = 0;
- 
--  if( nPagePerSector>1 ){
--    Pgno nPageCount;          /* Total number of pages in database file */
--    Pgno pg1;                 /* First page of the sector pPg is located on. */
--    int nPage = 0;            /* Number of pages starting at pg1 to journal */
--    int ii;                   /* Loop counter */
--    int needSync = 0;         /* True if any page has PGHDR_NEED_SYNC */
-+#ifndef SQLITE_OMIT_MEMORYDB
-+  if( flags & PAGER_MEMORY ){
-+    memDb = 1;
-+    zFilename = 0;
-+  }
-+#endif
- 
--    /* Set the doNotSyncSpill flag to 1. This is because we cannot allow
--    ** a journal header to be written between the pages journaled by
--    ** this function.
--    */
--    assert( !MEMDB );
--    assert( pPager->doNotSyncSpill==0 );
--    pPager->doNotSyncSpill++;
-+  /* Compute and store the full pathname in an allocated buffer pointed
-+  ** to by zPathname, length nPathname. Or, if this is a temporary file,
-+  ** leave both nPathname and zPathname set to 0.
-+  */
-+  if( zFilename && zFilename[0] ){
-+    const char *z;
-+    nPathname = pVfs->mxPathname+1;
-+    zPathname = sqlite3Malloc(nPathname*2);
-+    if( zPathname==0 ){
-+      return SQLITE_NOMEM;
-+    }
-+    zPathname[0] = 0; /* Make sure initialized even if FullPathname() fails */
-+    rc = sqlite3OsFullPathname(pVfs, zFilename, nPathname, zPathname);
-+    nPathname = sqlite3Strlen30(zPathname);
-+    z = zUri = &zFilename[sqlite3Strlen30(zFilename)+1];
-+    while( *z ){
-+      z += sqlite3Strlen30(z)+1;
-+      z += sqlite3Strlen30(z)+1;
-+    }
-+    nUri = &z[1] - zUri;
-+    if( rc==SQLITE_OK && nPathname+8>pVfs->mxPathname ){
-+      /* This branch is taken when the journal path required by
-+      ** the database being opened will be more than pVfs->mxPathname
-+      ** bytes in length. This means the database cannot be opened,
-+      ** as it will not be possible to open the journal file or even
-+      ** check for a hot-journal before reading.
-+      */
-+      rc = SQLITE_CANTOPEN_BKPT;
-+    }
-+    if( rc!=SQLITE_OK ){
-+      sqlite3_free(zPathname);
-+      return rc;
-+    }
-+  }
- 
--    /* This trick assumes that both the page-size and sector-size are
--    ** an integer power of 2. It sets variable pg1 to the identifier
--    ** of the first page of the sector pPg is located on.
--    */
--    pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
-+  /* Allocate memory for the Pager structure, PCache object, the
-+  ** three file descriptors, the database file name and the journal 
-+  ** file name. The layout in memory is as follows:
-+  **
-+  **     Pager object                    (sizeof(Pager) bytes)
-+  **     PCache object                   (sqlite3PcacheSize() bytes)
-+  **     Database file handle            (pVfs->szOsFile bytes)
-+  **     Sub-journal file handle         (journalFileSize bytes)
-+  **     Main journal file handle        (journalFileSize bytes)
-+  **     Database file name              (nPathname+1 bytes)
-+  **     Journal file name               (nPathname+8+1 bytes)
-+  */
-+  pPtr = (u8 *)sqlite3MallocZero(
-+    ROUND8(sizeof(*pPager)) +      /* Pager structure */
-+    ROUND8(pcacheSize) +           /* PCache object */
-+    ROUND8(pVfs->szOsFile) +       /* The main db file */
-+    journalFileSize * 2 +          /* The two journal files */ 
-+    nPathname + 1 + nUri +         /* zFilename */
-+    nPathname + 8 + 1              /* zJournal */
-+#ifndef SQLITE_OMIT_WAL
-+    + nPathname + 4 + 1              /* zWal */
-+#endif
-+  );
-+  assert( EIGHT_BYTE_ALIGNMENT(SQLITE_INT_TO_PTR(journalFileSize)) );
-+  if( !pPtr ){
-+    sqlite3_free(zPathname);
-+    return SQLITE_NOMEM;
-+  }
-+  pPager =              (Pager*)(pPtr);
-+  pPager->pPCache =    (PCache*)(pPtr += ROUND8(sizeof(*pPager)));
-+  pPager->fd =   (sqlite3_file*)(pPtr += ROUND8(pcacheSize));
-+  pPager->sjfd = (sqlite3_file*)(pPtr += ROUND8(pVfs->szOsFile));
-+  pPager->jfd =  (sqlite3_file*)(pPtr += journalFileSize);
-+  pPager->zFilename =    (char*)(pPtr += journalFileSize);
-+  assert( EIGHT_BYTE_ALIGNMENT(pPager->jfd) );
- 
--    nPageCount = pPager->dbSize;
--    if( pPg->pgno>nPageCount ){
--      nPage = (pPg->pgno - pg1)+1;
--    }else if( (pg1+nPagePerSector-1)>nPageCount ){
--      nPage = nPageCount+1-pg1;
--    }else{
--      nPage = nPagePerSector;
--    }
--    assert(nPage>0);
--    assert(pg1<=pPg->pgno);
--    assert((pg1+nPage)>pPg->pgno);
-+  /* Fill in the Pager.zFilename and Pager.zJournal buffers, if required. */
-+  if( zPathname ){
-+    assert( nPathname>0 );
-+    pPager->zJournal =   (char*)(pPtr += nPathname + 1 + nUri);
-+    memcpy(pPager->zFilename, zPathname, nPathname);
-+    memcpy(&pPager->zFilename[nPathname+1], zUri, nUri);
-+    memcpy(pPager->zJournal, zPathname, nPathname);
-+    memcpy(&pPager->zJournal[nPathname], "-journal", 8);
-+    sqlite3FileSuffix3(pPager->zFilename, pPager->zJournal);
-+#ifndef SQLITE_OMIT_WAL
-+    pPager->zWal = &pPager->zJournal[nPathname+8+1];
-+    memcpy(pPager->zWal, zPathname, nPathname);
-+    memcpy(&pPager->zWal[nPathname], "-wal", 4);
-+    sqlite3FileSuffix3(pPager->zFilename, pPager->zWal);
-+#endif
-+    sqlite3_free(zPathname);
-+  }
-+  pPager->pVfs = pVfs;
-+  pPager->vfsFlags = vfsFlags;
- 
--    for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
--      Pgno pg = pg1+ii;
--      PgHdr *pPage;
--      if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
--        if( pg!=PAGER_MJ_PGNO(pPager) ){
--          rc = sqlite3PagerGet(pPager, pg, &pPage);
--          if( rc==SQLITE_OK ){
--            rc = pager_write(pPage);
--            if( pPage->flags&PGHDR_NEED_SYNC ){
--              needSync = 1;
--            }
--            sqlite3PagerUnref(pPage);
--          }
--        }
--      }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
--        if( pPage->flags&PGHDR_NEED_SYNC ){
--          needSync = 1;
--        }
--        sqlite3PagerUnref(pPage);
--      }
--    }
-+  /* Open the pager file.
-+  */
-+  if( zFilename && zFilename[0] ){
-+    int fout = 0;                    /* VFS flags returned by xOpen() */
-+    rc = sqlite3OsOpen(pVfs, pPager->zFilename, pPager->fd, vfsFlags, &fout);
-+    assert( !memDb );
-+    readOnly = (fout&SQLITE_OPEN_READONLY);
- 
--    /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages 
--    ** starting at pg1, then it needs to be set for all of them. Because
--    ** writing to any of these nPage pages may damage the others, the
--    ** journal file must contain sync()ed copies of all of them
--    ** before any of them can be written out to the database file.
-+    /* If the file was successfully opened for read/write access,
-+    ** choose a default page size in case we have to create the
-+    ** database file. The default page size is the maximum of:
-+    **
-+    **    + SQLITE_DEFAULT_PAGE_SIZE,
-+    **    + The value returned by sqlite3OsSectorSize()
-+    **    + The largest page size that can be written atomically.
-     */
--    if( rc==SQLITE_OK && needSync ){
--      assert( !MEMDB );
--      for(ii=0; ii<nPage; ii++){
--        PgHdr *pPage = pager_lookup(pPager, pg1+ii);
--        if( pPage ){
--          pPage->flags |= PGHDR_NEED_SYNC;
--          sqlite3PagerUnref(pPage);
-+    if( rc==SQLITE_OK && !readOnly ){
-+      setSectorSize(pPager);
-+      assert(SQLITE_DEFAULT_PAGE_SIZE<=SQLITE_MAX_DEFAULT_PAGE_SIZE);
-+      if( szPageDflt<pPager->sectorSize ){
-+        if( pPager->sectorSize>SQLITE_MAX_DEFAULT_PAGE_SIZE ){
-+          szPageDflt = SQLITE_MAX_DEFAULT_PAGE_SIZE;
-+        }else{
-+          szPageDflt = (u32)pPager->sectorSize;
-+        }
-+      }
-+#ifdef SQLITE_ENABLE_ATOMIC_WRITE
-+      {
-+        int iDc = sqlite3OsDeviceCharacteristics(pPager->fd);
-+        int ii;
-+        assert(SQLITE_IOCAP_ATOMIC512==(512>>8));
-+        assert(SQLITE_IOCAP_ATOMIC64K==(65536>>8));
-+        assert(SQLITE_MAX_DEFAULT_PAGE_SIZE<=65536);
-+        for(ii=szPageDflt; ii<=SQLITE_MAX_DEFAULT_PAGE_SIZE; ii=ii*2){
-+          if( iDc&(SQLITE_IOCAP_ATOMIC|(ii>>8)) ){
-+            szPageDflt = ii;
-+          }
-         }
-       }
-+#endif
-     }
--
--    assert( pPager->doNotSyncSpill==1 );
--    pPager->doNotSyncSpill--;
-   }else{
--    rc = pager_write(pDbPage);
-+    /* If a temporary file is requested, it is not opened immediately.
-+    ** In this case we accept the default page size and delay actually
-+    ** opening the file until the first call to OsWrite().
-+    **
-+    ** This branch is also run for an in-memory database. An in-memory
-+    ** database is the same as a temp-file that is never written out to
-+    ** disk and uses an in-memory rollback journal.
-+    */ 
-+    tempFile = 1;
-+    pPager->eState = PAGER_READER;
-+    pPager->eLock = EXCLUSIVE_LOCK;
-+    readOnly = (vfsFlags&SQLITE_OPEN_READONLY);
-   }
--  return rc;
--}
- 
--/*
--** Return TRUE if the page given in the argument was previously passed
--** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
--** to change the content of the page.
--*/
--#ifndef NDEBUG
--SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){
--  return pPg->flags&PGHDR_DIRTY;
--}
--#endif
-+  /* The following call to PagerSetPagesize() serves to set the value of 
-+  ** Pager.pageSize and to allocate the Pager.pTmpSpace buffer.
-+  */
-+  if( rc==SQLITE_OK ){
-+    assert( pPager->memDb==0 );
-+    rc = sqlite3PagerSetPagesize(pPager, &szPageDflt, -1);
-+    testcase( rc!=SQLITE_OK );
-+  }
- 
--/*
--** A call to this routine tells the pager that it is not necessary to
--** write the information on page pPg back to the disk, even though
--** that page might be marked as dirty.  This happens, for example, when
--** the page has been added as a leaf of the freelist and so its
--** content no longer matters.
--**
--** The overlying software layer calls this routine when all of the data
--** on the given page is unused. The pager marks the page as clean so
--** that it does not get written to disk.
--**
--** Tests show that this optimization can quadruple the speed of large 
--** DELETE operations.
--*/
--SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){
--  Pager *pPager = pPg->pPager;
--  if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){
--    PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)));
--    IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
--    pPg->flags |= PGHDR_DONT_WRITE;
--    pager_set_pagehash(pPg);
-+  /* If an error occurred in either of the blocks above, free the 
-+  ** Pager structure and close the file.
-+  */
-+  if( rc!=SQLITE_OK ){
-+    assert( !pPager->pTmpSpace );
-+    sqlite3OsClose(pPager->fd);
-+    sqlite3_free(pPager);
-+    return rc;
-+  }
-+
-+  /* Initialize the PCache object. */
-+  assert( nExtra<1000 );
-+  nExtra = ROUND8(nExtra);
-+  sqlite3PcacheOpen(szPageDflt, nExtra, !memDb,
-+                    !memDb?pagerStress:0, (void *)pPager, pPager->pPCache);
-+
-+  PAGERTRACE(("OPEN %d %s\n", FILEHANDLEID(pPager->fd), pPager->zFilename));
-+  IOTRACE(("OPEN %p %s\n", pPager, pPager->zFilename))
-+
-+  pPager->useJournal = (u8)useJournal;
-+  pPager->noReadlock = (noReadlock && readOnly) ?1:0;
-+  /* pPager->stmtOpen = 0; */
-+  /* pPager->stmtInUse = 0; */
-+  /* pPager->nRef = 0; */
-+  /* pPager->stmtSize = 0; */
-+  /* pPager->stmtJSize = 0; */
-+  /* pPager->nPage = 0; */
-+  pPager->mxPgno = SQLITE_MAX_PAGE_COUNT;
-+  /* pPager->state = PAGER_UNLOCK; */
-+#if 0
-+  assert( pPager->state == (tempFile ? PAGER_EXCLUSIVE : PAGER_UNLOCK) );
-+#endif
-+  /* pPager->errMask = 0; */
-+  pPager->tempFile = (u8)tempFile;
-+  assert( tempFile==PAGER_LOCKINGMODE_NORMAL 
-+          || tempFile==PAGER_LOCKINGMODE_EXCLUSIVE );
-+  assert( PAGER_LOCKINGMODE_EXCLUSIVE==1 );
-+  pPager->exclusiveMode = (u8)tempFile; 
-+  pPager->changeCountDone = pPager->tempFile;
-+  pPager->memDb = (u8)memDb;
-+  pPager->readOnly = (u8)readOnly;
-+  assert( useJournal || pPager->tempFile );
-+  pPager->noSync = pPager->tempFile;
-+  pPager->fullSync = pPager->noSync ?0:1;
-+  pPager->syncFlags = pPager->noSync ? 0 : SQLITE_SYNC_NORMAL;
-+  pPager->ckptSyncFlags = pPager->syncFlags;
-+  /* pPager->pFirst = 0; */
-+  /* pPager->pFirstSynced = 0; */
-+  /* pPager->pLast = 0; */
-+  pPager->nExtra = (u16)nExtra;
-+  pPager->journalSizeLimit = SQLITE_DEFAULT_JOURNAL_SIZE_LIMIT;
-+  assert( isOpen(pPager->fd) || tempFile );
-+  setSectorSize(pPager);
-+  if( !useJournal ){
-+    pPager->journalMode = PAGER_JOURNALMODE_OFF;
-+  }else if( memDb ){
-+    pPager->journalMode = PAGER_JOURNALMODE_MEMORY;
-   }
-+  /* pPager->xBusyHandler = 0; */
-+  /* pPager->pBusyHandlerArg = 0; */
-+  pPager->xReiniter = xReinit;
-+  /* memset(pPager->aHash, 0, sizeof(pPager->aHash)); */
-+
-+  *ppPager = pPager;
-+  return SQLITE_OK;
- }
- 
-+
-+
- /*
--** This routine is called to increment the value of the database file 
--** change-counter, stored as a 4-byte big-endian integer starting at 
--** byte offset 24 of the pager file.  The secondary change counter at
--** 92 is also updated, as is the SQLite version number at offset 96.
-+** This function is called after transitioning from PAGER_UNLOCK to
-+** PAGER_SHARED state. It tests if there is a hot journal present in
-+** the file-system for the given pager. A hot journal is one that 
-+** needs to be played back. According to this function, a hot-journal
-+** file exists if the following criteria are met:
- **
--** But this only happens if the pPager->changeCountDone flag is false.
--** To avoid excess churning of page 1, the update only happens once.
--** See also the pager_write_changecounter() routine that does an 
--** unconditional update of the change counters.
-+**   * The journal file exists in the file system, and
-+**   * No process holds a RESERVED or greater lock on the database file, and
-+**   * The database file itself is greater than 0 bytes in size, and
-+**   * The first byte of the journal file exists and is not 0x00.
- **
--** If the isDirectMode flag is zero, then this is done by calling 
--** sqlite3PagerWrite() on page 1, then modifying the contents of the
--** page data. In this case the file will be updated when the current
--** transaction is committed.
-+** If the current size of the database file is 0 but a journal file
-+** exists, that is probably an old journal left over from a prior
-+** database with the same name. In this case the journal file is
-+** just deleted using OsDelete, *pExists is set to 0 and SQLITE_OK
-+** is returned.
- **
--** The isDirectMode flag may only be non-zero if the library was compiled
--** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case,
--** if isDirect is non-zero, then the database file is updated directly
--** by writing an updated version of page 1 using a call to the 
--** sqlite3OsWrite() function.
-+** This routine does not check if there is a master journal filename
-+** at the end of the file. If there is, and that master journal file
-+** does not exist, then the journal file is not really hot. In this
-+** case this routine will return a false-positive. The pager_playback()
-+** routine will discover that the journal file is not really hot and 
-+** will not roll it back. 
-+**
-+** If a hot-journal file is found to exist, *pExists is set to 1 and 
-+** SQLITE_OK returned. If no hot-journal file is present, *pExists is
-+** set to 0 and SQLITE_OK returned. If an IO error occurs while trying
-+** to determine whether or not a hot-journal file exists, the IO error
-+** code is returned and the value of *pExists is undefined.
- */
--static int pager_incr_changecounter(Pager *pPager, int isDirectMode){
--  int rc = SQLITE_OK;
--
--  assert( pPager->eState==PAGER_WRITER_CACHEMOD
--       || pPager->eState==PAGER_WRITER_DBMOD
--  );
--  assert( assert_pager_state(pPager) );
--
--  /* Declare and initialize constant integer 'isDirect'. If the
--  ** atomic-write optimization is enabled in this build, then isDirect
--  ** is initialized to the value passed as the isDirectMode parameter
--  ** to this function. Otherwise, it is always set to zero.
--  **
--  ** The idea is that if the atomic-write optimization is not
--  ** enabled at compile time, the compiler can omit the tests of
--  ** 'isDirect' below, as well as the block enclosed in the
--  ** "if( isDirect )" condition.
--  */
--#ifndef SQLITE_ENABLE_ATOMIC_WRITE
--# define DIRECT_MODE 0
--  assert( isDirectMode==0 );
--  UNUSED_PARAMETER(isDirectMode);
--#else
--# define DIRECT_MODE isDirectMode
--#endif
-+static int hasHotJournal(Pager *pPager, int *pExists){
-+  sqlite3_vfs * const pVfs = pPager->pVfs;
-+  int rc = SQLITE_OK;           /* Return code */
-+  int exists = 1;               /* True if a journal file is present */
-+  int jrnlOpen = !!isOpen(pPager->jfd);
- 
--  if( !pPager->changeCountDone && pPager->dbSize>0 ){
--    PgHdr *pPgHdr;                /* Reference to page 1 */
-+  assert( pPager->useJournal );
-+  assert( isOpen(pPager->fd) );
-+  assert( pPager->eState==PAGER_OPEN );
- 
--    assert( !pPager->tempFile && isOpen(pPager->fd) );
-+  assert( jrnlOpen==0 || ( sqlite3OsDeviceCharacteristics(pPager->jfd) &
-+    SQLITE_IOCAP_UNDELETABLE_WHEN_OPEN
-+  ));
- 
--    /* Open page 1 of the file for writing. */
--    rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
--    assert( pPgHdr==0 || rc==SQLITE_OK );
-+  *pExists = 0;
-+  if( !jrnlOpen ){
-+    rc = sqlite3OsAccess(pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &exists);
-+  }
-+  if( rc==SQLITE_OK && exists ){
-+    int locked = 0;             /* True if some process holds a RESERVED lock */
- 
--    /* If page one was fetched successfully, and this function is not
--    ** operating in direct-mode, make page 1 writable.  When not in 
--    ** direct mode, page 1 is always held in cache and hence the PagerGet()
--    ** above is always successful - hence the ALWAYS on rc==SQLITE_OK.
-+    /* Race condition here:  Another process might have been holding the
-+    ** RESERVED lock and have a journal open at the sqlite3OsAccess() 
-+    ** call above, but then delete the journal and drop the lock before
-+    ** we get to the following sqlite3OsCheckReservedLock() call.  If that
-+    ** is the case, this routine might think there is a hot journal when
-+    ** in fact there is none.  This results in a false-positive which will
-+    ** be dealt with by the playback routine.  Ticket #3883.
-     */
--    if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){
--      rc = sqlite3PagerWrite(pPgHdr);
--    }
--
--    if( rc==SQLITE_OK ){
--      /* Actually do the update of the change counter */
--      pager_write_changecounter(pPgHdr);
-+    rc = sqlite3OsCheckReservedLock(pPager->fd, &locked);
-+    if( rc==SQLITE_OK && !locked ){
-+      Pgno nPage;                 /* Number of pages in database file */
- 
--      /* If running in direct mode, write the contents of page 1 to the file. */
--      if( DIRECT_MODE ){
--        const void *zBuf;
--        assert( pPager->dbFileSize>0 );
--        CODEC2(pPager, pPgHdr->pData, 1, 6, rc=SQLITE_NOMEM, zBuf);
--        if( rc==SQLITE_OK ){
--          rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
--        }
--        if( rc==SQLITE_OK ){
--          pPager->changeCountDone = 1;
-+      /* Check the size of the database file. If it consists of 0 pages,
-+      ** then delete the journal file. See the header comment above for 
-+      ** the reasoning here.  Delete the obsolete journal file under
-+      ** a RESERVED lock to avoid race conditions and to avoid violating
-+      ** [H33020].
-+      */
-+      rc = pagerPagecount(pPager, &nPage);
-+      if( rc==SQLITE_OK ){
-+        if( nPage==0 ){
-+          sqlite3BeginBenignMalloc();
-+          if( pagerLockDb(pPager, RESERVED_LOCK)==SQLITE_OK ){
-+            sqlite3OsDelete(pVfs, pPager->zJournal, 0);
-+            if( !pPager->exclusiveMode ) pagerUnlockDb(pPager, SHARED_LOCK);
-+          }
-+          sqlite3EndBenignMalloc();
-+        }else{
-+          /* The journal file exists and no other connection has a reserved
-+          ** or greater lock on the database file. Now check that there is
-+          ** at least one non-zero byte at the start of the journal file.
-+          ** If there is, then we consider this journal to be hot. If not, 
-+          ** it can be ignored.
-+          */
-+          if( !jrnlOpen ){
-+            int f = SQLITE_OPEN_READONLY|SQLITE_OPEN_MAIN_JOURNAL;
-+            rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &f);
-+          }
-+          if( rc==SQLITE_OK ){
-+            u8 first = 0;
-+            rc = sqlite3OsRead(pPager->jfd, (void *)&first, 1, 0);
-+            if( rc==SQLITE_IOERR_SHORT_READ ){
-+              rc = SQLITE_OK;
-+            }
-+            if( !jrnlOpen ){
-+              sqlite3OsClose(pPager->jfd);
-+            }
-+            *pExists = (first!=0);
-+          }else if( rc==SQLITE_CANTOPEN ){
-+            /* If we cannot open the rollback journal file in order to see if
-+            ** it has a zero header, that might be due to an I/O error, or
-+            ** it might be due to the race condition described above and in
-+            ** ticket #3883.  Either way, assume that the journal is hot.
-+            ** This might be a false positive.  But if it is, then the
-+            ** automatic journal playback and recovery mechanism will deal
-+            ** with it under an EXCLUSIVE lock where we do not need to
-+            ** worry so much with race conditions.
-+            */
-+            *pExists = 1;
-+            rc = SQLITE_OK;
-+          }
-         }
--      }else{
--        pPager->changeCountDone = 1;
-       }
-     }
--
--    /* Release the page reference. */
--    sqlite3PagerUnref(pPgHdr);
--  }
--  return rc;
--}
--
--/*
--** Sync the database file to disk. This is a no-op for in-memory databases
--** or pages with the Pager.noSync flag set.
--**
--** If successful, or if called on a pager for which it is a no-op, this
--** function returns SQLITE_OK. Otherwise, an IO error code is returned.
--*/
--SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager){
--  int rc = SQLITE_OK;
--  if( !pPager->noSync ){
--    assert( !MEMDB );
--    rc = sqlite3OsSync(pPager->fd, pPager->syncFlags);
--  }else if( isOpen(pPager->fd) ){
--    assert( !MEMDB );
--    sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC_OMITTED, (void *)&rc);
-   }
--  return rc;
--}
- 
--/*
--** This function may only be called while a write-transaction is active in
--** rollback. If the connection is in WAL mode, this call is a no-op. 
--** Otherwise, if the connection does not already have an EXCLUSIVE lock on 
--** the database file, an attempt is made to obtain one.
--**
--** If the EXCLUSIVE lock is already held or the attempt to obtain it is
--** successful, or the connection is in WAL mode, SQLITE_OK is returned.
--** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is 
--** returned.
--*/
--SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager){
--  int rc = SQLITE_OK;
--  assert( pPager->eState==PAGER_WRITER_CACHEMOD 
--       || pPager->eState==PAGER_WRITER_DBMOD 
--       || pPager->eState==PAGER_WRITER_LOCKED 
--  );
--  assert( assert_pager_state(pPager) );
--  if( 0==pagerUseWal(pPager) ){
--    rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
--  }
-   return rc;
- }
- 
- /*
--** Sync the database file for the pager pPager. zMaster points to the name
--** of a master journal file that should be written into the individual
--** journal file. zMaster may be NULL, which is interpreted as no master
--** journal (a single database transaction).
--**
--** This routine ensures that:
-+** This function is called to obtain a shared lock on the database file.
-+** It is illegal to call sqlite3PagerAcquire() until after this function
-+** has been successfully called. If a shared-lock is already held when
-+** this function is called, it is a no-op.
- **
--**   * The database file change-counter is updated,
--**   * the journal is synced (unless the atomic-write optimization is used),
--**   * all dirty pages are written to the database file, 
--**   * the database file is truncated (if required), and
--**   * the database file synced. 
-+** The following operations are also performed by this function.
- **
--** The only thing that remains to commit the transaction is to finalize 
--** (delete, truncate or zero the first part of) the journal file (or 
--** delete the master journal file if specified).
-+**   1) If the pager is currently in PAGER_OPEN state (no lock held
-+**      on the database file), then an attempt is made to obtain a
-+**      SHARED lock on the database file. Immediately after obtaining
-+**      the SHARED lock, the file-system is checked for a hot-journal,
-+**      which is played back if present. Following any hot-journal 
-+**      rollback, the contents of the cache are validated by checking
-+**      the 'change-counter' field of the database file header and
-+**      discarded if they are found to be invalid.
- **
--** Note that if zMaster==NULL, this does not overwrite a previous value
--** passed to an sqlite3PagerCommitPhaseOne() call.
-+**   2) If the pager is running in exclusive-mode, and there are currently
-+**      no outstanding references to any pages, and is in the error state,
-+**      then an attempt is made to clear the error state by discarding
-+**      the contents of the page cache and rolling back any open journal
-+**      file.
- **
--** If the final parameter - noSync - is true, then the database file itself
--** is not synced. The caller must call sqlite3PagerSync() directly to
--** sync the database file before calling CommitPhaseTwo() to delete the
--** journal file in this case.
-+** If everything is successful, SQLITE_OK is returned. If an IO error 
-+** occurs while locking the database, checking for a hot-journal file or 
-+** rolling back a journal file, the IO error code is returned.
- */
--SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(
--  Pager *pPager,                  /* Pager object */
--  const char *zMaster,            /* If not NULL, the master journal name */
--  int noSync                      /* True to omit the xSync on the db file */
--){
--  int rc = SQLITE_OK;             /* Return code */
-+SQLITE_PRIVATE int sqlite3PagerSharedLock(Pager *pPager){
-+  int rc = SQLITE_OK;                /* Return code */
- 
--  assert( pPager->eState==PAGER_WRITER_LOCKED
--       || pPager->eState==PAGER_WRITER_CACHEMOD
--       || pPager->eState==PAGER_WRITER_DBMOD
--       || pPager->eState==PAGER_ERROR
--  );
-+  /* This routine is only called from b-tree and only when there are no
-+  ** outstanding pages. This implies that the pager state should either
-+  ** be OPEN or READER. READER is only possible if the pager is or was in 
-+  ** exclusive access mode.
-+  */
-+  assert( sqlite3PcacheRefCount(pPager->pPCache)==0 );
-   assert( assert_pager_state(pPager) );
-+  assert( pPager->eState==PAGER_OPEN || pPager->eState==PAGER_READER );
-+  if( NEVER(MEMDB && pPager->errCode) ){ return pPager->errCode; }
- 
--  /* If a prior error occurred, report that error again. */
--  if( NEVER(pPager->errCode) ) return pPager->errCode;
-+  if( !pagerUseWal(pPager) && pPager->eState==PAGER_OPEN ){
-+    int bHotJournal = 1;          /* True if there exists a hot journal-file */
- 
--  PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n", 
--      pPager->zFilename, zMaster, pPager->dbSize));
-+    assert( !MEMDB );
-+    assert( pPager->noReadlock==0 || pPager->readOnly );
- 
--  /* If no database changes have been made, return early. */
--  if( pPager->eState<PAGER_WRITER_CACHEMOD ) return SQLITE_OK;
-+    if( pPager->noReadlock==0 ){
-+      rc = pager_wait_on_lock(pPager, SHARED_LOCK);
-+      if( rc!=SQLITE_OK ){
-+        assert( pPager->eLock==NO_LOCK || pPager->eLock==UNKNOWN_LOCK );
-+        goto failed;
-+      }
-+    }
- 
--  if( MEMDB ){
--    /* If this is an in-memory db, or no pages have been written to, or this
--    ** function has already been called, it is mostly a no-op.  However, any
--    ** backup in progress needs to be restarted.
-+    /* If a journal file exists, and there is no RESERVED lock on the
-+    ** database file, then it either needs to be played back or deleted.
-     */
--    sqlite3BackupRestart(pPager->pBackup);
--  }else{
--    if( pagerUseWal(pPager) ){
--      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
--      PgHdr *pPageOne = 0;
--      if( pList==0 ){
--        /* Must have at least one page for the WAL commit flag.
--        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
--        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
--        pList = pPageOne;
--        pList->pDirty = 0;
--      }
--      assert( rc==SQLITE_OK );
--      if( ALWAYS(pList) ){
--        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, 
--            (pPager->fullSync ? pPager->syncFlags : 0)
--        );
--      }
--      sqlite3PagerUnref(pPageOne);
--      if( rc==SQLITE_OK ){
--        sqlite3PcacheCleanAll(pPager->pPCache);
--      }
--    }else{
--      /* The following block updates the change-counter. Exactly how it
--      ** does this depends on whether or not the atomic-update optimization
--      ** was enabled at compile time, and if this transaction meets the 
--      ** runtime criteria to use the operation: 
--      **
--      **    * The file-system supports the atomic-write property for
--      **      blocks of size page-size, and 
--      **    * This commit is not part of a multi-file transaction, and
--      **    * Exactly one page has been modified and store in the journal file.
--      **
--      ** If the optimization was not enabled at compile time, then the
--      ** pager_incr_changecounter() function is called to update the change
--      ** counter in 'indirect-mode'. If the optimization is compiled in but
--      ** is not applicable to this transaction, call sqlite3JournalCreate()
--      ** to make sure the journal file has actually been created, then call
--      ** pager_incr_changecounter() to update the change-counter in indirect
--      ** mode. 
-+    if( pPager->eLock<=SHARED_LOCK ){
-+      rc = hasHotJournal(pPager, &bHotJournal);
-+    }
-+    if( rc!=SQLITE_OK ){
-+      goto failed;
-+    }
-+    if( bHotJournal ){
-+      /* Get an EXCLUSIVE lock on the database file. At this point it is
-+      ** important that a RESERVED lock is not obtained on the way to the
-+      ** EXCLUSIVE lock. If it were, another process might open the
-+      ** database file, detect the RESERVED lock, and conclude that the
-+      ** database is safe to read while this process is still rolling the 
-+      ** hot-journal back.
-+      ** 
-+      ** Because the intermediate RESERVED lock is not requested, any
-+      ** other process attempting to access the database file will get to 
-+      ** this point in the code and fail to obtain its own EXCLUSIVE lock 
-+      ** on the database file.
-       **
--      ** Otherwise, if the optimization is both enabled and applicable,
--      ** then call pager_incr_changecounter() to update the change-counter
--      ** in 'direct' mode. In this case the journal file will never be
--      ** created for this transaction.
-+      ** Unless the pager is in locking_mode=exclusive mode, the lock is
-+      ** downgraded to SHARED_LOCK before this function returns.
-       */
--  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
--      PgHdr *pPg;
--      assert( isOpen(pPager->jfd) 
--           || pPager->journalMode==PAGER_JOURNALMODE_OFF 
--           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
--      );
--      if( !zMaster && isOpen(pPager->jfd) 
--       && pPager->journalOff==jrnlBufferSize(pPager) 
--       && pPager->dbSize>=pPager->dbOrigSize
--       && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
--      ){
--        /* Update the db file change counter via the direct-write method. The 
--        ** following call will modify the in-memory representation of page 1 
--        ** to include the updated change counter and then write page 1 
--        ** directly to the database file. Because of the atomic-write 
--        ** property of the host file-system, this is safe.
--        */
--        rc = pager_incr_changecounter(pPager, 1);
--      }else{
--        rc = sqlite3JournalCreate(pPager->jfd);
--        if( rc==SQLITE_OK ){
--          rc = pager_incr_changecounter(pPager, 0);
--        }
-+      rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
-+      if( rc!=SQLITE_OK ){
-+        goto failed;
-       }
--  #else
--      rc = pager_incr_changecounter(pPager, 0);
--  #endif
--      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
--  
--      /* If this transaction has made the database smaller, then all pages
--      ** being discarded by the truncation must be written to the journal
--      ** file. This can only happen in auto-vacuum mode.
-+ 
-+      /* If it is not already open and the file exists on disk, open the 
-+      ** journal for read/write access. Write access is required because 
-+      ** in exclusive-access mode the file descriptor will be kept open 
-+      ** and possibly used for a transaction later on. Also, write-access 
-+      ** is usually required to finalize the journal in journal_mode=persist 
-+      ** mode (and also for journal_mode=truncate on some systems).
-       **
--      ** Before reading the pages with page numbers larger than the 
--      ** current value of Pager.dbSize, set dbSize back to the value
--      ** that it took at the start of the transaction. Otherwise, the
--      ** calls to sqlite3PagerGet() return zeroed pages instead of 
--      ** reading data from the database file.
-+      ** If the journal does not exist, it usually means that some 
-+      ** other connection managed to get in and roll it back before 
-+      ** this connection obtained the exclusive lock above. Or, it 
-+      ** may mean that the pager was in the error-state when this
-+      ** function was called and the journal file does not exist.
-       */
--  #ifndef SQLITE_OMIT_AUTOVACUUM
--      if( pPager->dbSize<pPager->dbOrigSize 
--       && pPager->journalMode!=PAGER_JOURNALMODE_OFF
--      ){
--        Pgno i;                                   /* Iterator variable */
--        const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */
--        const Pgno dbSize = pPager->dbSize;       /* Database image size */ 
--        pPager->dbSize = pPager->dbOrigSize;
--        for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){
--          if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
--            PgHdr *pPage;             /* Page to journal */
--            rc = sqlite3PagerGet(pPager, i, &pPage);
--            if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
--            rc = sqlite3PagerWrite(pPage);
--            sqlite3PagerUnref(pPage);
--            if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+      if( !isOpen(pPager->jfd) ){
-+        sqlite3_vfs * const pVfs = pPager->pVfs;
-+        int bExists;              /* True if journal file exists */
-+        rc = sqlite3OsAccess(
-+            pVfs, pPager->zJournal, SQLITE_ACCESS_EXISTS, &bExists);
-+        if( rc==SQLITE_OK && bExists ){
-+          int fout = 0;
-+          int f = SQLITE_OPEN_READWRITE|SQLITE_OPEN_MAIN_JOURNAL;
-+          assert( !pPager->tempFile );
-+          rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, f, &fout);
-+          assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
-+          if( rc==SQLITE_OK && fout&SQLITE_OPEN_READONLY ){
-+            rc = SQLITE_CANTOPEN_BKPT;
-+            sqlite3OsClose(pPager->jfd);
-           }
-         }
--        pPager->dbSize = dbSize;
--      } 
--  #endif
--  
--      /* Write the master journal name into the journal file. If a master 
--      ** journal file name has already been written to the journal file, 
--      ** or if zMaster is NULL (no master journal), then this call is a no-op.
--      */
--      rc = writeMasterJournal(pPager, zMaster);
--      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
--  
--      /* Sync the journal file and write all dirty pages to the database.
--      ** If the atomic-update optimization is being used, this sync will not 
--      ** create the journal file or perform any real IO.
--      **
--      ** Because the change-counter page was just modified, unless the
--      ** atomic-update optimization is used it is almost certain that the
--      ** journal requires a sync here. However, in locking_mode=exclusive
--      ** on a system under memory pressure it is just possible that this is 
--      ** not the case. In this case it is likely enough that the redundant
--      ** xSync() call will be changed to a no-op by the OS anyhow. 
-+      }
-+ 
-+      /* Playback and delete the journal.  Drop the database write
-+      ** lock and reacquire the read lock. Purge the cache before
-+      ** playing back the hot-journal so that we don't end up with
-+      ** an inconsistent cache.  Sync the hot journal before playing
-+      ** it back since the process that crashed and left the hot journal
-+      ** probably did not sync it and we are required to always sync
-+      ** the journal before playing it back.
-       */
--      rc = syncJournal(pPager, 0);
--      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
--  
--      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
-+      if( isOpen(pPager->jfd) ){
-+        assert( rc==SQLITE_OK );
-+        rc = pagerSyncHotJournal(pPager);
-+        if( rc==SQLITE_OK ){
-+          rc = pager_playback(pPager, 1);
-+          pPager->eState = PAGER_OPEN;
-+        }
-+      }else if( !pPager->exclusiveMode ){
-+        pagerUnlockDb(pPager, SHARED_LOCK);
-+      }
-+
-       if( rc!=SQLITE_OK ){
--        assert( rc!=SQLITE_IOERR_BLOCKED );
--        goto commit_phase_one_exit;
-+        /* This branch is taken if an error occurs while trying to open
-+        ** or roll back a hot-journal while holding an EXCLUSIVE lock. The
-+        ** pager_unlock() routine will be called before returning to unlock
-+        ** the file. If the unlock attempt fails, then Pager.eLock must be
-+        ** set to UNKNOWN_LOCK (see the comment above the #define for 
-+        ** UNKNOWN_LOCK for an explanation). 
-+        **
-+        ** In order to get pager_unlock() to do this, set Pager.eState to
-+        ** PAGER_ERROR now. This is not actually counted as a transition
-+        ** to ERROR state in the state diagram at the top of this file,
-+        ** since we know that the same call to pager_unlock() will very
-+        ** shortly transition the pager object to the OPEN state. Calling
-+        ** assert_pager_state() would fail now, as it should not be possible
-+        ** to be in ERROR state when there are zero outstanding page 
-+        ** references.
-+        */
-+        pager_error(pPager, rc);
-+        goto failed;
-       }
--      sqlite3PcacheCleanAll(pPager->pPCache);
--  
--      /* If the file on disk is not the same size as the database image,
--      ** then use pager_truncate to grow or shrink the file here.
-+
-+      assert( pPager->eState==PAGER_OPEN );
-+      assert( (pPager->eLock==SHARED_LOCK)
-+           || (pPager->exclusiveMode && pPager->eLock>SHARED_LOCK)
-+      );
-+    }
-+
-+    if( !pPager->tempFile 
-+     && (pPager->pBackup || sqlite3PcachePagecount(pPager->pPCache)>0) 
-+    ){
-+      /* The shared-lock has just been acquired on the database file
-+      ** and there are already pages in the cache (from a previous
-+      ** read or write transaction).  Check to see if the database
-+      ** has been modified.  If the database has changed, flush the
-+      ** cache.
-+      **
-+      ** Database changes are detected by looking at 16 bytes beginning
-+      ** at offset 24 into the file.  The first 4 of these 16 bytes are
-+      ** a 32-bit counter that is incremented with each change.  The
-+      ** other bytes change randomly with each file change when
-+      ** a codec is in use.
-+      ** 
-+      ** There is a vanishingly small chance that a change will not be 
-+      ** detected.  The chance of an undetected change is so small that
-+      ** it can be neglected.
-       */
--      if( pPager->dbSize!=pPager->dbFileSize ){
--        Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
--        assert( pPager->eState==PAGER_WRITER_DBMOD );
--        rc = pager_truncate(pPager, nNew);
--        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+      Pgno nPage = 0;
-+      char dbFileVers[sizeof(pPager->dbFileVers)];
-+
-+      rc = pagerPagecount(pPager, &nPage);
-+      if( rc ) goto failed;
-+
-+      if( nPage>0 ){
-+        IOTRACE(("CKVERS %p %d\n", pPager, sizeof(dbFileVers)));
-+        rc = sqlite3OsRead(pPager->fd, &dbFileVers, sizeof(dbFileVers), 24);
-+        if( rc!=SQLITE_OK ){
-+          goto failed;
-+        }
-+      }else{
-+        memset(dbFileVers, 0, sizeof(dbFileVers));
-       }
--  
--      /* Finally, sync the database file. */
--      if( !noSync ){
--        rc = sqlite3PagerSync(pPager);
-+
-+      if( memcmp(pPager->dbFileVers, dbFileVers, sizeof(dbFileVers))!=0 ){
-+        pager_reset(pPager);
-       }
--      IOTRACE(("DBSYNC %p\n", pPager))
-     }
-+
-+    /* If there is a WAL file in the file-system, open this database in WAL
-+    ** mode. Otherwise, the following function call is a no-op.
-+    */
-+    rc = pagerOpenWalIfPresent(pPager);
-+#ifndef SQLITE_OMIT_WAL
-+    assert( pPager->pWal==0 || rc==SQLITE_OK );
-+#endif
-   }
- 
--commit_phase_one_exit:
--  if( rc==SQLITE_OK && !pagerUseWal(pPager) ){
--    pPager->eState = PAGER_WRITER_FINISHED;
-+  if( pagerUseWal(pPager) ){
-+    assert( rc==SQLITE_OK );
-+    rc = pagerBeginReadTransaction(pPager);
-+  }
-+
-+  if( pPager->eState==PAGER_OPEN && rc==SQLITE_OK ){
-+    rc = pagerPagecount(pPager, &pPager->dbSize);
-+  }
-+
-+ failed:
-+  if( rc!=SQLITE_OK ){
-+    assert( !MEMDB );
-+    pager_unlock(pPager);
-+    assert( pPager->eState==PAGER_OPEN );
-+  }else{
-+    pPager->eState = PAGER_READER;
-   }
-   return rc;
- }
- 
--
- /*
--** When this function is called, the database file has been completely
--** updated to reflect the changes made by the current transaction and
--** synced to disk. The journal file still exists in the file-system 
--** though, and if a failure occurs at this point it will eventually
--** be used as a hot-journal and the current transaction rolled back.
--**
--** This function finalizes the journal file, either by deleting, 
--** truncating or partially zeroing it, so that it cannot be used 
--** for hot-journal rollback. Once this is done the transaction is
--** irrevocably committed.
-+** If the reference count has reached zero, rollback any active
-+** transaction and unlock the pager.
- **
--** If an error occurs, an IO error code is returned and the pager
--** moves into the error state. Otherwise, SQLITE_OK is returned.
--*/
--SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager *pPager){
--  int rc = SQLITE_OK;                  /* Return code */
--
--  /* This routine should not be called if a prior error has occurred.
--  ** But if (due to a coding error elsewhere in the system) it does get
--  ** called, just return the same error code without doing anything. */
--  if( NEVER(pPager->errCode) ) return pPager->errCode;
--
--  assert( pPager->eState==PAGER_WRITER_LOCKED
--       || pPager->eState==PAGER_WRITER_FINISHED
--       || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD)
--  );
--  assert( assert_pager_state(pPager) );
--
--  /* An optimization. If the database was not actually modified during
--  ** this transaction, the pager is running in exclusive-mode and is
--  ** using persistent journals, then this function is a no-op.
--  **
--  ** The start of the journal file currently contains a single journal 
--  ** header with the nRec field set to 0. If such a journal is used as
--  ** a hot-journal during hot-journal rollback, 0 changes will be made
--  ** to the database file. So there is no need to zero the journal 
--  ** header. Since the pager is in exclusive mode, there is no need
--  ** to drop any locks either.
--  */
--  if( pPager->eState==PAGER_WRITER_LOCKED 
--   && pPager->exclusiveMode 
--   && pPager->journalMode==PAGER_JOURNALMODE_PERSIST
--  ){
--    assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff );
--    pPager->eState = PAGER_READER;
--    return SQLITE_OK;
-+** Except, in locking_mode=EXCLUSIVE when there is nothing in
-+** the rollback journal, the unlock is not performed and there is
-+** nothing to rollback, so this routine is a no-op.
-+*/ 
-+static void pagerUnlockIfUnused(Pager *pPager){
-+  if( (sqlite3PcacheRefCount(pPager->pPCache)==0) ){
-+    pagerUnlockAndRollback(pPager);
-   }
--
--  PAGERTRACE(("COMMIT %d\n", PAGERID(pPager)));
--  rc = pager_end_transaction(pPager, pPager->setMaster);
--  return pager_error(pPager, rc);
- }
- 
- /*
--** If a write transaction is open, then all changes made within the 
--** transaction are reverted and the current write-transaction is closed.
--** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR
--** state if an error occurs.
-+** Acquire a reference to page number pgno in pager pPager (a page
-+** reference has type DbPage*). If the requested reference is 
-+** successfully obtained, it is copied to *ppPage and SQLITE_OK returned.
- **
--** If the pager is already in PAGER_ERROR state when this function is called,
--** it returns Pager.errCode immediately. No work is performed in this case.
-+** If the requested page is already in the cache, it is returned. 
-+** Otherwise, a new page object is allocated and populated with data
-+** read from the database file. In some cases, the pcache module may
-+** choose not to allocate a new page object and may reuse an existing
-+** object with no outstanding references.
- **
--** Otherwise, in rollback mode, this function performs two functions:
-+** The extra data appended to a page is always initialized to zeros the 
-+** first time a page is loaded into memory. If the page requested is 
-+** already in the cache when this function is called, then the extra
-+** data is left as it was when the page object was last used.
- **
--**   1) It rolls back the journal file, restoring all database file and 
--**      in-memory cache pages to the state they were in when the transaction
--**      was opened, and
-+** If the database image is smaller than the requested page or if a 
-+** non-zero value is passed as the noContent parameter and the 
-+** requested page is not already stored in the cache, then no 
-+** actual disk read occurs. In this case the memory image of the 
-+** page is initialized to all zeros. 
- **
--**   2) It finalizes the journal file, so that it is not used for hot
--**      rollback at any point in the future.
-+** If noContent is true, it means that we do not care about the contents
-+** of the page. This occurs in two separate scenarios:
- **
--** Finalization of the journal file (task 2) is only performed if the 
--** rollback is successful.
-+**   a) When reading a free-list leaf page from the database, and
- **
--** In WAL mode, all cache-entries containing data modified within the
--** current transaction are either expelled from the cache or reverted to
--** their pre-transaction state by re-reading data from the database or
--** WAL files. The WAL transaction is then closed.
-+**   b) When a savepoint is being rolled back and we need to load
-+**      a new page into the cache to be filled with the data read
-+**      from the savepoint journal.
-+**
-+** If noContent is true, then the data returned is zeroed instead of
-+** being read from the database. Additionally, the bits corresponding
-+** to pgno in Pager.pInJournal (bitvec of pages already written to the
-+** journal file) and the PagerSavepoint.pInSavepoint bitvecs of any open
-+** savepoints are set. This means if the page is made writable at any
-+** point in the future, using a call to sqlite3PagerWrite(), its contents
-+** will not be journaled. This saves IO.
-+**
-+** The acquisition might fail for several reasons.  In all cases,
-+** an appropriate error code is returned and *ppPage is set to NULL.
-+**
-+** See also sqlite3PagerLookup().  Both this routine and Lookup() attempt
-+** to find a page in the in-memory cache first.  If the page is not already
-+** in memory, this routine goes to disk to read it in whereas Lookup()
-+** just returns 0.  This routine acquires a read-lock the first time it
-+** has to go to disk, and could also playback an old journal if necessary.
-+** Since Lookup() never goes to disk, it never has to deal with locks
-+** or journal files.
- */
--SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){
--  int rc = SQLITE_OK;                  /* Return code */
--  PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));
-+SQLITE_PRIVATE int sqlite3PagerAcquire(
-+  Pager *pPager,      /* The pager open on the database file */
-+  Pgno pgno,          /* Page number to fetch */
-+  DbPage **ppPage,    /* Write a pointer to the page here */
-+  int noContent       /* Do not bother reading content from disk if true */
-+){
-+  int rc;
-+  PgHdr *pPg;
- 
--  /* PagerRollback() is a no-op if called in READER or OPEN state. If
--  ** the pager is already in the ERROR state, the rollback is not 
--  ** attempted here. Instead, the error code is returned to the caller.
--  */
-+  assert( pPager->eState>=PAGER_READER );
-   assert( assert_pager_state(pPager) );
--  if( pPager->eState==PAGER_ERROR ) return pPager->errCode;
--  if( pPager->eState<=PAGER_READER ) return SQLITE_OK;
- 
--  if( pagerUseWal(pPager) ){
--    int rc2;
--    rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1);
--    rc2 = pager_end_transaction(pPager, pPager->setMaster);
--    if( rc==SQLITE_OK ) rc = rc2;
--  }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){
--    int eState = pPager->eState;
--    rc = pager_end_transaction(pPager, 0);
--    if( !MEMDB && eState>PAGER_WRITER_LOCKED ){
--      /* This can happen using journal_mode=off. Move the pager to the error 
--      ** state to indicate that the contents of the cache may not be trusted.
--      ** Any active readers will get SQLITE_ABORT.
--      */
--      pPager->errCode = SQLITE_ABORT;
--      pPager->eState = PAGER_ERROR;
--      return rc;
--    }
-+  if( pgno==0 ){
-+    return SQLITE_CORRUPT_BKPT;
-+  }
-+
-+  /* If the pager is in the error state, return an error immediately. 
-+  ** Otherwise, request the page from the PCache layer. */
-+  if( pPager->errCode!=SQLITE_OK ){
-+    rc = pPager->errCode;
-   }else{
--    rc = pager_playback(pPager, 0);
-+    rc = sqlite3PcacheFetch(pPager->pPCache, pgno, 1, ppPage);
-   }
- 
--  assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK );
--  assert( rc==SQLITE_OK || rc==SQLITE_FULL || (rc&0xFF)==SQLITE_IOERR );
-+  if( rc!=SQLITE_OK ){
-+    /* Either the call to sqlite3PcacheFetch() returned an error or the
-+    ** pager was already in the error-state when this function was called.
-+    ** Set pPg to 0 and jump to the exception handler.  */
-+    pPg = 0;
-+    goto pager_acquire_err;
-+  }
-+  assert( (*ppPage)->pgno==pgno );
-+  assert( (*ppPage)->pPager==pPager || (*ppPage)->pPager==0 );
- 
--  /* If an error occurs during a ROLLBACK, we can no longer trust the pager
--  ** cache. So call pager_error() on the way out to make any error persistent.
--  */
--  return pager_error(pPager, rc);
--}
-+  if( (*ppPage)->pPager && !noContent ){
-+    /* In this case the pcache already contains an initialized copy of
-+    ** the page. Return without further ado.  */
-+    assert( pgno<=PAGER_MAX_PGNO && pgno!=PAGER_MJ_PGNO(pPager) );
-+    PAGER_INCR(pPager->nHit);
-+    return SQLITE_OK;
- 
--/*
--** Return TRUE if the database file is opened read-only.  Return FALSE
--** if the database is (in theory) writable.
--*/
--SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){
--  return pPager->readOnly;
--}
-+  }else{
-+    /* The pager cache has created a new page. Its content needs to 
-+    ** be initialized.  */
- 
--/*
--** Return the number of references to the pager.
--*/
--SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){
--  return sqlite3PcacheRefCount(pPager->pPCache);
--}
-+    PAGER_INCR(pPager->nMiss);
-+    pPg = *ppPage;
-+    pPg->pPager = pPager;
- 
--/*
--** Return the approximate number of bytes of memory currently
--** used by the pager and its associated cache.
--*/
--SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager *pPager){
--  int perPageSize = pPager->pageSize + pPager->nExtra + sizeof(PgHdr)
--                                     + 5*sizeof(void*);
--  return perPageSize*sqlite3PcachePagecount(pPager->pPCache)
--           + sqlite3MallocSize(pPager)
--           + pPager->pageSize;
--}
-+    /* The maximum page number is 2^31. Return SQLITE_CORRUPT if a page
-+    ** number greater than this, or the unused locking-page, is requested. */
-+    if( pgno>PAGER_MAX_PGNO || pgno==PAGER_MJ_PGNO(pPager) ){
-+      rc = SQLITE_CORRUPT_BKPT;
-+      goto pager_acquire_err;
-+    }
- 
--/*
--** Return the number of references to the specified page.
--*/
--SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage *pPage){
--  return sqlite3PcachePageRefcount(pPage);
-+    if( MEMDB || pPager->dbSize<pgno || noContent || !isOpen(pPager->fd) ){
-+      if( pgno>pPager->mxPgno ){
-+        rc = SQLITE_FULL;
-+        goto pager_acquire_err;
-+      }
-+      if( noContent ){
-+        /* Failure to set the bits in the InJournal bit-vectors is benign.
-+        ** It merely means that we might do some extra work to journal a 
-+        ** page that does not need to be journaled.  Nevertheless, be sure 
-+        ** to test the case where a malloc error occurs while trying to set 
-+        ** a bit in a bit vector.
-+        */
-+        sqlite3BeginBenignMalloc();
-+        if( pgno<=pPager->dbOrigSize ){
-+          TESTONLY( rc = ) sqlite3BitvecSet(pPager->pInJournal, pgno);
-+          testcase( rc==SQLITE_NOMEM );
-+        }
-+        TESTONLY( rc = ) addToSavepointBitvecs(pPager, pgno);
-+        testcase( rc==SQLITE_NOMEM );
-+        sqlite3EndBenignMalloc();
-+      }
-+      memset(pPg->pData, 0, pPager->pageSize);
-+      IOTRACE(("ZERO %p %d\n", pPager, pgno));
-+    }else{
-+      assert( pPg->pPager==pPager );
-+      rc = readDbPage(pPg);
-+      if( rc!=SQLITE_OK ){
-+        goto pager_acquire_err;
-+      }
-+    }
-+    pager_set_pagehash(pPg);
-+  }
-+
-+  return SQLITE_OK;
-+
-+pager_acquire_err:
-+  assert( rc!=SQLITE_OK );
-+  if( pPg ){
-+    sqlite3PcacheDrop(pPg);
-+  }
-+  pagerUnlockIfUnused(pPager);
-+
-+  *ppPage = 0;
-+  return rc;
- }
- 
--#ifdef SQLITE_TEST
- /*
--** This routine is used for testing and analysis only.
-+** Acquire a page if it is already in the in-memory cache.  Do
-+** not read the page from disk.  Return a pointer to the page,
-+** or 0 if the page is not in cache. 
-+**
-+** See also sqlite3PagerGet().  The difference between this routine
-+** and sqlite3PagerGet() is that _get() will go to the disk and read
-+** in the page if the page is not already in cache.  This routine
-+** returns NULL if the page is not in cache or if a disk I/O error 
-+** has ever happened.
- */
--SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){
--  static int a[11];
--  a[0] = sqlite3PcacheRefCount(pPager->pPCache);
--  a[1] = sqlite3PcachePagecount(pPager->pPCache);
--  a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);
--  a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize;
--  a[4] = pPager->eState;
--  a[5] = pPager->errCode;
--  a[6] = pPager->nHit;
--  a[7] = pPager->nMiss;
--  a[8] = 0;  /* Used to be pPager->nOvfl */
--  a[9] = pPager->nRead;
--  a[10] = pPager->nWrite;
--  return a;
-+SQLITE_PRIVATE DbPage *sqlite3PagerLookup(Pager *pPager, Pgno pgno){
-+  PgHdr *pPg = 0;
-+  assert( pPager!=0 );
-+  assert( pgno!=0 );
-+  assert( pPager->pPCache!=0 );
-+  assert( pPager->eState>=PAGER_READER && pPager->eState!=PAGER_ERROR );
-+  sqlite3PcacheFetch(pPager->pPCache, pgno, 0, &pPg);
-+  return pPg;
- }
--#endif
- 
- /*
--** Return true if this is an in-memory pager.
-+** Release a page reference.
-+**
-+** If the number of references to the page drops to zero, then the
-+** page is added to the LRU list.  When all references to all pages
-+** are released, a rollback occurs and the lock on the database is
-+** removed.
- */
--SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){
--  return MEMDB;
-+SQLITE_PRIVATE void sqlite3PagerUnref(DbPage *pPg){
-+  if( pPg ){
-+    Pager *pPager = pPg->pPager;
-+    sqlite3PcacheRelease(pPg);
-+    pagerUnlockIfUnused(pPager);
-+  }
- }
- 
- /*
--** Check that there are at least nSavepoint savepoints open. If there are
--** currently less than nSavepoints open, then open one or more savepoints
--** to make up the difference. If the number of savepoints is already
--** equal to nSavepoint, then this function is a no-op.
-+** This function is called at the start of every write transaction.
-+** There must already be a RESERVED or EXCLUSIVE lock on the database 
-+** file when this routine is called.
- **
--** If a memory allocation fails, SQLITE_NOMEM is returned. If an error 
--** occurs while opening the sub-journal file, then an IO error code is
--** returned. Otherwise, SQLITE_OK.
-+** Open the journal file for pager pPager and write a journal header
-+** to the start of it. If there are active savepoints, open the sub-journal
-+** as well. This function is only used when the journal file is being 
-+** opened to write a rollback log for a transaction. It is not used 
-+** when opening a hot journal file to roll it back.
-+**
-+** If the journal file is already open (as it may be in exclusive mode),
-+** then this function just writes a journal header to the start of the
-+** already open file. 
-+**
-+** Whether or not the journal file is opened by this function, the
-+** Pager.pInJournal bitvec structure is allocated.
-+**
-+** Return SQLITE_OK if everything is successful. Otherwise, return 
-+** SQLITE_NOMEM if the attempt to allocate Pager.pInJournal fails, or 
-+** an IO error code if opening or writing the journal file fails.
- */
--SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
--  int rc = SQLITE_OK;                       /* Return code */
--  int nCurrent = pPager->nSavepoint;        /* Current number of savepoints */
-+static int pager_open_journal(Pager *pPager){
-+  int rc = SQLITE_OK;                        /* Return code */
-+  sqlite3_vfs * const pVfs = pPager->pVfs;   /* Local cache of vfs pointer */
- 
--  assert( pPager->eState>=PAGER_WRITER_LOCKED );
-+  assert( pPager->eState==PAGER_WRITER_LOCKED );
-   assert( assert_pager_state(pPager) );
-+  assert( pPager->pInJournal==0 );
-+  
-+  /* If already in the error state, this function is a no-op.  But on
-+  ** the other hand, this routine is never called if we are already in
-+  ** an error state. */
-+  if( NEVER(pPager->errCode) ) return pPager->errCode;
- 
--  if( nSavepoint>nCurrent && pPager->useJournal ){
--    int ii;                                 /* Iterator variable */
--    PagerSavepoint *aNew;                   /* New Pager.aSavepoint array */
--
--    /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM
--    ** if the allocation fails. Otherwise, zero the new portion in case a 
--    ** malloc failure occurs while populating it in the for(...) loop below.
--    */
--    aNew = (PagerSavepoint *)sqlite3Realloc(
--        pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint
--    );
--    if( !aNew ){
-+  if( !pagerUseWal(pPager) && pPager->journalMode!=PAGER_JOURNALMODE_OFF ){
-+    pPager->pInJournal = sqlite3BitvecCreate(pPager->dbSize);
-+    if( pPager->pInJournal==0 ){
-       return SQLITE_NOMEM;
-     }
--    memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint));
--    pPager->aSavepoint = aNew;
--
--    /* Populate the PagerSavepoint structures just allocated. */
--    for(ii=nCurrent; ii<nSavepoint; ii++){
--      aNew[ii].nOrig = pPager->dbSize;
--      if( isOpen(pPager->jfd) && pPager->journalOff>0 ){
--        aNew[ii].iOffset = pPager->journalOff;
-+  
-+    /* Open the journal file if it is not already open. */
-+    if( !isOpen(pPager->jfd) ){
-+      if( pPager->journalMode==PAGER_JOURNALMODE_MEMORY ){
-+        sqlite3MemJournalOpen(pPager->jfd);
-       }else{
--        aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager);
--      }
--      aNew[ii].iSubRec = pPager->nSubRec;
--      aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize);
--      if( !aNew[ii].pInSavepoint ){
--        return SQLITE_NOMEM;
--      }
--      if( pagerUseWal(pPager) ){
--        sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData);
-+        const int flags =                   /* VFS flags to open journal file */
-+          SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|
-+          (pPager->tempFile ? 
-+            (SQLITE_OPEN_DELETEONCLOSE|SQLITE_OPEN_TEMP_JOURNAL):
-+            (SQLITE_OPEN_MAIN_JOURNAL)
-+          );
-+  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
-+        rc = sqlite3JournalOpen(
-+            pVfs, pPager->zJournal, pPager->jfd, flags, jrnlBufferSize(pPager)
-+        );
-+  #else
-+        rc = sqlite3OsOpen(pVfs, pPager->zJournal, pPager->jfd, flags, 0);
-+  #endif
-       }
--      pPager->nSavepoint = ii+1;
-+      assert( rc!=SQLITE_OK || isOpen(pPager->jfd) );
-     }
--    assert( pPager->nSavepoint==nSavepoint );
--    assertTruncateConstraint(pPager);
-+  
-+  
-+    /* Write the first journal header to the journal file and open 
-+    ** the sub-journal if necessary.
-+    */
-+    if( rc==SQLITE_OK ){
-+      /* TODO: Check if all of these are really required. */
-+      pPager->nRec = 0;
-+      pPager->journalOff = 0;
-+      pPager->setMaster = 0;
-+      pPager->journalHdr = 0;
-+      rc = writeJournalHdr(pPager);
-+    }
-+  }
-+
-+  if( rc!=SQLITE_OK ){
-+    sqlite3BitvecDestroy(pPager->pInJournal);
-+    pPager->pInJournal = 0;
-+  }else{
-+    assert( pPager->eState==PAGER_WRITER_LOCKED );
-+    pPager->eState = PAGER_WRITER_CACHEMOD;
-   }
- 
-   return rc;
- }
- 
- /*
--** This function is called to rollback or release (commit) a savepoint.
--** The savepoint to release or rollback need not be the most recently 
--** created savepoint.
--**
--** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE.
--** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with
--** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes
--** that have occurred since the specified savepoint was created.
--**
--** The savepoint to rollback or release is identified by parameter 
--** iSavepoint. A value of 0 means to operate on the outermost savepoint
--** (the first created). A value of (Pager.nSavepoint-1) means operate
--** on the most recently created savepoint. If iSavepoint is greater than
--** (Pager.nSavepoint-1), then this function is a no-op.
--**
--** If a negative value is passed to this function, then the current
--** transaction is rolled back. This is different to calling 
--** sqlite3PagerRollback() because this function does not terminate
--** the transaction or unlock the database, it just restores the 
--** contents of the database to its original state. 
-+** Begin a write-transaction on the specified pager object. If a 
-+** write-transaction has already been opened, this function is a no-op.
- **
--** In any case, all savepoints with an index greater than iSavepoint 
--** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE),
--** then savepoint iSavepoint is also destroyed.
-+** If the exFlag argument is false, then acquire at least a RESERVED
-+** lock on the database file. If exFlag is true, then acquire at least
-+** an EXCLUSIVE lock. If such a lock is already held, no locking 
-+** functions need be called.
- **
--** This function may return SQLITE_NOMEM if a memory allocation fails,
--** or an IO error code if an IO error occurs while rolling back a 
--** savepoint. If no errors occur, SQLITE_OK is returned.
--*/ 
--SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){
--  int rc = pPager->errCode;       /* Return code */
--
--  assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
--  assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK );
-+** If the subjInMemory argument is non-zero, then any sub-journal opened
-+** within this transaction will be opened as an in-memory file. This
-+** has no effect if the sub-journal is already opened (as it may be when
-+** running in exclusive mode) or if the transaction does not require a
-+** sub-journal. If the subjInMemory argument is zero, then any required
-+** sub-journal is implemented in-memory if pPager is an in-memory database, 
-+** or using a temporary file otherwise.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerBegin(Pager *pPager, int exFlag, int subjInMemory){
-+  int rc = SQLITE_OK;
- 
--  if( rc==SQLITE_OK && iSavepoint<pPager->nSavepoint ){
--    int ii;            /* Iterator variable */
--    int nNew;          /* Number of remaining savepoints after this op. */
-+  if( pPager->errCode ) return pPager->errCode;
-+  assert( pPager->eState>=PAGER_READER && pPager->eState<PAGER_ERROR );
-+  pPager->subjInMemory = (u8)subjInMemory;
- 
--    /* Figure out how many savepoints will still be active after this
--    ** operation. Store this value in nNew. Then free resources associated 
--    ** with any savepoints that are destroyed by this operation.
--    */
--    nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1);
--    for(ii=nNew; ii<pPager->nSavepoint; ii++){
--      sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
--    }
--    pPager->nSavepoint = nNew;
-+  if( ALWAYS(pPager->eState==PAGER_READER) ){
-+    assert( pPager->pInJournal==0 );
- 
--    /* If this is a release of the outermost savepoint, truncate 
--    ** the sub-journal to zero bytes in size. */
--    if( op==SAVEPOINT_RELEASE ){
--      if( nNew==0 && isOpen(pPager->sjfd) ){
--        /* Only truncate if it is an in-memory sub-journal. */
--        if( sqlite3IsMemJournal(pPager->sjfd) ){
--          rc = sqlite3OsTruncate(pPager->sjfd, 0);
--          assert( rc==SQLITE_OK );
-+    if( pagerUseWal(pPager) ){
-+      /* If the pager is configured to use locking_mode=exclusive, and an
-+      ** exclusive lock on the database is not already held, obtain it now.
-+      */
-+      if( pPager->exclusiveMode && sqlite3WalExclusiveMode(pPager->pWal, -1) ){
-+        rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
-+        if( rc!=SQLITE_OK ){
-+          return rc;
-         }
--        pPager->nSubRec = 0;
-+        sqlite3WalExclusiveMode(pPager->pWal, 1);
-+      }
-+
-+      /* Grab the write lock on the log file. If successful, upgrade to
-+      ** PAGER_RESERVED state. Otherwise, return an error code to the caller.
-+      ** The busy-handler is not invoked if another connection already
-+      ** holds the write-lock. If possible, the upper layer will call it.
-+      */
-+      rc = sqlite3WalBeginWriteTransaction(pPager->pWal);
-+    }else{
-+      /* Obtain a RESERVED lock on the database file. If the exFlag parameter
-+      ** is true, then immediately upgrade this to an EXCLUSIVE lock. The
-+      ** busy-handler callback can be used when upgrading to the EXCLUSIVE
-+      ** lock, but not when obtaining the RESERVED lock.
-+      */
-+      rc = pagerLockDb(pPager, RESERVED_LOCK);
-+      if( rc==SQLITE_OK && exFlag ){
-+        rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
-       }
-     }
--    /* Else this is a rollback operation, playback the specified savepoint.
--    ** If this is a temp-file, it is possible that the journal file has
--    ** not yet been opened. In this case there have been no changes to
--    ** the database file, so the playback operation can be skipped.
--    */
--    else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){
--      PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
--      rc = pagerPlaybackSavepoint(pPager, pSavepoint);
--      assert(rc!=SQLITE_DONE);
-+
-+    if( rc==SQLITE_OK ){
-+      /* Change to WRITER_LOCKED state.
-+      **
-+      ** WAL mode sets Pager.eState to PAGER_WRITER_LOCKED or CACHEMOD
-+      ** when it has an open transaction, but never to DBMOD or FINISHED.
-+      ** This is because in those states the code to roll back savepoint 
-+      ** transactions may copy data from the sub-journal into the database 
-+      ** file as well as into the page cache. Which would be incorrect in 
-+      ** WAL mode.
-+      */
-+      pPager->eState = PAGER_WRITER_LOCKED;
-+      pPager->dbHintSize = pPager->dbSize;
-+      pPager->dbFileSize = pPager->dbSize;
-+      pPager->dbOrigSize = pPager->dbSize;
-+      pPager->journalOff = 0;
-     }
-+
-+    assert( rc==SQLITE_OK || pPager->eState==PAGER_READER );
-+    assert( rc!=SQLITE_OK || pPager->eState==PAGER_WRITER_LOCKED );
-+    assert( assert_pager_state(pPager) );
-   }
- 
-+  PAGERTRACE(("TRANSACTION %d\n", PAGERID(pPager)));
-   return rc;
- }
- 
- /*
--** Return the full pathname of the database file.
-+** Mark a single data page as writeable. The page is written into the 
-+** main journal or sub-journal as required. If the page is written into
-+** one of the journals, the corresponding bit is set in the 
-+** Pager.pInJournal bitvec and the PagerSavepoint.pInSavepoint bitvecs
-+** of any open savepoints as appropriate.
- */
--SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager *pPager){
--  return pPager->zFilename;
--}
-+static int pager_write(PgHdr *pPg){
-+  void *pData = pPg->pData;
-+  Pager *pPager = pPg->pPager;
-+  int rc = SQLITE_OK;
- 
--/*
--** Return the VFS structure for the pager.
--*/
--SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
--  return pPager->pVfs;
--}
-+  /* This routine is not called unless a write-transaction has already 
-+  ** been started. The journal file may or may not be open at this point.
-+  ** It is never called in the ERROR state.
-+  */
-+  assert( pPager->eState==PAGER_WRITER_LOCKED
-+       || pPager->eState==PAGER_WRITER_CACHEMOD
-+       || pPager->eState==PAGER_WRITER_DBMOD
-+  );
-+  assert( assert_pager_state(pPager) );
- 
--/*
--** Return the file handle for the database file associated
--** with the pager.  This might return NULL if the file has
--** not yet been opened.
--*/
--SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){
--  return pPager->fd;
--}
-+  /* If an error has been previously detected, report the same error
-+  ** again. This should not happen, but the check provides robustness. */
-+  if( NEVER(pPager->errCode) )  return pPager->errCode;
- 
--/*
--** Return the full pathname of the journal file.
--*/
--SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager *pPager){
--  return pPager->zJournal;
--}
-+  /* Higher-level routines never call this function if database is not
-+  ** writable.  But check anyway, just for robustness. */
-+  if( NEVER(pPager->readOnly) ) return SQLITE_PERM;
- 
--/*
--** Return true if fsync() calls are disabled for this pager.  Return FALSE
--** if fsync()s are executed normally.
--*/
--SQLITE_PRIVATE int sqlite3PagerNosync(Pager *pPager){
--  return pPager->noSync;
--}
-+  CHECK_PAGE(pPg);
- 
--#ifdef SQLITE_HAS_CODEC
--/*
--** Set or retrieve the codec for this pager
--*/
--SQLITE_PRIVATE void sqlite3PagerSetCodec(
--  Pager *pPager,
--  void *(*xCodec)(void*,void*,Pgno,int),
--  void (*xCodecSizeChng)(void*,int,int),
--  void (*xCodecFree)(void*),
--  void *pCodec
--){
--  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
--  pPager->xCodec = pPager->memDb ? 0 : xCodec;
--  pPager->xCodecSizeChng = xCodecSizeChng;
--  pPager->xCodecFree = xCodecFree;
--  pPager->pCodec = pCodec;
--  pagerReportSize(pPager);
--}
--SQLITE_PRIVATE void *sqlite3PagerGetCodec(Pager *pPager){
--  return pPager->pCodec;
-+  /* The journal file needs to be opened. Higher level routines have already
-+  ** obtained the necessary locks to begin the write-transaction, but the
-+  ** rollback journal might not yet be open. Open it now if this is the case.
-+  **
-+  ** This is done before calling sqlite3PcacheMakeDirty() on the page. 
-+  ** Otherwise, if it were done after calling sqlite3PcacheMakeDirty(), then
-+  ** an error might occur and the pager would end up in WRITER_LOCKED state
-+  ** with pages marked as dirty in the cache.
-+  */
-+  if( pPager->eState==PAGER_WRITER_LOCKED ){
-+    rc = pager_open_journal(pPager);
-+    if( rc!=SQLITE_OK ) return rc;
-+  }
-+  assert( pPager->eState>=PAGER_WRITER_CACHEMOD );
-+  assert( assert_pager_state(pPager) );
-+
-+  /* Mark the page as dirty.  If the page has already been written
-+  ** to the journal then we can return right away.
-+  */
-+  sqlite3PcacheMakeDirty(pPg);
-+  if( pageInJournal(pPg) && !subjRequiresPage(pPg) ){
-+    assert( !pagerUseWal(pPager) );
-+  }else{
-+  
-+    /* The transaction journal now exists and we have a RESERVED or an
-+    ** EXCLUSIVE lock on the main database file.  Write the current page to
-+    ** the transaction journal if it is not there already.
-+    */
-+    if( !pageInJournal(pPg) && !pagerUseWal(pPager) ){
-+      assert( pagerUseWal(pPager)==0 );
-+      if( pPg->pgno<=pPager->dbOrigSize && isOpen(pPager->jfd) ){
-+        u32 cksum;
-+        char *pData2;
-+        i64 iOff = pPager->journalOff;
-+
-+        /* We should never write to the journal file the page that
-+        ** contains the database locks.  The following assert verifies
-+        ** that we do not. */
-+        assert( pPg->pgno!=PAGER_MJ_PGNO(pPager) );
-+
-+        assert( pPager->journalHdr<=pPager->journalOff );
-+        CODEC2(pPager, pData, pPg->pgno, 7, return SQLITE_NOMEM, pData2);
-+        cksum = pager_cksum(pPager, (u8*)pData2);
-+
-+        /* Even if an IO or diskfull error occurs while journalling the
-+        ** page in the block above, set the need-sync flag for the page.
-+        ** Otherwise, when the transaction is rolled back, the logic in
-+        ** playback_one_page() will think that the page needs to be restored
-+        ** in the database file. And if an IO error occurs while doing so,
-+        ** then corruption may follow.
-+        */
-+        pPg->flags |= PGHDR_NEED_SYNC;
-+
-+        rc = write32bits(pPager->jfd, iOff, pPg->pgno);
-+        if( rc!=SQLITE_OK ) return rc;
-+        rc = sqlite3OsWrite(pPager->jfd, pData2, pPager->pageSize, iOff+4);
-+        if( rc!=SQLITE_OK ) return rc;
-+        rc = write32bits(pPager->jfd, iOff+pPager->pageSize+4, cksum);
-+        if( rc!=SQLITE_OK ) return rc;
-+
-+        IOTRACE(("JOUT %p %d %lld %d\n", pPager, pPg->pgno, 
-+                 pPager->journalOff, pPager->pageSize));
-+        PAGER_INCR(sqlite3_pager_writej_count);
-+        PAGERTRACE(("JOURNAL %d page %d needSync=%d hash(%08x)\n",
-+             PAGERID(pPager), pPg->pgno, 
-+             ((pPg->flags&PGHDR_NEED_SYNC)?1:0), pager_pagehash(pPg)));
-+
-+        pPager->journalOff += 8 + pPager->pageSize;
-+        pPager->nRec++;
-+        assert( pPager->pInJournal!=0 );
-+        rc = sqlite3BitvecSet(pPager->pInJournal, pPg->pgno);
-+        testcase( rc==SQLITE_NOMEM );
-+        assert( rc==SQLITE_OK || rc==SQLITE_NOMEM );
-+        rc |= addToSavepointBitvecs(pPager, pPg->pgno);
-+        if( rc!=SQLITE_OK ){
-+          assert( rc==SQLITE_NOMEM );
-+          return rc;
-+        }
-+      }else{
-+        if( pPager->eState!=PAGER_WRITER_DBMOD ){
-+          pPg->flags |= PGHDR_NEED_SYNC;
-+        }
-+        PAGERTRACE(("APPEND %d page %d needSync=%d\n",
-+                PAGERID(pPager), pPg->pgno,
-+               ((pPg->flags&PGHDR_NEED_SYNC)?1:0)));
-+      }
-+    }
-+  
-+    /* If the statement journal is open and the page is not in it,
-+    ** then write the current page to the statement journal.  Note that
-+    ** the statement journal format differs from the standard journal format
-+    ** in that it omits the checksums and the header.
-+    */
-+    if( subjRequiresPage(pPg) ){
-+      rc = subjournalPage(pPg);
-+    }
-+  }
-+
-+  /* Update the database size and return.
-+  */
-+  if( pPager->dbSize<pPg->pgno ){
-+    pPager->dbSize = pPg->pgno;
-+  }
-+  return rc;
- }
--#endif
- 
--#ifndef SQLITE_OMIT_AUTOVACUUM
- /*
--** Move the page pPg to location pgno in the file.
--**
--** There must be no references to the page previously located at
--** pgno (which we call pPgOld) though that page is allowed to be
--** in cache.  If the page previously located at pgno is not already
--** in the rollback journal, it is not put there by by this routine.
--**
--** References to the page pPg remain valid. Updating any
--** meta-data associated with pPg (i.e. data stored in the nExtra bytes
--** allocated along with the page) is the responsibility of the caller.
--**
--** A transaction must be active when this routine is called. It used to be
--** required that a statement transaction was not active, but this restriction
--** has been removed (CREATE INDEX needs to move a page when a statement
--** transaction is active).
-+** Mark a data page as writeable. This routine must be called before 
-+** making changes to a page. The caller must check the return value 
-+** of this function and be careful not to change any page data unless 
-+** this routine returns SQLITE_OK.
- **
--** If the fourth argument, isCommit, is non-zero, then this page is being
--** moved as part of a database reorganization just before the transaction 
--** is being committed. In this case, it is guaranteed that the database page 
--** pPg refers to will not be written to again within this transaction.
-+** The difference between this function and pager_write() is that this
-+** function also deals with the special case where 2 or more pages
-+** fit on a single disk sector. In this case all co-resident pages
-+** must have been written to the journal file before returning.
- **
--** This function may return SQLITE_NOMEM or an IO error code if an error
--** occurs. Otherwise, it returns SQLITE_OK.
-+** If an error occurs, SQLITE_NOMEM or an IO error code is returned
-+** as appropriate. Otherwise, SQLITE_OK.
- */
--SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
--  PgHdr *pPgOld;               /* The page being overwritten. */
--  Pgno needSyncPgno = 0;       /* Old value of pPg->pgno, if sync is required */
--  int rc;                      /* Return code */
--  Pgno origPgno;               /* The original page number */
-+SQLITE_PRIVATE int sqlite3PagerWrite(DbPage *pDbPage){
-+  int rc = SQLITE_OK;
- 
--  assert( pPg->nRef>0 );
--  assert( pPager->eState==PAGER_WRITER_CACHEMOD
--       || pPager->eState==PAGER_WRITER_DBMOD
--  );
--  assert( assert_pager_state(pPager) );
-+  PgHdr *pPg = pDbPage;
-+  Pager *pPager = pPg->pPager;
-+  Pgno nPagePerSector = (pPager->sectorSize/pPager->pageSize);
- 
--  /* In order to be able to rollback, an in-memory database must journal
--  ** the page we are moving from.
--  */
--  if( MEMDB ){
--    rc = sqlite3PagerWrite(pPg);
--    if( rc ) return rc;
--  }
-+  assert( pPager->eState>=PAGER_WRITER_LOCKED );
-+  assert( pPager->eState!=PAGER_ERROR );
-+  assert( assert_pager_state(pPager) );
- 
--  /* If the page being moved is dirty and has not been saved by the latest
--  ** savepoint, then save the current contents of the page into the 
--  ** sub-journal now. This is required to handle the following scenario:
--  **
--  **   BEGIN;
--  **     <journal page X, then modify it in memory>
--  **     SAVEPOINT one;
--  **       <Move page X to location Y>
--  **     ROLLBACK TO one;
--  **
--  ** If page X were not written to the sub-journal here, it would not
--  ** be possible to restore its contents when the "ROLLBACK TO one"
--  ** statement were is processed.
--  **
--  ** subjournalPage() may need to allocate space to store pPg->pgno into
--  ** one or more savepoint bitvecs. This is the reason this function
--  ** may return SQLITE_NOMEM.
--  */
--  if( pPg->flags&PGHDR_DIRTY
--   && subjRequiresPage(pPg)
--   && SQLITE_OK!=(rc = subjournalPage(pPg))
--  ){
--    return rc;
--  }
-+  if( nPagePerSector>1 ){
-+    Pgno nPageCount;          /* Total number of pages in database file */
-+    Pgno pg1;                 /* First page of the sector pPg is located on. */
-+    int nPage = 0;            /* Number of pages starting at pg1 to journal */
-+    int ii;                   /* Loop counter */
-+    int needSync = 0;         /* True if any page has PGHDR_NEED_SYNC */
- 
--  PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", 
--      PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno));
--  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
-+    /* Set the doNotSyncSpill flag to 1. This is because we cannot allow
-+    ** a journal header to be written between the pages journaled by
-+    ** this function.
-+    */
-+    assert( !MEMDB );
-+    assert( pPager->doNotSyncSpill==0 );
-+    pPager->doNotSyncSpill++;
- 
--  /* If the journal needs to be sync()ed before page pPg->pgno can
--  ** be written to, store pPg->pgno in local variable needSyncPgno.
--  **
--  ** If the isCommit flag is set, there is no need to remember that
--  ** the journal needs to be sync()ed before database page pPg->pgno 
--  ** can be written to. The caller has already promised not to write to it.
--  */
--  if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
--    needSyncPgno = pPg->pgno;
--    assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
--    assert( pPg->flags&PGHDR_DIRTY );
--  }
-+    /* This trick assumes that both the page-size and sector-size are
-+    ** an integer power of 2. It sets variable pg1 to the identifier
-+    ** of the first page of the sector pPg is located on.
-+    */
-+    pg1 = ((pPg->pgno-1) & ~(nPagePerSector-1)) + 1;
- 
--  /* If the cache contains a page with page-number pgno, remove it
--  ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for 
--  ** page pgno before the 'move' operation, it needs to be retained 
--  ** for the page moved there.
--  */
--  pPg->flags &= ~PGHDR_NEED_SYNC;
--  pPgOld = pager_lookup(pPager, pgno);
--  assert( !pPgOld || pPgOld->nRef==1 );
--  if( pPgOld ){
--    pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
--    if( MEMDB ){
--      /* Do not discard pages from an in-memory database since we might
--      ** need to rollback later.  Just move the page out of the way. */
--      sqlite3PcacheMove(pPgOld, pPager->dbSize+1);
-+    nPageCount = pPager->dbSize;
-+    if( pPg->pgno>nPageCount ){
-+      nPage = (pPg->pgno - pg1)+1;
-+    }else if( (pg1+nPagePerSector-1)>nPageCount ){
-+      nPage = nPageCount+1-pg1;
-     }else{
--      sqlite3PcacheDrop(pPgOld);
-+      nPage = nPagePerSector;
-     }
--  }
--
--  origPgno = pPg->pgno;
--  sqlite3PcacheMove(pPg, pgno);
--  sqlite3PcacheMakeDirty(pPg);
-+    assert(nPage>0);
-+    assert(pg1<=pPg->pgno);
-+    assert((pg1+nPage)>pPg->pgno);
- 
--  /* For an in-memory database, make sure the original page continues
--  ** to exist, in case the transaction needs to roll back.  Use pPgOld
--  ** as the original page since it has already been allocated.
--  */
--  if( MEMDB ){
--    assert( pPgOld );
--    sqlite3PcacheMove(pPgOld, origPgno);
--    sqlite3PagerUnref(pPgOld);
--  }
-+    for(ii=0; ii<nPage && rc==SQLITE_OK; ii++){
-+      Pgno pg = pg1+ii;
-+      PgHdr *pPage;
-+      if( pg==pPg->pgno || !sqlite3BitvecTest(pPager->pInJournal, pg) ){
-+        if( pg!=PAGER_MJ_PGNO(pPager) ){
-+          rc = sqlite3PagerGet(pPager, pg, &pPage);
-+          if( rc==SQLITE_OK ){
-+            rc = pager_write(pPage);
-+            if( pPage->flags&PGHDR_NEED_SYNC ){
-+              needSync = 1;
-+            }
-+            sqlite3PagerUnref(pPage);
-+          }
-+        }
-+      }else if( (pPage = pager_lookup(pPager, pg))!=0 ){
-+        if( pPage->flags&PGHDR_NEED_SYNC ){
-+          needSync = 1;
-+        }
-+        sqlite3PagerUnref(pPage);
-+      }
-+    }
- 
--  if( needSyncPgno ){
--    /* If needSyncPgno is non-zero, then the journal file needs to be 
--    ** sync()ed before any data is written to database file page needSyncPgno.
--    ** Currently, no such page exists in the page-cache and the 
--    ** "is journaled" bitvec flag has been set. This needs to be remedied by
--    ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC
--    ** flag.
--    **
--    ** If the attempt to load the page into the page-cache fails, (due
--    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
--    ** array. Otherwise, if the page is loaded and written again in
--    ** this transaction, it may be written to the database file before
--    ** it is synced into the journal file. This way, it may end up in
--    ** the journal file twice, but that is not a problem.
-+    /* If the PGHDR_NEED_SYNC flag is set for any of the nPage pages 
-+    ** starting at pg1, then it needs to be set for all of them. Because
-+    ** writing to any of these nPage pages may damage the others, the
-+    ** journal file must contain sync()ed copies of all of them
-+    ** before any of them can be written out to the database file.
-     */
--    PgHdr *pPgHdr;
--    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
--    if( rc!=SQLITE_OK ){
--      if( needSyncPgno<=pPager->dbOrigSize ){
--        assert( pPager->pTmpSpace!=0 );
--        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace);
-+    if( rc==SQLITE_OK && needSync ){
-+      assert( !MEMDB );
-+      for(ii=0; ii<nPage; ii++){
-+        PgHdr *pPage = pager_lookup(pPager, pg1+ii);
-+        if( pPage ){
-+          pPage->flags |= PGHDR_NEED_SYNC;
-+          sqlite3PagerUnref(pPage);
-+        }
-       }
--      return rc;
-     }
--    pPgHdr->flags |= PGHDR_NEED_SYNC;
--    sqlite3PcacheMakeDirty(pPgHdr);
--    sqlite3PagerUnref(pPgHdr);
--  }
--
--  return SQLITE_OK;
--}
--#endif
- 
--/*
--** Return a pointer to the data for the specified page.
--*/
--SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *pPg){
--  assert( pPg->nRef>0 || pPg->pPager->memDb );
--  return pPg->pData;
-+    assert( pPager->doNotSyncSpill==1 );
-+    pPager->doNotSyncSpill--;
-+  }else{
-+    rc = pager_write(pDbPage);
-+  }
-+  return rc;
- }
- 
- /*
--** Return a pointer to the Pager.nExtra bytes of "extra" space 
--** allocated along with the specified page.
-+** Return TRUE if the page given in the argument was previously passed
-+** to sqlite3PagerWrite().  In other words, return TRUE if it is ok
-+** to change the content of the page.
- */
--SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *pPg){
--  return pPg->pExtra;
-+#ifndef NDEBUG
-+SQLITE_PRIVATE int sqlite3PagerIswriteable(DbPage *pPg){
-+  return pPg->flags&PGHDR_DIRTY;
- }
-+#endif
- 
- /*
--** Get/set the locking-mode for this pager. Parameter eMode must be one
--** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 
--** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
--** the locking-mode is set to the value specified.
-+** A call to this routine tells the pager that it is not necessary to
-+** write the information on page pPg back to the disk, even though
-+** that page might be marked as dirty.  This happens, for example, when
-+** the page has been added as a leaf of the freelist and so its
-+** content no longer matters.
- **
--** The returned value is either PAGER_LOCKINGMODE_NORMAL or
--** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
--** locking-mode.
-+** The overlying software layer calls this routine when all of the data
-+** on the given page is unused. The pager marks the page as clean so
-+** that it does not get written to disk.
-+**
-+** Tests show that this optimization can quadruple the speed of large 
-+** DELETE operations.
- */
--SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *pPager, int eMode){
--  assert( eMode==PAGER_LOCKINGMODE_QUERY
--            || eMode==PAGER_LOCKINGMODE_NORMAL
--            || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
--  assert( PAGER_LOCKINGMODE_QUERY<0 );
--  assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
--  assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) );
--  if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){
--    pPager->exclusiveMode = (u8)eMode;
-+SQLITE_PRIVATE void sqlite3PagerDontWrite(PgHdr *pPg){
-+  Pager *pPager = pPg->pPager;
-+  if( (pPg->flags&PGHDR_DIRTY) && pPager->nSavepoint==0 ){
-+    PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pPager)));
-+    IOTRACE(("CLEAN %p %d\n", pPager, pPg->pgno))
-+    pPg->flags |= PGHDR_DONT_WRITE;
-+    pager_set_pagehash(pPg);
-   }
--  return (int)pPager->exclusiveMode;
- }
- 
- /*
--** Set the journal-mode for this pager. Parameter eMode must be one of:
--**
--**    PAGER_JOURNALMODE_DELETE
--**    PAGER_JOURNALMODE_TRUNCATE
--**    PAGER_JOURNALMODE_PERSIST
--**    PAGER_JOURNALMODE_OFF
--**    PAGER_JOURNALMODE_MEMORY
--**    PAGER_JOURNALMODE_WAL
--**
--** The journalmode is set to the value specified if the change is allowed.
--** The change may be disallowed for the following reasons:
-+** This routine is called to increment the value of the database file 
-+** change-counter, stored as a 4-byte big-endian integer starting at 
-+** byte offset 24 of the pager file.  The secondary change counter at
-+** 92 is also updated, as is the SQLite version number at offset 96.
- **
--**   *  An in-memory database can only have its journal_mode set to _OFF
--**      or _MEMORY.
-+** But this only happens if the pPager->changeCountDone flag is false.
-+** To avoid excess churning of page 1, the update only happens once.
-+** See also the pager_write_changecounter() routine that does an 
-+** unconditional update of the change counters.
- **
--**   *  Temporary databases cannot have _WAL journalmode.
-+** If the isDirectMode flag is zero, then this is done by calling 
-+** sqlite3PagerWrite() on page 1, then modifying the contents of the
-+** page data. In this case the file will be updated when the current
-+** transaction is committed.
- **
--** The returned indicate the current (possibly updated) journal-mode.
-+** The isDirectMode flag may only be non-zero if the library was compiled
-+** with the SQLITE_ENABLE_ATOMIC_WRITE macro defined. In this case,
-+** if isDirect is non-zero, then the database file is updated directly
-+** by writing an updated version of page 1 using a call to the 
-+** sqlite3OsWrite() function.
- */
--SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){
--  u8 eOld = pPager->journalMode;    /* Prior journalmode */
--
--#ifdef SQLITE_DEBUG
--  /* The print_pager_state() routine is intended to be used by the debugger
--  ** only.  We invoke it once here to suppress a compiler warning. */
--  print_pager_state(pPager);
--#endif
--
-+static int pager_incr_changecounter(Pager *pPager, int isDirectMode){
-+  int rc = SQLITE_OK;
- 
--  /* The eMode parameter is always valid */
--  assert(      eMode==PAGER_JOURNALMODE_DELETE
--            || eMode==PAGER_JOURNALMODE_TRUNCATE
--            || eMode==PAGER_JOURNALMODE_PERSIST
--            || eMode==PAGER_JOURNALMODE_OFF 
--            || eMode==PAGER_JOURNALMODE_WAL 
--            || eMode==PAGER_JOURNALMODE_MEMORY );
-+  assert( pPager->eState==PAGER_WRITER_CACHEMOD
-+       || pPager->eState==PAGER_WRITER_DBMOD
-+  );
-+  assert( assert_pager_state(pPager) );
- 
--  /* This routine is only called from the OP_JournalMode opcode, and
--  ** the logic there will never allow a temporary file to be changed
--  ** to WAL mode.
-+  /* Declare and initialize constant integer 'isDirect'. If the
-+  ** atomic-write optimization is enabled in this build, then isDirect
-+  ** is initialized to the value passed as the isDirectMode parameter
-+  ** to this function. Otherwise, it is always set to zero.
-+  **
-+  ** The idea is that if the atomic-write optimization is not
-+  ** enabled at compile time, the compiler can omit the tests of
-+  ** 'isDirect' below, as well as the block enclosed in the
-+  ** "if( isDirect )" condition.
-   */
--  assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL );
-+#ifndef SQLITE_ENABLE_ATOMIC_WRITE
-+# define DIRECT_MODE 0
-+  assert( isDirectMode==0 );
-+  UNUSED_PARAMETER(isDirectMode);
-+#else
-+# define DIRECT_MODE isDirectMode
-+#endif
- 
--  /* Do allow the journalmode of an in-memory database to be set to
--  ** anything other than MEMORY or OFF
--  */
--  if( MEMDB ){
--    assert( eOld==PAGER_JOURNALMODE_MEMORY || eOld==PAGER_JOURNALMODE_OFF );
--    if( eMode!=PAGER_JOURNALMODE_MEMORY && eMode!=PAGER_JOURNALMODE_OFF ){
--      eMode = eOld;
--    }
--  }
-+  if( !pPager->changeCountDone && pPager->dbSize>0 ){
-+    PgHdr *pPgHdr;                /* Reference to page 1 */
- 
--  if( eMode!=eOld ){
-+    assert( !pPager->tempFile && isOpen(pPager->fd) );
- 
--    /* Change the journal mode. */
--    assert( pPager->eState!=PAGER_ERROR );
--    pPager->journalMode = (u8)eMode;
-+    /* Open page 1 of the file for writing. */
-+    rc = sqlite3PagerGet(pPager, 1, &pPgHdr);
-+    assert( pPgHdr==0 || rc==SQLITE_OK );
- 
--    /* When transistioning from TRUNCATE or PERSIST to any other journal
--    ** mode except WAL, unless the pager is in locking_mode=exclusive mode,
--    ** delete the journal file.
-+    /* If page one was fetched successfully, and this function is not
-+    ** operating in direct-mode, make page 1 writable.  When not in 
-+    ** direct mode, page 1 is always held in cache and hence the PagerGet()
-+    ** above is always successful - hence the ALWAYS on rc==SQLITE_OK.
-     */
--    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
--    assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
--    assert( (PAGER_JOURNALMODE_DELETE & 5)==0 );
--    assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 );
--    assert( (PAGER_JOURNALMODE_OFF & 5)==0 );
--    assert( (PAGER_JOURNALMODE_WAL & 5)==5 );
-+    if( !DIRECT_MODE && ALWAYS(rc==SQLITE_OK) ){
-+      rc = sqlite3PagerWrite(pPgHdr);
-+    }
- 
--    assert( isOpen(pPager->fd) || pPager->exclusiveMode );
--    if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){
-+    if( rc==SQLITE_OK ){
-+      /* Actually do the update of the change counter */
-+      pager_write_changecounter(pPgHdr);
- 
--      /* In this case we would like to delete the journal file. If it is
--      ** not possible, then that is not a problem. Deleting the journal file
--      ** here is an optimization only.
--      **
--      ** Before deleting the journal file, obtain a RESERVED lock on the
--      ** database file. This ensures that the journal file is not deleted
--      ** while it is in use by some other client.
--      */
--      sqlite3OsClose(pPager->jfd);
--      if( pPager->eLock>=RESERVED_LOCK ){
--        sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
--      }else{
--        int rc = SQLITE_OK;
--        int state = pPager->eState;
--        assert( state==PAGER_OPEN || state==PAGER_READER );
--        if( state==PAGER_OPEN ){
--          rc = sqlite3PagerSharedLock(pPager);
--        }
--        if( pPager->eState==PAGER_READER ){
--          assert( rc==SQLITE_OK );
--          rc = pagerLockDb(pPager, RESERVED_LOCK);
--        }
-+      /* If running in direct mode, write the contents of page 1 to the file. */
-+      if( DIRECT_MODE ){
-+        const void *zBuf;
-+        assert( pPager->dbFileSize>0 );
-+        CODEC2(pPager, pPgHdr->pData, 1, 6, rc=SQLITE_NOMEM, zBuf);
-         if( rc==SQLITE_OK ){
--          sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
-+          rc = sqlite3OsWrite(pPager->fd, zBuf, pPager->pageSize, 0);
-         }
--        if( rc==SQLITE_OK && state==PAGER_READER ){
--          pagerUnlockDb(pPager, SHARED_LOCK);
--        }else if( state==PAGER_OPEN ){
--          pager_unlock(pPager);
-+        if( rc==SQLITE_OK ){
-+          pPager->changeCountDone = 1;
-         }
--        assert( state==pPager->eState );
-+      }else{
-+        pPager->changeCountDone = 1;
-       }
-     }
--  }
--
--  /* Return the new journal mode */
--  return (int)pPager->journalMode;
--}
--
--/*
--** Return the current journal mode.
--*/
--SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){
--  return (int)pPager->journalMode;
--}
- 
--/*
--** Return TRUE if the pager is in a state where it is OK to change the
--** journalmode.  Journalmode changes can only happen when the database
--** is unmodified.
--*/
--SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){
--  assert( assert_pager_state(pPager) );
--  if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0;
--  if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0;
--  return 1;
-+    /* Release the page reference. */
-+    sqlite3PagerUnref(pPgHdr);
-+  }
-+  return rc;
- }
- 
- /*
--** Get/set the size-limit used for persistent journal files.
-+** Sync the database file to disk. This is a no-op for in-memory databases
-+** or pages with the Pager.noSync flag set.
- **
--** Setting the size limit to -1 means no limit is enforced.
--** An attempt to set a limit smaller than -1 is a no-op.
-+** If successful, or if called on a pager for which it is a no-op, this
-+** function returns SQLITE_OK. Otherwise, an IO error code is returned.
- */
--SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
--  if( iLimit>=-1 ){
--    pPager->journalSizeLimit = iLimit;
--    sqlite3WalLimit(pPager->pWal, iLimit);
-+SQLITE_PRIVATE int sqlite3PagerSync(Pager *pPager){
-+  int rc = SQLITE_OK;
-+  if( !pPager->noSync ){
-+    assert( !MEMDB );
-+    rc = sqlite3OsSync(pPager->fd, pPager->syncFlags);
-+  }else if( isOpen(pPager->fd) ){
-+    assert( !MEMDB );
-+    sqlite3OsFileControl(pPager->fd, SQLITE_FCNTL_SYNC_OMITTED, (void *)&rc);
-   }
--  return pPager->journalSizeLimit;
--}
--
--/*
--** Return a pointer to the pPager->pBackup variable. The backup module
--** in backup.c maintains the content of this variable. This module
--** uses it opaquely as an argument to sqlite3BackupRestart() and
--** sqlite3BackupUpdate() only.
--*/
--SQLITE_PRIVATE sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
--  return &pPager->pBackup;
-+  return rc;
- }
- 
--#ifndef SQLITE_OMIT_WAL
- /*
--** This function is called when the user invokes "PRAGMA wal_checkpoint",
--** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint()
--** or wal_blocking_checkpoint() API functions.
-+** This function may only be called while a write-transaction is active in
-+** rollback. If the connection is in WAL mode, this call is a no-op. 
-+** Otherwise, if the connection does not already have an EXCLUSIVE lock on 
-+** the database file, an attempt is made to obtain one.
- **
--** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
-+** If the EXCLUSIVE lock is already held or the attempt to obtain it is
-+** successful, or the connection is in WAL mode, SQLITE_OK is returned.
-+** Otherwise, either SQLITE_BUSY or an SQLITE_IOERR_XXX error code is 
-+** returned.
- */
--SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int eMode, int *pnLog, int *pnCkpt){
-+SQLITE_PRIVATE int sqlite3PagerExclusiveLock(Pager *pPager){
-   int rc = SQLITE_OK;
--  if( pPager->pWal ){
--    rc = sqlite3WalCheckpoint(pPager->pWal, eMode,
--        pPager->xBusyHandler, pPager->pBusyHandlerArg,
--        pPager->ckptSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace,
--        pnLog, pnCkpt
--    );
-+  assert( pPager->eState==PAGER_WRITER_CACHEMOD 
-+       || pPager->eState==PAGER_WRITER_DBMOD 
-+       || pPager->eState==PAGER_WRITER_LOCKED 
-+  );
-+  assert( assert_pager_state(pPager) );
-+  if( 0==pagerUseWal(pPager) ){
-+    rc = pager_wait_on_lock(pPager, EXCLUSIVE_LOCK);
-   }
-   return rc;
- }
- 
--SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager){
--  return sqlite3WalCallback(pPager->pWal);
--}
--
- /*
--** Return true if the underlying VFS for the given pager supports the
--** primitives necessary for write-ahead logging.
-+** Sync the database file for the pager pPager. zMaster points to the name
-+** of a master journal file that should be written into the individual
-+** journal file. zMaster may be NULL, which is interpreted as no master
-+** journal (a single database transaction).
-+**
-+** This routine ensures that:
-+**
-+**   * The database file change-counter is updated,
-+**   * the journal is synced (unless the atomic-write optimization is used),
-+**   * all dirty pages are written to the database file, 
-+**   * the database file is truncated (if required), and
-+**   * the database file synced. 
-+**
-+** The only thing that remains to commit the transaction is to finalize 
-+** (delete, truncate or zero the first part of) the journal file (or 
-+** delete the master journal file if specified).
-+**
-+** Note that if zMaster==NULL, this does not overwrite a previous value
-+** passed to an sqlite3PagerCommitPhaseOne() call.
-+**
-+** If the final parameter - noSync - is true, then the database file itself
-+** is not synced. The caller must call sqlite3PagerSync() directly to
-+** sync the database file before calling CommitPhaseTwo() to delete the
-+** journal file in this case.
- */
--SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){
--  const sqlite3_io_methods *pMethods = pPager->fd->pMethods;
--  return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap);
--}
-+SQLITE_PRIVATE int sqlite3PagerCommitPhaseOne(
-+  Pager *pPager,                  /* Pager object */
-+  const char *zMaster,            /* If not NULL, the master journal name */
-+  int noSync                      /* True to omit the xSync on the db file */
-+){
-+  int rc = SQLITE_OK;             /* Return code */
- 
--/*
--** Attempt to take an exclusive lock on the database file. If a PENDING lock
--** is obtained instead, immediately release it.
--*/
--static int pagerExclusiveLock(Pager *pPager){
--  int rc;                         /* Return code */
-+  assert( pPager->eState==PAGER_WRITER_LOCKED
-+       || pPager->eState==PAGER_WRITER_CACHEMOD
-+       || pPager->eState==PAGER_WRITER_DBMOD
-+       || pPager->eState==PAGER_ERROR
-+  );
-+  assert( assert_pager_state(pPager) );
- 
--  assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
--  rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
--  if( rc!=SQLITE_OK ){
--    /* If the attempt to grab the exclusive lock failed, release the 
--    ** pending lock that may have been obtained instead.  */
--    pagerUnlockDb(pPager, SHARED_LOCK);
-+  /* If a prior error occurred, report that error again. */
-+  if( NEVER(pPager->errCode) ) return pPager->errCode;
-+
-+  PAGERTRACE(("DATABASE SYNC: File=%s zMaster=%s nSize=%d\n", 
-+      pPager->zFilename, zMaster, pPager->dbSize));
-+
-+  /* If no database changes have been made, return early. */
-+  if( pPager->eState<PAGER_WRITER_CACHEMOD ) return SQLITE_OK;
-+
-+  if( MEMDB ){
-+    /* If this is an in-memory db, or no pages have been written to, or this
-+    ** function has already been called, it is mostly a no-op.  However, any
-+    ** backup in progress needs to be restarted.
-+    */
-+    sqlite3BackupRestart(pPager->pBackup);
-+  }else{
-+    if( pagerUseWal(pPager) ){
-+      PgHdr *pList = sqlite3PcacheDirtyList(pPager->pPCache);
-+      PgHdr *pPageOne = 0;
-+      if( pList==0 ){
-+        /* Must have at least one page for the WAL commit flag.
-+        ** Ticket [2d1a5c67dfc2363e44f29d9bbd57f] 2011-05-18 */
-+        rc = sqlite3PagerGet(pPager, 1, &pPageOne);
-+        pList = pPageOne;
-+        pList->pDirty = 0;
-+      }
-+      assert( rc==SQLITE_OK );
-+      if( ALWAYS(pList) ){
-+        rc = pagerWalFrames(pPager, pList, pPager->dbSize, 1, 
-+            (pPager->fullSync ? pPager->syncFlags : 0)
-+        );
-+      }
-+      sqlite3PagerUnref(pPageOne);
-+      if( rc==SQLITE_OK ){
-+        sqlite3PcacheCleanAll(pPager->pPCache);
-+      }
-+    }else{
-+      /* The following block updates the change-counter. Exactly how it
-+      ** does this depends on whether or not the atomic-update optimization
-+      ** was enabled at compile time, and if this transaction meets the 
-+      ** runtime criteria to use the operation: 
-+      **
-+      **    * The file-system supports the atomic-write property for
-+      **      blocks of size page-size, and 
-+      **    * This commit is not part of a multi-file transaction, and
-+      **    * Exactly one page has been modified and store in the journal file.
-+      **
-+      ** If the optimization was not enabled at compile time, then the
-+      ** pager_incr_changecounter() function is called to update the change
-+      ** counter in 'indirect-mode'. If the optimization is compiled in but
-+      ** is not applicable to this transaction, call sqlite3JournalCreate()
-+      ** to make sure the journal file has actually been created, then call
-+      ** pager_incr_changecounter() to update the change-counter in indirect
-+      ** mode. 
-+      **
-+      ** Otherwise, if the optimization is both enabled and applicable,
-+      ** then call pager_incr_changecounter() to update the change-counter
-+      ** in 'direct' mode. In this case the journal file will never be
-+      ** created for this transaction.
-+      */
-+  #ifdef SQLITE_ENABLE_ATOMIC_WRITE
-+      PgHdr *pPg;
-+      assert( isOpen(pPager->jfd) 
-+           || pPager->journalMode==PAGER_JOURNALMODE_OFF 
-+           || pPager->journalMode==PAGER_JOURNALMODE_WAL 
-+      );
-+      if( !zMaster && isOpen(pPager->jfd) 
-+       && pPager->journalOff==jrnlBufferSize(pPager) 
-+       && pPager->dbSize>=pPager->dbOrigSize
-+       && (0==(pPg = sqlite3PcacheDirtyList(pPager->pPCache)) || 0==pPg->pDirty)
-+      ){
-+        /* Update the db file change counter via the direct-write method. The 
-+        ** following call will modify the in-memory representation of page 1 
-+        ** to include the updated change counter and then write page 1 
-+        ** directly to the database file. Because of the atomic-write 
-+        ** property of the host file-system, this is safe.
-+        */
-+        rc = pager_incr_changecounter(pPager, 1);
-+      }else{
-+        rc = sqlite3JournalCreate(pPager->jfd);
-+        if( rc==SQLITE_OK ){
-+          rc = pager_incr_changecounter(pPager, 0);
-+        }
-+      }
-+  #else
-+      rc = pager_incr_changecounter(pPager, 0);
-+  #endif
-+      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+  
-+      /* If this transaction has made the database smaller, then all pages
-+      ** being discarded by the truncation must be written to the journal
-+      ** file. This can only happen in auto-vacuum mode.
-+      **
-+      ** Before reading the pages with page numbers larger than the 
-+      ** current value of Pager.dbSize, set dbSize back to the value
-+      ** that it took at the start of the transaction. Otherwise, the
-+      ** calls to sqlite3PagerGet() return zeroed pages instead of 
-+      ** reading data from the database file.
-+      */
-+  #ifndef SQLITE_OMIT_AUTOVACUUM
-+      if( pPager->dbSize<pPager->dbOrigSize 
-+       && pPager->journalMode!=PAGER_JOURNALMODE_OFF
-+      ){
-+        Pgno i;                                   /* Iterator variable */
-+        const Pgno iSkip = PAGER_MJ_PGNO(pPager); /* Pending lock page */
-+        const Pgno dbSize = pPager->dbSize;       /* Database image size */ 
-+        pPager->dbSize = pPager->dbOrigSize;
-+        for( i=dbSize+1; i<=pPager->dbOrigSize; i++ ){
-+          if( !sqlite3BitvecTest(pPager->pInJournal, i) && i!=iSkip ){
-+            PgHdr *pPage;             /* Page to journal */
-+            rc = sqlite3PagerGet(pPager, i, &pPage);
-+            if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+            rc = sqlite3PagerWrite(pPage);
-+            sqlite3PagerUnref(pPage);
-+            if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+          }
-+        }
-+        pPager->dbSize = dbSize;
-+      } 
-+  #endif
-+  
-+      /* Write the master journal name into the journal file. If a master 
-+      ** journal file name has already been written to the journal file, 
-+      ** or if zMaster is NULL (no master journal), then this call is a no-op.
-+      */
-+      rc = writeMasterJournal(pPager, zMaster);
-+      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+  
-+      /* Sync the journal file and write all dirty pages to the database.
-+      ** If the atomic-update optimization is being used, this sync will not 
-+      ** create the journal file or perform any real IO.
-+      **
-+      ** Because the change-counter page was just modified, unless the
-+      ** atomic-update optimization is used it is almost certain that the
-+      ** journal requires a sync here. However, in locking_mode=exclusive
-+      ** on a system under memory pressure it is just possible that this is 
-+      ** not the case. In this case it is likely enough that the redundant
-+      ** xSync() call will be changed to a no-op by the OS anyhow. 
-+      */
-+      rc = syncJournal(pPager, 0);
-+      if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+  
-+      rc = pager_write_pagelist(pPager,sqlite3PcacheDirtyList(pPager->pPCache));
-+      if( rc!=SQLITE_OK ){
-+        assert( rc!=SQLITE_IOERR_BLOCKED );
-+        goto commit_phase_one_exit;
-+      }
-+      sqlite3PcacheCleanAll(pPager->pPCache);
-+  
-+      /* If the file on disk is not the same size as the database image,
-+      ** then use pager_truncate to grow or shrink the file here.
-+      */
-+      if( pPager->dbSize!=pPager->dbFileSize ){
-+        Pgno nNew = pPager->dbSize - (pPager->dbSize==PAGER_MJ_PGNO(pPager));
-+        assert( pPager->eState==PAGER_WRITER_DBMOD );
-+        rc = pager_truncate(pPager, nNew);
-+        if( rc!=SQLITE_OK ) goto commit_phase_one_exit;
-+      }
-+  
-+      /* Finally, sync the database file. */
-+      if( !noSync ){
-+        rc = sqlite3PagerSync(pPager);
-+      }
-+      IOTRACE(("DBSYNC %p\n", pPager))
-+    }
-   }
- 
-+commit_phase_one_exit:
-+  if( rc==SQLITE_OK && !pagerUseWal(pPager) ){
-+    pPager->eState = PAGER_WRITER_FINISHED;
-+  }
-   return rc;
- }
- 
-+
- /*
--** Call sqlite3WalOpen() to open the WAL handle. If the pager is in 
--** exclusive-locking mode when this function is called, take an EXCLUSIVE
--** lock on the database file and use heap-memory to store the wal-index
--** in. Otherwise, use the normal shared-memory.
-+** When this function is called, the database file has been completely
-+** updated to reflect the changes made by the current transaction and
-+** synced to disk. The journal file still exists in the file-system 
-+** though, and if a failure occurs at this point it will eventually
-+** be used as a hot-journal and the current transaction rolled back.
-+**
-+** This function finalizes the journal file, either by deleting, 
-+** truncating or partially zeroing it, so that it cannot be used 
-+** for hot-journal rollback. Once this is done the transaction is
-+** irrevocably committed.
-+**
-+** If an error occurs, an IO error code is returned and the pager
-+** moves into the error state. Otherwise, SQLITE_OK is returned.
- */
--static int pagerOpenWal(Pager *pPager){
--  int rc = SQLITE_OK;
-+SQLITE_PRIVATE int sqlite3PagerCommitPhaseTwo(Pager *pPager){
-+  int rc = SQLITE_OK;                  /* Return code */
- 
--  assert( pPager->pWal==0 && pPager->tempFile==0 );
--  assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK || pPager->noReadlock);
-+  /* This routine should not be called if a prior error has occurred.
-+  ** But if (due to a coding error elsewhere in the system) it does get
-+  ** called, just return the same error code without doing anything. */
-+  if( NEVER(pPager->errCode) ) return pPager->errCode;
- 
--  /* If the pager is already in exclusive-mode, the WAL module will use 
--  ** heap-memory for the wal-index instead of the VFS shared-memory 
--  ** implementation. Take the exclusive lock now, before opening the WAL
--  ** file, to make sure this is safe.
--  */
--  if( pPager->exclusiveMode ){
--    rc = pagerExclusiveLock(pPager);
--  }
-+  assert( pPager->eState==PAGER_WRITER_LOCKED
-+       || pPager->eState==PAGER_WRITER_FINISHED
-+       || (pagerUseWal(pPager) && pPager->eState==PAGER_WRITER_CACHEMOD)
-+  );
-+  assert( assert_pager_state(pPager) );
- 
--  /* Open the connection to the log file. If this operation fails, 
--  ** (e.g. due to malloc() failure), return an error code.
-+  /* An optimization. If the database was not actually modified during
-+  ** this transaction, the pager is running in exclusive-mode and is
-+  ** using persistent journals, then this function is a no-op.
-+  **
-+  ** The start of the journal file currently contains a single journal 
-+  ** header with the nRec field set to 0. If such a journal is used as
-+  ** a hot-journal during hot-journal rollback, 0 changes will be made
-+  ** to the database file. So there is no need to zero the journal 
-+  ** header. Since the pager is in exclusive mode, there is no need
-+  ** to drop any locks either.
-   */
--  if( rc==SQLITE_OK ){
--    rc = sqlite3WalOpen(pPager->pVfs, 
--        pPager->fd, pPager->zWal, pPager->exclusiveMode,
--        pPager->journalSizeLimit, &pPager->pWal
--    );
-+  if( pPager->eState==PAGER_WRITER_LOCKED 
-+   && pPager->exclusiveMode 
-+   && pPager->journalMode==PAGER_JOURNALMODE_PERSIST
-+  ){
-+    assert( pPager->journalOff==JOURNAL_HDR_SZ(pPager) || !pPager->journalOff );
-+    pPager->eState = PAGER_READER;
-+    return SQLITE_OK;
-   }
- 
--  return rc;
-+  PAGERTRACE(("COMMIT %d\n", PAGERID(pPager)));
-+  rc = pager_end_transaction(pPager, pPager->setMaster);
-+  return pager_error(pPager, rc);
- }
- 
--
- /*
--** The caller must be holding a SHARED lock on the database file to call
--** this function.
-+** If a write transaction is open, then all changes made within the 
-+** transaction are reverted and the current write-transaction is closed.
-+** The pager falls back to PAGER_READER state if successful, or PAGER_ERROR
-+** state if an error occurs.
- **
--** If the pager passed as the first argument is open on a real database
--** file (not a temp file or an in-memory database), and the WAL file
--** is not already open, make an attempt to open it now. If successful,
--** return SQLITE_OK. If an error occurs or the VFS used by the pager does 
--** not support the xShmXXX() methods, return an error code. *pbOpen is
--** not modified in either case.
-+** If the pager is already in PAGER_ERROR state when this function is called,
-+** it returns Pager.errCode immediately. No work is performed in this case.
- **
--** If the pager is open on a temp-file (or in-memory database), or if
--** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK
--** without doing anything.
-+** Otherwise, in rollback mode, this function performs two functions:
-+**
-+**   1) It rolls back the journal file, restoring all database file and 
-+**      in-memory cache pages to the state they were in when the transaction
-+**      was opened, and
-+**
-+**   2) It finalizes the journal file, so that it is not used for hot
-+**      rollback at any point in the future.
-+**
-+** Finalization of the journal file (task 2) is only performed if the 
-+** rollback is successful.
-+**
-+** In WAL mode, all cache-entries containing data modified within the
-+** current transaction are either expelled from the cache or reverted to
-+** their pre-transaction state by re-reading data from the database or
-+** WAL files. The WAL transaction is then closed.
- */
--SQLITE_PRIVATE int sqlite3PagerOpenWal(
--  Pager *pPager,                  /* Pager object */
--  int *pbOpen                     /* OUT: Set to true if call is a no-op */
--){
--  int rc = SQLITE_OK;             /* Return code */
-+SQLITE_PRIVATE int sqlite3PagerRollback(Pager *pPager){
-+  int rc = SQLITE_OK;                  /* Return code */
-+  PAGERTRACE(("ROLLBACK %d\n", PAGERID(pPager)));
- 
-+  /* PagerRollback() is a no-op if called in READER or OPEN state. If
-+  ** the pager is already in the ERROR state, the rollback is not 
-+  ** attempted here. Instead, the error code is returned to the caller.
-+  */
-   assert( assert_pager_state(pPager) );
--  assert( pPager->eState==PAGER_OPEN   || pbOpen );
--  assert( pPager->eState==PAGER_READER || !pbOpen );
--  assert( pbOpen==0 || *pbOpen==0 );
--  assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) );
--
--  if( !pPager->tempFile && !pPager->pWal ){
--    if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN;
--
--    /* Close any rollback journal previously open */
--    sqlite3OsClose(pPager->jfd);
-+  if( pPager->eState==PAGER_ERROR ) return pPager->errCode;
-+  if( pPager->eState<=PAGER_READER ) return SQLITE_OK;
- 
--    rc = pagerOpenWal(pPager);
--    if( rc==SQLITE_OK ){
--      pPager->journalMode = PAGER_JOURNALMODE_WAL;
--      pPager->eState = PAGER_OPEN;
-+  if( pagerUseWal(pPager) ){
-+    int rc2;
-+    rc = sqlite3PagerSavepoint(pPager, SAVEPOINT_ROLLBACK, -1);
-+    rc2 = pager_end_transaction(pPager, pPager->setMaster);
-+    if( rc==SQLITE_OK ) rc = rc2;
-+  }else if( !isOpen(pPager->jfd) || pPager->eState==PAGER_WRITER_LOCKED ){
-+    int eState = pPager->eState;
-+    rc = pager_end_transaction(pPager, 0);
-+    if( !MEMDB && eState>PAGER_WRITER_LOCKED ){
-+      /* This can happen using journal_mode=off. Move the pager to the error 
-+      ** state to indicate that the contents of the cache may not be trusted.
-+      ** Any active readers will get SQLITE_ABORT.
-+      */
-+      pPager->errCode = SQLITE_ABORT;
-+      pPager->eState = PAGER_ERROR;
-+      return rc;
-     }
-   }else{
--    *pbOpen = 1;
-+    rc = pager_playback(pPager, 0);
-   }
- 
--  return rc;
-+  assert( pPager->eState==PAGER_READER || rc!=SQLITE_OK );
-+  assert( rc==SQLITE_OK || rc==SQLITE_FULL || (rc&0xFF)==SQLITE_IOERR );
-+
-+  /* If an error occurs during a ROLLBACK, we can no longer trust the pager
-+  ** cache. So call pager_error() on the way out to make any error persistent.
-+  */
-+  return pager_error(pPager, rc);
- }
- 
- /*
--** This function is called to close the connection to the log file prior
--** to switching from WAL to rollback mode.
--**
--** Before closing the log file, this function attempts to take an 
--** EXCLUSIVE lock on the database file. If this cannot be obtained, an
--** error (SQLITE_BUSY) is returned and the log connection is not closed.
--** If successful, the EXCLUSIVE lock is not released before returning.
-+** Return TRUE if the database file is opened read-only.  Return FALSE
-+** if the database is (in theory) writable.
- */
--SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){
--  int rc = SQLITE_OK;
--
--  assert( pPager->journalMode==PAGER_JOURNALMODE_WAL );
--
--  /* If the log file is not already open, but does exist in the file-system,
--  ** it may need to be checkpointed before the connection can switch to
--  ** rollback mode. Open it now so this can happen.
--  */
--  if( !pPager->pWal ){
--    int logexists = 0;
--    rc = pagerLockDb(pPager, SHARED_LOCK);
--    if( rc==SQLITE_OK ){
--      rc = sqlite3OsAccess(
--          pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists
--      );
--    }
--    if( rc==SQLITE_OK && logexists ){
--      rc = pagerOpenWal(pPager);
--    }
--  }
--    
--  /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on
--  ** the database file, the log and log-summary files will be deleted.
--  */
--  if( rc==SQLITE_OK && pPager->pWal ){
--    rc = pagerExclusiveLock(pPager);
--    if( rc==SQLITE_OK ){
--      rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags,
--                           pPager->pageSize, (u8*)pPager->pTmpSpace);
--      pPager->pWal = 0;
--    }
--  }
--  return rc;
-+SQLITE_PRIVATE u8 sqlite3PagerIsreadonly(Pager *pPager){
-+  return pPager->readOnly;
- }
- 
--#ifdef SQLITE_HAS_CODEC
- /*
--** This function is called by the wal module when writing page content
--** into the log file.
--**
--** This function returns a pointer to a buffer containing the encrypted
--** page content. If a malloc fails, this function may return NULL.
-+** Return the number of references to the pager.
- */
--SQLITE_PRIVATE void *sqlite3PagerCodec(PgHdr *pPg){
--  void *aData = 0;
--  CODEC2(pPg->pPager, pPg->pData, pPg->pgno, 6, return 0, aData);
--  return aData;
-+SQLITE_PRIVATE int sqlite3PagerRefcount(Pager *pPager){
-+  return sqlite3PcacheRefCount(pPager->pPCache);
- }
--#endif /* SQLITE_HAS_CODEC */
--
--#endif /* !SQLITE_OMIT_WAL */
- 
--#endif /* SQLITE_OMIT_DISKIO */
-+/*
-+** Return the approximate number of bytes of memory currently
-+** used by the pager and its associated cache.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerMemUsed(Pager *pPager){
-+  int perPageSize = pPager->pageSize + pPager->nExtra + sizeof(PgHdr)
-+                                     + 5*sizeof(void*);
-+  return perPageSize*sqlite3PcachePagecount(pPager->pPCache)
-+           + sqlite3MallocSize(pPager)
-+           + pPager->pageSize;
-+}
- 
--/************** End of pager.c ***********************************************/
--/************** Begin file wal.c *********************************************/
- /*
--** 2010 February 1
--**
--** The author disclaims copyright to this source code.  In place of
--** a legal notice, here is a blessing:
--**
--**    May you do good and not evil.
--**    May you find forgiveness for yourself and forgive others.
--**    May you share freely, never taking more than you give.
--**
--*************************************************************************
--**
--** This file contains the implementation of a write-ahead log (WAL) used in 
--** "journal_mode=WAL" mode.
--**
--** WRITE-AHEAD LOG (WAL) FILE FORMAT
--**
--** A WAL file consists of a header followed by zero or more "frames".
--** Each frame records the revised content of a single page from the
--** database file.  All changes to the database are recorded by writing
--** frames into the WAL.  Transactions commit when a frame is written that
--** contains a commit marker.  A single WAL can and usually does record 
--** multiple transactions.  Periodically, the content of the WAL is
--** transferred back into the database file in an operation called a
--** "checkpoint".
--**
--** A single WAL file can be used multiple times.  In other words, the
--** WAL can fill up with frames and then be checkpointed and then new
--** frames can overwrite the old ones.  A WAL always grows from beginning
--** toward the end.  Checksums and counters attached to each frame are
--** used to determine which frames within the WAL are valid and which
--** are leftovers from prior checkpoints.
--**
--** The WAL header is 32 bytes in size and consists of the following eight
--** big-endian 32-bit unsigned integer values:
--**
--**     0: Magic number.  0x377f0682 or 0x377f0683
--**     4: File format version.  Currently 3007000
--**     8: Database page size.  Example: 1024
--**    12: Checkpoint sequence number
--**    16: Salt-1, random integer incremented with each checkpoint
--**    20: Salt-2, a different random integer changing with each ckpt
--**    24: Checksum-1 (first part of checksum for first 24 bytes of header).
--**    28: Checksum-2 (second part of checksum for first 24 bytes of header).
--**
--** Immediately following the wal-header are zero or more frames. Each
--** frame consists of a 24-byte frame-header followed by a <page-size> bytes
--** of page data. The frame-header is six big-endian 32-bit unsigned 
--** integer values, as follows:
--**
--**     0: Page number.
--**     4: For commit records, the size of the database image in pages 
--**        after the commit. For all other records, zero.
--**     8: Salt-1 (copied from the header)
--**    12: Salt-2 (copied from the header)
--**    16: Checksum-1.
--**    20: Checksum-2.
--**
--** A frame is considered valid if and only if the following conditions are
--** true:
--**
--**    (1) The salt-1 and salt-2 values in the frame-header match
--**        salt values in the wal-header
--**
--**    (2) The checksum values in the final 8 bytes of the frame-header
--**        exactly match the checksum computed consecutively on the
--**        WAL header and the first 8 bytes and the content of all frames
--**        up to and including the current frame.
--**
--** The checksum is computed using 32-bit big-endian integers if the
--** magic number in the first 4 bytes of the WAL is 0x377f0683 and it
--** is computed using little-endian if the magic number is 0x377f0682.
--** The checksum values are always stored in the frame header in a
--** big-endian format regardless of which byte order is used to compute
--** the checksum.  The checksum is computed by interpreting the input as
--** an even number of unsigned 32-bit integers: x[0] through x[N].  The
--** algorithm used for the checksum is as follows:
--** 
--**   for i from 0 to n-1 step 2:
--**     s0 += x[i] + s1;
--**     s1 += x[i+1] + s0;
--**   endfor
--**
--** Note that s0 and s1 are both weighted checksums using fibonacci weights
--** in reverse order (the largest fibonacci weight occurs on the first element
--** of the sequence being summed.)  The s1 value spans all 32-bit 
--** terms of the sequence whereas s0 omits the final term.
--**
--** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the
--** WAL is transferred into the database, then the database is VFS.xSync-ed.
--** The VFS.xSync operations serve as write barriers - all writes launched
--** before the xSync must complete before any write that launches after the
--** xSync begins.
--**
--** After each checkpoint, the salt-1 value is incremented and the salt-2
--** value is randomized.  This prevents old and new frames in the WAL from
--** being considered valid at the same time and being checkpointed together
--** following a crash.
--**
--** READER ALGORITHM
--**
--** To read a page from the database (call it page number P), a reader
--** first checks the WAL to see if it contains page P.  If so, then the
--** last valid instance of page P that is followed by a commit frame
--** or is a commit frame itself becomes the value read.  If the WAL
--** contains no copies of page P that are valid and which are a commit
--** frame or are followed by a commit frame, then page P is read from
--** the database file.
--**
--** To start a read transaction, the reader records the index of the last
--** valid frame in the WAL.  The reader uses this recorded "mxFrame" value
--** for all subsequent read operations.  New transactions can be appended
--** to the WAL, but as long as the reader uses its original mxFrame value
--** and ignores the newly appended content, it will see a consistent snapshot
--** of the database from a single point in time.  This technique allows
--** multiple concurrent readers to view different versions of the database
--** content simultaneously.
--**
--** The reader algorithm in the previous paragraphs works correctly, but 
--** because frames for page P can appear anywhere within the WAL, the
--** reader has to scan the entire WAL looking for page P frames.  If the
--** WAL is large (multiple megabytes is typical) that scan can be slow,
--** and read performance suffers.  To overcome this problem, a separate
--** data structure called the wal-index is maintained to expedite the
--** search for frames of a particular page.
--** 
--** WAL-INDEX FORMAT
--**
--** Conceptually, the wal-index is shared memory, though VFS implementations
--** might choose to implement the wal-index using a mmapped file.  Because
--** the wal-index is shared memory, SQLite does not support journal_mode=WAL 
--** on a network filesystem.  All users of the database must be able to
--** share memory.
--**
--** The wal-index is transient.  After a crash, the wal-index can (and should
--** be) reconstructed from the original WAL file.  In fact, the VFS is required
--** to either truncate or zero the header of the wal-index when the last
--** connection to it closes.  Because the wal-index is transient, it can
--** use an architecture-specific format; it does not have to be cross-platform.
--** Hence, unlike the database and WAL file formats which store all values
--** as big endian, the wal-index can store multi-byte values in the native
--** byte order of the host computer.
--**
--** The purpose of the wal-index is to answer this question quickly:  Given
--** a page number P, return the index of the last frame for page P in the WAL,
--** or return NULL if there are no frames for page P in the WAL.
--**
--** The wal-index consists of a header region, followed by one or
--** more index blocks.  
--**
--** The wal-index header contains the total number of frames within the WAL
--** in the mxFrame field.  
--**
--** Each index block except for the first contains information on 
--** HASHTABLE_NPAGE frames. The first index block contains information on
--** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and 
--** HASHTABLE_NPAGE are selected so that together the wal-index header and
--** first index block are the same size as all other index blocks in the
--** wal-index.
--**
--** Each index block contains two sections, a page-mapping that contains the
--** database page number associated with each wal frame, and a hash-table 
--** that allows readers to query an index block for a specific page number.
--** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE
--** for the first index block) 32-bit page numbers. The first entry in the 
--** first index-block contains the database page number corresponding to the
--** first frame in the WAL file. The first entry in the second index block
--** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in
--** the log, and so on.
--**
--** The last index block in a wal-index usually contains less than the full
--** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers,
--** depending on the contents of the WAL file. This does not change the
--** allocated size of the page-mapping array - the page-mapping array merely
--** contains unused entries.
--**
--** Even without using the hash table, the last frame for page P
--** can be found by scanning the page-mapping sections of each index block
--** starting with the last index block and moving toward the first, and
--** within each index block, starting at the end and moving toward the
--** beginning.  The first entry that equals P corresponds to the frame
--** holding the content for that page.
--**
--** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers.
--** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the
--** hash table for each page number in the mapping section, so the hash 
--** table is never more than half full.  The expected number of collisions 
--** prior to finding a match is 1.  Each entry of the hash table is a
--** 1-based index of an entry in the mapping section of the same
--** index block.   Let K be the 1-based index of the largest entry in
--** the mapping section.  (For index blocks other than the last, K will
--** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
--** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
--** contain a value of 0.
--**
--** To look for page P in the hash table, first compute a hash iKey on
--** P as follows:
--**
--**      iKey = (P * 383) % HASHTABLE_NSLOT
--**
--** Then start scanning entries of the hash table, starting with iKey
--** (wrapping around to the beginning when the end of the hash table is
--** reached) until an unused hash slot is found. Let the first unused slot
--** be at index iUnused.  (iUnused might be less than iKey if there was
--** wrap-around.) Because the hash table is never more than half full,
--** the search is guaranteed to eventually hit an unused entry.  Let 
--** iMax be the value between iKey and iUnused, closest to iUnused,
--** where aHash[iMax]==P.  If there is no iMax entry (if there exists
--** no hash slot such that aHash[i]==p) then page P is not in the
--** current index block.  Otherwise the iMax-th mapping entry of the
--** current index block corresponds to the last entry that references 
--** page P.
--**
--** A hash search begins with the last index block and moves toward the
--** first index block, looking for entries corresponding to page P.  On
--** average, only two or three slots in each index block need to be
--** examined in order to either find the last entry for page P, or to
--** establish that no such entry exists in the block.  Each index block
--** holds over 4000 entries.  So two or three index blocks are sufficient
--** to cover a typical 10 megabyte WAL file, assuming 1K pages.  8 or 10
--** comparisons (on average) suffice to either locate a frame in the
--** WAL or to establish that the frame does not exist in the WAL.  This
--** is much faster than scanning the entire 10MB WAL.
--**
--** Note that entries are added in order of increasing K.  Hence, one
--** reader might be using some value K0 and a second reader that started
--** at a later time (after additional transactions were added to the WAL
--** and to the wal-index) might be using a different value K1, where K1>K0.
--** Both readers can use the same hash table and mapping section to get
--** the correct result.  There may be entries in the hash table with
--** K>K0 but to the first reader, those entries will appear to be unused
--** slots in the hash table and so the first reader will get an answer as
--** if no values greater than K0 had ever been inserted into the hash table
--** in the first place - which is what reader one wants.  Meanwhile, the
--** second reader using K1 will see additional values that were inserted
--** later, which is exactly what reader two wants.  
--**
--** When a rollback occurs, the value of K is decreased. Hash table entries
--** that correspond to frames greater than the new K value are removed
--** from the hash table at this point.
-+** Return the number of references to the specified page.
- */
--#ifndef SQLITE_OMIT_WAL
-+SQLITE_PRIVATE int sqlite3PagerPageRefcount(DbPage *pPage){
-+  return sqlite3PcachePageRefcount(pPage);
-+}
- 
-+#ifdef SQLITE_TEST
-+/*
-+** This routine is used for testing and analysis only.
-+*/
-+SQLITE_PRIVATE int *sqlite3PagerStats(Pager *pPager){
-+  static int a[11];
-+  a[0] = sqlite3PcacheRefCount(pPager->pPCache);
-+  a[1] = sqlite3PcachePagecount(pPager->pPCache);
-+  a[2] = sqlite3PcacheGetCachesize(pPager->pPCache);
-+  a[3] = pPager->eState==PAGER_OPEN ? -1 : (int) pPager->dbSize;
-+  a[4] = pPager->eState;
-+  a[5] = pPager->errCode;
-+  a[6] = pPager->nHit;
-+  a[7] = pPager->nMiss;
-+  a[8] = 0;  /* Used to be pPager->nOvfl */
-+  a[9] = pPager->nRead;
-+  a[10] = pPager->nWrite;
-+  return a;
-+}
-+#endif
- 
- /*
--** Trace output macros
-+** Return true if this is an in-memory pager.
- */
--#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
--SQLITE_PRIVATE int sqlite3WalTrace = 0;
--# define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
--#else
--# define WALTRACE(X)
--#endif
-+SQLITE_PRIVATE int sqlite3PagerIsMemdb(Pager *pPager){
-+  return MEMDB;
-+}
- 
- /*
--** The maximum (and only) versions of the wal and wal-index formats
--** that may be interpreted by this version of SQLite.
--**
--** If a client begins recovering a WAL file and finds that (a) the checksum
--** values in the wal-header are correct and (b) the version field is not
--** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN.
-+** Check that there are at least nSavepoint savepoints open. If there are
-+** currently less than nSavepoints open, then open one or more savepoints
-+** to make up the difference. If the number of savepoints is already
-+** equal to nSavepoint, then this function is a no-op.
- **
--** Similarly, if a client successfully reads a wal-index header (i.e. the 
--** checksum test is successful) and finds that the version field is not
--** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite
--** returns SQLITE_CANTOPEN.
-+** If a memory allocation fails, SQLITE_NOMEM is returned. If an error 
-+** occurs while opening the sub-journal file, then an IO error code is
-+** returned. Otherwise, SQLITE_OK.
- */
--#define WAL_MAX_VERSION      3007000
--#define WALINDEX_MAX_VERSION 3007000
-+SQLITE_PRIVATE int sqlite3PagerOpenSavepoint(Pager *pPager, int nSavepoint){
-+  int rc = SQLITE_OK;                       /* Return code */
-+  int nCurrent = pPager->nSavepoint;        /* Current number of savepoints */
- 
--/*
--** Indices of various locking bytes.   WAL_NREADER is the number
--** of available reader locks and should be at least 3.
--*/
--#define WAL_WRITE_LOCK         0
--#define WAL_ALL_BUT_WRITE      1
--#define WAL_CKPT_LOCK          1
--#define WAL_RECOVER_LOCK       2
--#define WAL_READ_LOCK(I)       (3+(I))
--#define WAL_NREADER            (SQLITE_SHM_NLOCK-3)
-+  assert( pPager->eState>=PAGER_WRITER_LOCKED );
-+  assert( assert_pager_state(pPager) );
- 
-+  if( nSavepoint>nCurrent && pPager->useJournal ){
-+    int ii;                                 /* Iterator variable */
-+    PagerSavepoint *aNew;                   /* New Pager.aSavepoint array */
- 
--/* Object declarations */
--typedef struct WalIndexHdr WalIndexHdr;
--typedef struct WalIterator WalIterator;
--typedef struct WalCkptInfo WalCkptInfo;
-+    /* Grow the Pager.aSavepoint array using realloc(). Return SQLITE_NOMEM
-+    ** if the allocation fails. Otherwise, zero the new portion in case a 
-+    ** malloc failure occurs while populating it in the for(...) loop below.
-+    */
-+    aNew = (PagerSavepoint *)sqlite3Realloc(
-+        pPager->aSavepoint, sizeof(PagerSavepoint)*nSavepoint
-+    );
-+    if( !aNew ){
-+      return SQLITE_NOMEM;
-+    }
-+    memset(&aNew[nCurrent], 0, (nSavepoint-nCurrent) * sizeof(PagerSavepoint));
-+    pPager->aSavepoint = aNew;
- 
-+    /* Populate the PagerSavepoint structures just allocated. */
-+    for(ii=nCurrent; ii<nSavepoint; ii++){
-+      aNew[ii].nOrig = pPager->dbSize;
-+      if( isOpen(pPager->jfd) && pPager->journalOff>0 ){
-+        aNew[ii].iOffset = pPager->journalOff;
-+      }else{
-+        aNew[ii].iOffset = JOURNAL_HDR_SZ(pPager);
-+      }
-+      aNew[ii].iSubRec = pPager->nSubRec;
-+      aNew[ii].pInSavepoint = sqlite3BitvecCreate(pPager->dbSize);
-+      if( !aNew[ii].pInSavepoint ){
-+        return SQLITE_NOMEM;
-+      }
-+      if( pagerUseWal(pPager) ){
-+        sqlite3WalSavepoint(pPager->pWal, aNew[ii].aWalData);
-+      }
-+      pPager->nSavepoint = ii+1;
-+    }
-+    assert( pPager->nSavepoint==nSavepoint );
-+    assertTruncateConstraint(pPager);
-+  }
- 
--/*
--** The following object holds a copy of the wal-index header content.
--**
--** The actual header in the wal-index consists of two copies of this
--** object.
--**
--** The szPage value can be any power of 2 between 512 and 32768, inclusive.
--** Or it can be 1 to represent a 65536-byte page.  The latter case was
--** added in 3.7.1 when support for 64K pages was added.  
--*/
--struct WalIndexHdr {
--  u32 iVersion;                   /* Wal-index version */
--  u32 unused;                     /* Unused (padding) field */
--  u32 iChange;                    /* Counter incremented each transaction */
--  u8 isInit;                      /* 1 when initialized */
--  u8 bigEndCksum;                 /* True if checksums in WAL are big-endian */
--  u16 szPage;                     /* Database page size in bytes. 1==64K */
--  u32 mxFrame;                    /* Index of last valid frame in the WAL */
--  u32 nPage;                      /* Size of database in pages */
--  u32 aFrameCksum[2];             /* Checksum of last frame in log */
--  u32 aSalt[2];                   /* Two salt values copied from WAL header */
--  u32 aCksum[2];                  /* Checksum over all prior fields */
--};
-+  return rc;
-+}
- 
- /*
--** A copy of the following object occurs in the wal-index immediately
--** following the second copy of the WalIndexHdr.  This object stores
--** information used by checkpoint.
--**
--** nBackfill is the number of frames in the WAL that have been written
--** back into the database. (We call the act of moving content from WAL to
--** database "backfilling".)  The nBackfill number is never greater than
--** WalIndexHdr.mxFrame.  nBackfill can only be increased by threads
--** holding the WAL_CKPT_LOCK lock (which includes a recovery thread).
--** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from
--** mxFrame back to zero when the WAL is reset.
-+** This function is called to rollback or release (commit) a savepoint.
-+** The savepoint to release or rollback need not be the most recently 
-+** created savepoint.
- **
--** There is one entry in aReadMark[] for each reader lock.  If a reader
--** holds read-lock K, then the value in aReadMark[K] is no greater than
--** the mxFrame for that reader.  The value READMARK_NOT_USED (0xffffffff)
--** for any aReadMark[] means that entry is unused.  aReadMark[0] is 
--** a special case; its value is never used and it exists as a place-holder
--** to avoid having to offset aReadMark[] indexes by one.  Readers holding
--** WAL_READ_LOCK(0) always ignore the entire WAL and read all content
--** directly from the database.
-+** Parameter op is always either SAVEPOINT_ROLLBACK or SAVEPOINT_RELEASE.
-+** If it is SAVEPOINT_RELEASE, then release and destroy the savepoint with
-+** index iSavepoint. If it is SAVEPOINT_ROLLBACK, then rollback all changes
-+** that have occurred since the specified savepoint was created.
- **
--** The value of aReadMark[K] may only be changed by a thread that
--** is holding an exclusive lock on WAL_READ_LOCK(K).  Thus, the value of
--** aReadMark[K] cannot be changed while there is a reader using that mark
--** since the reader will be holding a shared lock on WAL_READ_LOCK(K).
-+** The savepoint to rollback or release is identified by parameter 
-+** iSavepoint. A value of 0 means to operate on the outermost savepoint
-+** (the first created). A value of (Pager.nSavepoint-1) means operate
-+** on the most recently created savepoint. If iSavepoint is greater than
-+** (Pager.nSavepoint-1), then this function is a no-op.
- **
--** The checkpointer may only transfer frames from WAL to database where
--** the frame numbers are less than or equal to every aReadMark[] that is
--** in use (that is, every aReadMark[j] for which there is a corresponding
--** WAL_READ_LOCK(j)).  New readers (usually) pick the aReadMark[] with the
--** largest value and will increase an unused aReadMark[] to mxFrame if there
--** is not already an aReadMark[] equal to mxFrame.  The exception to the
--** previous sentence is when nBackfill equals mxFrame (meaning that everything
--** in the WAL has been backfilled into the database) then new readers
--** will choose aReadMark[0] which has value 0 and hence such reader will
--** get all their content directly from the database file and ignore 
--** the WAL.
-+** If a negative value is passed to this function, then the current
-+** transaction is rolled back. This is different to calling 
-+** sqlite3PagerRollback() because this function does not terminate
-+** the transaction or unlock the database, it just restores the 
-+** contents of the database to its original state. 
- **
--** Writers normally append new frames to the end of the WAL.  However,
--** if nBackfill equals mxFrame (meaning that all WAL content has been
--** written back into the database) and if no readers are using the WAL
--** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then
--** the writer will first "reset" the WAL back to the beginning and start
--** writing new content beginning at frame 1.
-+** In any case, all savepoints with an index greater than iSavepoint 
-+** are destroyed. If this is a release operation (op==SAVEPOINT_RELEASE),
-+** then savepoint iSavepoint is also destroyed.
- **
--** We assume that 32-bit loads are atomic and so no locks are needed in
--** order to read from any aReadMark[] entries.
--*/
--struct WalCkptInfo {
--  u32 nBackfill;                  /* Number of WAL frames backfilled into DB */
--  u32 aReadMark[WAL_NREADER];     /* Reader marks */
--};
--#define READMARK_NOT_USED  0xffffffff
-+** This function may return SQLITE_NOMEM if a memory allocation fails,
-+** or an IO error code if an IO error occurs while rolling back a 
-+** savepoint. If no errors occur, SQLITE_OK is returned.
-+*/ 
-+SQLITE_PRIVATE int sqlite3PagerSavepoint(Pager *pPager, int op, int iSavepoint){
-+  int rc = pPager->errCode;       /* Return code */
- 
-+  assert( op==SAVEPOINT_RELEASE || op==SAVEPOINT_ROLLBACK );
-+  assert( iSavepoint>=0 || op==SAVEPOINT_ROLLBACK );
- 
--/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
--** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
--** only support mandatory file-locks, we do not read or write data
--** from the region of the file on which locks are applied.
--*/
--#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo))
--#define WALINDEX_LOCK_RESERVED 16
--#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)
-+  if( rc==SQLITE_OK && iSavepoint<pPager->nSavepoint ){
-+    int ii;            /* Iterator variable */
-+    int nNew;          /* Number of remaining savepoints after this op. */
- 
--/* Size of header before each frame in wal */
--#define WAL_FRAME_HDRSIZE 24
-+    /* Figure out how many savepoints will still be active after this
-+    ** operation. Store this value in nNew. Then free resources associated 
-+    ** with any savepoints that are destroyed by this operation.
-+    */
-+    nNew = iSavepoint + (( op==SAVEPOINT_RELEASE ) ? 0 : 1);
-+    for(ii=nNew; ii<pPager->nSavepoint; ii++){
-+      sqlite3BitvecDestroy(pPager->aSavepoint[ii].pInSavepoint);
-+    }
-+    pPager->nSavepoint = nNew;
- 
--/* Size of write ahead log header, including checksum. */
--/* #define WAL_HDRSIZE 24 */
--#define WAL_HDRSIZE 32
-+    /* If this is a release of the outermost savepoint, truncate 
-+    ** the sub-journal to zero bytes in size. */
-+    if( op==SAVEPOINT_RELEASE ){
-+      if( nNew==0 && isOpen(pPager->sjfd) ){
-+        /* Only truncate if it is an in-memory sub-journal. */
-+        if( sqlite3IsMemJournal(pPager->sjfd) ){
-+          rc = sqlite3OsTruncate(pPager->sjfd, 0);
-+          assert( rc==SQLITE_OK );
-+        }
-+        pPager->nSubRec = 0;
-+      }
-+    }
-+    /* Else this is a rollback operation, playback the specified savepoint.
-+    ** If this is a temp-file, it is possible that the journal file has
-+    ** not yet been opened. In this case there have been no changes to
-+    ** the database file, so the playback operation can be skipped.
-+    */
-+    else if( pagerUseWal(pPager) || isOpen(pPager->jfd) ){
-+      PagerSavepoint *pSavepoint = (nNew==0)?0:&pPager->aSavepoint[nNew-1];
-+      rc = pagerPlaybackSavepoint(pPager, pSavepoint);
-+      assert(rc!=SQLITE_DONE);
-+    }
-+  }
- 
--/* WAL magic value. Either this value, or the same value with the least
--** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit
--** big-endian format in the first 4 bytes of a WAL file.
--**
--** If the LSB is set, then the checksums for each frame within the WAL
--** file are calculated by treating all data as an array of 32-bit 
--** big-endian words. Otherwise, they are calculated by interpreting 
--** all data as 32-bit little-endian words.
-+  return rc;
-+}
-+
-+/*
-+** Return the full pathname of the database file.
- */
--#define WAL_MAGIC 0x377f0682
-+SQLITE_PRIVATE const char *sqlite3PagerFilename(Pager *pPager){
-+  return pPager->zFilename;
-+}
- 
- /*
--** Return the offset of frame iFrame in the write-ahead log file, 
--** assuming a database page size of szPage bytes. The offset returned
--** is to the start of the write-ahead log frame-header.
-+** Return the VFS structure for the pager.
- */
--#define walFrameOffset(iFrame, szPage) (                               \
--  WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE)         \
--)
-+SQLITE_PRIVATE const sqlite3_vfs *sqlite3PagerVfs(Pager *pPager){
-+  return pPager->pVfs;
-+}
- 
- /*
--** An open write-ahead log file is represented by an instance of the
--** following object.
-+** Return the file handle for the database file associated
-+** with the pager.  This might return NULL if the file has
-+** not yet been opened.
- */
--struct Wal {
--  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
--  sqlite3_file *pDbFd;       /* File handle for the database file */
--  sqlite3_file *pWalFd;      /* File handle for WAL file */
--  u32 iCallback;             /* Value to pass to log callback (or 0) */
--  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
--  int nWiData;               /* Size of array apWiData */
--  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
--  u32 szPage;                /* Database page size */
--  i16 readLock;              /* Which read lock is being held.  -1 for none */
--  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
--  u8 writeLock;              /* True if in a write transaction */
--  u8 ckptLock;               /* True if holding a checkpoint lock */
--  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
--  WalIndexHdr hdr;           /* Wal-index header for current transaction */
--  const char *zWalName;      /* Name of WAL file */
--  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
--#ifdef SQLITE_DEBUG
--  u8 lockError;              /* True if a locking error has occurred */
--#endif
--};
-+SQLITE_PRIVATE sqlite3_file *sqlite3PagerFile(Pager *pPager){
-+  return pPager->fd;
-+}
- 
- /*
--** Candidate values for Wal.exclusiveMode.
-+** Return the full pathname of the journal file.
- */
--#define WAL_NORMAL_MODE     0
--#define WAL_EXCLUSIVE_MODE  1     
--#define WAL_HEAPMEMORY_MODE 2
-+SQLITE_PRIVATE const char *sqlite3PagerJournalname(Pager *pPager){
-+  return pPager->zJournal;
-+}
- 
- /*
--** Possible values for WAL.readOnly
-+** Return true if fsync() calls are disabled for this pager.  Return FALSE
-+** if fsync()s are executed normally.
- */
--#define WAL_RDWR        0    /* Normal read/write connection */
--#define WAL_RDONLY      1    /* The WAL file is readonly */
--#define WAL_SHM_RDONLY  2    /* The SHM file is readonly */
-+SQLITE_PRIVATE int sqlite3PagerNosync(Pager *pPager){
-+  return pPager->noSync;
-+}
- 
-+#ifdef SQLITE_HAS_CODEC
- /*
--** Each page of the wal-index mapping contains a hash-table made up of
--** an array of HASHTABLE_NSLOT elements of the following type.
-+** Set or retrieve the codec for this pager
- */
--typedef u16 ht_slot;
-+SQLITE_PRIVATE void sqlite3PagerSetCodec(
-+  Pager *pPager,
-+  void *(*xCodec)(void*,void*,Pgno,int),
-+  void (*xCodecSizeChng)(void*,int,int),
-+  void (*xCodecFree)(void*),
-+  void *pCodec
-+){
-+  if( pPager->xCodecFree ) pPager->xCodecFree(pPager->pCodec);
-+  pPager->xCodec = pPager->memDb ? 0 : xCodec;
-+  pPager->xCodecSizeChng = xCodecSizeChng;
-+  pPager->xCodecFree = xCodecFree;
-+  pPager->pCodec = pCodec;
-+  pagerReportSize(pPager);
-+}
-+SQLITE_PRIVATE void *sqlite3PagerGetCodec(Pager *pPager){
-+  return pPager->pCodec;
-+}
-+#endif
- 
-+#ifndef SQLITE_OMIT_AUTOVACUUM
- /*
--** This structure is used to implement an iterator that loops through
--** all frames in the WAL in database page order. Where two or more frames
--** correspond to the same database page, the iterator visits only the 
--** frame most recently written to the WAL (in other words, the frame with
--** the largest index).
-+** Move the page pPg to location pgno in the file.
- **
--** The internals of this structure are only accessed by:
-+** There must be no references to the page previously located at
-+** pgno (which we call pPgOld) though that page is allowed to be
-+** in cache.  If the page previously located at pgno is not already
-+** in the rollback journal, it is not put there by this routine.
- **
--**   walIteratorInit() - Create a new iterator,
--**   walIteratorNext() - Step an iterator,
--**   walIteratorFree() - Free an iterator.
-+** References to the page pPg remain valid. Updating any
-+** meta-data associated with pPg (i.e. data stored in the nExtra bytes
-+** allocated along with the page) is the responsibility of the caller.
- **
--** This functionality is used by the checkpoint code (see walCheckpoint()).
--*/
--struct WalIterator {
--  int iPrior;                     /* Last result returned from the iterator */
--  int nSegment;                   /* Number of entries in aSegment[] */
--  struct WalSegment {
--    int iNext;                    /* Next slot in aIndex[] not yet returned */
--    ht_slot *aIndex;              /* i0, i1, i2... such that aPgno[iN] ascend */
--    u32 *aPgno;                   /* Array of page numbers. */
--    int nEntry;                   /* Nr. of entries in aPgno[] and aIndex[] */
--    int iZero;                    /* Frame number associated with aPgno[0] */
--  } aSegment[1];                  /* One for every 32KB page in the wal-index */
--};
--
--/*
--** Define the parameters of the hash tables in the wal-index file. There
--** is a hash-table following every HASHTABLE_NPAGE page numbers in the
--** wal-index.
-+** A transaction must be active when this routine is called. It used to be
-+** required that a statement transaction was not active, but this restriction
-+** has been removed (CREATE INDEX needs to move a page when a statement
-+** transaction is active).
- **
--** Changing any of these constants will alter the wal-index format and
--** create incompatibilities.
-+** If the fourth argument, isCommit, is non-zero, then this page is being
-+** moved as part of a database reorganization just before the transaction 
-+** is being committed. In this case, it is guaranteed that the database page 
-+** pPg refers to will not be written to again within this transaction.
-+**
-+** This function may return SQLITE_NOMEM or an IO error code if an error
-+** occurs. Otherwise, it returns SQLITE_OK.
- */
--#define HASHTABLE_NPAGE      4096                 /* Must be power of 2 */
--#define HASHTABLE_HASH_1     383                  /* Should be prime */
--#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
-+SQLITE_PRIVATE int sqlite3PagerMovepage(Pager *pPager, DbPage *pPg, Pgno pgno, int isCommit){
-+  PgHdr *pPgOld;               /* The page being overwritten. */
-+  Pgno needSyncPgno = 0;       /* Old value of pPg->pgno, if sync is required */
-+  int rc;                      /* Return code */
-+  Pgno origPgno;               /* The original page number */
- 
--/* 
--** The block of page numbers associated with the first hash-table in a
--** wal-index is smaller than usual. This is so that there is a complete
--** hash-table on each aligned 32KB page of the wal-index.
--*/
--#define HASHTABLE_NPAGE_ONE  (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))
-+  assert( pPg->nRef>0 );
-+  assert( pPager->eState==PAGER_WRITER_CACHEMOD
-+       || pPager->eState==PAGER_WRITER_DBMOD
-+  );
-+  assert( assert_pager_state(pPager) );
- 
--/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */
--#define WALINDEX_PGSZ   (                                         \
--    sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
--)
-+  /* In order to be able to rollback, an in-memory database must journal
-+  ** the page we are moving from.
-+  */
-+  if( MEMDB ){
-+    rc = sqlite3PagerWrite(pPg);
-+    if( rc ) return rc;
-+  }
- 
--/*
--** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
--** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
--** numbered from zero.
--**
--** If this call is successful, *ppPage is set to point to the wal-index
--** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
--** then an SQLite error code is returned and *ppPage is set to 0.
--*/
--static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
--  int rc = SQLITE_OK;
-+  /* If the page being moved is dirty and has not been saved by the latest
-+  ** savepoint, then save the current contents of the page into the 
-+  ** sub-journal now. This is required to handle the following scenario:
-+  **
-+  **   BEGIN;
-+  **     <journal page X, then modify it in memory>
-+  **     SAVEPOINT one;
-+  **       <Move page X to location Y>
-+  **     ROLLBACK TO one;
-+  **
-+  ** If page X were not written to the sub-journal here, it would not
-+  ** be possible to restore its contents when the "ROLLBACK TO one"
-+  ** statement is processed.
-+  **
-+  ** subjournalPage() may need to allocate space to store pPg->pgno into
-+  ** one or more savepoint bitvecs. This is the reason this function
-+  ** may return SQLITE_NOMEM.
-+  */
-+  if( pPg->flags&PGHDR_DIRTY
-+   && subjRequiresPage(pPg)
-+   && SQLITE_OK!=(rc = subjournalPage(pPg))
-+  ){
-+    return rc;
-+  }
- 
--  /* Enlarge the pWal->apWiData[] array if required */
--  if( pWal->nWiData<=iPage ){
--    int nByte = sizeof(u32*)*(iPage+1);
--    volatile u32 **apNew;
--    apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte);
--    if( !apNew ){
--      *ppPage = 0;
--      return SQLITE_NOMEM;
-+  PAGERTRACE(("MOVE %d page %d (needSync=%d) moves to %d\n", 
-+      PAGERID(pPager), pPg->pgno, (pPg->flags&PGHDR_NEED_SYNC)?1:0, pgno));
-+  IOTRACE(("MOVE %p %d %d\n", pPager, pPg->pgno, pgno))
-+
-+  /* If the journal needs to be sync()ed before page pPg->pgno can
-+  ** be written to, store pPg->pgno in local variable needSyncPgno.
-+  **
-+  ** If the isCommit flag is set, there is no need to remember that
-+  ** the journal needs to be sync()ed before database page pPg->pgno 
-+  ** can be written to. The caller has already promised not to write to it.
-+  */
-+  if( (pPg->flags&PGHDR_NEED_SYNC) && !isCommit ){
-+    needSyncPgno = pPg->pgno;
-+    assert( pageInJournal(pPg) || pPg->pgno>pPager->dbOrigSize );
-+    assert( pPg->flags&PGHDR_DIRTY );
-+  }
-+
-+  /* If the cache contains a page with page-number pgno, remove it
-+  ** from its hash chain. Also, if the PGHDR_NEED_SYNC flag was set for 
-+  ** page pgno before the 'move' operation, it needs to be retained 
-+  ** for the page moved there.
-+  */
-+  pPg->flags &= ~PGHDR_NEED_SYNC;
-+  pPgOld = pager_lookup(pPager, pgno);
-+  assert( !pPgOld || pPgOld->nRef==1 );
-+  if( pPgOld ){
-+    pPg->flags |= (pPgOld->flags&PGHDR_NEED_SYNC);
-+    if( MEMDB ){
-+      /* Do not discard pages from an in-memory database since we might
-+      ** need to rollback later.  Just move the page out of the way. */
-+      sqlite3PcacheMove(pPgOld, pPager->dbSize+1);
-+    }else{
-+      sqlite3PcacheDrop(pPgOld);
-     }
--    memset((void*)&apNew[pWal->nWiData], 0,
--           sizeof(u32*)*(iPage+1-pWal->nWiData));
--    pWal->apWiData = apNew;
--    pWal->nWiData = iPage+1;
-   }
- 
--  /* Request a pointer to the required page from the VFS */
--  if( pWal->apWiData[iPage]==0 ){
--    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
--      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
--      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM;
--    }else{
--      rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, 
--          pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
--      );
--      if( rc==SQLITE_READONLY ){
--        pWal->readOnly |= WAL_SHM_RDONLY;
--        rc = SQLITE_OK;
-+  origPgno = pPg->pgno;
-+  sqlite3PcacheMove(pPg, pgno);
-+  sqlite3PcacheMakeDirty(pPg);
-+
-+  /* For an in-memory database, make sure the original page continues
-+  ** to exist, in case the transaction needs to roll back.  Use pPgOld
-+  ** as the original page since it has already been allocated.
-+  */
-+  if( MEMDB ){
-+    assert( pPgOld );
-+    sqlite3PcacheMove(pPgOld, origPgno);
-+    sqlite3PagerUnref(pPgOld);
-+  }
-+
-+  if( needSyncPgno ){
-+    /* If needSyncPgno is non-zero, then the journal file needs to be 
-+    ** sync()ed before any data is written to database file page needSyncPgno.
-+    ** Currently, no such page exists in the page-cache and the 
-+    ** "is journaled" bitvec flag has been set. This needs to be remedied by
-+    ** loading the page into the pager-cache and setting the PGHDR_NEED_SYNC
-+    ** flag.
-+    **
-+    ** If the attempt to load the page into the page-cache fails, (due
-+    ** to a malloc() or IO failure), clear the bit in the pInJournal[]
-+    ** array. Otherwise, if the page is loaded and written again in
-+    ** this transaction, it may be written to the database file before
-+    ** it is synced into the journal file. This way, it may end up in
-+    ** the journal file twice, but that is not a problem.
-+    */
-+    PgHdr *pPgHdr;
-+    rc = sqlite3PagerGet(pPager, needSyncPgno, &pPgHdr);
-+    if( rc!=SQLITE_OK ){
-+      if( needSyncPgno<=pPager->dbOrigSize ){
-+        assert( pPager->pTmpSpace!=0 );
-+        sqlite3BitvecClear(pPager->pInJournal, needSyncPgno, pPager->pTmpSpace);
-       }
-+      return rc;
-     }
-+    pPgHdr->flags |= PGHDR_NEED_SYNC;
-+    sqlite3PcacheMakeDirty(pPgHdr);
-+    sqlite3PagerUnref(pPgHdr);
-   }
- 
--  *ppPage = pWal->apWiData[iPage];
--  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
--  return rc;
-+  return SQLITE_OK;
- }
-+#endif
- 
- /*
--** Return a pointer to the WalCkptInfo structure in the wal-index.
-+** Return a pointer to the data for the specified page.
- */
--static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
--  assert( pWal->nWiData>0 && pWal->apWiData[0] );
--  return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]);
-+SQLITE_PRIVATE void *sqlite3PagerGetData(DbPage *pPg){
-+  assert( pPg->nRef>0 || pPg->pPager->memDb );
-+  return pPg->pData;
- }
- 
- /*
--** Return a pointer to the WalIndexHdr structure in the wal-index.
-+** Return a pointer to the Pager.nExtra bytes of "extra" space 
-+** allocated along with the specified page.
- */
--static volatile WalIndexHdr *walIndexHdr(Wal *pWal){
--  assert( pWal->nWiData>0 && pWal->apWiData[0] );
--  return (volatile WalIndexHdr*)pWal->apWiData[0];
-+SQLITE_PRIVATE void *sqlite3PagerGetExtra(DbPage *pPg){
-+  return pPg->pExtra;
- }
- 
- /*
--** The argument to this macro must be of type u32. On a little-endian
--** architecture, it returns the u32 value that results from interpreting
--** the 4 bytes as a big-endian value. On a big-endian architecture, it
--** returns the value that would be produced by intepreting the 4 bytes
--** of the input value as a little-endian integer.
-+** Get/set the locking-mode for this pager. Parameter eMode must be one
-+** of PAGER_LOCKINGMODE_QUERY, PAGER_LOCKINGMODE_NORMAL or 
-+** PAGER_LOCKINGMODE_EXCLUSIVE. If the parameter is not _QUERY, then
-+** the locking-mode is set to the value specified.
-+**
-+** The returned value is either PAGER_LOCKINGMODE_NORMAL or
-+** PAGER_LOCKINGMODE_EXCLUSIVE, indicating the current (possibly updated)
-+** locking-mode.
- */
--#define BYTESWAP32(x) ( \
--    (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8)  \
--  + (((x)&0x00FF0000)>>8)  + (((x)&0xFF000000)>>24) \
--)
-+SQLITE_PRIVATE int sqlite3PagerLockingMode(Pager *pPager, int eMode){
-+  assert( eMode==PAGER_LOCKINGMODE_QUERY
-+            || eMode==PAGER_LOCKINGMODE_NORMAL
-+            || eMode==PAGER_LOCKINGMODE_EXCLUSIVE );
-+  assert( PAGER_LOCKINGMODE_QUERY<0 );
-+  assert( PAGER_LOCKINGMODE_NORMAL>=0 && PAGER_LOCKINGMODE_EXCLUSIVE>=0 );
-+  assert( pPager->exclusiveMode || 0==sqlite3WalHeapMemory(pPager->pWal) );
-+  if( eMode>=0 && !pPager->tempFile && !sqlite3WalHeapMemory(pPager->pWal) ){
-+    pPager->exclusiveMode = (u8)eMode;
-+  }
-+  return (int)pPager->exclusiveMode;
-+}
- 
- /*
--** Generate or extend an 8 byte checksum based on the data in 
--** array aByte[] and the initial values of aIn[0] and aIn[1] (or
--** initial values of 0 and 0 if aIn==NULL).
-+** Set the journal-mode for this pager. Parameter eMode must be one of:
- **
--** The checksum is written back into aOut[] before returning.
-+**    PAGER_JOURNALMODE_DELETE
-+**    PAGER_JOURNALMODE_TRUNCATE
-+**    PAGER_JOURNALMODE_PERSIST
-+**    PAGER_JOURNALMODE_OFF
-+**    PAGER_JOURNALMODE_MEMORY
-+**    PAGER_JOURNALMODE_WAL
- **
--** nByte must be a positive multiple of 8.
-+** The journalmode is set to the value specified if the change is allowed.
-+** The change may be disallowed for the following reasons:
-+**
-+**   *  An in-memory database can only have its journal_mode set to _OFF
-+**      or _MEMORY.
-+**
-+**   *  Temporary databases cannot have _WAL journalmode.
-+**
-+** The returned value indicates the current (possibly updated) journal-mode.
- */
--static void walChecksumBytes(
--  int nativeCksum, /* True for native byte-order, false for non-native */
--  u8 *a,           /* Content to be checksummed */
--  int nByte,       /* Bytes of content in a[].  Must be a multiple of 8. */
--  const u32 *aIn,  /* Initial checksum value input */
--  u32 *aOut        /* OUT: Final checksum value output */
--){
--  u32 s1, s2;
--  u32 *aData = (u32 *)a;
--  u32 *aEnd = (u32 *)&a[nByte];
-+SQLITE_PRIVATE int sqlite3PagerSetJournalMode(Pager *pPager, int eMode){
-+  u8 eOld = pPager->journalMode;    /* Prior journalmode */
- 
--  if( aIn ){
--    s1 = aIn[0];
--    s2 = aIn[1];
--  }else{
--    s1 = s2 = 0;
--  }
-+#ifdef SQLITE_DEBUG
-+  /* The print_pager_state() routine is intended to be used by the debugger
-+  ** only.  We invoke it once here to suppress a compiler warning. */
-+  print_pager_state(pPager);
-+#endif
- 
--  assert( nByte>=8 );
--  assert( (nByte&0x00000007)==0 );
- 
--  if( nativeCksum ){
--    do {
--      s1 += *aData++ + s2;
--      s2 += *aData++ + s1;
--    }while( aData<aEnd );
--  }else{
--    do {
--      s1 += BYTESWAP32(aData[0]) + s2;
--      s2 += BYTESWAP32(aData[1]) + s1;
--      aData += 2;
--    }while( aData<aEnd );
-+  /* The eMode parameter is always valid */
-+  assert(      eMode==PAGER_JOURNALMODE_DELETE
-+            || eMode==PAGER_JOURNALMODE_TRUNCATE
-+            || eMode==PAGER_JOURNALMODE_PERSIST
-+            || eMode==PAGER_JOURNALMODE_OFF 
-+            || eMode==PAGER_JOURNALMODE_WAL 
-+            || eMode==PAGER_JOURNALMODE_MEMORY );
-+
-+  /* This routine is only called from the OP_JournalMode opcode, and
-+  ** the logic there will never allow a temporary file to be changed
-+  ** to WAL mode.
-+  */
-+  assert( pPager->tempFile==0 || eMode!=PAGER_JOURNALMODE_WAL );
-+
-+  /* Do not allow the journalmode of an in-memory database to be set to
-+  ** anything other than MEMORY or OFF
-+  */
-+  if( MEMDB ){
-+    assert( eOld==PAGER_JOURNALMODE_MEMORY || eOld==PAGER_JOURNALMODE_OFF );
-+    if( eMode!=PAGER_JOURNALMODE_MEMORY && eMode!=PAGER_JOURNALMODE_OFF ){
-+      eMode = eOld;
-+    }
-   }
- 
--  aOut[0] = s1;
--  aOut[1] = s2;
--}
-+  if( eMode!=eOld ){
- 
--static void walShmBarrier(Wal *pWal){
--  if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
--    sqlite3OsShmBarrier(pWal->pDbFd);
-+    /* Change the journal mode. */
-+    assert( pPager->eState!=PAGER_ERROR );
-+    pPager->journalMode = (u8)eMode;
-+
-+    /* When transitioning from TRUNCATE or PERSIST to any other journal
-+    ** mode except WAL, unless the pager is in locking_mode=exclusive mode,
-+    ** delete the journal file.
-+    */
-+    assert( (PAGER_JOURNALMODE_TRUNCATE & 5)==1 );
-+    assert( (PAGER_JOURNALMODE_PERSIST & 5)==1 );
-+    assert( (PAGER_JOURNALMODE_DELETE & 5)==0 );
-+    assert( (PAGER_JOURNALMODE_MEMORY & 5)==4 );
-+    assert( (PAGER_JOURNALMODE_OFF & 5)==0 );
-+    assert( (PAGER_JOURNALMODE_WAL & 5)==5 );
-+
-+    assert( isOpen(pPager->fd) || pPager->exclusiveMode );
-+    if( !pPager->exclusiveMode && (eOld & 5)==1 && (eMode & 1)==0 ){
-+
-+      /* In this case we would like to delete the journal file. If it is
-+      ** not possible, then that is not a problem. Deleting the journal file
-+      ** here is an optimization only.
-+      **
-+      ** Before deleting the journal file, obtain a RESERVED lock on the
-+      ** database file. This ensures that the journal file is not deleted
-+      ** while it is in use by some other client.
-+      */
-+      sqlite3OsClose(pPager->jfd);
-+      if( pPager->eLock>=RESERVED_LOCK ){
-+        sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
-+      }else{
-+        int rc = SQLITE_OK;
-+        int state = pPager->eState;
-+        assert( state==PAGER_OPEN || state==PAGER_READER );
-+        if( state==PAGER_OPEN ){
-+          rc = sqlite3PagerSharedLock(pPager);
-+        }
-+        if( pPager->eState==PAGER_READER ){
-+          assert( rc==SQLITE_OK );
-+          rc = pagerLockDb(pPager, RESERVED_LOCK);
-+        }
-+        if( rc==SQLITE_OK ){
-+          sqlite3OsDelete(pPager->pVfs, pPager->zJournal, 0);
-+        }
-+        if( rc==SQLITE_OK && state==PAGER_READER ){
-+          pagerUnlockDb(pPager, SHARED_LOCK);
-+        }else if( state==PAGER_OPEN ){
-+          pager_unlock(pPager);
-+        }
-+        assert( state==pPager->eState );
-+      }
-+    }
-   }
-+
-+  /* Return the new journal mode */
-+  return (int)pPager->journalMode;
- }
- 
- /*
--** Write the header information in pWal->hdr into the wal-index.
--**
--** The checksum on pWal->hdr is updated before it is written.
-+** Return the current journal mode.
- */
--static void walIndexWriteHdr(Wal *pWal){
--  volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
--  const int nCksum = offsetof(WalIndexHdr, aCksum);
--
--  assert( pWal->writeLock );
--  pWal->hdr.isInit = 1;
--  pWal->hdr.iVersion = WALINDEX_MAX_VERSION;
--  walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);
--  memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr));
--  walShmBarrier(pWal);
--  memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr));
-+SQLITE_PRIVATE int sqlite3PagerGetJournalMode(Pager *pPager){
-+  return (int)pPager->journalMode;
- }
- 
- /*
--** This function encodes a single frame header and writes it to a buffer
--** supplied by the caller. A frame-header is made up of a series of 
--** 4-byte big-endian integers, as follows:
--**
--**     0: Page number.
--**     4: For commit records, the size of the database image in pages 
--**        after the commit. For all other records, zero.
--**     8: Salt-1 (copied from the wal-header)
--**    12: Salt-2 (copied from the wal-header)
--**    16: Checksum-1.
--**    20: Checksum-2.
-+** Return TRUE if the pager is in a state where it is OK to change the
-+** journalmode.  Journalmode changes can only happen when the database
-+** is unmodified.
- */
--static void walEncodeFrame(
--  Wal *pWal,                      /* The write-ahead log */
--  u32 iPage,                      /* Database page number for frame */
--  u32 nTruncate,                  /* New db size (or 0 for non-commit frames) */
--  u8 *aData,                      /* Pointer to page data */
--  u8 *aFrame                      /* OUT: Write encoded frame here */
--){
--  int nativeCksum;                /* True for native byte-order checksums */
--  u32 *aCksum = pWal->hdr.aFrameCksum;
--  assert( WAL_FRAME_HDRSIZE==24 );
--  sqlite3Put4byte(&aFrame[0], iPage);
--  sqlite3Put4byte(&aFrame[4], nTruncate);
--  memcpy(&aFrame[8], pWal->hdr.aSalt, 8);
--
--  nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
--  walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
--  walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
--
--  sqlite3Put4byte(&aFrame[16], aCksum[0]);
--  sqlite3Put4byte(&aFrame[20], aCksum[1]);
-+SQLITE_PRIVATE int sqlite3PagerOkToChangeJournalMode(Pager *pPager){
-+  assert( assert_pager_state(pPager) );
-+  if( pPager->eState>=PAGER_WRITER_CACHEMOD ) return 0;
-+  if( NEVER(isOpen(pPager->jfd) && pPager->journalOff>0) ) return 0;
-+  return 1;
- }
- 
- /*
--** Check to see if the frame with header in aFrame[] and content
--** in aData[] is valid.  If it is a valid frame, fill *piPage and
--** *pnTruncate and return true.  Return if the frame is not valid.
-+** Get/set the size-limit used for persistent journal files.
-+**
-+** Setting the size limit to -1 means no limit is enforced.
-+** An attempt to set a limit smaller than -1 is a no-op.
- */
--static int walDecodeFrame(
--  Wal *pWal,                      /* The write-ahead log */
--  u32 *piPage,                    /* OUT: Database page number for frame */
--  u32 *pnTruncate,                /* OUT: New db size (or 0 if not commit) */
--  u8 *aData,                      /* Pointer to page data (for checksum) */
--  u8 *aFrame                      /* Frame data */
--){
--  int nativeCksum;                /* True for native byte-order checksums */
--  u32 *aCksum = pWal->hdr.aFrameCksum;
--  u32 pgno;                       /* Page number of the frame */
--  assert( WAL_FRAME_HDRSIZE==24 );
--
--  /* A frame is only valid if the salt values in the frame-header
--  ** match the salt values in the wal-header. 
--  */
--  if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){
--    return 0;
--  }
--
--  /* A frame is only valid if the page number is creater than zero.
--  */
--  pgno = sqlite3Get4byte(&aFrame[0]);
--  if( pgno==0 ){
--    return 0;
--  }
--
--  /* A frame is only valid if a checksum of the WAL header,
--  ** all prior frams, the first 16 bytes of this frame-header, 
--  ** and the frame-data matches the checksum in the last 8 
--  ** bytes of this frame-header.
--  */
--  nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
--  walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
--  walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
--  if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) 
--   || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) 
--  ){
--    /* Checksum failed. */
--    return 0;
-+SQLITE_PRIVATE i64 sqlite3PagerJournalSizeLimit(Pager *pPager, i64 iLimit){
-+  if( iLimit>=-1 ){
-+    pPager->journalSizeLimit = iLimit;
-+    sqlite3WalLimit(pPager->pWal, iLimit);
-   }
--
--  /* If we reach this point, the frame is valid.  Return the page number
--  ** and the new database size.
--  */
--  *piPage = pgno;
--  *pnTruncate = sqlite3Get4byte(&aFrame[4]);
--  return 1;
-+  return pPager->journalSizeLimit;
- }
- 
--
--#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
- /*
--** Names of locks.  This routine is used to provide debugging output and is not
--** a part of an ordinary build.
-+** Return a pointer to the pPager->pBackup variable. The backup module
-+** in backup.c maintains the content of this variable. This module
-+** uses it opaquely as an argument to sqlite3BackupRestart() and
-+** sqlite3BackupUpdate() only.
- */
--static const char *walLockName(int lockIdx){
--  if( lockIdx==WAL_WRITE_LOCK ){
--    return "WRITE-LOCK";
--  }else if( lockIdx==WAL_CKPT_LOCK ){
--    return "CKPT-LOCK";
--  }else if( lockIdx==WAL_RECOVER_LOCK ){
--    return "RECOVER-LOCK";
--  }else{
--    static char zName[15];
--    sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]",
--                     lockIdx-WAL_READ_LOCK(0));
--    return zName;
--  }
-+SQLITE_PRIVATE sqlite3_backup **sqlite3PagerBackupPtr(Pager *pPager){
-+  return &pPager->pBackup;
- }
--#endif /*defined(SQLITE_TEST) || defined(SQLITE_DEBUG) */
--    
- 
-+#ifndef SQLITE_OMIT_WAL
- /*
--** Set or release locks on the WAL.  Locks are either shared or exclusive.
--** A lock cannot be moved directly between shared and exclusive - it must go
--** through the unlocked state first.
-+** This function is called when the user invokes "PRAGMA wal_checkpoint",
-+** "PRAGMA wal_blocking_checkpoint" or calls the sqlite3_wal_checkpoint()
-+** or wal_blocking_checkpoint() API functions.
- **
--** In locking_mode=EXCLUSIVE, all of these routines become no-ops.
-+** Parameter eMode is one of SQLITE_CHECKPOINT_PASSIVE, FULL or RESTART.
- */
--static int walLockShared(Wal *pWal, int lockIdx){
--  int rc;
--  if( pWal->exclusiveMode ) return SQLITE_OK;
--  rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
--                        SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
--  WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
--            walLockName(lockIdx), rc ? "failed" : "ok"));
--  VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
-+SQLITE_PRIVATE int sqlite3PagerCheckpoint(Pager *pPager, int eMode, int *pnLog, int *pnCkpt){
-+  int rc = SQLITE_OK;
-+  if( pPager->pWal ){
-+    rc = sqlite3WalCheckpoint(pPager->pWal, eMode,
-+        pPager->xBusyHandler, pPager->pBusyHandlerArg,
-+        pPager->ckptSyncFlags, pPager->pageSize, (u8 *)pPager->pTmpSpace,
-+        pnLog, pnCkpt
-+    );
-+  }
-   return rc;
- }
--static void walUnlockShared(Wal *pWal, int lockIdx){
--  if( pWal->exclusiveMode ) return;
--  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
--                         SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
--  WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
--}
--static int walLockExclusive(Wal *pWal, int lockIdx, int n){
--  int rc;
--  if( pWal->exclusiveMode ) return SQLITE_OK;
--  rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
--                        SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
--  WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
--            walLockName(lockIdx), n, rc ? "failed" : "ok"));
--  VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
--  return rc;
-+
-+SQLITE_PRIVATE int sqlite3PagerWalCallback(Pager *pPager){
-+  return sqlite3WalCallback(pPager->pWal);
- }
--static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
--  if( pWal->exclusiveMode ) return;
--  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
--                         SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
--  WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
--             walLockName(lockIdx), n));
-+
-+/*
-+** Return true if the underlying VFS for the given pager supports the
-+** primitives necessary for write-ahead logging.
-+*/
-+SQLITE_PRIVATE int sqlite3PagerWalSupported(Pager *pPager){
-+  const sqlite3_io_methods *pMethods = pPager->fd->pMethods;
-+  return pPager->exclusiveMode || (pMethods->iVersion>=2 && pMethods->xShmMap);
- }
- 
- /*
--** Compute a hash on a page number.  The resulting hash value must land
--** between 0 and (HASHTABLE_NSLOT-1).  The walHashNext() function advances
--** the hash to the next value in the event of a collision.
-+** Attempt to take an exclusive lock on the database file. If a PENDING lock
-+** is obtained instead, immediately release it.
- */
--static int walHash(u32 iPage){
--  assert( iPage>0 );
--  assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
--  return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1);
-+static int pagerExclusiveLock(Pager *pPager){
-+  int rc;                         /* Return code */
-+
-+  assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK );
-+  rc = pagerLockDb(pPager, EXCLUSIVE_LOCK);
-+  if( rc!=SQLITE_OK ){
-+    /* If the attempt to grab the exclusive lock failed, release the 
-+    ** pending lock that may have been obtained instead.  */
-+    pagerUnlockDb(pPager, SHARED_LOCK);
-+  }
-+
-+  return rc;
- }
--static int walNextHash(int iPriorHash){
--  return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
-+
-+/*
-+** Call sqlite3WalOpen() to open the WAL handle. If the pager is in 
-+** exclusive-locking mode when this function is called, take an EXCLUSIVE
-+** lock on the database file and use heap-memory to store the wal-index
-+** in. Otherwise, use the normal shared-memory.
-+*/
-+static int pagerOpenWal(Pager *pPager){
-+  int rc = SQLITE_OK;
-+
-+  assert( pPager->pWal==0 && pPager->tempFile==0 );
-+  assert( pPager->eLock==SHARED_LOCK || pPager->eLock==EXCLUSIVE_LOCK || pPager->noReadlock);
-+
-+  /* If the pager is already in exclusive-mode, the WAL module will use 
-+  ** heap-memory for the wal-index instead of the VFS shared-memory 
-+  ** implementation. Take the exclusive lock now, before opening the WAL
-+  ** file, to make sure this is safe.
-+  */
-+  if( pPager->exclusiveMode ){
-+    rc = pagerExclusiveLock(pPager);
-+  }
-+
-+  /* Open the connection to the log file. If this operation fails, 
-+  ** (e.g. due to malloc() failure), return an error code.
-+  */
-+  if( rc==SQLITE_OK ){
-+    rc = sqlite3WalOpen(pPager->pVfs, 
-+        pPager->fd, pPager->zWal, pPager->exclusiveMode,
-+        pPager->journalSizeLimit, &pPager->pWal
-+    );
-+  }
-+
-+  return rc;
- }
- 
--/* 
--** Return pointers to the hash table and page number array stored on
--** page iHash of the wal-index. The wal-index is broken into 32KB pages
--** numbered starting from 0.
-+
-+/*
-+** The caller must be holding a SHARED lock on the database file to call
-+** this function.
- **
--** Set output variable *paHash to point to the start of the hash table
--** in the wal-index file. Set *piZero to one less than the frame 
--** number of the first frame indexed by this hash table. If a
--** slot in the hash table is set to N, it refers to frame number 
--** (*piZero+N) in the log.
-+** If the pager passed as the first argument is open on a real database
-+** file (not a temp file or an in-memory database), and the WAL file
-+** is not already open, make an attempt to open it now. If successful,
-+** return SQLITE_OK. If an error occurs or the VFS used by the pager does 
-+** not support the xShmXXX() methods, return an error code. *pbOpen is
-+** not modified in either case.
- **
--** Finally, set *paPgno so that *paPgno[1] is the page number of the
--** first frame indexed by the hash table, frame (*piZero+1).
-+** If the pager is open on a temp-file (or in-memory database), or if
-+** the WAL file is already open, set *pbOpen to 1 and return SQLITE_OK
-+** without doing anything.
- */
--static int walHashGet(
--  Wal *pWal,                      /* WAL handle */
--  int iHash,                      /* Find the iHash'th table */
--  volatile ht_slot **paHash,      /* OUT: Pointer to hash index */
--  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
--  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
-+SQLITE_PRIVATE int sqlite3PagerOpenWal(
-+  Pager *pPager,                  /* Pager object */
-+  int *pbOpen                     /* OUT: Set to true if call is a no-op */
- ){
--  int rc;                         /* Return code */
--  volatile u32 *aPgno;
-+  int rc = SQLITE_OK;             /* Return code */
- 
--  rc = walIndexPage(pWal, iHash, &aPgno);
--  assert( rc==SQLITE_OK || iHash>0 );
-+  assert( assert_pager_state(pPager) );
-+  assert( pPager->eState==PAGER_OPEN   || pbOpen );
-+  assert( pPager->eState==PAGER_READER || !pbOpen );
-+  assert( pbOpen==0 || *pbOpen==0 );
-+  assert( pbOpen!=0 || (!pPager->tempFile && !pPager->pWal) );
- 
--  if( rc==SQLITE_OK ){
--    u32 iZero;
--    volatile ht_slot *aHash;
-+  if( !pPager->tempFile && !pPager->pWal ){
-+    if( !sqlite3PagerWalSupported(pPager) ) return SQLITE_CANTOPEN;
- 
--    aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE];
--    if( iHash==0 ){
--      aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)];
--      iZero = 0;
--    }else{
--      iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
-+    /* Close any rollback journal previously open */
-+    sqlite3OsClose(pPager->jfd);
-+
-+    rc = pagerOpenWal(pPager);
-+    if( rc==SQLITE_OK ){
-+      pPager->journalMode = PAGER_JOURNALMODE_WAL;
-+      pPager->eState = PAGER_OPEN;
-     }
--  
--    *paPgno = &aPgno[-1];
--    *paHash = aHash;
--    *piZero = iZero;
-+  }else{
-+    *pbOpen = 1;
-   }
-+
-   return rc;
- }
- 
- /*
--** Return the number of the wal-index page that contains the hash-table
--** and page-number array that contain entries corresponding to WAL frame
--** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages 
--** are numbered starting from 0.
-+** This function is called to close the connection to the log file prior
-+** to switching from WAL to rollback mode.
-+**
-+** Before closing the log file, this function attempts to take an 
-+** EXCLUSIVE lock on the database file. If this cannot be obtained, an
-+** error (SQLITE_BUSY) is returned and the log connection is not closed.
-+** If successful, the EXCLUSIVE lock is not released before returning.
- */
--static int walFramePage(u32 iFrame){
--  int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
--  assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
--       && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
--       && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
--       && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
--       && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
--  );
--  return iHash;
--}
-+SQLITE_PRIVATE int sqlite3PagerCloseWal(Pager *pPager){
-+  int rc = SQLITE_OK;
- 
--/*
--** Return the page number associated with frame iFrame in this WAL.
--*/
--static u32 walFramePgno(Wal *pWal, u32 iFrame){
--  int iHash = walFramePage(iFrame);
--  if( iHash==0 ){
--    return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
-+  assert( pPager->journalMode==PAGER_JOURNALMODE_WAL );
-+
-+  /* If the log file is not already open, but does exist in the file-system,
-+  ** it may need to be checkpointed before the connection can switch to
-+  ** rollback mode. Open it now so this can happen.
-+  */
-+  if( !pPager->pWal ){
-+    int logexists = 0;
-+    rc = pagerLockDb(pPager, SHARED_LOCK);
-+    if( rc==SQLITE_OK ){
-+      rc = sqlite3OsAccess(
-+          pPager->pVfs, pPager->zWal, SQLITE_ACCESS_EXISTS, &logexists
-+      );
-+    }
-+    if( rc==SQLITE_OK && logexists ){
-+      rc = pagerOpenWal(pPager);
-+    }
-   }
--  return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
-+    
-+  /* Checkpoint and close the log. Because an EXCLUSIVE lock is held on
-+  ** the database file, the log and log-summary files will be deleted.
-+  */
-+  if( rc==SQLITE_OK && pPager->pWal ){
-+    rc = pagerExclusiveLock(pPager);
-+    if( rc==SQLITE_OK ){
-+      rc = sqlite3WalClose(pPager->pWal, pPager->ckptSyncFlags,
-+                           pPager->pageSize, (u8*)pPager->pTmpSpace);
-+      pPager->pWal = 0;
-+    }
-+  }
-+  return rc;
- }
- 
-+#ifdef SQLITE_HAS_CODEC
- /*
--** Remove entries from the hash table that point to WAL slots greater
--** than pWal->hdr.mxFrame.
--**
--** This function is called whenever pWal->hdr.mxFrame is decreased due
--** to a rollback or savepoint.
-+** This function is called by the wal module when writing page content
-+** into the log file.
- **
--** At most only the hash table containing pWal->hdr.mxFrame needs to be
--** updated.  Any later hash tables will be automatically cleared when
--** pWal->hdr.mxFrame advances to the point where those hash tables are
--** actually needed.
-+** This function returns a pointer to a buffer containing the encrypted
-+** page content. If a malloc fails, this function may return NULL.
- */
--static void walCleanupHash(Wal *pWal){
--  volatile ht_slot *aHash = 0;    /* Pointer to hash table to clear */
--  volatile u32 *aPgno = 0;        /* Page number array for hash table */
--  u32 iZero = 0;                  /* frame == (aHash[x]+iZero) */
--  int iLimit = 0;                 /* Zero values greater than this */
--  int nByte;                      /* Number of bytes to zero in aPgno[] */
--  int i;                          /* Used to iterate through aHash[] */
-+SQLITE_PRIVATE void *sqlite3PagerCodec(PgHdr *pPg){
-+  void *aData = 0;
-+  CODEC2(pPg->pPager, pPg->pData, pPg->pgno, 6, return 0, aData);
-+  return aData;
-+}
-+#endif /* SQLITE_HAS_CODEC */
- 
--  assert( pWal->writeLock );
--  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 );
--  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE );
--  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 );
-+#endif /* !SQLITE_OMIT_WAL */
- 
--  if( pWal->hdr.mxFrame==0 ) return;
-+#endif /* SQLITE_OMIT_DISKIO */
- 
--  /* Obtain pointers to the hash-table and page-number array containing 
--  ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed
--  ** that the page said hash-table and array reside on is already mapped.
--  */
--  assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
--  assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
--  walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero);
-+/* BEGIN CRYPTO */
-+#ifdef SQLITE_HAS_CODEC
-+SQLITE_PRIVATE void sqlite3pager_get_codec(Pager *pPager, void **ctx) {
-+  *ctx = pPager->pCodec;
-+}
- 
--  /* Zero all hash-table entries that correspond to frame numbers greater
--  ** than pWal->hdr.mxFrame.
--  */
--  iLimit = pWal->hdr.mxFrame - iZero;
--  assert( iLimit>0 );
--  for(i=0; i<HASHTABLE_NSLOT; i++){
--    if( aHash[i]>iLimit ){
--      aHash[i] = 0;
--    }
--  }
--  
--  /* Zero the entries in the aPgno array that correspond to frames with
--  ** frame numbers greater than pWal->hdr.mxFrame. 
--  */
--  nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]);
--  memset((void *)&aPgno[iLimit+1], 0, nByte);
-+SQLITE_PRIVATE int sqlite3pager_is_mj_pgno(Pager *pPager, Pgno pgno) {
-+  return (PAGER_MJ_PGNO(pPager) == pgno) ? 1 : 0;
-+}
- 
--#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
--  /* Verify that the every entry in the mapping region is still reachable
--  ** via the hash table even after the cleanup.
--  */
--  if( iLimit ){
--    int i;           /* Loop counter */
--    int iKey;        /* Hash key */
--    for(i=1; i<=iLimit; i++){
--      for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
--        if( aHash[iKey]==i ) break;
--      }
--      assert( aHash[iKey]==i );
--    }
--  }
--#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
-+SQLITE_PRIVATE sqlite3_file *sqlite3Pager_get_fd(Pager *pPager) {
-+  return (isOpen(pPager->fd)) ? pPager->fd : NULL;
- }
- 
-+SQLITE_PRIVATE void sqlite3pager_sqlite3PagerSetCodec(
-+  Pager *pPager,
-+  void *(*xCodec)(void*,void*,Pgno,int),
-+  void (*xCodecSizeChng)(void*,int,int),
-+  void (*xCodecFree)(void*),
-+  void *pCodec
-+){
-+  sqlite3PagerSetCodec(pPager, xCodec, xCodecSizeChng, xCodecFree, pCodec); 
-+}
- 
--/*
--** Set an entry in the wal-index that will map database page number
--** pPage into WAL frame iFrame.
--*/
--static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
--  int rc;                         /* Return code */
--  u32 iZero = 0;                  /* One less than frame number of aPgno[1] */
--  volatile u32 *aPgno = 0;        /* Page number array */
--  volatile ht_slot *aHash = 0;    /* Hash table */
- 
--  rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero);
-+#endif
-+/* END CRYPTO */
- 
--  /* Assuming the wal-index file was successfully mapped, populate the
--  ** page number array and hash table entry.
--  */
--  if( rc==SQLITE_OK ){
--    int iKey;                     /* Hash table key */
--    int idx;                      /* Value to write to hash-table slot */
--    int nCollide;                 /* Number of hash collisions */
- 
--    idx = iFrame - iZero;
--    assert( idx <= HASHTABLE_NSLOT/2 + 1 );
--    
--    /* If this is the first entry to be added to this hash-table, zero the
--    ** entire hash table and aPgno[] array before proceding. 
--    */
--    if( idx==1 ){
--      int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]);
--      memset((void*)&aPgno[1], 0, nByte);
--    }
-+/************** End of pager.c ***********************************************/
-+/************** Begin file wal.c *********************************************/
-+/*
-+** 2010 February 1
-+**
-+** The author disclaims copyright to this source code.  In place of
-+** a legal notice, here is a blessing:
-+**
-+**    May you do good and not evil.
-+**    May you find forgiveness for yourself and forgive others.
-+**    May you share freely, never taking more than you give.
-+**
-+*************************************************************************
-+**
-+** This file contains the implementation of a write-ahead log (WAL) used in 
-+** "journal_mode=WAL" mode.
-+**
-+** WRITE-AHEAD LOG (WAL) FILE FORMAT
-+**
-+** A WAL file consists of a header followed by zero or more "frames".
-+** Each frame records the revised content of a single page from the
-+** database file.  All changes to the database are recorded by writing
-+** frames into the WAL.  Transactions commit when a frame is written that
-+** contains a commit marker.  A single WAL can and usually does record 
-+** multiple transactions.  Periodically, the content of the WAL is
-+** transferred back into the database file in an operation called a
-+** "checkpoint".
-+**
-+** A single WAL file can be used multiple times.  In other words, the
-+** WAL can fill up with frames and then be checkpointed and then new
-+** frames can overwrite the old ones.  A WAL always grows from beginning
-+** toward the end.  Checksums and counters attached to each frame are
-+** used to determine which frames within the WAL are valid and which
-+** are leftovers from prior checkpoints.
-+**
-+** The WAL header is 32 bytes in size and consists of the following eight
-+** big-endian 32-bit unsigned integer values:
-+**
-+**     0: Magic number.  0x377f0682 or 0x377f0683
-+**     4: File format version.  Currently 3007000
-+**     8: Database page size.  Example: 1024
-+**    12: Checkpoint sequence number
-+**    16: Salt-1, random integer incremented with each checkpoint
-+**    20: Salt-2, a different random integer changing with each ckpt
-+**    24: Checksum-1 (first part of checksum for first 24 bytes of header).
-+**    28: Checksum-2 (second part of checksum for first 24 bytes of header).
-+**
-+** Immediately following the wal-header are zero or more frames. Each
-+** frame consists of a 24-byte frame-header followed by a <page-size> bytes
-+** of page data. The frame-header is six big-endian 32-bit unsigned 
-+** integer values, as follows:
-+**
-+**     0: Page number.
-+**     4: For commit records, the size of the database image in pages 
-+**        after the commit. For all other records, zero.
-+**     8: Salt-1 (copied from the header)
-+**    12: Salt-2 (copied from the header)
-+**    16: Checksum-1.
-+**    20: Checksum-2.
-+**
-+** A frame is considered valid if and only if the following conditions are
-+** true:
-+**
-+**    (1) The salt-1 and salt-2 values in the frame-header match
-+**        salt values in the wal-header
-+**
-+**    (2) The checksum values in the final 8 bytes of the frame-header
-+**        exactly match the checksum computed consecutively on the
-+**        WAL header and the first 8 bytes and the content of all frames
-+**        up to and including the current frame.
-+**
-+** The checksum is computed using 32-bit big-endian integers if the
-+** magic number in the first 4 bytes of the WAL is 0x377f0683 and it
-+** is computed using little-endian if the magic number is 0x377f0682.
-+** The checksum values are always stored in the frame header in a
-+** big-endian format regardless of which byte order is used to compute
-+** the checksum.  The checksum is computed by interpreting the input as
-+** an even number of unsigned 32-bit integers: x[0] through x[N].  The
-+** algorithm used for the checksum is as follows:
-+** 
-+**   for i from 0 to n-1 step 2:
-+**     s0 += x[i] + s1;
-+**     s1 += x[i+1] + s0;
-+**   endfor
-+**
-+** Note that s0 and s1 are both weighted checksums using fibonacci weights
-+** in reverse order (the largest fibonacci weight occurs on the first element
-+** of the sequence being summed.)  The s1 value spans all 32-bit 
-+** terms of the sequence whereas s0 omits the final term.
-+**
-+** On a checkpoint, the WAL is first VFS.xSync-ed, then valid content of the
-+** WAL is transferred into the database, then the database is VFS.xSync-ed.
-+** The VFS.xSync operations serve as write barriers - all writes launched
-+** before the xSync must complete before any write that launches after the
-+** xSync begins.
-+**
-+** After each checkpoint, the salt-1 value is incremented and the salt-2
-+** value is randomized.  This prevents old and new frames in the WAL from
-+** being considered valid at the same time and being checkpointed together
-+** following a crash.
-+**
-+** READER ALGORITHM
-+**
-+** To read a page from the database (call it page number P), a reader
-+** first checks the WAL to see if it contains page P.  If so, then the
-+** last valid instance of page P that is followed by a commit frame
-+** or is a commit frame itself becomes the value read.  If the WAL
-+** contains no copies of page P that are valid and which are a commit
-+** frame or are followed by a commit frame, then page P is read from
-+** the database file.
-+**
-+** To start a read transaction, the reader records the index of the last
-+** valid frame in the WAL.  The reader uses this recorded "mxFrame" value
-+** for all subsequent read operations.  New transactions can be appended
-+** to the WAL, but as long as the reader uses its original mxFrame value
-+** and ignores the newly appended content, it will see a consistent snapshot
-+** of the database from a single point in time.  This technique allows
-+** multiple concurrent readers to view different versions of the database
-+** content simultaneously.
-+**
-+** The reader algorithm in the previous paragraphs works correctly, but 
-+** because frames for page P can appear anywhere within the WAL, the
-+** reader has to scan the entire WAL looking for page P frames.  If the
-+** WAL is large (multiple megabytes is typical) that scan can be slow,
-+** and read performance suffers.  To overcome this problem, a separate
-+** data structure called the wal-index is maintained to expedite the
-+** search for frames of a particular page.
-+** 
-+** WAL-INDEX FORMAT
-+**
-+** Conceptually, the wal-index is shared memory, though VFS implementations
-+** might choose to implement the wal-index using a mmapped file.  Because
-+** the wal-index is shared memory, SQLite does not support journal_mode=WAL 
-+** on a network filesystem.  All users of the database must be able to
-+** share memory.
-+**
-+** The wal-index is transient.  After a crash, the wal-index can (and should
-+** be) reconstructed from the original WAL file.  In fact, the VFS is required
-+** to either truncate or zero the header of the wal-index when the last
-+** connection to it closes.  Because the wal-index is transient, it can
-+** use an architecture-specific format; it does not have to be cross-platform.
-+** Hence, unlike the database and WAL file formats which store all values
-+** as big endian, the wal-index can store multi-byte values in the native
-+** byte order of the host computer.
-+**
-+** The purpose of the wal-index is to answer this question quickly:  Given
-+** a page number P, return the index of the last frame for page P in the WAL,
-+** or return NULL if there are no frames for page P in the WAL.
-+**
-+** The wal-index consists of a header region, followed by one or
-+** more index blocks.  
-+**
-+** The wal-index header contains the total number of frames within the WAL
-+** in the mxFrame field.
-+**
-+** Each index block except for the first contains information on 
-+** HASHTABLE_NPAGE frames. The first index block contains information on
-+** HASHTABLE_NPAGE_ONE frames. The values of HASHTABLE_NPAGE_ONE and 
-+** HASHTABLE_NPAGE are selected so that together the wal-index header and
-+** first index block are the same size as all other index blocks in the
-+** wal-index.
-+**
-+** Each index block contains two sections, a page-mapping that contains the
-+** database page number associated with each wal frame, and a hash-table 
-+** that allows readers to query an index block for a specific page number.
-+** The page-mapping is an array of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE
-+** for the first index block) 32-bit page numbers. The first entry in the 
-+** first index-block contains the database page number corresponding to the
-+** first frame in the WAL file. The first entry in the second index block
-+** in the WAL file corresponds to the (HASHTABLE_NPAGE_ONE+1)th frame in
-+** the log, and so on.
-+**
-+** The last index block in a wal-index usually contains less than the full
-+** complement of HASHTABLE_NPAGE (or HASHTABLE_NPAGE_ONE) page-numbers,
-+** depending on the contents of the WAL file. This does not change the
-+** allocated size of the page-mapping array - the page-mapping array merely
-+** contains unused entries.
-+**
-+** Even without using the hash table, the last frame for page P
-+** can be found by scanning the page-mapping sections of each index block
-+** starting with the last index block and moving toward the first, and
-+** within each index block, starting at the end and moving toward the
-+** beginning.  The first entry that equals P corresponds to the frame
-+** holding the content for that page.
-+**
-+** The hash table consists of HASHTABLE_NSLOT 16-bit unsigned integers.
-+** HASHTABLE_NSLOT = 2*HASHTABLE_NPAGE, and there is one entry in the
-+** hash table for each page number in the mapping section, so the hash 
-+** table is never more than half full.  The expected number of collisions 
-+** prior to finding a match is 1.  Each entry of the hash table is a
-+** 1-based index of an entry in the mapping section of the same
-+** index block.   Let K be the 1-based index of the largest entry in
-+** the mapping section.  (For index blocks other than the last, K will
-+** always be exactly HASHTABLE_NPAGE (4096) and for the last index block
-+** K will be (mxFrame%HASHTABLE_NPAGE).)  Unused slots of the hash table
-+** contain a value of 0.
-+**
-+** To look for page P in the hash table, first compute a hash iKey on
-+** P as follows:
-+**
-+**      iKey = (P * 383) % HASHTABLE_NSLOT
-+**
-+** Then start scanning entries of the hash table, starting with iKey
-+** (wrapping around to the beginning when the end of the hash table is
-+** reached) until an unused hash slot is found. Let the first unused slot
-+** be at index iUnused.  (iUnused might be less than iKey if there was
-+** wrap-around.) Because the hash table is never more than half full,
-+** the search is guaranteed to eventually hit an unused entry.  Let 
-+** iMax be the value between iKey and iUnused, closest to iUnused,
-+** where aHash[iMax]==P.  If there is no iMax entry (if there exists
-+** no hash slot such that aHash[i]==p) then page P is not in the
-+** current index block.  Otherwise the iMax-th mapping entry of the
-+** current index block corresponds to the last entry that references 
-+** page P.
-+**
-+** A hash search begins with the last index block and moves toward the
-+** first index block, looking for entries corresponding to page P.  On
-+** average, only two or three slots in each index block need to be
-+** examined in order to either find the last entry for page P, or to
-+** establish that no such entry exists in the block.  Each index block
-+** holds over 4000 entries.  So two or three index blocks are sufficient
-+** to cover a typical 10 megabyte WAL file, assuming 1K pages.  8 or 10
-+** comparisons (on average) suffice to either locate a frame in the
-+** WAL or to establish that the frame does not exist in the WAL.  This
-+** is much faster than scanning the entire 10MB WAL.
-+**
-+** Note that entries are added in order of increasing K.  Hence, one
-+** reader might be using some value K0 and a second reader that started
-+** at a later time (after additional transactions were added to the WAL
-+** and to the wal-index) might be using a different value K1, where K1>K0.
-+** Both readers can use the same hash table and mapping section to get
-+** the correct result.  There may be entries in the hash table with
-+** K>K0 but to the first reader, those entries will appear to be unused
-+** slots in the hash table and so the first reader will get an answer as
-+** if no values greater than K0 had ever been inserted into the hash table
-+** in the first place - which is what reader one wants.  Meanwhile, the
-+** second reader using K1 will see additional values that were inserted
-+** later, which is exactly what reader two wants.  
-+**
-+** When a rollback occurs, the value of K is decreased. Hash table entries
-+** that correspond to frames greater than the new K value are removed
-+** from the hash table at this point.
-+*/
-+#ifndef SQLITE_OMIT_WAL
- 
--    /* If the entry in aPgno[] is already set, then the previous writer
--    ** must have exited unexpectedly in the middle of a transaction (after
--    ** writing one or more dirty pages to the WAL to free up memory). 
--    ** Remove the remnants of that writers uncommitted transaction from 
--    ** the hash-table before writing any new entries.
--    */
--    if( aPgno[idx] ){
--      walCleanupHash(pWal);
--      assert( !aPgno[idx] );
--    }
- 
--    /* Write the aPgno[] array entry and the hash-table slot. */
--    nCollide = idx;
--    for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
--      if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT;
--    }
--    aPgno[idx] = iPage;
--    aHash[iKey] = (ht_slot)idx;
-+/*
-+** Trace output macros
-+*/
-+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
-+SQLITE_PRIVATE int sqlite3WalTrace = 0;
-+# define WALTRACE(X)  if(sqlite3WalTrace) sqlite3DebugPrintf X
-+#else
-+# define WALTRACE(X)
-+#endif
- 
--#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
--    /* Verify that the number of entries in the hash table exactly equals
--    ** the number of entries in the mapping region.
--    */
--    {
--      int i;           /* Loop counter */
--      int nEntry = 0;  /* Number of entries in the hash table */
--      for(i=0; i<HASHTABLE_NSLOT; i++){ if( aHash[i] ) nEntry++; }
--      assert( nEntry==idx );
--    }
-+/*
-+** The maximum (and only) versions of the wal and wal-index formats
-+** that may be interpreted by this version of SQLite.
-+**
-+** If a client begins recovering a WAL file and finds that (a) the checksum
-+** values in the wal-header are correct and (b) the version field is not
-+** WAL_MAX_VERSION, recovery fails and SQLite returns SQLITE_CANTOPEN.
-+**
-+** Similarly, if a client successfully reads a wal-index header (i.e. the 
-+** checksum test is successful) and finds that the version field is not
-+** WALINDEX_MAX_VERSION, then no read-transaction is opened and SQLite
-+** returns SQLITE_CANTOPEN.
-+*/
-+#define WAL_MAX_VERSION      3007000
-+#define WALINDEX_MAX_VERSION 3007000
-+
-+/*
-+** Indices of various locking bytes.   WAL_NREADER is the number
-+** of available reader locks and should be at least 3.
-+*/
-+#define WAL_WRITE_LOCK         0
-+#define WAL_ALL_BUT_WRITE      1
-+#define WAL_CKPT_LOCK          1
-+#define WAL_RECOVER_LOCK       2
-+#define WAL_READ_LOCK(I)       (3+(I))
-+#define WAL_NREADER            (SQLITE_SHM_NLOCK-3)
- 
--    /* Verify that the every entry in the mapping region is reachable
--    ** via the hash table.  This turns out to be a really, really expensive
--    ** thing to check, so only do this occasionally - not on every
--    ** iteration.
--    */
--    if( (idx&0x3ff)==0 ){
--      int i;           /* Loop counter */
--      for(i=1; i<=idx; i++){
--        for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
--          if( aHash[iKey]==i ) break;
--        }
--        assert( aHash[iKey]==i );
--      }
--    }
--#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
--  }
- 
-+/* Object declarations */
-+typedef struct WalIndexHdr WalIndexHdr;
-+typedef struct WalIterator WalIterator;
-+typedef struct WalCkptInfo WalCkptInfo;
- 
--  return rc;
--}
- 
-+/*
-+** The following object holds a copy of the wal-index header content.
-+**
-+** The actual header in the wal-index consists of two copies of this
-+** object.
-+**
-+** The szPage value can be any power of 2 between 512 and 32768, inclusive.
-+** Or it can be 1 to represent a 65536-byte page.  The latter case was
-+** added in 3.7.1 when support for 64K pages was added.  
-+*/
-+struct WalIndexHdr {
-+  u32 iVersion;                   /* Wal-index version */
-+  u32 unused;                     /* Unused (padding) field */
-+  u32 iChange;                    /* Counter incremented each transaction */
-+  u8 isInit;                      /* 1 when initialized */
-+  u8 bigEndCksum;                 /* True if checksums in WAL are big-endian */
-+  u16 szPage;                     /* Database page size in bytes. 1==64K */
-+  u32 mxFrame;                    /* Index of last valid frame in the WAL */
-+  u32 nPage;                      /* Size of database in pages */
-+  u32 aFrameCksum[2];             /* Checksum of last frame in log */
-+  u32 aSalt[2];                   /* Two salt values copied from WAL header */
-+  u32 aCksum[2];                  /* Checksum over all prior fields */
-+};
- 
- /*
--** Recover the wal-index by reading the write-ahead log file. 
-+** A copy of the following object occurs in the wal-index immediately
-+** following the second copy of the WalIndexHdr.  This object stores
-+** information used by checkpoint.
- **
--** This routine first tries to establish an exclusive lock on the
--** wal-index to prevent other threads/processes from doing anything
--** with the WAL or wal-index while recovery is running.  The
--** WAL_RECOVER_LOCK is also held so that other threads will know
--** that this thread is running recovery.  If unable to establish
--** the necessary locks, this routine returns SQLITE_BUSY.
-+** nBackfill is the number of frames in the WAL that have been written
-+** back into the database. (We call the act of moving content from WAL to
-+** database "backfilling".)  The nBackfill number is never greater than
-+** WalIndexHdr.mxFrame.  nBackfill can only be increased by threads
-+** holding the WAL_CKPT_LOCK lock (which includes a recovery thread).
-+** However, a WAL_WRITE_LOCK thread can move the value of nBackfill from
-+** mxFrame back to zero when the WAL is reset.
-+**
-+** There is one entry in aReadMark[] for each reader lock.  If a reader
-+** holds read-lock K, then the value in aReadMark[K] is no greater than
-+** the mxFrame for that reader.  The value READMARK_NOT_USED (0xffffffff)
-+** for any aReadMark[] means that entry is unused.  aReadMark[0] is 
-+** a special case; its value is never used and it exists as a place-holder
-+** to avoid having to offset aReadMark[] indexes by one.  Readers holding
-+** WAL_READ_LOCK(0) always ignore the entire WAL and read all content
-+** directly from the database.
-+**
-+** The value of aReadMark[K] may only be changed by a thread that
-+** is holding an exclusive lock on WAL_READ_LOCK(K).  Thus, the value of
-+** aReadMark[K] cannot change while there is a reader using that mark
-+** since the reader will be holding a shared lock on WAL_READ_LOCK(K).
-+**
-+** The checkpointer may only transfer frames from WAL to database where
-+** the frame numbers are less than or equal to every aReadMark[] that is
-+** in use (that is, every aReadMark[j] for which there is a corresponding
-+** WAL_READ_LOCK(j)).  New readers (usually) pick the aReadMark[] with the
-+** largest value and will increase an unused aReadMark[] to mxFrame if there
-+** is not already an aReadMark[] equal to mxFrame.  The exception to the
-+** previous sentence is when nBackfill equals mxFrame (meaning that everything
-+** in the WAL has been backfilled into the database) then new readers
-+** will choose aReadMark[0] which has value 0 and hence such readers will
-+** get all their content directly from the database file and ignore
-+** the WAL.
-+**
-+** Writers normally append new frames to the end of the WAL.  However,
-+** if nBackfill equals mxFrame (meaning that all WAL content has been
-+** written back into the database) and if no readers are using the WAL
-+** (in other words, if there are no WAL_READ_LOCK(i) where i>0) then
-+** the writer will first "reset" the WAL back to the beginning and start
-+** writing new content beginning at frame 1.
-+**
-+** We assume that 32-bit loads are atomic and so no locks are needed in
-+** order to read from any aReadMark[] entries.
- */
--static int walIndexRecover(Wal *pWal){
--  int rc;                         /* Return Code */
--  i64 nSize;                      /* Size of log file */
--  u32 aFrameCksum[2] = {0, 0};
--  int iLock;                      /* Lock offset to lock for checkpoint */
--  int nLock;                      /* Number of locks to hold */
-+struct WalCkptInfo {
-+  u32 nBackfill;                  /* Number of WAL frames backfilled into DB */
-+  u32 aReadMark[WAL_NREADER];     /* Reader marks */
-+};
-+#define READMARK_NOT_USED  0xffffffff
- 
--  /* Obtain an exclusive lock on all byte in the locking range not already
--  ** locked by the caller. The caller is guaranteed to have locked the
--  ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
--  ** If successful, the same bytes that are locked here are unlocked before
--  ** this function returns.
--  */
--  assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
--  assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
--  assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
--  assert( pWal->writeLock );
--  iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
--  nLock = SQLITE_SHM_NLOCK - iLock;
--  rc = walLockExclusive(pWal, iLock, nLock);
--  if( rc ){
--    return rc;
--  }
--  WALTRACE(("WAL%p: recovery begin...\n", pWal));
- 
--  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
-+/* A block of WALINDEX_LOCK_RESERVED bytes beginning at
-+** WALINDEX_LOCK_OFFSET is reserved for locks. Since some systems
-+** only support mandatory file-locks, we do not read or write data
-+** from the region of the file on which locks are applied.
-+*/
-+#define WALINDEX_LOCK_OFFSET   (sizeof(WalIndexHdr)*2 + sizeof(WalCkptInfo))
-+#define WALINDEX_LOCK_RESERVED 16
-+#define WALINDEX_HDR_SIZE      (WALINDEX_LOCK_OFFSET+WALINDEX_LOCK_RESERVED)
- 
--  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
--  if( rc!=SQLITE_OK ){
--    goto recovery_error;
--  }
-+/* Size of header before each frame in wal */
-+#define WAL_FRAME_HDRSIZE 24
- 
--  if( nSize>WAL_HDRSIZE ){
--    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
--    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
--    int szFrame;                  /* Number of bytes in buffer aFrame[] */
--    u8 *aData;                    /* Pointer to data part of aFrame buffer */
--    int iFrame;                   /* Index of last frame read */
--    i64 iOffset;                  /* Next offset to read from log file */
--    int szPage;                   /* Page size according to the log */
--    u32 magic;                    /* Magic value read from WAL header */
--    u32 version;                  /* Magic value read from WAL header */
-+/* Size of write ahead log header, including checksum. */
-+/* #define WAL_HDRSIZE 24 */
-+#define WAL_HDRSIZE 32
- 
--    /* Read in the WAL header. */
--    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
--    if( rc!=SQLITE_OK ){
--      goto recovery_error;
--    }
-+/* WAL magic value. Either this value, or the same value with the least
-+** significant bit also set (WAL_MAGIC | 0x00000001) is stored in 32-bit
-+** big-endian format in the first 4 bytes of a WAL file.
-+**
-+** If the LSB is set, then the checksums for each frame within the WAL
-+** file are calculated by treating all data as an array of 32-bit 
-+** big-endian words. Otherwise, they are calculated by interpreting 
-+** all data as 32-bit little-endian words.
-+*/
-+#define WAL_MAGIC 0x377f0682
- 
--    /* If the database page size is not a power of two, or is greater than
--    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
--    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
--    ** WAL file.
--    */
--    magic = sqlite3Get4byte(&aBuf[0]);
--    szPage = sqlite3Get4byte(&aBuf[8]);
--    if( (magic&0xFFFFFFFE)!=WAL_MAGIC 
--     || szPage&(szPage-1) 
--     || szPage>SQLITE_MAX_PAGE_SIZE 
--     || szPage<512 
--    ){
--      goto finished;
--    }
--    pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
--    pWal->szPage = szPage;
--    pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
--    memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
-+/*
-+** Return the offset of frame iFrame in the write-ahead log file, 
-+** assuming a database page size of szPage bytes. The offset returned
-+** is to the start of the write-ahead log frame-header.
-+*/
-+#define walFrameOffset(iFrame, szPage) (                               \
-+  WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE)         \
-+)
- 
--    /* Verify that the WAL header checksum is correct */
--    walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, 
--        aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
--    );
--    if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
--     || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
--    ){
--      goto finished;
--    }
-+/*
-+** An open write-ahead log file is represented by an instance of the
-+** following object.
-+*/
-+struct Wal {
-+  sqlite3_vfs *pVfs;         /* The VFS used to create pDbFd */
-+  sqlite3_file *pDbFd;       /* File handle for the database file */
-+  sqlite3_file *pWalFd;      /* File handle for WAL file */
-+  u32 iCallback;             /* Value to pass to log callback (or 0) */
-+  i64 mxWalSize;             /* Truncate WAL to this size upon reset */
-+  int nWiData;               /* Size of array apWiData */
-+  volatile u32 **apWiData;   /* Pointer to wal-index content in memory */
-+  u32 szPage;                /* Database page size */
-+  i16 readLock;              /* Which read lock is being held.  -1 for none */
-+  u8 exclusiveMode;          /* Non-zero if connection is in exclusive mode */
-+  u8 writeLock;              /* True if in a write transaction */
-+  u8 ckptLock;               /* True if holding a checkpoint lock */
-+  u8 readOnly;               /* WAL_RDWR, WAL_RDONLY, or WAL_SHM_RDONLY */
-+  WalIndexHdr hdr;           /* Wal-index header for current transaction */
-+  const char *zWalName;      /* Name of WAL file */
-+  u32 nCkpt;                 /* Checkpoint sequence counter in the wal-header */
-+#ifdef SQLITE_DEBUG
-+  u8 lockError;              /* True if a locking error has occurred */
-+#endif
-+};
- 
--    /* Verify that the version number on the WAL format is one that
--    ** are able to understand */
--    version = sqlite3Get4byte(&aBuf[4]);
--    if( version!=WAL_MAX_VERSION ){
--      rc = SQLITE_CANTOPEN_BKPT;
--      goto finished;
--    }
-+/*
-+** Candidate values for Wal.exclusiveMode.
-+*/
-+#define WAL_NORMAL_MODE     0
-+#define WAL_EXCLUSIVE_MODE  1     
-+#define WAL_HEAPMEMORY_MODE 2
- 
--    /* Malloc a buffer to read frames into. */
--    szFrame = szPage + WAL_FRAME_HDRSIZE;
--    aFrame = (u8 *)sqlite3_malloc(szFrame);
--    if( !aFrame ){
--      rc = SQLITE_NOMEM;
--      goto recovery_error;
--    }
--    aData = &aFrame[WAL_FRAME_HDRSIZE];
-+/*
-+** Possible values for Wal.readOnly
-+*/
-+#define WAL_RDWR        0    /* Normal read/write connection */
-+#define WAL_RDONLY      1    /* The WAL file is readonly */
-+#define WAL_SHM_RDONLY  2    /* The SHM file is readonly */
- 
--    /* Read all frames from the log file. */
--    iFrame = 0;
--    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
--      u32 pgno;                   /* Database page number for frame */
--      u32 nTruncate;              /* dbsize field from frame header */
--      int isValid;                /* True if this frame is valid */
-+/*
-+** Each page of the wal-index mapping contains a hash-table made up of
-+** an array of HASHTABLE_NSLOT elements of the following type.
-+*/
-+typedef u16 ht_slot;
- 
--      /* Read and decode the next log frame. */
--      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
--      if( rc!=SQLITE_OK ) break;
--      isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
--      if( !isValid ) break;
--      rc = walIndexAppend(pWal, ++iFrame, pgno);
--      if( rc!=SQLITE_OK ) break;
-+/*
-+** This structure is used to implement an iterator that loops through
-+** all frames in the WAL in database page order. Where two or more frames
-+** correspond to the same database page, the iterator visits only the 
-+** frame most recently written to the WAL (in other words, the frame with
-+** the largest index).
-+**
-+** The internals of this structure are only accessed by:
-+**
-+**   walIteratorInit() - Create a new iterator,
-+**   walIteratorNext() - Step an iterator,
-+**   walIteratorFree() - Free an iterator.
-+**
-+** This functionality is used by the checkpoint code (see walCheckpoint()).
-+*/
-+struct WalIterator {
-+  int iPrior;                     /* Last result returned from the iterator */
-+  int nSegment;                   /* Number of entries in aSegment[] */
-+  struct WalSegment {
-+    int iNext;                    /* Next slot in aIndex[] not yet returned */
-+    ht_slot *aIndex;              /* i0, i1, i2... such that aPgno[iN] ascend */
-+    u32 *aPgno;                   /* Array of page numbers. */
-+    int nEntry;                   /* Nr. of entries in aPgno[] and aIndex[] */
-+    int iZero;                    /* Frame number associated with aPgno[0] */
-+  } aSegment[1];                  /* One for every 32KB page in the wal-index */
-+};
- 
--      /* If nTruncate is non-zero, this is a commit record. */
--      if( nTruncate ){
--        pWal->hdr.mxFrame = iFrame;
--        pWal->hdr.nPage = nTruncate;
--        pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
--        testcase( szPage<=32768 );
--        testcase( szPage>=65536 );
--        aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
--        aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
--      }
--    }
-+/*
-+** Define the parameters of the hash tables in the wal-index file. There
-+** is a hash-table following every HASHTABLE_NPAGE page numbers in the
-+** wal-index.
-+**
-+** Changing any of these constants will alter the wal-index format and
-+** create incompatibilities.
-+*/
-+#define HASHTABLE_NPAGE      4096                 /* Must be power of 2 */
-+#define HASHTABLE_HASH_1     383                  /* Should be prime */
-+#define HASHTABLE_NSLOT      (HASHTABLE_NPAGE*2)  /* Must be a power of 2 */
- 
--    sqlite3_free(aFrame);
--  }
-+/* 
-+** The block of page numbers associated with the first hash-table in a
-+** wal-index is smaller than usual. This is so that there is a complete
-+** hash-table on each aligned 32KB page of the wal-index.
-+*/
-+#define HASHTABLE_NPAGE_ONE  (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))
- 
--finished:
--  if( rc==SQLITE_OK ){
--    volatile WalCkptInfo *pInfo;
--    int i;
--    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
--    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
--    walIndexWriteHdr(pWal);
-+/* The wal-index is divided into pages of WALINDEX_PGSZ bytes each. */
-+#define WALINDEX_PGSZ   (                                         \
-+    sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
-+)
- 
--    /* Reset the checkpoint-header. This is safe because this thread is 
--    ** currently holding locks that exclude all other readers, writers and
--    ** checkpointers.
--    */
--    pInfo = walCkptInfo(pWal);
--    pInfo->nBackfill = 0;
--    pInfo->aReadMark[0] = 0;
--    for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
-+/*
-+** Obtain a pointer to the iPage'th page of the wal-index. The wal-index
-+** is broken into pages of WALINDEX_PGSZ bytes. Wal-index pages are
-+** numbered from zero.
-+**
-+** If this call is successful, *ppPage is set to point to the wal-index
-+** page and SQLITE_OK is returned. If an error (an OOM or VFS error) occurs,
-+** then an SQLite error code is returned and *ppPage is set to 0.
-+*/
-+static int walIndexPage(Wal *pWal, int iPage, volatile u32 **ppPage){
-+  int rc = SQLITE_OK;
- 
--    /* If more than one frame was recovered from the log file, report an
--    ** event via sqlite3_log(). This is to help with identifying performance
--    ** problems caused by applications routinely shutting down without
--    ** checkpointing the log file.
--    */
--    if( pWal->hdr.nPage ){
--      sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s",
--          pWal->hdr.nPage, pWal->zWalName
-+  /* Enlarge the pWal->apWiData[] array if required */
-+  if( pWal->nWiData<=iPage ){
-+    int nByte = sizeof(u32*)*(iPage+1);
-+    volatile u32 **apNew;
-+    apNew = (volatile u32 **)sqlite3_realloc((void *)pWal->apWiData, nByte);
-+    if( !apNew ){
-+      *ppPage = 0;
-+      return SQLITE_NOMEM;
-+    }
-+    memset((void*)&apNew[pWal->nWiData], 0,
-+           sizeof(u32*)*(iPage+1-pWal->nWiData));
-+    pWal->apWiData = apNew;
-+    pWal->nWiData = iPage+1;
-+  }
-+
-+  /* Request a pointer to the required page from the VFS */
-+  if( pWal->apWiData[iPage]==0 ){
-+    if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
-+      pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
-+      if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM;
-+    }else{
-+      rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ, 
-+          pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
-       );
-+      if( rc==SQLITE_READONLY ){
-+        pWal->readOnly |= WAL_SHM_RDONLY;
-+        rc = SQLITE_OK;
-+      }
-     }
-   }
- 
--recovery_error:
--  WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
--  walUnlockExclusive(pWal, iLock, nLock);
-+  *ppPage = pWal->apWiData[iPage];
-+  assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
-   return rc;
- }
- 
- /*
--** Close an open wal-index.
-+** Return a pointer to the WalCkptInfo structure in the wal-index.
- */
--static void walIndexClose(Wal *pWal, int isDelete){
--  if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
--    int i;
--    for(i=0; i<pWal->nWiData; i++){
--      sqlite3_free((void *)pWal->apWiData[i]);
--      pWal->apWiData[i] = 0;
--    }
--  }else{
--    sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
--  }
-+static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
-+  assert( pWal->nWiData>0 && pWal->apWiData[0] );
-+  return (volatile WalCkptInfo*)&(pWal->apWiData[0][sizeof(WalIndexHdr)/2]);
- }
- 
--/* 
--** Open a connection to the WAL file zWalName. The database file must 
--** already be opened on connection pDbFd. The buffer that zWalName points
--** to must remain valid for the lifetime of the returned Wal* handle.
-+/*
-+** Return a pointer to the WalIndexHdr structure in the wal-index.
-+*/
-+static volatile WalIndexHdr *walIndexHdr(Wal *pWal){
-+  assert( pWal->nWiData>0 && pWal->apWiData[0] );
-+  return (volatile WalIndexHdr*)pWal->apWiData[0];
-+}
-+
-+/*
-+** The argument to this macro must be of type u32. On a little-endian
-+** architecture, it returns the u32 value that results from interpreting
-+** the 4 bytes as a big-endian value. On a big-endian architecture, it
-+** returns the value that would be produced by interpreting the 4 bytes
-+** of the input value as a little-endian integer.
-+*/
-+#define BYTESWAP32(x) ( \
-+    (((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8)  \
-+  + (((x)&0x00FF0000)>>8)  + (((x)&0xFF000000)>>24) \
-+)
-+
-+/*
-+** Generate or extend an 8 byte checksum based on the data in 
-+** array aByte[] and the initial values of aIn[0] and aIn[1] (or
-+** initial values of 0 and 0 if aIn==NULL).
- **
--** A SHARED lock should be held on the database file when this function
--** is called. The purpose of this SHARED lock is to prevent any other
--** client from unlinking the WAL or wal-index file. If another process
--** were to do this just after this client opened one of these files, the
--** system would be badly broken.
-+** The checksum is written back into aOut[] before returning.
- **
--** If the log file is successfully opened, SQLITE_OK is returned and 
--** *ppWal is set to point to a new WAL handle. If an error occurs,
--** an SQLite error code is returned and *ppWal is left unmodified.
-+** nByte must be a positive multiple of 8.
- */
--SQLITE_PRIVATE int sqlite3WalOpen(
--  sqlite3_vfs *pVfs,              /* vfs module to open wal and wal-index */
--  sqlite3_file *pDbFd,            /* The open database file */
--  const char *zWalName,           /* Name of the WAL file */
--  int bNoShm,                     /* True to run in heap-memory mode */
--  i64 mxWalSize,                  /* Truncate WAL to this size on reset */
--  Wal **ppWal                     /* OUT: Allocated Wal handle */
-+static void walChecksumBytes(
-+  int nativeCksum, /* True for native byte-order, false for non-native */
-+  u8 *a,           /* Content to be checksummed */
-+  int nByte,       /* Bytes of content in a[].  Must be a multiple of 8. */
-+  const u32 *aIn,  /* Initial checksum value input */
-+  u32 *aOut        /* OUT: Final checksum value output */
- ){
--  int rc;                         /* Return Code */
--  Wal *pRet;                      /* Object to allocate and return */
--  int flags;                      /* Flags passed to OsOpen() */
--
--  assert( zWalName && zWalName[0] );
--  assert( pDbFd );
--
--  /* In the amalgamation, the os_unix.c and os_win.c source files come before
--  ** this source file.  Verify that the #defines of the locking byte offsets
--  ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value.
--  */
--#ifdef WIN_SHM_BASE
--  assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET );
--#endif
--#ifdef UNIX_SHM_BASE
--  assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET );
--#endif
--
--
--  /* Allocate an instance of struct Wal to return. */
--  *ppWal = 0;
--  pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile);
--  if( !pRet ){
--    return SQLITE_NOMEM;
--  }
--
--  pRet->pVfs = pVfs;
--  pRet->pWalFd = (sqlite3_file *)&pRet[1];
--  pRet->pDbFd = pDbFd;
--  pRet->readLock = -1;
--  pRet->mxWalSize = mxWalSize;
--  pRet->zWalName = zWalName;
--  pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE);
-+  u32 s1, s2;
-+  u32 *aData = (u32 *)a;
-+  u32 *aEnd = (u32 *)&a[nByte];
- 
--  /* Open file handle on the write-ahead log file. */
--  flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
--  rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags);
--  if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){
--    pRet->readOnly = WAL_RDONLY;
-+  if( aIn ){
-+    s1 = aIn[0];
-+    s2 = aIn[1];
-+  }else{
-+    s1 = s2 = 0;
-   }
- 
--  if( rc!=SQLITE_OK ){
--    walIndexClose(pRet, 0);
--    sqlite3OsClose(pRet->pWalFd);
--    sqlite3_free(pRet);
-+  assert( nByte>=8 );
-+  assert( (nByte&0x00000007)==0 );
-+
-+  if( nativeCksum ){
-+    do {
-+      s1 += *aData++ + s2;
-+      s2 += *aData++ + s1;
-+    }while( aData<aEnd );
-   }else{
--    *ppWal = pRet;
--    WALTRACE(("WAL%d: opened\n", pRet));
-+    do {
-+      s1 += BYTESWAP32(aData[0]) + s2;
-+      s2 += BYTESWAP32(aData[1]) + s1;
-+      aData += 2;
-+    }while( aData<aEnd );
-   }
--  return rc;
-+
-+  aOut[0] = s1;
-+  aOut[1] = s2;
- }
- 
--/*
--** Change the size to which the WAL file is trucated on each reset.
--*/
--SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){
--  if( pWal ) pWal->mxWalSize = iLimit;
-+static void walShmBarrier(Wal *pWal){
-+  if( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE ){
-+    sqlite3OsShmBarrier(pWal->pDbFd);
-+  }
- }
- 
- /*
--** Find the smallest page number out of all pages held in the WAL that
--** has not been returned by any prior invocation of this method on the
--** same WalIterator object.   Write into *piFrame the frame index where
--** that page was last written into the WAL.  Write into *piPage the page
--** number.
-+** Write the header information in pWal->hdr into the wal-index.
- **
--** Return 0 on success.  If there are no pages in the WAL with a page
--** number larger than *piPage, then return 1.
-+** The checksum on pWal->hdr is updated before it is written.
- */
--static int walIteratorNext(
--  WalIterator *p,               /* Iterator */
--  u32 *piPage,                  /* OUT: The page number of the next page */
--  u32 *piFrame                  /* OUT: Wal frame index of next page */
--){
--  u32 iMin;                     /* Result pgno must be greater than iMin */
--  u32 iRet = 0xFFFFFFFF;        /* 0xffffffff is never a valid page number */
--  int i;                        /* For looping through segments */
--
--  iMin = p->iPrior;
--  assert( iMin<0xffffffff );
--  for(i=p->nSegment-1; i>=0; i--){
--    struct WalSegment *pSegment = &p->aSegment[i];
--    while( pSegment->iNext<pSegment->nEntry ){
--      u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]];
--      if( iPg>iMin ){
--        if( iPg<iRet ){
--          iRet = iPg;
--          *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext];
--        }
--        break;
--      }
--      pSegment->iNext++;
--    }
--  }
-+static void walIndexWriteHdr(Wal *pWal){
-+  volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
-+  const int nCksum = offsetof(WalIndexHdr, aCksum);
- 
--  *piPage = p->iPrior = iRet;
--  return (iRet==0xFFFFFFFF);
-+  assert( pWal->writeLock );
-+  pWal->hdr.isInit = 1;
-+  pWal->hdr.iVersion = WALINDEX_MAX_VERSION;
-+  walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);
-+  memcpy((void *)&aHdr[1], (void *)&pWal->hdr, sizeof(WalIndexHdr));
-+  walShmBarrier(pWal);
-+  memcpy((void *)&aHdr[0], (void *)&pWal->hdr, sizeof(WalIndexHdr));
- }
- 
- /*
--** This function merges two sorted lists into a single sorted list.
--**
--** aLeft[] and aRight[] are arrays of indices.  The sort key is
--** aContent[aLeft[]] and aContent[aRight[]].  Upon entry, the following
--** is guaranteed for all J<K:
--**
--**        aContent[aLeft[J]] < aContent[aLeft[K]]
--**        aContent[aRight[J]] < aContent[aRight[K]]
--**
--** This routine overwrites aRight[] with a new (probably longer) sequence
--** of indices such that the aRight[] contains every index that appears in
--** either aLeft[] or the old aRight[] and such that the second condition
--** above is still met.
--**
--** The aContent[aLeft[X]] values will be unique for all X.  And the
--** aContent[aRight[X]] values will be unique too.  But there might be
--** one or more combinations of X and Y such that
--**
--**      aLeft[X]!=aRight[Y]  &&  aContent[aLeft[X]] == aContent[aRight[Y]]
-+** This function encodes a single frame header and writes it to a buffer
-+** supplied by the caller. A frame-header is made up of a series of 
-+** 4-byte big-endian integers, as follows:
- **
--** When that happens, omit the aLeft[X] and use the aRight[Y] index.
-+**     0: Page number.
-+**     4: For commit records, the size of the database image in pages 
-+**        after the commit. For all other records, zero.
-+**     8: Salt-1 (copied from the wal-header)
-+**    12: Salt-2 (copied from the wal-header)
-+**    16: Checksum-1.
-+**    20: Checksum-2.
- */
--static void walMerge(
--  const u32 *aContent,            /* Pages in wal - keys for the sort */
--  ht_slot *aLeft,                 /* IN: Left hand input list */
--  int nLeft,                      /* IN: Elements in array *paLeft */
--  ht_slot **paRight,              /* IN/OUT: Right hand input list */
--  int *pnRight,                   /* IN/OUT: Elements in *paRight */
--  ht_slot *aTmp                   /* Temporary buffer */
-+static void walEncodeFrame(
-+  Wal *pWal,                      /* The write-ahead log */
-+  u32 iPage,                      /* Database page number for frame */
-+  u32 nTruncate,                  /* New db size (or 0 for non-commit frames) */
-+  u8 *aData,                      /* Pointer to page data */
-+  u8 *aFrame                      /* OUT: Write encoded frame here */
- ){
--  int iLeft = 0;                  /* Current index in aLeft */
--  int iRight = 0;                 /* Current index in aRight */
--  int iOut = 0;                   /* Current index in output buffer */
--  int nRight = *pnRight;
--  ht_slot *aRight = *paRight;
--
--  assert( nLeft>0 && nRight>0 );
--  while( iRight<nRight || iLeft<nLeft ){
--    ht_slot logpage;
--    Pgno dbpage;
--
--    if( (iLeft<nLeft) 
--     && (iRight>=nRight || aContent[aLeft[iLeft]]<aContent[aRight[iRight]])
--    ){
--      logpage = aLeft[iLeft++];
--    }else{
--      logpage = aRight[iRight++];
--    }
--    dbpage = aContent[logpage];
--
--    aTmp[iOut++] = logpage;
--    if( iLeft<nLeft && aContent[aLeft[iLeft]]==dbpage ) iLeft++;
-+  int nativeCksum;                /* True for native byte-order checksums */
-+  u32 *aCksum = pWal->hdr.aFrameCksum;
-+  assert( WAL_FRAME_HDRSIZE==24 );
-+  sqlite3Put4byte(&aFrame[0], iPage);
-+  sqlite3Put4byte(&aFrame[4], nTruncate);
-+  memcpy(&aFrame[8], pWal->hdr.aSalt, 8);
- 
--    assert( iLeft>=nLeft || aContent[aLeft[iLeft]]>dbpage );
--    assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage );
--  }
-+  nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
-+  walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
-+  walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
- 
--  *paRight = aLeft;
--  *pnRight = iOut;
--  memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut);
-+  sqlite3Put4byte(&aFrame[16], aCksum[0]);
-+  sqlite3Put4byte(&aFrame[20], aCksum[1]);
- }
- 
- /*
--** Sort the elements in list aList using aContent[] as the sort key.
--** Remove elements with duplicate keys, preferring to keep the
--** larger aList[] values.
--**
--** The aList[] entries are indices into aContent[].  The values in
--** aList[] are to be sorted so that for all J<K:
--**
--**      aContent[aList[J]] < aContent[aList[K]]
--**
--** For any X and Y such that
--**
--**      aContent[aList[X]] == aContent[aList[Y]]
--**
--** Keep the larger of the two values aList[X] and aList[Y] and discard
--** the smaller.
-+** Check to see if the frame with header in aFrame[] and content
-+** in aData[] is valid.  If it is a valid frame, fill *piPage and
-+** *pnTruncate and return true.  Return false if the frame is not valid.
- */
--static void walMergesort(
--  const u32 *aContent,            /* Pages in wal */
--  ht_slot *aBuffer,               /* Buffer of at least *pnList items to use */
--  ht_slot *aList,                 /* IN/OUT: List to sort */
--  int *pnList                     /* IN/OUT: Number of elements in aList[] */
-+static int walDecodeFrame(
-+  Wal *pWal,                      /* The write-ahead log */
-+  u32 *piPage,                    /* OUT: Database page number for frame */
-+  u32 *pnTruncate,                /* OUT: New db size (or 0 if not commit) */
-+  u8 *aData,                      /* Pointer to page data (for checksum) */
-+  u8 *aFrame                      /* Frame data */
- ){
--  struct Sublist {
--    int nList;                    /* Number of elements in aList */
--    ht_slot *aList;               /* Pointer to sub-list content */
--  };
--
--  const int nList = *pnList;      /* Size of input list */
--  int nMerge = 0;                 /* Number of elements in list aMerge */
--  ht_slot *aMerge = 0;            /* List to be merged */
--  int iList;                      /* Index into input list */
--  int iSub = 0;                   /* Index into aSub array */
--  struct Sublist aSub[13];        /* Array of sub-lists */
--
--  memset(aSub, 0, sizeof(aSub));
--  assert( nList<=HASHTABLE_NPAGE && nList>0 );
--  assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) );
-+  int nativeCksum;                /* True for native byte-order checksums */
-+  u32 *aCksum = pWal->hdr.aFrameCksum;
-+  u32 pgno;                       /* Page number of the frame */
-+  assert( WAL_FRAME_HDRSIZE==24 );
- 
--  for(iList=0; iList<nList; iList++){
--    nMerge = 1;
--    aMerge = &aList[iList];
--    for(iSub=0; iList & (1<<iSub); iSub++){
--      struct Sublist *p = &aSub[iSub];
--      assert( p->aList && p->nList<=(1<<iSub) );
--      assert( p->aList==&aList[iList&~((2<<iSub)-1)] );
--      walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
--    }
--    aSub[iSub].aList = aMerge;
--    aSub[iSub].nList = nMerge;
-+  /* A frame is only valid if the salt values in the frame-header
-+  ** match the salt values in the wal-header. 
-+  */
-+  if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ){
-+    return 0;
-   }
- 
--  for(iSub++; iSub<ArraySize(aSub); iSub++){
--    if( nList & (1<<iSub) ){
--      struct Sublist *p = &aSub[iSub];
--      assert( p->nList<=(1<<iSub) );
--      assert( p->aList==&aList[nList&~((2<<iSub)-1)] );
--      walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
--    }
-+  /* A frame is only valid if the page number is greater than zero.
-+  */
-+  pgno = sqlite3Get4byte(&aFrame[0]);
-+  if( pgno==0 ){
-+    return 0;
-   }
--  assert( aMerge==aList );
--  *pnList = nMerge;
- 
--#ifdef SQLITE_DEBUG
--  {
--    int i;
--    for(i=1; i<*pnList; i++){
--      assert( aContent[aList[i]] > aContent[aList[i-1]] );
--    }
-+  /* A frame is only valid if a checksum of the WAL header,
-+  ** all prior frames, the first 8 bytes of this frame-header, 
-+  ** and the frame-data matches the checksum in the last 8 
-+  ** bytes of this frame-header.
-+  */
-+  nativeCksum = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
-+  walChecksumBytes(nativeCksum, aFrame, 8, aCksum, aCksum);
-+  walChecksumBytes(nativeCksum, aData, pWal->szPage, aCksum, aCksum);
-+  if( aCksum[0]!=sqlite3Get4byte(&aFrame[16]) 
-+   || aCksum[1]!=sqlite3Get4byte(&aFrame[20]) 
-+  ){
-+    /* Checksum failed. */
-+    return 0;
-   }
--#endif
-+
-+  /* If we reach this point, the frame is valid.  Return the page number
-+  ** and the new database size.
-+  */
-+  *piPage = pgno;
-+  *pnTruncate = sqlite3Get4byte(&aFrame[4]);
-+  return 1;
- }
- 
--/* 
--** Free an iterator allocated by walIteratorInit().
-+
-+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
-+/*
-+** Names of locks.  This routine is used to provide debugging output and is not
-+** a part of an ordinary build.
- */
--static void walIteratorFree(WalIterator *p){
--  sqlite3ScratchFree(p);
-+static const char *walLockName(int lockIdx){
-+  if( lockIdx==WAL_WRITE_LOCK ){
-+    return "WRITE-LOCK";
-+  }else if( lockIdx==WAL_CKPT_LOCK ){
-+    return "CKPT-LOCK";
-+  }else if( lockIdx==WAL_RECOVER_LOCK ){
-+    return "RECOVER-LOCK";
-+  }else{
-+    static char zName[15];
-+    sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]",
-+                     lockIdx-WAL_READ_LOCK(0));
-+    return zName;
-+  }
- }
-+#endif /* defined(SQLITE_TEST) && defined(SQLITE_DEBUG) */
-+    
- 
- /*
--** Construct a WalInterator object that can be used to loop over all 
--** pages in the WAL in ascending order. The caller must hold the checkpoint
--** lock.
--**
--** On success, make *pp point to the newly allocated WalInterator object
--** return SQLITE_OK. Otherwise, return an error code. If this routine
--** returns an error, the value of *pp is undefined.
-+** Set or release locks on the WAL.  Locks are either shared or exclusive.
-+** A lock cannot be moved directly between shared and exclusive - it must go
-+** through the unlocked state first.
- **
--** The calling routine should invoke walIteratorFree() to destroy the
--** WalIterator object when it has finished with it.
-+** In locking_mode=EXCLUSIVE, all of these routines become no-ops.
- */
--static int walIteratorInit(Wal *pWal, WalIterator **pp){
--  WalIterator *p;                 /* Return value */
--  int nSegment;                   /* Number of segments to merge */
--  u32 iLast;                      /* Last frame in log */
--  int nByte;                      /* Number of bytes to allocate */
--  int i;                          /* Iterator variable */
--  ht_slot *aTmp;                  /* Temp space used by merge-sort */
--  int rc = SQLITE_OK;             /* Return Code */
-+static int walLockShared(Wal *pWal, int lockIdx){
-+  int rc;
-+  if( pWal->exclusiveMode ) return SQLITE_OK;
-+  rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
-+                        SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
-+  WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
-+            walLockName(lockIdx), rc ? "failed" : "ok"));
-+  VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
-+  return rc;
-+}
-+static void walUnlockShared(Wal *pWal, int lockIdx){
-+  if( pWal->exclusiveMode ) return;
-+  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
-+                         SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
-+  WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
-+}
-+static int walLockExclusive(Wal *pWal, int lockIdx, int n){
-+  int rc;
-+  if( pWal->exclusiveMode ) return SQLITE_OK;
-+  rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
-+                        SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
-+  WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
-+            walLockName(lockIdx), n, rc ? "failed" : "ok"));
-+  VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && rc!=SQLITE_BUSY); )
-+  return rc;
-+}
-+static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
-+  if( pWal->exclusiveMode ) return;
-+  (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
-+                         SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
-+  WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
-+             walLockName(lockIdx), n));
-+}
- 
--  /* This routine only runs while holding the checkpoint lock. And
--  ** it only runs if there is actually content in the log (mxFrame>0).
--  */
--  assert( pWal->ckptLock && pWal->hdr.mxFrame>0 );
--  iLast = pWal->hdr.mxFrame;
-+/*
-+** Compute a hash on a page number.  The resulting hash value must land
-+** between 0 and (HASHTABLE_NSLOT-1).  The walNextHash() function advances
-+** the hash to the next value in the event of a collision.
-+*/
-+static int walHash(u32 iPage){
-+  assert( iPage>0 );
-+  assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
-+  return (iPage*HASHTABLE_HASH_1) & (HASHTABLE_NSLOT-1);
-+}
-+static int walNextHash(int iPriorHash){
-+  return (iPriorHash+1)&(HASHTABLE_NSLOT-1);
-+}
- 
--  /* Allocate space for the WalIterator object. */
--  nSegment = walFramePage(iLast) + 1;
--  nByte = sizeof(WalIterator) 
--        + (nSegment-1)*sizeof(struct WalSegment)
--        + iLast*sizeof(ht_slot);
--  p = (WalIterator *)sqlite3ScratchMalloc(nByte);
--  if( !p ){
--    return SQLITE_NOMEM;
--  }
--  memset(p, 0, nByte);
--  p->nSegment = nSegment;
-+/* 
-+** Return pointers to the hash table and page number array stored on
-+** page iHash of the wal-index. The wal-index is broken into 32KB pages
-+** numbered starting from 0.
-+**
-+** Set output variable *paHash to point to the start of the hash table
-+** in the wal-index file. Set *piZero to one less than the frame 
-+** number of the first frame indexed by this hash table. If a
-+** slot in the hash table is set to N, it refers to frame number 
-+** (*piZero+N) in the log.
-+**
-+** Finally, set *paPgno so that *paPgno[1] is the page number of the
-+** first frame indexed by the hash table, frame (*piZero+1).
-+*/
-+static int walHashGet(
-+  Wal *pWal,                      /* WAL handle */
-+  int iHash,                      /* Find the iHash'th table */
-+  volatile ht_slot **paHash,      /* OUT: Pointer to hash index */
-+  volatile u32 **paPgno,          /* OUT: Pointer to page number array */
-+  u32 *piZero                     /* OUT: Frame associated with *paPgno[0] */
-+){
-+  int rc;                         /* Return code */
-+  volatile u32 *aPgno;
- 
--  /* Allocate temporary space used by the merge-sort routine. This block
--  ** of memory will be freed before this function returns.
--  */
--  aTmp = (ht_slot *)sqlite3ScratchMalloc(
--      sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast)
--  );
--  if( !aTmp ){
--    rc = SQLITE_NOMEM;
--  }
-+  rc = walIndexPage(pWal, iHash, &aPgno);
-+  assert( rc==SQLITE_OK || iHash>0 );
- 
--  for(i=0; rc==SQLITE_OK && i<nSegment; i++){
--    volatile ht_slot *aHash;
-+  if( rc==SQLITE_OK ){
-     u32 iZero;
--    volatile u32 *aPgno;
--
--    rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero);
--    if( rc==SQLITE_OK ){
--      int j;                      /* Counter variable */
--      int nEntry;                 /* Number of entries in this segment */
--      ht_slot *aIndex;            /* Sorted index for this segment */
-+    volatile ht_slot *aHash;
- 
--      aPgno++;
--      if( (i+1)==nSegment ){
--        nEntry = (int)(iLast - iZero);
--      }else{
--        nEntry = (int)((u32*)aHash - (u32*)aPgno);
--      }
--      aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[iZero];
--      iZero++;
--  
--      for(j=0; j<nEntry; j++){
--        aIndex[j] = (ht_slot)j;
--      }
--      walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry);
--      p->aSegment[i].iZero = iZero;
--      p->aSegment[i].nEntry = nEntry;
--      p->aSegment[i].aIndex = aIndex;
--      p->aSegment[i].aPgno = (u32 *)aPgno;
-+    aHash = (volatile ht_slot *)&aPgno[HASHTABLE_NPAGE];
-+    if( iHash==0 ){
-+      aPgno = &aPgno[WALINDEX_HDR_SIZE/sizeof(u32)];
-+      iZero = 0;
-+    }else{
-+      iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
-     }
-+  
-+    *paPgno = &aPgno[-1];
-+    *paHash = aHash;
-+    *piZero = iZero;
-   }
--  sqlite3ScratchFree(aTmp);
--
--  if( rc!=SQLITE_OK ){
--    walIteratorFree(p);
--  }
--  *pp = p;
-   return rc;
- }
- 
- /*
--** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and
--** n. If the attempt fails and parameter xBusy is not NULL, then it is a
--** busy-handler function. Invoke it and retry the lock until either the
--** lock is successfully obtained or the busy-handler returns 0.
-+** Return the number of the wal-index page that contains the hash-table
-+** and page-number array that contain entries corresponding to WAL frame
-+** iFrame. The wal-index is broken up into 32KB pages. Wal-index pages 
-+** are numbered starting from 0.
- */
--static int walBusyLock(
--  Wal *pWal,                      /* WAL connection */
--  int (*xBusy)(void*),            /* Function to call when busy */
--  void *pBusyArg,                 /* Context argument for xBusyHandler */
--  int lockIdx,                    /* Offset of first byte to lock */
--  int n                           /* Number of bytes to lock */
--){
--  int rc;
--  do {
--    rc = walLockExclusive(pWal, lockIdx, n);
--  }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) );
--  return rc;
-+static int walFramePage(u32 iFrame){
-+  int iHash = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
-+  assert( (iHash==0 || iFrame>HASHTABLE_NPAGE_ONE)
-+       && (iHash>=1 || iFrame<=HASHTABLE_NPAGE_ONE)
-+       && (iHash<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE))
-+       && (iHash>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE)
-+       && (iHash<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE))
-+  );
-+  return iHash;
- }
- 
- /*
--** The cache of the wal-index header must be valid to call this function.
--** Return the page-size in bytes used by the database.
-+** Return the page number associated with frame iFrame in this WAL.
- */
--static int walPagesize(Wal *pWal){
--  return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
--}
--
--/*
--** Copy as much content as we can from the WAL back into the database file
--** in response to an sqlite3_wal_checkpoint() request or the equivalent.
--**
--** The amount of information copies from WAL to database might be limited
--** by active readers.  This routine will never overwrite a database page
--** that a concurrent reader might be using.
--**
--** All I/O barrier operations (a.k.a fsyncs) occur in this routine when
--** SQLite is in WAL-mode in synchronous=NORMAL.  That means that if 
--** checkpoints are always run by a background thread or background 
--** process, foreground threads will never block on a lengthy fsync call.
--**
--** Fsync is called on the WAL before writing content out of the WAL and
--** into the database.  This ensures that if the new content is persistent
--** in the WAL and can be recovered following a power-loss or hard reset.
--**
--** Fsync is also called on the database file if (and only if) the entire
--** WAL content is copied into the database file.  This second fsync makes
--** it safe to delete the WAL since the new content will persist in the
--** database file.
-+static u32 walFramePgno(Wal *pWal, u32 iFrame){
-+  int iHash = walFramePage(iFrame);
-+  if( iHash==0 ){
-+    return pWal->apWiData[0][WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
-+  }
-+  return pWal->apWiData[iHash][(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
-+}
-+
-+/*
-+** Remove entries from the hash table that point to WAL slots greater
-+** than pWal->hdr.mxFrame.
- **
--** This routine uses and updates the nBackfill field of the wal-index header.
--** This is the only routine tha will increase the value of nBackfill.  
--** (A WAL reset or recovery will revert nBackfill to zero, but not increase
--** its value.)
-+** This function is called whenever pWal->hdr.mxFrame is decreased due
-+** to a rollback or savepoint.
- **
--** The caller must be holding sufficient locks to ensure that no other
--** checkpoint is running (in any other thread or process) at the same
--** time.
-+** At most only the hash table containing pWal->hdr.mxFrame needs to be
-+** updated.  Any later hash tables will be automatically cleared when
-+** pWal->hdr.mxFrame advances to the point where those hash tables are
-+** actually needed.
- */
--static int walCheckpoint(
--  Wal *pWal,                      /* Wal connection */
--  int eMode,                      /* One of PASSIVE, FULL or RESTART */
--  int (*xBusyCall)(void*),        /* Function to call when busy */
--  void *pBusyArg,                 /* Context argument for xBusyHandler */
--  int sync_flags,                 /* Flags for OsSync() (or 0) */
--  u8 *zBuf                        /* Temporary buffer to use */
--){
--  int rc;                         /* Return code */
--  int szPage;                     /* Database page-size */
--  WalIterator *pIter = 0;         /* Wal iterator context */
--  u32 iDbpage = 0;                /* Next database page to write */
--  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
--  u32 mxSafeFrame;                /* Max frame that can be backfilled */
--  u32 mxPage;                     /* Max database page to write */
--  int i;                          /* Loop counter */
--  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */
--  int (*xBusy)(void*) = 0;        /* Function to call when waiting for locks */
-+static void walCleanupHash(Wal *pWal){
-+  volatile ht_slot *aHash = 0;    /* Pointer to hash table to clear */
-+  volatile u32 *aPgno = 0;        /* Page number array for hash table */
-+  u32 iZero = 0;                  /* frame == (aHash[x]+iZero) */
-+  int iLimit = 0;                 /* Zero values greater than this */
-+  int nByte;                      /* Number of bytes to zero in aPgno[] */
-+  int i;                          /* Used to iterate through aHash[] */
- 
--  szPage = walPagesize(pWal);
--  testcase( szPage<=32768 );
--  testcase( szPage>=65536 );
--  pInfo = walCkptInfo(pWal);
--  if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK;
-+  assert( pWal->writeLock );
-+  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 );
-+  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE );
-+  testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 );
- 
--  /* Allocate the iterator */
--  rc = walIteratorInit(pWal, &pIter);
--  if( rc!=SQLITE_OK ){
--    return rc;
--  }
--  assert( pIter );
-+  if( pWal->hdr.mxFrame==0 ) return;
- 
--  if( eMode!=SQLITE_CHECKPOINT_PASSIVE ) xBusy = xBusyCall;
-+  /* Obtain pointers to the hash-table and page-number array containing 
-+  ** the entry that corresponds to frame pWal->hdr.mxFrame. It is guaranteed
-+  ** that the page said hash-table and array reside on is already mapped.
-+  */
-+  assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
-+  assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
-+  walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &aHash, &aPgno, &iZero);
- 
--  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
--  ** safe to write into the database.  Frames beyond mxSafeFrame might
--  ** overwrite database pages that are in use by active readers and thus
--  ** cannot be backfilled from the WAL.
-+  /* Zero all hash-table entries that correspond to frame numbers greater
-+  ** than pWal->hdr.mxFrame.
-   */
--  mxSafeFrame = pWal->hdr.mxFrame;
--  mxPage = pWal->hdr.nPage;
--  for(i=1; i<WAL_NREADER; i++){
--    u32 y = pInfo->aReadMark[i];
--    if( mxSafeFrame>y ){
--      assert( y<=pWal->hdr.mxFrame );
--      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
--      if( rc==SQLITE_OK ){
--        pInfo->aReadMark[i] = READMARK_NOT_USED;
--        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
--      }else if( rc==SQLITE_BUSY ){
--        mxSafeFrame = y;
--        xBusy = 0;
--      }else{
--        goto walcheckpoint_out;
--      }
-+  iLimit = pWal->hdr.mxFrame - iZero;
-+  assert( iLimit>0 );
-+  for(i=0; i<HASHTABLE_NSLOT; i++){
-+    if( aHash[i]>iLimit ){
-+      aHash[i] = 0;
-     }
-   }
-+  
-+  /* Zero the entries in the aPgno array that correspond to frames with
-+  ** frame numbers greater than pWal->hdr.mxFrame. 
-+  */
-+  nByte = (int)((char *)aHash - (char *)&aPgno[iLimit+1]);
-+  memset((void *)&aPgno[iLimit+1], 0, nByte);
- 
--  if( pInfo->nBackfill<mxSafeFrame
--   && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK
--  ){
--    i64 nSize;                    /* Current size of database file */
--    u32 nBackfill = pInfo->nBackfill;
--
--    /* Sync the WAL to disk */
--    if( sync_flags ){
--      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
--    }
--
--    /* If the database file may grow as a result of this checkpoint, hint
--    ** about the eventual size of the db file to the VFS layer. 
--    */
--    if( rc==SQLITE_OK ){
--      i64 nReq = ((i64)mxPage * szPage);
--      rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
--      if( rc==SQLITE_OK && nSize<nReq ){
--        sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq);
-+#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
-+  /* Verify that the every entry in the mapping region is still reachable
-+  ** via the hash table even after the cleanup.
-+  */
-+  if( iLimit ){
-+    int i;           /* Loop counter */
-+    int iKey;        /* Hash key */
-+    for(i=1; i<=iLimit; i++){
-+      for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
-+        if( aHash[iKey]==i ) break;
-       }
-+      assert( aHash[iKey]==i );
-     }
-+  }
-+#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
-+}
- 
--    /* Iterate through the contents of the WAL, copying data to the db file. */
--    while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
--      i64 iOffset;
--      assert( walFramePgno(pWal, iFrame)==iDbpage );
--      if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue;
--      iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE;
--      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */
--      rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset);
--      if( rc!=SQLITE_OK ) break;
--      iOffset = (iDbpage-1)*(i64)szPage;
--      testcase( IS_BIG_INT(iOffset) );
--      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
--      if( rc!=SQLITE_OK ) break;
--    }
--
--    /* If work was actually accomplished... */
--    if( rc==SQLITE_OK ){
--      if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
--        i64 szDb = pWal->hdr.nPage*(i64)szPage;
--        testcase( IS_BIG_INT(szDb) );
--        rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
--        if( rc==SQLITE_OK && sync_flags ){
--          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
--        }
--      }
--      if( rc==SQLITE_OK ){
--        pInfo->nBackfill = mxSafeFrame;
--      }
--    }
- 
--    /* Release the reader lock held while backfilling */
--    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
--  }
-+/*
-+** Set an entry in the wal-index that will map database page number
-+** pPage into WAL frame iFrame.
-+*/
-+static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
-+  int rc;                         /* Return code */
-+  u32 iZero = 0;                  /* One less than frame number of aPgno[1] */
-+  volatile u32 *aPgno = 0;        /* Page number array */
-+  volatile ht_slot *aHash = 0;    /* Hash table */
- 
--  if( rc==SQLITE_BUSY ){
--    /* Reset the return code so as not to report a checkpoint failure
--    ** just because there are active readers.  */
--    rc = SQLITE_OK;
--  }
-+  rc = walHashGet(pWal, walFramePage(iFrame), &aHash, &aPgno, &iZero);
- 
--  /* If this is an SQLITE_CHECKPOINT_RESTART operation, and the entire wal
--  ** file has been copied into the database file, then block until all
--  ** readers have finished using the wal file. This ensures that the next
--  ** process to write to the database restarts the wal file.
-+  /* Assuming the wal-index file was successfully mapped, populate the
-+  ** page number array and hash table entry.
-   */
--  if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
--    assert( pWal->writeLock );
--    if( pInfo->nBackfill<pWal->hdr.mxFrame ){
--      rc = SQLITE_BUSY;
--    }else if( eMode==SQLITE_CHECKPOINT_RESTART ){
--      assert( mxSafeFrame==pWal->hdr.mxFrame );
--      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1);
--      if( rc==SQLITE_OK ){
--        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
--      }
-+  if( rc==SQLITE_OK ){
-+    int iKey;                     /* Hash table key */
-+    int idx;                      /* Value to write to hash-table slot */
-+    int nCollide;                 /* Number of hash collisions */
-+
-+    idx = iFrame - iZero;
-+    assert( idx <= HASHTABLE_NSLOT/2 + 1 );
-+    
-+    /* If this is the first entry to be added to this hash-table, zero the
-+    ** entire hash table and aPgno[] array before proceding. 
-+    */
-+    if( idx==1 ){
-+      int nByte = (int)((u8 *)&aHash[HASHTABLE_NSLOT] - (u8 *)&aPgno[1]);
-+      memset((void*)&aPgno[1], 0, nByte);
-     }
--  }
- 
-- walcheckpoint_out:
--  walIteratorFree(pIter);
--  return rc;
--}
-+    /* If the entry in aPgno[] is already set, then the previous writer
-+    ** must have exited unexpectedly in the middle of a transaction (after
-+    ** writing one or more dirty pages to the WAL to free up memory). 
-+    ** Remove the remnants of that writers uncommitted transaction from 
-+    ** the hash-table before writing any new entries.
-+    */
-+    if( aPgno[idx] ){
-+      walCleanupHash(pWal);
-+      assert( !aPgno[idx] );
-+    }
- 
--/*
--** Close a connection to a log file.
--*/
--SQLITE_PRIVATE int sqlite3WalClose(
--  Wal *pWal,                      /* Wal to close */
--  int sync_flags,                 /* Flags to pass to OsSync() (or 0) */
--  int nBuf,
--  u8 *zBuf                        /* Buffer of at least nBuf bytes */
--){
--  int rc = SQLITE_OK;
--  if( pWal ){
--    int isDelete = 0;             /* True to unlink wal and wal-index files */
-+    /* Write the aPgno[] array entry and the hash-table slot. */
-+    nCollide = idx;
-+    for(iKey=walHash(iPage); aHash[iKey]; iKey=walNextHash(iKey)){
-+      if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT;
-+    }
-+    aPgno[idx] = iPage;
-+    aHash[iKey] = (ht_slot)idx;
- 
--    /* If an EXCLUSIVE lock can be obtained on the database file (using the
--    ** ordinary, rollback-mode locking methods, this guarantees that the
--    ** connection associated with this log file is the only connection to
--    ** the database. In this case checkpoint the database and unlink both
--    ** the wal and wal-index files.
--    **
--    ** The EXCLUSIVE lock is not released before returning.
-+#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
-+    /* Verify that the number of entries in the hash table exactly equals
-+    ** the number of entries in the mapping region.
-     */
--    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
--    if( rc==SQLITE_OK ){
--      if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
--        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
--      }
--      rc = sqlite3WalCheckpoint(
--          pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
--      );
--      if( rc==SQLITE_OK ){
--        isDelete = 1;
--      }
-+    {
-+      int i;           /* Loop counter */
-+      int nEntry = 0;  /* Number of entries in the hash table */
-+      for(i=0; i<HASHTABLE_NSLOT; i++){ if( aHash[i] ) nEntry++; }
-+      assert( nEntry==idx );
-     }
- 
--    walIndexClose(pWal, isDelete);
--    sqlite3OsClose(pWal->pWalFd);
--    if( isDelete ){
--      sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
-+    /* Verify that the every entry in the mapping region is reachable
-+    ** via the hash table.  This turns out to be a really, really expensive
-+    ** thing to check, so only do this occasionally - not on every
-+    ** iteration.
-+    */
-+    if( (idx&0x3ff)==0 ){
-+      int i;           /* Loop counter */
-+      for(i=1; i<=idx; i++){
-+        for(iKey=walHash(aPgno[i]); aHash[iKey]; iKey=walNextHash(iKey)){
-+          if( aHash[iKey]==i ) break;
-+        }
-+        assert( aHash[iKey]==i );
-+      }
-     }
--    WALTRACE(("WAL%p: closed\n", pWal));
--    sqlite3_free((void *)pWal->apWiData);
--    sqlite3_free(pWal);
-+#endif /* SQLITE_ENABLE_EXPENSIVE_ASSERT */
-   }
-+
-+
-   return rc;
- }
- 
-+
- /*
--** Try to read the wal-index header.  Return 0 on success and 1 if
--** there is a problem.
--**
--** The wal-index is in shared memory.  Another thread or process might
--** be writing the header at the same time this procedure is trying to
--** read it, which might result in inconsistency.  A dirty read is detected
--** by verifying that both copies of the header are the same and also by
--** a checksum on the header.
--**
--** If and only if the read is consistent and the header is different from
--** pWal->hdr, then pWal->hdr is updated to the content of the new header
--** and *pChanged is set to 1.
-+** Recover the wal-index by reading the write-ahead log file. 
- **
--** If the checksum cannot be verified return non-zero. If the header
--** is read successfully and the checksum verified, return zero.
-+** This routine first tries to establish an exclusive lock on the
-+** wal-index to prevent other threads/processes from doing anything
-+** with the WAL or wal-index while recovery is running.  The
-+** WAL_RECOVER_LOCK is also held so that other threads will know
-+** that this thread is running recovery.  If unable to establish
-+** the necessary locks, this routine returns SQLITE_BUSY.
- */
--static int walIndexTryHdr(Wal *pWal, int *pChanged){
--  u32 aCksum[2];                  /* Checksum on the header content */
--  WalIndexHdr h1, h2;             /* Two copies of the header content */
--  WalIndexHdr volatile *aHdr;     /* Header in shared memory */
--
--  /* The first page of the wal-index must be mapped at this point. */
--  assert( pWal->nWiData>0 && pWal->apWiData[0] );
-+static int walIndexRecover(Wal *pWal){
-+  int rc;                         /* Return Code */
-+  i64 nSize;                      /* Size of log file */
-+  u32 aFrameCksum[2] = {0, 0};
-+  int iLock;                      /* Lock offset to lock for checkpoint */
-+  int nLock;                      /* Number of locks to hold */
- 
--  /* Read the header. This might happen concurrently with a write to the
--  ** same area of shared memory on a different CPU in a SMP,
--  ** meaning it is possible that an inconsistent snapshot is read
--  ** from the file. If this happens, return non-zero.
--  **
--  ** There are two copies of the header at the beginning of the wal-index.
--  ** When reading, read [0] first then [1].  Writes are in the reverse order.
--  ** Memory barriers are used to prevent the compiler or the hardware from
--  ** reordering the reads and writes.
-+  /* Obtain an exclusive lock on all byte in the locking range not already
-+  ** locked by the caller. The caller is guaranteed to have locked the
-+  ** WAL_WRITE_LOCK byte, and may have also locked the WAL_CKPT_LOCK byte.
-+  ** If successful, the same bytes that are locked here are unlocked before
-+  ** this function returns.
-   */
--  aHdr = walIndexHdr(pWal);
--  memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
--  walShmBarrier(pWal);
--  memcpy(&h2, (void *)&aHdr[1], sizeof(h2));
--
--  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
--    return 1;   /* Dirty read */
--  }  
--  if( h1.isInit==0 ){
--    return 1;   /* Malformed header - probably all zeros */
--  }
--  walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum);
--  if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){
--    return 1;   /* Checksum does not match */
-+  assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
-+  assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
-+  assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
-+  assert( pWal->writeLock );
-+  iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
-+  nLock = SQLITE_SHM_NLOCK - iLock;
-+  rc = walLockExclusive(pWal, iLock, nLock);
-+  if( rc ){
-+    return rc;
-   }
-+  WALTRACE(("WAL%p: recovery begin...\n", pWal));
- 
--  if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){
--    *pChanged = 1;
--    memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr));
--    pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
--    testcase( pWal->szPage<=32768 );
--    testcase( pWal->szPage>=65536 );
-+  memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
-+
-+  rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
-+  if( rc!=SQLITE_OK ){
-+    goto recovery_error;
-   }
- 
--  /* The header was successfully read. Return zero. */
--  return 0;
--}
-+  if( nSize>WAL_HDRSIZE ){
-+    u8 aBuf[WAL_HDRSIZE];         /* Buffer to load WAL header into */
-+    u8 *aFrame = 0;               /* Malloc'd buffer to load entire frame */
-+    int szFrame;                  /* Number of bytes in buffer aFrame[] */
-+    u8 *aData;                    /* Pointer to data part of aFrame buffer */
-+    int iFrame;                   /* Index of last frame read */
-+    i64 iOffset;                  /* Next offset to read from log file */
-+    int szPage;                   /* Page size according to the log */
-+    u32 magic;                    /* Magic value read from WAL header */
-+    u32 version;                  /* Magic value read from WAL header */
- 
--/*
--** Read the wal-index header from the wal-index and into pWal->hdr.
--** If the wal-header appears to be corrupt, try to reconstruct the
--** wal-index from the WAL before returning.
--**
--** Set *pChanged to 1 if the wal-index header value in pWal->hdr is
--** changed by this opertion.  If pWal->hdr is unchanged, set *pChanged
--** to 0.
--**
--** If the wal-index header is successfully read, return SQLITE_OK. 
--** Otherwise an SQLite error code.
--*/
--static int walIndexReadHdr(Wal *pWal, int *pChanged){
--  int rc;                         /* Return code */
--  int badHdr;                     /* True if a header read failed */
--  volatile u32 *page0;            /* Chunk of wal-index containing header */
-+    /* Read in the WAL header. */
-+    rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
-+    if( rc!=SQLITE_OK ){
-+      goto recovery_error;
-+    }
- 
--  /* Ensure that page 0 of the wal-index (the page that contains the 
--  ** wal-index header) is mapped. Return early if an error occurs here.
--  */
--  assert( pChanged );
--  rc = walIndexPage(pWal, 0, &page0);
--  if( rc!=SQLITE_OK ){
--    return rc;
--  };
--  assert( page0 || pWal->writeLock==0 );
-+    /* If the database page size is not a power of two, or is greater than
-+    ** SQLITE_MAX_PAGE_SIZE, conclude that the WAL file contains no valid 
-+    ** data. Similarly, if the 'magic' value is invalid, ignore the whole
-+    ** WAL file.
-+    */
-+    magic = sqlite3Get4byte(&aBuf[0]);
-+    szPage = sqlite3Get4byte(&aBuf[8]);
-+    if( (magic&0xFFFFFFFE)!=WAL_MAGIC 
-+     || szPage&(szPage-1) 
-+     || szPage>SQLITE_MAX_PAGE_SIZE 
-+     || szPage<512 
-+    ){
-+      goto finished;
-+    }
-+    pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
-+    pWal->szPage = szPage;
-+    pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
-+    memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
- 
--  /* If the first page of the wal-index has been mapped, try to read the
--  ** wal-index header immediately, without holding any lock. This usually
--  ** works, but may fail if the wal-index header is corrupt or currently 
--  ** being modified by another thread or process.
--  */
--  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);
-+    /* Verify that the WAL header checksum is correct */
-+    walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN, 
-+        aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
-+    );
-+    if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
-+     || pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
-+    ){
-+      goto finished;
-+    }
- 
--  /* If the first attempt failed, it might have been due to a race
--  ** with a writer.  So get a WRITE lock and try again.
--  */
--  assert( badHdr==0 || pWal->writeLock==0 );
--  if( badHdr ){
--    if( pWal->readOnly & WAL_SHM_RDONLY ){
--      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
--        walUnlockShared(pWal, WAL_WRITE_LOCK);
--        rc = SQLITE_READONLY_RECOVERY;
--      }
--    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
--      pWal->writeLock = 1;
--      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
--        badHdr = walIndexTryHdr(pWal, pChanged);
--        if( badHdr ){
--          /* If the wal-index header is still malformed even while holding
--          ** a WRITE lock, it can only mean that the header is corrupted and
--          ** needs to be reconstructed.  So run recovery to do exactly that.
--          */
--          rc = walIndexRecover(pWal);
--          *pChanged = 1;
--        }
-+    /* Verify that the version number on the WAL format is one that
-+    ** are able to understand */
-+    version = sqlite3Get4byte(&aBuf[4]);
-+    if( version!=WAL_MAX_VERSION ){
-+      rc = SQLITE_CANTOPEN_BKPT;
-+      goto finished;
-+    }
-+
-+    /* Malloc a buffer to read frames into. */
-+    szFrame = szPage + WAL_FRAME_HDRSIZE;
-+    aFrame = (u8 *)sqlite3_malloc(szFrame);
-+    if( !aFrame ){
-+      rc = SQLITE_NOMEM;
-+      goto recovery_error;
-+    }
-+    aData = &aFrame[WAL_FRAME_HDRSIZE];
-+
-+    /* Read all frames from the log file. */
-+    iFrame = 0;
-+    for(iOffset=WAL_HDRSIZE; (iOffset+szFrame)<=nSize; iOffset+=szFrame){
-+      u32 pgno;                   /* Database page number for frame */
-+      u32 nTruncate;              /* dbsize field from frame header */
-+      int isValid;                /* True if this frame is valid */
-+
-+      /* Read and decode the next log frame. */
-+      rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
-+      if( rc!=SQLITE_OK ) break;
-+      isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
-+      if( !isValid ) break;
-+      rc = walIndexAppend(pWal, ++iFrame, pgno);
-+      if( rc!=SQLITE_OK ) break;
-+
-+      /* If nTruncate is non-zero, this is a commit record. */
-+      if( nTruncate ){
-+        pWal->hdr.mxFrame = iFrame;
-+        pWal->hdr.nPage = nTruncate;
-+        pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
-+        testcase( szPage<=32768 );
-+        testcase( szPage>=65536 );
-+        aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
-+        aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
-       }
--      pWal->writeLock = 0;
--      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
-     }
-+
-+    sqlite3_free(aFrame);
-   }
- 
--  /* If the header is read successfully, check the version number to make
--  ** sure the wal-index was not constructed with some future format that
--  ** this version of SQLite cannot understand.
--  */
--  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
--    rc = SQLITE_CANTOPEN_BKPT;
-+finished:
-+  if( rc==SQLITE_OK ){
-+    volatile WalCkptInfo *pInfo;
-+    int i;
-+    pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
-+    pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
-+    walIndexWriteHdr(pWal);
-+
-+    /* Reset the checkpoint-header. This is safe because this thread is 
-+    ** currently holding locks that exclude all other readers, writers and
-+    ** checkpointers.
-+    */
-+    pInfo = walCkptInfo(pWal);
-+    pInfo->nBackfill = 0;
-+    pInfo->aReadMark[0] = 0;
-+    for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
-+
-+    /* If more than one frame was recovered from the log file, report an
-+    ** event via sqlite3_log(). This is to help with identifying performance
-+    ** problems caused by applications routinely shutting down without
-+    ** checkpointing the log file.
-+    */
-+    if( pWal->hdr.nPage ){
-+      sqlite3_log(SQLITE_OK, "Recovered %d frames from WAL file %s",
-+          pWal->hdr.nPage, pWal->zWalName
-+      );
-+    }
-   }
- 
-+recovery_error:
-+  WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
-+  walUnlockExclusive(pWal, iLock, nLock);
-   return rc;
- }
- 
- /*
--** This is the value that walTryBeginRead returns when it needs to
--** be retried.
-+** Close an open wal-index.
- */
--#define WAL_RETRY  (-1)
-+static void walIndexClose(Wal *pWal, int isDelete){
-+  if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
-+    int i;
-+    for(i=0; i<pWal->nWiData; i++){
-+      sqlite3_free((void *)pWal->apWiData[i]);
-+      pWal->apWiData[i] = 0;
-+    }
-+  }else{
-+    sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
-+  }
-+}
- 
--/*
--** Attempt to start a read transaction.  This might fail due to a race or
--** other transient condition.  When that happens, it returns WAL_RETRY to
--** indicate to the caller that it is safe to retry immediately.
--**
--** On success return SQLITE_OK.  On a permanent failure (such an
--** I/O error or an SQLITE_BUSY because another process is running
--** recovery) return a positive error code.
--**
--** The useWal parameter is true to force the use of the WAL and disable
--** the case where the WAL is bypassed because it has been completely
--** checkpointed.  If useWal==0 then this routine calls walIndexReadHdr() 
--** to make a copy of the wal-index header into pWal->hdr.  If the 
--** wal-index header has changed, *pChanged is set to 1 (as an indication 
--** to the caller that the local paget cache is obsolete and needs to be 
--** flushed.)  When useWal==1, the wal-index header is assumed to already
--** be loaded and the pChanged parameter is unused.
--**
--** The caller must set the cnt parameter to the number of prior calls to
--** this routine during the current read attempt that returned WAL_RETRY.
--** This routine will start taking more aggressive measures to clear the
--** race conditions after multiple WAL_RETRY returns, and after an excessive
--** number of errors will ultimately return SQLITE_PROTOCOL.  The
--** SQLITE_PROTOCOL return indicates that some other process has gone rogue
--** and is not honoring the locking protocol.  There is a vanishingly small
--** chance that SQLITE_PROTOCOL could be returned because of a run of really
--** bad luck when there is lots of contention for the wal-index, but that
--** possibility is so small that it can be safely neglected, we believe.
-+/* 
-+** Open a connection to the WAL file zWalName. The database file must 
-+** already be opened on connection pDbFd. The buffer that zWalName points
-+** to must remain valid for the lifetime of the returned Wal* handle.
- **
--** On success, this routine obtains a read lock on 
--** WAL_READ_LOCK(pWal->readLock).  The pWal->readLock integer is
--** in the range 0 <= pWal->readLock < WAL_NREADER.  If pWal->readLock==(-1)
--** that means the Wal does not hold any read lock.  The reader must not
--** access any database page that is modified by a WAL frame up to and
--** including frame number aReadMark[pWal->readLock].  The reader will
--** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0
--** Or if pWal->readLock==0, then the reader will ignore the WAL
--** completely and get all content directly from the database file.
--** If the useWal parameter is 1 then the WAL will never be ignored and
--** this routine will always set pWal->readLock>0 on success.
--** When the read transaction is completed, the caller must release the
--** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1.
-+** A SHARED lock should be held on the database file when this function
-+** is called. The purpose of this SHARED lock is to prevent any other
-+** client from unlinking the WAL or wal-index file. If another process
-+** were to do this just after this client opened one of these files, the
-+** system would be badly broken.
- **
--** This routine uses the nBackfill and aReadMark[] fields of the header
--** to select a particular WAL_READ_LOCK() that strives to let the
--** checkpoint process do as much work as possible.  This routine might
--** update values of the aReadMark[] array in the header, but if it does
--** so it takes care to hold an exclusive lock on the corresponding
--** WAL_READ_LOCK() while changing values.
-+** If the log file is successfully opened, SQLITE_OK is returned and 
-+** *ppWal is set to point to a new WAL handle. If an error occurs,
-+** an SQLite error code is returned and *ppWal is left unmodified.
- */
--static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
--  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
--  u32 mxReadMark;                 /* Largest aReadMark[] value */
--  int mxI;                        /* Index of largest aReadMark[] value */
--  int i;                          /* Loop counter */
--  int rc = SQLITE_OK;             /* Return code  */
-+SQLITE_PRIVATE int sqlite3WalOpen(
-+  sqlite3_vfs *pVfs,              /* vfs module to open wal and wal-index */
-+  sqlite3_file *pDbFd,            /* The open database file */
-+  const char *zWalName,           /* Name of the WAL file */
-+  int bNoShm,                     /* True to run in heap-memory mode */
-+  i64 mxWalSize,                  /* Truncate WAL to this size on reset */
-+  Wal **ppWal                     /* OUT: Allocated Wal handle */
-+){
-+  int rc;                         /* Return Code */
-+  Wal *pRet;                      /* Object to allocate and return */
-+  int flags;                      /* Flags passed to OsOpen() */
- 
--  assert( pWal->readLock<0 );     /* Not currently locked */
-+  assert( zWalName && zWalName[0] );
-+  assert( pDbFd );
- 
--  /* Take steps to avoid spinning forever if there is a protocol error.
--  **
--  ** Circumstances that cause a RETRY should only last for the briefest
--  ** instances of time.  No I/O or other system calls are done while the
--  ** locks are held, so the locks should not be held for very long. But 
--  ** if we are unlucky, another process that is holding a lock might get
--  ** paged out or take a page-fault that is time-consuming to resolve, 
--  ** during the few nanoseconds that it is holding the lock.  In that case,
--  ** it might take longer than normal for the lock to free.
--  **
--  ** After 5 RETRYs, we begin calling sqlite3OsSleep().  The first few
--  ** calls to sqlite3OsSleep() have a delay of 1 microsecond.  Really this
--  ** is more of a scheduler yield than an actual delay.  But on the 10th
--  ** an subsequent retries, the delays start becoming longer and longer, 
--  ** so that on the 100th (and last) RETRY we delay for 21 milliseconds.
--  ** The total delay time before giving up is less than 1 second.
-+  /* In the amalgamation, the os_unix.c and os_win.c source files come before
-+  ** this source file.  Verify that the #defines of the locking byte offsets
-+  ** in os_unix.c and os_win.c agree with the WALINDEX_LOCK_OFFSET value.
-   */
--  if( cnt>5 ){
--    int nDelay = 1;                      /* Pause time in microseconds */
--    if( cnt>100 ){
--      VVA_ONLY( pWal->lockError = 1; )
--      return SQLITE_PROTOCOL;
--    }
--    if( cnt>=10 ) nDelay = (cnt-9)*238;  /* Max delay 21ms. Total delay 996ms */
--    sqlite3OsSleep(pWal->pVfs, nDelay);
--  }
-+#ifdef WIN_SHM_BASE
-+  assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET );
-+#endif
-+#ifdef UNIX_SHM_BASE
-+  assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET );
-+#endif
- 
--  if( !useWal ){
--    rc = walIndexReadHdr(pWal, pChanged);
--    if( rc==SQLITE_BUSY ){
--      /* If there is not a recovery running in another thread or process
--      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
--      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
--      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
--      ** would be technically correct.  But the race is benign since with
--      ** WAL_RETRY this routine will be called again and will probably be
--      ** right on the second iteration.
--      */
--      if( pWal->apWiData[0]==0 ){
--        /* This branch is taken when the xShmMap() method returns SQLITE_BUSY.
--        ** We assume this is a transient condition, so return WAL_RETRY. The
--        ** xShmMap() implementation used by the default unix and win32 VFS 
--        ** modules may return SQLITE_BUSY due to a race condition in the 
--        ** code that determines whether or not the shared-memory region 
--        ** must be zeroed before the requested page is returned.
--        */
--        rc = WAL_RETRY;
--      }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){
--        walUnlockShared(pWal, WAL_RECOVER_LOCK);
--        rc = WAL_RETRY;
--      }else if( rc==SQLITE_BUSY ){
--        rc = SQLITE_BUSY_RECOVERY;
--      }
--    }
--    if( rc!=SQLITE_OK ){
--      return rc;
--    }
--  }
- 
--  pInfo = walCkptInfo(pWal);
--  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
--    /* The WAL has been completely backfilled (or it is empty).
--    ** and can be safely ignored.
--    */
--    rc = walLockShared(pWal, WAL_READ_LOCK(0));
--    walShmBarrier(pWal);
--    if( rc==SQLITE_OK ){
--      if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
--        /* It is not safe to allow the reader to continue here if frames
--        ** may have been appended to the log before READ_LOCK(0) was obtained.
--        ** When holding READ_LOCK(0), the reader ignores the entire log file,
--        ** which implies that the database file contains a trustworthy
--        ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from
--        ** happening, this is usually correct.
--        **
--        ** However, if frames have been appended to the log (or if the log 
--        ** is wrapped and written for that matter) before the READ_LOCK(0)
--        ** is obtained, that is not necessarily true. A checkpointer may
--        ** have started to backfill the appended frames but crashed before
--        ** it finished. Leaving a corrupt image in the database file.
--        */
--        walUnlockShared(pWal, WAL_READ_LOCK(0));
--        return WAL_RETRY;
--      }
--      pWal->readLock = 0;
--      return SQLITE_OK;
--    }else if( rc!=SQLITE_BUSY ){
--      return rc;
--    }
-+  /* Allocate an instance of struct Wal to return. */
-+  *ppWal = 0;
-+  pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile);
-+  if( !pRet ){
-+    return SQLITE_NOMEM;
-   }
- 
--  /* If we get this far, it means that the reader will want to use
--  ** the WAL to get at content from recent commits.  The job now is
--  ** to select one of the aReadMark[] entries that is closest to
--  ** but not exceeding pWal->hdr.mxFrame and lock that entry.
--  */
--  mxReadMark = 0;
--  mxI = 0;
--  for(i=1; i<WAL_NREADER; i++){
--    u32 thisMark = pInfo->aReadMark[i];
--    if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){
--      assert( thisMark!=READMARK_NOT_USED );
--      mxReadMark = thisMark;
--      mxI = i;
--    }
-+  pRet->pVfs = pVfs;
-+  pRet->pWalFd = (sqlite3_file *)&pRet[1];
-+  pRet->pDbFd = pDbFd;
-+  pRet->readLock = -1;
-+  pRet->mxWalSize = mxWalSize;
-+  pRet->zWalName = zWalName;
-+  pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE);
-+
-+  /* Open file handle on the write-ahead log file. */
-+  flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
-+  rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags);
-+  if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){
-+    pRet->readOnly = WAL_RDONLY;
-   }
--  /* There was once an "if" here. The extra "{" is to preserve indentation. */
--  {
--    if( (pWal->readOnly & WAL_SHM_RDONLY)==0
--     && (mxReadMark<pWal->hdr.mxFrame || mxI==0)
--    ){
--      for(i=1; i<WAL_NREADER; i++){
--        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
--        if( rc==SQLITE_OK ){
--          mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
--          mxI = i;
--          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
--          break;
--        }else if( rc!=SQLITE_BUSY ){
--          return rc;
--        }
--      }
--    }
--    if( mxI==0 ){
--      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
--      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
--    }
- 
--    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
--    if( rc ){
--      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
--    }
--    /* Now that the read-lock has been obtained, check that neither the
--    ** value in the aReadMark[] array or the contents of the wal-index
--    ** header have changed.
--    **
--    ** It is necessary to check that the wal-index header did not change
--    ** between the time it was read and when the shared-lock was obtained
--    ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility
--    ** that the log file may have been wrapped by a writer, or that frames
--    ** that occur later in the log than pWal->hdr.mxFrame may have been
--    ** copied into the database by a checkpointer. If either of these things
--    ** happened, then reading the database with the current value of
--    ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
--    ** instead.
--    **
--    ** This does not guarantee that the copy of the wal-index header is up to
--    ** date before proceeding. That would not be possible without somehow
--    ** blocking writers. It only guarantees that a dangerous checkpoint or 
--    ** log-wrap (either of which would require an exclusive lock on
--    ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
--    */
--    walShmBarrier(pWal);
--    if( pInfo->aReadMark[mxI]!=mxReadMark
--     || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
--    ){
--      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
--      return WAL_RETRY;
--    }else{
--      assert( mxReadMark<=pWal->hdr.mxFrame );
--      pWal->readLock = (i16)mxI;
--    }
-+  if( rc!=SQLITE_OK ){
-+    walIndexClose(pRet, 0);
-+    sqlite3OsClose(pRet->pWalFd);
-+    sqlite3_free(pRet);
-+  }else{
-+    *ppWal = pRet;
-+    WALTRACE(("WAL%d: opened\n", pRet));
-   }
-   return rc;
- }
- 
- /*
--** Begin a read transaction on the database.
--**
--** This routine used to be called sqlite3OpenSnapshot() and with good reason:
--** it takes a snapshot of the state of the WAL and wal-index for the current
--** instant in time.  The current thread will continue to use this snapshot.
--** Other threads might append new content to the WAL and wal-index but
--** that extra content is ignored by the current thread.
--**
--** If the database contents have changes since the previous read
--** transaction, then *pChanged is set to 1 before returning.  The
--** Pager layer will use this to know that is cache is stale and
--** needs to be flushed.
-+** Change the size to which the WAL file is trucated on each reset.
- */
--SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
--  int rc;                         /* Return code */
--  int cnt = 0;                    /* Number of TryBeginRead attempts */
--
--  do{
--    rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
--  }while( rc==WAL_RETRY );
--  testcase( (rc&0xff)==SQLITE_BUSY );
--  testcase( (rc&0xff)==SQLITE_IOERR );
--  testcase( rc==SQLITE_PROTOCOL );
--  testcase( rc==SQLITE_OK );
--  return rc;
-+SQLITE_PRIVATE void sqlite3WalLimit(Wal *pWal, i64 iLimit){
-+  if( pWal ) pWal->mxWalSize = iLimit;
- }
- 
- /*
--** Finish with a read transaction.  All this does is release the
--** read-lock.
-+** Find the smallest page number out of all pages held in the WAL that
-+** has not been returned by any prior invocation of this method on the
-+** same WalIterator object.   Write into *piFrame the frame index where
-+** that page was last written into the WAL.  Write into *piPage the page
-+** number.
-+**
-+** Return 0 on success.  If there are no pages in the WAL with a page
-+** number larger than *piPage, then return 1.
- */
--SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){
--  sqlite3WalEndWriteTransaction(pWal);
--  if( pWal->readLock>=0 ){
--    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
--    pWal->readLock = -1;
-+static int walIteratorNext(
-+  WalIterator *p,               /* Iterator */
-+  u32 *piPage,                  /* OUT: The page number of the next page */
-+  u32 *piFrame                  /* OUT: Wal frame index of next page */
-+){
-+  u32 iMin;                     /* Result pgno must be greater than iMin */
-+  u32 iRet = 0xFFFFFFFF;        /* 0xffffffff is never a valid page number */
-+  int i;                        /* For looping through segments */
-+
-+  iMin = p->iPrior;
-+  assert( iMin<0xffffffff );
-+  for(i=p->nSegment-1; i>=0; i--){
-+    struct WalSegment *pSegment = &p->aSegment[i];
-+    while( pSegment->iNext<pSegment->nEntry ){
-+      u32 iPg = pSegment->aPgno[pSegment->aIndex[pSegment->iNext]];
-+      if( iPg>iMin ){
-+        if( iPg<iRet ){
-+          iRet = iPg;
-+          *piFrame = pSegment->iZero + pSegment->aIndex[pSegment->iNext];
-+        }
-+        break;
-+      }
-+      pSegment->iNext++;
-+    }
-   }
-+
-+  *piPage = p->iPrior = iRet;
-+  return (iRet==0xFFFFFFFF);
- }
- 
- /*
--** Read a page from the WAL, if it is present in the WAL and if the 
--** current read transaction is configured to use the WAL.  
-+** This function merges two sorted lists into a single sorted list.
- **
--** The *pInWal is set to 1 if the requested page is in the WAL and
--** has been loaded.  Or *pInWal is set to 0 if the page was not in 
--** the WAL and needs to be read out of the database.
-+** aLeft[] and aRight[] are arrays of indices.  The sort key is
-+** aContent[aLeft[]] and aContent[aRight[]].  Upon entry, the following
-+** is guaranteed for all J<K:
-+**
-+**        aContent[aLeft[J]] < aContent[aLeft[K]]
-+**        aContent[aRight[J]] < aContent[aRight[K]]
-+**
-+** This routine overwrites aRight[] with a new (probably longer) sequence
-+** of indices such that the aRight[] contains every index that appears in
-+** either aLeft[] or the old aRight[] and such that the second condition
-+** above is still met.
-+**
-+** The aContent[aLeft[X]] values will be unique for all X.  And the
-+** aContent[aRight[X]] values will be unique too.  But there might be
-+** one or more combinations of X and Y such that
-+**
-+**      aLeft[X]!=aRight[Y]  &&  aContent[aLeft[X]] == aContent[aRight[Y]]
-+**
-+** When that happens, omit the aLeft[X] and use the aRight[Y] index.
- */
--SQLITE_PRIVATE int sqlite3WalRead(
--  Wal *pWal,                      /* WAL handle */
--  Pgno pgno,                      /* Database page number to read data for */
--  int *pInWal,                    /* OUT: True if data is read from WAL */
--  int nOut,                       /* Size of buffer pOut in bytes */
--  u8 *pOut                        /* Buffer to write page data to */
-+static void walMerge(
-+  const u32 *aContent,            /* Pages in wal - keys for the sort */
-+  ht_slot *aLeft,                 /* IN: Left hand input list */
-+  int nLeft,                      /* IN: Elements in array *paLeft */
-+  ht_slot **paRight,              /* IN/OUT: Right hand input list */
-+  int *pnRight,                   /* IN/OUT: Elements in *paRight */
-+  ht_slot *aTmp                   /* Temporary buffer */
- ){
--  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
--  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
--  int iHash;                      /* Used to loop through N hash tables */
-+  int iLeft = 0;                  /* Current index in aLeft */
-+  int iRight = 0;                 /* Current index in aRight */
-+  int iOut = 0;                   /* Current index in output buffer */
-+  int nRight = *pnRight;
-+  ht_slot *aRight = *paRight;
- 
--  /* This routine is only be called from within a read transaction. */
--  assert( pWal->readLock>=0 || pWal->lockError );
-+  assert( nLeft>0 && nRight>0 );
-+  while( iRight<nRight || iLeft<nLeft ){
-+    ht_slot logpage;
-+    Pgno dbpage;
- 
--  /* If the "last page" field of the wal-index header snapshot is 0, then
--  ** no data will be read from the wal under any circumstances. Return early
--  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
--  ** then the WAL is ignored by the reader so return early, as if the 
--  ** WAL were empty.
--  */
--  if( iLast==0 || pWal->readLock==0 ){
--    *pInWal = 0;
--    return SQLITE_OK;
-+    if( (iLeft<nLeft) 
-+     && (iRight>=nRight || aContent[aLeft[iLeft]]<aContent[aRight[iRight]])
-+    ){
-+      logpage = aLeft[iLeft++];
-+    }else{
-+      logpage = aRight[iRight++];
-+    }
-+    dbpage = aContent[logpage];
-+
-+    aTmp[iOut++] = logpage;
-+    if( iLeft<nLeft && aContent[aLeft[iLeft]]==dbpage ) iLeft++;
-+
-+    assert( iLeft>=nLeft || aContent[aLeft[iLeft]]>dbpage );
-+    assert( iRight>=nRight || aContent[aRight[iRight]]>dbpage );
-   }
- 
--  /* Search the hash table or tables for an entry matching page number
--  ** pgno. Each iteration of the following for() loop searches one
--  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
--  **
--  ** This code might run concurrently to the code in walIndexAppend()
--  ** that adds entries to the wal-index (and possibly to this hash 
--  ** table). This means the value just read from the hash 
--  ** slot (aHash[iKey]) may have been added before or after the 
--  ** current read transaction was opened. Values added after the
--  ** read transaction was opened may have been written incorrectly -
--  ** i.e. these slots may contain garbage data. However, we assume
--  ** that any slots written before the current read transaction was
--  ** opened remain unmodified.
--  **
--  ** For the reasons above, the if(...) condition featured in the inner
--  ** loop of the following block is more stringent that would be required 
--  ** if we had exclusive access to the hash-table:
--  **
--  **   (aPgno[iFrame]==pgno): 
--  **     This condition filters out normal hash-table collisions.
--  **
--  **   (iFrame<=iLast): 
--  **     This condition filters out entries that were added to the hash
--  **     table after the current read-transaction had started.
--  */
--  for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
--    volatile ht_slot *aHash;      /* Pointer to hash table */
--    volatile u32 *aPgno;          /* Pointer to array of page numbers */
--    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
--    int iKey;                     /* Hash slot index */
--    int nCollide;                 /* Number of hash collisions remaining */
--    int rc;                       /* Error code */
-+  *paRight = aLeft;
-+  *pnRight = iOut;
-+  memcpy(aLeft, aTmp, sizeof(aTmp[0])*iOut);
-+}
-+
-+/*
-+** Sort the elements in list aList using aContent[] as the sort key.
-+** Remove elements with duplicate keys, preferring to keep the
-+** larger aList[] values.
-+**
-+** The aList[] entries are indices into aContent[].  The values in
-+** aList[] are to be sorted so that for all J<K:
-+**
-+**      aContent[aList[J]] < aContent[aList[K]]
-+**
-+** For any X and Y such that
-+**
-+**      aContent[aList[X]] == aContent[aList[Y]]
-+**
-+** Keep the larger of the two values aList[X] and aList[Y] and discard
-+** the smaller.
-+*/
-+static void walMergesort(
-+  const u32 *aContent,            /* Pages in wal */
-+  ht_slot *aBuffer,               /* Buffer of at least *pnList items to use */
-+  ht_slot *aList,                 /* IN/OUT: List to sort */
-+  int *pnList                     /* IN/OUT: Number of elements in aList[] */
-+){
-+  struct Sublist {
-+    int nList;                    /* Number of elements in aList */
-+    ht_slot *aList;               /* Pointer to sub-list content */
-+  };
-+
-+  const int nList = *pnList;      /* Size of input list */
-+  int nMerge = 0;                 /* Number of elements in list aMerge */
-+  ht_slot *aMerge = 0;            /* List to be merged */
-+  int iList;                      /* Index into input list */
-+  int iSub = 0;                   /* Index into aSub array */
-+  struct Sublist aSub[13];        /* Array of sub-lists */
-+
-+  memset(aSub, 0, sizeof(aSub));
-+  assert( nList<=HASHTABLE_NPAGE && nList>0 );
-+  assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) );
- 
--    rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
--    if( rc!=SQLITE_OK ){
--      return rc;
-+  for(iList=0; iList<nList; iList++){
-+    nMerge = 1;
-+    aMerge = &aList[iList];
-+    for(iSub=0; iList & (1<<iSub); iSub++){
-+      struct Sublist *p = &aSub[iSub];
-+      assert( p->aList && p->nList<=(1<<iSub) );
-+      assert( p->aList==&aList[iList&~((2<<iSub)-1)] );
-+      walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
-     }
--    nCollide = HASHTABLE_NSLOT;
--    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
--      u32 iFrame = aHash[iKey] + iZero;
--      if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){
--        assert( iFrame>iRead );
--        iRead = iFrame;
--      }
--      if( (nCollide--)==0 ){
--        return SQLITE_CORRUPT_BKPT;
--      }
-+    aSub[iSub].aList = aMerge;
-+    aSub[iSub].nList = nMerge;
-+  }
-+
-+  for(iSub++; iSub<ArraySize(aSub); iSub++){
-+    if( nList & (1<<iSub) ){
-+      struct Sublist *p = &aSub[iSub];
-+      assert( p->nList<=(1<<iSub) );
-+      assert( p->aList==&aList[nList&~((2<<iSub)-1)] );
-+      walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
-     }
-   }
-+  assert( aMerge==aList );
-+  *pnList = nMerge;
- 
--#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
--  /* If expensive assert() statements are available, do a linear search
--  ** of the wal-index file content. Make sure the results agree with the
--  ** result obtained using the hash indexes above.  */
-+#ifdef SQLITE_DEBUG
-   {
--    u32 iRead2 = 0;
--    u32 iTest;
--    for(iTest=iLast; iTest>0; iTest--){
--      if( walFramePgno(pWal, iTest)==pgno ){
--        iRead2 = iTest;
--        break;
--      }
-+    int i;
-+    for(i=1; i<*pnList; i++){
-+      assert( aContent[aList[i]] > aContent[aList[i-1]] );
-     }
--    assert( iRead==iRead2 );
-   }
- #endif
--
--  /* If iRead is non-zero, then it is the log frame number that contains the
--  ** required page. Read and return data from the log file.
--  */
--  if( iRead ){
--    int sz;
--    i64 iOffset;
--    sz = pWal->hdr.szPage;
--    sz = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
--    testcase( sz<=32768 );
--    testcase( sz>=65536 );
--    iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
--    *pInWal = 1;
--    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
--    return sqlite3OsRead(pWal->pWalFd, pOut, nOut, iOffset);
--  }
--
--  *pInWal = 0;
--  return SQLITE_OK;
- }
- 
--
- /* 
--** Return the size of the database in pages (or zero, if unknown).
-+** Free an iterator allocated by walIteratorInit().
- */
--SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal){
--  if( pWal && ALWAYS(pWal->readLock>=0) ){
--    return pWal->hdr.nPage;
--  }
--  return 0;
-+static void walIteratorFree(WalIterator *p){
-+  sqlite3ScratchFree(p);
- }
- 
--
--/* 
--** This function starts a write transaction on the WAL.
--**
--** A read transaction must have already been started by a prior call
--** to sqlite3WalBeginReadTransaction().
-+/*
-+** Construct a WalInterator object that can be used to loop over all 
-+** pages in the WAL in ascending order. The caller must hold the checkpoint
-+** lock.
- **
--** If another thread or process has written into the database since
--** the read transaction was started, then it is not possible for this
--** thread to write as doing so would cause a fork.  So this routine
--** returns SQLITE_BUSY in that case and no write transaction is started.
-+** On success, make *pp point to the newly allocated WalInterator object
-+** return SQLITE_OK. Otherwise, return an error code. If this routine
-+** returns an error, the value of *pp is undefined.
- **
--** There can only be a single writer active at a time.
-+** The calling routine should invoke walIteratorFree() to destroy the
-+** WalIterator object when it has finished with it.
- */
--SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){
--  int rc;
-+static int walIteratorInit(Wal *pWal, WalIterator **pp){
-+  WalIterator *p;                 /* Return value */
-+  int nSegment;                   /* Number of segments to merge */
-+  u32 iLast;                      /* Last frame in log */
-+  int nByte;                      /* Number of bytes to allocate */
-+  int i;                          /* Iterator variable */
-+  ht_slot *aTmp;                  /* Temp space used by merge-sort */
-+  int rc = SQLITE_OK;             /* Return Code */
- 
--  /* Cannot start a write transaction without first holding a read
--  ** transaction. */
--  assert( pWal->readLock>=0 );
-+  /* This routine only runs while holding the checkpoint lock. And
-+  ** it only runs if there is actually content in the log (mxFrame>0).
-+  */
-+  assert( pWal->ckptLock && pWal->hdr.mxFrame>0 );
-+  iLast = pWal->hdr.mxFrame;
- 
--  if( pWal->readOnly ){
--    return SQLITE_READONLY;
-+  /* Allocate space for the WalIterator object. */
-+  nSegment = walFramePage(iLast) + 1;
-+  nByte = sizeof(WalIterator) 
-+        + (nSegment-1)*sizeof(struct WalSegment)
-+        + iLast*sizeof(ht_slot);
-+  p = (WalIterator *)sqlite3ScratchMalloc(nByte);
-+  if( !p ){
-+    return SQLITE_NOMEM;
-   }
-+  memset(p, 0, nByte);
-+  p->nSegment = nSegment;
- 
--  /* Only one writer allowed at a time.  Get the write lock.  Return
--  ** SQLITE_BUSY if unable.
-+  /* Allocate temporary space used by the merge-sort routine. This block
-+  ** of memory will be freed before this function returns.
-   */
--  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
--  if( rc ){
--    return rc;
-+  aTmp = (ht_slot *)sqlite3ScratchMalloc(
-+      sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast)
-+  );
-+  if( !aTmp ){
-+    rc = SQLITE_NOMEM;
-   }
--  pWal->writeLock = 1;
- 
--  /* If another connection has written to the database file since the
--  ** time the read transaction on this connection was started, then
--  ** the write is disallowed.
--  */
--  if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
--    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
--    pWal->writeLock = 0;
--    rc = SQLITE_BUSY;
--  }
-+  for(i=0; rc==SQLITE_OK && i<nSegment; i++){
-+    volatile ht_slot *aHash;
-+    u32 iZero;
-+    volatile u32 *aPgno;
- 
--  return rc;
--}
-+    rc = walHashGet(pWal, i, &aHash, &aPgno, &iZero);
-+    if( rc==SQLITE_OK ){
-+      int j;                      /* Counter variable */
-+      int nEntry;                 /* Number of entries in this segment */
-+      ht_slot *aIndex;            /* Sorted index for this segment */
- 
--/*
--** End a write transaction.  The commit has already been done.  This
--** routine merely releases the lock.
--*/
--SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal){
--  if( pWal->writeLock ){
--    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
--    pWal->writeLock = 0;
-+      aPgno++;
-+      if( (i+1)==nSegment ){
-+        nEntry = (int)(iLast - iZero);
-+      }else{
-+        nEntry = (int)((u32*)aHash - (u32*)aPgno);
-+      }
-+      aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[iZero];
-+      iZero++;
-+  
-+      for(j=0; j<nEntry; j++){
-+        aIndex[j] = (ht_slot)j;
-+      }
-+      walMergesort((u32 *)aPgno, aTmp, aIndex, &nEntry);
-+      p->aSegment[i].iZero = iZero;
-+      p->aSegment[i].nEntry = nEntry;
-+      p->aSegment[i].aIndex = aIndex;
-+      p->aSegment[i].aPgno = (u32 *)aPgno;
-+    }
-   }
--  return SQLITE_OK;
-+  sqlite3ScratchFree(aTmp);
+-** The author disclaims copyright to this source code.  In place of
++/* BEGIN CRYPTO */
++#ifdef SQLITE_HAS_CODEC
++SQLITE_PRIVATE void sqlite3pager_get_codec(Pager *pPager, void **ctx) {
++  *ctx = pPager->pCodec;
++}
 +
-+  if( rc!=SQLITE_OK ){
-+    walIteratorFree(p);
-+  }
-+  *pp = p;
-+  return rc;
- }
- 
- /*
--** If any data has been written (but not committed) to the log file, this
--** function moves the write-pointer back to the start of the transaction.
--**
--** Additionally, the callback function is invoked for each frame written
--** to the WAL since the start of the transaction. If the callback returns
--** other than SQLITE_OK, it is not invoked again and the error code is
--** returned to the caller.
--**
--** Otherwise, if the callback function does not return an error, this
--** function returns SQLITE_OK.
-+** Attempt to obtain the exclusive WAL lock defined by parameters lockIdx and
-+** n. If the attempt fails and parameter xBusy is not NULL, then it is a
-+** busy-handler function. Invoke it and retry the lock until either the
-+** lock is successfully obtained or the busy-handler returns 0.
- */
--SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
--  int rc = SQLITE_OK;
--  if( ALWAYS(pWal->writeLock) ){
--    Pgno iMax = pWal->hdr.mxFrame;
--    Pgno iFrame;
--  
--    /* Restore the clients cache of the wal-index header to the state it
--    ** was in before the client began writing to the database. 
--    */
--    memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
--
--    for(iFrame=pWal->hdr.mxFrame+1; 
--        ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; 
--        iFrame++
--    ){
--      /* This call cannot fail. Unless the page for which the page number
--      ** is passed as the second argument is (a) in the cache and 
--      ** (b) has an outstanding reference, then xUndo is either a no-op
--      ** (if (a) is false) or simply expels the page from the cache (if (b)
--      ** is false).
--      **
--      ** If the upper layer is doing a rollback, it is guaranteed that there
--      ** are no outstanding references to any page other than page 1. And
--      ** page 1 is never written to the log until the transaction is
--      ** committed. As a result, the call to xUndo may not fail.
--      */
--      assert( walFramePgno(pWal, iFrame)!=1 );
--      rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
--    }
--    walCleanupHash(pWal);
--  }
--  assert( rc==SQLITE_OK );
-+static int walBusyLock(
-+  Wal *pWal,                      /* WAL connection */
-+  int (*xBusy)(void*),            /* Function to call when busy */
-+  void *pBusyArg,                 /* Context argument for xBusyHandler */
-+  int lockIdx,                    /* Offset of first byte to lock */
-+  int n                           /* Number of bytes to lock */
-+){
-+  int rc;
-+  do {
-+    rc = walLockExclusive(pWal, lockIdx, n);
-+  }while( xBusy && rc==SQLITE_BUSY && xBusy(pBusyArg) );
-   return rc;
- }
- 
--/* 
--** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
--** values. This function populates the array with values required to 
--** "rollback" the write position of the WAL handle back to the current 
--** point in the event of a savepoint rollback (via WalSavepointUndo()).
-+/*
-+** The cache of the wal-index header must be valid to call this function.
-+** Return the page-size in bytes used by the database.
- */
--SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
--  assert( pWal->writeLock );
--  aWalData[0] = pWal->hdr.mxFrame;
--  aWalData[1] = pWal->hdr.aFrameCksum[0];
--  aWalData[2] = pWal->hdr.aFrameCksum[1];
--  aWalData[3] = pWal->nCkpt;
-+static int walPagesize(Wal *pWal){
-+  return (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
- }
- 
--/* 
--** Move the write position of the WAL back to the point identified by
--** the values in the aWalData[] array. aWalData must point to an array
--** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
--** by a call to WalSavepoint().
-+/*
-+** Copy as much content as we can from the WAL back into the database file
-+** in response to an sqlite3_wal_checkpoint() request or the equivalent.
-+**
-+** The amount of information copies from WAL to database might be limited
-+** by active readers.  This routine will never overwrite a database page
-+** that a concurrent reader might be using.
-+**
-+** All I/O barrier operations (a.k.a fsyncs) occur in this routine when
-+** SQLite is in WAL-mode in synchronous=NORMAL.  That means that if 
-+** checkpoints are always run by a background thread or background 
-+** process, foreground threads will never block on a lengthy fsync call.
-+**
-+** Fsync is called on the WAL before writing content out of the WAL and
-+** into the database.  This ensures that if the new content is persistent
-+** in the WAL and can be recovered following a power-loss or hard reset.
-+**
-+** Fsync is also called on the database file if (and only if) the entire
-+** WAL content is copied into the database file.  This second fsync makes
-+** it safe to delete the WAL since the new content will persist in the
-+** database file.
-+**
-+** This routine uses and updates the nBackfill field of the wal-index header.
-+** This is the only routine tha will increase the value of nBackfill.  
-+** (A WAL reset or recovery will revert nBackfill to zero, but not increase
-+** its value.)
-+**
-+** The caller must be holding sufficient locks to ensure that no other
-+** checkpoint is running (in any other thread or process) at the same
-+** time.
- */
--SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
--  int rc = SQLITE_OK;
-+static int walCheckpoint(
-+  Wal *pWal,                      /* Wal connection */
-+  int eMode,                      /* One of PASSIVE, FULL or RESTART */
-+  int (*xBusyCall)(void*),        /* Function to call when busy */
-+  void *pBusyArg,                 /* Context argument for xBusyHandler */
-+  int sync_flags,                 /* Flags for OsSync() (or 0) */
-+  u8 *zBuf                        /* Temporary buffer to use */
-+){
-+  int rc;                         /* Return code */
-+  int szPage;                     /* Database page-size */
-+  WalIterator *pIter = 0;         /* Wal iterator context */
-+  u32 iDbpage = 0;                /* Next database page to write */
-+  u32 iFrame = 0;                 /* Wal frame containing data for iDbpage */
-+  u32 mxSafeFrame;                /* Max frame that can be backfilled */
-+  u32 mxPage;                     /* Max database page to write */
-+  int i;                          /* Loop counter */
-+  volatile WalCkptInfo *pInfo;    /* The checkpoint status information */
-+  int (*xBusy)(void*) = 0;        /* Function to call when waiting for locks */
- 
--  assert( pWal->writeLock );
--  assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame );
-+  szPage = walPagesize(pWal);
-+  testcase( szPage<=32768 );
-+  testcase( szPage>=65536 );
-+  pInfo = walCkptInfo(pWal);
-+  if( pInfo->nBackfill>=pWal->hdr.mxFrame ) return SQLITE_OK;
- 
--  if( aWalData[3]!=pWal->nCkpt ){
--    /* This savepoint was opened immediately after the write-transaction
--    ** was started. Right after that, the writer decided to wrap around
--    ** to the start of the log. Update the savepoint values to match.
--    */
--    aWalData[0] = 0;
--    aWalData[3] = pWal->nCkpt;
-+  /* Allocate the iterator */
-+  rc = walIteratorInit(pWal, &pIter);
-+  if( rc!=SQLITE_OK ){
-+    return rc;
-   }
-+  assert( pIter );
- 
--  if( aWalData[0]<pWal->hdr.mxFrame ){
--    pWal->hdr.mxFrame = aWalData[0];
--    pWal->hdr.aFrameCksum[0] = aWalData[1];
--    pWal->hdr.aFrameCksum[1] = aWalData[2];
--    walCleanupHash(pWal);
-+  if( eMode!=SQLITE_CHECKPOINT_PASSIVE ) xBusy = xBusyCall;
++SQLITE_PRIVATE int sqlite3pager_is_mj_pgno(Pager *pPager, Pgno pgno) {
++  return (PAGER_MJ_PGNO(pPager) == pgno) ? 1 : 0;
++}
 +
-+  /* Compute in mxSafeFrame the index of the last frame of the WAL that is
-+  ** safe to write into the database.  Frames beyond mxSafeFrame might
-+  ** overwrite database pages that are in use by active readers and thus
-+  ** cannot be backfilled from the WAL.
-+  */
-+  mxSafeFrame = pWal->hdr.mxFrame;
-+  mxPage = pWal->hdr.nPage;
-+  for(i=1; i<WAL_NREADER; i++){
-+    u32 y = pInfo->aReadMark[i];
-+    if( mxSafeFrame>y ){
-+      assert( y<=pWal->hdr.mxFrame );
-+      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
-+      if( rc==SQLITE_OK ){
-+        pInfo->aReadMark[i] = READMARK_NOT_USED;
-+        walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
-+      }else if( rc==SQLITE_BUSY ){
-+        mxSafeFrame = y;
-+        xBusy = 0;
-+      }else{
-+        goto walcheckpoint_out;
-+      }
-+    }
-   }
- 
--  return rc;
--}
-+  if( pInfo->nBackfill<mxSafeFrame
-+   && (rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(0), 1))==SQLITE_OK
-+  ){
-+    i64 nSize;                    /* Current size of database file */
-+    u32 nBackfill = pInfo->nBackfill;
- 
--/*
--** This function is called just before writing a set of frames to the log
--** file (see sqlite3WalFrames()). It checks to see if, instead of appending
--** to the current log file, it is possible to overwrite the start of the
--** existing log file with the new frames (i.e. "reset" the log). If so,
--** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left
--** unchanged.
--**
--** SQLITE_OK is returned if no error is encountered (regardless of whether
--** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
--** if an error occurs.
--*/
--static int walRestartLog(Wal *pWal){
--  int rc = SQLITE_OK;
--  int cnt;
-+    /* Sync the WAL to disk */
-+    if( sync_flags ){
-+      rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
-+    }
- 
--  if( pWal->readLock==0 ){
--    volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
--    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
--    if( pInfo->nBackfill>0 ){
--      u32 salt1;
--      sqlite3_randomness(4, &salt1);
--      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
--      if( rc==SQLITE_OK ){
--        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
--        ** readers are currently using the WAL), then the transactions
--        ** frames will overwrite the start of the existing log. Update the
--        ** wal-index header to reflect this.
--        **
--        ** In theory it would be Ok to update the cache of the header only
--        ** at this point. But updating the actual wal-index header is also
--        ** safe and means there is no special case for sqlite3WalUndo()
--        ** to handle if this transaction is rolled back.
--        */
--        int i;                    /* Loop counter */
--        u32 *aSalt = pWal->hdr.aSalt;       /* Big-endian salt values */
-+    /* If the database file may grow as a result of this checkpoint, hint
-+    ** about the eventual size of the db file to the VFS layer. 
-+    */
-+    if( rc==SQLITE_OK ){
-+      i64 nReq = ((i64)mxPage * szPage);
-+      rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
-+      if( rc==SQLITE_OK && nSize<nReq ){
-+        sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT, &nReq);
-+      }
-+    }
- 
--        /* Limit the size of WAL file if the journal_size_limit PRAGMA is
--        ** set to a non-negative value.  Log errors encountered
--        ** during the truncation attempt. */
--        if( pWal->mxWalSize>=0 ){
--          i64 sz;
--          int rx;
--          sqlite3BeginBenignMalloc();
--          rx = sqlite3OsFileSize(pWal->pWalFd, &sz);
--          if( rx==SQLITE_OK && (sz > pWal->mxWalSize) ){
--            rx = sqlite3OsTruncate(pWal->pWalFd, pWal->mxWalSize);
--          }
--          sqlite3EndBenignMalloc();
--          if( rx ){
--            sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName);
--          }
-+    /* Iterate through the contents of the WAL, copying data to the db file. */
-+    while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
-+      i64 iOffset;
-+      assert( walFramePgno(pWal, iFrame)==iDbpage );
-+      if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ) continue;
-+      iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE;
-+      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL file */
-+      rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset);
-+      if( rc!=SQLITE_OK ) break;
-+      iOffset = (iDbpage-1)*(i64)szPage;
-+      testcase( IS_BIG_INT(iOffset) );
-+      rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
-+      if( rc!=SQLITE_OK ) break;
-+    }
++SQLITE_PRIVATE sqlite3_file *sqlite3Pager_get_fd(Pager *pPager) {
++  return (isOpen(pPager->fd)) ? pPager->fd : NULL;
++}
 +
-+    /* If work was actually accomplished... */
-+    if( rc==SQLITE_OK ){
-+      if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
-+        i64 szDb = pWal->hdr.nPage*(i64)szPage;
-+        testcase( IS_BIG_INT(szDb) );
-+        rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
-+        if( rc==SQLITE_OK && sync_flags ){
-+          rc = sqlite3OsSync(pWal->pDbFd, sync_flags);
-         }
-+      }
-+      if( rc==SQLITE_OK ){
-+        pInfo->nBackfill = mxSafeFrame;
-+      }
-+    }
++SQLITE_PRIVATE void sqlite3pager_sqlite3PagerSetCodec(
++  Pager *pPager,
++  void *(*xCodec)(void*,void*,Pgno,int),
++  void (*xCodecSizeChng)(void*,int,int),
++  void (*xCodecFree)(void*),
++  void *pCodec
++){
++  sqlite3PagerSetCodec(pPager, xCodec, xCodecSizeChng, xCodecFree, pCodec); 
++}
 +
-+    /* Release the reader lock held while backfilling */
-+    walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
-+  }
 +
-+  if( rc==SQLITE_BUSY ){
-+    /* Reset the return code so as not to report a checkpoint failure
-+    ** just because there are active readers.  */
-+    rc = SQLITE_OK;
-+  }
- 
--        pWal->nCkpt++;
--        pWal->hdr.mxFrame = 0;
--        sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
--        aSalt[1] = salt1;
--        walIndexWriteHdr(pWal);
--        pInfo->nBackfill = 0;
--        for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
--        assert( pInfo->aReadMark[0]==0 );
-+  /* If this is an SQLITE_CHECKPOINT_RESTART operation, and the entire wal
-+  ** file has been copied into the database file, then block until all
-+  ** readers have finished using the wal file. This ensures that the next
-+  ** process to write to the database restarts the wal file.
-+  */
-+  if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
-+    assert( pWal->writeLock );
-+    if( pInfo->nBackfill<pWal->hdr.mxFrame ){
-+      rc = SQLITE_BUSY;
-+    }else if( eMode==SQLITE_CHECKPOINT_RESTART ){
-+      assert( mxSafeFrame==pWal->hdr.mxFrame );
-+      rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1);
-+      if( rc==SQLITE_OK ){
-         walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
--      }else if( rc!=SQLITE_BUSY ){
--        return rc;
-       }
-     }
--    walUnlockShared(pWal, WAL_READ_LOCK(0));
--    pWal->readLock = -1;
--    cnt = 0;
--    do{
--      int notUsed;
--      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
--    }while( rc==WAL_RETRY );
--    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
--    testcase( (rc&0xff)==SQLITE_IOERR );
--    testcase( rc==SQLITE_PROTOCOL );
--    testcase( rc==SQLITE_OK );
-   }
++#endif
++/* END CRYPTO */
 +
-+ walcheckpoint_out:
-+  walIteratorFree(pIter);
-   return rc;
- }
- 
--/* 
--** Write a set of frames to the log. The caller must hold the write-lock
--** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
-+/*
-+** Close a connection to a log file.
- */
--SQLITE_PRIVATE int sqlite3WalFrames(
--  Wal *pWal,                      /* Wal handle to write to */
--  int szPage,                     /* Database page-size in bytes */
--  PgHdr *pList,                   /* List of dirty pages to write */
--  Pgno nTruncate,                 /* Database size after this commit */
--  int isCommit,                   /* True if this is a commit */
--  int sync_flags                  /* Flags to pass to OsSync() (or 0) */
-+SQLITE_PRIVATE int sqlite3WalClose(
-+  Wal *pWal,                      /* Wal to close */
-+  int sync_flags,                 /* Flags to pass to OsSync() (or 0) */
-+  int nBuf,
-+  u8 *zBuf                        /* Buffer of at least nBuf bytes */
- ){
--  int rc;                         /* Used to catch return codes */
--  u32 iFrame;                     /* Next frame address */
--  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
--  PgHdr *p;                       /* Iterator to run through pList with. */
--  PgHdr *pLast = 0;               /* Last frame in list */
--  int nLast = 0;                  /* Number of extra copies of last page */
-+  int rc = SQLITE_OK;
-+  if( pWal ){
-+    int isDelete = 0;             /* True to unlink wal and wal-index files */
- 
--  assert( pList );
--  assert( pWal->writeLock );
-+    /* If an EXCLUSIVE lock can be obtained on the database file (using the
-+    ** ordinary, rollback-mode locking methods, this guarantees that the
-+    ** connection associated with this log file is the only connection to
-+    ** the database. In this case checkpoint the database and unlink both
-+    ** the wal and wal-index files.
-+    **
-+    ** The EXCLUSIVE lock is not released before returning.
-+    */
-+    rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE);
-+    if( rc==SQLITE_OK ){
-+      if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
-+        pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
-+      }
-+      rc = sqlite3WalCheckpoint(
-+          pWal, SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
-+      );
-+      if( rc==SQLITE_OK ){
-+        isDelete = 1;
-+      }
-+    }
- 
--#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
--  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
--    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
--              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
-+    walIndexClose(pWal, isDelete);
-+    sqlite3OsClose(pWal->pWalFd);
-+    if( isDelete ){
-+      sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
-+    }
-+    WALTRACE(("WAL%p: closed\n", pWal));
-+    sqlite3_free((void *)pWal->apWiData);
-+    sqlite3_free(pWal);
-   }
--#endif
-+  return rc;
-+}
- 
--  /* See if it is possible to write these frames into the start of the
--  ** log file, instead of appending to it at pWal->hdr.mxFrame.
--  */
--  if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
--    return rc;
--  }
-+/*
-+** Try to read the wal-index header.  Return 0 on success and 1 if
-+** there is a problem.
-+**
-+** The wal-index is in shared memory.  Another thread or process might
-+** be writing the header at the same time this procedure is trying to
-+** read it, which might result in inconsistency.  A dirty read is detected
-+** by verifying that both copies of the header are the same and also by
-+** a checksum on the header.
-+**
-+** If and only if the read is consistent and the header is different from
-+** pWal->hdr, then pWal->hdr is updated to the content of the new header
-+** and *pChanged is set to 1.
-+**
-+** If the checksum cannot be verified return non-zero. If the header
-+** is read successfully and the checksum verified, return zero.
-+*/
-+static int walIndexTryHdr(Wal *pWal, int *pChanged){
-+  u32 aCksum[2];                  /* Checksum on the header content */
-+  WalIndexHdr h1, h2;             /* Two copies of the header content */
-+  WalIndexHdr volatile *aHdr;     /* Header in shared memory */
- 
--  /* If this is the first frame written into the log, write the WAL
--  ** header to the start of the WAL file. See comments at the top of
--  ** this source file for a description of the WAL header format.
-+  /* The first page of the wal-index must be mapped at this point. */
-+  assert( pWal->nWiData>0 && pWal->apWiData[0] );
 +
-+  /* Read the header. This might happen concurrently with a write to the
-+  ** same area of shared memory on a different CPU in a SMP,
-+  ** meaning it is possible that an inconsistent snapshot is read
-+  ** from the file. If this happens, return non-zero.
-+  **
-+  ** There are two copies of the header at the beginning of the wal-index.
-+  ** When reading, read [0] first then [1].  Writes are in the reverse order.
-+  ** Memory barriers are used to prevent the compiler or the hardware from
-+  ** reordering the reads and writes.
-   */
--  iFrame = pWal->hdr.mxFrame;
--  if( iFrame==0 ){
--    u8 aWalHdr[WAL_HDRSIZE];      /* Buffer to assemble wal-header in */
--    u32 aCksum[2];                /* Checksum for wal-header */
-+  aHdr = walIndexHdr(pWal);
-+  memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
-+  walShmBarrier(pWal);
-+  memcpy(&h2, (void *)&aHdr[1], sizeof(h2));
- 
--    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
--    sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION);
--    sqlite3Put4byte(&aWalHdr[8], szPage);
--    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
--    sqlite3_randomness(8, pWal->hdr.aSalt);
--    memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
--    walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum);
--    sqlite3Put4byte(&aWalHdr[24], aCksum[0]);
--    sqlite3Put4byte(&aWalHdr[28], aCksum[1]);
--    
--    pWal->szPage = szPage;
--    pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
--    pWal->hdr.aFrameCksum[0] = aCksum[0];
--    pWal->hdr.aFrameCksum[1] = aCksum[1];
-+  if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
-+    return 1;   /* Dirty read */
-+  }  
-+  if( h1.isInit==0 ){
-+    return 1;   /* Malformed header - probably all zeros */
-+  }
-+  walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum);
-+  if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){
-+    return 1;   /* Checksum does not match */
-+  }
- 
--    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
--    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
--    if( rc!=SQLITE_OK ){
--      return rc;
--    }
-+  if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){
-+    *pChanged = 1;
-+    memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr));
-+    pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
-+    testcase( pWal->szPage<=32768 );
-+    testcase( pWal->szPage>=65536 );
-   }
--  assert( (int)pWal->szPage==szPage );
- 
--  /* Write the log file. */
--  for(p=pList; p; p=p->pDirty){
--    u32 nDbsize;                  /* Db-size field for frame header */
--    i64 iOffset;                  /* Write offset in log file */
--    void *pData;
--   
--    iOffset = walFrameOffset(++iFrame, szPage);
--    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
--    
--    /* Populate and write the frame header */
--    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
--#if defined(SQLITE_HAS_CODEC)
--    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
--#else
--    pData = p->pData;
--#endif
--    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
--    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
--    if( rc!=SQLITE_OK ){
--      return rc;
--    }
-+  /* The header was successfully read. Return zero. */
-+  return 0;
-+}
- 
--    /* Write the page data */
--    rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame));
--    if( rc!=SQLITE_OK ){
--      return rc;
--    }
--    pLast = p;
--  }
++/************** End of pager.c ***********************************************/
++/************** Begin file wal.c *********************************************/
 +/*
-+** Read the wal-index header from the wal-index and into pWal->hdr.
-+** If the wal-header appears to be corrupt, try to reconstruct the
-+** wal-index from the WAL before returning.
-+**
-+** Set *pChanged to 1 if the wal-index header value in pWal->hdr is
-+** changed by this opertion.  If pWal->hdr is unchanged, set *pChanged
-+** to 0.
++** 2010 February 1
 +**
-+** If the wal-index header is successfully read, return SQLITE_OK. 
-+** Otherwise an SQLite error code.
-+*/
-+static int walIndexReadHdr(Wal *pWal, int *pChanged){
-+  int rc;                         /* Return code */
-+  int badHdr;                     /* True if a header read failed */
-+  volatile u32 *page0;            /* Chunk of wal-index containing header */
- 
--  /* Sync the log file if the 'isSync' flag was specified. */
--  if( sync_flags ){
--    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
--    i64 iOffset = walFrameOffset(iFrame+1, szPage);
-+  /* Ensure that page 0 of the wal-index (the page that contains the 
-+  ** wal-index header) is mapped. Return early if an error occurs here.
-+  */
-+  assert( pChanged );
-+  rc = walIndexPage(pWal, 0, &page0);
-+  if( rc!=SQLITE_OK ){
-+    return rc;
-+  };
-+  assert( page0 || pWal->writeLock==0 );
- 
--    assert( isCommit );
--    assert( iSegment>0 );
-+  /* If the first page of the wal-index has been mapped, try to read the
-+  ** wal-index header immediately, without holding any lock. This usually
-+  ** works, but may fail if the wal-index header is corrupt or currently 
-+  ** being modified by another thread or process.
-+  */
-+  badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);
- 
--    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
--    while( iOffset<iSegment ){
--      void *pData;
--#if defined(SQLITE_HAS_CODEC)
--      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
--#else
--      pData = pLast->pData;
--#endif
--      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
--      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
--      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
--      if( rc!=SQLITE_OK ){
--        return rc;
-+  /* If the first attempt failed, it might have been due to a race
-+  ** with a writer.  So get a WRITE lock and try again.
-+  */
-+  assert( badHdr==0 || pWal->writeLock==0 );
-+  if( badHdr ){
-+    if( pWal->readOnly & WAL_SHM_RDONLY ){
-+      if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
-+        walUnlockShared(pWal, WAL_WRITE_LOCK);
-+        rc = SQLITE_READONLY_RECOVERY;
-       }
--      iOffset += WAL_FRAME_HDRSIZE;
--      rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); 
--      if( rc!=SQLITE_OK ){
--        return rc;
-+    }else if( SQLITE_OK==(rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1)) ){
-+      pWal->writeLock = 1;
-+      if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
-+        badHdr = walIndexTryHdr(pWal, pChanged);
-+        if( badHdr ){
-+          /* If the wal-index header is still malformed even while holding
-+          ** a WRITE lock, it can only mean that the header is corrupted and
-+          ** needs to be reconstructed.  So run recovery to do exactly that.
-+          */
-+          rc = walIndexRecover(pWal);
-+          *pChanged = 1;
-+        }
-       }
--      nLast++;
--      iOffset += szPage;
-+      pWal->writeLock = 0;
-+      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
-     }
--
--    rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
-   }
- 
--  /* Append data to the wal-index. It is not necessary to lock the 
--  ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
--  ** guarantees that there are no other writers, and no data that may
--  ** be in use by existing readers is being overwritten.
-+  /* If the header is read successfully, check the version number to make
-+  ** sure the wal-index was not constructed with some future format that
-+  ** this version of SQLite cannot understand.
-   */
--  iFrame = pWal->hdr.mxFrame;
--  for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){
--    iFrame++;
--    rc = walIndexAppend(pWal, iFrame, p->pgno);
--  }
--  while( nLast>0 && rc==SQLITE_OK ){
--    iFrame++;
--    nLast--;
--    rc = walIndexAppend(pWal, iFrame, pLast->pgno);
--  }
--
--  if( rc==SQLITE_OK ){
--    /* Update the private copy of the header. */
--    pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
--    testcase( szPage<=32768 );
--    testcase( szPage>=65536 );
++** The author disclaims copyright to this source code.  In place of
+ ** a legal notice, here is a blessing:
+ **
+ **    May you do good and not evil.
+@@ -46941,870 +48957,223 @@
+     pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
+     testcase( szPage<=32768 );
+     testcase( szPage>=65536 );
 -    pWal->hdr.mxFrame = iFrame;
 -    if( isCommit ){
 -      pWal->hdr.iChange++;
@@ -18134,77 +2628,22 @@
 -      walIndexWriteHdr(pWal);
 -      pWal->iCallback = iFrame;
 -    }
-+  if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
-+    rc = SQLITE_CANTOPEN_BKPT;
-   }
- 
+-  }
+-
 -  WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
-   return rc;
- }
- 
+-  return rc;
+-}
+-
 -/* 
 -** This routine is called to implement sqlite3_wal_checkpoint() and
 -** related interfaces.
-+/*
-+** This is the value that walTryBeginRead returns when it needs to
-+** be retried.
-+*/
-+#define WAL_RETRY  (-1)
-+
-+/*
-+** Attempt to start a read transaction.  This might fail due to a race or
-+** other transient condition.  When that happens, it returns WAL_RETRY to
-+** indicate to the caller that it is safe to retry immediately.
- **
+-**
 -** Obtain a CHECKPOINT lock and then backfill as much information as
 -** we can from WAL into the database.
-+** On success return SQLITE_OK.  On a permanent failure (such an
-+** I/O error or an SQLITE_BUSY because another process is running
-+** recovery) return a positive error code.
- **
+-**
 -** If parameter xBusy is not NULL, it is a pointer to a busy-handler
 -** callback. In this case this function runs a blocking checkpoint.
-+** The useWal parameter is true to force the use of the WAL and disable
-+** the case where the WAL is bypassed because it has been completely
-+** checkpointed.  If useWal==0 then this routine calls walIndexReadHdr() 
-+** to make a copy of the wal-index header into pWal->hdr.  If the 
-+** wal-index header has changed, *pChanged is set to 1 (as an indication 
-+** to the caller that the local paget cache is obsolete and needs to be 
-+** flushed.)  When useWal==1, the wal-index header is assumed to already
-+** be loaded and the pChanged parameter is unused.
-+**
-+** The caller must set the cnt parameter to the number of prior calls to
-+** this routine during the current read attempt that returned WAL_RETRY.
-+** This routine will start taking more aggressive measures to clear the
-+** race conditions after multiple WAL_RETRY returns, and after an excessive
-+** number of errors will ultimately return SQLITE_PROTOCOL.  The
-+** SQLITE_PROTOCOL return indicates that some other process has gone rogue
-+** and is not honoring the locking protocol.  There is a vanishingly small
-+** chance that SQLITE_PROTOCOL could be returned because of a run of really
-+** bad luck when there is lots of contention for the wal-index, but that
-+** possibility is so small that it can be safely neglected, we believe.
-+**
-+** On success, this routine obtains a read lock on 
-+** WAL_READ_LOCK(pWal->readLock).  The pWal->readLock integer is
-+** in the range 0 <= pWal->readLock < WAL_NREADER.  If pWal->readLock==(-1)
-+** that means the Wal does not hold any read lock.  The reader must not
-+** access any database page that is modified by a WAL frame up to and
-+** including frame number aReadMark[pWal->readLock].  The reader will
-+** use WAL frames up to and including pWal->hdr.mxFrame if pWal->readLock>0
-+** Or if pWal->readLock==0, then the reader will ignore the WAL
-+** completely and get all content directly from the database file.
-+** If the useWal parameter is 1 then the WAL will never be ignored and
-+** this routine will always set pWal->readLock>0 on success.
-+** When the read transaction is completed, the caller must release the
-+** lock on WAL_READ_LOCK(pWal->readLock) and set pWal->readLock to -1.
-+**
-+** This routine uses the nBackfill and aReadMark[] fields of the header
-+** to select a particular WAL_READ_LOCK() that strives to let the
-+** checkpoint process do as much work as possible.  This routine might
-+** update values of the aReadMark[] array in the header, but if it does
-+** so it takes care to hold an exclusive lock on the corresponding
-+** WAL_READ_LOCK() while changing values.
- */
+-*/
 -SQLITE_PRIVATE int sqlite3WalCheckpoint(
 -  Wal *pWal,                      /* Wal connection */
 -  int eMode,                      /* PASSIVE, FULL or RESTART */
@@ -18219,117 +2658,21 @@
 -  int rc;                         /* Return code */
 -  int isChanged = 0;              /* True if a new wal-index header is loaded */
 -  int eMode2 = eMode;             /* Mode to pass to walCheckpoint() */
-+static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
-+  volatile WalCkptInfo *pInfo;    /* Checkpoint information in wal-index */
-+  u32 mxReadMark;                 /* Largest aReadMark[] value */
-+  int mxI;                        /* Index of largest aReadMark[] value */
-+  int i;                          /* Loop counter */
-+  int rc = SQLITE_OK;             /* Return code  */
- 
--  assert( pWal->ckptLock==0 );
--  assert( pWal->writeLock==0 );
-+  assert( pWal->readLock<0 );     /* Not currently locked */
- 
--  if( pWal->readOnly ) return SQLITE_READONLY;
--  WALTRACE(("WAL%p: checkpoint begins\n", pWal));
--  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
--  if( rc ){
--    /* Usually this is SQLITE_BUSY meaning that another thread or process
--    ** is already running a checkpoint, or maybe a recovery.  But it might
--    ** also be SQLITE_IOERR. */
--    return rc;
-+  /* Take steps to avoid spinning forever if there is a protocol error.
-+  **
-+  ** Circumstances that cause a RETRY should only last for the briefest
-+  ** instances of time.  No I/O or other system calls are done while the
-+  ** locks are held, so the locks should not be held for very long. But 
-+  ** if we are unlucky, another process that is holding a lock might get
-+  ** paged out or take a page-fault that is time-consuming to resolve, 
-+  ** during the few nanoseconds that it is holding the lock.  In that case,
-+  ** it might take longer than normal for the lock to free.
-+  **
-+  ** After 5 RETRYs, we begin calling sqlite3OsSleep().  The first few
-+  ** calls to sqlite3OsSleep() have a delay of 1 microsecond.  Really this
-+  ** is more of a scheduler yield than an actual delay.  But on the 10th
-+  ** an subsequent retries, the delays start becoming longer and longer, 
-+  ** so that on the 100th (and last) RETRY we delay for 21 milliseconds.
-+  ** The total delay time before giving up is less than 1 second.
-+  */
-+  if( cnt>5 ){
-+    int nDelay = 1;                      /* Pause time in microseconds */
-+    if( cnt>100 ){
-+      VVA_ONLY( pWal->lockError = 1; )
-+      return SQLITE_PROTOCOL;
-+    }
-+    if( cnt>=10 ) nDelay = (cnt-9)*238;  /* Max delay 21ms. Total delay 996ms */
-+    sqlite3OsSleep(pWal->pVfs, nDelay);
-+  }
-+
-+  if( !useWal ){
-+    rc = walIndexReadHdr(pWal, pChanged);
-+    if( rc==SQLITE_BUSY ){
-+      /* If there is not a recovery running in another thread or process
-+      ** then convert BUSY errors to WAL_RETRY.  If recovery is known to
-+      ** be running, convert BUSY to BUSY_RECOVERY.  There is a race here
-+      ** which might cause WAL_RETRY to be returned even if BUSY_RECOVERY
-+      ** would be technically correct.  But the race is benign since with
-+      ** WAL_RETRY this routine will be called again and will probably be
-+      ** right on the second iteration.
-+      */
-+      if( pWal->apWiData[0]==0 ){
-+        /* This branch is taken when the xShmMap() method returns SQLITE_BUSY.
-+        ** We assume this is a transient condition, so return WAL_RETRY. The
-+        ** xShmMap() implementation used by the default unix and win32 VFS 
-+        ** modules may return SQLITE_BUSY due to a race condition in the 
-+        ** code that determines whether or not the shared-memory region 
-+        ** must be zeroed before the requested page is returned.
-+        */
-+        rc = WAL_RETRY;
-+      }else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){
-+        walUnlockShared(pWal, WAL_RECOVER_LOCK);
-+        rc = WAL_RETRY;
-+      }else if( rc==SQLITE_BUSY ){
-+        rc = SQLITE_BUSY_RECOVERY;
-+      }
-+    }
-+    if( rc!=SQLITE_OK ){
-+      return rc;
-+    }
-+  }
-+
-+  pInfo = walCkptInfo(pWal);
-+  if( !useWal && pInfo->nBackfill==pWal->hdr.mxFrame ){
-+    /* The WAL has been completely backfilled (or it is empty).
-+    ** and can be safely ignored.
-+    */
-+    rc = walLockShared(pWal, WAL_READ_LOCK(0));
-+    walShmBarrier(pWal);
-+    if( rc==SQLITE_OK ){
-+      if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
-+        /* It is not safe to allow the reader to continue here if frames
-+        ** may have been appended to the log before READ_LOCK(0) was obtained.
-+        ** When holding READ_LOCK(0), the reader ignores the entire log file,
-+        ** which implies that the database file contains a trustworthy
-+        ** snapshoT. Since holding READ_LOCK(0) prevents a checkpoint from
-+        ** happening, this is usually correct.
-+        **
-+        ** However, if frames have been appended to the log (or if the log 
-+        ** is wrapped and written for that matter) before the READ_LOCK(0)
-+        ** is obtained, that is not necessarily true. A checkpointer may
-+        ** have started to backfill the appended frames but crashed before
-+        ** it finished. Leaving a corrupt image in the database file.
-+        */
-+        walUnlockShared(pWal, WAL_READ_LOCK(0));
-+        return WAL_RETRY;
-+      }
-+      pWal->readLock = 0;
-+      return SQLITE_OK;
-+    }else if( rc!=SQLITE_BUSY ){
-+      return rc;
-+    }
-   }
+-
+-  assert( pWal->ckptLock==0 );
+-  assert( pWal->writeLock==0 );
+-
+-  if( pWal->readOnly ) return SQLITE_READONLY;
+-  WALTRACE(("WAL%p: checkpoint begins\n", pWal));
+-  rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
+-  if( rc ){
+-    /* Usually this is SQLITE_BUSY meaning that another thread or process
+-    ** is already running a checkpoint, or maybe a recovery.  But it might
+-    ** also be SQLITE_IOERR. */
+-    return rc;
+-  }
 -  pWal->ckptLock = 1;
- 
+-
 -  /* If this is a blocking-checkpoint, then obtain the write-lock as well
 -  ** to prevent any writers from running while the checkpoint is underway.
 -  ** This has to be done before the call to walIndexReadHdr() below.
@@ -18338,11 +2681,7 @@
 -  ** run instead. Since the checkpointer is not holding the writer lock,
 -  ** there is no point in blocking waiting for any readers. Assuming no 
 -  ** other error occurs, this function will return SQLITE_BUSY to the caller.
-+  /* If we get this far, it means that the reader will want to use
-+  ** the WAL to get at content from recent commits.  The job now is
-+  ** to select one of the aReadMark[] entries that is closest to
-+  ** but not exceeding pWal->hdr.mxFrame and lock that entry.
-   */
+-  */
 -  if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){
 -    rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_WRITE_LOCK, 1);
 -    if( rc==SQLITE_OK ){
@@ -18350,16 +2689,8 @@
 -    }else if( rc==SQLITE_BUSY ){
 -      eMode2 = SQLITE_CHECKPOINT_PASSIVE;
 -      rc = SQLITE_OK;
-+  mxReadMark = 0;
-+  mxI = 0;
-+  for(i=1; i<WAL_NREADER; i++){
-+    u32 thisMark = pInfo->aReadMark[i];
-+    if( mxReadMark<=thisMark && thisMark<=pWal->hdr.mxFrame ){
-+      assert( thisMark!=READMARK_NOT_USED );
-+      mxReadMark = thisMark;
-+      mxI = i;
-     }
-   }
+-    }
+-  }
 -
 -  /* Read the wal-index header. */
 -  if( rc==SQLITE_OK ){
@@ -18372,64 +2703,22 @@
 -      rc = SQLITE_CORRUPT_BKPT;
 -    }else{
 -      rc = walCheckpoint(pWal, eMode2, xBusy, pBusyArg, sync_flags, zBuf);
-+  /* There was once an "if" here. The extra "{" is to preserve indentation. */
-+  {
-+    if( (pWal->readOnly & WAL_SHM_RDONLY)==0
-+     && (mxReadMark<pWal->hdr.mxFrame || mxI==0)
-+    ){
-+      for(i=1; i<WAL_NREADER; i++){
-+        rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
-+        if( rc==SQLITE_OK ){
-+          mxReadMark = pInfo->aReadMark[i] = pWal->hdr.mxFrame;
-+          mxI = i;
-+          walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
-+          break;
-+        }else if( rc!=SQLITE_BUSY ){
-+          return rc;
-+        }
-+      }
-     }
+-    }
 -
 -    /* If no error occurred, set the output variables. */
 -    if( rc==SQLITE_OK || rc==SQLITE_BUSY ){
 -      if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame;
 -      if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill);
-+    if( mxI==0 ){
-+      assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
-+      return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTLOCK;
-     }
+-    }
 -  }
- 
+-
 -  if( isChanged ){
 -    /* If a new wal-index header was loaded before the checkpoint was 
 -    ** performed, then the pager-cache associated with pWal is now
 -    ** out of date. So zero the cached wal-index header to ensure that
 -    ** next time the pager opens a snapshot on this database it knows that
 -    ** the cache needs to be reset.
-+    rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
-+    if( rc ){
-+      return rc==SQLITE_BUSY ? WAL_RETRY : rc;
-+    }
-+    /* Now that the read-lock has been obtained, check that neither the
-+    ** value in the aReadMark[] array or the contents of the wal-index
-+    ** header have changed.
-+    **
-+    ** It is necessary to check that the wal-index header did not change
-+    ** between the time it was read and when the shared-lock was obtained
-+    ** on WAL_READ_LOCK(mxI) was obtained to account for the possibility
-+    ** that the log file may have been wrapped by a writer, or that frames
-+    ** that occur later in the log than pWal->hdr.mxFrame may have been
-+    ** copied into the database by a checkpointer. If either of these things
-+    ** happened, then reading the database with the current value of
-+    ** pWal->hdr.mxFrame risks reading a corrupted snapshot. So, retry
-+    ** instead.
-+    **
-+    ** This does not guarantee that the copy of the wal-index header is up to
-+    ** date before proceeding. That would not be possible without somehow
-+    ** blocking writers. It only guarantees that a dangerous checkpoint or 
-+    ** log-wrap (either of which would require an exclusive lock on
-+    ** WAL_READ_LOCK(mxI)) has not occurred since the snapshot was valid.
-     */
+-    */
 -    memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
 -  }
 -
@@ -18451,22 +2740,11 @@
 -  if( pWal ){
 -    ret = pWal->iCallback;
 -    pWal->iCallback = 0;
-+    walShmBarrier(pWal);
-+    if( pInfo->aReadMark[mxI]!=mxReadMark
-+     || memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
-+    ){
-+      walUnlockShared(pWal, WAL_READ_LOCK(mxI));
-+      return WAL_RETRY;
-+    }else{
-+      assert( mxReadMark<=pWal->hdr.mxFrame );
-+      pWal->readLock = (i16)mxI;
-+    }
-   }
+-  }
 -  return (int)ret;
-+  return rc;
- }
- 
- /*
+-}
+-
+-/*
 -** This function is called to change the WAL subsystem into or out
 -** of locking_mode=EXCLUSIVE.
 -**
@@ -18477,29 +2755,19 @@
 -** transition out of exclusive-mode is successful, return 1.  This
 -** operation must occur while the pager is still holding the exclusive
 -** lock on the main database file.
-+** Begin a read transaction on the database.
- **
+-**
 -** If op is one, then change from locking_mode=NORMAL into 
 -** locking_mode=EXCLUSIVE.  This means that the pWal->readLock must
 -** be released.  Return 1 if the transition is made and 0 if the
 -** WAL is already in exclusive-locking mode - meaning that this
 -** routine is a no-op.  The pager must already hold the exclusive lock
 -** on the main database file before invoking this operation.
-+** This routine used to be called sqlite3OpenSnapshot() and with good reason:
-+** it takes a snapshot of the state of the WAL and wal-index for the current
-+** instant in time.  The current thread will continue to use this snapshot.
-+** Other threads might append new content to the WAL and wal-index but
-+** that extra content is ignored by the current thread.
- **
+-**
 -** If op is negative, then do a dry-run of the op==1 case but do
 -** not actually change anything. The pager uses this to see if it
 -** should acquire the database exclusive lock prior to invoking
 -** the op==1 case.
-+** If the database contents have changes since the previous read
-+** transaction, then *pChanged is set to 1 before returning.  The
-+** Pager layer will use this to know that is cache is stale and
-+** needs to be flushed.
- */
+-*/
 -SQLITE_PRIVATE int sqlite3WalExclusiveMode(Wal *pWal, int op){
 -  int rc;
 -  assert( pWal->writeLock==0 );
@@ -18513,10 +2781,7 @@
 -  */
 -  assert( pWal->readLock>=0 || pWal->lockError );
 -  assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) );
-+SQLITE_PRIVATE int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
-+  int rc;                         /* Return code */
-+  int cnt = 0;                    /* Number of TryBeginRead attempts */
- 
+-
 -  if( op==0 ){
 -    if( pWal->exclusiveMode ){
 -      pWal->exclusiveMode = 0;
@@ -18537,39 +2802,23 @@
 -  }else{
 -    rc = pWal->exclusiveMode==0;
 -  }
-+  do{
-+    rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
-+  }while( rc==WAL_RETRY );
-+  testcase( (rc&0xff)==SQLITE_BUSY );
-+  testcase( (rc&0xff)==SQLITE_IOERR );
-+  testcase( rc==SQLITE_PROTOCOL );
-+  testcase( rc==SQLITE_OK );
-   return rc;
- }
- 
+-  return rc;
+-}
+-
 -/* 
 -** Return true if the argument is non-NULL and the WAL module is using
 -** heap-memory for the wal-index. Otherwise, if the argument is NULL or the
 -** WAL module is using shared-memory, return false. 
-+/*
-+** Finish with a read transaction.  All this does is release the
-+** read-lock.
- */
+-*/
 -SQLITE_PRIVATE int sqlite3WalHeapMemory(Wal *pWal){
 -  return (pWal && pWal->exclusiveMode==WAL_HEAPMEMORY_MODE );
-+SQLITE_PRIVATE void sqlite3WalEndReadTransaction(Wal *pWal){
-+  sqlite3WalEndWriteTransaction(pWal);
-+  if( pWal->readLock>=0 ){
-+    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
-+    pWal->readLock = -1;
-+  }
- }
- 
+-}
+-
 -#endif /* #ifndef SQLITE_OMIT_WAL */
 -
 -/************** End of wal.c *************************************************/
 -/************** Begin file btmutex.c *****************************************/
- /*
+-/*
 -** 2007 August 27
 -**
 -** The author disclaims copyright to this source code.  In place of
@@ -18580,17 +2829,12 @@
 -**    May you share freely, never taking more than you give.
 -**
 -*************************************************************************
-+** Read a page from the WAL, if it is present in the WAL and if the 
-+** current read transaction is configured to use the WAL.  
- **
+-**
 -** This file contains code used to implement mutexes on Btree objects.
 -** This code really belongs in btree.c.  But btree.c is getting too
 -** big and we want to break it down some.  This packaged seemed like
 -** a good breakout.
-+** The *pInWal is set to 1 if the requested page is in the WAL and
-+** has been loaded.  Or *pInWal is set to 0 if the page was not in 
-+** the WAL and needs to be read out of the database.
- */
+-*/
 -/************** Include btreeInt.h in the middle of btmutex.c ****************/
 -/************** Begin file btreeInt.h ****************************************/
 -/*
@@ -18792,197 +3036,39 @@
 -** Overflow pages form a linked list.  Each page except the last is completely
 -** filled with data (pagesize - 4 bytes).  The last page can have as little
 -** as 1 byte of data.
-+SQLITE_PRIVATE int sqlite3WalRead(
-+  Wal *pWal,                      /* WAL handle */
-+  Pgno pgno,                      /* Database page number to read data for */
-+  int *pInWal,                    /* OUT: True if data is read from WAL */
-+  int nOut,                       /* Size of buffer pOut in bytes */
-+  u8 *pOut                        /* Buffer to write page data to */
-+){
-+  u32 iRead = 0;                  /* If !=0, WAL frame to return data from */
-+  u32 iLast = pWal->hdr.mxFrame;  /* Last page in WAL for this reader */
-+  int iHash;                      /* Used to loop through N hash tables */
-+
-+  /* This routine is only be called from within a read transaction. */
-+  assert( pWal->readLock>=0 || pWal->lockError );
-+
-+  /* If the "last page" field of the wal-index header snapshot is 0, then
-+  ** no data will be read from the wal under any circumstances. Return early
-+  ** in this case as an optimization.  Likewise, if pWal->readLock==0, 
-+  ** then the WAL is ignored by the reader so return early, as if the 
-+  ** WAL were empty.
-+  */
-+  if( iLast==0 || pWal->readLock==0 ){
-+    *pInWal = 0;
-+    return SQLITE_OK;
-+  }
-+
-+  /* Search the hash table or tables for an entry matching page number
-+  ** pgno. Each iteration of the following for() loop searches one
-+  ** hash table (each hash table indexes up to HASHTABLE_NPAGE frames).
-+  **
-+  ** This code might run concurrently to the code in walIndexAppend()
-+  ** that adds entries to the wal-index (and possibly to this hash 
-+  ** table). This means the value just read from the hash 
-+  ** slot (aHash[iKey]) may have been added before or after the 
-+  ** current read transaction was opened. Values added after the
-+  ** read transaction was opened may have been written incorrectly -
-+  ** i.e. these slots may contain garbage data. However, we assume
-+  ** that any slots written before the current read transaction was
-+  ** opened remain unmodified.
-+  **
-+  ** For the reasons above, the if(...) condition featured in the inner
-+  ** loop of the following block is more stringent that would be required 
-+  ** if we had exclusive access to the hash-table:
-+  **
-+  **   (aPgno[iFrame]==pgno): 
-+  **     This condition filters out normal hash-table collisions.
-+  **
-+  **   (iFrame<=iLast): 
-+  **     This condition filters out entries that were added to the hash
-+  **     table after the current read-transaction had started.
-+  */
-+  for(iHash=walFramePage(iLast); iHash>=0 && iRead==0; iHash--){
-+    volatile ht_slot *aHash;      /* Pointer to hash table */
-+    volatile u32 *aPgno;          /* Pointer to array of page numbers */
-+    u32 iZero;                    /* Frame number corresponding to aPgno[0] */
-+    int iKey;                     /* Hash slot index */
-+    int nCollide;                 /* Number of hash collisions remaining */
-+    int rc;                       /* Error code */
-+
-+    rc = walHashGet(pWal, iHash, &aHash, &aPgno, &iZero);
-+    if( rc!=SQLITE_OK ){
-+      return rc;
-+    }
-+    nCollide = HASHTABLE_NSLOT;
-+    for(iKey=walHash(pgno); aHash[iKey]; iKey=walNextHash(iKey)){
-+      u32 iFrame = aHash[iKey] + iZero;
-+      if( iFrame<=iLast && aPgno[aHash[iKey]]==pgno ){
-+        assert( iFrame>iRead );
-+        iRead = iFrame;
-+      }
-+      if( (nCollide--)==0 ){
-+        return SQLITE_CORRUPT_BKPT;
-+      }
-+    }
-+  }
-+
-+#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
-+  /* If expensive assert() statements are available, do a linear search
-+  ** of the wal-index file content. Make sure the results agree with the
-+  ** result obtained using the hash indexes above.  */
-+  {
-+    u32 iRead2 = 0;
-+    u32 iTest;
-+    for(iTest=iLast; iTest>0; iTest--){
-+      if( walFramePgno(pWal, iTest)==pgno ){
-+        iRead2 = iTest;
-+        break;
-+      }
-+    }
-+    assert( iRead==iRead2 );
-+  }
-+#endif
-+
-+  /* If iRead is non-zero, then it is the log frame number that contains the
-+  ** required page. Read and return data from the log file.
-+  */
-+  if( iRead ){
-+    int sz;
-+    i64 iOffset;
-+    sz = pWal->hdr.szPage;
-+    sz = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
-+    testcase( sz<=32768 );
-+    testcase( sz>=65536 );
-+    iOffset = walFrameOffset(iRead, sz) + WAL_FRAME_HDRSIZE;
-+    *pInWal = 1;
-+    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
-+    return sqlite3OsRead(pWal->pWalFd, pOut, nOut, iOffset);
-+  }
-+
-+  *pInWal = 0;
-+  return SQLITE_OK;
-+}
-+
-+
-+/* 
-+** Return the size of the database in pages (or zero, if unknown).
-+*/
-+SQLITE_PRIVATE Pgno sqlite3WalDbsize(Wal *pWal){
-+  if( pWal && ALWAYS(pWal->readLock>=0) ){
-+    return pWal->hdr.nPage;
-+  }
-+  return 0;
-+}
-+
-+
-+/* 
-+** This function starts a write transaction on the WAL.
- **
+-**
 -**    SIZE    DESCRIPTION
 -**      4     Page number of next overflow page
 -**      *     Data
-+** A read transaction must have already been started by a prior call
-+** to sqlite3WalBeginReadTransaction().
- **
+-**
 -** Freelist pages come in two subtypes: trunk pages and leaf pages.  The
 -** file header points to the first in a linked list of trunk page.  Each trunk
 -** page points to multiple leaf pages.  The content of a leaf page is
 -** unspecified.  A trunk page looks like this:
-+** If another thread or process has written into the database since
-+** the read transaction was started, then it is not possible for this
-+** thread to write as doing so would cause a fork.  So this routine
-+** returns SQLITE_BUSY in that case and no write transaction is started.
- **
+-**
 -**    SIZE    DESCRIPTION
 -**      4     Page number of next trunk page
 -**      4     Number of leaf pointers on this page
 -**      *     zero or more pages numbers of leaves
-+** There can only be a single writer active at a time.
- */
-+SQLITE_PRIVATE int sqlite3WalBeginWriteTransaction(Wal *pWal){
-+  int rc;
- 
-+  /* Cannot start a write transaction without first holding a read
-+  ** transaction. */
-+  assert( pWal->readLock>=0 );
- 
+-*/
+-
+-
 -/* The following value is the maximum cell size assuming a maximum page
 -** size give above.
 -*/
 -#define MX_CELL_SIZE(pBt)  ((int)(pBt->pageSize-8))
-+  if( pWal->readOnly ){
-+    return SQLITE_READONLY;
-+  }
- 
+-
 -/* The maximum number of cells on a single page of the database.  This
 -** assumes a minimum cell size of 6 bytes  (4 bytes for the cell itself
 -** plus 2 bytes for the index to the cell in the page header).  Such
 -** small cells will be rare, but they are possible.
 -*/
 -#define MX_CELL(pBt) ((pBt->pageSize-8)/6)
-+  /* Only one writer allowed at a time.  Get the write lock.  Return
-+  ** SQLITE_BUSY if unable.
-+  */
-+  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
-+  if( rc ){
-+    return rc;
-+  }
-+  pWal->writeLock = 1;
- 
+-
 -/* Forward declarations */
 -typedef struct MemPage MemPage;
 -typedef struct BtLock BtLock;
-+  /* If another connection has written to the database file since the
-+  ** time the read transaction on this connection was started, then
-+  ** the write is disallowed.
-+  */
-+  if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))!=0 ){
-+    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
-+    pWal->writeLock = 0;
-+    rc = SQLITE_BUSY;
-+  }
- 
+-
 -/*
 -** This is a magic string that appears at the beginning of every
 -** SQLite database in order to identify the file as a real database.
@@ -18998,48 +3084,29 @@
 -#ifndef SQLITE_FILE_HEADER /* 123456789 123456 */
 -#  define SQLITE_FILE_HEADER "SQLite format 3"
 -#endif
-+  return rc;
-+}
- 
- /*
+-
+-/*
 -** Page type flags.  An ORed combination of these flags appear as the
 -** first byte of on-disk image of every BTree page.
-+** End a write transaction.  The commit has already been done.  This
-+** routine merely releases the lock.
- */
+-*/
 -#define PTF_INTKEY    0x01
 -#define PTF_ZERODATA  0x02
 -#define PTF_LEAFDATA  0x04
 -#define PTF_LEAF      0x08
-+SQLITE_PRIVATE int sqlite3WalEndWriteTransaction(Wal *pWal){
-+  if( pWal->writeLock ){
-+    walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
-+    pWal->writeLock = 0;
-+  }
-+  return SQLITE_OK;
-+}
- 
- /*
+-
+-/*
 -** As each page of the file is loaded into memory, an instance of the following
 -** structure is appended and initialized to zero.  This structure stores
 -** information about the page that is decoded from the raw file page.
-+** If any data has been written (but not committed) to the log file, this
-+** function moves the write-pointer back to the start of the transaction.
- **
+-**
 -** The pParent field points back to the parent page.  This allows us to
 -** walk up the BTree from any leaf to the root.  Care must be taken to
 -** unref() the parent page pointer when this page is no longer referenced.
 -** The pageDestructor() routine handles that chore.
-+** Additionally, the callback function is invoked for each frame written
-+** to the WAL since the start of the transaction. If the callback returns
-+** other than SQLITE_OK, it is not invoked again and the error code is
-+** returned to the caller.
- **
+-**
 -** Access to all fields of this structure is controlled by the mutex
 -** stored in MemPage.pBt->mutex.
-+** Otherwise, if the callback function does not return an error, this
-+** function returns SQLITE_OK.
- */
+-*/
 -struct MemPage {
 -  u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
 -  u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
@@ -19063,337 +3130,27 @@
 -  DbPage *pDbPage;     /* Pager page handle */
 -  Pgno pgno;           /* Page number for this page */
 -};
-+SQLITE_PRIVATE int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
-+  int rc = SQLITE_OK;
-+  if( ALWAYS(pWal->writeLock) ){
-+    Pgno iMax = pWal->hdr.mxFrame;
-+    Pgno iFrame;
-+  
-+    /* Restore the clients cache of the wal-index header to the state it
-+    ** was in before the client began writing to the database. 
-+    */
-+    memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));
- 
+-
 -/*
 -** The in-memory image of a disk page has the auxiliary information appended
 -** to the end.  EXTRA_SIZE is the number of bytes of space needed to hold
 -** that extra information.
-+    for(iFrame=pWal->hdr.mxFrame+1; 
-+        ALWAYS(rc==SQLITE_OK) && iFrame<=iMax; 
-+        iFrame++
-+    ){
-+      /* This call cannot fail. Unless the page for which the page number
-+      ** is passed as the second argument is (a) in the cache and 
-+      ** (b) has an outstanding reference, then xUndo is either a no-op
-+      ** (if (a) is false) or simply expels the page from the cache (if (b)
-+      ** is false).
-+      **
-+      ** If the upper layer is doing a rollback, it is guaranteed that there
-+      ** are no outstanding references to any page other than page 1. And
-+      ** page 1 is never written to the log until the transaction is
-+      ** committed. As a result, the call to xUndo may not fail.
-+      */
-+      assert( walFramePgno(pWal, iFrame)!=1 );
-+      rc = xUndo(pUndoCtx, walFramePgno(pWal, iFrame));
-+    }
-+    walCleanupHash(pWal);
-+  }
-+  assert( rc==SQLITE_OK );
-+  return rc;
-+}
-+
-+/* 
-+** Argument aWalData must point to an array of WAL_SAVEPOINT_NDATA u32 
-+** values. This function populates the array with values required to 
-+** "rollback" the write position of the WAL handle back to the current 
-+** point in the event of a savepoint rollback (via WalSavepointUndo()).
- */
+-*/
 -#define EXTRA_SIZE sizeof(MemPage)
-+SQLITE_PRIVATE void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
-+  assert( pWal->writeLock );
-+  aWalData[0] = pWal->hdr.mxFrame;
-+  aWalData[1] = pWal->hdr.aFrameCksum[0];
-+  aWalData[2] = pWal->hdr.aFrameCksum[1];
-+  aWalData[3] = pWal->nCkpt;
-+}
-+
-+/* 
-+** Move the write position of the WAL back to the point identified by
-+** the values in the aWalData[] array. aWalData must point to an array
-+** of WAL_SAVEPOINT_NDATA u32 values that has been previously populated
-+** by a call to WalSavepoint().
-+*/
-+SQLITE_PRIVATE int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
-+  int rc = SQLITE_OK;
-+
-+  assert( pWal->writeLock );
-+  assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame );
-+
-+  if( aWalData[3]!=pWal->nCkpt ){
-+    /* This savepoint was opened immediately after the write-transaction
-+    ** was started. Right after that, the writer decided to wrap around
-+    ** to the start of the log. Update the savepoint values to match.
-+    */
-+    aWalData[0] = 0;
-+    aWalData[3] = pWal->nCkpt;
-+  }
-+
-+  if( aWalData[0]<pWal->hdr.mxFrame ){
-+    pWal->hdr.mxFrame = aWalData[0];
-+    pWal->hdr.aFrameCksum[0] = aWalData[1];
-+    pWal->hdr.aFrameCksum[1] = aWalData[2];
-+    walCleanupHash(pWal);
-+  }
-+
-+  return rc;
-+}
- 
- /*
+-
+-/*
 -** A linked list of the following structures is stored at BtShared.pLock.
 -** Locks are added (or upgraded from READ_LOCK to WRITE_LOCK) when a cursor 
 -** is opened on the table with root page BtShared.iTable. Locks are removed
 -** from this list when a transaction is committed or rolled back, or when
 -** a btree handle is closed.
-+** This function is called just before writing a set of frames to the log
-+** file (see sqlite3WalFrames()). It checks to see if, instead of appending
-+** to the current log file, it is possible to overwrite the start of the
-+** existing log file with the new frames (i.e. "reset" the log). If so,
-+** it sets pWal->hdr.mxFrame to 0. Otherwise, pWal->hdr.mxFrame is left
-+** unchanged.
-+**
-+** SQLITE_OK is returned if no error is encountered (regardless of whether
-+** or not pWal->hdr.mxFrame is modified). An SQLite error code is returned
-+** if an error occurs.
- */
+-*/
 -struct BtLock {
 -  Btree *pBtree;        /* Btree handle holding this lock */
 -  Pgno iTable;          /* Root page of table */
 -  u8 eLock;             /* READ_LOCK or WRITE_LOCK */
 -  BtLock *pNext;        /* Next in BtShared.pLock list */
 -};
-+static int walRestartLog(Wal *pWal){
-+  int rc = SQLITE_OK;
-+  int cnt;
-+
-+  if( pWal->readLock==0 ){
-+    volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
-+    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
-+    if( pInfo->nBackfill>0 ){
-+      u32 salt1;
-+      sqlite3_randomness(4, &salt1);
-+      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
-+      if( rc==SQLITE_OK ){
-+        /* If all readers are using WAL_READ_LOCK(0) (in other words if no
-+        ** readers are currently using the WAL), then the transactions
-+        ** frames will overwrite the start of the existing log. Update the
-+        ** wal-index header to reflect this.
-+        **
-+        ** In theory it would be Ok to update the cache of the header only
-+        ** at this point. But updating the actual wal-index header is also
-+        ** safe and means there is no special case for sqlite3WalUndo()
-+        ** to handle if this transaction is rolled back.
-+        */
-+        int i;                    /* Loop counter */
-+        u32 *aSalt = pWal->hdr.aSalt;       /* Big-endian salt values */
-+
-+        /* Limit the size of WAL file if the journal_size_limit PRAGMA is
-+        ** set to a non-negative value.  Log errors encountered
-+        ** during the truncation attempt. */
-+        if( pWal->mxWalSize>=0 ){
-+          i64 sz;
-+          int rx;
-+          sqlite3BeginBenignMalloc();
-+          rx = sqlite3OsFileSize(pWal->pWalFd, &sz);
-+          if( rx==SQLITE_OK && (sz > pWal->mxWalSize) ){
-+            rx = sqlite3OsTruncate(pWal->pWalFd, pWal->mxWalSize);
-+          }
-+          sqlite3EndBenignMalloc();
-+          if( rx ){
-+            sqlite3_log(rx, "cannot limit WAL size: %s", pWal->zWalName);
-+          }
-+        }
-+
-+        pWal->nCkpt++;
-+        pWal->hdr.mxFrame = 0;
-+        sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
-+        aSalt[1] = salt1;
-+        walIndexWriteHdr(pWal);
-+        pInfo->nBackfill = 0;
-+        for(i=1; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
-+        assert( pInfo->aReadMark[0]==0 );
-+        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
-+      }else if( rc!=SQLITE_BUSY ){
-+        return rc;
-+      }
-+    }
-+    walUnlockShared(pWal, WAL_READ_LOCK(0));
-+    pWal->readLock = -1;
-+    cnt = 0;
-+    do{
-+      int notUsed;
-+      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
-+    }while( rc==WAL_RETRY );
-+    assert( (rc&0xff)!=SQLITE_BUSY ); /* BUSY not possible when useWal==1 */
-+    testcase( (rc&0xff)==SQLITE_IOERR );
-+    testcase( rc==SQLITE_PROTOCOL );
-+    testcase( rc==SQLITE_OK );
-+  }
-+  return rc;
-+}
-+
-+/* 
-+** Write a set of frames to the log. The caller must hold the write-lock
-+** on the log file (obtained using sqlite3WalBeginWriteTransaction()).
-+*/
-+SQLITE_PRIVATE int sqlite3WalFrames(
-+  Wal *pWal,                      /* Wal handle to write to */
-+  int szPage,                     /* Database page-size in bytes */
-+  PgHdr *pList,                   /* List of dirty pages to write */
-+  Pgno nTruncate,                 /* Database size after this commit */
-+  int isCommit,                   /* True if this is a commit */
-+  int sync_flags                  /* Flags to pass to OsSync() (or 0) */
-+){
-+  int rc;                         /* Used to catch return codes */
-+  u32 iFrame;                     /* Next frame address */
-+  u8 aFrame[WAL_FRAME_HDRSIZE];   /* Buffer to assemble frame-header in */
-+  PgHdr *p;                       /* Iterator to run through pList with. */
-+  PgHdr *pLast = 0;               /* Last frame in list */
-+  int nLast = 0;                  /* Number of extra copies of last page */
-+
-+  assert( pList );
-+  assert( pWal->writeLock );
-+
-+#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
-+  { int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
-+    WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
-+              pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
-+  }
-+#endif
-+
-+  /* See if it is possible to write these frames into the start of the
-+  ** log file, instead of appending to it at pWal->hdr.mxFrame.
-+  */
-+  if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
-+    return rc;
-+  }
-+
-+  /* If this is the first frame written into the log, write the WAL
-+  ** header to the start of the WAL file. See comments at the top of
-+  ** this source file for a description of the WAL header format.
-+  */
-+  iFrame = pWal->hdr.mxFrame;
-+  if( iFrame==0 ){
-+    u8 aWalHdr[WAL_HDRSIZE];      /* Buffer to assemble wal-header in */
-+    u32 aCksum[2];                /* Checksum for wal-header */
-+
-+    sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
-+    sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION);
-+    sqlite3Put4byte(&aWalHdr[8], szPage);
-+    sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
-+    sqlite3_randomness(8, pWal->hdr.aSalt);
-+    memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
-+    walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum);
-+    sqlite3Put4byte(&aWalHdr[24], aCksum[0]);
-+    sqlite3Put4byte(&aWalHdr[28], aCksum[1]);
-+    
-+    pWal->szPage = szPage;
-+    pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
-+    pWal->hdr.aFrameCksum[0] = aCksum[0];
-+    pWal->hdr.aFrameCksum[1] = aCksum[1];
-+
-+    rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
-+    WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
-+    if( rc!=SQLITE_OK ){
-+      return rc;
-+    }
-+  }
-+  assert( (int)pWal->szPage==szPage );
-+
-+  /* Write the log file. */
-+  for(p=pList; p; p=p->pDirty){
-+    u32 nDbsize;                  /* Db-size field for frame header */
-+    i64 iOffset;                  /* Write offset in log file */
-+    void *pData;
-+   
-+    iOffset = walFrameOffset(++iFrame, szPage);
-+    /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
-+    
-+    /* Populate and write the frame header */
-+    nDbsize = (isCommit && p->pDirty==0) ? nTruncate : 0;
-+#if defined(SQLITE_HAS_CODEC)
-+    if( (pData = sqlite3PagerCodec(p))==0 ) return SQLITE_NOMEM;
-+#else
-+    pData = p->pData;
-+#endif
-+    walEncodeFrame(pWal, p->pgno, nDbsize, pData, aFrame);
-+    rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
-+    if( rc!=SQLITE_OK ){
-+      return rc;
-+    }
-+
-+    /* Write the page data */
-+    rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset+sizeof(aFrame));
-+    if( rc!=SQLITE_OK ){
-+      return rc;
-+    }
-+    pLast = p;
-+  }
-+
-+  /* Sync the log file if the 'isSync' flag was specified. */
-+  if( sync_flags ){
-+    i64 iSegment = sqlite3OsSectorSize(pWal->pWalFd);
-+    i64 iOffset = walFrameOffset(iFrame+1, szPage);
-+
-+    assert( isCommit );
-+    assert( iSegment>0 );
-+
-+    iSegment = (((iOffset+iSegment-1)/iSegment) * iSegment);
-+    while( iOffset<iSegment ){
-+      void *pData;
-+#if defined(SQLITE_HAS_CODEC)
-+      if( (pData = sqlite3PagerCodec(pLast))==0 ) return SQLITE_NOMEM;
-+#else
-+      pData = pLast->pData;
-+#endif
-+      walEncodeFrame(pWal, pLast->pgno, nTruncate, pData, aFrame);
-+      /* testcase( IS_BIG_INT(iOffset) ); // requires a 4GiB WAL */
-+      rc = sqlite3OsWrite(pWal->pWalFd, aFrame, sizeof(aFrame), iOffset);
-+      if( rc!=SQLITE_OK ){
-+        return rc;
-+      }
-+      iOffset += WAL_FRAME_HDRSIZE;
-+      rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOffset); 
-+      if( rc!=SQLITE_OK ){
-+        return rc;
-+      }
-+      nLast++;
-+      iOffset += szPage;
-+    }
-+
-+    rc = sqlite3OsSync(pWal->pWalFd, sync_flags);
-+  }
-+
-+  /* Append data to the wal-index. It is not necessary to lock the 
-+  ** wal-index to do this as the SQLITE_SHM_WRITE lock held on the wal-index
-+  ** guarantees that there are no other writers, and no data that may
-+  ** be in use by existing readers is being overwritten.
-+  */
-+  iFrame = pWal->hdr.mxFrame;
-+  for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){
-+    iFrame++;
-+    rc = walIndexAppend(pWal, iFrame, p->pgno);
-+  }
-+  while( nLast>0 && rc==SQLITE_OK ){
-+    iFrame++;
-+    nLast--;
-+    rc = walIndexAppend(pWal, iFrame, pLast->pgno);
-+  }
-+
-+  if( rc==SQLITE_OK ){
-+    /* Update the private copy of the header. */
-+    pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
-+    testcase( szPage<=32768 );
-+    testcase( szPage>=65536 );
 +    pWal->hdr.mxFrame = iFrame;
 +    if( isCommit ){
 +      pWal->hdr.iChange++;
@@ -19917,19 +3674,24 @@
  #ifndef SQLITE_OMIT_SHARED_CACHE
  #if SQLITE_THREADSAFE
  
-@@ -83634,6 +84834,11 @@
+@@ -85801,10 +87170,16 @@
+ */
+ SQLITE_PRIVATE void sqlite3RegisterBuiltinFunctions(sqlite3 *db){
+   int rc = sqlite3_overload_function(db, "MATCH", 2);
++#ifndef OMIT_EXPORT
++  extern void sqlcipher_exportFunc(sqlite3_context *, int, sqlite3_value **);
++#endif
+   assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
    if( rc==SQLITE_NOMEM ){
      db->mallocFailed = 1;
    }
-+
 +#ifndef OMIT_EXPORT
-+  extern void sqlcipher_exportFunc(sqlite3_context *, int, sqlite3_value **);
 +  sqlite3CreateFunc(db, "sqlcipher_export", 1, SQLITE_TEXT, 0, sqlcipher_exportFunc, 0, 0, 0);
 +#endif
  }
  
  /*
-@@ -88929,60 +90134,6 @@
+@@ -91158,60 +92533,6 @@
  
  #ifndef SQLITE_OMIT_SCHEMA_PRAGMAS
    /*
@@ -19990,7 +3752,7 @@
    **   PRAGMA table_info(<table>)
    **
    ** Return a single row for each column of the named table. The columns of
-@@ -89620,6 +90771,36 @@
+@@ -91849,6 +93170,40 @@
        sqlite3_rekey(db, zKey, i/2);
      }
    }else
@@ -20007,6 +3769,10 @@
 +    extern int codec_set_kdf_iter(sqlite3*, int, int, int);
 +    codec_set_kdf_iter(db, iDb, atoi(zRight), 2); // change of RW PBKDF2 iteration
 +  }else
++  if( sqlite3StrICmp(zLeft, "fast_kdf_iter")==0 && zRight ){
++    extern int codec_set_fast_kdf_iter(sqlite3*, int, int, int);
++    codec_set_fast_kdf_iter(db, iDb, atoi(zRight), 2); // change of RW PBKDF2 iteration
++  }else
 +  if( sqlite3StrICmp(zLeft, "rekey_kdf_iter")==0 && zRight ){
 +    extern int codec_set_kdf_iter(sqlite3*, int, int, int); 
 +    codec_set_kdf_iter(db, iDb, atoi(zRight), 1); // change # if W iterations
[Date Prev][Date Next] [Thread Prev][Thread Next] [Thread Index] [Date Index] [Author Index]