#ifndef SQLITE_OMIT_WAL
#include "wal.h"
/*
** Trace output.  In builds that define both SQLITE_TEST and SQLITE_DEBUG,
** WALTRACE(X) passes the parenthesized argument list X to
** sqlite3DebugPrintf() whenever the global sqlite3WalTrace is non-zero.
** In every other build WALTRACE(X) expands to nothing.
*/
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
int sqlite3WalTrace = 0;
# define WALTRACE(X) if(sqlite3WalTrace) sqlite3DebugPrintf X
#else
# define WALTRACE(X)
#endif
/*
** WAL_MAX_VERSION is the only value accepted for the version field of the
** WAL file header during recovery.  WALINDEX_MAX_VERSION is the value
** written to, and required of, WalIndexHdr.iVersion.
*/
#define WAL_MAX_VERSION 3007000
#define WALINDEX_MAX_VERSION 3007000
/*
** Indexes of the wal-index locks, used as the lockIdx argument of the
** walLock*()/walUnlock*() helpers.  WAL_ALL_BUT_WRITE and WAL_CKPT_LOCK
** share the same index.  Read locks occupy indexes 3 and up, and
** WAL_NREADER is the number of read locks (SQLITE_SHM_NLOCK minus the
** three non-reader locks).
*/
#define WAL_WRITE_LOCK 0
#define WAL_ALL_BUT_WRITE 1
#define WAL_CKPT_LOCK 1
#define WAL_RECOVER_LOCK 2
#define WAL_READ_LOCK(I) (3+(I))
#define WAL_NREADER (SQLITE_SHM_NLOCK-3)
/* Forward declarations of the object types defined below. */
typedef struct WalIndexHdr WalIndexHdr;
typedef struct WalIterator WalIterator;
typedef struct WalCkptInfo WalCkptInfo;
/*
** The wal-index header.  Two copies of this structure are stored back to
** back at the start of the first wal-index page (see walIndexWriteHdr()
** and walIndexTryHdr()); each connection also keeps a private copy in
** Wal.hdr.  The field order and sizes define the shared-memory layout, so
** they must not change (sqlite3WalOpen() asserts sizeof==48).
*/
struct WalIndexHdr {
/* Wal-index format version; must equal WALINDEX_MAX_VERSION */
u32 iVersion;
/* Not referenced by the code in this part of the file */
u32 unused;
/* NOTE(review): presumably a change counter - not referenced in this part */
u32 iChange;
/* Set to 1 when the header is written; 0 means "not initialized" */
u8 isInit;
/* Low bit of the WAL magic number: selects the checksum byte order */
u8 bigEndCksum;
/* Encoded page size; decode with walPagesize() */
u16 szPage;
/* Index of the last valid (committed) frame in the WAL, 0 if none */
u32 mxFrame;
/* Database size in pages, taken from the last commit frame */
u32 nPage;
/* Running checksum of the last frame accounted for */
u32 aFrameCksum[2];
/* Salt values copied from bytes 16..23 of the WAL header */
u32 aSalt[2];
/* Checksum over all of the preceding fields of this structure */
u32 aCksum[2];
};
/*
** Checkpoint bookkeeping stored in the first wal-index page immediately
** after the two WalIndexHdr copies (see walCkptInfo()).  The layout is
** shared between processes; sqlite3WalOpen() asserts sizeof==40.
*/
struct WalCkptInfo {
/* Number of WAL frames already copied back into the database file */
u32 nBackfill;
/* Per-reader marks; READMARK_NOT_USED means the slot is free */
u32 aReadMark[WAL_NREADER];
/* Only its offset is used here: it defines WALINDEX_LOCK_OFFSET */
u8 aLock[SQLITE_SHM_NLOCK];
/* Last frame a checkpoint has attempted (or is attempting) to backfill */
u32 nBackfillAttempted;
/* Not referenced by the code in this part of the file */
u32 notUsed0;
};
/* Value stored in WalCkptInfo.aReadMark[] for a slot that is not in use. */
#define READMARK_NOT_USED 0xffffffff
/*
** WALINDEX_LOCK_OFFSET is the byte offset of WalCkptInfo.aLock[] within the
** first wal-index page.  WALINDEX_HDR_SIZE is the total size in bytes of
** the header area: two WalIndexHdr copies followed by one WalCkptInfo.
*/
#define WALINDEX_LOCK_OFFSET (sizeof(WalIndexHdr)*2+offsetof(WalCkptInfo,aLock))
#define WALINDEX_HDR_SIZE (sizeof(WalIndexHdr)*2+sizeof(WalCkptInfo))
/* Size in bytes of the header that precedes each frame in the WAL file. */
#define WAL_FRAME_HDRSIZE 24
/* Size in bytes of the header at the start of the WAL file. */
#define WAL_HDRSIZE 32
/*
** Magic number at the start of the WAL file.  Recovery masks off the low
** bit before comparing and stores that bit in WalIndexHdr.bigEndCksum.
*/
#define WAL_MAGIC 0x377f0682
/*
** Byte offset in the WAL file of frame iFrame (1-based) for pages of szPage
** bytes.  The multiplication is performed in 64 bits.
*/
#define walFrameOffset(iFrame, szPage) ( \
WAL_HDRSIZE + ((iFrame)-1)*(i64)((szPage)+WAL_FRAME_HDRSIZE) \
)
/*
** An open write-ahead-log connection.  One is allocated by sqlite3WalOpen()
** and released by sqlite3WalClose().
*/
struct Wal {
/* VFS used to open and delete the WAL file */
sqlite3_vfs *pVfs;
/* Database file handle; also the handle used for shared-memory calls */
sqlite3_file *pDbFd;
/* WAL file handle; its storage directly follows this structure */
sqlite3_file *pWalFd;
/* Not referenced by the code in this part of the file */
u32 iCallback;
/* WAL size limit in bytes as given to sqlite3WalOpen()/sqlite3WalLimit() */
i64 mxWalSize;
/* Number of entries in apWiData[] */
int nWiData;
/* Not referenced by the code in this part of the file */
int szFirstBlock;
/* Pointers to wal-index pages; an entry is 0 until the page is mapped */
volatile u32 **apWiData;
/* Database page size in bytes */
u32 szPage;
/* Index of the read lock held, or -1 when none is held */
i16 readLock;
/* Not referenced by the code in this part of the file */
u8 syncFlags;
/* One of WAL_NORMAL_MODE, WAL_EXCLUSIVE_MODE, WAL_HEAPMEMORY_MODE */
u8 exclusiveMode;
/* Non-zero while WAL_WRITE_LOCK is held */
u8 writeLock;
/* Non-zero while the checkpoint lock is held */
u8 ckptLock;
/* WAL_RDWR, WAL_RDONLY, possibly OR-ed with WAL_SHM_RDONLY */
u8 readOnly;
/* Not referenced by the code in this part of the file */
u8 truncateOnCommit;
/* 1 by default; cleared when the device reports SQLITE_IOCAP_SEQUENTIAL */
u8 syncHeader;
/* 1 by default; cleared for SQLITE_IOCAP_POWERSAFE_OVERWRITE devices */
u8 padToSectorBoundary;
/* Set when mapping shared memory failed with SQLITE_READONLY_CANTINIT */
u8 bShmUnreliable;
/* Private copy of the wal-index header */
WalIndexHdr hdr;
/* Not referenced by the code in this part of the file */
u32 minFrame;
/* When non-zero, walEncodeFrame() zeroes the salt and checksum fields */
u32 iReCksum;
/* Name of the WAL file; the pointer is stored, the string is not copied */
const char *zWalName;
/* Counter read from bytes 12..15 of the WAL header; bumped on restart */
u32 nCkpt;
#ifdef SQLITE_DEBUG
/* True if the last lock attempt failed with something other than BUSY */
u8 lockError;
#endif
#ifdef SQLITE_ENABLE_SNAPSHOT
/* Not referenced by the code in this part of the file */
WalIndexHdr *pSnapshot;
#endif
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
/* Connection whose busyTimeout is used for blocking locks */
sqlite3 *db;
#endif
};
/*
** Values for Wal.exclusiveMode.  Any non-zero value makes the walLock*()
** helpers return without calling the VFS.  In WAL_HEAPMEMORY_MODE the
** wal-index pages are heap allocations rather than shared-memory mappings.
*/
#define WAL_NORMAL_MODE 0
#define WAL_EXCLUSIVE_MODE 1
#define WAL_HEAPMEMORY_MODE 2
/*
** Values for Wal.readOnly.  WAL_RDONLY is stored when the WAL file could
** only be opened read-only; WAL_SHM_RDONLY is OR-ed in when the shared
** memory could only be mapped read-only.
*/
#define WAL_RDWR 0
#define WAL_RDONLY 1
#define WAL_SHM_RDONLY 2
/* Type of one hash-table slot in a wal-index page. */
typedef u16 ht_slot;
/*
** Iterator over the pages recorded in the WAL, in ascending page-number
** order.  Built by walIteratorInit(), advanced by walIteratorNext() and
** released by walIteratorFree().  aSegment[] is over-allocated to hold
** nSegment entries.
*/
struct WalIterator {
/* Last page number returned by walIteratorNext() (0 initially) */
u32 iPrior;
/* Number of entries in aSegment[] */
int nSegment;
struct WalSegment {
/* Next entry of aIndex[] to consider */
int iNext;
/* Slot numbers into aPgno[], sorted by page number */
ht_slot *aIndex;
/* Page-number array of the corresponding wal-index page */
u32 *aPgno;
/* Number of usable entries in aIndex[] */
int nEntry;
/* Added to an aIndex[] value to obtain the WAL frame number */
int iZero;
} aSegment[1];
};
/*
** Geometry of a wal-index page.
**
**   HASHTABLE_NPAGE      page-number entries per wal-index page
**   HASHTABLE_HASH_1     multiplier used by walHash()
**   HASHTABLE_NSLOT      hash slots per page (twice the number of entries)
**   HASHTABLE_NPAGE_ONE  entries on the first page, which loses
**                        WALINDEX_HDR_SIZE bytes to the header area
**   WALINDEX_PGSZ        size in bytes of one wal-index page
*/
#define HASHTABLE_NPAGE 4096
#define HASHTABLE_HASH_1 383
#define HASHTABLE_NSLOT (HASHTABLE_NPAGE*2)
#define HASHTABLE_NPAGE_ONE (HASHTABLE_NPAGE - (WALINDEX_HDR_SIZE/sizeof(u32)))
#define WALINDEX_PGSZ ( \
sizeof(ht_slot)*HASHTABLE_NSLOT + HASHTABLE_NPAGE*sizeof(u32) \
)
/*
** Slow path of walIndexPage(): obtain wal-index page iPage when it is not
** yet present in pWal->apWiData[].
**
**   pWal    The WAL connection.
**   iPage   Index of the wal-index page wanted.
**   ppPage  OUT: pointer to the page, or 0 if it could not be obtained.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if an allocation fails, or the
** error returned by sqlite3OsShmMap().  A plain SQLITE_READONLY result from
** the VFS is converted to SQLITE_OK after WAL_SHM_RDONLY has been recorded
** in pWal->readOnly.
*/
static SQLITE_NOINLINE int walIndexPageRealloc(
Wal *pWal,
int iPage,
volatile u32 **ppPage
){
int rc = SQLITE_OK;
/* Grow apWiData[] so that index iPage exists; new entries start at 0. */
if( pWal->nWiData<=iPage ){
sqlite3_int64 nByte = sizeof(u32*)*(iPage+1);
volatile u32 **apNew;
apNew = (volatile u32 **)sqlite3Realloc((void *)pWal->apWiData, nByte);
if( !apNew ){
*ppPage = 0;
return SQLITE_NOMEM_BKPT;
}
memset((void*)&apNew[pWal->nWiData], 0,
sizeof(u32*)*(iPage+1-pWal->nWiData));
pWal->apWiData = apNew;
pWal->nWiData = iPage+1;
}
/* The caller only gets here when the page is not already mapped. */
assert( pWal->apWiData[iPage]==0 );
if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
/* Heap-memory mode: the "shared" page is a private zeroed allocation. */
pWal->apWiData[iPage] = (u32 volatile *)sqlite3MallocZero(WALINDEX_PGSZ);
if( !pWal->apWiData[iPage] ) rc = SQLITE_NOMEM_BKPT;
}else{
/* Ask the VFS for the page; pWal->writeLock is passed as the flag that
** (presumably) permits the VFS to extend the shared memory. */
rc = sqlite3OsShmMap(pWal->pDbFd, iPage, WALINDEX_PGSZ,
pWal->writeLock, (void volatile **)&pWal->apWiData[iPage]
);
assert( pWal->apWiData[iPage]!=0
|| rc!=SQLITE_OK
|| (pWal->writeLock==0 && iPage==0) );
testcase( pWal->apWiData[iPage]==0 && rc==SQLITE_OK );
if( rc==SQLITE_OK ){
/* NOTE(review): sqlite3FaultSim(600) looks like a fault-injection hook
** that simulates an OOM for pages other than the first - confirm. */
if( iPage>0 && sqlite3FaultSim(600) ) rc = SQLITE_NOMEM;
}else if( (rc&0xff)==SQLITE_READONLY ){
/* Any SQLITE_READONLY_xxx code marks the shared memory read-only; only
** the unextended SQLITE_READONLY code is treated as success. */
pWal->readOnly |= WAL_SHM_RDONLY;
if( rc==SQLITE_READONLY ){
rc = SQLITE_OK;
}
}
}
*ppPage = pWal->apWiData[iPage];
assert( iPage==0 || *ppPage || rc!=SQLITE_OK );
return rc;
}
/*
** Fetch a pointer to wal-index page iPage and store it in *ppPage.
**
** If the page is already present in pWal->apWiData[] the cached pointer is
** returned with SQLITE_OK.  Otherwise the work is delegated to
** walIndexPageRealloc(), whose result code is returned.
*/
static int walIndexPage(
  Wal *pWal,
  int iPage,
  volatile u32 **ppPage
){
  if( iPage<pWal->nWiData ){
    volatile u32 *pCached = pWal->apWiData[iPage];
    *ppPage = pCached;
    if( pCached!=0 ){
      return SQLITE_OK;
    }
  }
  return walIndexPageRealloc(pWal, iPage, ppPage);
}
/*
** Return a pointer to the WalCkptInfo structure held in the first wal-index
** page.  It starts right after the two WalIndexHdr copies, that is
** sizeof(WalIndexHdr)/2 32-bit words into the page.  The first page must
** already be mapped.
*/
static volatile WalCkptInfo *walCkptInfo(Wal *pWal){
  volatile u32 *aPage0;
  assert( pWal->nWiData>0 && pWal->apWiData[0] );
  aPage0 = pWal->apWiData[0];
  return (volatile WalCkptInfo*)(aPage0 + sizeof(WalIndexHdr)/2);
}
/*
** Return a pointer to the first of the two WalIndexHdr copies at the start
** of the first wal-index page.  The first page must already be mapped.
*/
static volatile WalIndexHdr *walIndexHdr(Wal *pWal){
  volatile u32 *aPage0;
  assert( pWal->nWiData>0 && pWal->apWiData[0] );
  aPage0 = pWal->apWiData[0];
  return (volatile WalIndexHdr*)aPage0;
}
/*
** Reverse the byte order of a 32-bit value.  The argument is evaluated four
** times, so it must not have side effects.
*/
#define BYTESWAP32(x) ( \
(((x)&0x000000FF)<<24) + (((x)&0x0000FF00)<<8) \
+ (((x)&0x00FF0000)>>8) + (((x)&0xFF000000)>>24) \
)
/*
** Compute the two-word running checksum of the nByte bytes at a[].
**
**   nativeCksum  True to sum the 32-bit words as stored; false to
**                byte-swap each word first.
**   a            Content to checksum.
**   nByte        Number of bytes; a multiple of 8, between 8 and 65536.
**   aIn          Initial checksum (two words), or NULL to start from 0,0.
**   aOut         OUT: resulting checksum (two words).  May alias aIn.
**
** When the length is a multiple of 64 the native loop is unrolled eight
** times; the result is the same as that of the simple loop.
*/
static void walChecksumBytes(
  int nativeCksum,
  u8 *a,
  int nByte,
  const u32 *aIn,
  u32 *aOut
){
  u32 *aData = (u32 *)a;
  u32 *aEnd = (u32 *)&a[nByte];
  u32 s1 = 0;
  u32 s2 = 0;

  if( aIn ){
    s1 = aIn[0];
    s2 = aIn[1];
  }

  assert( nByte>=8 );
  assert( (nByte&0x00000007)==0 );
  assert( nByte<=65536 );
  assert( nByte%4==0 );

  if( !nativeCksum ){
    /* Non-native byte order: swap each word before adding it in. */
    do{
      u32 w0 = aData[0];
      u32 w1 = aData[1];
      s1 += BYTESWAP32(w0) + s2;
      s2 += BYTESWAP32(w1) + s1;
      aData += 2;
    }while( aData<aEnd );
  }else if( nByte%64==0 ){
    /* Native byte order, sixteen words per iteration. */
    do{
      s1 += aData[0] + s2;
      s2 += aData[1] + s1;
      s1 += aData[2] + s2;
      s2 += aData[3] + s1;
      s1 += aData[4] + s2;
      s2 += aData[5] + s1;
      s1 += aData[6] + s2;
      s2 += aData[7] + s1;
      s1 += aData[8] + s2;
      s2 += aData[9] + s1;
      s1 += aData[10] + s2;
      s2 += aData[11] + s1;
      s1 += aData[12] + s2;
      s2 += aData[13] + s1;
      s1 += aData[14] + s2;
      s2 += aData[15] + s1;
      aData += 16;
    }while( aData<aEnd );
  }else{
    /* Native byte order, two words per iteration. */
    do{
      s1 += aData[0] + s2;
      s2 += aData[1] + s1;
      aData += 2;
    }while( aData<aEnd );
  }
  assert( aData==aEnd );

  aOut[0] = s1;
  aOut[1] = s2;
}
/*
** Invoke the VFS shared-memory barrier on the database file handle.  This
** is a no-op in heap-memory mode, where the wal-index is not shared.
*/
static void walShmBarrier(Wal *pWal){
  if( pWal->exclusiveMode==WAL_HEAPMEMORY_MODE ){
    return;
  }
  sqlite3OsShmBarrier(pWal->pDbFd);
}
/*
** SQLITE_NO_TSAN is placed on functions that should not be instrumented by
** the thread sanitizer.  With clang (and unless the macro is predefined) it
** expands to the no_sanitize_thread attribute; otherwise it is empty.
*/
#if defined(__clang__) && !defined(SQLITE_NO_TSAN)
# define SQLITE_NO_TSAN __attribute__((no_sanitize_thread))
#else
# define SQLITE_NO_TSAN
#endif
/*
** Publish the private header pWal->hdr to the shared wal-index.
**
** The header is marked initialized, stamped with WALINDEX_MAX_VERSION and
** checksummed (over every field that precedes aCksum).  The second shared
** copy is written first, then a memory barrier is issued, then the first
** copy is written.  walIndexTryHdr() reads the copies in the opposite order
** and rejects the header if they differ.  The caller must hold the write
** lock.
*/
static SQLITE_NO_TSAN void walIndexWriteHdr(Wal *pWal){
volatile WalIndexHdr *aHdr = walIndexHdr(pWal);
const int nCksum = offsetof(WalIndexHdr, aCksum);
assert( pWal->writeLock );
pWal->hdr.isInit = 1;
pWal->hdr.iVersion = WALINDEX_MAX_VERSION;
walChecksumBytes(1, (u8*)&pWal->hdr, nCksum, 0, pWal->hdr.aCksum);
/* Copy [1] first, barrier, then copy [0]: the order matters to readers. */
memcpy((void*)&aHdr[1], (const void*)&pWal->hdr, sizeof(WalIndexHdr));
walShmBarrier(pWal);
memcpy((void*)&aHdr[0], (const void*)&pWal->hdr, sizeof(WalIndexHdr));
}
/*
** Fill in the 24-byte frame header aFrame[] for a frame holding page iPage.
**
**   pWal       The WAL connection; supplies salt, page size and the
**              running checksum (pWal->hdr.aFrameCksum, updated in place).
**   iPage      Database page number stored in bytes 0..3.
**   nTruncate  Value stored in bytes 4..7 (non-zero marks a commit frame,
**              see walIndexRecover()).
**   aData      Page content, pWal->szPage bytes.
**   aFrame     OUT: the frame header.
**
** If pWal->iReCksum is non-zero the salt and checksum fields (bytes 8..23)
** are zeroed and the running checksum is left untouched.
*/
static void walEncodeFrame(
  Wal *pWal,
  u32 iPage,
  u32 nTruncate,
  u8 *aData,
  u8 *aFrame
){
  u32 *aRunning = pWal->hdr.aFrameCksum;
  int bNative;

  assert( WAL_FRAME_HDRSIZE==24 );
  sqlite3Put4byte(&aFrame[0], iPage);
  sqlite3Put4byte(&aFrame[4], nTruncate);

  if( pWal->iReCksum!=0 ){
    memset(&aFrame[8], 0, 16);
    return;
  }

  memcpy(&aFrame[8], pWal->hdr.aSalt, 8);
  bNative = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
  /* The checksum covers the first 8 header bytes and then the page data. */
  walChecksumBytes(bNative, aFrame, 8, aRunning, aRunning);
  walChecksumBytes(bNative, aData, pWal->szPage, aRunning, aRunning);
  sqlite3Put4byte(&aFrame[16], aRunning[0]);
  sqlite3Put4byte(&aFrame[20], aRunning[1]);
}
/*
** Validate the frame whose header is aFrame[] and whose page content is
** aData[].  Returns 1 if the frame is valid, 0 otherwise.
**
** A frame is valid when its salt matches pWal->hdr.aSalt, its page number
** is non-zero, and its stored checksum equals the running checksum extended
** over the first 8 header bytes and the page data.  On success *piPage and
** *pnTruncate receive the page number and the truncate field; on failure
** they are left untouched.
**
** The running checksum pWal->hdr.aFrameCksum is updated in place as soon as
** the salt and page-number checks pass, even if the checksum comparison
** then fails.
*/
static int walDecodeFrame(
  Wal *pWal,
  u32 *piPage,
  u32 *pnTruncate,
  u8 *aData,
  u8 *aFrame
){
  u32 *aRunning = pWal->hdr.aFrameCksum;
  int bNative;
  u32 iPgno;

  assert( WAL_FRAME_HDRSIZE==24 );

  /* The salt in the frame must match the salt of the WAL header. */
  if( memcmp(&pWal->hdr.aSalt, &aFrame[8], 8)!=0 ) return 0;

  /* Page number zero is never valid. */
  iPgno = sqlite3Get4byte(&aFrame[0]);
  if( iPgno==0 ) return 0;

  bNative = (pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN);
  walChecksumBytes(bNative, aFrame, 8, aRunning, aRunning);
  walChecksumBytes(bNative, aData, pWal->szPage, aRunning, aRunning);
  if( aRunning[0]!=sqlite3Get4byte(&aFrame[16]) ) return 0;
  if( aRunning[1]!=sqlite3Get4byte(&aFrame[20]) ) return 0;

  *piPage = iPgno;
  *pnTruncate = sqlite3Get4byte(&aFrame[4]);
  return 1;
}
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
/*
** Return a human-readable name for lock index lockIdx, for use in WALTRACE
** output.  Names of read locks are formatted into a static buffer, so the
** returned pointer is only valid until the next call.
*/
static const char *walLockName(int lockIdx){
  static char zName[15];
  switch( lockIdx ){
    case WAL_WRITE_LOCK:   return "WRITE-LOCK";
    case WAL_CKPT_LOCK:    return "CKPT-LOCK";
    case WAL_RECOVER_LOCK: return "RECOVER-LOCK";
    default:               break;
  }
  sqlite3_snprintf(sizeof(zName), zName, "READ-LOCK[%d]",
                   lockIdx-WAL_READ_LOCK(0));
  return zName;
}
#endif
/*
** Take a shared lock on wal-index lock lockIdx.
**
** When pWal->exclusiveMode is non-zero no VFS call is made and SQLITE_OK is
** returned.  Otherwise the result of sqlite3OsShmLock() is returned.  In
** debug builds pWal->lockError records whether the attempt failed with
** something other than SQLITE_BUSY.
*/
static int walLockShared(Wal *pWal, int lockIdx){
  int rc = SQLITE_OK;
  if( pWal->exclusiveMode==0 ){
    rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
                          SQLITE_SHM_LOCK | SQLITE_SHM_SHARED);
    WALTRACE(("WAL%p: acquire SHARED-%s %s\n", pWal,
              walLockName(lockIdx), rc ? "failed" : "ok"));
    VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); )
  }
  return rc;
}
/*
** Release a shared lock on wal-index lock lockIdx.  Nothing is done when
** pWal->exclusiveMode is non-zero.  The VFS result is deliberately ignored.
*/
static void walUnlockShared(Wal *pWal, int lockIdx){
  if( pWal->exclusiveMode==0 ){
    (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, 1,
                           SQLITE_SHM_UNLOCK | SQLITE_SHM_SHARED);
    WALTRACE(("WAL%p: release SHARED-%s\n", pWal, walLockName(lockIdx)));
  }
}
/*
** Take exclusive locks on the n consecutive wal-index locks that start at
** lockIdx.
**
** When pWal->exclusiveMode is non-zero no VFS call is made and SQLITE_OK is
** returned.  Otherwise the result of sqlite3OsShmLock() is returned.  In
** debug builds pWal->lockError records whether the attempt failed with
** something other than SQLITE_BUSY.
*/
static int walLockExclusive(Wal *pWal, int lockIdx, int n){
  int rc = SQLITE_OK;
  if( pWal->exclusiveMode==0 ){
    rc = sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                          SQLITE_SHM_LOCK | SQLITE_SHM_EXCLUSIVE);
    WALTRACE(("WAL%p: acquire EXCLUSIVE-%s cnt=%d %s\n", pWal,
              walLockName(lockIdx), n, rc ? "failed" : "ok"));
    VVA_ONLY( pWal->lockError = (u8)(rc!=SQLITE_OK && (rc&0xFF)!=SQLITE_BUSY); )
  }
  return rc;
}
/*
** Release the exclusive locks on the n consecutive wal-index locks that
** start at lockIdx.  Nothing is done when pWal->exclusiveMode is non-zero.
** The VFS result is deliberately ignored.
*/
static void walUnlockExclusive(Wal *pWal, int lockIdx, int n){
  if( pWal->exclusiveMode==0 ){
    (void)sqlite3OsShmLock(pWal->pDbFd, lockIdx, n,
                           SQLITE_SHM_UNLOCK | SQLITE_SHM_EXCLUSIVE);
    WALTRACE(("WAL%p: release EXCLUSIVE-%s cnt=%d\n", pWal,
              walLockName(lockIdx), n));
  }
}
/*
** Return the initial hash slot for database page iPage (which must be
** non-zero): the page number times HASHTABLE_HASH_1, reduced to the range
** 0..HASHTABLE_NSLOT-1.  HASHTABLE_NSLOT must be a power of two.
*/
static int walHash(u32 iPage){
  u32 iProduct;
  assert( iPage>0 );
  assert( (HASHTABLE_NSLOT & (HASHTABLE_NSLOT-1))==0 );
  iProduct = iPage*HASHTABLE_HASH_1;
  return iProduct & (HASHTABLE_NSLOT-1);
}
/*
** Return the hash slot to probe after iPriorHash: the next slot, wrapping
** around to 0 after the last one.
*/
static int walNextHash(int iPriorHash){
  int iNext = iPriorHash + 1;
  return iNext & (HASHTABLE_NSLOT-1);
}
/*
** Location of the hash table and page-number array of one wal-index page,
** as filled in by walHashGet().
*/
typedef struct WalHashLoc WalHashLoc;
struct WalHashLoc {
/* Hash table: HASHTABLE_NSLOT slots holding 1-based indexes into aPgno[] */
volatile ht_slot *aHash;
/* Page-number array; aPgno[0] belongs to frame iZero+1 */
volatile u32 *aPgno;
/* Frame number offset: frame = iZero + (1-based index into aPgno[]) */
u32 iZero;
};
/*
** Locate the hash table and page-number array of wal-index page iHash and
** describe them in *pLoc.
**
** The hash table starts HASHTABLE_NPAGE words into the page.  On the first
** page the page-number array starts after the header area and iZero is 0;
** on later pages the array starts at the beginning of the page and iZero is
** the number of frames covered by all earlier pages.
**
** Returns the result of walIndexPage().  Should that report SQLITE_OK
** without providing a page (not expected), SQLITE_ERROR is returned.
*/
static int walHashGet(
  Wal *pWal,
  int iHash,
  WalHashLoc *pLoc
){
  volatile u32 *aPage;
  int rc = walIndexPage(pWal, iHash, &pLoc->aPgno);
  assert( rc==SQLITE_OK || iHash>0 );

  aPage = pLoc->aPgno;
  if( aPage==0 ){
    if( NEVER(rc==SQLITE_OK) ){
      rc = SQLITE_ERROR;
    }
    return rc;
  }

  pLoc->aHash = (volatile ht_slot *)&aPage[HASHTABLE_NPAGE];
  if( iHash==0 ){
    pLoc->aPgno = &aPage[WALINDEX_HDR_SIZE/sizeof(u32)];
    pLoc->iZero = 0;
  }else{
    pLoc->iZero = HASHTABLE_NPAGE_ONE + (iHash-1)*HASHTABLE_NPAGE;
  }
  return rc;
}
/*
** Return the index of the wal-index page that holds the entry for WAL frame
** iFrame.  The first page covers frames 1..HASHTABLE_NPAGE_ONE; every later
** page covers the next HASHTABLE_NPAGE frames.
*/
static int walFramePage(u32 iFrame){
  int iPg = (iFrame+HASHTABLE_NPAGE-HASHTABLE_NPAGE_ONE-1) / HASHTABLE_NPAGE;
  /* Sanity checks on the boundaries of the first three pages. */
  assert( iPg==0 || iFrame>HASHTABLE_NPAGE_ONE );
  assert( iPg>=1 || iFrame<=HASHTABLE_NPAGE_ONE );
  assert( iPg<=1 || iFrame>(HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE) );
  assert( iPg>=2 || iFrame<=HASHTABLE_NPAGE_ONE+HASHTABLE_NPAGE );
  assert( iPg<=2 || iFrame>(HASHTABLE_NPAGE_ONE+2*HASHTABLE_NPAGE) );
  assert( iPg>=0 );
  return iPg;
}
/*
** Return the database page number recorded in the wal-index for WAL frame
** iFrame.  The wal-index page that covers the frame must already be mapped.
*/
static u32 walFramePgno(Wal *pWal, u32 iFrame){
  int iPg = walFramePage(iFrame);
  volatile u32 *aPage = pWal->apWiData[iPg];
  if( iPg!=0 ){
    return aPage[(iFrame-1-HASHTABLE_NPAGE_ONE)%HASHTABLE_NPAGE];
  }
  /* On the first page the entries follow the header area. */
  return aPage[WALINDEX_HDR_SIZE/sizeof(u32) + iFrame - 1];
}
/*
** Remove from the wal-index page that covers pWal->hdr.mxFrame every entry
** that refers to a frame beyond mxFrame.
**
** Hash slots whose value exceeds the limit are cleared, and the tail of the
** page-number array (from the first unused entry up to the start of the
** hash table) is zeroed.  Nothing is done when mxFrame is 0.  The caller
** must hold the write lock.
*/
static void walCleanupHash(Wal *pWal){
WalHashLoc sLoc;
int iLimit = 0;
int nByte;
int i;
assert( pWal->writeLock );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE-1 );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE );
testcase( pWal->hdr.mxFrame==HASHTABLE_NPAGE_ONE+1 );
if( pWal->hdr.mxFrame==0 ) return;
/* The page covering mxFrame is expected to be mapped already, so the
** walHashGet() call below should not fail. */
assert( pWal->nWiData>walFramePage(pWal->hdr.mxFrame) );
assert( pWal->apWiData[walFramePage(pWal->hdr.mxFrame)] );
i = walHashGet(pWal, walFramePage(pWal->hdr.mxFrame), &sLoc);
if( NEVER(i) ) return;
/* iLimit is the largest slot value (1-based entry index) still valid. */
iLimit = pWal->hdr.mxFrame - sLoc.iZero;
assert( iLimit>0 );
for(i=0; i<HASHTABLE_NSLOT; i++){
if( sLoc.aHash[i]>iLimit ){
sLoc.aHash[i] = 0;
}
}
/* Zero the page-number entries that follow the last valid one. */
nByte = (int)((char *)sLoc.aHash - (char *)&sLoc.aPgno[iLimit]);
assert( nByte>=0 );
memset((void *)&sLoc.aPgno[iLimit], 0, nByte);
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Verify that every remaining entry can still be found via the hash. */
if( iLimit ){
int j;
int iKey;
for(j=0; j<iLimit; j++){
for(iKey=walHash(sLoc.aPgno[j]);sLoc.aHash[iKey];iKey=walNextHash(iKey)){
if( sLoc.aHash[iKey]==j+1 ) break;
}
assert( sLoc.aHash[iKey]==j+1 );
}
}
#endif
}
/*
** Record in the wal-index that WAL frame iFrame holds database page iPage.
**
** Returns SQLITE_OK on success, the error from walHashGet() if the
** wal-index page cannot be obtained, or SQLITE_CORRUPT if no free hash slot
** is found within the expected number of probes.
*/
static int walIndexAppend(Wal *pWal, u32 iFrame, u32 iPage){
int rc;
WalHashLoc sLoc;
rc = walHashGet(pWal, walFramePage(iFrame), &sLoc);
if( rc==SQLITE_OK ){
int iKey;
int idx;
int nCollide;
/* idx is the 1-based position of the new entry within this page. */
idx = iFrame - sLoc.iZero;
assert( idx <= HASHTABLE_NSLOT/2 + 1 );
/* First entry on this page: wipe the page-number array and hash table. */
if( idx==1 ){
int nByte = (int)((u8*)&sLoc.aHash[HASHTABLE_NSLOT] - (u8*)sLoc.aPgno);
assert( nByte>=0 );
memset((void*)sLoc.aPgno, 0, nByte);
}
/* A non-zero entry at the target position is stale content from beyond
** mxFrame; walCleanupHash() removes it. */
if( sLoc.aPgno[idx-1] ){
walCleanupHash(pWal);
assert( !sLoc.aPgno[idx-1] );
}
/* Linear probing for a free slot, giving up after idx collisions. */
nCollide = idx;
for(iKey=walHash(iPage); sLoc.aHash[iKey]; iKey=walNextHash(iKey)){
if( (nCollide--)==0 ) return SQLITE_CORRUPT_BKPT;
}
/* Store the page number first, then publish it through the hash slot. */
sLoc.aPgno[idx-1] = iPage;
AtomicStore(&sLoc.aHash[iKey], (ht_slot)idx);
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* The number of used slots must equal the number of entries. */
{
int i;
int nEntry = 0;
for(i=0; i<HASHTABLE_NSLOT; i++){ if( sLoc.aHash[i] ) nEntry++; }
assert( nEntry==idx );
}
/* Every 1024 entries, check that each entry is reachable via the hash. */
if( (idx&0x3ff)==0 ){
int i;
for(i=0; i<idx; i++){
for(iKey=walHash(sLoc.aPgno[i]);
sLoc.aHash[iKey];
iKey=walNextHash(iKey)){
if( sLoc.aHash[iKey]==i+1 ) break;
}
assert( sLoc.aHash[iKey]==i+1 );
}
}
#endif
}
return rc;
}
/*
** Rebuild the wal-index by reading the WAL file from the beginning.
**
** The caller must hold the write lock.  All locks from WAL_ALL_BUT_WRITE
** (or the one after it, if the checkpoint lock is already held) up to but
** not including the first read lock are held exclusively for the duration.
**
** The WAL header is validated (magic, page size, checksum, version), then
** frames are read and checked one at a time until the first invalid frame.
** pWal->hdr is advanced at each commit frame (non-zero truncate field), so
** frames after the last commit are not made visible.  Finally the header
** and the checkpoint information are written to the wal-index.
**
** Returns SQLITE_OK or an error code.  NOTE(review): if the last read lock
** probed in the final loop is busy, SQLITE_BUSY is what gets returned.
*/
static int walIndexRecover(Wal *pWal){
int rc;
i64 nSize;
u32 aFrameCksum[2] = {0, 0};
int iLock;
assert( pWal->ckptLock==1 || pWal->ckptLock==0 );
assert( WAL_ALL_BUT_WRITE==WAL_WRITE_LOCK+1 );
assert( WAL_CKPT_LOCK==WAL_ALL_BUT_WRITE );
assert( pWal->writeLock );
/* Skip the checkpoint lock if this connection already holds it. */
iLock = WAL_ALL_BUT_WRITE + pWal->ckptLock;
rc = walLockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
if( rc ){
return rc;
}
WALTRACE(("WAL%p: recovery begin...\n", pWal));
/* Start from an all-zero private header. */
memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
rc = sqlite3OsFileSize(pWal->pWalFd, &nSize);
if( rc!=SQLITE_OK ){
goto recovery_error;
}
/* A WAL no larger than its header contains no frames. */
if( nSize>WAL_HDRSIZE ){
u8 aBuf[WAL_HDRSIZE];
u32 *aPrivate = 0;
u8 *aFrame = 0;
int szFrame;
u8 *aData;
int szPage;
u32 magic;
u32 version;
int isValid;
u32 iPg;
u32 iLastFrame;
rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
if( rc!=SQLITE_OK ){
goto recovery_error;
}
/* Check the magic number and that the page size is a power of two in the
** supported range.  A bad header is treated as an empty WAL. */
magic = sqlite3Get4byte(&aBuf[0]);
szPage = sqlite3Get4byte(&aBuf[8]);
if( (magic&0xFFFFFFFE)!=WAL_MAGIC
|| szPage&(szPage-1)
|| szPage>SQLITE_MAX_PAGE_SIZE
|| szPage<512
){
goto finished;
}
pWal->hdr.bigEndCksum = (u8)(magic&0x00000001);
pWal->szPage = szPage;
pWal->nCkpt = sqlite3Get4byte(&aBuf[12]);
memcpy(&pWal->hdr.aSalt, &aBuf[16], 8);
/* The header checksum covers the first 24 bytes and is stored in the
** last 8.  It also seeds the running frame checksum. */
walChecksumBytes(pWal->hdr.bigEndCksum==SQLITE_BIGENDIAN,
aBuf, WAL_HDRSIZE-2*4, 0, pWal->hdr.aFrameCksum
);
if( pWal->hdr.aFrameCksum[0]!=sqlite3Get4byte(&aBuf[24])
|| pWal->hdr.aFrameCksum[1]!=sqlite3Get4byte(&aBuf[28])
){
goto finished;
}
/* An unsupported WAL version is an error, not an empty WAL. */
version = sqlite3Get4byte(&aBuf[4]);
if( version!=WAL_MAX_VERSION ){
rc = SQLITE_CANTOPEN_BKPT;
goto finished;
}
/* One allocation holds a frame (header + page) followed by a private
** copy of one wal-index page. */
szFrame = szPage + WAL_FRAME_HDRSIZE;
aFrame = (u8 *)sqlite3_malloc64(szFrame + WALINDEX_PGSZ);
if( !aFrame ){
rc = SQLITE_NOMEM_BKPT;
goto recovery_error;
}
aData = &aFrame[WAL_FRAME_HDRSIZE];
aPrivate = (u32*)&aData[szPage];
/* Process the frames one wal-index page at a time. */
iLastFrame = (nSize - WAL_HDRSIZE) / szFrame;
for(iPg=0; iPg<=(u32)walFramePage(iLastFrame); iPg++){
u32 *aShare;
u32 iFrame;
u32 iLast = MIN(iLastFrame, HASHTABLE_NPAGE_ONE+iPg*HASHTABLE_NPAGE);
u32 iFirst = 1 + (iPg==0?0:HASHTABLE_NPAGE_ONE+(iPg-1)*HASHTABLE_NPAGE);
u32 nHdr, nHdr32;
rc = walIndexPage(pWal, iPg, (volatile u32**)&aShare);
assert( aShare!=0 || rc!=SQLITE_OK );
if( aShare==0 ) break;
/* Temporarily redirect this page to the private buffer so that
** walIndexAppend() builds the hash table there. */
pWal->apWiData[iPg] = aPrivate;
for(iFrame=iFirst; iFrame<=iLast; iFrame++){
i64 iOffset = walFrameOffset(iFrame, szPage);
u32 pgno;
u32 nTruncate;
rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
if( rc!=SQLITE_OK ) break;
isValid = walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame);
if( !isValid ) break;
rc = walIndexAppend(pWal, iFrame, pgno);
if( NEVER(rc!=SQLITE_OK) ) break;
/* A non-zero truncate field marks a commit frame: remember the state
** as of this frame. */
if( nTruncate ){
pWal->hdr.mxFrame = iFrame;
pWal->hdr.nPage = nTruncate;
pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
testcase( szPage<=32768 );
testcase( szPage>=65536 );
aFrameCksum[0] = pWal->hdr.aFrameCksum[0];
aFrameCksum[1] = pWal->hdr.aFrameCksum[1];
}
}
/* Restore the shared page and copy the private content into it, leaving
** the header area of the first page untouched. */
pWal->apWiData[iPg] = aShare;
nHdr = (iPg==0 ? WALINDEX_HDR_SIZE : 0);
nHdr32 = nHdr / sizeof(u32);
#ifndef SQLITE_SAFER_WALINDEX_RECOVERY
memcpy(&aShare[nHdr32], &aPrivate[nHdr32], WALINDEX_PGSZ-nHdr);
#else
/* Alternative copy that only writes the words that actually differ. */
{
int i;
for(i=nHdr32; i<WALINDEX_PGSZ/sizeof(u32); i++){
if( aShare[i]!=aPrivate[i] ){
aShare[i] = aPrivate[i];
}
}
}
#endif
/* The inner loop stopped early (read error or invalid frame): done. */
if( iFrame<=iLast ) break;
}
sqlite3_free(aFrame);
}
finished:
if( rc==SQLITE_OK ){
volatile WalCkptInfo *pInfo;
int i;
/* Use the checksum as of the last commit frame, then publish the header. */
pWal->hdr.aFrameCksum[0] = aFrameCksum[0];
pWal->hdr.aFrameCksum[1] = aFrameCksum[1];
walIndexWriteHdr(pWal);
/* Reset the checkpoint information. */
pInfo = walCkptInfo(pWal);
pInfo->nBackfill = 0;
pInfo->nBackfillAttempted = pWal->hdr.mxFrame;
pInfo->aReadMark[0] = 0;
/* Reset each read mark whose lock can be taken; busy ones are skipped. */
for(i=1; i<WAL_NREADER; i++){
rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
if( rc==SQLITE_OK ){
if( i==1 && pWal->hdr.mxFrame ){
pInfo->aReadMark[i] = pWal->hdr.mxFrame;
}else{
pInfo->aReadMark[i] = READMARK_NOT_USED;
}
walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
}else if( rc!=SQLITE_BUSY ){
goto recovery_error;
}
}
/* Log a notice if any committed content was recovered. */
if( pWal->hdr.nPage ){
sqlite3_log(SQLITE_NOTICE_RECOVER_WAL,
"recovered %d frames from WAL file %s",
pWal->hdr.mxFrame, pWal->zWalName
);
}
}
recovery_error:
WALTRACE(("WAL%p: recovery %s\n", pWal, rc ? "failed" : "ok"));
walUnlockExclusive(pWal, iLock, WAL_READ_LOCK(0)-iLock);
return rc;
}
/*
** Release the wal-index pages of pWal.
**
** In heap-memory mode, or when the shared memory was found unreliable, the
** pages are heap allocations: each is freed and its slot reset to 0.
** Unless the connection is in heap-memory mode the VFS is then asked to
** unmap the shared memory, passing isDelete through.
*/
static void walIndexClose(Wal *pWal, int isDelete){
  const int bHeapMode = (pWal->exclusiveMode==WAL_HEAPMEMORY_MODE);
  if( bHeapMode || pWal->bShmUnreliable ){
    int iPg = 0;
    while( iPg<pWal->nWiData ){
      sqlite3_free((void *)pWal->apWiData[iPg]);
      pWal->apWiData[iPg] = 0;
      iPg++;
    }
  }
  if( !bHeapMode ){
    sqlite3OsShmUnmap(pWal->pDbFd, isDelete);
  }
}
/*
** Open a connection to the write-ahead log file zWalName.
**
**   pVfs       VFS used to open the WAL file.
**   pDbFd      Open handle on the database file.
**   zWalName   Name of the WAL file.  The pointer is stored in the new
**              object, so the string must outlive it.
**   bNoShm     True to keep the wal-index in heap memory rather than in
**              shared memory.
**   mxWalSize  Initial WAL size limit (see sqlite3WalLimit()).
**   ppWal      OUT: the new connection, or NULL on failure.
**
** Returns SQLITE_OK on success, SQLITE_NOMEM if the object cannot be
** allocated, or the error returned by sqlite3OsOpen().
*/
int sqlite3WalOpen(
  sqlite3_vfs *pVfs,
  sqlite3_file *pDbFd,
  const char *zWalName,
  int bNoShm,
  i64 mxWalSize,
  Wal **ppWal
){
  int rc;
  Wal *pRet;
  int flags;

  assert( zWalName && zWalName[0] );
  assert( pDbFd );

  /* The on-disk and shared-memory layouts are fixed; verify that the
  ** compile-time constants agree with them. */
  assert( 48 == sizeof(WalIndexHdr) );
  assert( 40 == sizeof(WalCkptInfo) );
  assert( 120 == WALINDEX_LOCK_OFFSET );
  assert( 136 == WALINDEX_HDR_SIZE );
  assert( 4096 == HASHTABLE_NPAGE );
  assert( 4062 == HASHTABLE_NPAGE_ONE );
  assert( 8192 == HASHTABLE_NSLOT );
  assert( 383 == HASHTABLE_HASH_1 );
  assert( 32768 == WALINDEX_PGSZ );
  assert( 8 == SQLITE_SHM_NLOCK );
  assert( 5 == WAL_NREADER );
  assert( 24 == WAL_FRAME_HDRSIZE );
  assert( 32 == WAL_HDRSIZE );
  assert( 120 == WALINDEX_LOCK_OFFSET + WAL_WRITE_LOCK );
  assert( 121 == WALINDEX_LOCK_OFFSET + WAL_CKPT_LOCK );
  assert( 122 == WALINDEX_LOCK_OFFSET + WAL_RECOVER_LOCK );
  assert( 123 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(0) );
  assert( 124 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(1) );
  assert( 125 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(2) );
  assert( 126 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(3) );
  assert( 127 == WALINDEX_LOCK_OFFSET + WAL_READ_LOCK(4) );
#ifdef WIN_SHM_BASE
  assert( WIN_SHM_BASE==WALINDEX_LOCK_OFFSET );
#endif
#ifdef UNIX_SHM_BASE
  assert( UNIX_SHM_BASE==WALINDEX_LOCK_OFFSET );
#endif

  /* Allocate the Wal object with room for the WAL file handle right after
  ** it. */
  *ppWal = 0;
  pRet = (Wal*)sqlite3MallocZero(sizeof(Wal) + pVfs->szOsFile);
  if( !pRet ){
    return SQLITE_NOMEM_BKPT;
  }

  pRet->pVfs = pVfs;
  pRet->pWalFd = (sqlite3_file *)&pRet[1];
  pRet->pDbFd = pDbFd;
  pRet->readLock = -1;
  pRet->mxWalSize = mxWalSize;
  pRet->zWalName = zWalName;
  pRet->syncHeader = 1;
  pRet->padToSectorBoundary = 1;
  pRet->exclusiveMode = (bNoShm ? WAL_HEAPMEMORY_MODE: WAL_NORMAL_MODE);

  /* Open (creating if necessary) the WAL file.  The VFS reports through
  ** the output flags if it could only open the file read-only. */
  flags = (SQLITE_OPEN_READWRITE|SQLITE_OPEN_CREATE|SQLITE_OPEN_WAL);
  rc = sqlite3OsOpen(pVfs, zWalName, pRet->pWalFd, flags, &flags);
  if( rc==SQLITE_OK && flags&SQLITE_OPEN_READONLY ){
    pRet->readOnly = WAL_RDONLY;
  }

  if( rc!=SQLITE_OK ){
    walIndexClose(pRet, 0);
    sqlite3OsClose(pRet->pWalFd);
    sqlite3_free(pRet);
  }else{
    /* Relax the default sync/padding behavior according to the device
    ** characteristics of the database file. */
    int iDC = sqlite3OsDeviceCharacteristics(pDbFd);
    if( iDC & SQLITE_IOCAP_SEQUENTIAL ){ pRet->syncHeader = 0; }
    if( iDC & SQLITE_IOCAP_POWERSAFE_OVERWRITE ){
      pRet->padToSectorBoundary = 0;
    }
    *ppWal = pRet;
    /* pRet is a pointer, so it is traced with %p like every other trace. */
    WALTRACE(("WAL%p: opened\n", pRet));
  }
  return rc;
}
/*
** Change the WAL size limit of pWal to iLimit.  A NULL pWal is ignored.
*/
void sqlite3WalLimit(Wal *pWal, i64 iLimit){
  if( pWal==0 ){
    return;
  }
  pWal->mxWalSize = iLimit;
}
/*
** Advance iterator p to the smallest page number that is larger than the
** one returned by the previous call.
**
** On return *piPage holds that page number.  When a page was found,
** *piFrame holds a WAL frame containing it; segments are scanned from last
** to first and an equal page number in an earlier segment does not replace
** the choice.  Entries not larger than the previous page are skipped
** permanently.
**
** Returns 0 if a page was found.  Returns 1 when the iteration is finished;
** *piPage is then 0xFFFFFFFF and *piFrame is not modified.
*/
static int walIteratorNext(
  WalIterator *p,
  u32 *piPage,
  u32 *piFrame
){
  const u32 iFloor = p->iPrior;
  u32 iBest = 0xFFFFFFFF;
  int iSeg = p->nSegment;

  assert( iFloor<0xffffffff );
  while( iSeg-- > 0 ){
    struct WalSegment *pSeg = &p->aSegment[iSeg];
    for(; pSeg->iNext<pSeg->nEntry; pSeg->iNext++){
      ht_slot iSlot = pSeg->aIndex[pSeg->iNext];
      u32 iPgno = pSeg->aPgno[iSlot];
      if( iPgno<=iFloor ){
        continue;
      }
      /* First entry of this segment beyond the floor: a candidate. */
      if( iPgno<iBest ){
        iBest = iPgno;
        *piFrame = pSeg->iZero + iSlot;
      }
      break;
    }
  }

  p->iPrior = iBest;
  *piPage = iBest;
  return (iBest==0xFFFFFFFF);
}
/*
** Merge two lists of slot numbers, each sorted by aContent[slot].
**
**   aContent  Maps a slot number to its sort key (a page number).
**   aLeft     Left list, nLeft entries.  Receives the merged result.
**   paRight   IN: the right list.  OUT: set to aLeft.
**   pnRight   IN: size of the right list.  OUT: size of the merged list.
**   aTmp      Scratch space large enough for the merged list.
**
** When both lists hold the same key, the entry from the right list is kept
** and the one from the left list is dropped.  Both lists must be non-empty.
*/
static void walMerge(
  const u32 *aContent,
  ht_slot *aLeft,
  int nLeft,
  ht_slot **paRight,
  int *pnRight,
  ht_slot *aTmp
){
  ht_slot *aRight = *paRight;
  const int nRight = *pnRight;
  int iL = 0;
  int iR = 0;
  int nOut = 0;

  assert( nLeft>0 && nRight>0 );
  while( iL<nLeft || iR<nRight ){
    ht_slot iSlot;
    Pgno iKey;
    int bFromLeft;

    /* Take from the left only if its key is strictly smaller. */
    if( iL>=nLeft ){
      bFromLeft = 0;
    }else if( iR>=nRight ){
      bFromLeft = 1;
    }else{
      bFromLeft = (aContent[aLeft[iL]]<aContent[aRight[iR]]);
    }
    if( bFromLeft ){
      iSlot = aLeft[iL++];
    }else{
      iSlot = aRight[iR++];
    }
    iKey = aContent[iSlot];
    aTmp[nOut++] = iSlot;

    /* Drop a left entry that duplicates the key just emitted. */
    if( iL<nLeft && aContent[aLeft[iL]]==iKey ){
      iL++;
    }

    assert( iL>=nLeft || aContent[aLeft[iL]]>iKey );
    assert( iR>=nRight || aContent[aRight[iR]]>iKey );
  }

  *paRight = aLeft;
  *pnRight = nOut;
  memcpy(aLeft, aTmp, sizeof(aTmp[0])*nOut);
}
/*
** Sort the slot numbers in aList[] by aContent[slot], removing entries with
** duplicate keys (walMerge() decides which duplicate is kept).
**
**   aContent  Maps a slot number to its sort key (a page number).
**   aBuffer   Scratch space handed to walMerge().
**   aList     IN/OUT: the list to sort; sorted in place.
**   pnList    IN: number of entries in aList.  OUT: number after duplicates
**             have been removed.
**
** This is a bottom-up merge sort.  aSub[i] holds a sorted sublist built
** from up to 2^i input entries; each new element is merged with the
** existing sublists for as long as the corresponding low bits of its index
** are set.  The list may hold at most HASHTABLE_NPAGE entries.
*/
static void walMergesort(
const u32 *aContent,
ht_slot *aBuffer,
ht_slot *aList,
int *pnList
){
struct Sublist {
int nList;
ht_slot *aList;
};
const int nList = *pnList;
int nMerge = 0;
ht_slot *aMerge = 0;
int iList;
u32 iSub = 0;
struct Sublist aSub[13];
memset(aSub, 0, sizeof(aSub));
assert( nList<=HASHTABLE_NPAGE && nList>0 );
assert( HASHTABLE_NPAGE==(1<<(ArraySize(aSub)-1)) );
/* Insert the elements one at a time, merging equal-sized sublists. */
for(iList=0; iList<nList; iList++){
nMerge = 1;
aMerge = &aList[iList];
for(iSub=0; iList & (1<<iSub); iSub++){
struct Sublist *p;
assert( iSub<ArraySize(aSub) );
p = &aSub[iSub];
assert( p->aList && p->nList<=(1<<iSub) );
assert( p->aList==&aList[iList&~((2<<iSub)-1)] );
walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
}
aSub[iSub].aList = aMerge;
aSub[iSub].nList = nMerge;
}
/* Merge the remaining sublists, selected by the set bits of nList. */
for(iSub++; iSub<ArraySize(aSub); iSub++){
if( nList & (1<<iSub) ){
struct Sublist *p;
assert( iSub<ArraySize(aSub) );
p = &aSub[iSub];
assert( p->nList<=(1<<iSub) );
assert( p->aList==&aList[nList&~((2<<iSub)-1)] );
walMerge(aContent, p->aList, p->nList, &aMerge, &nMerge, aBuffer);
}
}
assert( aMerge==aList );
*pnList = nMerge;
#ifdef SQLITE_DEBUG
/* The result must be strictly increasing by key. */
{
int i;
for(i=1; i<*pnList; i++){
assert( aContent[aList[i]] > aContent[aList[i-1]] );
}
}
#endif
}
/*
** Release an iterator allocated by walIteratorInit().  The iterator is a
** single allocation, so one call to sqlite3_free() suffices.
*/
static void walIteratorFree(WalIterator *p){
  sqlite3_free(
    p
  );
}
/*
** Build an iterator over the pages held in WAL frames nBackfill+1 through
** pWal->hdr.mxFrame and store it in *pp.
**
** One segment is prepared for each wal-index page from the page containing
** frame nBackfill+1 onward: its slot numbers are sorted by page number with
** walMergesort().  Segments for earlier pages stay zeroed and therefore
** yield nothing.
**
** Returns SQLITE_OK on success; on failure *pp is set to 0 and SQLITE_NOMEM
** or the error from walHashGet() is returned.  The caller must hold the
** checkpoint lock and the WAL must contain at least one frame.  The result
** is released with walIteratorFree().
*/
static int walIteratorInit(Wal *pWal, u32 nBackfill, WalIterator **pp){
WalIterator *p;
int nSegment;
u32 iLast;
sqlite3_int64 nByte;
int i;
ht_slot *aTmp;
int rc = SQLITE_OK;
assert( pWal->ckptLock && pWal->hdr.mxFrame>0 );
iLast = pWal->hdr.mxFrame;
/* A single allocation holds the iterator, its segment array and, after
** that, one ht_slot per frame for the sorted index arrays. */
nSegment = walFramePage(iLast) + 1;
nByte = sizeof(WalIterator)
+ (nSegment-1)*sizeof(struct WalSegment)
+ iLast*sizeof(ht_slot);
p = (WalIterator *)sqlite3_malloc64(nByte);
if( !p ){
return SQLITE_NOMEM_BKPT;
}
memset(p, 0, nByte);
p->nSegment = nSegment;
/* Scratch buffer for the merge sort: never more than one page of slots. */
aTmp = (ht_slot *)sqlite3_malloc64(
sizeof(ht_slot) * (iLast>HASHTABLE_NPAGE?HASHTABLE_NPAGE:iLast)
);
if( !aTmp ){
rc = SQLITE_NOMEM_BKPT;
}
for(i=walFramePage(nBackfill+1); rc==SQLITE_OK && i<nSegment; i++){
WalHashLoc sLoc;
rc = walHashGet(pWal, i, &sLoc);
if( rc==SQLITE_OK ){
int j;
int nEntry;
ht_slot *aIndex;
/* The last page is only partly used; earlier pages are full. */
if( (i+1)==nSegment ){
nEntry = (int)(iLast - sLoc.iZero);
}else{
nEntry = (int)((u32*)sLoc.aHash - (u32*)sLoc.aPgno);
}
/* This segment's index array lives iZero slots into the area that
** follows the segment array. */
aIndex = &((ht_slot *)&p->aSegment[p->nSegment])[sLoc.iZero];
/* Slot numbers are 0-based, frames 1-based: bias iZero by one so that
** iZero + slot is the frame number. */
sLoc.iZero++;
for(j=0; j<nEntry; j++){
aIndex[j] = (ht_slot)j;
}
walMergesort((u32 *)sLoc.aPgno, aTmp, aIndex, &nEntry);
p->aSegment[i].iZero = sLoc.iZero;
p->aSegment[i].nEntry = nEntry;
p->aSegment[i].aIndex = aIndex;
p->aSegment[i].aPgno = (u32 *)sLoc.aPgno;
}
}
sqlite3_free(aTmp);
if( rc!=SQLITE_OK ){
walIteratorFree(p);
p = 0;
}
*pp = p;
return rc;
}
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
/*
** Try to switch the database file handle to blocking locks, using the busy
** timeout of the connection attached to pWal.
**
** Returns 1 if a non-zero timeout was configured and the VFS accepted the
** SQLITE_FCNTL_LOCK_TIMEOUT request.  Returns 0 if no connection is
** attached, the timeout is zero, or the file-control failed.
*/
static int walEnableBlocking(Wal *pWal){
  int tmout;
  int rc;
  if( pWal->db==0 ){
    return 0;
  }
  tmout = pWal->db->busyTimeout;
  if( tmout==0 ){
    return 0;
  }
  rc = sqlite3OsFileControl(
      pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&tmout
  );
  return (rc==SQLITE_OK);
}
/*
** Switch the database file handle back to non-blocking locks by requesting
** a lock timeout of zero.  The result of the file-control is ignored.
*/
static void walDisableBlocking(Wal *pWal){
  int noTimeout = 0;
  sqlite3OsFileControl(
      pWal->pDbFd, SQLITE_FCNTL_LOCK_TIMEOUT, (void*)&noTimeout
  );
}
/*
** Take (bLock!=0) or release (bLock==0) the WAL write lock.
**
** Taking the lock is only attempted if blocking locks can be enabled (see
** walEnableBlocking()); otherwise SQLITE_OK is returned with the lock not
** held.  When the attempt is made, its result is returned and
** pWal->writeLock is set on success.  Releasing is a no-op if the lock is
** not held.  No read lock may be held when taking the lock.
*/
int sqlite3WalWriteLock(Wal *pWal, int bLock){
  int rc = SQLITE_OK;
  assert( pWal->readLock<0 || bLock==0 );

  if( bLock==0 ){
    if( pWal->writeLock ){
      walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
      pWal->writeLock = 0;
    }
    return rc;
  }

  assert( pWal->db );
  if( !walEnableBlocking(pWal) ){
    return rc;
  }
  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  if( rc==SQLITE_OK ){
    pWal->writeLock = 1;
  }
  walDisableBlocking(pWal);
  return rc;
}
/*
** Attach database connection db to pWal.  Its busy timeout is what
** walEnableBlocking() uses for blocking locks.
*/
void sqlite3WalDb(
  Wal *pWal,
  sqlite3 *db
){
  pWal->db = db;
}
/*
** Take the exclusive WAL write lock, using a blocking lock if one can be
** enabled.  Blocking is switched off again before returning.  Returns the
** result of the lock attempt.
*/
static int walLockWriter(Wal *pWal){
  int rcLock;
  (void)walEnableBlocking(pWal);
  rcLock = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  walDisableBlocking(pWal);
  return rcLock;
}
#else
/*
** Without SQLITE_ENABLE_SETLK_TIMEOUT there are no blocking locks:
** enabling always "fails" (0), disabling does nothing, the writer lock is
** a plain exclusive lock, and sqlite3WalDb() expands to nothing.
*/
# define walEnableBlocking(x) 0
# define walDisableBlocking(x)
# define walLockWriter(pWal) walLockExclusive((pWal), WAL_WRITE_LOCK, 1)
# define sqlite3WalDb(pWal, db)
#endif
/*
** Take exclusive locks on the n wal-index locks starting at lockIdx,
** retrying for as long as the attempt returns SQLITE_BUSY and the busy
** handler xBusy (if not NULL) returns non-zero.
**
** Returns the result of the last lock attempt.  With
** SQLITE_ENABLE_SETLK_TIMEOUT, an SQLITE_BUSY_TIMEOUT result switches
** blocking locks off and is reported as SQLITE_BUSY.
*/
static int walBusyLock(
  Wal *pWal,
  int (*xBusy)(void*),
  void *pBusyArg,
  int lockIdx,
  int n
){
  int rc;
  for(;;){
    rc = walLockExclusive(pWal, lockIdx, n);
    if( xBusy==0 ) break;
    if( rc!=SQLITE_BUSY ) break;
    if( xBusy(pBusyArg)==0 ) break;
  }
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
  if( rc==SQLITE_BUSY_TIMEOUT ){
    walDisableBlocking(pWal);
    rc = SQLITE_BUSY;
  }
#endif
  return rc;
}
/*
** Decode the page size stored in pWal->hdr.szPage.  Bits 9..15 are taken
** as they are, and bit 0 stands for 65536, which does not fit in 16 bits.
*/
static int walPagesize(Wal *pWal){
  const u16 szEncoded = pWal->hdr.szPage;
  int szLow = szEncoded & 0xfe00;
  int szHigh = (szEncoded & 0x0001) << 16;
  return szLow + szHigh;
}
/*
** Reset the wal-index so that the next writer starts the WAL from the
** beginning.
**
** The checkpoint counter is bumped, mxFrame is set to 0, the first salt
** (stored big-endian) is incremented and the second is replaced by salt1.
** The header is then published and the checkpoint information reset: no
** frames backfilled, read mark 1 at 0, and all higher read marks unused.
** NOTE(review): the caller is presumably expected to hold the locks that
** make this safe; walIndexWriteHdr() asserts the write lock.
*/
static void walRestartHdr(Wal *pWal, u32 salt1){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
int i;
u32 *aSalt = pWal->hdr.aSalt;
pWal->nCkpt++;
pWal->hdr.mxFrame = 0;
/* aSalt[0] is kept in big-endian byte order, hence the Get/Put pair. */
sqlite3Put4byte((u8*)&aSalt[0], 1 + sqlite3Get4byte((u8*)&aSalt[0]));
memcpy(&pWal->hdr.aSalt[1], &salt1, 4);
walIndexWriteHdr(pWal);
AtomicStore(&pInfo->nBackfill, 0);
pInfo->nBackfillAttempted = 0;
pInfo->aReadMark[1] = 0;
for(i=2; i<WAL_NREADER; i++) pInfo->aReadMark[i] = READMARK_NOT_USED;
assert( pInfo->aReadMark[0]==0 );
}
/*
** Copy as much WAL content as possible into the database file.
**
**   pWal        The WAL connection.
**   db          Connection checked for interrupts while copying.
**   eMode       One of the SQLITE_CHECKPOINT_* modes.
**   xBusy       Busy handler, or NULL.  Must be NULL in PASSIVE mode.
**   pBusyArg    Argument passed to xBusy.
**   sync_flags  Flags for syncing the WAL and database files.
**   zBuf        Scratch buffer that receives one page at a time.
**
** Frames up to mxSafeFrame are copied, where mxSafeFrame is the largest
** frame not beyond any read mark whose lock could not be obtained.  On
** success WalCkptInfo.nBackfill is advanced to mxSafeFrame.  In modes other
** than PASSIVE, SQLITE_BUSY is returned if the WAL could not be fully
** backfilled; in RESTART and TRUNCATE modes all read locks from 1 upward
** are additionally taken, and in TRUNCATE mode the header is reset and the
** WAL file truncated to zero bytes.
**
** Returns SQLITE_OK or an error code.  SQLITE_BUSY from the backfill phase
** itself is not reported as an error.
*/
static int walCheckpoint(
Wal *pWal,
sqlite3 *db,
int eMode,
int (*xBusy)(void*),
void *pBusyArg,
int sync_flags,
u8 *zBuf
){
int rc = SQLITE_OK;
int szPage;
WalIterator *pIter = 0;
u32 iDbpage = 0;
u32 iFrame = 0;
u32 mxSafeFrame;
u32 mxPage;
int i;
volatile WalCkptInfo *pInfo;
szPage = walPagesize(pWal);
testcase( szPage<=32768 );
testcase( szPage>=65536 );
pInfo = walCkptInfo(pWal);
/* Backfill only if there are frames that have not been copied yet. */
if( pInfo->nBackfill<pWal->hdr.mxFrame ){
assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
/* Compute mxSafeFrame.  Every read mark below the current candidate is
** either moved out of the way (if its lock can be taken) or lowers the
** candidate to that mark. */
mxSafeFrame = pWal->hdr.mxFrame;
mxPage = pWal->hdr.nPage;
for(i=1; i<WAL_NREADER; i++){
u32 y = AtomicLoad(pInfo->aReadMark+i);
if( mxSafeFrame>y ){
assert( y<=pWal->hdr.mxFrame );
rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(i), 1);
if( rc==SQLITE_OK ){
u32 iMark = (i==1 ? mxSafeFrame : READMARK_NOT_USED);
AtomicStore(pInfo->aReadMark+i, iMark);
walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
}else if( rc==SQLITE_BUSY ){
/* A reader is using this mark: stop there, and do not invoke the busy
** handler again. */
mxSafeFrame = y;
xBusy = 0;
}else{
goto walcheckpoint_out;
}
}
}
/* An iterator is only needed if there is something to copy. */
if( pInfo->nBackfill<mxSafeFrame ){
rc = walIteratorInit(pWal, pInfo->nBackfill, &pIter);
assert( rc==SQLITE_OK || pIter==0 );
}
/* Read lock 0 is held exclusively while the database file is written. */
if( pIter
&& (rc = walBusyLock(pWal,xBusy,pBusyArg,WAL_READ_LOCK(0),1))==SQLITE_OK
){
u32 nBackfill = pInfo->nBackfill;
pInfo->nBackfillAttempted = mxSafeFrame;
/* Sync the WAL before any of its content reaches the database file. */
rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags));
if( rc==SQLITE_OK ){
i64 nReq = ((i64)mxPage * szPage);
i64 nSize;
sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_START, 0);
rc = sqlite3OsFileSize(pWal->pDbFd, &nSize);
if( rc==SQLITE_OK && nSize<nReq ){
/* If the database would have to grow by more than the WAL content plus
** 65536 bytes, something is corrupt; otherwise pass the final size to
** the VFS as a hint. */
if( (nSize+65536+(i64)pWal->hdr.mxFrame*szPage)<nReq ){
rc = SQLITE_CORRUPT_BKPT;
}else{
sqlite3OsFileControlHint(pWal->pDbFd, SQLITE_FCNTL_SIZE_HINT,&nReq);
}
}
}
/* Copy the pages in ascending page-number order. */
while( rc==SQLITE_OK && 0==walIteratorNext(pIter, &iDbpage, &iFrame) ){
i64 iOffset;
assert( walFramePgno(pWal, iFrame)==iDbpage );
if( AtomicLoad(&db->u1.isInterrupted) ){
rc = db->mallocFailed ? SQLITE_NOMEM_BKPT : SQLITE_INTERRUPT;
break;
}
/* Skip frames already backfilled, frames that are not yet safe, and
** pages beyond the end of the database. */
if( iFrame<=nBackfill || iFrame>mxSafeFrame || iDbpage>mxPage ){
continue;
}
iOffset = walFrameOffset(iFrame, szPage) + WAL_FRAME_HDRSIZE;
rc = sqlite3OsRead(pWal->pWalFd, zBuf, szPage, iOffset);
if( rc!=SQLITE_OK ) break;
iOffset = (iDbpage-1)*(i64)szPage;
testcase( IS_BIG_INT(iOffset) );
rc = sqlite3OsWrite(pWal->pDbFd, zBuf, szPage, iOffset);
if( rc!=SQLITE_OK ) break;
}
sqlite3OsFileControl(pWal->pDbFd, SQLITE_FCNTL_CKPT_DONE, 0);
if( rc==SQLITE_OK ){
/* If the whole WAL was copied, set the database file to its final size
** and sync it. */
if( mxSafeFrame==walIndexHdr(pWal)->mxFrame ){
i64 szDb = pWal->hdr.nPage*(i64)szPage;
testcase( IS_BIG_INT(szDb) );
rc = sqlite3OsTruncate(pWal->pDbFd, szDb);
if( rc==SQLITE_OK ){
rc = sqlite3OsSync(pWal->pDbFd, CKPT_SYNC_FLAGS(sync_flags));
}
}
/* Only now is the progress made visible to other connections. */
if( rc==SQLITE_OK ){
AtomicStore(&pInfo->nBackfill, mxSafeFrame);
}
}
walUnlockExclusive(pWal, WAL_READ_LOCK(0), 1);
}
/* Being blocked by readers is not a checkpoint failure. */
if( rc==SQLITE_BUSY ){
rc = SQLITE_OK;
}
}
/* Non-passive modes require the WAL to be fully backfilled. */
if( rc==SQLITE_OK && eMode!=SQLITE_CHECKPOINT_PASSIVE ){
assert( pWal->writeLock );
if( pInfo->nBackfill<pWal->hdr.mxFrame ){
rc = SQLITE_BUSY;
}else if( eMode>=SQLITE_CHECKPOINT_RESTART ){
u32 salt1;
sqlite3_randomness(4, &salt1);
assert( pInfo->nBackfill==pWal->hdr.mxFrame );
/* Wait until no reader holds any of the read locks 1 and up. */
rc = walBusyLock(pWal, xBusy, pBusyArg, WAL_READ_LOCK(1), WAL_NREADER-1);
if( rc==SQLITE_OK ){
if( eMode==SQLITE_CHECKPOINT_TRUNCATE ){
walRestartHdr(pWal, salt1);
rc = sqlite3OsTruncate(pWal->pWalFd, 0);
}
walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
}
}
}
walcheckpoint_out:
walIteratorFree(pIter);
return rc;
}
/*
** Truncate the WAL file to nMax bytes if it is currently larger.
**
** Failures are not returned to the caller; they are reported through
** sqlite3_log().  The file operations run between
** sqlite3BeginBenignMalloc() and sqlite3EndBenignMalloc().
*/
static void walLimitSize(Wal *pWal, i64 nMax){
  i64 szCurrent;
  int rc;

  sqlite3BeginBenignMalloc();
  rc = sqlite3OsFileSize(pWal->pWalFd, &szCurrent);
  if( rc==SQLITE_OK && szCurrent>nMax ){
    rc = sqlite3OsTruncate(pWal->pWalFd, nMax);
  }
  sqlite3EndBenignMalloc();

  if( rc!=SQLITE_OK ){
    sqlite3_log(rc, "cannot limit WAL size: %s", pWal->zWalName);
  }
}
/*
** Close the WAL connection pWal and free it.  A NULL pWal is a no-op.
**
**   pWal        The connection to close.
**   db          Connection passed through to the checkpoint.
**   sync_flags  Sync flags passed through to the checkpoint.
**   nBuf, zBuf  Scratch buffer for the checkpoint.  If zBuf is NULL no
**               checkpoint is attempted.
**
** If zBuf is provided and an exclusive lock on the database file can be
** obtained, a passive checkpoint is run.  If it succeeds, the WAL file is
** deleted after closing unless the VFS reports persistent-WAL mode, in
** which case the file is truncated to zero bytes when mxWalSize>=0.
**
** Returns SQLITE_OK, or the error from taking the lock or from the
** checkpoint.  The connection is closed and freed in either case.
*/
int sqlite3WalClose(
Wal *pWal,
sqlite3 *db,
int sync_flags,
int nBuf,
u8 *zBuf
){
int rc = SQLITE_OK;
if( pWal ){
int isDelete = 0;
if( zBuf!=0
&& SQLITE_OK==(rc = sqlite3OsLock(pWal->pDbFd, SQLITE_LOCK_EXCLUSIVE))
){
/* With the database file locked exclusively, wal-index locks are not
** needed: switch to exclusive mode so the lock helpers become no-ops. */
if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
}
rc = sqlite3WalCheckpoint(pWal, db,
SQLITE_CHECKPOINT_PASSIVE, 0, 0, sync_flags, nBuf, zBuf, 0, 0
);
if( rc==SQLITE_OK ){
/* bPersist stays -1 if the VFS does not handle the file-control. */
int bPersist = -1;
sqlite3OsFileControlHint(
pWal->pDbFd, SQLITE_FCNTL_PERSIST_WAL, &bPersist
);
if( bPersist!=1 ){
isDelete = 1;
}else if( pWal->mxWalSize>=0 ){
walLimitSize(pWal, 0);
}
}
}
walIndexClose(pWal, isDelete);
sqlite3OsClose(pWal->pWalFd);
if( isDelete ){
/* Failure to delete the WAL file is tolerated. */
sqlite3BeginBenignMalloc();
sqlite3OsDelete(pWal->pVfs, pWal->zWalName, 0);
sqlite3EndBenignMalloc();
}
WALTRACE(("WAL%p: closed\n", pWal));
sqlite3_free((void *)pWal->apWiData);
sqlite3_free(pWal);
}
return rc;
}
/*
** Try to read a consistent wal-index header from shared memory into
** pWal->hdr.
**
** Returns 0 if a valid header was read and non-zero otherwise.  The header
** is rejected if the two shared copies differ, if it is not marked
** initialized, or if its checksum does not match.  When the header read
** differs from the private copy, the private copy and pWal->szPage are
** updated and *pChanged is set to 1; *pChanged is never cleared here.
** The first wal-index page must already be mapped.
*/
static SQLITE_NO_TSAN int walIndexTryHdr(Wal *pWal, int *pChanged){
u32 aCksum[2];
WalIndexHdr h1, h2;
WalIndexHdr volatile *aHdr;
assert( pWal->nWiData>0 && pWal->apWiData[0] );
aHdr = walIndexHdr(pWal);
/* Read copy [0], barrier, then copy [1]: the reverse of the order in
** which walIndexWriteHdr() writes them. */
memcpy(&h1, (void *)&aHdr[0], sizeof(h1));
walShmBarrier(pWal);
memcpy(&h2, (void *)&aHdr[1], sizeof(h2));
/* Differing copies mean a writer was active between the two reads. */
if( memcmp(&h1, &h2, sizeof(h1))!=0 ){
return 1;
}
/* Header never initialized. */
if( h1.isInit==0 ){
return 1;
}
/* Checksum over everything that precedes the aCksum field. */
walChecksumBytes(1, (u8*)&h1, sizeof(h1)-sizeof(h1.aCksum), 0, aCksum);
if( aCksum[0]!=h1.aCksum[0] || aCksum[1]!=h1.aCksum[1] ){
return 1;
}
if( memcmp(&pWal->hdr, &h1, sizeof(WalIndexHdr)) ){
*pChanged = 1;
memcpy(&pWal->hdr, &h1, sizeof(WalIndexHdr));
pWal->szPage = (pWal->hdr.szPage&0xfe00) + ((pWal->hdr.szPage&0x0001)<<16);
testcase( pWal->szPage<=32768 );
testcase( pWal->szPage>=65536 );
}
return 0;
}
/*
** Internal result code asking the caller to retry the operation.
*/
#define WAL_RETRY (-1)
/*
** Read the wal-index header into pWal->hdr, running recovery if no valid
** header can be read.
**
** *pChanged is set to 1 if the header differs from the previous private
** copy, if recovery was run, or if the shared memory turned out to be
** unreliable (SQLITE_READONLY_CANTINIT from the VFS).
**
** Returns SQLITE_OK on success.  Other results include
** SQLITE_READONLY_RECOVERY (recovery is needed but shared memory is
** read-only), SQLITE_CANTOPEN (unsupported wal-index version), WAL_RETRY,
** and errors from locking, mapping or recovery.
*/
static int walIndexReadHdr(Wal *pWal, int *pChanged){
int rc;
int badHdr;
volatile u32 *page0;
assert( pChanged );
rc = walIndexPage(pWal, 0, &page0);
if( rc!=SQLITE_OK ){
/* walIndexPageRealloc() has already turned plain READONLY into OK. */
assert( rc!=SQLITE_READONLY );
if( rc==SQLITE_READONLY_CANTINIT ){
/* Read-only shared memory that cannot be trusted: fall back to a
** heap-memory wal-index for the rest of this call. */
assert( page0==0 );
assert( pWal->writeLock==0 );
assert( pWal->readOnly & WAL_SHM_RDONLY );
pWal->bShmUnreliable = 1;
pWal->exclusiveMode = WAL_HEAPMEMORY_MODE;
*pChanged = 1;
}else{
return rc;
}
}else{
testcase( page0!=0 );
}
assert( page0!=0 || pWal->writeLock==0 );
/* First attempt, without holding any lock. */
badHdr = (page0 ? walIndexTryHdr(pWal, pChanged) : 1);
if( badHdr ){
if( pWal->bShmUnreliable==0 && (pWal->readOnly & WAL_SHM_RDONLY) ){
/* Read-only shared memory cannot be recovered by this connection.  If a
** shared write lock can be taken, report that recovery is required;
** otherwise the lock error is returned. */
if( SQLITE_OK==(rc = walLockShared(pWal, WAL_WRITE_LOCK)) ){
walUnlockShared(pWal, WAL_WRITE_LOCK);
rc = SQLITE_READONLY_RECOVERY;
}
}else{
/* Take the write lock (unless already held), then retry the read and,
** if the header is still bad, run recovery. */
int bWriteLock = pWal->writeLock;
if( bWriteLock || SQLITE_OK==(rc = walLockWriter(pWal)) ){
pWal->writeLock = 1;
if( SQLITE_OK==(rc = walIndexPage(pWal, 0, &page0)) ){
badHdr = walIndexTryHdr(pWal, pChanged);
if( badHdr ){
rc = walIndexRecover(pWal);
*pChanged = 1;
}
}
/* Release the write lock only if it was taken here. */
if( bWriteLock==0 ){
pWal->writeLock = 0;
walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
}
}
}
}
/* A valid header of an unsupported version cannot be used. */
if( badHdr==0 && pWal->hdr.iVersion!=WALINDEX_MAX_VERSION ){
rc = SQLITE_CANTOPEN_BKPT;
}
if( pWal->bShmUnreliable ){
if( rc!=SQLITE_OK ){
/* Drop the heap-memory wal-index again; a short read of the WAL is
** reported as WAL_RETRY. */
walIndexClose(pWal, 0);
pWal->bShmUnreliable = 0;
assert( pWal->nWiData>0 && pWal->apWiData[0]==0 );
if( rc==SQLITE_IOERR_SHORT_READ ) rc = WAL_RETRY;
}
pWal->exclusiveMode = WAL_NORMAL_MODE;
}
return rc;
}
/*
** Start a read transaction for a connection whose wal-index has been
** flagged as unreliable (pWal->bShmUnreliable set on a WAL_SHM_RDONLY
** connection, with page 0 of apWiData[] populated).
**
** The routine takes WAL_READ_LOCK(0), copies the header from the wal-index
** into pWal->hdr and then validates that copy against the WAL file itself:
** the salt values must match bytes 16..23 of the WAL header, and no
** decodable frame located after hdr.mxFrame may carry a non-zero
** nTruncate field.
**
** Returns SQLITE_OK on success, WAL_RETRY when the caller should try again,
** or an error code.  On any non-OK result every apWiData[] entry is freed
** and zeroed, bShmUnreliable is cleared, the read transaction is ended and
** *pChanged is set to 1.
*/
static int walBeginShmUnreliable(Wal *pWal, int *pChanged){
i64 szWal;
i64 iOffset;
u8 aBuf[WAL_HDRSIZE];
u8 *aFrame = 0;
int szFrame;
u8 *aData;
volatile void *pDummy;
int rc;
u32 aSaveCksum[2];
assert( pWal->bShmUnreliable );
assert( pWal->readOnly & WAL_SHM_RDONLY );
assert( pWal->nWiData>0 && pWal->apWiData[0] );
rc = walLockShared(pWal, WAL_READ_LOCK(0));
if( rc!=SQLITE_OK ){
if( rc==SQLITE_BUSY ) rc = WAL_RETRY;
goto begin_unreliable_shm_out;
}
pWal->readLock = 0;
/* Ask the VFS for the first wal-index region again.  Only a result of
** SQLITE_READONLY_CANTINIT lets this routine continue; SQLITE_READONLY is
** turned into WAL_RETRY and anything else is returned as-is. */
rc = sqlite3OsShmMap(pWal->pDbFd, 0, WALINDEX_PGSZ, 0, &pDummy);
assert( rc!=SQLITE_OK );
if( rc!=SQLITE_READONLY_CANTINIT ){
rc = (rc==SQLITE_READONLY ? WAL_RETRY : rc);
goto begin_unreliable_shm_out;
}
/* Work from the header currently stored in the wal-index. */
memcpy(&pWal->hdr, (void*)walIndexHdr(pWal), sizeof(WalIndexHdr));
rc = sqlite3OsFileSize(pWal->pWalFd, &szWal);
if( rc!=SQLITE_OK ){
goto begin_unreliable_shm_out;
}
if( szWal<WAL_HDRSIZE ){
/* The WAL file is too small to hold a header.  That is acceptable only
** if the wal-index header also says there are no frames. */
*pChanged = 1;
rc = (pWal->hdr.mxFrame==0 ? SQLITE_OK : WAL_RETRY);
goto begin_unreliable_shm_out;
}
rc = sqlite3OsRead(pWal->pWalFd, aBuf, WAL_HDRSIZE, 0);
if( rc!=SQLITE_OK ){
goto begin_unreliable_shm_out;
}
/* Bytes 16..23 of the WAL header must equal hdr.aSalt[]. */
if( memcmp(&pWal->hdr.aSalt, &aBuf[16], 8) ){
rc = WAL_RETRY;
goto begin_unreliable_shm_out;
}
assert( (pWal->szPage & (pWal->szPage-1))==0 );
assert( pWal->szPage>=512 && pWal->szPage<=65536 );
szFrame = pWal->szPage + WAL_FRAME_HDRSIZE;
aFrame = (u8 *)sqlite3_malloc64(szFrame);
if( aFrame==0 ){
rc = SQLITE_NOMEM_BKPT;
goto begin_unreliable_shm_out;
}
aData = &aFrame[WAL_FRAME_HDRSIZE];
/* hdr.aFrameCksum[] is saved here and restored after the scan below. */
aSaveCksum[0] = pWal->hdr.aFrameCksum[0];
aSaveCksum[1] = pWal->hdr.aFrameCksum[1];
/* Scan whole frames that follow hdr.mxFrame in the WAL file.  The scan
** stops at a read error or at the first frame walDecodeFrame() rejects.
** A decoded frame with non-zero nTruncate yields WAL_RETRY. */
for(iOffset=walFrameOffset(pWal->hdr.mxFrame+1, pWal->szPage);
iOffset+szFrame<=szWal;
iOffset+=szFrame
){
u32 pgno;
u32 nTruncate;
rc = sqlite3OsRead(pWal->pWalFd, aFrame, szFrame, iOffset);
if( rc!=SQLITE_OK ) break;
if( !walDecodeFrame(pWal, &pgno, &nTruncate, aData, aFrame) ) break;
if( nTruncate ){
rc = WAL_RETRY;
break;
}
}
pWal->hdr.aFrameCksum[0] = aSaveCksum[0];
pWal->hdr.aFrameCksum[1] = aSaveCksum[1];
begin_unreliable_shm_out:
sqlite3_free(aFrame);
if( rc!=SQLITE_OK ){
/* Failure: discard every wal-index page, clear the unreliable flag and
** end the read transaction (sqlite3WalEndReadTransaction() releases the
** read lock if readLock>=0). */
int i;
for(i=0; i<pWal->nWiData; i++){
sqlite3_free((void*)pWal->apWiData[i]);
pWal->apWiData[i] = 0;
}
pWal->bShmUnreliable = 0;
sqlite3WalEndReadTransaction(pWal);
*pChanged = 1;
}
return rc;
}
/*
** Make one attempt at opening a read transaction.
**
**   pWal      The WAL handle; must not currently hold a read lock.
**   pChanged  Passed to walIndexReadHdr() / walBeginShmUnreliable().
**   useWal    When zero the wal-index header is (re)read first and
**             WAL_READ_LOCK(0) may be used.  May be non-zero only when the
**             connection is not WAL_SHM_RDONLY.
**   cnt       Attempt counter supplied by the caller.  Attempts 6 and up
**             sleep before proceeding; once cnt exceeds 100 the routine
**             returns SQLITE_PROTOCOL.
**
** Returns SQLITE_OK with pWal->readLock set to the index of the read lock
** held, WAL_RETRY if the caller should call again, or an error code.
*/
static int walTryBeginRead(Wal *pWal, int *pChanged, int useWal, int cnt){
volatile WalCkptInfo *pInfo;
u32 mxReadMark;
int mxI;
int i;
int rc = SQLITE_OK;
u32 mxFrame;
assert( pWal->readLock<0 );
assert( (pWal->readOnly & WAL_SHM_RDONLY)==0 || useWal==0 );
/* Back off between retries: a delay argument of 1 for attempts 6..9, then
** (cnt-9)^2 * 39 from attempt 10 onwards. */
if( cnt>5 ){
int nDelay = 1;
if( cnt>100 ){
VVA_ONLY( pWal->lockError = 1; )
return SQLITE_PROTOCOL;
}
if( cnt>=10 ) nDelay = (cnt-9)*(cnt-9)*39;
sqlite3OsSleep(pWal->pVfs, nDelay);
}
if( !useWal ){
assert( rc==SQLITE_OK );
if( pWal->bShmUnreliable==0 ){
rc = walIndexReadHdr(pWal, pChanged);
}
if( rc==SQLITE_BUSY ){
/* Reading the header reported SQLITE_BUSY.  With no wal-index page 0
** simply retry.  Otherwise probe WAL_RECOVER_LOCK: if it can be taken
** (and is immediately released) retry, and if it is itself busy report
** SQLITE_BUSY_RECOVERY. */
if( pWal->apWiData[0]==0 ){
rc = WAL_RETRY;
}else if( SQLITE_OK==(rc = walLockShared(pWal, WAL_RECOVER_LOCK)) ){
walUnlockShared(pWal, WAL_RECOVER_LOCK);
rc = WAL_RETRY;
}else if( rc==SQLITE_BUSY ){
rc = SQLITE_BUSY_RECOVERY;
}
}
if( rc!=SQLITE_OK ){
return rc;
}
else if( pWal->bShmUnreliable ){
return walBeginShmUnreliable(pWal, pChanged);
}
}
assert( pWal->nWiData>0 );
assert( pWal->apWiData[0]!=0 );
pInfo = walCkptInfo(pWal);
/* If nBackfill equals hdr.mxFrame (and, in snapshot builds, no snapshot is
** pending or the WAL is empty) try to use WAL_READ_LOCK(0). */
if( !useWal && AtomicLoad(&pInfo->nBackfill)==pWal->hdr.mxFrame
#ifdef SQLITE_ENABLE_SNAPSHOT
&& (pWal->pSnapshot==0 || pWal->hdr.mxFrame==0)
#endif
){
rc = walLockShared(pWal, WAL_READ_LOCK(0));
walShmBarrier(pWal);
if( rc==SQLITE_OK ){
/* The header may have changed between reading it and obtaining the
** lock; if so, drop the lock and ask the caller to retry. */
if( memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr)) ){
walUnlockShared(pWal, WAL_READ_LOCK(0));
return WAL_RETRY;
}
pWal->readLock = 0;
return SQLITE_OK;
}else if( rc!=SQLITE_BUSY ){
return rc;
}
}
mxReadMark = 0;
mxI = 0;
mxFrame = pWal->hdr.mxFrame;
#ifdef SQLITE_ENABLE_SNAPSHOT
if( pWal->pSnapshot && pWal->pSnapshot->mxFrame<mxFrame ){
mxFrame = pWal->pSnapshot->mxFrame;
}
#endif
/* Find the slot in 1..WAL_NREADER-1 holding the largest read mark that
** does not exceed mxFrame.  Because of "<=" a later slot wins a tie. */
for(i=1; i<WAL_NREADER; i++){
u32 thisMark = AtomicLoad(pInfo->aReadMark+i);
if( mxReadMark<=thisMark && thisMark<=mxFrame ){
assert( thisMark!=READMARK_NOT_USED );
mxReadMark = thisMark;
mxI = i;
}
}
/* If no slot was found, or the best mark is below mxFrame, try to set some
** slot to exactly mxFrame.  This is skipped for WAL_SHM_RDONLY
** connections.  The first slot whose exclusive lock can be obtained is
** used. */
if( (pWal->readOnly & WAL_SHM_RDONLY)==0
&& (mxReadMark<mxFrame || mxI==0)
){
for(i=1; i<WAL_NREADER; i++){
rc = walLockExclusive(pWal, WAL_READ_LOCK(i), 1);
if( rc==SQLITE_OK ){
AtomicStore(pInfo->aReadMark+i,mxFrame);
mxReadMark = mxFrame;
mxI = i;
walUnlockExclusive(pWal, WAL_READ_LOCK(i), 1);
break;
}else if( rc!=SQLITE_BUSY ){
return rc;
}
}
}
if( mxI==0 ){
/* No usable slot: retry if the slots were busy, otherwise (read-only
** wal-index) report SQLITE_READONLY_CANTINIT. */
assert( rc==SQLITE_BUSY || (pWal->readOnly & WAL_SHM_RDONLY)!=0 );
return rc==SQLITE_BUSY ? WAL_RETRY : SQLITE_READONLY_CANTINIT;
}
rc = walLockShared(pWal, WAL_READ_LOCK(mxI));
if( rc ){
return rc==SQLITE_BUSY ? WAL_RETRY : rc;
}
/* minFrame is the first frame after those counted by nBackfill. */
pWal->minFrame = AtomicLoad(&pInfo->nBackfill)+1;
walShmBarrier(pWal);
/* Re-validate now that the lock is held: both the chosen read mark and
** the wal-index header must be unchanged, otherwise retry. */
if( AtomicLoad(pInfo->aReadMark+mxI)!=mxReadMark
|| memcmp((void *)walIndexHdr(pWal), &pWal->hdr, sizeof(WalIndexHdr))
){
walUnlockShared(pWal, WAL_READ_LOCK(mxI));
return WAL_RETRY;
}else{
assert( mxReadMark<=pWal->hdr.mxFrame );
pWal->readLock = (i16)mxI;
}
return rc;
}
#ifdef SQLITE_ENABLE_SNAPSHOT
/*
** Attempt to lower WalCkptInfo.nBackfillAttempted.
**
** While holding WAL_CKPT_LOCK exclusively, frames are visited from
** nBackfillAttempted down towards nBackfill+1.  For each frame whose page
** lies entirely within the current database file, the page image stored in
** the WAL is compared with the page in the database file.  The walk stops
** at the first frame whose two images are identical (or on error);
** otherwise nBackfillAttempted is reduced to one less than the frame just
** visited.
**
** The caller must hold a read lock (readLock>=0).  Returns SQLITE_OK,
** SQLITE_NOMEM if a page buffer cannot be allocated, or an error code from
** the locking, hash lookup or file I/O calls.
*/
int sqlite3WalSnapshotRecover(Wal *pWal){
int rc;
assert( pWal->readLock>=0 );
rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
if( rc==SQLITE_OK ){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
int szPage = (int)pWal->szPage;
i64 szDb;
rc = sqlite3OsFileSize(pWal->pDbFd, &szDb);
if( rc==SQLITE_OK ){
/* pBuf1 receives the WAL copy of a page, pBuf2 the database copy. */
void *pBuf1 = sqlite3_malloc(szPage);
void *pBuf2 = sqlite3_malloc(szPage);
if( pBuf1==0 || pBuf2==0 ){
rc = SQLITE_NOMEM;
}else{
u32 i = pInfo->nBackfillAttempted;
for(i=pInfo->nBackfillAttempted; i>AtomicLoad(&pInfo->nBackfill); i--){
WalHashLoc sLoc;
u32 pgno;
i64 iDbOff;
i64 iWalOff;
rc = walHashGet(pWal, walFramePage(i), &sLoc);
if( rc!=SQLITE_OK ) break;
assert( i - sLoc.iZero - 1 >=0 );
/* Page number recorded for frame i. */
pgno = sLoc.aPgno[i-sLoc.iZero-1];
iDbOff = (i64)(pgno-1) * szPage;
/* Pages that extend past the end of the database file are not
** compared. */
if( iDbOff+szPage<=szDb ){
iWalOff = walFrameOffset(i, szPage) + WAL_FRAME_HDRSIZE;
rc = sqlite3OsRead(pWal->pWalFd, pBuf1, szPage, iWalOff);
if( rc==SQLITE_OK ){
rc = sqlite3OsRead(pWal->pDbFd, pBuf2, szPage, iDbOff);
}
if( rc!=SQLITE_OK || 0==memcmp(pBuf1, pBuf2, szPage) ){
break;
}
}
pInfo->nBackfillAttempted = i-1;
}
}
/* sqlite3_free() is called on both buffers even when one allocation
** failed. */
sqlite3_free(pBuf1);
sqlite3_free(pBuf2);
}
walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
}
return rc;
}
#endif
/*
** Open a read transaction on the WAL.
**
** walTryBeginRead() is called repeatedly, with an increasing attempt
** counter, until it returns something other than WAL_RETRY.  *pChanged is
** passed through to it.
**
** In SQLITE_ENABLE_SNAPSHOT builds with pWal->pSnapshot set, a shared
** WAL_CKPT_LOCK is held for the duration of the call, and after the read
** lock is obtained the snapshot is validated: the salt values must still
** match and pSnapshot->mxFrame must be at least nBackfillAttempted.  When
** valid, pWal->hdr is replaced by the snapshot header and *pChanged is set
** according to whether the snapshot differed from the header cached on
** entry.  When not valid, SQLITE_ERROR_SNAPSHOT is returned and the read
** transaction is closed again.
**
** Returns SQLITE_OK or an error code.
*/
int sqlite3WalBeginReadTransaction(Wal *pWal, int *pChanged){
int rc;
int cnt = 0;
#ifdef SQLITE_ENABLE_SNAPSHOT
int bChanged = 0;
WalIndexHdr *pSnapshot = pWal->pSnapshot;
#endif
assert( pWal->ckptLock==0 );
#ifdef SQLITE_ENABLE_SNAPSHOT
if( pSnapshot ){
/* Record whether the snapshot differs from the currently cached header;
** this becomes the value of *pChanged if the snapshot is opened. */
if( memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
bChanged = 1;
}
/* Take a shared WAL_CKPT_LOCK, with blocking enabled only around the
** lock call.  It is released at the end of this function. */
(void)walEnableBlocking(pWal);
rc = walLockShared(pWal, WAL_CKPT_LOCK);
walDisableBlocking(pWal);
if( rc!=SQLITE_OK ){
return rc;
}
pWal->ckptLock = 1;
}
#endif
do{
rc = walTryBeginRead(pWal, pChanged, 0, ++cnt);
}while( rc==WAL_RETRY );
testcase( (rc&0xff)==SQLITE_BUSY );
testcase( (rc&0xff)==SQLITE_IOERR );
testcase( rc==SQLITE_PROTOCOL );
testcase( rc==SQLITE_OK );
#ifdef SQLITE_ENABLE_SNAPSHOT
if( rc==SQLITE_OK ){
if( pSnapshot && memcmp(pSnapshot, &pWal->hdr, sizeof(WalIndexHdr))!=0 ){
volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
assert( pWal->readLock>0 || pWal->hdr.mxFrame==0 );
assert( pInfo->aReadMark[pWal->readLock]<=pSnapshot->mxFrame );
/* The snapshot is usable only if the salts are unchanged and nothing
** beyond pSnapshot->mxFrame has been recorded in nBackfillAttempted. */
if( !memcmp(pSnapshot->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
&& pSnapshot->mxFrame>=pInfo->nBackfillAttempted
){
assert( pWal->readLock>0 );
memcpy(&pWal->hdr, pSnapshot, sizeof(WalIndexHdr));
*pChanged = bChanged;
}else{
rc = SQLITE_ERROR_SNAPSHOT;
}
/* When reading a snapshot other than the current header, minFrame is
** reset to 1. */
pWal->minFrame = 1;
if( rc!=SQLITE_OK ){
sqlite3WalEndReadTransaction(pWal);
}
}
}
/* Drop the shared WAL_CKPT_LOCK taken above, if any. */
if( pWal->ckptLock ){
assert( pSnapshot );
walUnlockShared(pWal, WAL_CKPT_LOCK);
pWal->ckptLock = 0;
}
#endif
return rc;
}
/*
** Close the current read transaction.  Any open write transaction is
** ended first via sqlite3WalEndWriteTransaction().  If a read lock is held
** (readLock>=0) it is released and readLock is reset to -1; otherwise
** nothing further happens.
*/
void sqlite3WalEndReadTransaction(Wal *pWal){
  sqlite3WalEndWriteTransaction(pWal);
  if( pWal->readLock<0 ) return;
  walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
  pWal->readLock = -1;
}
/*
** Look up page pgno in the wal-index and report the frame that holds it.
**
** On success *piRead is set to the frame number, or to 0 when the page is
** not found in the frame range pWal->minFrame..pWal->hdr.mxFrame.  The
** answer is 0 without any search when hdr.mxFrame is 0, or when
** WAL_READ_LOCK(0) is held and the wal-index is not flagged unreliable.
**
** Returns SQLITE_OK, an error code from walHashGet(), or
** SQLITE_CORRUPT_BKPT if a hash chain is followed for more than
** HASHTABLE_NSLOT steps.  *piRead is only written on SQLITE_OK.
*/
int sqlite3WalFindFrame(
Wal *pWal,
Pgno pgno,
u32 *piRead
){
u32 iRead = 0;
u32 iLast = pWal->hdr.mxFrame;
int iHash;
int iMinHash;
assert( pWal->readLock>=0 || pWal->lockError );
if( iLast==0 || (pWal->readLock==0 && pWal->bShmUnreliable==0) ){
*piRead = 0;
return SQLITE_OK;
}
/* Visit hash tables from the one covering iLast back to the one covering
** minFrame, stopping at the first table that yields a match. */
iMinHash = walFramePage(pWal->minFrame);
for(iHash=walFramePage(iLast); iHash>=iMinHash; iHash--){
WalHashLoc sLoc;
int iKey;
int nCollide;
int rc;
u32 iH;
rc = walHashGet(pWal, iHash, &sLoc);
if( rc!=SQLITE_OK ){
return rc;
}
/* nCollide bounds the number of probes so that the loop terminates even
** if the table never presents an empty (zero) slot. */
nCollide = HASHTABLE_NSLOT;
iKey = walHash(pgno);
while( (iH = AtomicLoad(&sLoc.aHash[iKey]))!=0 ){
u32 iFrame = iH + sLoc.iZero;
/* Accept only frames inside [minFrame, iLast] that map to pgno.  Later
** matches overwrite earlier ones; the assert records the expectation
** that matches appear in increasing frame order. */
if( iFrame<=iLast && iFrame>=pWal->minFrame && sLoc.aPgno[iH-1]==pgno ){
assert( iFrame>iRead || CORRUPT_DB );
iRead = iFrame;
}
if( (nCollide--)==0 ){
return SQLITE_CORRUPT_BKPT;
}
iKey = walNextHash(iKey);
}
if( iRead ) break;
}
#ifdef SQLITE_ENABLE_EXPENSIVE_ASSERT
/* Cross-check the hash lookup against a linear scan from iLast downwards. */
{
u32 iRead2 = 0;
u32 iTest;
assert( pWal->bShmUnreliable || pWal->minFrame>0 );
for(iTest=iLast; iTest>=pWal->minFrame && iTest>0; iTest--){
if( walFramePgno(pWal, iTest)==pgno ){
iRead2 = iTest;
break;
}
}
assert( iRead==iRead2 );
}
#endif
*piRead = iRead;
return SQLITE_OK;
}
/*
** Read the page image stored in WAL frame iRead into pOut.
**
** The page size is decoded from pWal->hdr.szPage.  At most nOut bytes are
** read, and never more than one page.  The read begins just past the
** frame header of frame iRead.  Returns the result of sqlite3OsRead().
*/
int sqlite3WalReadFrame(Wal *pWal, u32 iRead, int nOut, u8 *pOut){
  int szEncoded = pWal->hdr.szPage;   /* Page size as stored in the header */
  int szPg;                           /* Decoded page size in bytes */
  int nWanted = nOut;                 /* Bytes to actually read */
  i64 iDataOff;                       /* File offset of the page image */

  szPg = (szEncoded&0xfe00) + ((szEncoded&0x0001)<<16);
  testcase( szPg<=32768 );
  testcase( szPg>=65536 );
  if( nWanted>szPg ){
    nWanted = szPg;
  }
  iDataOff = walFrameOffset(iRead, szPg) + WAL_FRAME_HDRSIZE;
  return sqlite3OsRead(pWal->pWalFd, pOut, nWanted, iDataOff);
}
/*
** Return hdr.nPage from the cached wal-index header.  Zero is returned
** when pWal is NULL or when no read lock is held (the latter is flagged
** with ALWAYS() as not expected to happen).
*/
Pgno sqlite3WalDbsize(Wal *pWal){
  if( pWal==0 ) return 0;
  if( !ALWAYS(pWal->readLock>=0) ) return 0;
  return pWal->hdr.nPage;
}
/*
** Begin a write transaction.  A read transaction must already be open.
**
** Returns SQLITE_READONLY for a read-only connection, the lock error if
** WAL_WRITE_LOCK cannot be obtained, or SQLITE_BUSY_SNAPSHOT (after
** releasing the lock again) when the wal-index header no longer matches
** the copy cached in pWal->hdr.  On SQLITE_OK, pWal->writeLock is set.
**
** In SQLITE_ENABLE_SETLK_TIMEOUT builds the call is a no-op returning
** SQLITE_OK if the write lock is already held.
*/
int sqlite3WalBeginWriteTransaction(Wal *pWal){
  int rc;

#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
  if( pWal->writeLock ){
    assert( !memcmp(&pWal->hdr,(void *)walIndexHdr(pWal),sizeof(WalIndexHdr)) );
    return SQLITE_OK;
  }
#endif

  assert( pWal->readLock>=0 );
  assert( pWal->writeLock==0 && pWal->iReCksum==0 );

  if( pWal->readOnly ){
    return SQLITE_READONLY;
  }

  rc = walLockExclusive(pWal, WAL_WRITE_LOCK, 1);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  pWal->writeLock = 1;

  /* Header unchanged since the read transaction began: ready to write. */
  if( memcmp(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr))==0 ){
    return SQLITE_OK;
  }

  /* The header has moved on since the read transaction was opened. */
  walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  pWal->writeLock = 0;
  return SQLITE_BUSY_SNAPSHOT;
}
/*
** End the write transaction, if one is open: release WAL_WRITE_LOCK and
** clear the writeLock, iReCksum and truncateOnCommit fields.  Always
** returns SQLITE_OK.
*/
int sqlite3WalEndWriteTransaction(Wal *pWal){
  if( pWal->writeLock==0 ){
    return SQLITE_OK;
  }
  walUnlockExclusive(pWal, WAL_WRITE_LOCK, 1);
  pWal->writeLock = 0;
  pWal->iReCksum = 0;
  pWal->truncateOnCommit = 0;
  return SQLITE_OK;
}
/*
** Roll back the frames written by the current write transaction.
**
** The cached header is reloaded from the wal-index.  xUndo(pUndoCtx, pgno)
** is then invoked once for each frame between the reloaded hdr.mxFrame
** (exclusive) and the mxFrame value held before the reload (inclusive),
** stopping early if xUndo returns non-zero.  If mxFrame changed as a
** result of the reload, walCleanupHash() is called.
**
** Returns SQLITE_OK or the first non-zero value returned by xUndo.
*/
int sqlite3WalUndo(Wal *pWal, int (*xUndo)(void *, Pgno), void *pUndoCtx){
  int rc = SQLITE_OK;
  Pgno iPrevMax;                  /* hdr.mxFrame before the header reload */
  Pgno iNext;                     /* Next frame to hand to xUndo */

  if( !ALWAYS(pWal->writeLock) ){
    return rc;
  }

  iPrevMax = pWal->hdr.mxFrame;
  memcpy(&pWal->hdr, (void *)walIndexHdr(pWal), sizeof(WalIndexHdr));

  iNext = pWal->hdr.mxFrame+1;
  while( ALWAYS(rc==SQLITE_OK) && iNext<=iPrevMax ){
    assert( walFramePgno(pWal, iNext)!=1 );
    rc = xUndo(pUndoCtx, walFramePgno(pWal, iNext));
    iNext++;
  }

  if( iPrevMax!=pWal->hdr.mxFrame ){
    walCleanupHash(pWal);
  }
  return rc;
}
/*
** Record the current write position in aWalData[0..3]:
**
**   [0]  hdr.mxFrame
**   [1]  hdr.aFrameCksum[0]
**   [2]  hdr.aFrameCksum[1]
**   [3]  pWal->nCkpt
**
** The write lock must be held.
*/
void sqlite3WalSavepoint(Wal *pWal, u32 *aWalData){
  const WalIndexHdr *pHdr = &pWal->hdr;

  assert( pWal->writeLock );
  aWalData[0] = pHdr->mxFrame;
  aWalData[1] = pHdr->aFrameCksum[0];
  aWalData[2] = pHdr->aFrameCksum[1];
  aWalData[3] = pWal->nCkpt;
}
/*
** Rewind the WAL to the position recorded in aWalData[] by
** sqlite3WalSavepoint().
**
** If aWalData[3] no longer equals pWal->nCkpt, the savepoint is rewritten
** in place to frame 0 with the current nCkpt value.  If the saved frame
** is below hdr.mxFrame, mxFrame and aFrameCksum[] are restored from the
** savepoint and walCleanupHash() is called.  Always returns SQLITE_OK.
*/
int sqlite3WalSavepointUndo(Wal *pWal, u32 *aWalData){
  assert( pWal->writeLock );
  assert( aWalData[3]!=pWal->nCkpt || aWalData[0]<=pWal->hdr.mxFrame );

  if( pWal->nCkpt!=aWalData[3] ){
    aWalData[0] = 0;
    aWalData[3] = pWal->nCkpt;
  }

  if( pWal->hdr.mxFrame>aWalData[0] ){
    pWal->hdr.mxFrame = aWalData[0];
    pWal->hdr.aFrameCksum[0] = aWalData[1];
    pWal->hdr.aFrameCksum[1] = aWalData[2];
    walCleanupHash(pWal);
  }
  return SQLITE_OK;
}
/*
** Called before frames are written.  This routine does something only when
** the connection is holding WAL_READ_LOCK(0) (pWal->readLock==0):
**
**   1. If nBackfill is non-zero, try to lock read locks 1..WAL_NREADER-1
**      exclusively.  On success walRestartHdr() is called with a fresh
**      random salt and the locks are released.  SQLITE_BUSY from the lock
**      attempt is tolerated; any other error is returned.
**
**   2. WAL_READ_LOCK(0) is released and a new read lock is obtained by
**      calling walTryBeginRead() with useWal=1 until it stops returning
**      WAL_RETRY.
**
** Returns SQLITE_OK or an error code.
*/
static int walRestartLog(Wal *pWal){
  int rc = SQLITE_OK;
  int cnt;

  if( pWal->readLock==0 ){
    volatile WalCkptInfo *pInfo = walCkptInfo(pWal);
    assert( pInfo->nBackfill==pWal->hdr.mxFrame );
    if( pInfo->nBackfill>0 ){
      u32 salt1;
      sqlite3_randomness(4, &salt1);
      rc = walLockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      if( rc==SQLITE_OK ){
        walRestartHdr(pWal, salt1);
        walUnlockExclusive(pWal, WAL_READ_LOCK(1), WAL_NREADER-1);
      }else if( rc!=SQLITE_BUSY ){
        return rc;
      }
    }
    walUnlockShared(pWal, WAL_READ_LOCK(0));
    pWal->readLock = -1;
    cnt = 0;
    do{
      /* walTryBeginRead() requires a pChanged output; its value is not
      ** needed here. */
      int notUsed;
      rc = walTryBeginRead(pWal, &notUsed, 1, ++cnt);
    }while( rc==WAL_RETRY );
    assert( (rc&0xff)!=SQLITE_BUSY );
    testcase( (rc&0xff)==SQLITE_IOERR );
    testcase( rc==SQLITE_PROTOCOL );
    testcase( rc==SQLITE_OK );
  }
  return rc;
}
/*
** Parameters shared by walWriteToLog() and walWriteOneFrame() while frames
** are being written to the WAL file.
*/
typedef struct WalWriter {
Wal *pWal;                 /* WAL handle handed to walEncodeFrame() */
sqlite3_file *pFd;         /* File handle that is written and synced */
sqlite3_int64 iSyncPoint;  /* Offset at which walWriteToLog() splits a write and syncs */
int syncFlags;             /* Filtered through WAL_SYNC_FLAGS() for sqlite3OsSync() */
int szPage;                /* Bytes of page data written per frame */
} WalWriter;
/*
** Write iAmt bytes from pContent to the WAL file at offset iOffset.
**
** If the write begins before p->iSyncPoint and reaches or crosses it, the
** write is split: the part up to the sync point is written, the file is
** synced, and then the remainder (if any) is written.  Returns SQLITE_OK
** or the first error reported by the write or sync calls.
*/
static int walWriteToLog(
  WalWriter *p,              /* Write parameters */
  void *pContent,            /* Bytes to write */
  int iAmt,                  /* Number of bytes to write */
  sqlite3_int64 iOffset      /* File offset of the first byte */
){
  int rc;

  if( iOffset<p->iSyncPoint && iOffset+iAmt>=p->iSyncPoint ){
    const int nBeforeSync = (int)(p->iSyncPoint - iOffset);

    rc = sqlite3OsWrite(p->pFd, pContent, nBeforeSync, iOffset);
    if( rc!=SQLITE_OK ) return rc;

    pContent = (void*)((char*)pContent + nBeforeSync);
    iOffset += nBeforeSync;
    iAmt -= nBeforeSync;

    assert( WAL_SYNC_FLAGS(p->syncFlags)!=0 );
    rc = sqlite3OsSync(p->pFd, WAL_SYNC_FLAGS(p->syncFlags));
    if( rc!=SQLITE_OK || iAmt==0 ) return rc;
  }
  return sqlite3OsWrite(p->pFd, pContent, iAmt, iOffset);
}
/*
** Write one frame to the WAL file at offset iOffset: a header of
** WAL_FRAME_HDRSIZE bytes built by walEncodeFrame(), followed by
** p->szPage bytes of page data taken from pPage->pData.  Returns SQLITE_OK
** or the first error from walWriteToLog().
*/
static int walWriteOneFrame(
  WalWriter *p,               /* Write parameters */
  PgHdr *pPage,               /* Page whose content forms the frame */
  int nTruncate,              /* Value passed to walEncodeFrame() */
  sqlite3_int64 iOffset       /* File offset at which the frame starts */
){
  u8 aHdr[WAL_FRAME_HDRSIZE]; /* Encoded frame header */
  void *pPayload = pPage->pData;
  int rc;

  walEncodeFrame(p->pWal, pPage->pgno, nTruncate, pPayload, aHdr);
  rc = walWriteToLog(p, aHdr, sizeof(aHdr), iOffset);
  if( rc==SQLITE_OK ){
    rc = walWriteToLog(p, pPayload, p->szPage, iOffset+sizeof(aHdr));
  }
  return rc;
}
/*
** Re-encode the frame headers of frames pWal->iReCksum..iLast.
**
** The running checksum in hdr.aFrameCksum[] is seeded from the WAL file:
** from offset 24 when iReCksum is 1, otherwise from byte offset 16 of the
** frame preceding iReCksum.  Each frame in the range is then read back,
** its header rebuilt with walEncodeFrame() from the page number and
** db-size fields it already contains, and the new header written over the
** old one.  pWal->iReCksum is reset to 0.
**
** Returns SQLITE_OK, SQLITE_NOMEM_BKPT if the scratch buffer cannot be
** allocated, or an I/O error code.
*/
static int walRewriteChecksums(Wal *pWal, u32 iLast){
  const int szPage = pWal->szPage;
  const int szFrame = szPage + WAL_FRAME_HDRSIZE;
  u8 aHdr[WAL_FRAME_HDRSIZE];     /* Re-encoded frame header */
  u8 *aScratch;                   /* Holds one complete frame */
  i64 iSeedOff;                   /* Offset of the seed checksum */
  u32 iFrame;                     /* Frame being rewritten */
  int rc;

  aScratch = sqlite3_malloc(szFrame);
  if( aScratch==0 ) return SQLITE_NOMEM_BKPT;

  assert( pWal->iReCksum>0 );
  if( pWal->iReCksum!=1 ){
    iSeedOff = walFrameOffset(pWal->iReCksum-1, szPage) + 16;
  }else{
    iSeedOff = 24;
  }
  rc = sqlite3OsRead(pWal->pWalFd, aScratch, sizeof(u32)*2, iSeedOff);
  pWal->hdr.aFrameCksum[0] = sqlite3Get4byte(aScratch);
  pWal->hdr.aFrameCksum[1] = sqlite3Get4byte(&aScratch[sizeof(u32)]);

  iFrame = pWal->iReCksum;
  pWal->iReCksum = 0;
  while( rc==SQLITE_OK && iFrame<=iLast ){
    const i64 iOff = walFrameOffset(iFrame, szPage);
    rc = sqlite3OsRead(pWal->pWalFd, aScratch, szFrame, iOff);
    if( rc==SQLITE_OK ){
      u32 iPgno = sqlite3Get4byte(aScratch);
      u32 nDbSize = sqlite3Get4byte(&aScratch[4]);
      walEncodeFrame(pWal, iPgno, nDbSize, &aScratch[WAL_FRAME_HDRSIZE], aHdr);
      rc = sqlite3OsWrite(pWal->pWalFd, aHdr, sizeof(aHdr), iOff);
    }
    iFrame++;
  }

  sqlite3_free(aScratch);
  return rc;
}
/*
** Write the pages on list pList (linked through PgHdr.pDirty) to the WAL.
**
**   pWal        The WAL handle; the write lock must be held.
**   szPage      Database page size in bytes.
**   pList       Non-empty list of pages to write.
**   nTruncate   Non-zero exactly when isCommit is non-zero; stored in the
**               last frame of a commit and in hdr.nPage.
**   isCommit    True if this write commits the transaction.
**   sync_flags  Flags controlling whether and how the WAL file is synced.
**
** Returns SQLITE_OK or an error code.  SQLITE_CORRUPT_BKPT is returned if
** szPage does not match pWal->szPage.
*/
int sqlite3WalFrames(
Wal *pWal,
int szPage,
PgHdr *pList,
Pgno nTruncate,
int isCommit,
int sync_flags
){
int rc;
u32 iFrame;
PgHdr *p;
PgHdr *pLast = 0;
int nExtra = 0;
int szFrame;
i64 iOffset;
WalWriter w;
u32 iFirst = 0;
WalIndexHdr *pLive;
assert( pList );
assert( pWal->writeLock );
assert( (isCommit!=0)==(nTruncate!=0) );
#if defined(SQLITE_TEST) && defined(SQLITE_DEBUG)
{ int cnt; for(cnt=0, p=pList; p; p=p->pDirty, cnt++){}
WALTRACE(("WAL%p: frame write begin. %d frames. mxFrame=%d. %s\n",
pWal, cnt, pWal->hdr.mxFrame, isCommit ? "Commit" : "Spill"));
}
#endif
/* If the private header differs from the one in the wal-index, frames
** beyond pLive->mxFrame were written earlier by this transaction.  iFirst
** is the first such frame; zero means there are none. */
pLive = (WalIndexHdr*)walIndexHdr(pWal);
if( memcmp(&pWal->hdr, (void *)pLive, sizeof(WalIndexHdr))!=0 ){
iFirst = pLive->mxFrame+1;
}
if( SQLITE_OK!=(rc = walRestartLog(pWal)) ){
return rc;
}
/* When the WAL holds no frames, write a fresh WAL header first. */
iFrame = pWal->hdr.mxFrame;
if( iFrame==0 ){
u8 aWalHdr[WAL_HDRSIZE];
u32 aCksum[2];
/* Header layout: magic, version, page size, nCkpt, 8 bytes of salt and
** an 8-byte checksum over the first 24 bytes. */
sqlite3Put4byte(&aWalHdr[0], (WAL_MAGIC | SQLITE_BIGENDIAN));
sqlite3Put4byte(&aWalHdr[4], WAL_MAX_VERSION);
sqlite3Put4byte(&aWalHdr[8], szPage);
sqlite3Put4byte(&aWalHdr[12], pWal->nCkpt);
if( pWal->nCkpt==0 ) sqlite3_randomness(8, pWal->hdr.aSalt);
memcpy(&aWalHdr[16], pWal->hdr.aSalt, 8);
walChecksumBytes(1, aWalHdr, WAL_HDRSIZE-2*4, 0, aCksum);
sqlite3Put4byte(&aWalHdr[24], aCksum[0]);
sqlite3Put4byte(&aWalHdr[28], aCksum[1]);
pWal->szPage = szPage;
pWal->hdr.bigEndCksum = SQLITE_BIGENDIAN;
pWal->hdr.aFrameCksum[0] = aCksum[0];
pWal->hdr.aFrameCksum[1] = aCksum[1];
pWal->truncateOnCommit = 1;
rc = sqlite3OsWrite(pWal->pWalFd, aWalHdr, sizeof(aWalHdr), 0);
WALTRACE(("WAL%p: wal-header write %s\n", pWal, rc ? "failed" : "ok"));
if( rc!=SQLITE_OK ){
return rc;
}
if( pWal->syncHeader ){
rc = sqlite3OsSync(pWal->pWalFd, CKPT_SYNC_FLAGS(sync_flags));
if( rc ) return rc;
}
}
if( (int)pWal->szPage!=szPage ){
return SQLITE_CORRUPT_BKPT;
}
/* Set up the writer state.  iSyncPoint stays 0 (no mid-write sync) unless
** padding to a sector boundary is requested further down. */
w.pWal = pWal;
w.pFd = pWal->pWalFd;
w.iSyncPoint = 0;
w.syncFlags = sync_flags;
w.szPage = szPage;
iOffset = walFrameOffset(iFrame+1, szPage);
szFrame = szPage + WAL_FRAME_HDRSIZE;
/* Write each page on the list. */
for(p=pList; p; p=p->pDirty){
int nDbSize;
/* A page already written by this transaction (found at frame >= iFirst)
** has its page data overwritten in place rather than being appended.
** iReCksum tracks the lowest such frame so that checksums can be
** rewritten at commit.  The final page of a commit is always appended. */
if( iFirst && (p->pDirty || isCommit==0) ){
u32 iWrite = 0;
VVA_ONLY(rc =) sqlite3WalFindFrame(pWal, p->pgno, &iWrite);
assert( rc==SQLITE_OK || iWrite==0 );
if( iWrite>=iFirst ){
i64 iOff = walFrameOffset(iWrite, szPage) + WAL_FRAME_HDRSIZE;
void *pData;
if( pWal->iReCksum==0 || iWrite<pWal->iReCksum ){
pWal->iReCksum = iWrite;
}
pData = p->pData;
rc = sqlite3OsWrite(pWal->pWalFd, pData, szPage, iOff);
if( rc ) return rc;
p->flags &= ~PGHDR_WAL_APPEND;
continue;
}
}
iFrame++;
assert( iOffset==walFrameOffset(iFrame, szPage) );
/* Only the last frame of a commit carries the non-zero db-size field. */
nDbSize = (isCommit && p->pDirty==0) ? nTruncate : 0;
rc = walWriteOneFrame(&w, p, nDbSize, iOffset);
if( rc ) return rc;
pLast = p;
iOffset += szFrame;
p->flags |= PGHDR_WAL_APPEND;
}
/* Rewrite frame checksums if any frame was overwritten in place. */
if( isCommit && pWal->iReCksum ){
rc = walRewriteChecksums(pWal, iFrame);
if( rc ) return rc;
}
/* Sync on commit if the sync flags ask for it.  With padToSectorBoundary
** set, the last frame is rewritten repeatedly until iOffset reaches the
** next sector boundary; walWriteToLog() performs the sync when a write
** reaches w.iSyncPoint, so the explicit sync below is needed only when
** iOffset was already on the boundary. */
if( isCommit && WAL_SYNC_FLAGS(sync_flags)!=0 ){
int bSync = 1;
if( pWal->padToSectorBoundary ){
int sectorSize = sqlite3SectorSize(pWal->pWalFd);
w.iSyncPoint = ((iOffset+sectorSize-1)/sectorSize)*sectorSize;
bSync = (w.iSyncPoint==iOffset);
testcase( bSync );
while( iOffset<w.iSyncPoint ){
rc = walWriteOneFrame(&w, pLast, nTruncate, iOffset);
if( rc ) return rc;
iOffset += szFrame;
nExtra++;
assert( pLast!=0 );
}
}
if( bSync ){
assert( rc==SQLITE_OK );
rc = sqlite3OsSync(w.pFd, WAL_SYNC_FLAGS(sync_flags));
}
}
/* Apply the WAL size limit on commit, but never below the end of the
** frames just written. */
if( isCommit && pWal->truncateOnCommit && pWal->mxWalSize>=0 ){
i64 sz = pWal->mxWalSize;
if( walFrameOffset(iFrame+nExtra+1, szPage)>pWal->mxWalSize ){
sz = walFrameOffset(iFrame+nExtra+1, szPage);
}
walLimitSize(pWal, sz);
pWal->truncateOnCommit = 0;
}
/* Add wal-index entries for every appended frame (pages flagged
** PGHDR_WAL_APPEND), then for the nExtra padding frames, which all repeat
** pLast. */
iFrame = pWal->hdr.mxFrame;
for(p=pList; p && rc==SQLITE_OK; p=p->pDirty){
if( (p->flags & PGHDR_WAL_APPEND)==0 ) continue;
iFrame++;
rc = walIndexAppend(pWal, iFrame, p->pgno);
}
assert( pLast!=0 || nExtra==0 );
while( rc==SQLITE_OK && nExtra>0 ){
iFrame++;
nExtra--;
rc = walIndexAppend(pWal, iFrame, pLast->pgno);
}
if( rc==SQLITE_OK ){
/* Update the private header.  The page size is stored in 16 bits: the
** 0xff00 bits directly, with szPage>>16 contributing bit 0 for 65536. */
pWal->hdr.szPage = (u16)((szPage&0xff00) | (szPage>>16));
testcase( szPage<=32768 );
testcase( szPage>=65536 );
pWal->hdr.mxFrame = iFrame;
if( isCommit ){
pWal->hdr.iChange++;
pWal->hdr.nPage = nTruncate;
}
/* On commit, publish the header to the wal-index and record the frame
** count that sqlite3WalCallback() will report. */
if( isCommit ){
walIndexWriteHdr(pWal);
pWal->iCallback = iFrame;
}
}
WALTRACE(("WAL%p: frame write %s\n", pWal, rc ? "failed" : "ok"));
return rc;
}
/*
** Run a checkpoint on the WAL.
**
**   pWal        The WAL handle; must hold neither the checkpoint lock nor
**               the write lock on entry.
**   db          Database handle passed to sqlite3WalDb() and walCheckpoint().
**   eMode       One of the SQLITE_CHECKPOINT_* modes.
**   xBusy       Busy callback; must be NULL for SQLITE_CHECKPOINT_PASSIVE.
**   pBusyArg    Context pointer for xBusy.
**   sync_flags  Sync flags passed to walCheckpoint().
**   nBuf        Size of zBuf; must equal the WAL page size if the WAL
**               contains frames, else SQLITE_CORRUPT_BKPT is returned.
**   zBuf        Scratch buffer passed to walCheckpoint().
**   pnLog       If not NULL, receives hdr.mxFrame.
**   pnCkpt      If not NULL, receives nBackfill.
**
** pnLog and pnCkpt are written only when the checkpoint step returned
** SQLITE_OK or SQLITE_BUSY.
**
** Returns SQLITE_READONLY for a read-only connection.  Otherwise returns
** SQLITE_OK or an error code; SQLITE_BUSY is returned if the checkpoint
** itself succeeded but had to be downgraded from the requested mode to
** SQLITE_CHECKPOINT_PASSIVE because the write lock was unavailable.
*/
int sqlite3WalCheckpoint(
Wal *pWal,
sqlite3 *db,
int eMode,
int (*xBusy)(void*),
void *pBusyArg,
int sync_flags,
int nBuf,
u8 *zBuf,
int *pnLog,
int *pnCkpt
){
int rc;
int isChanged = 0;
int eMode2 = eMode;
int (*xBusy2)(void*) = xBusy;
assert( pWal->ckptLock==0 );
assert( pWal->writeLock==0 );
assert( eMode!=SQLITE_CHECKPOINT_PASSIVE || xBusy==0 );
if( pWal->readOnly ) return SQLITE_READONLY;
WALTRACE(("WAL%p: checkpoint begins\n", pWal));
sqlite3WalDb(pWal, db);
(void)walEnableBlocking(pWal);
/* Step 1: obtain the checkpoint lock. */
rc = walLockExclusive(pWal, WAL_CKPT_LOCK, 1);
testcase( rc==SQLITE_BUSY );
testcase( rc!=SQLITE_OK && xBusy2!=0 );
if( rc==SQLITE_OK ){
pWal->ckptLock = 1;
/* Step 2: non-passive modes also want the write lock.  If it is busy,
** fall back to a passive checkpoint with no busy handler; the downgrade
** is reported through the final return value. */
if( eMode!=SQLITE_CHECKPOINT_PASSIVE ){
rc = walBusyLock(pWal, xBusy2, pBusyArg, WAL_WRITE_LOCK, 1);
if( rc==SQLITE_OK ){
pWal->writeLock = 1;
}else if( rc==SQLITE_BUSY ){
eMode2 = SQLITE_CHECKPOINT_PASSIVE;
xBusy2 = 0;
rc = SQLITE_OK;
}
}
}
/* Step 3: read the wal-index header, with blocking disabled for the
** duration of the read. */
if( rc==SQLITE_OK ){
walDisableBlocking(pWal);
rc = walIndexReadHdr(pWal, &isChanged);
(void)walEnableBlocking(pWal);
if( isChanged && pWal->pDbFd->pMethods->iVersion>=3 ){
sqlite3OsUnfetch(pWal->pDbFd, 0, 0);
}
}
/* Step 4: run the checkpoint itself. */
if( rc==SQLITE_OK ){
if( pWal->hdr.mxFrame && walPagesize(pWal)!=nBuf ){
rc = SQLITE_CORRUPT_BKPT;
}else{
rc = walCheckpoint(pWal, db, eMode2, xBusy2, pBusyArg, sync_flags, zBuf);
}
if( rc==SQLITE_OK || rc==SQLITE_BUSY ){
if( pnLog ) *pnLog = (int)pWal->hdr.mxFrame;
if( pnCkpt ) *pnCkpt = (int)(walCkptInfo(pWal)->nBackfill);
}
}
/* If a different header was loaded above, zero the cached copy so that it
** will compare as changed against whatever header is read next. */
if( isChanged ){
memset(&pWal->hdr, 0, sizeof(WalIndexHdr));
}
walDisableBlocking(pWal);
sqlite3WalDb(pWal, 0);
/* Step 5: release the write lock (if held) and the checkpoint lock. */
sqlite3WalEndWriteTransaction(pWal);
if( pWal->ckptLock ){
walUnlockExclusive(pWal, WAL_CKPT_LOCK, 1);
pWal->ckptLock = 0;
}
WALTRACE(("WAL%p: checkpoint %s\n", pWal, rc ? "failed" : "ok"));
#ifdef SQLITE_ENABLE_SETLK_TIMEOUT
if( rc==SQLITE_BUSY_TIMEOUT ) rc = SQLITE_BUSY;
#endif
return (rc==SQLITE_OK && eMode!=eMode2 ? SQLITE_BUSY : rc);
}
/*
** Return the value of pWal->iCallback and reset it to zero, so that a
** second call with no intervening commit returns 0.  Returns 0 when pWal
** is NULL.
*/
int sqlite3WalCallback(Wal *pWal){
  u32 nPending;

  if( pWal==0 ){
    return 0;
  }
  nPending = pWal->iCallback;
  pWal->iCallback = 0;
  return (int)nPending;
}
/*
** Query or change the exclusive-mode setting of the WAL handle.
**
**   op<0   Report only: return true if currently in WAL_NORMAL_MODE.
**
**   op>0   Switch from WAL_NORMAL_MODE to WAL_EXCLUSIVE_MODE.  The shared
**          read lock is released through walUnlockShared().  Returns 1.
**
**   op==0  Switch back to WAL_NORMAL_MODE.  If already in normal mode,
**          return 0.  Otherwise the read lock is re-acquired; if that
**          fails the handle stays in WAL_EXCLUSIVE_MODE.  Returns true if
**          the handle ended up in WAL_NORMAL_MODE.
*/
int sqlite3WalExclusiveMode(Wal *pWal, int op){
  assert( pWal->writeLock==0 );
  assert( pWal->exclusiveMode!=WAL_HEAPMEMORY_MODE || op==-1 );
  assert( pWal->readLock>=0 || pWal->lockError );
  assert( pWal->readLock>=0 || (op<=0 && pWal->exclusiveMode==0) );

  if( op<0 ){
    return pWal->exclusiveMode==WAL_NORMAL_MODE;
  }

  if( op>0 ){
    assert( pWal->exclusiveMode==WAL_NORMAL_MODE );
    assert( pWal->readLock>=0 );
    walUnlockShared(pWal, WAL_READ_LOCK(pWal->readLock));
    pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
    return 1;
  }

  /* op==0 */
  if( pWal->exclusiveMode==WAL_NORMAL_MODE ){
    return 0;
  }
  pWal->exclusiveMode = WAL_NORMAL_MODE;
  if( walLockShared(pWal, WAL_READ_LOCK(pWal->readLock))!=SQLITE_OK ){
    pWal->exclusiveMode = WAL_EXCLUSIVE_MODE;
  }
  return pWal->exclusiveMode==WAL_NORMAL_MODE;
}
/*
** Return true if pWal is non-NULL and its exclusiveMode field is
** WAL_HEAPMEMORY_MODE.
*/
int sqlite3WalHeapMemory(Wal *pWal){
  if( pWal==0 ){
    return 0;
  }
  return pWal->exclusiveMode==WAL_HEAPMEMORY_MODE;
}
#ifdef SQLITE_ENABLE_SNAPSHOT
/*
** Create a snapshot object: a heap-allocated copy of pWal->hdr.
**
** A read transaction must be open and no write transaction.  If the 16
** bytes starting at hdr.aFrameCksum[0] are all zero, *ppSnapshot is set to
** NULL and SQLITE_ERROR is returned.  On allocation failure
** SQLITE_NOMEM_BKPT is returned and *ppSnapshot is left untouched.
** Otherwise *ppSnapshot receives the copy and SQLITE_OK is returned.
*/
int sqlite3WalSnapshotGet(Wal *pWal, sqlite3_snapshot **ppSnapshot){
  static const u32 aZero[4] = { 0, 0, 0, 0 };
  WalIndexHdr *pCopy;

  assert( pWal->readLock>=0 && pWal->writeLock==0 );

  if( memcmp(&pWal->hdr.aFrameCksum[0],aZero,16)==0 ){
    *ppSnapshot = 0;
    return SQLITE_ERROR;
  }

  pCopy = (WalIndexHdr*)sqlite3_malloc(sizeof(WalIndexHdr));
  if( pCopy==0 ){
    return SQLITE_NOMEM_BKPT;
  }
  memcpy(pCopy, &pWal->hdr, sizeof(WalIndexHdr));
  *ppSnapshot = (sqlite3_snapshot*)pCopy;
  return SQLITE_OK;
}
/*
** Store pSnapshot (which may be NULL) in pWal->pSnapshot.  The value is
** consulted by sqlite3WalBeginReadTransaction() and walTryBeginRead() when
** the next read transaction is opened.
*/
void sqlite3WalSnapshotOpen(
Wal *pWal,
sqlite3_snapshot *pSnapshot
){
pWal->pSnapshot = (WalIndexHdr*)pSnapshot;
}
/*
** Order two snapshots.  Comparison is by aSalt[0] first and by mxFrame
** second.  Returns -1 if p1 sorts before p2, +1 if after, 0 if both
** fields are equal.
*/
int sqlite3_snapshot_cmp(sqlite3_snapshot *p1, sqlite3_snapshot *p2){
  const WalIndexHdr *pLeft = (WalIndexHdr*)p1;
  const WalIndexHdr *pRight = (WalIndexHdr*)p2;

  if( pLeft->aSalt[0]!=pRight->aSalt[0] ){
    return pLeft->aSalt[0]<pRight->aSalt[0] ? -1 : +1;
  }
  if( pLeft->mxFrame!=pRight->mxFrame ){
    return pLeft->mxFrame<pRight->mxFrame ? -1 : +1;
  }
  return 0;
}
/*
** Take a shared WAL_CKPT_LOCK and check that pSnapshot can still be used:
** its salt must equal pWal->hdr.aSalt and its mxFrame must not be below
** nBackfillAttempted.
**
** Returns SQLITE_OK with the lock still held if the snapshot passes.
** Returns SQLITE_ERROR_SNAPSHOT, after releasing the lock, if it does not.
** If the lock cannot be obtained, that error is returned.
*/
int sqlite3WalSnapshotCheck(Wal *pWal, sqlite3_snapshot *pSnapshot){
  const WalIndexHdr *pNew = (WalIndexHdr*)pSnapshot;
  int rc;

  rc = walLockShared(pWal, WAL_CKPT_LOCK);
  if( rc!=SQLITE_OK ){
    return rc;
  }
  if( memcmp(pNew->aSalt, pWal->hdr.aSalt, sizeof(pWal->hdr.aSalt))
   || pNew->mxFrame<walCkptInfo(pWal)->nBackfillAttempted
  ){
    walUnlockShared(pWal, WAL_CKPT_LOCK);
    rc = SQLITE_ERROR_SNAPSHOT;
  }
  return rc;
}
/*
** Release the shared WAL_CKPT_LOCK that a successful call to
** sqlite3WalSnapshotCheck() leaves held.  pWal must not be NULL.
*/
void sqlite3WalSnapshotUnlock(Wal *pWal){
assert( pWal );
walUnlockShared(pWal, WAL_CKPT_LOCK);
}
#endif
#ifdef SQLITE_ENABLE_ZIPVFS
/*
** Return pWal->szPage, or 0 if pWal is NULL.  When pWal is non-NULL a read
** lock must be held.
*/
int sqlite3WalFramesize(Wal *pWal){
  assert( pWal==0 || pWal->readLock>=0 );
  if( pWal==0 ){
    return 0;
  }
  return pWal->szPage;
}
#endif
/*
** Return the file handle of the WAL file (pWal->pWalFd).  pWal must not be
** NULL; it is dereferenced unconditionally.
*/
sqlite3_file *sqlite3WalFile(Wal *pWal){
return pWal->pWalFd;
}
#endif