pager.c文件中有Pager的定义
/*
** The Pager object. Members outside the marked "routinely-changing" block
** below are configuration: they are fixed when the pager is created or only
** change on a significant mode change. Members inside that block describe
** the pager's current state and change during routine operation.
*/
struct Pager {
sqlite3_vfs *pVfs; /* OS functions to use for IO */
u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */
u8 journalMode; /* One of the PAGER_JOURNALMODE_* values */
u8 useJournal; /* Use a rollback journal on this file */
u8 noSync; /* Do not sync the journal if true */
u8 fullSync; /* Do extra syncs of the journal for robustness */
u8 extraSync; /* sync directory after journal delete */
u8 syncFlags; /* SYNC_NORMAL or SYNC_FULL otherwise */
u8 walSyncFlags; /* See description above */
u8 tempFile; /* zFilename is a temporary or immutable file */
u8 noLock; /* Do not lock (except in WAL mode) */
u8 readOnly; /* True for a read-only database */
u8 memDb; /* True to inhibit all file I/O */
/**************************************************************************
** The following block contains those class members that change during
** routine operation. Class members not in this block are either fixed
** when the pager is first created or else only change when there is a
** significant mode change (such as changing the page_size, locking_mode,
** or the journal_mode). From another view, these class members describe
** the "state" of the pager, while other class members describe the
** "configuration" of the pager.
*/
u8 eState; /* Pager state (OPEN, READER, WRITER_LOCKED..) */
u8 eLock; /* Current lock held on database file */
u8 changeCountDone; /* Set after incrementing the change-counter */
u8 setMaster; /* True if a m-j name has been written to jrnl */
u8 doNotSpill; /* Do not spill the cache when non-zero */
u8 subjInMemory; /* True to use in-memory sub-journals */
u8 bUseFetch; /* True to use xFetch() */
u8 hasHeldSharedLock; /* True if a shared lock has ever been held */
Pgno dbSize; /* Number of pages in the database */
Pgno dbOrigSize; /* dbSize before the current transaction */
Pgno dbFileSize; /* Number of pages in the database file */
Pgno dbHintSize; /* Value passed to FCNTL_SIZE_HINT call */
int errCode; /* One of several kinds of errors */
int nRec; /* Pages journalled since last j-header written */
u32 cksumInit; /* Quasi-random value added to every checksum */
u32 nSubRec; /* Number of records written to sub-journal */
Bitvec *pInJournal; /* One bit for each page in the database file */
sqlite3_file *fd; /* File descriptor for database */
sqlite3_file *jfd; /* File descriptor for main journal */
sqlite3_file *sjfd; /* File descriptor for sub-journal */
i64 journalOff; /* Current write offset in the journal file */
i64 journalHdr; /* Byte offset to previous journal header */
sqlite3_backup *pBackup; /* Pointer to list of ongoing backup processes */
PagerSavepoint *aSavepoint; /* Array of active savepoints */
int nSavepoint; /* Number of elements in aSavepoint[] */
u32 iDataVersion; /* Changes whenever database content changes */
char dbFileVers[16]; /* Changes whenever database file changes */
int nMmapOut; /* Number of mmap pages currently outstanding */
sqlite3_int64 szMmap; /* Desired maximum mmap size */
PgHdr *pMmapFreelist; /* List of free mmap page headers (pDirty) */
/*
** End of the routinely-changing class members
***************************************************************************/
u16 nExtra; /* Add this many bytes to each in-memory page */
i16 nReserve; /* Number of unused bytes at end of each page */
u32 vfsFlags; /* Flags for sqlite3_vfs.xOpen() */
u32 sectorSize; /* Assumed sector size during rollback */
int pageSize; /* Number of bytes in a page */
Pgno mxPgno; /* Maximum allowed size of the database */
i64 journalSizeLimit; /* Size limit for persistent journal files */
char *zFilename; /* Name of the database file */
char *zJournal; /* Name of the journal file */
int (*xBusyHandler)(void*); /* Function to call when busy */
void *pBusyHandlerArg; /* Context argument for xBusyHandler */
int aStat[4]; /* Total cache hits, misses, writes, spills */
#ifdef SQLITE_TEST
int nRead; /* Database pages read */
#endif
void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
int (*xGet)(Pager*,Pgno,DbPage**,int); /* Routine to fetch a page */
#ifdef SQLITE_HAS_CODEC
void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
void (*xCodecFree)(void*); /* Destructor for the codec */
void *pCodec; /* First argument to xCodec... methods */
#endif
char *pTmpSpace; /* Pager.pageSize bytes of space for tmp use */
PCache *pPCache; /* Pointer to page cache object */
#ifndef SQLITE_OMIT_WAL
Wal *pWal; /* Write-ahead log used by "journal_mode=wal" */
char *zWal; /* File name for write-ahead log */
#endif
};
sqliteInt.h中关于KeyInfo的定义
2132 /*
2133 ** An instance of the following structure is passed as the first
2134 ** argument to sqlite3VdbeKeyCompare and is used to control the
2135 ** comparison of the two index keys.
2136 **
2137 ** Note that aSortOrder[] and aColl[] have nField+1 slots. There
2138 ** are nField slots for the columns of an index then one extra slot
2139 ** for the rowid at the end.
2140 */
/* NOTE(review): the comment above speaks of "nField", but the structure
** below declares nKeyField and nAllField instead. Presumably the slot count
** is derived from one of those two members -- confirm against the SQLite
** version this listing was taken from. */
2141 struct KeyInfo {
2142 u32 nRef; /* Number of references to this KeyInfo object */
2143 u8 enc; /* Text encoding - one of the SQLITE_UTF* values */
2144 u16 nKeyField; /* Number of key columns in the index */
2145 u16 nAllField; /* Total columns, including key plus others */
2146 sqlite3 *db; /* The database connection */
2147 u8 *aSortOrder; /* Sort order for each column. */
2148 CollSeq *aColl[1]; /* Collating sequence for each term of the key */
2149 };
2150
sqliteInt.h中关于UnpackedRecord的定义
此对象保存一条已被解析为各个字段的记录, 以便进行比较。记录 (record) 是包含一个或多个数据字段的对象, 既用于存储表行的内容, 也用于存储索引的键。记录的 blob 编码由 VDBE 的 OP_MakeRecord 操作码创建, 并由 OP_Column 操作码拆解。此对象的一个实例用作在索引 b+树上进行搜索的 "键"。搜索的目的是找到与此对象所描述的键最接近的条目。此对象可能只包含键的前缀, 字段的数量由 pKeyInfo->nField 给出...... 如果此键小于或大于 b 树中的某个键, 则分别返回 r1 和 r2 字段的值。它们通常分别为 -1 和 +1, 但如果 b 树按 DESC 顺序排列, 则可能反转为 +1 和 -1。键比较函数在比较结果相等时实际返回的是 default_rc。default_rc 可以是 -1、0 或 +1。如果 b 树中有多个条目具有相同的键 (只比较前 pKeyInfo->nFields 个字段时), 可以将 default_rc 设置为 -1 使搜索找到最后一个匹配项, 或者设置为 +1 使搜索找到第一个匹配项。键比较函数在将此结构与 b 树记录进行比较时, 只要出现过相等的结果, 就会把 eqSeen 置为真。当 default_rc!=0 时, 搜索可能停在第一个匹配项的前一条记录上, 或者最后一个匹配项的后一条记录上; eqSeen 字段用于指示 b 树中是否存在完全匹配的条目。
2168 ** or greater than a key in the btree, respectively. These are normally
2169 ** -1 and +1 respectively, but might be inverted to +1 and -1 if the b-tree
2170 ** is in DESC order.
2171 **
2172 ** The key comparison functions actually return default_rc when they find
2173 ** an equals comparison. default_rc can be -1, 0, or +1. If there are
2174 ** multiple entries in the b-tree with the same key (when only looking
2175 ** at the first pKeyInfo->nFields,) then default_rc can be set to -1 to
2176 ** cause the search to find the last match, or +1 to cause the search to
2177 ** find the first match.
2178 **
2179 ** The key comparison functions will set eqSeen to true if they ever
2180 ** get an equal result when comparing this structure to a b-tree record.
2181 ** When default_rc!=0, the search might end up on the record immediately
2182 ** before the first match or immediately after the last match. The
2183 ** eqSeen field will indicate whether or not an exact match exists in the
2184 ** b-tree.
2185 */
/* Parsed (unpacked) form of a record, used as the key in record comparisons. */
2186 struct UnpackedRecord {
2187 KeyInfo *pKeyInfo; /* Collation and sort-order information */
2188 Mem *aMem; /* Values */
2189 u16 nField; /* Number of entries in aMem[] */
2190 i8 default_rc; /* Comparison result if keys are equal */
2191 u8 errCode; /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
2192 i8 r1; /* Value to return if (lhs < rhs) */
2193 i8 r2; /* Value to return if (lhs > rhs) */
2194 u8 eqSeen; /* True if an equality comparison has been seen */
2195 };
vdbeaux.c中的sqlite3VdbeRecordUnpack函数
给定 pKey[] 中一条记录的 nKey 字节编码, 用解码后的记录内容填充第四个参数所指向的 UnpackedRecord 结构。在 sqlite3VdbeRecordUnpack 函数中, const void *pKey 所指向的记录, 其记录头的大小以及各字段的序列类型 (serial type) 都是以变长整数 (varint) 的方式存储的。因此, 读取的时候需要用 getVarint32 进行相应的解码处理。
3811 /*
3812 Given the nKey-byte encoding of a record in pKey[], populate the
3813 UnpackedRecord structure indicated by the fourth argument with the
3814 contents of the decoded record.
3815 */
/* The decoded values are written into p->aMem[]. On return p->default_rc has
** been reset to 0 and p->nField holds the number of fields actually decoded,
** which is never more than the value p->nField had on entry. */
3816 void sqlite3VdbeRecordUnpack(
3817 KeyInfo *pKeyInfo, /* Information about the record format */
3818 int nKey, /* Size of the binary record */
3819 const void *pKey, /* The binary record */
3820 UnpackedRecord *p /* Populate this structure before returning. */
3821 ){
3822 const unsigned char *aKey = (const unsigned char *)pKey;
3823 u32 d; /* Offset in aKey[] of the next field's data */
3824 u32 idx; /* Offset in aKey[] to read from */
3825 u16 u; /* Unsigned loop counter */
3826 u32 szHdr; /* Header size, decoded from the first varint */
3827 Mem *pMem = p->aMem;
3828
3829 p->default_rc = 0;
3830 assert( EIGHT_BYTE_ALIGNMENT(pMem) );
3831 idx = getVarint32(aKey, szHdr);// szHdr receives the value of the varint at the start of aKey; it is unset beforehand and is assigned inside getVarint32
3832 d = szHdr; /* Field data starts right after the header */
3833 u = 0;
/* Walk the serial types in the header (idx) and the field data (d) in step.
** Stop when the header is exhausted, when the data offset runs past nKey,
** or when p->nField values have been decoded. */
3834 while( idx<szHdr && d<=(u32)nKey ){
3835 u32 serial_type;
3836
3837 idx += getVarint32(&aKey[idx], serial_type);
3838 pMem->enc = pKeyInfo->enc;
3839 pMem->db = pKeyInfo->db;
3840 /* pMem->flags = 0; // sqlite3VdbeSerialGet() will set this for us */
3841 pMem->szMalloc = 0;
3842 pMem->z = 0;
3843 d += sqlite3VdbeSerialGet(&aKey[d], serial_type, pMem);
3844 pMem++;
3845 if( (++u)>=p->nField ) break;
3846 }
3847 if( d>(u32)nKey && u ){
3848 assert( CORRUPT_DB );
3849 /* In a corrupt record entry, the last pMem might have been set up using
3850 uninitialized memory. Overwrite its value with NULL, to prevent
3851 warnings from MSAN. */
3852 sqlite3VdbeMemSetNull(pMem-1);
3853 }
3854 assert( u<=pKeyInfo->nKeyField + 1 );
3855 p->nField = u;
3856 }
sqlite3中关于变长整数的处理 fts1.c,getVarint32函数
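需要注意, 下面 fts1.c 中的 getVarint 采用的是低位在前的方式: 每个字节的低 7 位是数据, 最高位 (0x80) 为 1 表示后面还有后续字节。因此, 计算变长整数结果的时候, 首先从内存的低地址处算起, 第一个字节提供结果的最低 7 位, 然后进行地址的增加; 每读入一个新字节, 权重 y 左移 7 位, 再将该字节的低 7 位乘以 y 累加到结果 x 上, 直到遇到最高位为 0 的字节为止, 详情见下面的算法。(SQLite 核心记录格式所使用的变长整数则是大端方式, 即先出现的字节提供的位更高, 与这里的 fts1.c 实现不同。) 对于 char 类型, 在 C 语言中一个字符采用一个字节来存储。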
126 /* Read a 64-bit variable-length integer from memory starting at p[0].
127 * Return the number of bytes read, or 0 on error.
128 * The value is stored in *v. */
/* Encoding as decoded here: every byte with the 0x80 bit set contributes its
 * low 7 bits and is followed by another byte; the first byte without 0x80
 * ends the value. The first byte read holds the least-significant group. */
129 static int getVarint(const char *p, sqlite_int64 *v){
130 const unsigned char *q = (const unsigned char *) p;
131 sqlite_uint64 x = 0, y = 1; /* x: accumulated value, y: weight of next group */
132 while( (*q & 0x80) == 0x80 ){
133 x += y * (*q++ & 0x7f);
134 y <<= 7;
135 if( q - (unsigned char *)p >= VARINT_MAX ){ /* bad data */
136 assert( 0 );
137 return 0;
138 }
139 }
140 x += y * (*q++); /* final byte: 0x80 bit is clear, so use it whole */
141 *v = (sqlite_int64) x;
142 return (int) (q - (unsigned char *)p);
143 }
144
/* Read a varint with getVarint() and store it in *pi narrowed to int.
 * The assert checks that the narrowing did not change the value.
 * Returns whatever getVarint() returned (bytes read, or 0 on error). */
145 static int getVarint32(const char *p, int *pi){
146 sqlite_int64 i;
147 int ret = getVarint(p, &i);
148 *pi = (int) i;
149 assert( *pi==i );
150 return ret;
151 }
152
在vdbeaux.c中查看函数sqlite3VdbeSerialGet的定义
/*
** Decode the value of serial type serial_type found at buf[] into pMem and
** return the number of bytes of buf[] that the value occupies:
**
**   0, 10, 11   NULL variants                 -> returns 0
**   1..4        1/2/3/4-byte signed integers  -> returns 1/2/3/4
**   5           6-byte signed integer         -> returns 6
**   6, 7        handled by serialGet()        -> returns its result
**   8, 9        integer constants 0 and 1     -> returns 0
**   others      blob or string                -> returns (serial_type-12)/2
**
** For blobs and strings pMem->z is pointed at buf itself (no copy is made).
*/
3691 u32 sqlite3VdbeSerialGet(
3692 const unsigned char *buf, /* Buffer to deserialize from */
3693 u32 serial_type, /* Serial type to deserialize */
3694 Mem *pMem /* Memory cell to write value into */
3695 ){
3696 switch( serial_type ){
3697 case 10: { /* Internal use only: NULL with virtual table
3698 UPDATE no-change flag set */
3699 pMem->flags = MEM_Null|MEM_Zero;
3700 pMem->n = 0;
3701 pMem->u.nZero = 0;
3702 break;
3703 }
3704 case 11: /* Reserved for future use */
3705 case 0: { /* Null */
3706 /* EVIDENCE-OF: R-24078-09375 Value is a NULL. */
3707 pMem->flags = MEM_Null;
3708 break;
3709 }
3710 case 1: {
3711 /* EVIDENCE-OF: R-44885-25196 Value is an 8-bit twos-complement
3712 integer. */
3713 pMem->u.i = ONE_BYTE_INT(buf);
3714 pMem->flags = MEM_Int;
3715 testcase( pMem->u.i<0 );
3716 return 1;
3717 }
3718 case 2: { /* 2-byte signed integer */
3719 /* EVIDENCE-OF: R-49794-35026 Value is a big-endian 16-bit
3720 twos-complement integer. */
3721 pMem->u.i = TWO_BYTE_INT(buf);
3722 pMem->flags = MEM_Int;
3723 testcase( pMem->u.i<0 );
3724 return 2;
3725 }
3726 case 3: { /* 3-byte signed integer */
3727 /* EVIDENCE-OF: R-37839-54301 Value is a big-endian 24-bit
3728 twos-complement integer. */
3729 pMem->u.i = THREE_BYTE_INT(buf);
3730 pMem->flags = MEM_Int;
3731 testcase( pMem->u.i<0 );
3732 return 3;
3733 }
3734 case 4: { /* 4-byte signed integer */
3735 /* EVIDENCE-OF: R-01849-26079 Value is a big-endian 32-bit
3736 twos-complement integer. */
3737 pMem->u.i = FOUR_BYTE_INT(buf);
3738 #ifdef __HP_cc
3739 /* Work around a sign-extension bug in the HP compiler for HP/UX */
3740 if( buf[0]&0x80 ) pMem->u.i |= 0xffffffff80000000LL;
3741 #endif
3742 pMem->flags = MEM_Int;
3743 testcase( pMem->u.i<0 );
3744 return 4;
3745 }
3746 case 5: { /* 6-byte signed integer */
3747 /* EVIDENCE-OF: R-50385-09674 Value is a big-endian 48-bit
3748 twos-complement integer. */
/* Low 32 bits are read unsigned; the signed high 16 bits are scaled by 2^32 */
3749 pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf);
3750 pMem->flags = MEM_Int;
3751 testcase( pMem->u.i<0 );
3752 return 6;
3753 }
3754 case 6: /* 8-byte signed integer */
3755 case 7: { /* IEEE floating point */
3756 /* These use local variables, so do them in a separate routine
3757 to avoid having to move the frame pointer in the common case */
3758 return serialGet(buf,serial_type,pMem);
3759 }
3760 case 8: /* Integer 0 */
3761 case 9: { /* Integer 1 */
3762 /* EVIDENCE-OF: R-12976-22893 Value is the integer 0. */
3763 /* EVIDENCE-OF: R-18143-12121 Value is the integer 1. */
/* The value is implied by the serial type itself, so no bytes are consumed */
3764 pMem->u.i = serial_type-8;
3765 pMem->flags = MEM_Int;
3766 return 0;
3767 }
3768 default: {
3769 /* EVIDENCE-OF: R-14606-31564 Value is a BLOB that is (N-12)/2 bytes in
3770 length.
3771 EVIDENCE-OF: R-28401-00140 Value is a string in the text encoding and
3772 (N-13)/2 bytes in length. */
/* aFlag[] is indexed by the low bit of serial_type: even -> blob, odd -> string */
3773 static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem };
3774 pMem->z = (char *)buf;
3775 pMem->n = (serial_type-12)/2;
3776 pMem->flags = aFlag[serial_type&1];
3777 return pMem->n;
3778 }
3779 }
/* Reached only by the cases above that "break" (serial types 0, 10 and 11) */
3780 return 0;
3781 }
serialGet函数
将 buf 指向的数据 blob 按照序列类型 (serial type) serial_type 进行反序列化, 并将结果存储在 pMem 中, 返回读取的字节数。出于性能考虑, 此功能被实现为两个独立的例程: 少数需要局部变量的情况被拆分到单独的例程中, 因此在大多数情况下可以避免移动栈指针的开销。
/*
3647 Deserialize the data blob pointed to by buf as serial type serial_type
3648 and store the result in pMem. Return the number of bytes read.
3649
3650 This function is implemented as two separate routines for performance.
3651 The few cases that require local variables are broken out into a separate
3652 routine so that in most cases the overhead of moving the stack pointer
3653 is avoided.
3654 */
/* This routine always reads 8 bytes from buf and always returns 8.
** serial_type==6 yields an integer; any other value is decoded as a
** floating point number. */
3655 static u32 serialGet(
3656 const unsigned char *buf, /* Buffer to deserialize from */
3657 u32 serial_type, /* Serial type to deserialize */
3658 Mem *pMem /* Memory cell to write value into */
3659 ){
/* Assemble the 8 big-endian bytes into x: high word first, then low word */
3660 u64 x = FOUR_BYTE_UINT(buf);
3661 u32 y = FOUR_BYTE_UINT(buf+4);
3662 x = (x<<32) + y;
3663 if( serial_type==6 ){
3664 /* EVIDENCE-OF: R-29851-52272 Value is a big-endian 64-bit
3665 twos-complement integer. */
3666 pMem->u.i = *(i64*)&x;
3667 pMem->flags = MEM_Int;
3668 testcase( pMem->u.i<0 );
3669 }else{
3670 /* EVIDENCE-OF: R-57343-49114 Value is a big-endian IEEE 754-2008 64-bit
3671 floating point number. */
3672 #if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT)
3673 /* Verify that integers and floating point values use the same
3674 byte order. Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is
3675 defined that 64-bit floating point values really are mixed
3676 endian.
3677 */
3678 static const u64 t1 = ((u64)0x3ff00000)<<32;
3679 static const double r1 = 1.0;
3680 u64 t2 = t1;
3681 swapMixedEndianFloat(t2);
3682 assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );
3683 #endif
3684 assert( sizeof(x)==8 && sizeof(pMem->u.r)==8 );
3685 swapMixedEndianFloat(x);
/* Copy the bit pattern into the double rather than converting the value */
3686 memcpy(&pMem->u.r, &x, sizeof(x));
/* A NaN bit pattern is stored as NULL instead of a real */
3687 pMem->flags = IsNaN(x) ? MEM_Null : MEM_Real;
3688 }
3689 return 8;
3690 }
关于BYTE_INT的定义
3637 /* Input "x" is a sequence of unsigned characters that represent a
3638 big-endian integer. Return the equivalent native integer
3639 */
/* In the *_INT variants the leading byte is cast to i8 so that it carries the
** sign of the result; the remaining bytes are used unsigned. FOUR_BYTE_UINT
** casts the leading byte to u32 instead and therefore yields an unsigned
** value. Each macro evaluates its argument more than once. */
3640 #define ONE_BYTE_INT(x) ((i8)(x)[0])
3641 #define TWO_BYTE_INT(x) (256*(i8)((x)[0])|(x)[1])
3642 #define THREE_BYTE_INT(x) (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2])
3643 #define FOUR_BYTE_UINT(x) (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
3644 #define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
查看函数SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord的定义
80074 /*
80075 ** This routine is used to allocate sufficient space for an UnpackedRecord
80076 ** structure large enough to be used with sqlite3VdbeRecordUnpack() if
80077 ** the first argument is a pointer to KeyInfo structure pKeyInfo.
80078 **
80079 ** The space is either allocated using sqlite3DbMallocRaw() or from within
80080 ** the unaligned buffer passed via the second and third arguments (presumably
80081 ** stack space). If the former, then *ppFree is set to a pointer that should
80082 ** be eventually freed by the caller using sqlite3DbFree(). Or, if the
80083 ** allocation comes from the pSpace/szSpace buffer, *ppFree is set to NULL
80084 ** before returning.
80085 **
80086 ** If an OOM error occurs, NULL is returned.
80087 */
/* NOTE(review): the second paragraph above does not match the signature
** shown here. This version takes only pKeyInfo and always allocates with
** sqlite3DbMallocRaw(); there is no pSpace/szSpace buffer and no ppFree
** out-parameter. The single allocation holds the UnpackedRecord header
** (rounded up with ROUND8) followed by pKeyInfo->nKeyField+1 Mem cells,
** and p->aMem points at the first of those cells. */
80088 SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(
80089 KeyInfo *pKeyInfo /* Description of the record */
80090 ){
80091 UnpackedRecord *p; /* Unpacked record to return */
80092 int nByte; /* Number of bytes required for *p */
80093 nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nKeyField+1);
80094 p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->db, nByte);
80095 if( !p ) return 0;
80096 p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))];
80097 assert( pKeyInfo->aSortOrder!=0 );
80098 p->pKeyInfo = pKeyInfo;
80099 p->nField = pKeyInfo->nKeyField + 1;
80100 return p;
80101 }
sqlite3BtreeMovetoUnpacked函数
5327 /* Move the cursor so that it points to an entry near the key
5328 specified by pIdxKey or intKey. Return a success code.
5329
5330 For INTKEY tables, the intKey parameter is used. pIdxKey
5331 must be NULL. For index tables, pIdxKey is used and intKey
5332 is ignored.
5333
5334 If an exact match is not found, then the cursor is always
5335 left pointing at a leaf page which would hold the entry if it
5336 were present. The cursor might point to an entry that comes
5337 before or after the key.
5338
5339 An integer is written into *pRes which is the result of
5340 comparing the key with the entry to which the cursor is
5341 pointing. The meaning of the integer written into
5342 *pRes is as follows:
5343
5344 *pRes<0 The cursor is left pointing at an entry that
5345 is smaller than intKey/pIdxKey or if the table is empty
5346 and the cursor is therefore left pointing at nothing.
5347
5348 *pRes==0 The cursor is left pointing at an entry that
5349 exactly matches intKey/pIdxKey.
5350
5351 *pRes>0 The cursor is left pointing at an entry that
5352 is larger than intKey/pIdxKey.
5353
5354 For index tables, the pIdxKey->eqSeen field is set to 1 if there
5355 exists an entry in the table that exactly matches pIdxKey.
5356 */
/* Return values visible in this routine: SQLITE_OK on a completed search;
** SQLITE_CORRUPT_PAGE() / SQLITE_CORRUPT_BKPT when a cell or record is found
** to be malformed; SQLITE_NOMEM_BKPT when the overflow-record buffer cannot
** be allocated; otherwise whatever error moveToRoot(), moveToChild(),
** sqlite3BtreeNext() or accessPayload() reported. */
5357 int sqlite3BtreeMovetoUnpacked(
5358 BtCursor *pCur, /* The cursor to be moved */
5359 UnpackedRecord *pIdxKey, /* Unpacked index key */
5360 i64 intKey, /* The table key */
5361 int biasRight, /* If true, bias the search to the high end */
5362 int *pRes /* Write search results here */
5363 ){
5364 int rc;
5365 RecordCompare xRecordCompare;
5366
5367 assert( cursorOwnsBtShared(pCur) );
5368 assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
5369 assert( pRes );
5370 assert( (pIdxKey==0)==(pCur->pKeyInfo==0) );
5371 assert( pCur->eState!=CURSOR_VALID || (pIdxKey==0)==(pCur->curIntKey!=0) );
5372
5373 /* If the cursor is already positioned at the point we are trying
5374 to move to, then just return without doing any work */
5375 if( pIdxKey==0
5376 && pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
5377 ){
5378 if( pCur->info.nKey==intKey ){
5379 *pRes = 0;
5380 return SQLITE_OK;
5381 }
5382 if( pCur->info.nKey<intKey ){
5383 if( (pCur->curFlags & BTCF_AtLast)!=0 ){
5384 *pRes = -1;
5385 return SQLITE_OK;
5386 }
5387 /* If the requested key is one more than the previous key, then
5388 try to get there using sqlite3BtreeNext() rather than a full
5389 binary search. This is an optimization only. The correct answer
5390 is still obtained without this case, only a little more slowly */
5391 if( pCur->info.nKey+1==intKey ){
5392 *pRes = 0;
5393 rc = sqlite3BtreeNext(pCur, 0);
5394 if( rc==SQLITE_OK ){
5395 getCellInfo(pCur);
5396 if( pCur->info.nKey==intKey ){
5397 return SQLITE_OK;
5398 }
5399 }else if( rc==SQLITE_DONE ){
/* No next entry: not an error, fall through to the full search below */
5400 rc = SQLITE_OK;
5401 }else{
5402 return rc;
5403 }
5404 }
5405 }
5406 }
5407
/* Choose the comparison routine: a record comparator for index keys, or
   0 to select the integer-key search loop further down */
5408 if( pIdxKey ){
5409 xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
5410 pIdxKey->errCode = 0;
5411 assert( pIdxKey->default_rc==1
5412 || pIdxKey->default_rc==0
5413 || pIdxKey->default_rc==-1
5414 );
5415 }else{
5416 xRecordCompare = 0; /* All keys are integers */
5417 }
5418
5419 rc = moveToRoot(pCur);
5420 if( rc ){
5421 if( rc==SQLITE_EMPTY ){
5422 assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
5423 *pRes = -1;
5424 return SQLITE_OK;
5425 }
5426 return rc;
5427 }
5428 assert( pCur->pPage );
5429 assert( pCur->pPage->isInit );
5430 assert( pCur->eState==CURSOR_VALID );
5431 assert( pCur->pPage->nCell > 0 );
5432 assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey );
5433 assert( pCur->curIntKey || pIdxKey );
/* Outer loop: one iteration per b-tree page visited on the way down.
   Each iteration binary-searches the cells of the current page. */
5434 for(;;){
5435 int lwr, upr, idx, c;
5436 Pgno chldPg;
5437 MemPage *pPage = pCur->pPage;
5438 u8 *pCell; /* Pointer to current cell in pPage */
5439
5440 /* pPage->nCell must be greater than zero. If this is the root-page
5441 the cursor would have been INVALID above and this for(;;) loop
5442 not run. If this is not the root-page, then the moveToChild() routine
5443 would have already detected db corruption. Similarly, pPage must
5444 be the right kind (index or table) of b-tree page. Otherwise
5445 a moveToChild() or moveToRoot() call would have detected corruption. */
5446 assert( pPage->nCell>0 );
5447 assert( pPage->intKey==(pIdxKey==0) );
5448 lwr = 0;
5449 upr = pPage->nCell-1;
5450 assert( biasRight==0 || biasRight==1 );
5451 idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */
5452 pCur->ix = (u16)idx;
5453 if( xRecordCompare==0 ){
/* Integer-key (table b-tree) binary search */
5454 for(;;){
5455 i64 nCellKey;
5456 pCell = findCellPastPtr(pPage, idx);
5457 if( pPage->intKeyLeaf ){
/* Step over the varint that precedes the key on an intkey leaf cell,
   failing with a corruption error if it runs past the end of the page */
5458 while( 0x80 <= *(pCell++) ){
5459 if( pCell>=pPage->aDataEnd ){
5460 return SQLITE_CORRUPT_PAGE(pPage);
5461 }
5462 }
5463 }
5464 getVarint(pCell, (u64*)&nCellKey);
5465 if( nCellKey<intKey ){
5466 lwr = idx+1;
5467 if( lwr>upr ){ c = -1; break; }
5468 }else if( nCellKey>intKey ){
5469 upr = idx-1;
5470 if( lwr>upr ){ c = +1; break; }
5471 }else{
5472 assert( nCellKey==intKey );
5473 pCur->ix = (u16)idx;
5474 if( !pPage->leaf ){
5475 lwr = idx;
5476 goto moveto_next_layer;
5477 }else{
5478 pCur->curFlags |= BTCF_ValidNKey;
5479 pCur->info.nKey = nCellKey;
5480 pCur->info.nSize = 0;
5481 *pRes = 0;
5482 return SQLITE_OK;
5483 }
5484 }
5485 assert( lwr+upr>=0 );
5486 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2; */
5487 }
5488 }else{
/* Index b-tree binary search using xRecordCompare */
5489 for(;;){
5490 int nCell; /* Size of the pCell cell in bytes */
5491 pCell = findCellPastPtr(pPage, idx);
5492
5493 /* The maximum supported page-size is 65536 bytes. This means that
5494 the maximum number of record bytes stored on an index B-Tree
5495 page is less than 16384 bytes and may be stored as a 2-byte
5496 varint. This information is used to attempt to avoid parsing
5497 the entire cell by checking for the cases where the record is
5498 stored entirely within the b-tree page by inspecting the first
5499 2 bytes of the cell.
5500 */
5501 nCell = pCell[0];
5502 if( nCell<=pPage->max1bytePayload ){
5503 /* This branch runs if the record-size field of the cell is a
5504 single byte varint and the record fits entirely on the main
5505 b-tree page. */
5506 testcase( pCell+nCell+1==pPage->aDataEnd );
5507 c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
5508 }else if( !(pCell[1] & 0x80)
5509 && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
5510 ){
5511 /* The record-size field is a 2 byte varint and the record
5512 fits entirely on the main b-tree page. */
5513 testcase( pCell+nCell+2==pPage->aDataEnd );
5514 c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
5515 }else{
5516 /* The record flows over onto one or more overflow pages. In
5517 this case the whole cell needs to be parsed, a buffer allocated
5518 and accessPayload() used to retrieve the record into the
5519 buffer before VdbeRecordCompare() can be called.
5520
5521 If the record is corrupt, the xRecordCompare routine may read
5522 up to two varints past the end of the buffer. An extra 18
5523 bytes of padding is allocated at the end of the buffer in
5524 case this happens. */
5525 void *pCellKey;
5526 u8 * const pCellBody = pCell - pPage->childPtrSize;
5527 const int nOverrun = 18; /* Size of the overrun padding */
5528 pPage->xParseCell(pPage, pCellBody, &pCur->info);
5529 nCell = (int)pCur->info.nKey;
5530 testcase( nCell<0 ); /* True if key size is 2^32 or more */
5531 testcase( nCell==0 ); /* Invalid key size: 0x80 0x80 0x00 */
5532 testcase( nCell==1 ); /* Invalid key size: 0x80 0x80 0x01 */
5533 testcase( nCell==2 ); /* Minimum legal index key size */
5534 if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){
5535 rc = SQLITE_CORRUPT_PAGE(pPage);
5536 goto moveto_finish;
5537 }
5538 pCellKey = sqlite3Malloc( nCell+nOverrun );
5539 if( pCellKey==0 ){
5540 rc = SQLITE_NOMEM_BKPT;
5541 goto moveto_finish;
5542 }
5543 pCur->ix = (u16)idx;
5544 rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
5545 memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */
5546 pCur->curFlags &= ~BTCF_ValidOvfl;
5547 if( rc ){
5548 sqlite3_free(pCellKey);
5549 goto moveto_finish;
5550 }
5551 c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey);
5552 sqlite3_free(pCellKey);
5553 }
5554 assert(
5555 (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
5556 && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
5557 );
5558 if( c<0 ){
5559 lwr = idx+1;
5560 }else if( c>0 ){
5561 upr = idx-1;
5562 }else{
5563 assert( c==0 );
5564 *pRes = 0;
5565 rc = SQLITE_OK;
5566 pCur->ix = (u16)idx;
/* c==0 is also what the comparator yields after recording an error, so
   a non-zero errCode here is reported as corruption, not as a match */
5567 if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT;
5568 goto moveto_finish;
5569 }
5570 if( lwr>upr ) break;
5571 assert( lwr+upr>=0 );
5572 idx = (lwr+upr)>>1; /* idx = (lwr+upr)/2 */
5573 }
5574 }
5575 assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
5576 assert( pPage->isInit );
5577 if( pPage->leaf ){
/* No exact match on a leaf: stop here and report the last comparison */
5578 assert( pCur->ix<pCur->pPage->nCell );
5579 pCur->ix = (u16)idx;
5580 *pRes = c;
5581 rc = SQLITE_OK;
5582 goto moveto_finish;
5583 }
5584 moveto_next_layer:
/* Descend: if lwr is past the last cell, take the child page number stored
   at offset hdrOffset+8 of the page; otherwise the one stored in cell lwr */
5585 if( lwr>=pPage->nCell ){
5586 chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);
5587 }else{
5588 chldPg = get4byte(findCell(pPage, lwr));
5589 }
5590 pCur->ix = (u16)lwr;
5591 rc = moveToChild(pCur, chldPg);
5592 if( rc ) break;
5593 }
5594 moveto_finish:
5595 pCur->info.nSize = 0;
5596 assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
5597 return rc;
5598 }
在btreeInt.h中有关于struct MemPage的定义
此对象的实例存储已加载到内存中的单个数据库页的相关信息。此对象中的信息来源于磁盘上的原始页面内容。每当一个数据库页被加载到内存时, pager (页管理器) 会分配此对象的一个实例, 并将其前 8 个字节清零。(这就是 pager 中与每个页面相关联的 "额外" 信息。) 对此结构所有字段的访问都由 MemPage.pBt->mutex 中保存的互斥锁控制。
261 /*
262 An instance of this object stores information about a single database
263 page that has been loaded into memory. The information in this object
264 is derived from the raw on-disk page content.
265
266 As each database page is loaded into memory, the pager allocates an
267 instance of this object and zeros the first 8 bytes. (This is the
268 "extra" information associated with each page of the pager.)
269
270 Access to all fields of this structure is controlled by the mutex
271 stored in MemPage.pBt->mutex.
272 */
273 struct MemPage {
274 u8 isInit; /* True if previously initialized. MUST BE FIRST! */
275 u8 bBusy; /* Prevent endless loops on corrupt database files */
276 u8 intKey; /* True if table b-trees. False for index b-trees */
277 u8 intKeyLeaf; /* True if the leaf of an intKey table */
278 Pgno pgno; /* Page number for this page */
279 /* Only the first 8 bytes (above) are zeroed by pager.c when a new page
280 is allocated. All fields that follow must be initialized before use */
281 u8 leaf; /* True if a leaf page */
282 u8 hdrOffset; /* 100 for page 1. 0 otherwise */
283 u8 childPtrSize; /* 0 if leaf==1. 4 if leaf==0 */
284 u8 max1bytePayload; /* min(maxLocal,127) */
285 u8 nOverflow; /* Number of overflow cell bodies in aCell[] */
286 u16 maxLocal; /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
287 u16 minLocal; /* Copy of BtShared.minLocal or BtShared.minLeaf */
288 u16 cellOffset; /* Index in aData of first cell pointer */
289 int nFree; /* Number of free bytes on the page. -1 for unknown */
290 u16 nCell; /* Number of cells on this page, local and ovfl */
291 u16 maskPage; /* Mask for page offset */
292 u16 aiOvfl[4]; /* Insert the i-th overflow cell before the aiOvfl-th
293 non-overflow cell */
294 u8 *apOvfl[4]; /* Pointers to the body of overflow cells */
295 BtShared *pBt; /* Pointer to BtShared that this page is part of */
296 u8 *aData; /* Pointer to disk image of the page data */
297 u8 *aDataEnd; /* One byte past the end of usable data */
298 u8 *aCellIdx; /* The cell index area */
299 u8 *aDataOfst; /* Same as aData for leaves. aData+4 for interior */
300 DbPage *pDbPage; /* Pager page handle */
301 u16 (*xCellSize)(MemPage*,u8*); /* cellSizePtr method */
302 void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */
303 };
304