sqlite源码分析

最新推荐文章于 2024-07-15 15:20:13 发布

久许

最新推荐文章于 2024-07-15 15:20:13 发布

阅读量1.5k

点赞数

分类专栏： sqlite

本文链接：https://blog.csdn.net/jiuweideqixu/article/details/91350341

版权

sqlite 专栏收录该内容

19 篇文章 0 订阅

订阅专栏

pager.c文件中有Pager的定义

/*
** The Pager object.  Members inside the region delimited by the two banner
** comments below change during routine operation (the "state" of the
** pager); all other members are either fixed when the pager is created or
** change only on a significant mode change (the "configuration").
*/
struct Pager {
  sqlite3_vfs *pVfs;          /* OS functions to use for IO */
  u8 exclusiveMode;           /* Boolean. True if locking_mode==EXCLUSIVE */
  u8 journalMode;             /* One of the PAGER_JOURNALMODE_* values */
  u8 useJournal;              /* Use a rollback journal on this file */
  u8 noSync;                  /* Do not sync the journal if true */
  u8 fullSync;                /* Do extra syncs of the journal for robustness */
  u8 extraSync;               /* sync directory after journal delete */
  u8 syncFlags;               /* SYNC_NORMAL or SYNC_FULL otherwise */
  u8 walSyncFlags;            /* See description above */
  u8 tempFile;                /* zFilename is a temporary or immutable file */
  u8 noLock;                  /* Do not lock (except in WAL mode) */
  u8 readOnly;                /* True for a read-only database */
  u8 memDb;                   /* True to inhibit all file I/O */

  /**************************************************************************
  ** The following block contains those class members that change during
  ** routine operation.  Class members not in this block are either fixed
  ** when the pager is first created or else only change when there is a
  ** significant mode change (such as changing the page_size, locking_mode,
  ** or the journal_mode).  From another view, these class members describe
  ** the "state" of the pager, while other class members describe the
  ** "configuration" of the pager.
  */
  u8 eState;                  /* Pager state (OPEN, READER, WRITER_LOCKED..) */
  u8 eLock;                   /* Current lock held on database file */
  u8 changeCountDone;         /* Set after incrementing the change-counter */
  u8 setMaster;               /* True if a m-j name has been written to jrnl */
  u8 doNotSpill;              /* Do not spill the cache when non-zero */
  u8 subjInMemory;            /* True to use in-memory sub-journals */
  u8 bUseFetch;               /* True to use xFetch() */
  u8 hasHeldSharedLock;       /* True if a shared lock has ever been held */
  Pgno dbSize;                /* Number of pages in the database */
  Pgno dbOrigSize;            /* dbSize before the current transaction */
  Pgno dbFileSize;            /* Number of pages in the database file */
  Pgno dbHintSize;            /* Value passed to FCNTL_SIZE_HINT call */
  int errCode;                /* One of several kinds of errors */
  int nRec;                   /* Pages journalled since last j-header written */
  u32 cksumInit;              /* Quasi-random value added to every checksum */
  u32 nSubRec;                /* Number of records written to sub-journal */
  Bitvec *pInJournal;         /* One bit for each page in the database file */
  sqlite3_file *fd;           /* File descriptor for database */
  sqlite3_file *jfd;          /* File descriptor for main journal */
  sqlite3_file *sjfd;         /* File descriptor for sub-journal */
  i64 journalOff;             /* Current write offset in the journal file */
  i64 journalHdr;             /* Byte offset to previous journal header */
  sqlite3_backup *pBackup;    /* Pointer to list of ongoing backup processes */
  PagerSavepoint *aSavepoint; /* Array of active savepoints */
  int nSavepoint;             /* Number of elements in aSavepoint[] */
  u32 iDataVersion;           /* Changes whenever database content changes */
  char dbFileVers[16];        /* Changes whenever database file changes */

  int nMmapOut;               /* Number of mmap pages currently outstanding */
  sqlite3_int64 szMmap;       /* Desired maximum mmap size */
  PgHdr *pMmapFreelist;       /* List of free mmap page headers (pDirty) */
  /*
  ** End of the routinely-changing class members
  ***************************************************************************/

  u16 nExtra;                 /* Add this many bytes to each in-memory page */
  i16 nReserve;               /* Number of unused bytes at end of each page */
  u32 vfsFlags;               /* Flags for sqlite3_vfs.xOpen() */
  u32 sectorSize;             /* Assumed sector size during rollback */
  int pageSize;               /* Number of bytes in a page */
  Pgno mxPgno;                /* Maximum allowed size of the database */
  i64 journalSizeLimit;       /* Size limit for persistent journal files */
  char *zFilename;            /* Name of the database file */
  char *zJournal;             /* Name of the journal file */
  int (*xBusyHandler)(void*); /* Function to call when busy */
  void *pBusyHandlerArg;      /* Context argument for xBusyHandler */
  int aStat[4];               /* Total cache hits, misses, writes, spills */
#ifdef SQLITE_TEST
  int nRead;                  /* Database pages read */
#endif
  void (*xReiniter)(DbPage*); /* Call this routine when reloading pages */
  int (*xGet)(Pager*,Pgno,DbPage**,int); /* Routine to fetch a page */
#ifdef SQLITE_HAS_CODEC
  void *(*xCodec)(void*,void*,Pgno,int); /* Routine for en/decoding data */
  void (*xCodecSizeChng)(void*,int,int); /* Notify of page size changes */
  void (*xCodecFree)(void*);             /* Destructor for the codec */
  void *pCodec;               /* First argument to xCodec... methods */
#endif
  char *pTmpSpace;            /* Pager.pageSize bytes of space for tmp use */
  PCache *pPCache;            /* Pointer to page cache object */
#ifndef SQLITE_OMIT_WAL
  Wal *pWal;                  /* Write-ahead log used by "journal_mode=wal" */
  char *zWal;                 /* File name for write-ahead log */
#endif
};

sqliteInt.h中关于KeyInfo的定义

2132 /*
2133 ** An instance of the following structure is passed as the first
2134 ** argument to sqlite3VdbeKeyCompare and is used to control the
2135 ** comparison of the two index keys.
2136 **
2137 ** Note that aSortOrder[] and aColl[] have nField+1 slots.  There
2138 ** are nField slots for the columns of an index then one extra slot
2139 ** for the rowid at the end.  NOTE(review): the struct below has no nField member; presumably nKeyField is meant -- confirm.
2140 */
2141 struct KeyInfo {
2142   u32 nRef;           /* Number of references to this KeyInfo object */
2143   u8 enc;             /* Text encoding - one of the SQLITE_UTF* values */
2144   u16 nKeyField;      /* Number of key columns in the index */
2145   u16 nAllField;      /* Total columns, including key plus others */
2146   sqlite3 *db;        /* The database connection */
2147   u8 *aSortOrder;     /* Sort order for each column. */
2148   CollSeq *aColl[1];  /* Collating sequence for each term of the key. Declared with one element; presumably over-allocated by the creator -- confirm */
2149 };
2150

sqliteInt.h中关于UnpackedRecord的定义

此对象保存一条已被解析为各个字段的记录，以便进行比较。记录（record）是包含一个或多个数据字段的对象，既用于存储表行的内容，也用于存储索引的键。记录的 blob 编码由 VDBE 的 OP_MakeRecord 操作码创建，并由 OP_Column 操作码拆解。此对象的一个实例用作在索引 b+树上进行搜索的“键”，搜索的目标是找到与此对象所描述的键最接近的条目。此对象可以只包含键的前缀，字段的数量由 pKeyInfo->nField 给出……r1 和 r2 字段分别是当此键小于或大于 b 树中某个键时要返回的值；它们通常分别为 -1 和 +1，但如果 b 树是 DESC 顺序，则可能反转为 +1 和 -1。当比较结果为相等时，键比较函数实际返回的是 default_rc。default_rc 可以是 -1、0 或 +1。如果 b 树中有多个条目具有相同的键（只比较前 pKeyInfo->nFields 个字段时），可以将 default_rc 设为 -1 使搜索定位到最后一个匹配项，或设为 +1 使搜索定位到第一个匹配项。键比较函数在将此结构与 b 树记录比较时，只要得到过相等的结果，就会把 eqSeen 置为真。当 default_rc!=0 时，搜索可能停在第一个匹配项的前一条记录上，或最后一个匹配项的后一条记录上；eqSeen 字段用来指示 b 树中是否存在完全匹配的条目。

2168 ** or greater than a key in the btree, respectively.  These are normally
2169 ** -1 and +1 respectively, but might be inverted to +1 and -1 if the b-tree
2170 ** is in DESC order.
2171 **
2172 ** The key comparison functions actually return default_rc when they find
2173 ** an equals comparison.  default_rc can be -1, 0, or +1.  If there are
2174 ** multiple entries in the b-tree with the same key (when only looking
2175 ** at the first pKeyInfo->nFields,) then default_rc can be set to -1 to
2176 ** cause the search to find the last match, or +1 to cause the search to
2177 ** find the first match.
2178 **
2179 ** The key comparison functions will set eqSeen to true if they ever
2180 ** get an equal result when comparing this structure to a b-tree record.
2181 ** When default_rc!=0, the search might end up on the record immediately
2182 ** before the first match or immediately after the last match.  The
2183 ** eqSeen field will indicate whether or not an exact match exists in the
2184 ** b-tree.
2185 */
2186 struct UnpackedRecord {
2187   KeyInfo *pKeyInfo;  /* Collation and sort-order information */
2188   Mem *aMem;          /* Values */
2189   u16 nField;         /* Number of entries in aMem[] */
2190   i8 default_rc;      /* Comparison result if keys are equal */
2191   u8 errCode;         /* Error detected by xRecordCompare (CORRUPT or NOMEM) */
2192   i8 r1;              /* Value to return if (lhs < rhs) */
2193   i8 r2;              /* Value to return if (lhs > rhs) */
2194   u8 eqSeen;          /* True if an equality comparison has been seen */
2195 };

vdbeaux.c中的sqlite3VdbeRecordUnpack函数

给定 pKey[] 中一条记录的 nKey 字节编码，用解码后的记录内容填充第四个参数所指向的 UnpackedRecord 结构。pKey 指向的二进制记录以记录头开始：头部长度以及各字段的 serial type 都以变长整数（varint）的形式存储，因此读取时需要先用 getVarint32 解码，再根据 serial type 调用 sqlite3VdbeSerialGet 取出各字段的值。

3811 /*
3812 Given the nKey-byte encoding of a record in pKey[], populate the 
3813 UnpackedRecord structure indicated by the fourth argument with the
3814 contents of the decoded record.
3815 */
3816 void sqlite3VdbeRecordUnpack(
3817   KeyInfo *pKeyInfo,     /* Information about the record format */
3818   int nKey,              /* Size of the binary record */
3819   const void *pKey,      /* The binary record */
3820   UnpackedRecord *p      /* Populate this structure before returning. */
3821 ){
3822   const unsigned char *aKey = (const unsigned char *)pKey;
3823   u32 d;                          /* Offset in aKey[] of the next field's data */
3824   u32 idx;                        /* Offset in aKey[] to read from */
3825   u16 u;                          /* Unsigned loop counter */
3826   u32 szHdr;                      /* Header size decoded from the first varint */
3827   Mem *pMem = p->aMem;
3828 
3829   p->default_rc = 0;
3830   assert( EIGHT_BYTE_ALIGNMENT(pMem) );
3831   idx = getVarint32(aKey, szHdr);// Decode the leading varint into szHdr; the return value becomes the first header offset. szHdr is passed by name, not by address, so this getVarint32 is presumably a macro that assigns to it -- confirm
3832   d = szHdr;
3833   u = 0;
3834   while( idx<szHdr && d<=(u32)nKey ){ /* stop when the header is exhausted or the data offset passes the end of the record */
3835     u32 serial_type;
3836 
3837     idx += getVarint32(&aKey[idx], serial_type);
3838     pMem->enc = pKeyInfo->enc;
3839     pMem->db = pKeyInfo->db;
3840     /* pMem->flags = 0; // sqlite3VdbeSerialGet() will set this for us */
3841     pMem->szMalloc = 0;
3842     pMem->z = 0;
3843     d += sqlite3VdbeSerialGet(&aKey[d], serial_type, pMem);
3844     pMem++;
3845     if( (++u)>=p->nField ) break;
3846   }
3847   if( d>(u32)nKey && u ){
3848     assert( CORRUPT_DB );
3849     /* In a corrupt record entry, the last pMem might have been set up using 
3850     uninitialized memory. Overwrite its value with NULL, to prevent
3851     warnings from MSAN. */
3852     sqlite3VdbeMemSetNull(pMem-1);
3853   }
3854   assert( u<=pKeyInfo->nKeyField + 1 );
3855   p->nField = u;  /* record how many fields were actually decoded */
3856 }

sqlite3中关于变长整数的处理 fts1.c，getVarint32函数

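注意：下面列出的是 fts1.c 中的 getVarint/getVarint32，它与上面 sqlite3VdbeRecordUnpack 里以 getVarint32(aKey, szHdr) 形式调用的版本并不是同一个实现（那里第二个参数直接传变量而不是指针）。SQLite 数据库文件格式中的变长整数是高位组在前（大端）的；而 fts1.c 的实现正好相反：每个字节用低 7 位存放数据，最高位为 1 表示后面还有字节，并且权重最低的 7 位组存放在内存的低地址处。算法从低地址开始，每读一个字节就把它的低 7 位乘以当前权重 y 累加到结果中，然后将 y 左移 7 位，直到遇到最高位为 0 的字节为止，详情见下面的算法。对于 char 类型，在 C 语言中一个字符占用一个字节。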

 126 /* Read a 64-bit variable-length integer from memory starting at p[0].
 127  * Return the number of bytes read, or 0 on error.
 128  * The value is stored in *v.  Bytes with the 0x80 bit set are continuation bytes; the first byte read carries the least-significant 7 bits. */
 129 static int getVarint(const char *p, sqlite_int64 *v){
 130   const unsigned char *q = (const unsigned char *) p;
 131   sqlite_uint64 x = 0, y = 1;  /* x: value accumulated so far; y: weight of the next 7-bit group */
 132   while( (*q & 0x80) == 0x80 ){
 133     x += y * (*q++ & 0x7f);
 134     y <<= 7;
 135     if( q - (unsigned char *)p >= VARINT_MAX ){  /* bad data */
 136       assert( 0 );
 137       return 0;
 138     }
 139   }
 140   x += y * (*q++);  /* final byte has 0x80 clear, so it is added unmasked */
 141   *v = (sqlite_int64) x;
 142   return (int) (q - (unsigned char *)p);
 143 }
 144 
 145 static int getVarint32(const char *p, int *pi){  /* Decode a varint with getVarint() and store it in *pi as an int; returns getVarint()'s result */
 146  sqlite_int64 i;
 147  int ret = getVarint(p, &i);
 148  *pi = (int) i;
 149  assert( *pi==i );  /* the decoded value must survive narrowing to int */
 150  return ret;
 151 }
 152

在vdbeaux.c中查看函数sqlite3VdbeSerialGet的定义

/* Deserialize the value of serial type serial_type stored at buf into pMem.
** The return value is the number of bytes of buf used by the value: the
** explicit returns below yield 1, 2, 3, 4 or 6 bytes for the fixed-width
** integers, whatever serialGet() returns for types 6 and 7, pMem->n bytes
** for blobs and strings, and 0 for the types that carry no payload. */
3691 u32 sqlite3VdbeSerialGet(
3692   const unsigned char *buf,     /* Buffer to deserialize from */
3693   u32 serial_type,              /* Serial type to deserialize */
3694   Mem *pMem                     /* Memory cell to write value into */
3695 ){
3696   switch( serial_type ){
3697     case 10: { /* Internal use only: NULL with virtual table
3698                UPDATE no-change flag set */
3699       pMem->flags = MEM_Null|MEM_Zero;
3700       pMem->n = 0;
3701       pMem->u.nZero = 0;
3702       break;
3703     }
3704     case 11:   /* Reserved for future use */
3705     case 0: {  /* Null */
3706       /* EVIDENCE-OF: R-24078-09375 Value is a NULL. */
3707       pMem->flags = MEM_Null;
3708       break;
3709     }
3710     case 1: {
3711       /* EVIDENCE-OF: R-44885-25196 Value is an 8-bit twos-complement
3712       integer. */
3713       pMem->u.i = ONE_BYTE_INT(buf);
3714       pMem->flags = MEM_Int;
3715       testcase( pMem->u.i<0 );
3716       return 1;
3717     }
3718     case 2: { /* 2-byte signed integer */
3719       /* EVIDENCE-OF: R-49794-35026 Value is a big-endian 16-bit
3720       twos-complement integer. */
3721       pMem->u.i = TWO_BYTE_INT(buf);
3722       pMem->flags = MEM_Int;
3723       testcase( pMem->u.i<0 );
3724       return 2;
3725     }
3726     case 3: { /* 3-byte signed integer */
3727       /* EVIDENCE-OF: R-37839-54301 Value is a big-endian 24-bit
3728       twos-complement integer. */
3729       pMem->u.i = THREE_BYTE_INT(buf);
3730       pMem->flags = MEM_Int;
3731       testcase( pMem->u.i<0 );
3732       return 3;
3733     }
3734     case 4: { /* 4-byte signed integer */
3735       /* EVIDENCE-OF: R-01849-26079 Value is a big-endian 32-bit
3736       twos-complement integer. */
3737       pMem->u.i = FOUR_BYTE_INT(buf);
3738 #ifdef __HP_cc 
3739       /* Work around a sign-extension bug in the HP compiler for HP/UX */
3740       if( buf[0]&0x80 ) pMem->u.i |= 0xffffffff80000000LL;
3741 #endif
3742       pMem->flags = MEM_Int;
3743       testcase( pMem->u.i<0 );
3744       return 4;
3745     }
3746     case 5: { /* 6-byte signed integer */
3747       /* EVIDENCE-OF: R-50385-09674 Value is a big-endian 48-bit
3748       twos-complement integer. */
3749       pMem->u.i = FOUR_BYTE_UINT(buf+2) + (((i64)1)<<32)*TWO_BYTE_INT(buf);  /* signed high 2 bytes scaled by 2^32, plus unsigned low 4 bytes */
3750       pMem->flags = MEM_Int;
3751       testcase( pMem->u.i<0 );
3752       return 6;
3753     }
3754     case 6:   /* 8-byte signed integer */
3755     case 7: { /* IEEE floating point */
3756       /* These use local variables, so do them in a separate routine
3757       to avoid having to move the frame pointer in the common case */
3758       return serialGet(buf,serial_type,pMem);
3759     }
3760     case 8:    /* Integer 0 */
3761     case 9: {  /* Integer 1 */
3762       /* EVIDENCE-OF: R-12976-22893 Value is the integer 0. */
3763       /* EVIDENCE-OF: R-18143-12121 Value is the integer 1. */
3764       pMem->u.i = serial_type-8;
3765       pMem->flags = MEM_Int;
3766       return 0;  /* the value is implied by the serial type; nothing is read from buf */
3767     }
3768     default: {
3769       /* EVIDENCE-OF: R-14606-31564 Value is a BLOB that is (N-12)/2 bytes in
3770       length.
3771       EVIDENCE-OF: R-28401-00140 Value is a string in the text encoding and
3772       (N-13)/2 bytes in length. */
3773       static const u16 aFlag[] = { MEM_Blob|MEM_Ephem, MEM_Str|MEM_Ephem };
3774       pMem->z = (char *)buf;  /* points into buf; no copy is made here */
3775       pMem->n = (serial_type-12)/2;
3776       pMem->flags = aFlag[serial_type&1];  /* even serial types select the blob flags, odd ones the string flags */
3777       return pMem->n;
3778     }
3779   }
3780   return 0;  /* reached via "break" from cases 10, 11 and 0 */
3781 }

serialGet函数

将 buf 指向的数据 blob 按序列类型 serial_type 反序列化，并将结果存储在 pMem 中，返回读取的字节数。出于性能考虑，该功能被实现为两个独立的例程：少数需要局部变量的情况被拆分到单独的例程（即 serialGet）中，这样在大多数情况下可以避免移动栈指针的开销。

/*
3647 Deserialize the data blob pointed to by buf as serial type serial_type
3648 and store the result in pMem.  Return the number of bytes read.
3649 
3650 This function is implemented as two separate routines for performance.
3651 The few cases that require local variables are broken out into a separate
3652 routine so that in most cases the overhead of moving the stack pointer
3653 is avoided.
3654 */
3655 static u32 serialGet(
3656   const unsigned char *buf,     /* Buffer to deserialize from */
3657   u32 serial_type,              /* Serial type to deserialize */
3658   Mem *pMem                     /* Memory cell to write value into */
3659 ){
3660   u64 x = FOUR_BYTE_UINT(buf);    /* first four bytes of buf */
3661   u32 y = FOUR_BYTE_UINT(buf+4);  /* next four bytes of buf */
3662   x = (x<<32) + y;                /* x now holds all eight bytes, first byte most significant */
3663   if( serial_type==6 ){
3664     /* EVIDENCE-OF: R-29851-52272 Value is a big-endian 64-bit
3665     twos-complement integer. */
3666     pMem->u.i = *(i64*)&x;
3667     pMem->flags = MEM_Int;
3668     testcase( pMem->u.i<0 );
3669   }else{
3670     /* EVIDENCE-OF: R-57343-49114 Value is a big-endian IEEE 754-2008 64-bit
3671     floating point number. */
3672 #if !defined(NDEBUG) && !defined(SQLITE_OMIT_FLOATING_POINT)
3673     /* Verify that integers and floating point values use the same
3674     byte order.  Or, that if SQLITE_MIXED_ENDIAN_64BIT_FLOAT is
3675     defined that 64-bit floating point values really are mixed
3676     endian.
3677     */
3678     static const u64 t1 = ((u64)0x3ff00000)<<32;
3679     static const double r1 = 1.0;
3680     u64 t2 = t1;
3681     swapMixedEndianFloat(t2);
3682     assert( sizeof(r1)==sizeof(t2) && memcmp(&r1, &t2, sizeof(r1))==0 );
3683 #endif
3684     assert( sizeof(x)==8 && sizeof(pMem->u.r)==8 );
3685     swapMixedEndianFloat(x);
3686     memcpy(&pMem->u.r, &x, sizeof(x));
3687     pMem->flags = IsNaN(x) ? MEM_Null : MEM_Real;  /* a NaN bit pattern is stored as NULL rather than as a real */
3688   }
3689   return 8;  /* both serial types handled here read eight bytes */
3690 }

关于BYTE_INT的定义

3637 /* Input "x" is a sequence of unsigned characters that represent a
3638 big-endian integer.  Return the equivalent native integer.  The *_INT forms cast the leading byte to i8 so that the sign carries into the result; FOUR_BYTE_UINT casts it to u32 instead.
3639 */
3640 #define ONE_BYTE_INT(x)    ((i8)(x)[0])
3641 #define TWO_BYTE_INT(x)    (256*(i8)((x)[0])|(x)[1])
3642 #define THREE_BYTE_INT(x)  (65536*(i8)((x)[0])|((x)[1]<<8)|(x)[2])
3643 #define FOUR_BYTE_UINT(x)  (((u32)(x)[0]<<24)|((x)[1]<<16)|((x)[2]<<8)|(x)[3])
3644 #define FOUR_BYTE_INT(x) (16777216*(i8)((x)[0])|((x)[1]<<16)|((x)[2]<<8)|(x)[3])

查看函数SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord的定义

 80074 /*
 80075 ** This routine is used to allocate sufficient space for an UnpackedRecord
 80076 ** structure large enough to be used with sqlite3VdbeRecordUnpack() if
 80077 ** the first argument is a pointer to KeyInfo structure pKeyInfo.
 80078 **
 80079 ** The space is obtained with a single sqlite3DbMallocRaw() call on
 80080 ** pKeyInfo->db: the UnpackedRecord header (rounded up with ROUND8) is
 80081 ** followed by an array of pKeyInfo->nKeyField+1 Mem cells, and p->aMem
 80082 ** is pointed at that array.  Only aMem, pKeyInfo and nField are set here.
 80083 ** NOTE(review): the returned block is presumably released by the caller
 80084 ** with sqlite3DbFree() -- confirm.
 80085 **
 80086 ** If an OOM error occurs, NULL is returned.
 80087 */
 80088 SQLITE_PRIVATE UnpackedRecord *sqlite3VdbeAllocUnpackedRecord(
 80089   KeyInfo *pKeyInfo               /* Description of the record */
 80090 ){
 80091   UnpackedRecord *p;              /* Unpacked record to return */
 80092   int nByte;                      /* Number of bytes required for *p */
 80093   nByte = ROUND8(sizeof(UnpackedRecord)) + sizeof(Mem)*(pKeyInfo->nKeyField+1);
 80094   p = (UnpackedRecord *)sqlite3DbMallocRaw(pKeyInfo->db, nByte);
 80095   if( !p ) return 0;
 80096   p->aMem = (Mem*)&((char*)p)[ROUND8(sizeof(UnpackedRecord))];  /* Mem array starts right after the rounded-up header */
 80097   assert( pKeyInfo->aSortOrder!=0 );
 80098   p->pKeyInfo = pKeyInfo;
 80099   p->nField = pKeyInfo->nKeyField + 1;
 80100   return p;
 80101 }

sqlite3BtreeMovetoUnpacked函数

 5327 /* Move the cursor so that it points to an entry near the key 
 5328 specified by pIdxKey or intKey.   Return a success code.
 5329 
 5330 For INTKEY tables, the intKey parameter is used.  pIdxKey 
 5331 must be NULL.  For index tables, pIdxKey is used and intKey
 5332 is ignored.
 5333 
 5334 If an exact match is not found, then the cursor is always
 5335 left pointing at a leaf page which would hold the entry if it
 5336 were present.  The cursor might point to an entry that comes
 5337 before or after the key.
 5338 
 5339 An integer is written into *pRes which is the result of
 5340 comparing the key with the entry to which the cursor is 
 5341 pointing.  The meaning of the integer written into
 5342 *pRes is as follows:
 5343 
 5344     *pRes<0      The cursor is left pointing at an entry that
 5345                  is smaller than intKey/pIdxKey or if the table is empty
 5346                  and the cursor is therefore left pointing to nothing.
 5347 
 5348     *pRes==0     The cursor is left pointing at an entry that
 5349                  exactly matches intKey/pIdxKey.
 5350 
 5351     *pRes>0      The cursor is left pointing at an entry that
 5352                  is larger than intKey/pIdxKey.
 5353 
 5354 For index tables, the pIdxKey->eqSeen field is set to 1 if there
 5355 exists an entry in the table that exactly matches pIdxKey.  
 5356 */
 5357 int sqlite3BtreeMovetoUnpacked(
 5358   BtCursor *pCur,          /* The cursor to be moved */
 5359   UnpackedRecord *pIdxKey, /* Unpacked index key */
 5360   i64 intKey,              /* The table key */
 5361   int biasRight,           /* If true, bias the search to the high end */
 5362   int *pRes                /* Write search results here */
 5363 ){
 5364   int rc;
 5365   RecordCompare xRecordCompare;
 5366 
 5367   assert( cursorOwnsBtShared(pCur) );
 5368   assert( sqlite3_mutex_held(pCur->pBtree->db->mutex) );
 5369   assert( pRes );
 5370   assert( (pIdxKey==0)==(pCur->pKeyInfo==0) );
 5371   assert( pCur->eState!=CURSOR_VALID || (pIdxKey==0)==(pCur->curIntKey!=0) );
 5372 
 5373   /* If the cursor is already positioned at the point we are trying
 5374   to move to, then just return without doing any work */
 5375   if( pIdxKey==0
 5376    && pCur->eState==CURSOR_VALID && (pCur->curFlags & BTCF_ValidNKey)!=0
 5377   ){
 5378     if( pCur->info.nKey==intKey ){
 5379       *pRes = 0;
 5380       return SQLITE_OK;
 5381     }
 5382     if( pCur->info.nKey<intKey ){
 5383       if( (pCur->curFlags & BTCF_AtLast)!=0 ){
 5384         *pRes = -1;
 5385         return SQLITE_OK;
 5386       }
 5387       /* If the requested key is one more than the previous key, then
 5388       try to get there using sqlite3BtreeNext() rather than a full
 5389       binary search.  This is an optimization only.  The correct answer
 5390       is still obtained without this case, only a little more slowly */
 5391       if( pCur->info.nKey+1==intKey ){
 5392         *pRes = 0;
 5393         rc = sqlite3BtreeNext(pCur, 0);
 5394         if( rc==SQLITE_OK ){
 5395           getCellInfo(pCur);
 5396           if( pCur->info.nKey==intKey ){
 5397             return SQLITE_OK;
 5398           }
 5399         }else if( rc==SQLITE_DONE ){
 5400           rc = SQLITE_OK;
 5401         }else{
 5402           return rc;
 5403         }
 5404       }
 5405     }
 5406   }
 5407 
 5408   if( pIdxKey ){
 5409     xRecordCompare = sqlite3VdbeFindCompare(pIdxKey);
 5410     pIdxKey->errCode = 0;
 5411     assert( pIdxKey->default_rc==1
 5412          || pIdxKey->default_rc==0
 5413          || pIdxKey->default_rc==-1
 5414     );
 5415   }else{
 5416     xRecordCompare = 0; /* All keys are integers */
 5417   }
 5418 
 5419   rc = moveToRoot(pCur);
 5420   if( rc ){
 5421     if( rc==SQLITE_EMPTY ){
 5422       assert( pCur->pgnoRoot==0 || pCur->pPage->nCell==0 );
 5423       *pRes = -1;
 5424       return SQLITE_OK;
 5425     }
 5426     return rc;
 5427   }
 5428   assert( pCur->pPage );
 5429   assert( pCur->pPage->isInit );
 5430   assert( pCur->eState==CURSOR_VALID );
 5431   assert( pCur->pPage->nCell > 0 );
 5432   assert( pCur->iPage==0 || pCur->apPage[0]->intKey==pCur->curIntKey );
 5433   assert( pCur->curIntKey || pIdxKey );
 5434   for(;;){  /* one iteration per page visited; each pass runs a binary search over that page's cells */
 5435     int lwr, upr, idx, c;
 5436     Pgno chldPg;
 5437     MemPage *pPage = pCur->pPage;
 5438     u8 *pCell;                          /* Pointer to current cell in pPage */
 5439 
 5440     /* pPage->nCell must be greater than zero. If this is the root-page
 5441     the cursor would have been INVALID above and this for(;;) loop
 5442     not run. If this is not the root-page, then the moveToChild() routine
 5443     would have already detected db corruption. Similarly, pPage must
 5444     be the right kind (index or table) of b-tree page. Otherwise
 5445     a moveToChild() or moveToRoot() call would have detected corruption.  */
 5446     assert( pPage->nCell>0 );
 5447     assert( pPage->intKey==(pIdxKey==0) );
 5448     lwr = 0;
 5449     upr = pPage->nCell-1;
 5450     assert( biasRight==0 || biasRight==1 );
 5451     idx = upr>>(1-biasRight); /* idx = biasRight ? upr : (lwr+upr)/2; */
 5452     pCur->ix = (u16)idx;
 5453     if( xRecordCompare==0 ){
 5454       for(;;){
 5455         i64 nCellKey;
 5456         pCell = findCellPastPtr(pPage, idx);
 5457         if( pPage->intKeyLeaf ){
 5458           while( 0x80 <= *(pCell++) ){  /* step over the one varint that precedes the key on an intKey leaf cell */
 5459             if( pCell>=pPage->aDataEnd ){
 5460               return SQLITE_CORRUPT_PAGE(pPage);
 5461             }
 5462           }
 5463         }
 5464         getVarint(pCell, (u64*)&nCellKey);
 5465         if( nCellKey<intKey ){
 5466           lwr = idx+1;
 5467           if( lwr>upr ){ c = -1; break; }
 5468         }else if( nCellKey>intKey ){
 5469           upr = idx-1;
 5470           if( lwr>upr ){ c = +1; break; }
 5471         }else{
 5472           assert( nCellKey==intKey );
 5473           pCur->ix = (u16)idx;
 5474           if( !pPage->leaf ){
 5475             lwr = idx;
 5476             goto moveto_next_layer;
 5477           }else{
 5478             pCur->curFlags |= BTCF_ValidNKey;
 5479             pCur->info.nKey = nCellKey;
 5480             pCur->info.nSize = 0;
 5481             *pRes = 0;
 5482             return SQLITE_OK;
 5483           }
 5484         }
 5485         assert( lwr+upr>=0 );
 5486         idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2; */
 5487       }
 5488     }else{
 5489       for(;;){
 5490         int nCell;  /* Size of the pCell cell in bytes */
 5491         pCell = findCellPastPtr(pPage, idx);
 5492 
 5493         /* The maximum supported page-size is 65536 bytes. This means that
 5494         the maximum number of record bytes stored on an index B-Tree
 5495         page is less than 16384 bytes and may be stored as a 2-byte
 5496         varint. This information is used to attempt to avoid parsing 
 5497         the entire cell by checking for the cases where the record is 
 5498         stored entirely within the b-tree page by inspecting the first 
 5499         2 bytes of the cell.
 5500         */
 5501         nCell = pCell[0];
 5502         if( nCell<=pPage->max1bytePayload ){
 5503           /* This branch runs if the record-size field of the cell is a
 5504           single byte varint and the record fits entirely on the main
 5505           b-tree page.  */
 5506           testcase( pCell+nCell+1==pPage->aDataEnd );
 5507           c = xRecordCompare(nCell, (void*)&pCell[1], pIdxKey);
 5508         }else if( !(pCell[1] & 0x80)
 5509           && (nCell = ((nCell&0x7f)<<7) + pCell[1])<=pPage->maxLocal
 5510         ){
 5511           /* The record-size field is a 2 byte varint and the record 
 5512           fits entirely on the main b-tree page.  */
 5513           testcase( pCell+nCell+2==pPage->aDataEnd );
 5514           c = xRecordCompare(nCell, (void*)&pCell[2], pIdxKey);
 5515         }else{
 5516           /* The record flows over onto one or more overflow pages. In
 5517           this case the whole cell needs to be parsed, a buffer allocated
 5518           and accessPayload() used to retrieve the record into the
 5519           buffer before VdbeRecordCompare() can be called. 
 5520           
 5521           If the record is corrupt, the xRecordCompare routine may read
 5522           up to two varints past the end of the buffer. An extra 18 
 5523           bytes of padding is allocated at the end of the buffer in
 5524           case this happens.  */
 5525           void *pCellKey;
 5526           u8 * const pCellBody = pCell - pPage->childPtrSize;
 5527           const int nOverrun = 18;  /* Size of the overrun padding */
 5528           pPage->xParseCell(pPage, pCellBody, &pCur->info);
 5529           nCell = (int)pCur->info.nKey;
 5530           testcase( nCell<0 );   /* True if key size is 2^32 or more */
 5531           testcase( nCell==0 );  /* Invalid key size:  0x80 0x80 0x00 */
 5532           testcase( nCell==1 );  /* Invalid key size:  0x80 0x80 0x01 */
 5533           testcase( nCell==2 );  /* Minimum legal index key size */
 5534           if( nCell<2 || nCell/pCur->pBt->usableSize>pCur->pBt->nPage ){
 5535             rc = SQLITE_CORRUPT_PAGE(pPage);
 5536             goto moveto_finish;
 5537           }
 5538           pCellKey = sqlite3Malloc( nCell+nOverrun );
 5539           if( pCellKey==0 ){
 5540             rc = SQLITE_NOMEM_BKPT;
 5541             goto moveto_finish;
 5542           }
 5543           pCur->ix = (u16)idx;
 5544           rc = accessPayload(pCur, 0, nCell, (unsigned char*)pCellKey, 0);
 5545           memset(((u8*)pCellKey)+nCell,0,nOverrun); /* Fix uninit warnings */
 5546           pCur->curFlags &= ~BTCF_ValidOvfl;
 5547           if( rc ){
 5548             sqlite3_free(pCellKey);
 5549             goto moveto_finish;
 5550           }
 5551           c = sqlite3VdbeRecordCompare(nCell, pCellKey, pIdxKey);
 5552           sqlite3_free(pCellKey);
 5553         }
 5554         assert(
 5555             (pIdxKey->errCode!=SQLITE_CORRUPT || c==0)
 5556          && (pIdxKey->errCode!=SQLITE_NOMEM || pCur->pBtree->db->mallocFailed)
 5557         );
 5558         if( c<0 ){
 5559           lwr = idx+1;
 5560         }else if( c>0 ){
 5561           upr = idx-1;
 5562         }else{
 5563           assert( c==0 );
 5564           *pRes = 0;
 5565           rc = SQLITE_OK;
 5566           pCur->ix = (u16)idx;
 5567           if( pIdxKey->errCode ) rc = SQLITE_CORRUPT_BKPT;
 5568           goto moveto_finish;
 5569         }
 5570         if( lwr>upr ) break;
 5571         assert( lwr+upr>=0 );
 5572         idx = (lwr+upr)>>1;  /* idx = (lwr+upr)/2 */
 5573       }
 5574     }
 5575     assert( lwr==upr+1 || (pPage->intKey && !pPage->leaf) );
 5576     assert( pPage->isInit );
 5577     if( pPage->leaf ){
 5578       assert( pCur->ix<pCur->pPage->nCell );
 5579       pCur->ix = (u16)idx;
 5580       *pRes = c;
 5581       rc = SQLITE_OK;
 5582       goto moveto_finish;
 5583     }
 5584 moveto_next_layer:
 5585     if( lwr>=pPage->nCell ){
 5586       chldPg = get4byte(&pPage->aData[pPage->hdrOffset+8]);  /* past the last cell: child page number comes from the page header instead */
 5587     }else{
 5588       chldPg = get4byte(findCell(pPage, lwr));
 5589     }
 5590     pCur->ix = (u16)lwr;
 5591     rc = moveToChild(pCur, chldPg);
 5592     if( rc ) break;
 5593   }
 5594 moveto_finish:
 5595   pCur->info.nSize = 0;
 5596   assert( (pCur->curFlags & BTCF_ValidOvfl)==0 );
 5597   return rc;
 5598 }

在btreeInt.h中有关于struct MemPage的定义

此对象的实例保存已加载到内存中的单个数据库页的相关信息，这些信息来源于磁盘上的原始页面内容。每当一个数据库页被加载到内存时，pager（页管理器）都会分配此对象的一个实例，并将其前 8 个字节清零。（这就是 pager 中与每一页相关联的“额外”信息。）对此结构所有字段的访问都由 MemPage.pBt->mutex 中保存的互斥锁控制。

261 /*
262  An instance of this object stores information about a single database
263  page that has been loaded into memory.  The information in this object
264  is derived from the raw on-disk page content.
265 
266  As each database page is loaded into memory, the pager allocates an
267  instance of this object and zeros the first 8 bytes.  (This is the
268  "extra" information associated with each page of the pager.)
269 
270  Access to all fields of this structure is controlled by the mutex
271  stored in MemPage.pBt->mutex.
272 */
273 struct MemPage {
274   u8 isInit;           /* True if previously initialized. MUST BE FIRST! */
275   u8 bBusy;            /* Prevent endless loops on corrupt database files */
276   u8 intKey;           /* True if table b-trees.  False for index b-trees */
277   u8 intKeyLeaf;       /* True if the leaf of an intKey table */
278   Pgno pgno;           /* Page number for this page */
279   /* Only the first 8 bytes (above) are zeroed by pager.c when a new page
280    is allocated. All fields that follow must be initialized before use */
281   u8 leaf;             /* True if a leaf page */
282   u8 hdrOffset;        /* 100 for page 1.  0 otherwise */
283   u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 */
284   u8 max1bytePayload;  /* min(maxLocal,127) */
285   u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
286   u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
287   u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
288   u16 cellOffset;      /* Index in aData of first cell pointer */
289   int nFree;           /* Number of free bytes on the page. -1 for unknown */
290   u16 nCell;           /* Number of cells on this page, local and ovfl */
291   u16 maskPage;        /* Mask for page offset */
292   u16 aiOvfl[4];       /* Insert the i-th overflow cell before the aiOvfl-th
293                         non-overflow cell */
294   u8 *apOvfl[4];       /* Pointers to the body of overflow cells */
295   BtShared *pBt;       /* Pointer to BtShared that this page is part of */
296   u8 *aData;           /* Pointer to disk image of the page data */
297   u8 *aDataEnd;        /* One byte past the end of usable data */
298   u8 *aCellIdx;        /* The cell index area */
299   u8 *aDataOfst;       /* Same as aData for leaves.  aData+4 for interior */
300   DbPage *pDbPage;     /* Pager page handle */
301   u16 (*xCellSize)(MemPage*,u8*);             /* cellSizePtr method */
302   void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */
303 };

久许

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
sqlite源码分析

pager.c文件中有Pager的定义struct Pager { sqlite3_vfs *pVfs; /* OS functions to use for IO */ u8 exclusiveMode; /* Boolean. True if locking_mode==EXCLUSIVE */ u8 journalMode;...
复制链接

扫一扫

专栏目录