2021SC@SDUSC SQLite源码分析(六)————b树增删改查API
一、sqlite3BtreeInsert
在b-树的适当位置插入一个新记录。新记录的内容由pX对象描述。pCur游标仅用于定义记录应该插入到哪个表中,以及被留在哪个表中,之后将指向一个随机位置。
源码:
int sqlite3BtreeInsert(
BtCursor *pCur, /* Insert data into the table of this cursor */
const BtreePayload *pX, /* Content of the row to be inserted */
int flags, /* True if this is likely an append */
int seekResult /* Result of prior MovetoUnpacked() call */
){
int rc;
int loc = seekResult; /* -1: before desired location +1: after */
int szNew = 0;
int idx;
MemPage *pPage;
Btree *p = pCur->pBtree;
BtShared *pBt = p->pBt;
unsigned char *oldCell;
unsigned char *newCell = 0;
assert( (flags & (BTREE_SAVEPOSITION|BTREE_APPEND|BTREE_PREFORMAT))==flags );
assert( (flags & BTREE_PREFORMAT)==0 || seekResult || pCur->pKeyInfo==0 );
if( pCur->eState==CURSOR_FAULT ){
assert( pCur->skipNext!=SQLITE_OK );
return pCur->skipNext;
}
assert( cursorOwnsBtShared(pCur) );
assert( (pCur->curFlags & BTCF_WriteFlag)!=0
&& pBt->inTransaction==TRANS_WRITE
&& (pBt->btsFlags & BTS_READ_ONLY)==0 );
assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
/* Assert that the caller has been consistent. If this cursor was opened
** expecting an index b-tree, then the caller should be inserting blob
** keys with no associated data. If the cursor was opened expecting an
** intkey table, the caller should be inserting integer keys with a
** blob of associated data. */
assert( (flags & BTREE_PREFORMAT) || (pX->pKey==0)==(pCur->pKeyInfo==0) );
/* Save the positions of any other cursors open on this table.
**
** In some cases, the call to btreeMoveto() below is a no-op. For
** example, when inserting data into a table with auto-generated integer
** keys, the VDBE layer invokes sqlite3BtreeLast() to figure out the
** integer key to use. It then calls this function to actually insert the
** data into the intkey B-Tree. In this case btreeMoveto() recognizes
** that the cursor is already where it needs to be and returns without
** doing any work. To avoid thwarting these optimizations, it is important
** not to clear the cursor here.
*/
if( pCur->curFlags & BTCF_Multiple ){
rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
if( rc ) return rc;
if( loc && pCur->iPage<0 ){
/* This can only happen if the schema is corrupt such that there is more
** than one table or index with the same root page as used by the cursor.
** Which can only happen if the SQLITE_NoSchemaError flag was set when
** the schema was loaded. This cannot be asserted though, as a user might
** set the flag, load the schema, and then unset the flag. */
return SQLITE_CORRUPT_BKPT;
}
}
if( pCur->pKeyInfo==0 ){
assert( pX->pKey==0 );
/* If this is an insert into a table b-tree, invalidate any incrblob
** cursors open on the row being replaced */
if( p->hasIncrblobCur ){
invalidateIncrblobCursors(p, pCur->pgnoRoot, pX->nKey, 0);
}
/* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
** to a row with the same key as the new entry being inserted.
*/
#ifdef SQLITE_DEBUG
if( flags & BTREE_SAVEPOSITION ){
assert( pCur->curFlags & BTCF_ValidNKey );
assert( pX->nKey==pCur->info.nKey );
assert( loc==0 );
}
#endif
/* On the other hand, BTREE_SAVEPOSITION==0 does not imply
** that the cursor is not pointing to a row to be overwritten.
** So do a complete check.
*/
if( (pCur->curFlags&BTCF_ValidNKey)!=0 && pX->nKey==pCur->info.nKey ){
/* The cursor is pointing to the entry that is to be
** overwritten */
assert( pX->nData>=0 && pX->nZero>=0 );
if( pCur->info.nSize!=0
&& pCur->info.nPayload==(u32)pX->nData+pX->nZero
){
/* New entry is the same size as the old. Do an overwrite */
return btreeOverwriteCell(pCur, pX);
}
assert( loc==0 );
}else if( loc==0 ){
/* The cursor is *not* pointing to the cell to be overwritten, nor
** to an adjacent cell. Move the cursor so that it is pointing either
** to the cell to be overwritten or an adjacent cell.
*/
rc = sqlite3BtreeMovetoUnpacked(pCur, 0, pX->nKey, flags!=0, &loc);
if( rc ) return rc;
}
}else{
/* This is an index or a WITHOUT ROWID table */
/* If BTREE_SAVEPOSITION is set, the cursor must already be pointing
** to a row with the same key as the new entry being inserted.
*/
assert( (flags & BTREE_SAVEPOSITION)==0 || loc==0 );
/* If the cursor is not already pointing either to the cell to be
** overwritten, or if a new cell is being inserted, if the cursor is
** not pointing to an immediately adjacent cell, then move the cursor
** so that it does.
*/
if( loc==0 && (flags & BTREE_SAVEPOSITION)==0 ){
if( pX->nMem ){
UnpackedRecord r;
r.pKeyInfo = pCur->pKeyInfo;
r.aMem = pX->aMem;
r.nField = pX->nMem;
r.default_rc = 0;
r.errCode = 0;
r.r1 = 0;
r.r2 = 0;
r.eqSeen = 0;
rc = sqlite3BtreeMovetoUnpacked(pCur, &r, 0, flags!=0, &loc);
}else{
rc = btreeMoveto(pCur, pX->pKey, pX->nKey, flags!=0, &loc);
}
if( rc ) return rc;
}
/* If the cursor is currently pointing to an entry to be overwritten
** and the new content is the same as as the old, then use the
** overwrite optimization.
*/
if( loc==0 ){
getCellInfo(pCur);
if( pCur->info.nKey==pX->nKey ){
BtreePayload x2;
x2.pData = pX->pKey;
x2.nData = pX->nKey;
x2.nZero = 0;
return btreeOverwriteCell(pCur, &x2);
}
}
}
assert( pCur->eState==CURSOR_VALID
|| (pCur->eState==CURSOR_INVALID && loc)
|| CORRUPT_DB );
pPage = pCur->pPage;
assert( pPage->intKey || pX->nKey>=0 || (flags & BTREE_PREFORMAT) );
assert( pPage->leaf || !pPage->intKey );
if( pPage->nFree<0 ){
if( NEVER(pCur->eState>CURSOR_INVALID) ){
rc = SQLITE_CORRUPT_BKPT;
}else{
rc = btreeComputeFreeSpace(pPage);
}
if( rc ) return rc;
}
TRACE(("INSERT: table=%d nkey=%lld ndata=%d page=%d %s\n",
pCur->pgnoRoot, pX->nKey, pX->nData, pPage->pgno,
loc==0 ? "overwrite" : "new entry"));
assert( pPage->isInit );
newCell = pBt->pTmpSpace;
assert( newCell!=0 );
if( flags & BTREE_PREFORMAT ){
rc = SQLITE_OK;
szNew = pBt->nPreformatSize;
if( szNew<4 ) szNew = 4;
if( ISAUTOVACUUM && szNew>pPage->maxLocal ){
CellInfo info;
pPage->xParseCell(pPage, newCell, &info);
if( info.nPayload!=info.nLocal ){
Pgno ovfl = get4byte(&newCell[szNew-4]);
ptrmapPut(pBt, ovfl, PTRMAP_OVERFLOW1, pPage->pgno, &rc);
}
}
}else{
rc = fillInCell(pPage, newCell, pX, &szNew);
}
if( rc ) goto end_insert;
assert( szNew==pPage->xCellSize(pPage, newCell) );
assert( szNew <= MX_CELL_SIZE(pBt) );
idx = pCur->ix;
if( loc==0 ){
CellInfo info;
assert( idx<pPage->nCell );
rc = sqlite3PagerWrite(pPage->pDbPage);
if( rc ){
goto end_insert;
}
oldCell = findCell(pPage, idx);
if( !pPage->leaf ){
memcpy(newCell, oldCell, 4);
}
BTREE_CLEAR_CELL(rc, pPage, oldCell, info);
testcase( pCur->curFlags & BTCF_ValidOvfl );
invalidateOverflowCache(pCur);
if( info.nSize==szNew && info.nLocal==info.nPayload
&& (!ISAUTOVACUUM || szNew<pPage->minLocal)
){
/* Overwrite the old cell with the new if they are the same size.
** We could also try to do this if the old cell is smaller, then add
** the leftover space to the free list. But experiments show that
** doing that is no faster then skipping this optimization and just
** calling dropCell() and insertCell().
**
** This optimization cannot be used on an autovacuum database if the
** new entry uses overflow pages, as the insertCell() call below is
** necessary to add the PTRMAP_OVERFLOW1 pointer-map entry. */
assert( rc==SQLITE_OK ); /* clearCell never fails when nLocal==nPayload */
if( oldCell < pPage->aData+pPage->hdrOffset+10 ){
return SQLITE_CORRUPT_BKPT;
}
if( oldCell+szNew > pPage->aDataEnd ){
return SQLITE_CORRUPT_BKPT;
}
memcpy(oldCell, newCell, szNew);
return SQLITE_OK;
}
dropCell(pPage, idx, info.nSize, &rc);
if( rc ) goto end_insert;
}else if( loc<0 && pPage->nCell>0 ){
assert( pPage->leaf );
idx = ++pCur->ix;
pCur->curFlags &= ~BTCF_ValidNKey;
}else{
assert( pPage->leaf );
}
insertCell(pPage, idx, newCell, szNew, 0, 0, &rc);
assert( pPage->nOverflow==0 || rc==SQLITE_OK );
assert( rc!=SQLITE_OK || pPage->nCell>0 || pPage->nOverflow>0 );
/* If no error has occurred and pPage has an overflow cell, call balance()
** to redistribute the cells within the tree. Since balance() may move
** the cursor, zero the BtCursor.info.nSize and BTCF_ValidNKey
** variables.
**
** Previous versions of SQLite called moveToRoot() to move the cursor
** back to the root page as balance() used to invalidate the contents
** of BtCursor.apPage[] and BtCursor.aiIdx[]. Instead of doing that,
** set the cursor state to "invalid". This makes common insert operations
** slightly faster.
**
** There is a subtle but important optimization here too. When inserting
** multiple records into an intkey b-tree using a single cursor (as can
** happen while processing an "INSERT INTO ... SELECT" statement), it
** is advantageous to leave the cursor pointing to the last entry in
** the b-tree if possible. If the cursor is left pointing to the last
** entry in the table, and the next row inserted has an integer key
** larger than the largest existing key, it is possible to insert the
** row without seeking the cursor. This can be a big performance boost.
*/
pCur->info.nSize = 0;
if( pPage->nOverflow ){
assert( rc==SQLITE_OK );
pCur->curFlags &= ~(BTCF_ValidNKey);
rc = balance(pCur);
/* Must make sure nOverflow is reset to zero even if the balance()
** fails. Internal data structure corruption will result otherwise.
** Also, set the cursor state to invalid. This stops saveCursorPosition()
** from trying to save the current position of the cursor. */
pCur->pPage->nOverflow = 0;
pCur->eState = CURSOR_INVALID;
if( (flags & BTREE_SAVEPOSITION) && rc==SQLITE_OK ){
btreeReleaseAllCursorPages(pCur);
if( pCur->pKeyInfo ){
assert( pCur->pKey==0 );
pCur->pKey = sqlite3Malloc( pX->nKey );
if( pCur->pKey==0 ){
rc = SQLITE_NOMEM;
}else{
memcpy(pCur->pKey, pX->pKey, pX->nKey);
}
}
pCur->eState = CURSOR_REQUIRESEEK;
pCur->nKey = pX->nKey;
}
}
assert( pCur->iPage<0 || pCur->pPage->nOverflow==0 );
end_insert:
return rc;
}
二、sqlite3BtreeDelete
删除游标所指向的条目。
如果flags参数的BTREE_SAVEPOSITION位为零,则游标在删除后将被留在任意位置。
源码:
int sqlite3BtreeDelete(BtCursor *pCur, u8 flags){
Btree *p = pCur->pBtree;
BtShared *pBt = p->pBt;
int rc; /* Return code */
MemPage *pPage; /* Page to delete cell from */
unsigned char *pCell; /* Pointer to cell to delete */
int iCellIdx; /* Index of cell to delete */
int iCellDepth; /* Depth of node containing pCell */
CellInfo info; /* Size of the cell being deleted */
int bSkipnext = 0; /* Leaf cursor in SKIPNEXT state */
u8 bPreserve = flags & BTREE_SAVEPOSITION; /* Keep cursor valid */
assert( cursorOwnsBtShared(pCur) );
assert( pBt->inTransaction==TRANS_WRITE );
assert( (pBt->btsFlags & BTS_READ_ONLY)==0 );
assert( pCur->curFlags & BTCF_WriteFlag );
assert( hasSharedCacheTableLock(p, pCur->pgnoRoot, pCur->pKeyInfo!=0, 2) );
assert( !hasReadConflicts(p, pCur->pgnoRoot) );
assert( (flags & ~(BTREE_SAVEPOSITION | BTREE_AUXDELETE))==0 );
if( pCur->eState==CURSOR_REQUIRESEEK ){
rc = btreeRestoreCursorPosition(pCur);
assert( rc!=SQLITE_OK || CORRUPT_DB || pCur->eState==CURSOR_VALID );
if( rc || pCur->eState!=CURSOR_VALID ) return rc;
}
assert( CORRUPT_DB || pCur->eState==CURSOR_VALID );
iCellDepth = pCur->iPage;
iCellIdx = pCur->ix;
pPage = pCur->pPage;
pCell = findCell(pPage, iCellIdx);
if( pPage->nFree<0 && btreeComputeFreeSpace(pPage) ) return SQLITE_CORRUPT;
/* If the bPreserve flag is set to true, then the cursor position must
** be preserved following this delete operation. If the current delete
** will cause a b-tree rebalance, then this is done by saving the cursor
** key and leaving the cursor in CURSOR_REQUIRESEEK state before
** returning.
**
** Or, if the current delete will not cause a rebalance, then the cursor
** will be left in CURSOR_SKIPNEXT state pointing to the entry immediately
** before or after the deleted entry. In this case set bSkipnext to true. */
if( bPreserve ){
if( !pPage->leaf
|| (pPage->nFree+cellSizePtr(pPage,pCell)+2)>(int)(pBt->usableSize*2/3)
|| pPage->nCell==1 /* See dbfuzz001.test for a test case */
){
/* A b-tree rebalance will be required after deleting this entry.
** Save the cursor key. */
rc = saveCursorKey(pCur);
if( rc ) return rc;
}else{
bSkipnext = 1;
}
}
/* If the page containing the entry to delete is not a leaf page, move
** the cursor to the largest entry in the tree that is smaller than
** the entry being deleted. This cell will replace the cell being deleted
** from the internal node. The 'previous' entry is used for this instead
** of the 'next' entry, as the previous entry is always a part of the
** sub-tree headed by the child page of the cell being deleted. This makes
** balancing the tree following the delete operation easier. */
if( !pPage->leaf ){
rc = sqlite3BtreePrevious(pCur, 0);
assert( rc!=SQLITE_DONE );
if( rc ) return rc;
}
/* Save the positions of any other cursors open on this table before
** making any modifications. */
if( pCur->curFlags & BTCF_Multiple ){
rc = saveAllCursors(pBt, pCur->pgnoRoot, pCur);
if( rc ) return rc;
}
/* If this is a delete operation to remove a row from a table b-tree,
** invalidate any incrblob cursors open on the row being deleted. */
if( pCur->pKeyInfo==0 && p->hasIncrblobCur ){
invalidateIncrblobCursors(p, pCur->pgnoRoot, pCur->info.nKey, 0);
}
/* Make the page containing the entry to be deleted writable. Then free any
** overflow pages associated with the entry and finally remove the cell
** itself from within the page. */
rc = sqlite3PagerWrite(pPage->pDbPage);
if( rc ) return rc;
BTREE_CLEAR_CELL(rc, pPage, pCell, info);
dropCell(pPage, iCellIdx, info.nSize, &rc);
if( rc ) return rc;
/* If the cell deleted was not located on a leaf page, then the cursor
** is currently pointing to the largest entry in the sub-tree headed
** by the child-page of the cell that was just deleted from an internal
** node. The cell from the leaf node needs to be moved to the internal
** node to replace the deleted cell. */
if( !pPage->leaf ){
MemPage *pLeaf = pCur->pPage;
int nCell;
Pgno n;
unsigned char *pTmp;
if( pLeaf->nFree<0 ){
rc = btreeComputeFreeSpace(pLeaf);
if( rc ) return rc;
}
if( iCellDepth<pCur->iPage-1 ){
n = pCur->apPage[iCellDepth+1]->pgno;
}else{
n = pCur->pPage->pgno;
}
pCell = findCell(pLeaf, pLeaf->nCell-1);
if( pCell<&pLeaf->aData[4] ) return SQLITE_CORRUPT_BKPT;
nCell = pLeaf->xCellSize(pLeaf, pCell);
assert( MX_CELL_SIZE(pBt) >= nCell );
pTmp = pBt->pTmpSpace;
assert( pTmp!=0 );
rc = sqlite3PagerWrite(pLeaf->pDbPage);
if( rc==SQLITE_OK ){
insertCell(pPage, iCellIdx, pCell-4, nCell+4, pTmp, n, &rc);
}
dropCell(pLeaf, pLeaf->nCell-1, nCell, &rc);
if( rc ) return rc;
}
/* Balance the tree. If the entry deleted was located on a leaf page,
** then the cursor still points to that page. In this case the first
** call to balance() repairs the tree, and the if(...) condition is
** never true.
**
** Otherwise, if the entry deleted was on an internal node page, then
** pCur is pointing to the leaf page from which a cell was removed to
** replace the cell deleted from the internal node. This is slightly
** tricky as the leaf node may be underfull, and the internal node may
** be either under or overfull. In this case run the balancing algorithm
** on the leaf node first. If the balance proceeds far enough up the
** tree that we can be sure that any problem in the internal node has
** been corrected, so be it. Otherwise, after balancing the leaf node,
** walk the cursor up the tree to the internal node and balance it as
** well. */
rc = balance(pCur);
if( rc==SQLITE_OK && pCur->iPage>iCellDepth ){
releasePageNotNull(pCur->pPage);
pCur->iPage--;
while( pCur->iPage>iCellDepth ){
releasePage(pCur->apPage[pCur->iPage--]);
}
pCur->pPage = pCur->apPage[pCur->iPage];
rc = balance(pCur);
}
if( rc==SQLITE_OK ){
if( bSkipnext ){
assert( bPreserve && (pCur->iPage==iCellDepth || CORRUPT_DB) );
assert( pPage==pCur->pPage || CORRUPT_DB );
assert( (pPage->nCell>0 || CORRUPT_DB) && iCellIdx<=pPage->nCell );
pCur->eState = CURSOR_SKIPNEXT;
if( iCellIdx>=pPage->nCell ){
pCur->skipNext = -1;
pCur->ix = pPage->nCell-1;
}else{
pCur->skipNext = 1;
}
}else{
rc = moveToRoot(pCur);
if( bPreserve ){
btreeReleaseAllCursorPages(pCur);
pCur->eState = CURSOR_REQUIRESEEK;
}
if( rc==SQLITE_EMPTY ) rc = SQLITE_OK;
}
}
return rc;
}