SQLite源码之pcache.c

本文详细解析了SQLite的页面缓存机制,包括PgHdr结构体的标志位含义、页面缓存管理、脏页列表操作、缓存大小调整及页大小更改等功能。探讨了如何在内存中高效管理和操作数据库页面,以及在不同场景下的优化策略。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

/* Bit values for PgHdr.flags */

如果换算成二进制的话,那么下表的各个标志的值分别为:
0000 0000 0001
0000 0000 0010
0000 0000 0100
0000 0000 1000
0000 0001 0000
0000 0010 0000
0000 0100 0000

/* Every flag below is a distinct single bit, so any combination can be
   OR-ed together and stored in PgHdr.flags. */
#define PGHDR_CLEAN           0x001  /* Page not on the PCache.pDirty list */
#define PGHDR_DIRTY           0x002  /* Page is on the PCache.pDirty list */
#define PGHDR_WRITEABLE       0x004  /* Journaled and ready to modify */
#define PGHDR_NEED_SYNC       0x008  /* Fsync the rollback journal before
                                        writing this page to the database */
#define PGHDR_DONT_WRITE      0x010  /* Do not write content to disk */
#define PGHDR_MMAP            0x020  /* This is an mmap page object */

#define PGHDR_WAL_APPEND      0x040  /* Appended to wal file */

/*
A complete page cache is an instance of this structure.  Every
entry in the cache holds a single page of the database file.  The
btree layer only operates on the cached copy of the database pages.

A page cache entry is "clean" if it exactly matches what is currently
on disk.  A page is "dirty" if it has been modified and needs to be
persisted to disk.

pDirty, pDirtyTail, pSynced:
  All dirty pages are linked into the doubly linked list using
  PgHdr.pDirtyNext and pDirtyPrev. The list is maintained in LRU order
  such that p was added to the list more recently than p->pDirtyNext.
  PCache.pDirty points to the first (newest) element in the list and
  pDirtyTail to the last (oldest).

  The PCache.pSynced variable is used to optimize searching for a dirty
  page to eject from the cache mid-transaction. It is better to eject
  a page that does not require a journal sync than one that does. 
  Therefore, pSynced is maintained so that it *almost* always points
  to either the oldest page in the pDirty/pDirtyTail list that has a
  clear PGHDR_NEED_SYNC flag or to a page that is older than this one
  (so that the right page to eject can be found by following pDirtyPrev
  pointers).
*/

struct PCache {
  PgHdr *pDirty, *pDirtyTail;         /* List of dirty pages in LRU order */
  PgHdr *pSynced;                     /* Last synced page in dirty page list */
  int nRefSum;                        /* Sum of ref counts over all pages */
  int szCache;                        /* Configured cache size: a page count
                                         when >=0, otherwise a negative size
                                         in KiB (see numberOfCachePages()) */
  int szSpill;                        /* Size before spilling occurs */
  int szPage;                         /* Size of every page in this cache */
  int szExtra;                        /* Size of extra space for each page */
  u8 bPurgeable;                      /* True if pages are on backing store */
  u8 eCreate;                         /* eCreate value for xFetch(): 1 while a
                                         purgeable cache has dirty pages,
                                         otherwise 2 */
  int (*xStress)(void*,PgHdr*);       /* Call to try to make a page clean */
  void *pStress;                      /* Argument to xStress */
  sqlite3_pcache *pCache;             /* Pluggable cache module */
};

pcacheDump

/********************************** Test and Debug Logic **********************/
/*
Debug tracing macros.  Enable by changing the "0" to "1" and
recompiling.

When sqlite3PcacheTrace is 1, single line trace messages are issued.
When sqlite3PcacheTrace is 2, a dump of the pcache showing all cache entries
is displayed for many operations, resulting in a lot of output.
*/

#if defined(SQLITE_DEBUG) && 0
  int sqlite3PcacheTrace = 2;       /* 0: off  1: simple  2: cache dumps */
  int sqlite3PcacheMxDump = 9999;   /* Max cache entries for pcacheDump() */
# define pcacheTrace(X) if(sqlite3PcacheTrace){sqlite3DebugPrintf X;}
  /* Print one line per cached page (page number, reference count, flags
     and the first 12 content bytes) for page numbers 1..N, where N is the
     current page count capped at sqlite3PcacheMxDump.  Does nothing unless
     sqlite3PcacheTrace is at least 2 and the pluggable cache exists. */
  void pcacheDump(PCache *pCache){
    int N;
    int i, j;
    sqlite3_pcache_page *pLower;  /* Entry returned by the pluggable cache's xFetch */
    PgHdr *pPg;                   /* PgHdr control block found via pLower->pExtra */
    unsigned char *a;
  
    if( sqlite3PcacheTrace<2 ) return;
    if( pCache->pCache==0 ) return;
    N = sqlite3PcachePagecount(pCache);  /* Number of pages reported for this cache */
    if( N>sqlite3PcacheMxDump ) N = sqlite3PcacheMxDump;
    for(i=1; i<=N; i++){
       /* Arguments: the pluggable cache, the page number, and createFlag 0
          (look up only; do not create a missing page). */
       pLower = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, i, 0);
       if( pLower==0 ) continue;  /* No cached page with this page number */
       /* A cache entry carries two pointers: pBuf (the page content) and
          pExtra, whose start is interpreted here as the PgHdr. */
       pPg = (PgHdr*)pLower->pExtra;
       printf("%3d: nRef %2d flgs %02x data ", i, pPg->nRef, pPg->flags);
       a = (unsigned char *)pLower->pBuf;
       for(j=0; j<12; j++) printf("%02x", a[j]);  /* First 12 bytes of page content */
       printf("\n");
       /* NOTE(review): pPage==0 presumably means this PgHdr was not in use
          before the fetch above, so the pin just taken is released again
          (discard argument 0) - confirm against the pcache1 implementation. */
       if( pPg->pPage==0 ){
         sqlite3GlobalConfig.pcache2.xUnpin(pCache->pCache, pLower, 0);
       }
    }
  }
  #else
# define pcacheTrace(X)
# define pcacheDump(X)
#endif

sqlite3PcachePageSanity

/*
Check invariants on a PgHdr entry.  Return true if everything is OK.
Return false if any invariant is violated.

This routine is for use inside of assert() statements only.  For
example:         assert( sqlite3PcachePageSanity(pPg) );
*/

检查PgHdr中的不变量。

#ifdef SQLITE_DEBUG
/* Assert the invariants of a single PgHdr.  Always returns 1; a violated
   invariant trips one of the assert() statements instead. */
int sqlite3PcachePageSanity(PgHdr *pPg){
  PCache *pCache;
  assert( pPg!=0 );
  assert( pPg->pgno>0 || pPg->pPager==0 );    /* Page number is 1 or more */
  pCache = pPg->pCache;
  assert( pCache!=0 );      /* Every page has an associated PCache */
  if( pPg->flags & PGHDR_CLEAN ){
    assert( (pPg->flags & PGHDR_DIRTY)==0 );/* Cannot be both CLEAN and DIRTY */
    assert( pCache->pDirty!=pPg );          /* CLEAN pages not on dirty list */
    assert( pCache->pDirtyTail!=pPg );      /* ...at either end; both ends are kept by the PCache */
  }
  /* WRITEABLE pages must also be DIRTY */
  if( pPg->flags & PGHDR_WRITEABLE ){
    assert( pPg->flags & PGHDR_DIRTY );     /* WRITEABLE implies DIRTY */
  }
  /* NEED_SYNC can be set independently of WRITEABLE.  This can happen,
     for example, when using the sqlite3PagerDontWrite() optimization:
        (1)  Page X is journalled, and gets WRITEABLE and NEED_SYNC.
        (2)  Page X moved to freelist, WRITEABLE is cleared
        (3)  Page X reused, WRITEABLE is set again
     If NEED_SYNC had been cleared in step 2, then it would not be reset
     in step 3, and page might be written into the database without first
     syncing the rollback journal, which might cause corruption on a power
     loss.

     Another example is when the database page size is smaller than the
     disk sector size.  When any page of a sector is journalled, all pages
     in that sector are marked NEED_SYNC even if they are still CLEAN, just
     in case they are later modified, since all pages in the same sector
     must be journalled and synced before any of those pages can be safely
     written.
  */
  return 1;
}
#endif /* SQLITE_DEBUG */

sqlite3PagerDontWrite

/*
Tell the pager that the content of page pPg does not have to be written
back to disk even though the page may be marked dirty.  This is the case,
for example, once the page has been added as a leaf of the freelist, so
that its content no longer matters.  The layer above calls this routine
when all of the data on the page is unused.

Tests show that this optimization can quadruple the speed of large
DELETE operations.

The request is ignored unless all of the following hold:
  *  the pager is not operating on a temp-file.  A temp-file page may
     already have been dirty when the transaction started; if memory
     pressure then pushes it out of the cache its content must reach
     disk so that it can be read back after a rollback;
  *  the page currently has PGHDR_DIRTY set;
  *  no savepoint is open (pPager->nSavepoint is zero).

When honoured, PGHDR_DONT_WRITE is set, PGHDR_WRITEABLE is cleared and
the page-content hash is recomputed via pager_set_pagehash().
*/

void sqlite3PagerDontWrite(PgHdr *pPg){
  Pager *pOwner = pPg->pPager;

  if( pOwner->tempFile ) return;
  if( (pPg->flags&PGHDR_DIRTY)==0 ) return;
  if( pOwner->nSavepoint!=0 ) return;

  PAGERTRACE(("DONT_WRITE page %d of %d\n", pPg->pgno, PAGERID(pOwner)));
  IOTRACE(("CLEAN %p %d\n", pOwner, pPg->pgno))
  pPg->flags &= ~PGHDR_WRITEABLE;   /* no longer journaled-and-modifiable */
  pPg->flags |= PGHDR_DONT_WRITE;   /* content need not reach the disk */
  testcase( pPg->flags & PGHDR_NEED_SYNC );
  pager_set_pagehash(pPg);          /* store a fresh hash of the page content */
}
/*
Return a 32-bit hash of the first nByte bytes at pData.  The hash starts
at zero and, for each byte in order, is multiplied by 1039 and has the
byte value added (arithmetic wraps modulo 2^32).  A count of zero or less
yields 0.
*/
static u32 pager_datahash(int nByte, unsigned char *pData){
  u32 acc = 0;
  for(; nByte>0; nByte--){
    acc = acc*1039 + *pData;
    pData++;
  }
  return acc;
}
/* Hash the content of pPage: pPager->pageSize bytes starting at
   pPage->pData, using pager_datahash(). */
static u32 pager_pagehash(PgHdr *pPage){
  unsigned char *aContent = (unsigned char *)pPage->pData;
  return pager_datahash(pPage->pPager->pageSize, aContent);
}
/* Recompute the content hash of pPage and remember it in
   pPage->pageHash. */
static void pager_set_pagehash(PgHdr *pPage){
  u32 hNow = pager_pagehash(pPage);
  pPage->pageHash = hNow;
}

pcacheManageDirtyList

/********************************** Linked List Management ********************/

/* Allowed values for second argument to pcacheManageDirtyList() */
#define PCACHE_DIRTYLIST_REMOVE   1    /* Remove pPage from dirty list */
#define PCACHE_DIRTYLIST_ADD      2    /* Add pPage to the dirty list */
#define PCACHE_DIRTYLIST_FRONT    3    /* Move pPage to the front of the list */

/*
Manage pPage's participation on the dirty list.  Bits of the addRemove
argument determine what operation to do.  The 0x01 bit means first
remove pPage from the dirty list.  The 0x02 means add pPage back to
the dirty list.  Doing both moves pPage to the front of the dirty list.
*/

/* pPage:     page to unlink from and/or link into its cache's dirty list.
   addRemove: one of the PCACHE_DIRTYLIST_* values; the REMOVE bit is
              handled first, then the ADD bit. */
static void pcacheManageDirtyList(PgHdr *pPage, u8 addRemove){
  PCache *p = pPage->pCache;

  pcacheTrace(("%p.DIRTYLIST.%s %d\n", p,
                addRemove==1 ? "REMOVE" : addRemove==2 ? "ADD" : "FRONT",
                pPage->pgno));
  if( addRemove & PCACHE_DIRTYLIST_REMOVE ){
    /* Either another dirty page follows pPage, or pPage is the tail */
    assert( pPage->pDirtyNext || pPage==p->pDirtyTail );
    /* Either another dirty page precedes pPage, or pPage is the head */
    assert( pPage->pDirtyPrev || pPage==p->pDirty );
  
    /* Update the PCache.pSynced variable if necessary. */
    if( p->pSynced==pPage ){
      p->pSynced = pPage->pDirtyPrev;
    }
  
    if( pPage->pDirtyNext ){  /* pPage has a successor: bypass pPage */
      pPage->pDirtyNext->pDirtyPrev = pPage->pDirtyPrev;
    }else{                    /* pPage was the tail: its predecessor becomes the tail */
      assert( pPage==p->pDirtyTail );
      p->pDirtyTail = pPage->pDirtyPrev;
    }
    if( pPage->pDirtyPrev ){  /* pPage has a predecessor: bypass pPage */
      pPage->pDirtyPrev->pDirtyNext = pPage->pDirtyNext;
    }else{                    /* pPage was the head of the list */
      /* If there are now no dirty pages in the cache, set eCreate to 2.
         This is an optimization that allows sqlite3PcacheFetch() to skip
         searching for a dirty page to eject from the cache when it might
         otherwise have to.  */
      assert( pPage==p->pDirty );
      p->pDirty = pPage->pDirtyNext;  /* New head is pPage's successor (possibly NULL) */
      assert( p->bPurgeable || p->eCreate==2 );
      if( p->pDirty==0 ){         /*OPTIMIZATION-IF-TRUE*/
        assert( p->bPurgeable==0 || p->eCreate==1 );
        p->eCreate = 2;           /* Dirty list is empty now */
      }
    }
  }
  if( addRemove & PCACHE_DIRTYLIST_ADD ){  /* Insert pPage at the head of the list */
    pPage->pDirtyPrev = 0;
    pPage->pDirtyNext = p->pDirty;
    if( pPage->pDirtyNext ){  /* List was not empty: link the old head back to pPage */
      assert( pPage->pDirtyNext->pDirtyPrev==0 );
      pPage->pDirtyNext->pDirtyPrev = pPage;
    }else{                    /* List was empty: pPage is also the tail */
      p->pDirtyTail = pPage;
      if( p->bPurgeable ){
        assert( p->eCreate==2 );
        p->eCreate = 1;       /* A purgeable cache now has a dirty page */
      }
    }
    p->pDirty = pPage;

    /* If pSynced is NULL and this page has a clear NEED_SYNC flag, set
       pSynced to point to it. Checking the NEED_SYNC flag is an
       optimization, as if pSynced points to a page with the NEED_SYNC
       flag set sqlite3PcacheFetchStress() searches through all newer
       entries of the dirty-list for a page with NEED_SYNC clear anyway.

       In other words: sqlite3PcacheFetchStress() starts at pSynced and
       walks pDirtyPrev links looking for an unreferenced page without
       NEED_SYNC.  When no such starting point is recorded, the page just
       added becomes the starting point, but only if it is itself free of
       NEED_SYNC; otherwise pointing at it would gain nothing.  */
    if( !p->pSynced 
     && 0==(pPage->flags&PGHDR_NEED_SYNC)   /*OPTIMIZATION-IF-FALSE*/
    ){
      p->pSynced = pPage;
    }
  }
  pcacheDump(p);
}

pcacheUnpin

/*
Hand page p back to the pluggable cache through its xUnpin method
(discard argument 0).  Nothing happens when the owning cache is not
purgeable, which is the case for an in-memory database.
*/

static void pcacheUnpin(PgHdr *p){
  PCache *pOwner = p->pCache;
  if( !pOwner->bPurgeable ) return;
  pcacheTrace(("%p.UNPIN %d\n", pOwner, p->pgno));
  /* With the default module this dispatches to pcache1Unpin */
  sqlite3GlobalConfig.pcache2.xUnpin(pOwner->pCache, p->pPage, 0);
  pcacheDump(pOwner);
}

numberOfCachePages

/*
Compute the number of pages of cache requested.   p->szCache is the
cache size requested by the "PRAGMA cache_size" statement.

A non-negative szCache already is a page count and is returned as is.
A negative szCache asks for roughly abs(szCache*1024) bytes of memory;
it is converted into pages using the per-page footprint szPage+szExtra.
The quotient is computed in 64 bits and capped at 1000000000 before it
is narrowed, so an extreme negative setting combined with a small page
footprint cannot overflow the int return value.
*/

static int numberOfCachePages(PCache *p){
  i64 n;
  if( p->szCache>=0 ){
    /* IMPLEMENTATION-OF: R-42059-47211 If the argument N is positive then the
       suggested cache size is set to N. */
    return p->szCache;
  }
  /* IMPLEMENTATION-OF: R-59858-46238 If the argument N is negative, then the
     number of cache pages is adjusted to be a number of pages that would
     use approximately abs(N*1024) bytes of memory based on the current
     page size. */
  n = (-1024*(i64)p->szCache)/(p->szPage+p->szExtra);
  if( n>1000000000 ) n = 1000000000;  /* keep the result representable as int */
  return (int)n;
}

defaultMethods(其中所有的方法在pcache1.c中有定义)

/*
This function is called during initialization (sqlite3_initialize()) to
install the default pluggable cache module, assuming the user has not
already provided an alternative.

The module is the set of pcache1* routines (implemented in pcache1.c); it
is registered by passing the static method table below to
sqlite3_config(SQLITE_CONFIG_PCACHE2, ...).  The initializer is positional,
so its entries must stay in the same order as the members of
struct sqlite3_pcache_methods2.
*/

void sqlite3PCacheSetDefault(void){
  static const sqlite3_pcache_methods2 defaultMethods = {
    1,                       /* iVersion */
    0,                       /* pArg */
    pcache1Init,             /* xInit */
    pcache1Shutdown,         /* xShutdown */
    pcache1Create,           /* xCreate */
    pcache1Cachesize,        /* xCachesize */
    pcache1Pagecount,        /* xPagecount */
    pcache1Fetch,            /* xFetch */
    pcache1Unpin,            /* xUnpin */
    pcache1Rekey,            /* xRekey */
    pcache1Truncate,         /* xTruncate */
    pcache1Destroy,          /* xDestroy */
    pcache1Shrink            /* xShrink */
  };
  sqlite3_config(SQLITE_CONFIG_PCACHE2, &defaultMethods);
}
/*
Method table of a pluggable page-cache module.  An instance is registered
with sqlite3_config(SQLITE_CONFIG_PCACHE2, ...) and afterwards reached
through sqlite3GlobalConfig.pcache2.
*/
typedef struct sqlite3_pcache_methods2 sqlite3_pcache_methods2;
struct sqlite3_pcache_methods2 {
  int iVersion;                /* Version number of this method table */
  void *pArg;                  /* NOTE(review): presumably the argument handed
                                  to xInit/xShutdown - confirm */
  int (*xInit)(void*);         /* One-time module initialization */
  void (*xShutdown)(void*);    /* Module shutdown */
  /* Create a cache for pages of szPage bytes, each with szExtra bytes of
     extra space; bPurgeable tells whether pages are on backing store */
  sqlite3_pcache *(*xCreate)(int szPage, int szExtra, int bPurgeable);
  void (*xCachesize)(sqlite3_pcache*, int nCachesize); /* Set the suggested size, in pages */
  int (*xPagecount)(sqlite3_pcache*);                  /* Number of pages in the cache */
  /* Look up page `key`; createFlag (0, 1 or 2) selects whether and how
     hard to try to create the page when it is missing */
  sqlite3_pcache_page *(*xFetch)(sqlite3_pcache*, unsigned key, int createFlag);
  /* Release a page obtained from xFetch; `discard` is passed as 0 by the
     callers visible in pcache.c */
  void (*xUnpin)(sqlite3_pcache*, sqlite3_pcache_page*, int discard);
  /* Change the key of a page from oldKey to newKey */
  void (*xRekey)(sqlite3_pcache*, sqlite3_pcache_page*, 
      unsigned oldKey, unsigned newKey);
  /* NOTE(review): presumably drops pages with keys at or above iLimit - confirm */
  void (*xTruncate)(sqlite3_pcache*, unsigned iLimit);
  void (*xDestroy)(sqlite3_pcache*);  /* Destroy a cache made by xCreate */
  void (*xShrink)(sqlite3_pcache*);   /* NOTE(review): presumably frees as much memory as possible - confirm */
};
/*
xInit method of the default page-cache module.  Resets the global pcache1
state, picks between separate and unified caches, obtains the static
mutexes when the core mutex is enabled, and marks the module initialized.
The argument is unused.  Always returns SQLITE_OK.
*/
static int pcache1Init(void *NotUsed){
  UNUSED_PARAMETER(NotUsed);
  assert( pcache1.isInit==0 );
  memset(&pcache1, 0, sizeof(pcache1));

  /*
  pcache1.separateCache selects the operating mode:
    mode-1 (true):  every PCache has its own private PGroup
    mode-2 (false): the single PGroup pcache1.grp serves all page caches

  Selection rules:
    *  ENABLE_MEMORY_MANAGEMENT builds always use the unified cache.
    *  Single-threaded applications that supplied a start-time buffer via
       sqlite3_config(SQLITE_CONFIG_PAGECACHE, pBuf, sz, N) with a non-NULL
       pBuf use the unified cache.
    *  Everything else uses separate caches.
  */
#if defined(SQLITE_ENABLE_MEMORY_MANAGEMENT)
  pcache1.separateCache = 0;
#elif SQLITE_THREADSAFE
  pcache1.separateCache = (sqlite3GlobalConfig.bCoreMutex>0
                           || sqlite3GlobalConfig.pPage==0);
#else
  pcache1.separateCache = (sqlite3GlobalConfig.pPage==0);
#endif

#if SQLITE_THREADSAFE
  if( sqlite3GlobalConfig.bCoreMutex ){
    pcache1.grp.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_LRU);
    pcache1.mutex = sqlite3MutexAlloc(SQLITE_MUTEX_STATIC_PMEM);
  }
#endif

  /* nInitPage takes the configured page count only for separate caches
     that have no start-time buffer; otherwise it stays zero. */
  pcache1.nInitPage = 0;
  if( pcache1.separateCache
   && sqlite3GlobalConfig.pPage==0
   && sqlite3GlobalConfig.nPage!=0
  ){
    pcache1.nInitPage = sqlite3GlobalConfig.nPage;
  }
  pcache1.grp.mxPinned = 10;
  pcache1.isInit = 1;
  return SQLITE_OK;
}

/*
Implementation of the sqlite3_pcache.xShutdown method.
Note that the static mutex allocated in xInit does 
not need to be freed.

The argument is unused.  The global pcache1 state is wiped, which also
clears pcache1.isInit so that pcache1Init() may run again.
*/

static void pcache1Shutdown(void *NotUsed){
  UNUSED_PARAMETER(NotUsed);
  assert( pcache1.isInit!=0 );
  memset(&pcache1, 0, sizeof(pcache1));
}

/*
Report how many bytes a PCache object occupies, so that a caller can
allocate the storage it later passes to sqlite3PcacheOpen().
*/

int sqlite3PcacheSize(void){
  return (int)sizeof(PCache);
}

sqlite3PcacheOpen

/*
Create a new PCache object. Storage space to hold the object
has already been allocated and is passed in as the p pointer.
The caller discovers how much space needs to be allocated by
calling sqlite3PcacheSize().

szExtra is some extra space allocated for each page.  The first
8 bytes of the extra space will be zeroed as the page is allocated,
but remaining content will be uninitialized.  Though it is opaque
to this module, the extra space really ends up being the MemPage
structure in the pager.

Returns whatever sqlite3PcacheSetPageSize(p, szPage) returns: SQLITE_OK,
or SQLITE_NOMEM_BKPT when the underlying cache cannot be created.
*/

int sqlite3PcacheOpen(
  int szPage,                  /* Size of every page */
  int szExtra,                 /* Extra space associated with each page */
  int bPurgeable,              /* True if pages are on backing store */
  int (*xStress)(void*,PgHdr*),/* Call to try to make pages clean */
  void *pStress,               /* Argument to xStress */
  PCache *p                    /* Preallocated space for the PCache */
){
  memset(p, 0, sizeof(PCache));
  p->szPage = 1;         /* Nonzero placeholder: sqlite3PcacheSetPageSize()
                            only builds the cache when szPage is nonzero */
  p->szExtra = szExtra;  /* Per-page extra space (ends up holding MemPage) */
  assert( szExtra>=8 );  /* First 8 bytes will be zeroed */
  p->bPurgeable = bPurgeable;
  p->eCreate = 2;        /* No dirty pages yet */
  p->xStress = xStress;
  p->pStress = pStress;
  p->szCache = 100;      /* Configured cache size */
  p->szSpill = 1;        /* Size before spilling occurs */
  pcacheTrace(("%p.OPEN szPage %d bPurgeable %d\n",p,szPage,bPurgeable));
  return sqlite3PcacheSetPageSize(p, szPage);  /* Installs the real page size */
}

MemPage 

/*
An instance of this object stores information about a single database
page that has been loaded into memory.  The information in this object
is derived from the raw on-disk page content.

As each database page is loaded into memory, the pager allocates an
instance of this object and zeros the first 8 bytes.  (This is the
"extra" information associated with each page of the pager.)

Access to all fields of this structure is controlled by the mutex
stored in MemPage.pBt->mutex.
*/

这个对象的每个实例存储一个已加载进内存的数据库页面的相关信息。这个对象中的信息来自磁盘上原始的页面内容。每当一个数据库页面被加载进内存时,pager模块都会分配一个这个对象的实例,并将它的前8个字节初始化为零。(这就是pager中与每个页面关联的"额外(extra)"信息。)对这个结构所有字段的访问都由互斥量MemPage.pBt->mutex控制。

struct MemPage {
  u8 isInit;           /* True if previously initialized. MUST BE FIRST! */如果之前初始化过了则为true
  u8 bBusy;            /* Prevent endless loops on corrupt database files */
  u8 intKey;           /* True if table b-trees.  False for index b-trees */
  u8 intKeyLeaf;       /* True if the leaf of an intKey table */
  Pgno pgno;           /* Page number for this page */页号
  /* Only the first 8 bytes (above) are zeroed by pager.c when a new page
  is allocated. All fields that follow must be initialized before use 前面占用8个字节内存的变量会被pager模块初始化为0。而下面的所有字段仅仅在被使用的时候才被初始化*/
  u8 leaf;             /* True if a leaf page */
  u8 hdrOffset;        /* 100 for page 1.  0 otherwise 如果是第1页,则为100,其他页都是0*/
  u8 childPtrSize;     /* 0 if leaf==1.  4 if leaf==0 如果是叶子页则为0,如果是内部页则为4*/
  u8 max1bytePayload;  /* min(maxLocal,127) */
  u8 nOverflow;        /* Number of overflow cell bodies in aCell[] */
  u16 maxLocal;        /* Copy of BtShared.maxLocal or BtShared.maxLeaf */
  u16 minLocal;        /* Copy of BtShared.minLocal or BtShared.minLeaf */
  u16 cellOffset;      /* Index in aData of first cell pointer */第一个指针的偏移
  int nFree;           /* Number of free bytes on the page. -1 for unknown */该页面空闲的字节数
  u16 nCell;           /* Number of cells on this page, local and ovfl */该页面cell的个数
  u16 maskPage;        /* Mask for page offset */
  u16 aiOvfl[4];       /* Insert the i-th overflow cell before the aiOvfl-th
                       non-overflow cell */
  u8 *apOvfl[4];       /* Pointers to the body of overflow cells */
  BtShared *pBt;       /* Pointer to BtShared that this page is part of */
  u8 *aData;           /* Pointer to disk image of the page data */指向页面内容的指针
  u8 *aDataEnd;        /* One byte past the end of usable data */
  u8 *aCellIdx;        /* The cell index area */
  u8 *aDataOfst;       /* Same as aData for leaves.  aData+4 for interior */
  DbPage *pDbPage;     /* Pager page handle */pager对象的handler
  u16 (*xCellSize)(MemPage*,u8*);             /* cellSizePtr method */
  void (*xParseCell)(MemPage*,u8*,CellInfo*); /* btreeParseCell method */
};

sqlite3PcacheSetPageSize

/*
Change the page size for PCache object. The caller must ensure that there
are no outstanding page references when this function is called.
*/

改变PCache对象的页面大小。调用方必须确保在调用此函数时没有未完成的页引用。

/*
Switch pCache over to pages of szPage bytes by building a fresh pluggable
cache and discarding the previous one.  The caller must guarantee that no
page is referenced and no page is dirty.

Returns SQLITE_OK on success (also when pCache->szPage is zero, in which
case nothing is done) and SQLITE_NOMEM_BKPT when the new cache cannot be
created; in the failure case the existing cache is left in place.
*/
int sqlite3PcacheSetPageSize(PCache *pCache, int szPage){
  sqlite3_pcache *pFresh;   /* Replacement pluggable cache */

  assert( pCache->nRefSum==0 && pCache->pDirty==0 );
  if( pCache->szPage==0 ){
    return SQLITE_OK;
  }

  /* Each entry's extra area must hold the caller's szExtra bytes plus a
     PgHdr rounded up to a multiple of 8. */
  pFresh = sqlite3GlobalConfig.pcache2.xCreate(
              szPage,
              pCache->szExtra + ROUND8(sizeof(PgHdr)),
              pCache->bPurgeable
  );
  if( pFresh==0 ){
    return SQLITE_NOMEM_BKPT;
  }

  /* Carry the configured size over to the new cache before swapping. */
  sqlite3GlobalConfig.pcache2.xCachesize(pFresh, numberOfCachePages(pCache));
  if( pCache->pCache!=0 ){
    sqlite3GlobalConfig.pcache2.xDestroy(pCache->pCache);
  }
  pCache->pCache = pFresh;
  pCache->szPage = szPage;
  pcacheTrace(("%p.PAGESIZE %d\n",pCache,szPage));
  return SQLITE_OK;
}

sqlite3PcacheFetch

/*
Try to obtain a page from the cache.

This routine returns a pointer to an sqlite3_pcache_page object if
such an object is already in cache, or if a new one is created.
This routine returns a NULL pointer if the object was not in cache
and could not be created.

The createFlag should be 0 to check for existing pages and should
be 3 (not 1, but 3) to try to create a new page.

If createFlag is 0, then NULL is always returned if the page is not
already in the cache.  If createFlag is 3, the value handed to the
pluggable cache is (createFlag & pCache->eCreate), i.e. 1 or 2.  With 1,
a new page is created only if that can be done without spilling dirty
pages and without exceeding the cache size limit.

The caller needs to invoke sqlite3PcacheFetchFinish() to properly
initialize the sqlite3_pcache_page object and convert it into a
PgHdr object.  The sqlite3PcacheFetch() and sqlite3PcacheFetchFinish()
routines are split this way for performance reasons. When separated
they can both (usually) operate without having to push values to
the stack on entry and pop them back off on exit, which saves a
lot of pushing and popping.
*/

sqlite3_pcache_page *sqlite3PcacheFetch(
  PCache *pCache,       /* Obtain the page from this cache */
  Pgno pgno,            /* Page number to obtain */
  int createFlag        /* 0: look up only.  3: create page if it does not exist */
){
  int eCreate;
  sqlite3_pcache_page *pRes;

  assert( pCache!=0 );
  assert( pCache->pCache!=0 );
  assert( createFlag==3 || createFlag==0 );
  assert( pCache->eCreate==((pCache->bPurgeable && pCache->pDirty) ? 1 : 2) );

  /* eCreate defines what to do if the page does not exist.
     0     Do not allocate a new page.  (createFlag==0)
     1     Allocate a new page if doing so is inexpensive.
           (createFlag==3 AND bPurgeable AND pDirty)
     2     Allocate a new page even if doing so is difficult.
           (createFlag==3 AND !(bPurgeable AND pDirty))
  */
  eCreate = createFlag & pCache->eCreate;
  assert( eCreate==0 || eCreate==1 || eCreate==2 );
  assert( createFlag==0 || pCache->eCreate==eCreate );
  assert( createFlag==0 || eCreate==1+(!pCache->bPurgeable||!pCache->pDirty) );
  pRes = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, eCreate);
  pcacheTrace(("%p.FETCH %d%s (result: %p)\n",pCache,pgno,
               createFlag?" create":"",pRes));
  return pRes;
}

sqlite3PcacheFetchStress

/*
If the sqlite3PcacheFetch() routine is unable to allocate a new
page because no clean pages are available for reuse and the cache
size limit has been reached, then this routine can be invoked to 
try harder to allocate a page.  This routine might invoke the stress
callback to spill dirty pages to the journal.  It will then try to
allocate the new page and will only fail to allocate a new page on
an OOM error.

This routine should be invoked only after sqlite3PcacheFetch() fails.
*/

如果sqlite3PcacheFetch例程由于没有干净的页面可供重用并且缓存大小已达到上限而无法分配一个新页面,那么可以调用这个例程来更努力地尝试分配页面。此例程可能会调用压力回调(stress callback),将脏页溢出(spill)到日志中。接下来它会尝试分配新页面,仅在出现OOM错误时该分配才会失败。这个例程只应在sqlite3PcacheFetch例程失败之后被调用。

/*
Second-chance page allocation, used after sqlite3PcacheFetch() came back
empty-handed.

If pCache->eCreate is 2 the routine returns 0 at once and leaves *ppPage
untouched.  Otherwise, when the cache holds more than szSpill pages, one
unreferenced dirty page is selected and handed to the xStress callback.
A callback result other than SQLITE_OK or SQLITE_BUSY is returned to the
caller as is.  Finally the page is requested from the pluggable cache with
create mode 2; *ppPage receives the result and the return value is
SQLITE_OK, or SQLITE_NOMEM_BKPT if no page was obtained.
*/
int sqlite3PcacheFetchStress(
  PCache *pCache,                 /* Obtain the page from this cache */
  Pgno pgno,                      /* Page number to obtain */
  sqlite3_pcache_page **ppPage    /* Write result here */
){
  PgHdr *pVictim;                 /* Dirty page chosen to be spilled */
  sqlite3_pcache_page *pFetched;  /* Result of the final xFetch */

  if( pCache->eCreate==2 ) return 0;

  /* sqlite3PcachePagecount() reports the number of pages currently held
     by the pluggable cache; szSpill is the size before spilling occurs. */
  if( sqlite3PcachePagecount(pCache)>pCache->szSpill ){
    /* Find a dirty page to write out and recycle.  Preference goes to a
       page that needs no journal sync (PGHDR_NEED_SYNC clear); if there
       is none, settle for any other unreferenced dirty page.

       If the LRU page in the dirty list that has PGHDR_NEED_SYNC clear is
       currently referenced, the search below may leave pSynced pointing
       at some other page than that one.  This is harmless because pSynced
       is only an optimization. */
    pVictim = pCache->pSynced;
    while( pVictim && (pVictim->nRef || (pVictim->flags&PGHDR_NEED_SYNC)) ){
      pVictim = pVictim->pDirtyPrev;
    }
    pCache->pSynced = pVictim;
    if( pVictim==0 ){
      /* Fallback: scan from the oldest dirty page towards newer ones for
         anything that is not referenced. */
      pVictim = pCache->pDirtyTail;
      while( pVictim && pVictim->nRef ){
        pVictim = pVictim->pDirtyPrev;
      }
    }
    if( pVictim ){
      int rc;
#ifdef SQLITE_LOG_CACHE_SPILL
      sqlite3_log(SQLITE_FULL, 
                  "spill page %d making room for %d - cache used: %d/%d",
                  pVictim->pgno, pgno,
                  sqlite3GlobalConfig.pcache2.xPagecount(pCache->pCache),
                  numberOfCachePages(pCache));
#endif
      pcacheTrace(("%p.SPILL %d\n",pCache,pVictim->pgno));
      rc = pCache->xStress(pCache->pStress, pVictim);
      pcacheDump(pCache);
      if( rc!=SQLITE_OK && rc!=SQLITE_BUSY ){
        return rc;
      }
    }
  }

  pFetched = sqlite3GlobalConfig.pcache2.xFetch(pCache->pCache, pgno, 2);
  *ppPage = pFetched;
  if( pFetched==0 ){
    return SQLITE_NOMEM_BKPT;
  }
  return SQLITE_OK;
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值