Berkeley DB 一点研究

 

 
创建一个数据库my_db.db
 
int main()
{
    DB *dbp;           /* DB structure handle */
    u_int32_t flags;   /* database open flags */
    DBT key, data;
    int ret, t_ret;
 
    /* Create the database handle and open the underlying database. */
    printf ("test 1");
    if ((ret = db_create(&dbp, NULL, 0)) != 0) {
        fprintf(stderr, "db_create: %s/n", db_strerror(ret));
        exit (1);
    }
    printf("test 2");
 
    /* Database open flags */
    flags = DB_CREATE;    /* If the database does not exist,
                          * create it.*/
 
    /* open the database */
    ret = dbp->open(dbp,        /* DB structure pointer */
        NULL,       /* Transaction pointer */
        "my_db.db", /* On-disk file that holds the database. */
        NULL,       /* Optional logical database name */
        DB_HASH,    /* Database access method */
        flags,      /* Open flags */
        0);         /* File mode (using defaults) */
    if (ret != 0) {
        /* Error handling goes here */
    }
 
    printf("test 3");
    /* Initialize key/data structures. */
    memset(&key, 0, sizeof(key));
    memset(&data, 0, sizeof(data));
    key.data = "fruit";
    key.size = sizeof("fruit");
    data.data = "apple";
    data.size = sizeof("apple");
 
    /* Store a key/data pair. */
    if ((ret = dbp->put(dbp, NULL, &key, &data, 0)) == 0)
        printf("db: %s: key stored./n", (char *)key.data);
    else {
        dbp->err(dbp, ret, "DB->put");
        goto err;
    }
   ....
}
   
 
通用元数据页定义。对于HASH类型的数据库,第一页(page 0)存放元数据页,其后依次存放数据页。
所谓元数据页,就是描述整个HASH文件总体信息的结构。
/*
 * A DB_LSN has two parts, a fileid which identifies a specific file, and an
 * offset within that file.  The fileid is an unsigned 4-byte quantity that
 * uniquely identifies a file within the log directory -- currently a simple
 * counter inside the log.  The offset is also an unsigned 4-byte value.  The
 * log manager guarantees the offset is never more than 4 bytes by switching
 * to a new log file before the maximum length imposed by an unsigned 4-byte
 * offset is reached.
 */

/* Log sequence number: a log file ID plus a byte offset within that file. */
struct __db_lsn {
 u_int32_t file;  /* File ID: a counter identifying a log file, not an OS file descriptor. */
 u_int32_t offset;  /* File offset within that log file. */
};
 
/************************************************************************
 GENERIC METADATA PAGE HEADER
 *
通用元数据页头
 * !!!
 * The magic and version numbers have to be in the same place in all versions
 * of the metadata page as the application may not have upgraded the database.
 ************************************************************************/

typedef struct _dbmeta33 {
 DB_LSN   lsn ;  /* 00-07: LSN. */
 db_pgno_t pgno;  /* 08-11: Current page number. */           当前页号
 u_int32_t magic; /* 12-15: Magic number. */
 u_int32_t version; /* 16-19: Version. */
 u_int32_t pagesize; /* 20-23: Pagesize. */                  页大小
 u_int8_t  encrypt_alg; /*    24: Encryption algorithm. */   加密算法
 u_int8_t  type;  /*    25: Page type. */                    页类型
#define DBMETA_CHKSUM  0x01
 u_int8_t  metaflags; /* 26: Meta-only flags */
 u_int8_t  unused1; /* 27: Unused. */
 u_int32_t free;  /* 28-31: Free list page number. */
 db_pgno_t last_pgno; /* 32-35: Page number of last page in db. */
 u_int32_t unused3; /* 36-39: Unused. */
 u_int32_t key_count; /* 40-43: Cached key count. */
 u_int32_t record_count; /* 44-47: Cached record count. */
 u_int32_t flags; /* 48-51: Flags: unique to each AM. */
    /* 52-71: Unique file ID. */
 u_int8_t  uid[DB_FILE_ID_LEN];
} DBMETA33, DBMETA ;
 
 
HASH类型的数据库元数据页定义
 
typedef struct _hashmeta33 {
#define DB_HASH_DUP 0x01 /*   Duplicates. */
#define DB_HASH_SUBDB 0x02 /*   Subdatabases. */
#define DB_HASH_DUPSORT 0x04 /*   Duplicates are sorted. */

 DBMETA dbmeta ;  /* 00-71: Generic meta-data page header. */ 通用的数据库元结构
                        下面的是hash自有的成员
 u_int32_t max_bucket;  /* 72-75: ID of Maximum bucket in use */
 u_int32_t high_mask;   /* 76-79: Modulo mask into table */
 u_int32_t low_mask;    /* 80-83: Modulo mask into table lower half */
 u_int32_t ffactor;     /* 84-87: Fill factor */
 u_int32_t nelem;       /* 88-91: Number of keys in hash table */
 u_int32_t h_charkey;   /* 92-95: Value of hash(CHARKEY) */
#define NCACHED 32      /* number of spare points */
    /* 96-223: Spare pages for overflow */

 u_int32_t spares[NCACHED];
 u_int32_t unused[59];   /* 224-459: Unused space */
 u_int32_t crypto_magic; /* 460-463: Crypto magic number */
 u_int32_t trash[3];     /* 464-475: Trash space - Do not use */
 u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */
 u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */
 
 /*
  * Minimum page size is 512.
  */
} HMETA33, HMETA;
 
 
 
 
数据页(page)是BDB对数据操作的最小单元。不同的数据存储结构对页的定义会稍有不同,
但都是用页头的若干个字节标识该页的相关属性。下面是一个页结构的定义:

 
/*
 * Page header layout.  The numeric ranges in the field comments are byte
 * offsets from the start of the page; the last field sits at offset 25,
 * so the header proper occupies 26 bytes.
 */
typedef struct _db_page
{
 DB_LSN   lsn;  /* 00-07: Log sequence number. */
 db_pgno_t pgno;  /* 08-11: Current page number. */
 db_pgno_t prev_pgno; /* 12-15: Previous page number. */
 db_pgno_t next_pgno; /* 16-19: Next page number. */
 db_indx_t entries; /* 20-21: Number of items on the page. */
 db_indx_t hf_offset; /* 22-23: High free byte page offset. */
 
 /*
  * The btree levels are numbered from the leaf to the root, starting
  * with 1, so the leaf is level 1, its parent is level 2, and so on.
  * We maintain this level on all btree pages, but the only place that
  * we actually need it is on the root page.  It would not be difficult
  * to hide the byte on the root page once it becomes an internal page,
  * so we could get this byte back if we needed it for something else.
  */

#define LEAFLEVEL   1
#define MAXBTREELEVEL 255

 u_int8_t  level; /*    24: Btree tree level. */
 u_int8_t  type;  /*    25: Page type. */
} PAGE ;
 
 
/*
 * With many compilers sizeof(PAGE) == 28, while SIZEOF_PAGE == 26.
 * We add in other things directly after the page header and need
 * the SIZEOF_PAGE.  When giving the sizeof(), many compilers will
 * pad it out to the next 4-byte boundary.
 */
#define SIZEOF_PAGE 26              /* Size of the page header: 26 bytes. */
 
 
HASH的内存页布局:
 
/*
 * +-----------------------------------+
 * |    lsn    |   pgno    | prev pgno |
 * +-----------------------------------+
 * | next pgno |  entries  | hf offset |
 * +-----------------------------------+
 * |   level   |   type    |   index   |
 * +-----------------------------------+
 * |               index...            |
 * +                                   +
 * |  F R E E A R E A                  |
 * +                                   +
 * |              <-- free |   item    |
 * +-----------------------------------+
 * |   item    |   item    |   item    |
 * +-----------------------------------+
 */
数据从页尾向前存放,索引从页头之后向后存放;每个索引项保存的是对应 key/data 项的页内偏移量。
 
 

下面有些来自CSDN,加上自己的,并加了些描述,这些宏是处理Page内数据的
 最基本东西,一旦理解了,HASH的db中key/data操作将变得很简单了。其他的
 key到桶和文件cache的MVC等计算另当别论了。

/* An item larger than pagesize / MINFILL is treated as "big". */
#define MINFILL   4
/*
 * ISBIG(I, N): evaluates to 1 when length N exceeds one MINFILL-th of the
 * page size read from (I)->hdr->dbmeta.pagesize, otherwise 0.
 * There must be no space between the macro name and '(' -- with a space the
 * preprocessor defines an object-like macro instead.
 */
#define ISBIG(I, N) (((N) > ((I)->hdr->dbmeta.pagesize / MINFILL)) ? 1 : 0)
功能:
   判断数据长度 N 是否超过页大小的 1/MINFILL(即 1/4);超过返回 1,否则返回 0。
 
/*
 * P_INP(dbp, pg): pointer to the page's index array, which starts right
 * after the SIZEOF_PAGE-byte header plus, when the handle has the flag set,
 * a PG_CRYPTO or PG_CHKSUM block.  DB_AM_ENCRYPT is tested first.
 * Line continuations must be backslashes, and the parameter list must
 * follow the macro name with no intervening space.
 */
#define P_INP(dbp, pg)                                                \
       ((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE +               \
       (F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) :          \
       (F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0))))
功能:
取得页中index对应的首地址
 
执行流程:
1.首先加上公共的页头大小,这里为 SIZEOF_PAGE(26字节)
2.判断数据库是否有DB_AM_ENCRYPT标志,如果有,则加上PG_CRYPTO结构的大小。并返回。否则执行下一步。
3.判断数据库是否有DB_AM_CHKSUM标志,如果有,则加上PG_CHKSUM结构的大小。并返回。
 
备注:
    DB_AM_ENCRYPT 总是伴随DB_AM_CHKSUM标志,所以先测试DB_AM_ENCRYPT标志。
 
 
/*
 * P_OVERHEAD(dbp): byte offset of the index array from the start of a page,
 * obtained by evaluating P_INP against a page address of 0.
 */
#define P_OVERHEAD(dbp)              P_TO_UINT16(P_INP(dbp, 0))
功能:
   页头长度。 一般为26字节。
 
 
/* NUM_ENT(p): the entries field of the page header that p points to. */
#define NUM_ENT(p) (((PAGE *)(p))->entries)
功能:
   取 页内元素个数。
 
 
/*
 * HOFFSET(p): the hf_offset field of the page header that p points to.
 * Takes only the page pointer, matching both the body and the
 * HOFFSET(pg) call made by P_FREESPACE.
 */
#define HOFFSET(p)            (((PAGE *)(p))->hf_offset)
功能:
   空闲空间的空闲区的高地址。
 
 
/*
 * LOFFSET(dbp, pg): page offset of the first byte after the index array,
 * i.e. the header overhead plus one db_indx_t per entry on the page.
 */
#define LOFFSET(dbp, pg)                                          \
    (P_OVERHEAD(dbp) + NUM_ENT(pg) * sizeof(db_indx_t))
功能:
   空闲空间的首地址。 typedef u_int16_t db_indx_t; /* Page offset type. */ 是页内偏移地址16位整数
 
 
/* P_FREESPACE(dbp, pg): number of bytes between LOFFSET and HOFFSET of the page. */
#define P_FREESPACE(dbp, pg)       (HOFFSET(pg) - LOFFSET(dbp, pg))
功能:
   空闲空间的大小。索引后面地址到放数据的开始地方
 
 
/*
 * P_ENTRY(dbp, pg, indx): address of the item whose page offset is stored in
 * slot indx of the index array.
 */
#define P_ENTRY(dbp, pg, indx)      ((u_int8_t *)(pg) + P_INP(dbp, pg)[indx])
功能:
   取得特定索引的指针。
 
 
/* H_NUMPAIRS(pg): number of key/data pairs on the page (two entries per pair). */
#define H_NUMPAIRS(pg)   (NUM_ENT(pg) / 2)
功能:
   取得页内多少Key/data对。key的data是一对所以/2
 
 
/*
 * PUT_HKEYDATA(pe, kd, len, etype): store an item at address pe by writing
 * the one-byte type tag etype and then copying len bytes from kd right
 * after it.  Wrapped in do/while(0) so that "PUT_HKEYDATA(...);" behaves
 * as a single statement, including inside if/else.
 */
#define PUT_HKEYDATA(pe, kd, len, etype) do {    \
 ((HKEYDATA *)(pe))->type = etype;    \
 memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), kd, len);  \
} while (0)
功能:
   拷贝keydata的dbt的数据到页指定索引slot指向的内存区域
 
 
/*
 * LEN_HITEM(dbp, pg, pgsize, indx): length of the item in slot indx, taken
 * as the distance from its own offset up to the offset of the preceding
 * slot; slot 0 is measured against pgsize instead.
 */
#define LEN_HITEM(dbp, pg, pgsize, indx)    \
 (((indx) == 0 ? (pgsize) :     \
 (P_INP(dbp, pg)[(indx) - 1])) - (P_INP(dbp, pg)[indx]))
功能:
   获取一个item项的长度 。 (就是计算前一个索引偏移量 - 下一个索引偏移量就得出item的长度)
 
 
/*
 * LEN_HKEYDATA(dbp, pg, psize, indx): length of the item in slot indx minus
 * HKEYDATA_SIZE(0), i.e. without the bytes that precede HKEYDATA's data.
 */
#define LEN_HKEYDATA(dbp, pg, psize, indx)    \
 (db_indx_t)(LEN_HITEM(dbp, pg, psize, indx) - HKEYDATA_SIZE(0))
功能:
   获取指定索引处 key/data 项中纯数据部分的长度,即 item 总长度减去 HKEYDATA 头部(type 字节)的大小。
 
 
/*
 * On-page key/data item: a one-byte type tag at offset 0 followed by the
 * item's bytes.  data[1] is declared with one element but stands for a
 * variable-length payload.
 */
typedef struct _hkeydata {
 u_int8_t  type;  /*    00: Page type. */
 u_int8_t  data[1]; /* Variable length key/data item. */
} HKEYDATA;
功能:
   页内 key/data 项的存储格式:1 字节的类型(type),后面紧跟变长的数据(data)。
 
 
 
/*
 * HKEYDATA_SIZE(len): bytes needed for an HKEYDATA item carrying len bytes
 * of data, i.e. len plus the offset of the data field within HKEYDATA.
 */
#define HKEYDATA_SIZE(len)      \
 ((len) + SSZA(HKEYDATA, data))
/* SSZA(name, field): offset of array member field within struct type name. */
#define SSZA(name, field) P_TO_UINT16(&(((name *)0)->field[0]))
功能:
   计算存放 len 字节数据的 HKEYDATA 项所需的总长度(数据长度加上 data 字段在 HKEYDATA 中的偏移)。
 
 
/*
 * Key/data structure -- a Data-Base Thang.
 * The key_dbt and data_dbt variables in the page-insert snippet point to
 * structures of this type; that code reads their data and size members.
 */
struct __db_dbt {
 void  *data;      /* Key/data */
 u_int32_t size;   /* key/data length */
 u_int32_t ulen;   /* RO: length of user buffer. */
 u_int32_t dlen;   /* RO: get/put record length. */
 u_int32_t doff;   /* RO: get/put record offset. */
 void *app_data;
 u_int32_t flags;
};
typedef struct __db_dbt DBT;
 
 /*
  * Write the key into slot indx and the data into slot indx+1 of page p.
  * H_OFFPAGE items are copied verbatim with memcpy; every other type goes
  * through PUT_HKEYDATA, which stores the type byte ahead of the bytes.
  */
 if (key_type == H_OFFPAGE)
  memcpy(P_ENTRY(dbp, p, indx), key_dbt->data, key_dbt->size);
 else
  PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data,
      key_dbt->size, key_type);

 if (data_type == H_OFFPAGE)
  memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data,
      data_dbt->size);
 else
  PUT_HKEYDATA(P_ENTRY(dbp, p, indx+1), data_dbt->data,
      data_dbt->size, data_type);
 
 
/*
 * PUT_HKEYDATA(pe, kd, len, etype): store an item at address pe by writing
 * the one-byte type tag etype and then copying len bytes from kd right
 * after it.  Wrapped in do/while(0) so that "PUT_HKEYDATA(...);" behaves
 * as a single statement, including inside if/else.
 */
#define PUT_HKEYDATA(pe, kd, len, etype) do {    \
 ((HKEYDATA *)(pe))->type = etype;    \
 memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), kd, len);  \
} while (0)
功能:
   拷贝keydata的dbt的数据到页指定索引slot指向的内存区域
   具体是通过索引index拿到index指向的页内偏移地址 P_ENTRY(dbp, p, indx),
   然后将得到的地址换成 HKEYDATA 指针,赋值类型和拷贝数据。
 
 
生成了一个HASH数据库dump
 
E:/MoreGroup/dev/TestBDB/TestBDB>db_dump -da my_db.db
In-memory DB structure:
hash: 0x48000 (open called, read-only)
meta_pgno: 0
h_ffactor: 0
h_nelem: 1
h_hash: 0x130029e1
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
page 0: hash metadata: LSN [0][1]: level 0
        magic: 0x61561
        version: 9
        pagesize: 8192
        type: 8
        keys: 0 records: 0
        free list: 0
        last_pgno: 2
        flags: 0
        uid: ed 1b 0 0 0 0 f 0 5 ed 5f 24 0 0 0 0 0 0 0 0
        max_bucket: 1
        high_mask: 0x1
        low_mask:  0
        ffactor: 0
        nelem: 1
        h_charkey: 0x5e688dd1
        spare points: 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
page 1: hash: LSN [0][1]: level 0
        prev:    0 next:    0 entries:    2 offset: 8178
        [000] 8185 len:   6 data: 0x660x720x750x690x7400
        [001] 8178 len:   6 data: 0x610x700x700x6c0x6500

page 2: hash: LSN [0][1]: level 0
        prev:    0 next:    0 entries:    0 offset: 8192
 
 
entries是当前页内的元素个数
offset是页内空闲空间的偏移值
 
再插入
    key.data = "WAHT";
    key.size = sizeof("WAHT");
    data.data = "123456789";
    data.size = sizeof("123456789");
 
 
E:/MoreGroup/dev/TestBDB/TestBDB>db_dump -da my_db.db
In-memory DB structure:
hash: 0x48000 (open called, read-only)
meta_pgno: 0
h_ffactor: 0
h_nelem: 2
h_hash: 0x130029e1
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
page 0: hash metadata: LSN [0][1]: level 0
        magic: 0x61561
        version: 9
        pagesize: 8192
        type: 8
        keys: 0 records: 0
        free list: 0
        last_pgno: 2
        flags: 0
        uid: ed 1b 0 0 0 0 f 0 5 ed 5f 24 0 0 0 0 0 0 0 0
        max_bucket: 1
        high_mask: 0x1
        low_mask:  0
        ffactor: 0
        nelem: 2
        h_charkey: 0x5e688dd1
        spare points: 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
page 1: hash: LSN [0][1]: level 0
        prev:    0 next:    0 entries:    4 offset: 8161
        [000] 8186 len:   5 data: 0x570x410x480x5400
        [001] 8175 len:  10 data: 0x310x320x330x340x350x360x370x380x3900

        [002] 8168 len:   6 data: 0x660x720x750x690x7400
        [003] 8161 len:   6 data: 0x610x700x700x6c0x6500
page 2: hash: LSN [0][1]: level 0
        prev:    0 next:    0 entries:    0 offset: 8192

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值