创建一个数据库my_db.db
int main()
{
DB *dbp; /* DB structure handle */
u_int32_t flags; /* database open flags */
DBT key, data;
int ret, t_ret;
{
DB *dbp; /* DB structure handle */
u_int32_t flags; /* database open flags */
DBT key, data;
int ret, t_ret;
/* Create the database handle and open the underlying database. */
printf ("test 1");
if ((ret = db_create(&dbp, NULL, 0)) != 0) {
fprintf(stderr, "db_create: %s/n", db_strerror(ret));
exit (1);
}
printf("test 2");
printf ("test 1");
if ((ret = db_create(&dbp, NULL, 0)) != 0) {
fprintf(stderr, "db_create: %s/n", db_strerror(ret));
exit (1);
}
printf("test 2");
/* Database open flags */
flags = DB_CREATE; /* If the database does not exist,
* create it.*/
flags = DB_CREATE; /* If the database does not exist,
* create it.*/
/* open the database */
ret = dbp->open(dbp, /* DB structure pointer */
NULL, /* Transaction pointer */
"my_db.db", /* On-disk file that holds the database. */
NULL, /* Optional logical database name */
DB_HASH, /* Database access method */
flags, /* Open flags */
0); /* File mode (using defaults) */
if (ret != 0) {
/* Error handling goes here */
}
ret = dbp->open(dbp, /* DB structure pointer */
NULL, /* Transaction pointer */
"my_db.db", /* On-disk file that holds the database. */
NULL, /* Optional logical database name */
DB_HASH, /* Database access method */
flags, /* Open flags */
0); /* File mode (using defaults) */
if (ret != 0) {
/* Error handling goes here */
}
printf("test 3");
/* Initialize key/data structures. */
memset(&key, 0, sizeof(key));
memset(&data, 0, sizeof(data));
key.data = "fruit";
key.size = sizeof("fruit");
data.data = "apple";
data.size = sizeof("apple");
/* Initialize key/data structures. */
memset(&key, 0, sizeof(key));
memset(&data, 0, sizeof(data));
key.data = "fruit";
key.size = sizeof("fruit");
data.data = "apple";
data.size = sizeof("apple");
/* Store a key/data pair. */
if ((ret = dbp->put(dbp, NULL, &key, &data, 0)) == 0)
printf("db: %s: key stored./n", (char *)key.data);
else {
dbp->err(dbp, ret, "DB->put");
goto err;
}
if ((ret = dbp->put(dbp, NULL, &key, &data, 0)) == 0)
printf("db: %s: key stored./n", (char *)key.data);
else {
dbp->err(dbp, ret, "DB->put");
goto err;
}
....
}
普通元数据页定义:对于HASH类型的数据库,第一页一般存放元数据页,后面陆续存放数据页。
所谓元数据页,就是描述整个HASH文件总体信息的结构。
/*
 * A DB_LSN has two parts, a fileid which identifies a specific file, and an
 * offset within that file. The fileid is an unsigned 4-byte quantity that
 * uniquely identifies a file within the log directory -- currently a simple
 * counter inside the log. The offset is also an unsigned 4-byte value. The
 * log manager guarantees the offset is never more than 4 bytes by switching
 * to a new log file before the maximum length imposed by an unsigned 4-byte
 * offset is reached.
 */
struct __db_lsn {
	u_int32_t file;		/* File ID: log file counter (see above). */
	u_int32_t offset;	/* File offset: offset within that file. */
};
/************************************************************************
GENERIC METADATA PAGE HEADER
* 通用元数据页头
* !!!
* The magic and version numbers have to be in the same place in all versions
* of the metadata page as the application may not have upgraded the database.
************************************************************************/
GENERIC METADATA PAGE HEADER
* 通用元数据页头
* !!!
* The magic and version numbers have to be in the same place in all versions
* of the metadata page as the application may not have upgraded the database.
************************************************************************/
typedef struct _dbmeta33 {
DB_LSN lsn ; /* 00-07: LSN. */
db_pgno_t pgno; /* 08-11: Current page number. */ 当前页号
u_int32_t magic; /* 12-15: Magic number. */
u_int32_t version; /* 16-19: Version. */
u_int32_t pagesize; /* 20-23: Pagesize. */ 页大小
u_int8_t encrypt_alg; /* 24: Encryption algorithm. */ 加密算法
u_int8_t type; /* 25: Page type. */ 页类型
#define DBMETA_CHKSUM 0x01
u_int8_t metaflags; /* 26: Meta-only flags */
u_int8_t unused1; /* 27: Unused. */
u_int32_t free; /* 28-31: Free list page number. */
db_pgno_t last_pgno; /* 32-35: Page number of last page in db. */
u_int32_t unused3; /* 36-39: Unused. */
u_int32_t key_count; /* 40-43: Cached key count. */
u_int32_t record_count; /* 44-47: Cached record count. */
u_int32_t flags; /* 48-51: Flags: unique to each AM. */
/* 52-71: Unique file ID. */
u_int8_t uid[DB_FILE_ID_LEN];
} DBMETA33, DBMETA ;
HASH类型的数据库元数据页定义
typedef struct _hashmeta33 {
#define DB_HASH_DUP 0x01 /* Duplicates. */
#define DB_HASH_SUBDB 0x02 /* Subdatabases. */
#define DB_HASH_DUPSORT 0x04 /* Duplicates are sorted. */
DBMETA dbmeta ; /* 00-71: Generic meta-data page header. */ 通用的数据库元结构
#define DB_HASH_DUP 0x01 /* Duplicates. */
#define DB_HASH_SUBDB 0x02 /* Subdatabases. */
#define DB_HASH_DUPSORT 0x04 /* Duplicates are sorted. */
DBMETA dbmeta ; /* 00-71: Generic meta-data page header. */ 通用的数据库元结构
下面的是hash自有的成员
u_int32_t max_bucket; /* 72-75: ID of Maximum bucket in use */
u_int32_t high_mask; /* 76-79: Modulo mask into table */
u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */
u_int32_t ffactor; /* 84-87: Fill factor */
u_int32_t nelem; /* 88-91: Number of keys in hash table */
u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */
#define NCACHED 32 /* number of spare points */
/* 96-223: Spare pages for overflow */
u_int32_t spares[NCACHED];
u_int32_t unused[59]; /* 224-459: Unused space */
u_int32_t crypto_magic; /* 460-463: Crypto magic number */
u_int32_t trash[3]; /* 464-475: Trash space - Do not use */
u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */
u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */
u_int32_t high_mask; /* 76-79: Modulo mask into table */
u_int32_t low_mask; /* 80-83: Modulo mask into table lower half */
u_int32_t ffactor; /* 84-87: Fill factor */
u_int32_t nelem; /* 88-91: Number of keys in hash table */
u_int32_t h_charkey; /* 92-95: Value of hash(CHARKEY) */
#define NCACHED 32 /* number of spare points */
/* 96-223: Spare pages for overflow */
u_int32_t spares[NCACHED];
u_int32_t unused[59]; /* 224-459: Unused space */
u_int32_t crypto_magic; /* 460-463: Crypto magic number */
u_int32_t trash[3]; /* 464-475: Trash space - Do not use */
u_int8_t iv[DB_IV_BYTES]; /* 476-495: Crypto IV */
u_int8_t chksum[DB_MAC_KEY]; /* 496-511: Page chksum */
/*
* Minimum page size is 512.
*/
} HMETA33, HMETA;
* Minimum page size is 512.
*/
} HMETA33, HMETA;
数据页(page)是BDB对数据进行操作的最小单元。不同的数据存储结构对页的定义会稍有不同,
但都是用页头的若干个字节来标识该页的相关属性。下面是一个页结构的定义:
/*
 * Common header found at the start of every data page.
 */
typedef struct _db_page
{
	DB_LSN	  lsn;		/* 00-07: Log sequence number. */
	db_pgno_t pgno;		/* 08-11: Current page number. */
	db_pgno_t prev_pgno;	/* 12-15: Previous page number. */
	db_pgno_t next_pgno;	/* 16-19: Next page number. */
	db_indx_t entries;	/* 20-21: Number of items on the page. */
	db_indx_t hf_offset;	/* 22-23: High free byte page offset. */

	/*
	 * The btree levels are numbered from the leaf to the root, starting
	 * with 1, so the leaf is level 1, its parent is level 2, and so on.
	 * We maintain this level on all btree pages, but the only place that
	 * we actually need it is on the root page. It would not be difficult
	 * to hide the byte on the root page once it becomes an internal page,
	 * so we could get this byte back if we needed it for something else.
	 */
#define	LEAFLEVEL	  1
#define	MAXBTREELEVEL	255
	u_int8_t  level;	/*    24: Btree tree level. */
	u_int8_t  type;		/*    25: Page type. */
} PAGE;
/*
 * With many compilers sizeof(PAGE) == 28, while SIZEOF_PAGE == 26.
 * We add in other things directly after the page header and need
 * the SIZEOF_PAGE. When giving the sizeof(), many compilers will
 * pad it out to the next 4-byte boundary.
 */
#define SIZEOF_PAGE 26	/* The page header is 26 bytes long. */
HASH的内存页布局:
/*
* +-----------------------------------+
* | lsn | pgno | prev pgno |
* +-----------------------------------+
* | next pgno | entries | hf offset |
* +-----------------------------------+
* | level | type | index |
* +-----------------------------------+
* | index... |
* + +
* | F R E E A R E A |
* + +
* | <-- free | item |
* +-----------------------------------+
* | item | item | item |
* +-----------------------------------+
*/
数据从页尾往前放,索引从页头往后放;索引保存的是key/data在页内的偏移量。
下面的内容有些来自CSDN,加上自己的理解,并加了些描述。这些宏是处理Page内数据的
最基本的东西,一旦理解了,HASH的db中key/data操作将变得很简单。其他的
key到桶和文件cache的MVC等计算另当别论了。
/* An item larger than pagesize / MINFILL is treated as a "big" item. */
#define MINFILL 4
/*
 * ISBIG(I, N) --
 *	Evaluate to 1 if the length N is greater than a quarter of the page
 *	size recorded in I->hdr->dbmeta.pagesize, and to 0 otherwise.
 */
#define ISBIG(I, N) (((N) > ((I)->hdr->dbmeta.pagesize / MINFILL)) ? 1 : 0)
功能:
判断数据长度N是否超过页大小的1/4(pagesize / MINFILL);超过则视为大数据项。
/*
 * P_INP(dbp, pg) --
 *	Address of the index array of page pg: the page address plus
 *	SIZEOF_PAGE, plus sizeof(PG_CRYPTO) when DB_AM_ENCRYPT is set on dbp,
 *	or else plus sizeof(PG_CHKSUM) when DB_AM_CHKSUM is set.
 */
#define P_INP(dbp, pg) \
	((db_indx_t *)((u_int8_t *)(pg) + SIZEOF_PAGE + \
	(F_ISSET((dbp), DB_AM_ENCRYPT) ? sizeof(PG_CRYPTO) : \
	(F_ISSET((dbp), DB_AM_CHKSUM) ? sizeof(PG_CHKSUM) : 0))))
功能:
取得页中index对应的首地址 。
执行流程:
1.首先加上公共的页头大小,这里为SIZEOF_PAGE(26字节)。
2.判断数据库是否有DB_AM_ENCRYPT标志,如果有,则加上PG_CRYPTO结构的大小。并返回。否则执行下一步。
3.判断数据库是否有DB_AM_CHKSUM标志,如果有,则加上PG_CHKSUM结构的大小。并返回。
备注:
DB_AM_ENCRYPT 总是伴随DB_AM_CHKSUM标志,所以先测试DB_AM_ENCRYPT标志。
/*
 * P_OVERHEAD(dbp) --
 *	Size of the page header: the offset of the index array computed by
 *	P_INP for a page placed at address 0.
 */
#define P_OVERHEAD(dbp) P_TO_UINT16(P_INP(dbp, 0))
功能:
取页头长度,一般为26字节。
/*
 * NUM_ENT(p) --
 *	Number of entries recorded in the header of page p.
 */
#define NUM_ENT(p) (((PAGE *)(p))->entries)
功能:
取 页内元素个数。
/*
 * HOFFSET(p) --
 *	The hf_offset field (high free byte page offset) of page p.
 *	Takes the page only, which is how P_FREESPACE invokes it.
 */
#define HOFFSET(p) (((PAGE *)(p))->hf_offset)
功能:
取页内空闲区的高端偏移(hf_offset),即空闲空间结束的位置。
/*
 * LOFFSET(dbp, pg) --
 *	Offset just past the index array: the page header size plus one
 *	db_indx_t per entry on the page.
 */
#define LOFFSET(dbp, pg) \
	(P_OVERHEAD(dbp) + NUM_ENT(pg) * sizeof(db_indx_t))
功能:
空闲空间的首地址。 typedef u_int16_t db_indx_t; /* Page offset type. */ 是页内偏移地址16位整数
/*
 * P_FREESPACE(dbp, pg) --
 *	Number of free bytes on the page: the gap between the end of the
 *	index array (LOFFSET) and the high free offset (HOFFSET).
 */
#define P_FREESPACE(dbp, pg) (HOFFSET(pg) - LOFFSET(dbp, pg))
功能:
空闲空间的大小。索引后面地址到放数据的开始地方
/*
 * P_ENTRY(dbp, pg, indx) --
 *	Pointer to the item that index slot indx refers to: the page address
 *	plus the in-page offset stored in the index array.
 */
#define P_ENTRY(dbp, pg, indx) ((u_int8_t *)(pg) + P_INP(dbp, pg)[indx])
功能:
取得特定索引的指针。
/*
 * H_NUMPAIRS(pg) --
 *	Number of key/data pairs on the page: half the number of entries.
 */
#define H_NUMPAIRS(pg) (NUM_ENT(pg) / 2)
功能:
取得页内key/data对的个数。key和data成对存放,各占一个条目,所以要除以2。
/*
 * PUT_HKEYDATA(pe, kd, len, etype) --
 *	Write an item at pe: store etype in the HKEYDATA type byte, then copy
 *	len bytes from kd immediately after it.
 *
 *	Wrapped in do/while (0) so that it acts as a single statement, also
 *	when used as the body of an if/else.
 */
#define PUT_HKEYDATA(pe, kd, len, etype) do { \
	((HKEYDATA *)(pe))->type = (etype); \
	memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), (kd), (len)); \
} while (0)
功能:
拷贝keydata的dbt的数据到页指定索引slot指向的内存区域 。
/*
 * LEN_HITEM(dbp, pg, pgsize, indx) --
 *	Length of the item at index indx: the offset of the preceding item
 *	(or pgsize when indx is 0) minus the offset of this item.
 */
#define LEN_HITEM(dbp, pg, pgsize, indx) \
	(((indx) == 0 ? (pgsize) : \
	(P_INP(dbp, pg)[(indx) - 1])) - (P_INP(dbp, pg)[indx]))
功能:
获取一个item项的长度。(用前一个索引的偏移量减去当前索引的偏移量,就得出item的长度;indx为0时用页大小代替前一个索引的偏移量)
/*
 * LEN_HKEYDATA(dbp, pg, psize, indx) --
 *	Length of the data carried by the item at index indx: the item length
 *	minus the size of an HKEYDATA holding zero data bytes.
 */
#define LEN_HKEYDATA(dbp, pg, psize, indx) \
	((db_indx_t)(LEN_HITEM(dbp, pg, psize, indx) - HKEYDATA_SIZE(0)))
功能:
获取指定索引处key/data项中数据部分的长度(item的总长度减去HKEYDATA头部type字段所占的字节)。
/*
 * In-page layout of a hash key or data item: one type byte followed by the
 * variable-length bytes of the item.
 */
typedef struct _hkeydata {
	u_int8_t type;		/*    00: Page type. */
	u_int8_t data[1];	/* Variable length key/data item. */
} HKEYDATA;
功能:
页内key/data项的存储格式:首字节为类型(type),其后紧跟变长的key/data数据。
/*
 * HKEYDATA_SIZE(len) --
 *	Bytes needed for an HKEYDATA item carrying len bytes of data: len
 *	plus the offset of the data field within HKEYDATA.
 */
#define HKEYDATA_SIZE(len) \
	((len) + SSZA(HKEYDATA, data))
/*
 * SSZA(name, field) --
 *	Offset of the first element of array member field within the
 *	structure type name.
 */
#define SSZA(name, field) P_TO_UINT16(&(((name *)0)->field[0]))
功能:
HKEYDATA_SIZE计算存放len字节数据的HKEYDATA项所需的总字节数(len加上data字段在HKEYDATA中的偏移);SSZA用于取结构体中数组字段的偏移量。
/* Key/data structure -- a Data-Base Thang. */
struct __db_dbt {
	void	 *data;		/* Key/data */
	u_int32_t size;		/* key/data length */

	u_int32_t ulen;		/* RO: length of user buffer. */
	u_int32_t dlen;		/* RO: get/put record length. */
	u_int32_t doff;		/* RO: get/put record offset. */

	void	 *app_data;
	u_int32_t flags;
};
/* The key_dbt and data_dbt pointers used in the page code have this type. */
typedef struct __db_dbt DBT;
/*
 * Copy the key into slot indx and the data into slot indx + 1.
 * An H_OFFPAGE item is copied as-is with memcpy; any other type goes
 * through PUT_HKEYDATA, which writes the type byte before the bytes.
 */
if (key_type == H_OFFPAGE) {
	memcpy(P_ENTRY(dbp, p, indx), key_dbt->data, key_dbt->size);
} else {
	PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data,
	    key_dbt->size, key_type);
}
if (data_type == H_OFFPAGE) {
	memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data,
	    data_dbt->size);
} else {
	PUT_HKEYDATA(P_ENTRY(dbp, p, indx+1), data_dbt->data,
	    data_dbt->size, data_type);
}
/*
 * PUT_HKEYDATA(pe, kd, len, etype) --
 *	Write an item at pe: store etype in the HKEYDATA type byte, then copy
 *	len bytes from kd immediately after it.
 *
 *	Wrapped in do/while (0) so that it acts as a single statement, also
 *	when used as the body of an if/else.
 */
#define PUT_HKEYDATA(pe, kd, len, etype) do { \
	((HKEYDATA *)(pe))->type = (etype); \
	memcpy((u_int8_t *)(pe) + sizeof(u_int8_t), (kd), (len)); \
} while (0)
功能:
拷贝keydata的dbt的数据到页指定索引slot指向的内存区域。
具体是通过索引index拿到index指向的页内偏移地址 P_ENTRY(dbp, p, indx),
然后将得到的地址转换成 HKEYDATA 指针,赋值类型并拷贝数据。
生成了一个HASH数据库dump
E:/MoreGroup/dev/TestBDB/TestBDB>db_dump -da my_db.db
In-memory DB structure:
hash: 0x48000 (open called, read-only)
meta_pgno: 0
h_ffactor: 0
h_nelem: 1
h_hash: 0x130029e1
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
page 0: hash metadata: LSN [0][1]: level 0
magic: 0x61561
version: 9
pagesize: 8192
type: 8
keys: 0 records: 0
free list: 0
last_pgno: 2
flags: 0
uid: ed 1b 0 0 0 0 f 0 5 ed 5f 24 0 0 0 0 0 0 0 0
max_bucket: 1
high_mask: 0x1
low_mask: 0
ffactor: 0
nelem: 1
h_charkey: 0x5e688dd1
spare points: 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
page 1: hash: LSN [0][1]: level 0
prev: 0 next: 0 entries: 2 offset: 8178
[000] 8185 len: 6 data: 0x660x720x750x690x7400
[001] 8178 len: 6 data: 0x610x700x700x6c0x6500
page 2: hash: LSN [0][1]: level 0
prev: 0 next: 0 entries: 0 offset: 8192
entries是当前页内的元素个数
offset是页内空闲空间的偏移值
再插入
/*
 * Second key/data pair to insert. sizeof() counts the trailing NUL, so the
 * key is 5 bytes long and the data 10 bytes long.
 */
key.data = "WAHT";
key.size = sizeof("WAHT");
data.data = "123456789";
data.size = sizeof("123456789");
E:/MoreGroup/dev/TestBDB/TestBDB>db_dump -da my_db.db
In-memory DB structure:
hash: 0x48000 (open called, read-only)
meta_pgno: 0
h_ffactor: 0
h_nelem: 2
h_hash: 0x130029e1
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
page 0: hash metadata: LSN [0][1]: level 0
magic: 0x61561
version: 9
pagesize: 8192
type: 8
keys: 0 records: 0
free list: 0
last_pgno: 2
flags: 0
uid: ed 1b 0 0 0 0 f 0 5 ed 5f 24 0 0 0 0 0 0 0 0
max_bucket: 1
high_mask: 0x1
low_mask: 0
ffactor: 0
nelem: 2
h_charkey: 0x5e688dd1
spare points: 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
page 1: hash: LSN [0][1]: level 0
prev: 0 next: 0 entries: 4 offset: 8161
[000] 8186 len: 5 data: 0x570x410x480x5400
[001] 8175 len: 10 data: 0x310x320x330x340x350x360x370x380x3900
[002] 8168 len: 6 data: 0x660x720x750x690x7400
[003] 8161 len: 6 data: 0x610x700x700x6c0x6500
page 2: hash: LSN [0][1]: level 0
prev: 0 next: 0 entries: 0 offset: 8192