这里先对一个具体的表做实现,然后再想办法抽象成一般的表。
解析以及格式化提取内容部分
if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
  /*
   * Parse "insert <id> <username> <email>" into statement->row_to_insert.
   *
   * Only the id is read with sscanf. The two text fields are located as
   * whitespace-delimited tokens and their lengths are checked against the
   * column sizes BEFORE anything is copied, so an over-long token is
   * rejected instead of overflowing the fixed-size Row buffers.
   *
   * Returns PREPARE_SUCCESS when all three fields are present and fit,
   * PREPARE_SYNTAX_ERROR otherwise. Extra trailing tokens are ignored.
   */
  static const char whitespace[] = " \t\n\v\f\r";
  int id = 0;
  int consumed = 0;

  statement->type = STATEMENT_INSERT;

  /* %n records how many characters were consumed up to the end of the id. */
  if (sscanf(input_buffer->buffer, "insert %d%n", &id, &consumed) < 1 ||
      consumed <= 0) {
    return PREPARE_SYNTAX_ERROR;
  }

  /* Username: first token after the id. */
  const char* username = input_buffer->buffer + consumed;
  username += strspn(username, whitespace);
  size_t username_len = strcspn(username, whitespace);

  /* Email: the token that follows the username. */
  const char* email = username + username_len;
  email += strspn(email, whitespace);
  size_t email_len = strcspn(email, whitespace);

  if (username_len == 0 || email_len == 0) {
    return PREPARE_SYNTAX_ERROR;
  }
  /* Each column must also have room for the terminating '\0'. */
  if (username_len >= COLUMN_USERNAME_SIZE || email_len >= COLUMN_EMAIL_SIZE) {
    return PREPARE_SYNTAX_ERROR;
  }

  /* The id column is unsigned; convert explicitly from the scanned int. */
  statement->row_to_insert.id = (uint32_t)id;
  memcpy(statement->row_to_insert.username, username, username_len);
  statement->row_to_insert.username[username_len] = '\0';
  memcpy(statement->row_to_insert.email, email, email_len);
  statement->row_to_insert.email[email_len] = '\0';

  return PREPARE_SUCCESS;
}
为此,我们实现一个表对应的结构体
/* Size in bytes of the fixed username buffer inside a Row. */
#define COLUMN_USERNAME_SIZE 32
/* Size in bytes of the fixed email buffer inside a Row. */
#define COLUMN_EMAIL_SIZE 255
/*
 * In-memory form of one row of the hard-coded table: a numeric id plus two
 * fixed-size character buffers. When the buffers are filled as C strings,
 * the terminating '\0' has to fit inside the sizes above.
 */
typedef struct {
/* Numeric identifier of the row. */
uint32_t id;
/* Username text, COLUMN_USERNAME_SIZE bytes of storage. */
char username[COLUMN_USERNAME_SIZE];
/* Email text, COLUMN_EMAIL_SIZE bytes of storage. */
char email[COLUMN_EMAIL_SIZE];
} Row;
我们希望做以下处理
Here’s my plan:
- Store rows in blocks of memory called pages
- Each page stores as many rows as it can fit
- Rows are serialized into a compact representation with each page
- Pages are only allocated as needed
- Keep a fixed-size array of pointers to pages
First we’ll define the compact representation of a row:
下面我们定义一系列常量(ID_SIZE、USERNAME_SIZE、EMAIL_SIZE,对应的 ID_OFFSET、USERNAME_OFFSET、EMAIL_OFFSET,以及 ROW_SIZE)来快速获取结构体各字段的大小以及偏移量
因此我们下一步就能用C实现序列化和反序列化
/**
 * Pack a Row into its compact serialized form.
 *
 * Copies ID_SIZE bytes of the id, USERNAME_SIZE bytes of the username buffer
 * and EMAIL_SIZE bytes of the email buffer to ID_OFFSET, USERNAME_OFFSET and
 * EMAIL_OFFSET (respectively) from the start of `destination`.
 *
 * @param source       Row to read from.
 * @param destination  Start of the serialized row slot; must be writable for
 *                     every offset/size pair used here.
 */
void serialize_row(Row* source, void* destination) {
  /* ISO C has no arithmetic on void*, so offsets are applied to a byte pointer. */
  unsigned char* out = destination;

  memcpy(out + ID_OFFSET, &source->id, ID_SIZE);
  memcpy(out + USERNAME_OFFSET, source->username, USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, source->email, EMAIL_SIZE);
}
/**
 * Unpack a serialized row back into a Row struct.
 *
 * Copies ID_SIZE, USERNAME_SIZE and EMAIL_SIZE bytes found at ID_OFFSET,
 * USERNAME_OFFSET and EMAIL_OFFSET from the start of `source` into the id,
 * username and email members of `destination`.
 *
 * @param source       Start of the serialized row slot to read.
 * @param destination  Row that receives the unpacked fields.
 */
void deserialize_row(void* source, Row* destination) {
  /* ISO C has no arithmetic on void*, so offsets are applied to a byte pointer. */
  const unsigned char* in = source;

  memcpy(&destination->id, in + ID_OFFSET, ID_SIZE);
  memcpy(destination->username, in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(destination->email, in + EMAIL_OFFSET, EMAIL_SIZE);
}
Next, a Table structure that points to pages of rows and keeps track of how many rows there are:
/*
 * Capacity constants for the paged row store.
 *
 * NOTE(review): if these are file-scope definitions, the initializers of
 * ROWS_PER_PAGE and TABLE_MAX_ROWS read other const objects, which ISO C does
 * not treat as constant expressions. Compilers may accept this as an
 * extension; confirm the target toolchain does.
 */
/* Size in bytes of one page buffer. */
const uint32_t PAGE_SIZE = 4096;
/* Number of entries in a table's page-pointer array. */
#define TABLE_MAX_PAGES 100
/* Whole rows per page; the integer division discards any leftover bytes. */
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
/* Total rows a table can hold when every page is in use. */
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
/*
 * A table: a row counter plus a fixed-size array of page pointers.
 */
typedef struct {
/* Number of rows currently stored in the table. */
uint32_t num_rows;
/* One pointer per page; an entry stays NULL until that page is allocated. */
void* pages[TABLE_MAX_PAGES];
} Table;
我将页面大小设为4 KB,因为它与大多数计算机体系结构的虚拟内存系统中使用的页面大小相同。这意味着数据库中的一页对应于操作系统使用的一页。操作系统会将页面作为整体单元移入和移出内存,而不是将其拆分。
我任意地把可分配的页面数上限设为100。当我们切换到树形结构时,数据库的最大大小将仅受文件最大大小的限制。(尽管我们仍然会限制一次存储在内存中的页面数)
行不应跨越页面边界。由于页面可能不会在内存中彼此相邻存在,因此这种假设使读取/写入行变得更加容易。
说到这,这就是我们如何找出特定行在内存中的读取/写入位置:
/**
 * Locate the memory slot that holds (or will hold) row `row_num`.
 *
 * The page that contains the row is allocated on first access.
 *
 * @param table    Table whose pages are searched.
 * @param row_num  Zero-based row index.
 * @return Pointer to the first byte of the row's slot, or NULL when the row
 *         lies beyond the last possible page or the page cannot be allocated.
 */
void* row_slot(Table* table, uint32_t row_num) {
  /* Work out which page the row lives on. */
  uint32_t page_num = row_num / ROWS_PER_PAGE;
  if (page_num >= TABLE_MAX_PAGES) {
    /* Indexing table->pages here would read past the end of the array. */
    return NULL;
  }

  void* page = table->pages[page_num];
  if (page == NULL) {
    /* Allocate memory only when we try to access the page. */
    page = malloc(PAGE_SIZE);
    if (page == NULL) {
      return NULL;
    }
    table->pages[page_num] = page;
  }

  uint32_t row_offset = row_num % ROWS_PER_PAGE;
  uint32_t byte_offset = row_offset * ROW_SIZE;
  /* ISO C has no arithmetic on void*, so offset through a byte pointer. */
  return (char*)page + byte_offset;
}
执行select和insert的函数
/**
 * Append the row carried by `statement` to the end of `table`.
 *
 * @param statement  Statement whose row_to_insert member is stored.
 * @param table      Table that receives the row; num_rows grows by one on
 *                   success.
 * @return EXECUTE_SUCCESS when the row was stored, EXECUTE_TABLE_FULL when
 *         the table already holds TABLE_MAX_ROWS rows or no slot could be
 *         obtained for the new row.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  if (table->num_rows >= TABLE_MAX_ROWS) {
    return EXECUTE_TABLE_FULL;
  }

  void* slot = row_slot(table, table->num_rows);
  if (slot == NULL) {
    /*
     * No storage is available for this row. EXECUTE_TABLE_FULL is the only
     * failure status this function reports, so use it instead of writing
     * through a null pointer.
     */
    return EXECUTE_TABLE_FULL;
  }

  serialize_row(&(statement->row_to_insert), slot);
  table->num_rows += 1;
  return EXECUTE_SUCCESS;
}
/**
 * Print every row currently stored in `table`, in insertion order.
 *
 * Each row is unpacked from its slot into a local Row and handed to
 * print_row. The `statement` argument is not consulted.
 *
 * @return Always EXECUTE_SUCCESS.
 */
ExecuteResult execute_select(Statement* statement, Table* table) {
  Row current;
  uint32_t index = 0;

  while (index < table->num_rows) {
    void* slot = row_slot(table, index);
    deserialize_row(slot, &current);
    print_row(&current);
    index++;
  }

  return EXECUTE_SUCCESS;
}
/**
 * Dispatch a prepared statement to the function that executes it.
 *
 * @param statement  Statement to run; its `type` selects the handler.
 * @param table      Table the statement operates on.
 * @return Whatever execute_insert or execute_select returns.
 *
 * A statement type with no handler is treated as a programming error and
 * aborts the process, so control can never fall off the end of this
 * non-void function.
 */
ExecuteResult execute_statement(Statement* statement, Table* table) {
  switch (statement->type) {
    case STATEMENT_INSERT:
      return execute_insert(statement, table);
    case STATEMENT_SELECT:
      return execute_select(statement, table);
    default:
      /* Unknown statement type: there is no meaningful result to return. */
      abort();
  }
}
表的创建和删除
/**
 * Create an empty table: zero rows and no pages allocated yet.
 *
 * @return Newly allocated table, or NULL if the allocation fails. The
 *         caller owns the result and releases it with free_table().
 */
Table* new_table(void) {
  Table* table = malloc(sizeof *table);
  if (table == NULL) {
    return NULL;
  }

  table->num_rows = 0;
  /* Every page starts out unallocated. */
  for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
    table->pages[i] = NULL;
  }
  return table;
}
/**
 * Release a table together with every page it has allocated.
 *
 * @param table  Table to destroy; NULL is accepted and ignored.
 */
void free_table(Table* table) {
  if (table == NULL) {
    return;
  }

  /*
   * Walk the whole page array rather than stopping at the first NULL entry:
   * when all pages are in use there is no NULL entry to stop on, and the
   * loop would read past the end of the array. free(NULL) is a no-op, so
   * unallocated entries need no special handling.
   */
  for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) {
    free(table->pages[i]);
  }
  free(table);
}