项目地址 How Does a Database Work? | Let’s Build a Simple Database (cstack.github.io)。
B+树的效率高,这体现在增删改查各个方面。以下是作者的对比。
指标 | 未排序数组 | 排序数组 | 节点树 |
---|---|---|---|
页面包含 | 仅数据 | 仅数据 | 元数据、主键和数据 |
每页行数 | 多 | 多 | 少 |
插入 | O(1) | O(n) | O(log(n)) |
删除 | O(n) | O(n) | O(log(n)) |
按 ID 查找 | O(n) | O(log(n)) | O(log(n)) |
这里我说一下排序数组的查找为什么是O(log(n)),我们可以二分查找。比如我们可以先找并比较n/2元素的大小,也就是说,我们把数组分两块,小的话找左边块,大的话找右边块。(假设从小到大排)再比较这块的中间,这样一半一半的剔除,最后就是O(log(n)),而插入删除需要修改其他成员的位置,故为O(n)。我们以前的数据库是未排序数组那一档的,效率低下。故我们引入b+树。
这里可能会劝退一些人,我当时也属实不想做了。但仔细一想,连b+树都没有的东西能叫数据库?明白数据结构的重要性,是每个程序员应有的素养!
首先我们对节点分类,分为内部节点和叶节点,之所以这么分是因为叶节点存储数据而内部节点不用。
/* Kind of B+ tree node held in a page. Per the surrounding text, leaf nodes
 * store row data and internal nodes do not. */
typedef enum {
NODE_INTERNAL,
NODE_LEAF
} NodeType;
我们先处理叶节点。在我们的设想中,一个叶节点占一页,一页最多放13个单元格(cell,即一个键加一行数据)。(理论上哪怕别的什么也不存,也到不了14行:4096/(4+33+256)≈13.98)剩下不足一行的空间没法放数据,那么就得放点有用的,也就是节点头。首先记录节点类型,用来判断是不是叶节点,是的话才有下面的内容。然后记录是不是根节点,接下来是父节点的指针(不是根节点时才有意义),不能忘本嘛!接下来记录单元格数量。最后就是键及数据。还剩下的空间就只能浪费了。
这里我们开始定义大小及偏移量。有人不理解为什么这么做,我的理解是定位功能。我们需要用到某个变量时,通过这些变量定位到指定位置。
/*
 * Common node header layout: byte sizes and offsets of the fields at the
 * start of a node. Fields are packed back to back: node type (1 byte),
 * is-root flag (1 byte), parent pointer (4 bytes).
 */
const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
const uint32_t NODE_TYPE_OFFSET = 0;
const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
/* NOTE(review): declared uint8_t while the sibling constants are uint32_t;
 * the value (1 + 1 + 4 = 6) fits. */
const uint8_t COMMON_NODE_HEADER_SIZE =NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;
/* Leaf node header: the common header followed by a 4-byte cell count. */
const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_HEADER_SIZE =COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;
/* Leaf node body: an array of cells, each a 4-byte key followed by a
 * serialized row of ROW_SIZE bytes (ROW_SIZE and PAGE_SIZE are defined
 * elsewhere). Offsets here are relative to the start of a cell. */
const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_KEY_OFFSET = 0;
const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
const uint32_t LEAF_NODE_VALUE_OFFSET =LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
/* Bytes left for cells after the leaf header, and how many whole cells fit. */
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_MAX_CELLS =LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;
/*
 * Returns a pointer to the cell-count field inside a leaf node's header.
 *
 * node: start of the page buffer that holds the leaf node.
 *
 * The byte offset is applied through a char pointer because arithmetic on
 * void* is a GNU extension rather than standard C.
 */
uint32_t* leaf_node_num_cells(void* node) {
  return (uint32_t*)((char*)node + LEAF_NODE_NUM_CELLS_OFFSET);
}
/*
 * Returns a pointer to the start of cell number cell_num in a leaf node.
 *
 * node:     start of the page buffer that holds the leaf node.
 * cell_num: zero-based cell index; no bounds check is performed here.
 *
 * Cells are laid out back to back right after the leaf header. The offset is
 * applied through a char pointer because arithmetic on void* is a GNU
 * extension rather than standard C.
 */
void* leaf_node_cell(void* node, uint32_t cell_num) {
  char* first_cell = (char*)node + LEAF_NODE_HEADER_SIZE;
  return first_cell + cell_num * LEAF_NODE_CELL_SIZE;
}
/*
 * Returns a pointer to the key of cell number cell_num in a leaf node.
 * The key is the first field of a cell, so this is the cell's own address.
 */
uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
  void* cell = leaf_node_cell(node, cell_num);
  return cell;
}
/*
 * Returns a pointer to the serialized row stored in cell number cell_num.
 * The value follows the key inside the cell. The offset is applied through a
 * char pointer because arithmetic on void* is a GNU extension rather than
 * standard C.
 */
void* leaf_node_value(void* node, uint32_t cell_num) {
  char* cell = leaf_node_cell(node, cell_num);
  return cell + LEAF_NODE_KEY_SIZE;
}
/* Prepares a page buffer as an empty leaf node by zeroing its cell count. */
void initialize_leaf_node(void* node) {
  uint32_t* cell_count = leaf_node_num_cells(node);
  *cell_count = 0;
}
接下来我们对pager相关函数进行修改,由于页存的内容和以前不同了(以前就只有数据),我们的pager不能像以前一样只读写部分页面了。所以这部分删除,这样pager_flush函数也没必要有三个参数了。
/*
 * Flushes every cached page to disk, closes the database file and releases
 * the pager and the table.
 *
 * Exits the process with EXIT_FAILURE if the file cannot be closed.
 */
void db_close(Table* table) {
  Pager* pager = table->pager;

  /* Write back and release each cached page below num_pages. */
  for (uint32_t page_num = 0; page_num < pager->num_pages; page_num++) {
    if (pager->pages[page_num] != NULL) {
      pager_flush(pager, page_num);
      free(pager->pages[page_num]);
      pager->pages[page_num] = NULL;
    }
  }

  if (close(pager->file_descriptor) == -1) {
    printf("Error closing db file.\n");
    exit(EXIT_FAILURE);
  }

  /* Release any buffer still held in the cache; these are not flushed. */
  for (uint32_t slot = 0; slot < TABLE_MAX_PAGES; slot++) {
    if (pager->pages[slot]) {
      free(pager->pages[slot]);
      pager->pages[slot] = NULL;
    }
  }

  free(pager);
  free(table);
}
/*
 * Writes one whole cached page back to its position in the database file.
 *
 * pager:    pager that owns the cache and the file descriptor.
 * page_num: index of the page to write; it must currently be cached.
 *
 * Exits the process with EXIT_FAILURE if the page is not cached or if
 * seeking or writing fails.
 */
void pager_flush(Pager* pager, uint32_t page_num) {
  void* page = pager->pages[page_num];
  if (page == NULL) {
    printf("Tried to flush null page\n");
    exit(EXIT_FAILURE);
  }

  if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
    printf("Error seeking: %d\n", errno);
    exit(EXIT_FAILURE);
  }

  if (write(pager->file_descriptor, page, PAGE_SIZE) == -1) {
    printf("Error writing: %d\n", errno);
    exit(EXIT_FAILURE);
  }
}
我们叶节点与页挂钩,那么就不能还是以前的行存储了。所有与行存储的东西有关得全部改掉。虽然于心不忍,虽然要改很多东西,但是为了效率,我们必须要这么做。
我们删除ROWS_PER_PAGE 和TABLE_MAX_ROWS两个参数,并且修改Pager和Table
/* Page cache sitting between the table and the database file. */
typedef struct {
int file_descriptor;   /* descriptor of the open database file */
uint32_t file_length;  /* file size in bytes, measured when the file is opened */
uint32_t num_pages;    /* number of pages known to the pager */
void* pages[TABLE_MAX_PAGES];  /* cached page buffers; NULL means not loaded */
} Pager;
/* A table: its pager plus the page number of the tree's root node. */
typedef struct {
Pager* pager;
uint32_t root_page_num;
} Table;
我们再修改pager_open和get_page,确保页的完整性。
/*
 * Opens (creating if necessary) the database file and builds a pager with an
 * empty page cache.
 *
 * filename: path of the database file.
 * Returns a heap-allocated Pager.
 *
 * Exits the process with EXIT_FAILURE if the file cannot be opened or if its
 * length is not a whole number of pages.
 */
Pager* pager_open(const char* filename) {
  /* Read/write, create when missing, owner read+write permission. */
  int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
  if (fd == -1) {
    printf("Unable to open file\n");
    exit(EXIT_FAILURE);
  }

  /* Seeking to the end yields the file size. */
  off_t file_length = lseek(fd, 0, SEEK_END);

  Pager* pager = malloc(sizeof(Pager));
  pager->file_descriptor = fd;
  pager->file_length = file_length;
  pager->num_pages = (file_length / PAGE_SIZE);

  if (file_length % PAGE_SIZE != 0) {
    printf("Db file is not a whole number of pages. Corrupt file.\n");
    exit(EXIT_FAILURE);
  }

  /* Start with nothing cached. */
  uint32_t slot = 0;
  while (slot < TABLE_MAX_PAGES) {
    pager->pages[slot] = NULL;
    slot++;
  }
  return pager;
}
/*
 * Returns the in-memory buffer for page page_num, loading it on a cache miss.
 *
 * pager:    pager that owns the cache and the file descriptor.
 * page_num: zero-based page index; must be below TABLE_MAX_PAGES.
 * Returns a PAGE_SIZE-byte buffer owned by the pager.
 *
 * On a miss a zero-filled buffer is allocated and, if the page exists in the
 * file, filled from disk. Zero-filling keeps pages that are new in this
 * session from carrying uninitialised heap bytes to disk when flushed.
 * pager->num_pages grows when a page past the current end is requested.
 *
 * Exits the process with EXIT_FAILURE on an out-of-range page number,
 * allocation failure, or seek/read failure.
 */
void* get_page(Pager* pager, uint32_t page_num) {
  /* pages[] has TABLE_MAX_PAGES slots, so TABLE_MAX_PAGES itself is invalid. */
  if (page_num >= TABLE_MAX_PAGES) {
    printf("Tried to fetch page number out of bounds. %u >= %d\n",
           (unsigned int)page_num, TABLE_MAX_PAGES);
    exit(EXIT_FAILURE);
  }

  if (pager->pages[page_num] == NULL) {
    /* Cache miss. Allocate memory and load from file. */
    void* page = calloc(1, PAGE_SIZE);
    if (page == NULL) {
      printf("Error allocating page: %d\n", errno);
      exit(EXIT_FAILURE);
    }

    /* Number of pages present in the file, counting a partial last page. */
    uint32_t num_pages = pager->file_length / PAGE_SIZE;
    if (pager->file_length % PAGE_SIZE) {
      num_pages += 1;
    }

    /* Only pages that exist on disk are read; others stay zero-filled. */
    if (page_num < num_pages) {
      if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
        printf("Error seeking: %d\n", errno);
        exit(EXIT_FAILURE);
      }
      ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
      if (bytes_read == -1) {
        printf("Error reading file: %d\n", errno);
        exit(EXIT_FAILURE);
      }
    }

    pager->pages[page_num] = page;
    if (page_num >= pager->num_pages) {
      pager->num_pages = page_num + 1;
    }
  }
  return pager->pages[page_num];
}
cursor中还有row,改它!先改定义。
/* A position inside a table, identified by page and cell rather than row number. */
typedef struct {
Table* table;
uint32_t page_num;   /* page holding the node the cursor points into */
uint32_t cell_num;   /* index of the cell within that node */
bool end_of_table;   /* true when the cursor is one past the last cell */
} Cursor;
再改实现
/*
 * Creates a cursor positioned at the first cell of the root node.
 * The cursor is heap-allocated; the caller frees it.
 */
Cursor* table_start(Table* table) {
  Cursor* start = malloc(sizeof(Cursor));
  start->table = table;
  start->page_num = table->root_page_num;
  start->cell_num = 0;

  /* With no cells in the root the cursor is already at the end. */
  void* root = get_page(table->pager, table->root_page_num);
  start->end_of_table = (*leaf_node_num_cells(root) == 0);
  return start;
}
/*
 * Creates a cursor positioned one past the last cell of the root node, which
 * is where the next cell would be appended.
 * The cursor is heap-allocated; the caller frees it.
 */
Cursor* table_end(Table* table) {
  Cursor* end = malloc(sizeof(Cursor));
  end->table = table;
  end->page_num = table->root_page_num;
  end->end_of_table = true;

  void* root = get_page(table->pager, table->root_page_num);
  end->cell_num = *leaf_node_num_cells(root);
  return end;
}
/* Returns a pointer to the serialized row in the cell the cursor points at. */
void* cursor_value(Cursor* cursor) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  return leaf_node_value(node, cursor->cell_num);
}
/*
 * Moves the cursor to the next cell and marks end_of_table once it has moved
 * past the last cell of its node.
 */
void cursor_advance(Cursor* cursor) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);
  uint32_t next_cell = cursor->cell_num + 1;

  cursor->cell_num = next_cell;
  if (next_cell >= *leaf_node_num_cells(leaf)) {
    cursor->end_of_table = true;
  }
}
当我们第一次打开数据库时,文件是空的,因此我们将第 0 页初始化为根节点。
/*
 * Opens the database file and returns a table whose root is page 0.
 * When the file has no pages yet, page 0 is set up as an empty leaf node.
 * The table is heap-allocated.
 */
Table* db_open(const char* filename) {
  Pager* pager = pager_open(filename);

  Table* table = malloc(sizeof(Table));
  table->root_page_num = 0;
  table->pager = pager;

  if (pager->num_pages != 0) {
    return table;
  }

  /* New database file: initialise page 0 as the root leaf. */
  initialize_leaf_node(get_page(pager, 0));
  return table;
}
好了,我们开始修改插入数据的实现。我们用cursor来插入到叶节点,这里如果节点满了就直接fail,之后再来拆分。
/*
 * Inserts a key/row pair into the leaf node at the cursor's cell position.
 *
 * cursor: names the page and the cell index where the new cell goes.
 * key:    key stored at the start of the cell.
 * value:  row serialized into the cell after the key.
 *
 * Cells at or after the insertion index are moved up by one slot first.
 * Exits the process with EXIT_FAILURE when the node is already full.
 */
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);
  const uint32_t count = *leaf_node_num_cells(leaf);

  if (count >= LEAF_NODE_MAX_CELLS) {
    printf("Need to implement splitting a leaf node.\n");
    exit(EXIT_FAILURE);
  }

  /* Open a gap by copying cells upward, last cell first. */
  const uint32_t slot = cursor->cell_num;
  uint32_t dst = count;
  while (dst > slot) {
    memcpy(leaf_node_cell(leaf, dst), leaf_node_cell(leaf, dst - 1),
           LEAF_NODE_CELL_SIZE);
    dst--;
  }

  *(leaf_node_num_cells(leaf)) = count + 1;
  *(leaf_node_key(leaf, slot)) = key;
  serialize_row(value, leaf_node_value(leaf, slot));
}
然后修改调用。这里有点问题,我们判断主键重复的地方仍然用到了row,我们下章再改。
/*
 * Executes an insert statement against the table's root leaf node.
 *
 * Returns EXECUTE_TABLE_FULL when the leaf has no free cell,
 * EXECUTE_PRIMARY_KEY_DUPLICATION when a stored row already has the same id,
 * and EXECUTE_SUCCESS after appending the row at the end of the leaf.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  void* node = get_page(table->pager, table->root_page_num);
  if (*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS) {
    return EXECUTE_TABLE_FULL;
  }

  Row* row_to_insert = &(statement->row_to_insert);

  /* Linear scan for an existing row with the same id. The cursor must be
   * created before the loop, advanced on every iteration, and freed on every
   * exit path. */
  Cursor* cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    Row row;
    deserialize_row(cursor_value(cursor), &row);
    if (row.id == row_to_insert->id) {
      free(cursor);
      return EXECUTE_PRIMARY_KEY_DUPLICATION;
    }
    cursor_advance(cursor);
  }
  free(cursor);

  cursor = table_end(table);
  leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
  free(cursor);
  return EXECUTE_SUCCESS;
}
select,update,delete也在下一章改,所以代码是不能运行的,还请大家体谅。
还记得元命令吗?如果只有一个.exit,那么太简单了,这里我们增加可视化内容。.constants查看各个常量(节点头、单元格的大小及每页最多单元格数量)。.btree查看节点内容。
/*
 * Prints the row and leaf-node layout constants, one per line.
 * The values are unsigned, so they are printed with %u through an explicit
 * unsigned int conversion instead of %d.
 */
void print_constants() {
  printf("ROW_SIZE: %u\n", (unsigned int)ROW_SIZE);
  printf("COMMON_NODE_HEADER_SIZE: %u\n", (unsigned int)COMMON_NODE_HEADER_SIZE);
  printf("LEAF_NODE_HEADER_SIZE: %u\n", (unsigned int)LEAF_NODE_HEADER_SIZE);
  printf("LEAF_NODE_CELL_SIZE: %u\n", (unsigned int)LEAF_NODE_CELL_SIZE);
  printf("LEAF_NODE_SPACE_FOR_CELLS: %u\n", (unsigned int)LEAF_NODE_SPACE_FOR_CELLS);
  printf("LEAF_NODE_MAX_CELLS: %u\n", (unsigned int)LEAF_NODE_MAX_CELLS);
}
/*
 * Prints the number of cells in a leaf node followed by one line per cell
 * giving the cell index and its key.
 * Indices and keys are unsigned, so they are printed with %u instead of %d.
 */
void print_leaf_node(void* node) {
  uint32_t num_cells = *leaf_node_num_cells(node);
  printf("leaf (size %u)\n", (unsigned int)num_cells);
  for (uint32_t i = 0; i < num_cells; i++) {
    uint32_t key = *leaf_node_key(node, i);
    printf(" - %u : %u\n", (unsigned int)i, (unsigned int)key);
  }
}
修改实现
/*
 * Runs a meta command (an input line that starts with '.').
 *
 * ".exit" releases the input buffer, closes the database and terminates the
 * process; ".btree" prints the leaf node in page 0; ".constants" prints the
 * layout constants. Anything else is reported as unrecognized.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
  const char* command = input_buffer->buffer;

  if (strcmp(command, ".exit") == 0) {
    close_input_buffer(input_buffer);
    db_close(table);
    exit(EXIT_SUCCESS);
  }

  if (strcmp(command, ".btree") == 0) {
    printf("Tree:\n");
    print_leaf_node(get_page(table->pager, 0));
    return META_COMMAND_SUCCESS;
  }

  if (strcmp(command, ".constants") == 0) {
    printf("Constants:\n");
    print_constants();
    return META_COMMAND_SUCCESS;
  }

  return META_COMMAND_UNRECOGNIZED_COMMAND;
}
目前不能成功运行,这个我们留到下一期。这里给出代码。
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
#define TABLE_MAX_PAGES 100
#include <limits.h>
/* Outcome of handling a meta command (an input line starting with '.'). */
typedef enum {
META_COMMAND_SUCCESS,
META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;
/* Outcome of parsing an input line into a Statement. */
typedef enum {
PREPARE_SUCCESS,
PREPARE_NEGATIVE_ID,
PREPARE_ILLEGAL_ID,              /* id contains a non-digit character */
PREPARE_ID_TOO_LONG,             /* id has 10 or more characters */
PREPARE_SYNTAX_ERROR,            /* wrong number of arguments or unknown column */
PREPARE_STRING_TOO_LONG,         /* username or email exceeds its column size */
PREPARE_UNRECOGNIZED_STATEMENT   /* line starts with no known keyword */
} PrepareResult;
/* Outcome of executing a prepared Statement. */
typedef enum {
EXECUTE_SUCCESS,
EXECUTE_TABLE_FULL,
EXECUTE_PRIMARY_KEY_DUPLICATION,
EXECUTE_NOT_FOUND
} ExecuteResult;
/* Which statement a Statement holds; used to dispatch execution. */
typedef enum {
STATEMENT_INSERT,
STATEMENT_SELECT,
STATEMENT_DELETE,
STATEMENT_UPDATE
} StatementType;
/* Kind of B+ tree node held in a page. */
typedef enum {
NODE_INTERNAL,
NODE_LEAF
} NodeType;
/* Which column(s) a select statement prints. */
typedef enum {
ID,
USERNAME,
EMAIL,
ALL        /* chosen when select is given no column argument */
} SelectResult;
/* Page cache sitting between the table and the database file. */
typedef struct {
int file_descriptor;   /* descriptor of the open database file */
uint32_t file_length;  /* file size in bytes, measured when the file is opened */
uint32_t num_pages;    /* number of pages known to the pager */
void* pages[TABLE_MAX_PAGES];  /* cached page buffers; NULL means not loaded */
} Pager;
/* A table: its pager plus the page number of the tree's root node. */
typedef struct {
Pager* pager;
uint32_t root_page_num;
} Table;
/* One table row. The string arrays hold one byte more than the column size
 * so a maximum-length value still has room for its terminating NUL. */
typedef struct {
uint32_t id;
char username[COLUMN_USERNAME_SIZE+1];
char email[COLUMN_EMAIL_SIZE+1];
} Row;
/* Holds the most recent line read from standard input. */
typedef struct{
char* buffer;          /* line storage managed by getline; NULL until first read */
size_t buffer_length;  /* capacity of buffer as reported by getline */
ssize_t input_length;  /* length of the line without its trailing newline */
} InputBuffer;
/* A parsed statement. Only the fields that match `type` are filled in by the
 * prepare_* functions. */
typedef struct {
StatementType type;
Row row_to_insert;          /* insert: the row to add */
SelectResult row_to_select; /* select: which column(s) to print */
Row row_to_delete;          /* delete: the row to match and remove */
Row row_to_update_old;      /* update: the row to match */
Row row_to_update_new;      /* update: the replacement values */
} Statement;
/* A position inside a table, identified by page and cell. */
typedef struct {
Table* table;
uint32_t page_num;   /* page holding the node the cursor points into */
uint32_t cell_num;   /* index of the cell within that node */
bool end_of_table;   /* true when the cursor is one past the last cell */
} Cursor;
/*
 * Serialized row layout: id, username and email are stored back to back with
 * no padding. With the Row definition in this file that is 4 + 33 + 256 = 293
 * bytes per row.
 */
const uint32_t ID_OFFSET = 0;
const uint32_t PAGE_SIZE = 4096;
const uint32_t ID_SIZE = size_of_attribute(Row, id);
const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
/* NOTE(review): the next two constants belong to the old row-array layout
 * and are not referenced by any function visible in this file; the
 * accompanying text says they should be removed. */
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
/*
 * Common node header layout: node type (1 byte), is-root flag (1 byte),
 * parent pointer (4 bytes), packed back to back.
 */
const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
const uint32_t NODE_TYPE_OFFSET = 0;
const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
/* NOTE(review): declared uint8_t while the sibling constants are uint32_t;
 * the value (6) fits. */
const uint8_t COMMON_NODE_HEADER_SIZE =NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;
/* Leaf node header: the common header followed by a 4-byte cell count. */
const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_HEADER_SIZE =COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;
/* Leaf node body: an array of cells, each a 4-byte key followed by a
 * serialized row. Offsets here are relative to the start of a cell. */
const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_KEY_OFFSET = 0;
const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
const uint32_t LEAF_NODE_VALUE_OFFSET =LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
/* Bytes left for cells after the leaf header, and how many whole cells fit. */
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_MAX_CELLS =LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;
/* Forward declarations for functions that are used before their definitions. */
/* NOTE(review): no definition of free_table is visible in this file. */
void free_table(Table* table) ;
void close_input_buffer(InputBuffer* input_buffer);
/* Pager and table lifetime. */
void pager_flush(Pager* pager, uint32_t page_num) ;
Pager* pager_open(const char* filename);
void* get_page(Pager* pager, uint32_t page_num);
void db_close(Table* table);
/* Cursor creation and movement. */
Cursor* table_start(Table* table);
Cursor* table_end(Table* table);
void* cursor_value(Cursor* cursor);
void cursor_advance(Cursor* cursor);
/* Leaf node field accessors and mutation. */
uint32_t* leaf_node_num_cells(void* node);
void* leaf_node_cell(void* node, uint32_t cell_num);
uint32_t* leaf_node_key(void* node, uint32_t cell_num);
void* leaf_node_value(void* node, uint32_t cell_num);
void initialize_leaf_node(void* node);
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value);
/* Debug output used by the meta commands. */
void print_constants();
void print_leaf_node(void* node);
/*
 * Runs a meta command (an input line that starts with '.').
 *
 * ".exit" releases the input buffer, closes the database and terminates the
 * process; ".btree" prints the leaf node in page 0; ".constants" prints the
 * layout constants. Anything else is reported as unrecognized.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
  const char* command = input_buffer->buffer;

  if (strcmp(command, ".exit") == 0) {
    close_input_buffer(input_buffer);
    db_close(table);
    exit(EXIT_SUCCESS);
  }

  if (strcmp(command, ".btree") == 0) {
    printf("Tree:\n");
    print_leaf_node(get_page(table->pager, 0));
    return META_COMMAND_SUCCESS;
  }

  if (strcmp(command, ".constants") == 0) {
    printf("Constants:\n");
    print_constants();
    return META_COMMAND_SUCCESS;
  }

  return META_COMMAND_UNRECOGNIZED_COMMAND;
}
InputBuffer* new_input_buffer(){
InputBuffer* input_buffer=(InputBuffer*)malloc(sizeof(InputBuffer));
input_buffer->buffer=NULL;
input_buffer->buffer_length=0;
input_buffer->input_length=0;
return input_buffer;
}
/*
 * Opens the database file and returns a table whose root is page 0.
 * When the file has no pages yet, page 0 is set up as an empty leaf node.
 * The table is heap-allocated.
 */
Table* db_open(const char* filename) {
  Pager* pager = pager_open(filename);

  Table* table = malloc(sizeof(Table));
  table->root_page_num = 0;
  table->pager = pager;

  if (pager->num_pages != 0) {
    return table;
  }

  /* New database file: initialise page 0 as the root leaf. */
  initialize_leaf_node(get_page(pager, 0));
  return table;
}
/*
 * Opens (creating if necessary) the database file and builds a pager with an
 * empty page cache.
 *
 * filename: path of the database file.
 * Returns a heap-allocated Pager.
 *
 * Exits the process with EXIT_FAILURE if the file cannot be opened or if its
 * length is not a whole number of pages.
 */
Pager* pager_open(const char* filename) {
  /* Read/write, create when missing, owner read+write permission. */
  int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
  if (fd == -1) {
    printf("Unable to open file\n");
    exit(EXIT_FAILURE);
  }

  /* Seeking to the end yields the file size. */
  off_t file_length = lseek(fd, 0, SEEK_END);

  Pager* pager = malloc(sizeof(Pager));
  pager->file_descriptor = fd;
  pager->file_length = file_length;
  pager->num_pages = (file_length / PAGE_SIZE);

  if (file_length % PAGE_SIZE != 0) {
    printf("Db file is not a whole number of pages. Corrupt file.\n");
    exit(EXIT_FAILURE);
  }

  /* Start with nothing cached. */
  uint32_t slot = 0;
  while (slot < TABLE_MAX_PAGES) {
    pager->pages[slot] = NULL;
    slot++;
  }
  return pager;
}
/*
 * Returns the in-memory buffer for page page_num, loading it on a cache miss.
 *
 * pager:    pager that owns the cache and the file descriptor.
 * page_num: zero-based page index; must be below TABLE_MAX_PAGES.
 * Returns a PAGE_SIZE-byte buffer owned by the pager.
 *
 * On a miss a zero-filled buffer is allocated and, if the page exists in the
 * file, filled from disk. Zero-filling keeps pages that are new in this
 * session from carrying uninitialised heap bytes to disk when flushed.
 * pager->num_pages grows when a page past the current end is requested.
 *
 * Exits the process with EXIT_FAILURE on an out-of-range page number,
 * allocation failure, or seek/read failure.
 */
void* get_page(Pager* pager, uint32_t page_num) {
  /* pages[] has TABLE_MAX_PAGES slots, so TABLE_MAX_PAGES itself is invalid. */
  if (page_num >= TABLE_MAX_PAGES) {
    printf("Tried to fetch page number out of bounds. %u >= %d\n",
           (unsigned int)page_num, TABLE_MAX_PAGES);
    exit(EXIT_FAILURE);
  }

  if (pager->pages[page_num] == NULL) {
    /* Cache miss. Allocate memory and load from file. */
    void* page = calloc(1, PAGE_SIZE);
    if (page == NULL) {
      printf("Error allocating page: %d\n", errno);
      exit(EXIT_FAILURE);
    }

    /* Number of pages present in the file, counting a partial last page. */
    uint32_t num_pages = pager->file_length / PAGE_SIZE;
    if (pager->file_length % PAGE_SIZE) {
      num_pages += 1;
    }

    /* Only pages that exist on disk are read; others stay zero-filled. */
    if (page_num < num_pages) {
      if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
        printf("Error seeking: %d\n", errno);
        exit(EXIT_FAILURE);
      }
      ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
      if (bytes_read == -1) {
        printf("Error reading file: %d\n", errno);
        exit(EXIT_FAILURE);
      }
    }

    pager->pages[page_num] = page;
    if (page_num >= pager->num_pages) {
      pager->num_pages = page_num + 1;
    }
  }
  return pager->pages[page_num];
}
/*
 * Flushes every cached page to disk, closes the database file and releases
 * the pager and the table.
 *
 * Exits the process with EXIT_FAILURE if the file cannot be closed.
 */
void db_close(Table* table) {
  Pager* pager = table->pager;

  /* Write back and release each cached page below num_pages. */
  for (uint32_t page_num = 0; page_num < pager->num_pages; page_num++) {
    if (pager->pages[page_num] != NULL) {
      pager_flush(pager, page_num);
      free(pager->pages[page_num]);
      pager->pages[page_num] = NULL;
    }
  }

  if (close(pager->file_descriptor) == -1) {
    printf("Error closing db file.\n");
    exit(EXIT_FAILURE);
  }

  /* Release any buffer still held in the cache; these are not flushed. */
  for (uint32_t slot = 0; slot < TABLE_MAX_PAGES; slot++) {
    if (pager->pages[slot]) {
      free(pager->pages[slot]);
      pager->pages[slot] = NULL;
    }
  }

  free(pager);
  free(table);
}
/*
 * Writes one whole cached page back to its position in the database file.
 *
 * pager:    pager that owns the cache and the file descriptor.
 * page_num: index of the page to write; it must currently be cached.
 *
 * Exits the process with EXIT_FAILURE if the page is not cached or if
 * seeking or writing fails.
 */
void pager_flush(Pager* pager, uint32_t page_num) {
  void* page = pager->pages[page_num];
  if (page == NULL) {
    printf("Tried to flush null page\n");
    exit(EXIT_FAILURE);
  }

  if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
    printf("Error seeking: %d\n", errno);
    exit(EXIT_FAILURE);
  }

  if (write(pager->file_descriptor, page, PAGE_SIZE) == -1) {
    printf("Error writing: %d\n", errno);
    exit(EXIT_FAILURE);
  }
}
/*
 * Copies a Row into its packed on-page form.
 *
 * source:      row to serialize.
 * destination: buffer of at least ROW_SIZE bytes.
 *
 * Offsets are applied through a char pointer because arithmetic on void* is
 * a GNU extension rather than standard C.
 */
void serialize_row(Row* source, void* destination) {
  char* out = destination;
  memcpy(out + ID_OFFSET, &(source->id), ID_SIZE);
  memcpy(out + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
}
/*
 * Copies a packed on-page row back into a Row structure.
 *
 * source:      buffer holding ROW_SIZE bytes written by serialize_row.
 * destination: row that receives the fields.
 *
 * Offsets are applied through a char pointer because arithmetic on void* is
 * a GNU extension rather than standard C.
 */
void deserialize_row(void* source, Row* destination) {
  const char* in = source;
  memcpy(&(destination->id), in + ID_OFFSET, ID_SIZE);
  memcpy(&(destination->username), in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(&(destination->email), in + EMAIL_OFFSET, EMAIL_SIZE);
}
/*
 * Parses "insert <id> <username> <email>" into statement->row_to_insert.
 *
 * The input buffer is tokenized in place by strtok.
 *
 * Returns PREPARE_SYNTAX_ERROR when the argument count is wrong,
 * PREPARE_ILLEGAL_ID when the id contains a non-digit, PREPARE_ID_TOO_LONG
 * when the id has 10 or more digits, PREPARE_STRING_TOO_LONG when a string
 * exceeds its column size, and PREPARE_SUCCESS otherwise.
 */
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
  statement->type = STATEMENT_INSERT;

  strtok(input_buffer->buffer, " "); /* skip the keyword itself */
  char* id_string = strtok(NULL, " ");
  char* username = strtok(NULL, " ");
  char* email = strtok(NULL, " ");
  char* extra = strtok(NULL, " ");
  if (id_string == NULL || username == NULL || email == NULL || extra != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }

  /* isdigit requires a value representable as unsigned char. */
  size_t id_length = strlen(id_string);
  for (size_t i = 0; i < id_length; i++) {
    if (isdigit((unsigned char)id_string[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }

  int id = atoi(id_string);
  if (id_length >= 10) {
    return PREPARE_ID_TOO_LONG;
  }
  if (id < 0) {
    return PREPARE_NEGATIVE_ID;
  }
  if (strlen(username) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }

  statement->row_to_insert.id = id;
  strcpy(statement->row_to_insert.username, username);
  strcpy(statement->row_to_insert.email, email);
  return PREPARE_SUCCESS;
}
/*
 * Parses "select [id|username|email]" into statement->row_to_select.
 *
 * With no argument all columns are selected. More than one argument, or an
 * unknown column name, yields PREPARE_SYNTAX_ERROR. The input buffer is
 * tokenized in place by strtok.
 */
PrepareResult prepare_select(InputBuffer* input_buffer, Statement* statement) {
  statement->type = STATEMENT_SELECT;

  strtok(input_buffer->buffer, " "); /* skip the keyword itself */
  char* column = strtok(NULL, " ");
  char* extra = strtok(NULL, " ");

  if (extra != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (column == NULL) {
    statement->row_to_select = ALL;
    return PREPARE_SUCCESS;
  }

  if (strcmp(column, "id") == 0) {
    statement->row_to_select = ID;
    return PREPARE_SUCCESS;
  }
  if (strcmp(column, "username") == 0) {
    statement->row_to_select = USERNAME;
    return PREPARE_SUCCESS;
  }
  if (strcmp(column, "email") == 0) {
    statement->row_to_select = EMAIL;
    return PREPARE_SUCCESS;
  }
  return PREPARE_SYNTAX_ERROR;
}
/*
 * Parses "delete <id> <username> <email>" into statement->row_to_delete.
 *
 * The input buffer is tokenized in place by strtok. The id is converted only
 * after it has been validated as a short, all-digit string, so atoi never
 * sees input it cannot represent.
 *
 * Returns PREPARE_SYNTAX_ERROR when the argument count is wrong,
 * PREPARE_ILLEGAL_ID when the id contains a non-digit, PREPARE_ID_TOO_LONG
 * when the id has 10 or more digits, PREPARE_STRING_TOO_LONG when a string
 * exceeds its column size, and PREPARE_SUCCESS otherwise.
 */
PrepareResult prepare_delete(InputBuffer* input_buffer, Statement* statement) {
  statement->type = STATEMENT_DELETE;

  strtok(input_buffer->buffer, " "); /* skip the keyword itself */
  char* id_string = strtok(NULL, " ");
  char* username = strtok(NULL, " ");
  char* email = strtok(NULL, " ");
  char* extra = strtok(NULL, " ");
  if (extra != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (id_string == NULL || username == NULL || email == NULL) {
    return PREPARE_SYNTAX_ERROR;
  }

  /* isdigit requires a value representable as unsigned char. */
  size_t id_length = strlen(id_string);
  for (size_t i = 0; i < id_length; i++) {
    if (isdigit((unsigned char)id_string[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  if (id_length >= 10) {
    return PREPARE_ID_TOO_LONG;
  }
  if (strlen(username) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }

  statement->row_to_delete.id = atoi(id_string);
  strcpy(statement->row_to_delete.username, username);
  strcpy(statement->row_to_delete.email, email);
  return PREPARE_SUCCESS;
}
/*
 * Parses "update <id> <username> <email> <new_id> <new_username> <new_email>"
 * into statement->row_to_update_old and statement->row_to_update_new.
 *
 * The input buffer is tokenized in place by strtok. Both ids are converted
 * only after they have been validated as short, all-digit strings, so atoi
 * never sees input it cannot represent.
 *
 * Returns PREPARE_SYNTAX_ERROR when the argument count is wrong,
 * PREPARE_ILLEGAL_ID when an id contains a non-digit, PREPARE_ID_TOO_LONG
 * when an id has 10 or more digits, PREPARE_STRING_TOO_LONG when a string
 * exceeds its column size, and PREPARE_SUCCESS otherwise.
 */
PrepareResult prepare_update(InputBuffer* input_buffer, Statement* statement) {
  statement->type = STATEMENT_UPDATE;

  strtok(input_buffer->buffer, " "); /* skip the keyword itself */
  char* id_string_old = strtok(NULL, " ");
  char* username_old = strtok(NULL, " ");
  char* email_old = strtok(NULL, " ");
  char* id_string_new = strtok(NULL, " ");
  char* username_new = strtok(NULL, " ");
  char* email_new = strtok(NULL, " ");
  char* extra = strtok(NULL, " ");
  if (extra != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (id_string_old == NULL || username_old == NULL || email_old == NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (id_string_new == NULL || username_new == NULL || email_new == NULL) {
    return PREPARE_SYNTAX_ERROR;
  }

  /* isdigit requires a value representable as unsigned char. */
  size_t old_length = strlen(id_string_old);
  size_t new_length = strlen(id_string_new);
  for (size_t i = 0; i < old_length; i++) {
    if (isdigit((unsigned char)id_string_old[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  for (size_t i = 0; i < new_length; i++) {
    if (isdigit((unsigned char)id_string_new[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  if (old_length >= 10 || new_length >= 10) {
    return PREPARE_ID_TOO_LONG;
  }

  int id_old = atoi(id_string_old);
  int id_new = atoi(id_string_new);
  if (id_new < 0) {
    return PREPARE_NEGATIVE_ID;
  }

  if (strlen(username_old) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email_old) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(username_new) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email_new) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }

  statement->row_to_update_old.id = id_old;
  strcpy(statement->row_to_update_old.username, username_old);
  strcpy(statement->row_to_update_old.email, email_old);
  statement->row_to_update_new.id = id_new;
  strcpy(statement->row_to_update_new.username, username_new);
  strcpy(statement->row_to_update_new.email, email_new);
  return PREPARE_SUCCESS;
}
/*
 * Picks the parser for an input line by its first six characters and runs it.
 * Lines that start with none of insert/select/delete/update yield
 * PREPARE_UNRECOGNIZED_STATEMENT.
 */
PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement) {
  const char* line = input_buffer->buffer;

  if (strncmp(line, "insert", 6) == 0) {
    return prepare_insert(input_buffer, statement);
  }
  if (strncmp(line, "select", 6) == 0) {
    return prepare_select(input_buffer, statement);
  }
  if (strncmp(line, "delete", 6) == 0) {
    return prepare_delete(input_buffer, statement);
  }
  if (strncmp(line, "update", 6) == 0) {
    return prepare_update(input_buffer, statement);
  }
  return PREPARE_UNRECOGNIZED_STATEMENT;
}
/*
 * Executes an insert statement against the table's root leaf node.
 *
 * Returns EXECUTE_TABLE_FULL when the leaf has no free cell,
 * EXECUTE_PRIMARY_KEY_DUPLICATION when a stored row already has the same id,
 * and EXECUTE_SUCCESS after appending the row at the end of the leaf.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  void* node = get_page(table->pager, table->root_page_num);
  if (*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS) {
    return EXECUTE_TABLE_FULL;
  }

  Row* row_to_insert = &(statement->row_to_insert);

  /* Linear scan for an existing row with the same id. The cursor must be
   * advanced on every iteration and freed on every exit path. */
  Cursor* cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    Row row;
    deserialize_row(cursor_value(cursor), &row);
    if (row.id == row_to_insert->id) {
      free(cursor);
      return EXECUTE_PRIMARY_KEY_DUPLICATION;
    }
    cursor_advance(cursor);
  }
  free(cursor);

  cursor = table_end(table);
  leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
  free(cursor);
  return EXECUTE_SUCCESS;
}
/*
 * Prints the selected column (or all columns) of every row, one row per
 * line, walking the table from its first cell. Always returns
 * EXECUTE_SUCCESS.
 */
ExecuteResult execute_select(Statement* statement, Table* table) {
  Row current;
  Cursor* it;

  for (it = table_start(table); !(it->end_of_table); cursor_advance(it)) {
    deserialize_row(cursor_value(it), &current);
    switch (statement->row_to_select) {
      case ID:
        printf("(%d)\n", current.id);
        break;
      case USERNAME:
        printf("(%s)\n", current.username);
        break;
      case EMAIL:
        printf("(%s)\n", current.email);
        break;
      case ALL:
        printf("(%d, %s, %s)\n", current.id, current.username, current.email);
        break;
      default:
        break;
    }
  }

  free(it);
  return EXECUTE_SUCCESS;
}
/*
 * Deletes every row in the root leaf node whose id, username and email all
 * match statement->row_to_delete.
 *
 * Surviving cells are compacted toward the front of the node as whole cells
 * (key and value together) and the node's cell count is reduced, so removed
 * rows no longer show up in later scans.
 *
 * Returns EXECUTE_SUCCESS if at least one row was removed, otherwise
 * EXECUTE_NOT_FOUND.
 */
ExecuteResult execute_delete(Statement* statement, Table* table) {
  Row* target = &(statement->row_to_delete);
  void* node = get_page(table->pager, table->root_page_num);
  uint32_t num_cells = *leaf_node_num_cells(node);
  uint32_t kept = 0; /* number of surviving cells written so far */

  for (uint32_t i = 0; i < num_cells; i++) {
    Row row;
    deserialize_row(leaf_node_value(node, i), &row);

    bool matches = target->id == row.id &&
                   strcmp(target->username, row.username) == 0 &&
                   strcmp(target->email, row.email) == 0;
    if (matches) {
      continue;
    }

    /* Distinct cells never overlap, so memcpy is safe here. */
    if (kept != i) {
      memcpy(leaf_node_cell(node, kept), leaf_node_cell(node, i),
             LEAF_NODE_CELL_SIZE);
    }
    kept++;
  }

  if (kept == num_cells) {
    return EXECUTE_NOT_FOUND;
  }
  *leaf_node_num_cells(node) = kept;
  return EXECUTE_SUCCESS;
}
/*
 * Replaces the first row that matches statement->row_to_update_old (id,
 * username and email) with statement->row_to_update_new.
 *
 * A first pass rejects the update when the new id already belongs to a row
 * other than the one being replaced. When the matching row is rewritten, the
 * key stored in its leaf cell is updated as well so key and row id stay in
 * step. Every cursor is freed on every exit path.
 *
 * Returns EXECUTE_PRIMARY_KEY_DUPLICATION, EXECUTE_SUCCESS or
 * EXECUTE_NOT_FOUND.
 */
ExecuteResult execute_update(Statement* statement, Table* table) {
  Row* old_row = &(statement->row_to_update_old);
  Row* new_row = &(statement->row_to_update_new);
  Row row;

  /* Pass 1: make sure the new id does not collide with another row. */
  Cursor* cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    deserialize_row(cursor_value(cursor), &row);
    if (new_row->id == row.id && old_row->id != row.id) {
      free(cursor);
      return EXECUTE_PRIMARY_KEY_DUPLICATION;
    }
    cursor_advance(cursor);
  }
  free(cursor);

  /* Pass 2: find the row to replace and rewrite it in place. */
  cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    deserialize_row(cursor_value(cursor), &row);
    if (old_row->id == row.id &&
        strcmp(old_row->username, row.username) == 0 &&
        strcmp(old_row->email, row.email) == 0) {
      row.id = new_row->id;
      strcpy(row.username, new_row->username);
      strcpy(row.email, new_row->email);

      void* node = get_page(table->pager, cursor->page_num);
      *leaf_node_key(node, cursor->cell_num) = row.id;
      serialize_row(&row, cursor_value(cursor));

      free(cursor);
      return EXECUTE_SUCCESS;
    }
    cursor_advance(cursor);
  }
  free(cursor);
  return EXECUTE_NOT_FOUND;
}
/*
 * Dispatches a prepared statement to the executor for its type and returns
 * that executor's result.
 *
 * A statement type outside the StatementType values is an internal error;
 * the process exits instead of running off the end of a non-void function.
 */
ExecuteResult execute_statement(Statement* statement, Table* table) {
  switch (statement->type) {
    case (STATEMENT_INSERT):
      return execute_insert(statement, table);
    case (STATEMENT_SELECT):
      return execute_select(statement, table);
    case (STATEMENT_DELETE):
      return execute_delete(statement, table);
    case (STATEMENT_UPDATE):
      return execute_update(statement, table);
  }
  printf("Error: Unknown statement type.\n");
  exit(EXIT_FAILURE);
}
/* Writes the interactive prompt to standard output, with no newline. */
void print_prompt() {
  fputs("sqlite> ", stdout);
}
/*
 * Reads one line from standard input into the input buffer.
 *
 * The trailing newline is removed when present. A final line that ends at
 * end-of-file without a newline is kept whole rather than losing its last
 * character. input_length is set to the resulting line length.
 *
 * Exits the process with EXIT_FAILURE when nothing can be read.
 */
void read_input(InputBuffer* input_buffer) {
  ssize_t bytes_read = getline(&(input_buffer->buffer),
                               &(input_buffer->buffer_length), stdin);
  if (bytes_read <= 0) {
    printf("Error: Wrong reading input.\n");
    exit(EXIT_FAILURE);
  }

  if (input_buffer->buffer[bytes_read - 1] == '\n') {
    bytes_read--;
    input_buffer->buffer[bytes_read] = 0;
  }
  input_buffer->input_length = bytes_read;
}
/* Releases the line storage and then the input buffer itself. */
void close_input_buffer(InputBuffer* input_buffer) {
  char* line = input_buffer->buffer;

  free(line);
  free(input_buffer);
}
/*
 * Creates a cursor positioned at the first cell of the root node.
 * The cursor is heap-allocated; the caller frees it.
 */
Cursor* table_start(Table* table) {
  Cursor* start = malloc(sizeof(Cursor));
  start->table = table;
  start->page_num = table->root_page_num;
  start->cell_num = 0;

  /* With no cells in the root the cursor is already at the end. */
  void* root = get_page(table->pager, table->root_page_num);
  start->end_of_table = (*leaf_node_num_cells(root) == 0);
  return start;
}
/*
 * Creates a cursor positioned one past the last cell of the root node, which
 * is where the next cell would be appended.
 * The cursor is heap-allocated; the caller frees it.
 */
Cursor* table_end(Table* table) {
  Cursor* end = malloc(sizeof(Cursor));
  end->table = table;
  end->page_num = table->root_page_num;
  end->end_of_table = true;

  void* root = get_page(table->pager, table->root_page_num);
  end->cell_num = *leaf_node_num_cells(root);
  return end;
}
/* Returns a pointer to the serialized row in the cell the cursor points at. */
void* cursor_value(Cursor* cursor) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  return leaf_node_value(node, cursor->cell_num);
}
/*
 * Moves the cursor to the next cell and marks end_of_table once it has moved
 * past the last cell of its node.
 */
void cursor_advance(Cursor* cursor) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);
  uint32_t next_cell = cursor->cell_num + 1;

  cursor->cell_num = next_cell;
  if (next_cell >= *leaf_node_num_cells(leaf)) {
    cursor->end_of_table = true;
  }
}
/*
 * Returns a pointer to the cell-count field inside a leaf node's header.
 *
 * node: start of the page buffer that holds the leaf node.
 *
 * The byte offset is applied through a char pointer because arithmetic on
 * void* is a GNU extension rather than standard C.
 */
uint32_t* leaf_node_num_cells(void* node) {
  return (uint32_t*)((char*)node + LEAF_NODE_NUM_CELLS_OFFSET);
}
/*
 * Returns a pointer to the start of cell number cell_num in a leaf node.
 *
 * node:     start of the page buffer that holds the leaf node.
 * cell_num: zero-based cell index; no bounds check is performed here.
 *
 * Cells are laid out back to back right after the leaf header. The offset is
 * applied through a char pointer because arithmetic on void* is a GNU
 * extension rather than standard C.
 */
void* leaf_node_cell(void* node, uint32_t cell_num) {
  char* first_cell = (char*)node + LEAF_NODE_HEADER_SIZE;
  return first_cell + cell_num * LEAF_NODE_CELL_SIZE;
}
/*
 * Returns a pointer to the key of cell number cell_num in a leaf node.
 * The key is the first field of a cell, so this is the cell's own address.
 */
uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
  void* cell = leaf_node_cell(node, cell_num);
  return cell;
}
/*
 * Returns a pointer to the serialized row stored in cell number cell_num.
 * The value follows the key inside the cell. The offset is applied through a
 * char pointer because arithmetic on void* is a GNU extension rather than
 * standard C.
 */
void* leaf_node_value(void* node, uint32_t cell_num) {
  char* cell = leaf_node_cell(node, cell_num);
  return cell + LEAF_NODE_KEY_SIZE;
}
/* Prepares a page buffer as an empty leaf node by zeroing its cell count. */
void initialize_leaf_node(void* node) {
  uint32_t* cell_count = leaf_node_num_cells(node);
  *cell_count = 0;
}
/*
 * Inserts a key/row pair into the leaf node at the cursor's cell position.
 *
 * cursor: names the page and the cell index where the new cell goes.
 * key:    key stored at the start of the cell.
 * value:  row serialized into the cell after the key.
 *
 * Cells at or after the insertion index are moved up by one slot first.
 * Exits the process with EXIT_FAILURE when the node is already full.
 */
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);
  const uint32_t count = *leaf_node_num_cells(leaf);

  if (count >= LEAF_NODE_MAX_CELLS) {
    printf("Need to implement splitting a leaf node.\n");
    exit(EXIT_FAILURE);
  }

  /* Open a gap by copying cells upward, last cell first. */
  const uint32_t slot = cursor->cell_num;
  uint32_t dst = count;
  while (dst > slot) {
    memcpy(leaf_node_cell(leaf, dst), leaf_node_cell(leaf, dst - 1),
           LEAF_NODE_CELL_SIZE);
    dst--;
  }

  *(leaf_node_num_cells(leaf)) = count + 1;
  *(leaf_node_key(leaf, slot)) = key;
  serialize_row(value, leaf_node_value(leaf, slot));
}
/*
 * Prints the row and leaf-node layout constants, one per line.
 * The values are unsigned, so they are printed with %u through an explicit
 * unsigned int conversion instead of %d.
 */
void print_constants() {
  printf("ROW_SIZE: %u\n", (unsigned int)ROW_SIZE);
  printf("COMMON_NODE_HEADER_SIZE: %u\n", (unsigned int)COMMON_NODE_HEADER_SIZE);
  printf("LEAF_NODE_HEADER_SIZE: %u\n", (unsigned int)LEAF_NODE_HEADER_SIZE);
  printf("LEAF_NODE_CELL_SIZE: %u\n", (unsigned int)LEAF_NODE_CELL_SIZE);
  printf("LEAF_NODE_SPACE_FOR_CELLS: %u\n", (unsigned int)LEAF_NODE_SPACE_FOR_CELLS);
  printf("LEAF_NODE_MAX_CELLS: %u\n", (unsigned int)LEAF_NODE_MAX_CELLS);
}
/*
 * Prints the number of cells in a leaf node followed by one line per cell
 * giving the cell index and its key.
 * Indices and keys are unsigned, so they are printed with %u instead of %d.
 */
void print_leaf_node(void* node) {
  uint32_t num_cells = *leaf_node_num_cells(node);
  printf("leaf (size %u)\n", (unsigned int)num_cells);
  for (uint32_t i = 0; i < num_cells; i++) {
    uint32_t key = *leaf_node_key(node, i);
    printf(" - %u : %u\n", (unsigned int)i, (unsigned int)key);
  }
}
/*
 * Program entry point: opens the database file named by the single
 * command-line argument and runs the read-evaluate-print loop.
 *
 * The loop has no exit condition of its own; the process ends through the
 * ".exit" meta command or through an exit() call on an error path.
 */
int main(int argc, char* argv[]) {
if (argc != 2) {
printf("Must supply a database filename.\n");
exit(EXIT_FAILURE);
}
char* filename = argv[1];
Table* table = db_open(filename);
InputBuffer* input_buffer = new_input_buffer();
while (true) {
print_prompt();
read_input(input_buffer);
/* Lines starting with '.' are meta commands, not statements. The `continue`
 * statements inside the switch apply to the enclosing while loop. */
if (input_buffer->buffer[0] == '.') {
switch (do_meta_command(input_buffer,table)) {
case (META_COMMAND_SUCCESS):
continue;
case (META_COMMAND_UNRECOGNIZED_COMMAND):
printf("Error: Unrecognized command '%s'.\n", input_buffer->buffer);
continue;
}
}
/* Parse the line; any parse failure reports an error and prompts again. */
Statement statement;
switch (prepare_statement(input_buffer, &statement)) {
case (PREPARE_SUCCESS):
break;
case (PREPARE_STRING_TOO_LONG):
printf("Error: String is too long.\n");
continue;
case (PREPARE_NEGATIVE_ID):
printf("Error: ID must be positive.\n");
continue;
case (PREPARE_ID_TOO_LONG):
printf("Error: Id is too long.\n");
continue;
case (PREPARE_SYNTAX_ERROR):
printf("Error: Syntax error.\n");
continue;
case (PREPARE_UNRECOGNIZED_STATEMENT):
printf("Error: Unrecognized keyword at start of '%s'.\n",input_buffer->buffer);
continue;
case (PREPARE_ILLEGAL_ID):
printf("Error: ILLEGAL ID.\n");
continue;
}
/* Run the parsed statement and report its outcome. */
switch (execute_statement(&statement, table)) {
case (EXECUTE_SUCCESS):
printf("Executed.\n");
break;
case (EXECUTE_PRIMARY_KEY_DUPLICATION):
printf("Error: Primary key duplication.\n");
break;
case (EXECUTE_NOT_FOUND):
printf("Error: Not found.\n");
break;
case (EXECUTE_TABLE_FULL):
printf("Error: Table full.\n");
break;
}
}
}
以上就是所有内容,喜欢的点个赞吧!