闲来无事,写个数据库吧(8)

项目地址:How Does a Database Work? | Let’s Build a Simple Database (cstack.github.io)

B+树的效率高,这体现在增删改查各个方面。以下是原作者给出的对比。

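| | 未排序数组 | 排序数组 | 节点树 |
|---|---|---|---|
| 页面包含 | 仅数据 | 仅数据 | 元数据、主键和数据 |
| 每页行数 | 较多 | 较多 | 较少 |
| 插入 | O(1) | O(n) | O(log(n)) |
| 删除 | O(n) | O(n) | O(log(n)) |
| 按 ID 查找 | O(n) | O(log(n)) | O(log(n)) |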

这里我说一下排序数组的查找为什么是O(log(n)),我们可以二分查找。比如我们可以先找并比较n/2元素的大小,也就是说,我们把数组分两块,小的话找左边块,大的话找右边块。(假设从小到大排)再比较这块的中间,这样一半一半的剔除,最后就是O(log(n)),而插入删除需要修改其他成员的位置,故为O(n)。我们以前的数据库是未排序数组那一档的,效率低下。故我们引入b+树。

这里可能会劝退一些人,我当时也属实不想做了。但仔细一想,连b+树都没有的东西能叫数据库?明白数据结构的重要性,是每个程序员应有的素养!

首先我们对节点分类,分为内部节点和叶节点,之所以这么分是因为叶节点存储数据而内部节点不用。

/* Node kinds of the B+ tree; per the article, only leaf nodes store row data. */
typedef enum {
  NODE_INTERNAL,
  NODE_LEAF 
} NodeType;

我们先处理叶节点。在我们的设想中,一个叶节点占一页,一页最多放13行数据(理论上哪怕别的什么也不存,也到不了14行:4096/(4+33+256)≈13.97)。剩下不足一行的空间没法放数据,那么就得放点有用的。首先得判断是不是叶节点,是的话才有下面的内容。然后得判断是不是根节点,如果不是根节点,接下来就是父节点的指针,不能忘本嘛!接下来记录单元格(cell)的数量。最后就是关键词(key)及数据。还剩下的空间就只能浪费了。

这里我们开始定义大小及偏移量。有人不理解为什么这么做,我的理解是定位功能。我们需要用到某个变量时,通过这些变量定位到指定位置。

/*
 * Common node header layout, in bytes from the start of the page:
 * node type (1 byte), is-root flag (1 byte), parent pointer (4 bytes).
 */
const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
const uint32_t NODE_TYPE_OFFSET = 0;
const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
const uint8_t COMMON_NODE_HEADER_SIZE =NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;

/* Leaf node header: the common header followed by a 4-byte cell count. */
const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_HEADER_SIZE =COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;

/*
 * Leaf node body: an array of cells. Each cell is a 4-byte key followed by
 * one serialized row (ROW_SIZE bytes). Offsets here are relative to the cell.
 */
const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_KEY_OFFSET = 0;
const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
const uint32_t LEAF_NODE_VALUE_OFFSET =LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
/* Bytes left in a page after the leaf header, and how many whole cells fit. */
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_MAX_CELLS =LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;

/*
 * Returns a pointer to the cell counter stored in a leaf node's header.
 * node: start of the page buffer that holds the leaf node.
 */
uint32_t* leaf_node_num_cells(void* node) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  return (uint32_t*)((char*)node + LEAF_NODE_NUM_CELLS_OFFSET);
}

/*
 * Returns the address of cell number cell_num inside a leaf node.
 * Cells start right after the leaf header and are LEAF_NODE_CELL_SIZE apart.
 * No bounds check is performed on cell_num.
 */
void* leaf_node_cell(void* node, uint32_t cell_num) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  return (char*)node + LEAF_NODE_HEADER_SIZE + cell_num * LEAF_NODE_CELL_SIZE;
}

/*
 * Returns a pointer to the key of cell cell_num; the key sits at the very
 * start of the cell.
 */
uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
  void* cell_start = leaf_node_cell(node, cell_num);
  return cell_start;
}

/*
 * Returns the address of the serialized row stored in cell cell_num,
 * i.e. the bytes that follow the cell's key.
 */
void* leaf_node_value(void* node, uint32_t cell_num) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  return (char*)leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
}
/* Marks a leaf node as empty by resetting its cell counter to zero. */
void initialize_leaf_node(void* node) {
  uint32_t* cell_count = leaf_node_num_cells(node);
  *cell_count = 0;
}

接下来我们对pager相关函数进行修改。由于页里存的内容和以前不同了(以前就只有数据),我们的pager不能像以前一样只读写部分页面了,所以这部分逻辑要删除,这样pager_flush函数也没必要有三个参数了。

/*
 * Flushes every cached page to disk, closes the database file and frees the
 * pager and the table. Exits the process if the file cannot be closed.
 */
void db_close(Table* table) {
  Pager* pager = table->pager;

  /* Write each cached page back to the file, then drop it from the cache. */
  for (uint32_t page_num = 0; page_num < pager->num_pages; page_num++) {
    if (pager->pages[page_num] != NULL) {
      pager_flush(pager, page_num);
      free(pager->pages[page_num]);
      pager->pages[page_num] = NULL;
    }
  }

  if (close(pager->file_descriptor) == -1) {
    printf("Error closing db file.\n");
    exit(EXIT_FAILURE);
  }

  /* Release any buffer still held in any slot of the page cache. */
  for (uint32_t slot = 0; slot < TABLE_MAX_PAGES; slot++) {
    if (pager->pages[slot] != NULL) {
      free(pager->pages[slot]);
      pager->pages[slot] = NULL;
    }
  }

  free(pager);
  free(table);
}


/*
 * Writes the cached copy of page page_num to its position in the database
 * file (a full PAGE_SIZE bytes). Exits the process if the page is not cached
 * or if seeking/writing fails.
 */
void pager_flush(Pager* pager, uint32_t page_num) {
  void* page = pager->pages[page_num];
  if (page == NULL) {
    printf("Tried to flush null page\n");
    exit(EXIT_FAILURE);
  }

  if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
    printf("Error seeking: %d\n", errno);
    exit(EXIT_FAILURE);
  }

  if (write(pager->file_descriptor, page, PAGE_SIZE) == -1) {
    printf("Error writing: %d\n", errno);
    exit(EXIT_FAILURE);
  }
}

我们叶节点与页挂钩,那么就不能还是以前的行存储了。所有与行存储的东西有关得全部改掉。虽然于心不忍,虽然要改很多东西,但是为了效率,我们必须要这么做。

我们删除ROWS_PER_PAGE 和TABLE_MAX_ROWS两个参数,并且修改Pager和Table

/* Page cache sitting between the table and the database file. */
typedef struct {
  int file_descriptor;           /* descriptor returned by open() */
  uint32_t file_length;          /* file size in bytes, measured when the file is opened */
  uint32_t num_pages;            /* number of pages known to the pager */
  void* pages[TABLE_MAX_PAGES];  /* cached page buffers; NULL means not loaded */
} Pager;
 
/* An open table: its pager plus the page number of the tree's root node. */
typedef struct {
  Pager* pager;
  uint32_t root_page_num;
} Table;

我们再修改pager_open和get_page,确保页的完整性。

/*
 * Opens (creating if necessary) the database file and returns a new pager
 * with an empty page cache. Exits the process if the file cannot be opened
 * or its size is not a whole number of pages.
 */
Pager* pager_open(const char* filename) {
  /* Read/write, create if missing, owner read+write permissions. */
  int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
  if (fd == -1) {
    printf("Unable to open file\n");
    exit(EXIT_FAILURE);
  }

  /* Seeking to the end yields the current file size. */
  off_t file_length = lseek(fd, 0, SEEK_END);

  Pager* pager = malloc(sizeof(Pager));
  pager->file_descriptor = fd;
  pager->file_length = file_length;
  pager->num_pages = (file_length / PAGE_SIZE);

  if (file_length % PAGE_SIZE != 0) {
    printf("Db file is not a whole number of pages. Corrupt file.\n");
    exit(EXIT_FAILURE);
  }

  /* Start with nothing cached. */
  uint32_t slot = 0;
  while (slot < TABLE_MAX_PAGES) {
    pager->pages[slot] = NULL;
    slot++;
  }
  return pager;
}

/*
 * Returns the in-memory buffer for page page_num, loading it on a cache miss.
 *
 * On a miss a PAGE_SIZE buffer is allocated; if the page already exists in
 * the file its contents are read in, otherwise the buffer is left
 * uninitialized for the caller to fill. pager->num_pages grows when a page
 * beyond the current count is requested.
 *
 * Exits the process if page_num is outside the cache or the read fails.
 */
void* get_page(Pager* pager, uint32_t page_num) {
  /* Valid cache slots are 0 .. TABLE_MAX_PAGES - 1. */
  if (page_num >= TABLE_MAX_PAGES) {
    printf("Tried to fetch page number out of bounds. %u >= %d\n", page_num,
           TABLE_MAX_PAGES);
    exit(EXIT_FAILURE);
  }

  if (pager->pages[page_num] == NULL) {
    // Cache miss. Allocate memory and load from file.
    void* page = malloc(PAGE_SIZE);
    uint32_t num_pages = pager->file_length / PAGE_SIZE;

    // We might save a partial page at the end of the file
    if (pager->file_length % PAGE_SIZE) {
      num_pages += 1;
    }

    /* Pages on disk are numbered 0 .. num_pages - 1; only those can be read. */
    if (page_num < num_pages) {
      lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
      ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
      if (bytes_read == -1) {
        printf("Error reading file: %d\n", errno);
        exit(EXIT_FAILURE);
      }
    }

    pager->pages[page_num] = page;
    if (page_num >= pager->num_pages) {
      pager->num_pages = page_num + 1;
    }
  }

  return pager->pages[page_num];
}

cursor中还有row,改它!先改定义。 

/* A position inside the table, addressed as (page, cell). */
typedef struct {
  Table* table;
  uint32_t page_num;  /* page of the node the cursor is in */
  uint32_t cell_num;  /* index of the cell within that node */
  bool end_of_table;  
} Cursor;

再改实现

/*
 * Allocates a cursor positioned at cell 0 of the root node. The cursor is
 * flagged end_of_table right away when the root holds no cells.
 * The caller owns the returned cursor and must free() it.
 */
Cursor* table_start(Table* table) {
  void* root_node = get_page(table->pager, table->root_page_num);
  uint32_t cell_count = *leaf_node_num_cells(root_node);

  Cursor* cursor = malloc(sizeof(Cursor));
  cursor->table = table;
  cursor->page_num = table->root_page_num;
  cursor->cell_num = 0;
  cursor->end_of_table = (cell_count == 0);
  return cursor;
}

/*
 * Allocates a cursor positioned one past the last cell of the root node,
 * i.e. where the next appended cell would go.
 * The caller owns the returned cursor and must free() it.
 */
Cursor* table_end(Table* table) {
  void* root_node = get_page(table->pager, table->root_page_num);

  Cursor* cursor = malloc(sizeof(Cursor));
  cursor->table = table;
  cursor->page_num = table->root_page_num;
  cursor->cell_num = *leaf_node_num_cells(root_node);
  cursor->end_of_table = true;
  return cursor;
}

/* Returns the address of the serialized row the cursor currently points at. */
void* cursor_value(Cursor* cursor) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  return leaf_node_value(node, cursor->cell_num);
}

/*
 * Moves the cursor to the next cell of its node and sets end_of_table once
 * it has moved past the last cell.
 */
void cursor_advance(Cursor* cursor) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  uint32_t cell_count = *leaf_node_num_cells(node);

  cursor->cell_num++;
  if (cursor->cell_num >= cell_count) {
    cursor->end_of_table = true;
  }
}

当我们第一次打开数据库时,文件是空的,因此我们将第 0 页初始化为根节点。

/*
 * Opens the database file and returns a table whose root is page 0.
 * For a file with no pages yet, page 0 is set up as an empty leaf node.
 */
Table* db_open(const char* filename) {
  Table* table = malloc(sizeof(Table));
  table->pager = pager_open(filename);
  table->root_page_num = 0;

  if (table->pager->num_pages == 0) {
    /* New database file: page 0 becomes an empty leaf node. */
    initialize_leaf_node(get_page(table->pager, 0));
  }
  return table;
}

好了,我们开始修改插入数据的实现。我们用cursor来插入到叶节点,这里如果节点满了就直接fail,之后再来拆分。

/*
 * Inserts (key, value) into the leaf node at the cursor's cell position,
 * shifting later cells one slot to the right. Exits the process when the
 * node is already full, since splitting is not implemented yet.
 */
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  uint32_t* cell_count = leaf_node_num_cells(node);
  const uint32_t old_count = *cell_count;

  if (old_count >= LEAF_NODE_MAX_CELLS) {
    printf("Need to implement splitting a leaf node.\n");
    exit(EXIT_FAILURE);
  }

  const uint32_t slot = cursor->cell_num;

  /* Open a gap at slot: move cells right one position, last cell first. */
  uint32_t i = old_count;
  while (i > slot) {
    memcpy(leaf_node_cell(node, i), leaf_node_cell(node, i - 1),
           LEAF_NODE_CELL_SIZE);
    i--;
  }

  *cell_count = old_count + 1;
  *(leaf_node_key(node, slot)) = key;
  serialize_row(value, leaf_node_value(node, slot));
}

然后修改调用。这里有点问题,我们判断主键重复的地方仍然用到了row,我们下章再改。

/*
 * Appends statement->row_to_insert to the root leaf node.
 *
 * Returns EXECUTE_TABLE_FULL when the leaf has no free cell,
 * EXECUTE_PRIMARY_KEY_DUPLICATION when a stored row already has the same id,
 * and EXECUTE_SUCCESS otherwise.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  void* node = get_page(table->pager, table->root_page_num);
  if ((*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS)) {
    return EXECUTE_TABLE_FULL;
  }

  Row* row_to_insert = &(statement->row_to_insert);

  /* Scan every stored row for a duplicate id. */
  Row row;
  Cursor* cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    deserialize_row(cursor_value(cursor), &row);
    if (row.id == row_to_insert->id) {
      free(cursor);
      return EXECUTE_PRIMARY_KEY_DUPLICATION;
    }
    cursor_advance(cursor);  /* without this the scan would never terminate */
  }
  free(cursor);

  cursor = table_end(table);
  leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
  free(cursor);
  return EXECUTE_SUCCESS;
}

select,update,delete也在下一章改,所以代码是不能运行的,还请大家体谅。

还记得元命令吗?如果只有一个.exit,那么太简单了,这里我们增加可视化内容。.constants查看节点大小及数量等常量,.btree查看节点内容。

/* Prints the row/node layout constants, one "NAME: value" line each (".constants" meta command). */
void print_constants() {
  printf("ROW_SIZE: %d\n", ROW_SIZE);
  printf("COMMON_NODE_HEADER_SIZE: %d\n", COMMON_NODE_HEADER_SIZE);
  printf("LEAF_NODE_HEADER_SIZE: %d\n", LEAF_NODE_HEADER_SIZE);
  printf("LEAF_NODE_CELL_SIZE: %d\n", LEAF_NODE_CELL_SIZE);
  printf("LEAF_NODE_SPACE_FOR_CELLS: %d\n", LEAF_NODE_SPACE_FOR_CELLS);
  printf("LEAF_NODE_MAX_CELLS: %d\n", LEAF_NODE_MAX_CELLS);
}
/* Prints the number of cells in a leaf node followed by "index : key" for each cell. */
void print_leaf_node(void* node) {
  const uint32_t cell_count = *leaf_node_num_cells(node);
  printf("leaf (size %d)\n", cell_count);

  uint32_t index = 0;
  while (index < cell_count) {
    printf("  - %d : %d\n", index, *leaf_node_key(node, index));
    index++;
  }
}

修改实现

/*
 * Runs a meta command (input starting with '.').
 *   .exit      - releases the input buffer, closes the database, exits
 *   .btree     - prints the leaf node stored in page 0
 *   .constants - prints the layout constants
 * Anything else yields META_COMMAND_UNRECOGNIZED_COMMAND.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer,Table* table) {
  const char* command = input_buffer->buffer;

  if (strcmp(command, ".exit") == 0) {
    close_input_buffer(input_buffer);
    db_close(table);
    exit(EXIT_SUCCESS);
  }

  if (strcmp(command, ".btree") == 0) {
    printf("Tree:\n");
    print_leaf_node(get_page(table->pager, 0));
    return META_COMMAND_SUCCESS;
  }

  if (strcmp(command, ".constants") == 0) {
    printf("Constants:\n");
    print_constants();
    return META_COMMAND_SUCCESS;
  }

  return META_COMMAND_UNRECOGNIZED_COMMAND;
}

目前不能成功运行,这个我们留到下一期。这里给出代码。

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <ctype.h>
#define COLUMN_USERNAME_SIZE 32
#define COLUMN_EMAIL_SIZE 255
#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)
#define TABLE_MAX_PAGES 100
#include <limits.h>

/* Outcome of do_meta_command(). */
typedef enum {
  META_COMMAND_SUCCESS,
  META_COMMAND_UNRECOGNIZED_COMMAND
} MetaCommandResult;

/*
 * Outcome of parsing a statement (prepare_* functions). main() prints one
 * error message per non-success value.
 */
typedef enum { 
  PREPARE_SUCCESS, 
  PREPARE_NEGATIVE_ID,
  PREPARE_ILLEGAL_ID,
  PREPARE_ID_TOO_LONG,
  PREPARE_SYNTAX_ERROR,
  PREPARE_STRING_TOO_LONG,
  PREPARE_UNRECOGNIZED_STATEMENT
} PrepareResult;

/* Outcome of executing a statement (execute_* functions). */
typedef enum { 
  EXECUTE_SUCCESS,
  EXECUTE_TABLE_FULL,
  EXECUTE_PRIMARY_KEY_DUPLICATION,
  EXECUTE_NOT_FOUND
} ExecuteResult;


/* Statement kinds recognized by prepare_statement(). */
typedef enum { 
  STATEMENT_INSERT, 
  STATEMENT_SELECT,
  STATEMENT_DELETE,
  STATEMENT_UPDATE
} StatementType;

/* Node kinds of the B+ tree; per the article, only leaf nodes store row data. */
typedef enum {
  NODE_INTERNAL,
  NODE_LEAF 
} NodeType;

/* Which column(s) a select statement prints; ALL prints id, username and email. */
typedef enum {
  ID,	
  USERNAME,
  EMAIL,
  ALL	
} SelectResult;

/* Page cache sitting between the table and the database file. */
typedef struct {
  int file_descriptor;           /* descriptor returned by open() */
  uint32_t file_length;          /* file size in bytes, measured when the file is opened */
  uint32_t num_pages;            /* number of pages known to the pager */
  void* pages[TABLE_MAX_PAGES];  /* cached page buffers; NULL means not loaded */
} Pager;
 
/* An open table: its pager plus the page number of the tree's root node. */
typedef struct {
  Pager* pager;
  uint32_t root_page_num;
} Table;

/* One table row. The string fields reserve one extra byte for the NUL terminator. */
typedef struct {
  uint32_t id;
  char username[COLUMN_USERNAME_SIZE+1];
  char email[COLUMN_EMAIL_SIZE+1];
} Row;

/* Line buffer filled by read_input() via getline(). */
typedef struct{
  char* buffer;          /* heap buffer managed by getline(); NULL until first read */
  size_t buffer_length;  /* capacity of buffer as reported by getline() */
  ssize_t input_length;  /* length of the current line as stored by read_input() */
} InputBuffer;

/* A parsed statement; which row fields are filled depends on type. */
typedef struct {
  StatementType type;
  /* Filled by prepare_insert(). */
  Row row_to_insert;
  /* Filled by prepare_select(): which column(s) to print. */
  SelectResult row_to_select; 
  /* Filled by prepare_delete(). */
  Row row_to_delete;
  /* Filled by prepare_update(): the row to look for and its replacement. */
  Row row_to_update_old;
  Row row_to_update_new;
} Statement;

 /* A position inside the table, addressed as (page, cell). */
 typedef struct {
   Table* table;
   /* Page of the node the cursor is in. */
   uint32_t page_num;
   /* Index of the cell within that node. */
   uint32_t cell_num;
   bool end_of_table;  
 } Cursor;

/*
 * Serialized row layout: id, then username, then email, packed back to back.
 * PAGE_SIZE is the size in bytes of one page buffer / one on-disk page.
 */
const uint32_t ID_OFFSET = 0;
const uint32_t PAGE_SIZE = 4096;
const uint32_t ID_SIZE = size_of_attribute(Row, id);
const uint32_t USERNAME_SIZE = size_of_attribute(Row, username);
const uint32_t EMAIL_SIZE = size_of_attribute(Row, email);
const uint32_t USERNAME_OFFSET = ID_OFFSET + ID_SIZE;
const uint32_t EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE;
const uint32_t ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE;
/* NOTE(review): the next two are not referenced by any code in this listing; the article text says they were to be removed. */
const uint32_t ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE;
const uint32_t TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES;
/*
 * Common node header layout, in bytes from the start of the page:
 * node type (1 byte), is-root flag (1 byte), parent pointer (4 bytes).
 */
const uint32_t NODE_TYPE_SIZE = sizeof(uint8_t);
const uint32_t NODE_TYPE_OFFSET = 0;
const uint32_t IS_ROOT_SIZE = sizeof(uint8_t);
const uint32_t IS_ROOT_OFFSET = NODE_TYPE_SIZE;
const uint32_t PARENT_POINTER_SIZE = sizeof(uint32_t);
const uint32_t PARENT_POINTER_OFFSET = IS_ROOT_OFFSET + IS_ROOT_SIZE;
const uint8_t COMMON_NODE_HEADER_SIZE =NODE_TYPE_SIZE + IS_ROOT_SIZE + PARENT_POINTER_SIZE;

/* Leaf node header: the common header followed by a 4-byte cell count. */
const uint32_t LEAF_NODE_NUM_CELLS_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_NUM_CELLS_OFFSET = COMMON_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_HEADER_SIZE =COMMON_NODE_HEADER_SIZE + LEAF_NODE_NUM_CELLS_SIZE;

/*
 * Leaf node body: an array of cells. Each cell is a 4-byte key followed by
 * one serialized row (ROW_SIZE bytes). Offsets here are relative to the cell.
 */
const uint32_t LEAF_NODE_KEY_SIZE = sizeof(uint32_t);
const uint32_t LEAF_NODE_KEY_OFFSET = 0;
const uint32_t LEAF_NODE_VALUE_SIZE = ROW_SIZE;
const uint32_t LEAF_NODE_VALUE_OFFSET =LEAF_NODE_KEY_OFFSET + LEAF_NODE_KEY_SIZE;
const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
/* Bytes left in a page after the leaf header, and how many whole cells fit. */
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_MAX_CELLS =LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;

/*
 * Forward declarations, so the functions below can be called before their
 * definitions appear.
 * NOTE(review): free_table is declared here but no definition is visible in
 * this listing.
 */
void free_table(Table* table) ;
void close_input_buffer(InputBuffer* input_buffer);
/* Pager and table lifecycle. */
void pager_flush(Pager* pager, uint32_t page_num) ; 
Pager* pager_open(const char* filename);
void* get_page(Pager* pager, uint32_t page_num); 
void db_close(Table* table);
/* Cursor creation and navigation. */
Cursor* table_start(Table* table);
Cursor* table_end(Table* table);
void* cursor_value(Cursor* cursor);
void cursor_advance(Cursor* cursor);
/* Leaf node field accessors and operations. */
uint32_t* leaf_node_num_cells(void* node);
void* leaf_node_cell(void* node, uint32_t cell_num);
uint32_t* leaf_node_key(void* node, uint32_t cell_num);
void* leaf_node_value(void* node, uint32_t cell_num);
void initialize_leaf_node(void* node);
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value);
/* Debug output used by the meta commands. */
void print_constants();
void print_leaf_node(void* node);
 
/*
 * Runs a meta command (input starting with '.').
 *   .exit      - releases the input buffer, closes the database, exits
 *   .btree     - prints the leaf node stored in page 0
 *   .constants - prints the layout constants
 * Anything else yields META_COMMAND_UNRECOGNIZED_COMMAND.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer,Table* table) {
  const char* command = input_buffer->buffer;

  if (strcmp(command, ".exit") == 0) {
    close_input_buffer(input_buffer);
    db_close(table);
    exit(EXIT_SUCCESS);
  }

  if (strcmp(command, ".btree") == 0) {
    printf("Tree:\n");
    print_leaf_node(get_page(table->pager, 0));
    return META_COMMAND_SUCCESS;
  }

  if (strcmp(command, ".constants") == 0) {
    printf("Constants:\n");
    print_constants();
    return META_COMMAND_SUCCESS;
  }

  return META_COMMAND_UNRECOGNIZED_COMMAND;
}
InputBuffer* new_input_buffer(){
  InputBuffer* input_buffer=(InputBuffer*)malloc(sizeof(InputBuffer));
  input_buffer->buffer=NULL;
  input_buffer->buffer_length=0;
  input_buffer->input_length=0;
  return input_buffer;
}

/*
 * Opens the database file and returns a table whose root is page 0.
 * For a file with no pages yet, page 0 is set up as an empty leaf node.
 */
Table* db_open(const char* filename) {
  Table* table = malloc(sizeof(Table));
  table->pager = pager_open(filename);
  table->root_page_num = 0;

  if (table->pager->num_pages == 0) {
    /* New database file: page 0 becomes an empty leaf node. */
    initialize_leaf_node(get_page(table->pager, 0));
  }
  return table;
}

/*
 * Opens (creating if necessary) the database file and returns a new pager
 * with an empty page cache. Exits the process if the file cannot be opened
 * or its size is not a whole number of pages.
 */
Pager* pager_open(const char* filename) {
  /* Read/write, create if missing, owner read+write permissions. */
  int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
  if (fd == -1) {
    printf("Unable to open file\n");
    exit(EXIT_FAILURE);
  }

  /* Seeking to the end yields the current file size. */
  off_t file_length = lseek(fd, 0, SEEK_END);

  Pager* pager = malloc(sizeof(Pager));
  pager->file_descriptor = fd;
  pager->file_length = file_length;
  pager->num_pages = (file_length / PAGE_SIZE);

  if (file_length % PAGE_SIZE != 0) {
    printf("Db file is not a whole number of pages. Corrupt file.\n");
    exit(EXIT_FAILURE);
  }

  /* Start with nothing cached. */
  uint32_t slot = 0;
  while (slot < TABLE_MAX_PAGES) {
    pager->pages[slot] = NULL;
    slot++;
  }
  return pager;
}

/*
 * Returns the in-memory buffer for page page_num, loading it on a cache miss.
 *
 * On a miss a PAGE_SIZE buffer is allocated; if the page already exists in
 * the file its contents are read in, otherwise the buffer is left
 * uninitialized for the caller to fill. pager->num_pages grows when a page
 * beyond the current count is requested.
 *
 * Exits the process if page_num is outside the cache or the read fails.
 */
void* get_page(Pager* pager, uint32_t page_num) {
  /* Valid cache slots are 0 .. TABLE_MAX_PAGES - 1. */
  if (page_num >= TABLE_MAX_PAGES) {
    printf("Tried to fetch page number out of bounds. %u >= %d\n", page_num,
           TABLE_MAX_PAGES);
    exit(EXIT_FAILURE);
  }

  if (pager->pages[page_num] == NULL) {
    // Cache miss. Allocate memory and load from file.
    void* page = malloc(PAGE_SIZE);
    uint32_t num_pages = pager->file_length / PAGE_SIZE;

    // We might save a partial page at the end of the file
    if (pager->file_length % PAGE_SIZE) {
      num_pages += 1;
    }

    /* Pages on disk are numbered 0 .. num_pages - 1; only those can be read. */
    if (page_num < num_pages) {
      lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET);
      ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
      if (bytes_read == -1) {
        printf("Error reading file: %d\n", errno);
        exit(EXIT_FAILURE);
      }
    }

    pager->pages[page_num] = page;
    if (page_num >= pager->num_pages) {
      pager->num_pages = page_num + 1;
    }
  }

  return pager->pages[page_num];
}

/*
 * Flushes every cached page to disk, closes the database file and frees the
 * pager and the table. Exits the process if the file cannot be closed.
 */
void db_close(Table* table) {
  Pager* pager = table->pager;

  /* Write each cached page back to the file, then drop it from the cache. */
  for (uint32_t page_num = 0; page_num < pager->num_pages; page_num++) {
    if (pager->pages[page_num] != NULL) {
      pager_flush(pager, page_num);
      free(pager->pages[page_num]);
      pager->pages[page_num] = NULL;
    }
  }

  if (close(pager->file_descriptor) == -1) {
    printf("Error closing db file.\n");
    exit(EXIT_FAILURE);
  }

  /* Release any buffer still held in any slot of the page cache. */
  for (uint32_t slot = 0; slot < TABLE_MAX_PAGES; slot++) {
    if (pager->pages[slot] != NULL) {
      free(pager->pages[slot]);
      pager->pages[slot] = NULL;
    }
  }

  free(pager);
  free(table);
}


/*
 * Writes the cached copy of page page_num to its position in the database
 * file (a full PAGE_SIZE bytes). Exits the process if the page is not cached
 * or if seeking/writing fails.
 */
void pager_flush(Pager* pager, uint32_t page_num) {
  void* page = pager->pages[page_num];
  if (page == NULL) {
    printf("Tried to flush null page\n");
    exit(EXIT_FAILURE);
  }

  if (lseek(pager->file_descriptor, page_num * PAGE_SIZE, SEEK_SET) == -1) {
    printf("Error seeking: %d\n", errno);
    exit(EXIT_FAILURE);
  }

  if (write(pager->file_descriptor, page, PAGE_SIZE) == -1) {
    printf("Error writing: %d\n", errno);
    exit(EXIT_FAILURE);
  }
}


/*
 * Copies a Row into its packed on-page form at destination:
 * id, username and email at their respective *_OFFSET positions.
 * destination must have room for ROW_SIZE bytes.
 */
void serialize_row(Row* source, void* destination) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  char* out = destination;
  memcpy(out + ID_OFFSET, &(source->id), ID_SIZE);
  memcpy(out + USERNAME_OFFSET, source->username, USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, source->email, EMAIL_SIZE);
}

/*
 * Rebuilds a Row from its packed on-page form at source (the inverse of
 * serialize_row). source must hold ROW_SIZE readable bytes.
 */
void deserialize_row(void* source, Row* destination) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  const char* in = source;
  memcpy(&(destination->id), in + ID_OFFSET, ID_SIZE);
  memcpy(destination->username, in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(destination->email, in + EMAIL_OFFSET, EMAIL_SIZE);
}

/*
 * Parses "insert <id> <username> <email>" into statement->row_to_insert.
 * The input buffer is tokenized in place by strtok().
 *
 * Returns PREPARE_SYNTAX_ERROR for a wrong number of arguments,
 * PREPARE_ILLEGAL_ID when the id contains a non-digit,
 * PREPARE_ID_TOO_LONG when the id has 10 or more digits,
 * PREPARE_STRING_TOO_LONG when username/email exceed their column size,
 * and PREPARE_SUCCESS otherwise.
 */
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
  statement->type = STATEMENT_INSERT;

  strtok(input_buffer->buffer, " ");  /* skip the "insert" keyword */
  char* id_string = strtok(NULL, " ");
  char* username = strtok(NULL, " ");
  char* email = strtok(NULL, " ");
  char* check = strtok(NULL, " ");
  if (id_string == NULL || username == NULL || email == NULL || check != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }

  size_t id_length = strlen(id_string);
  for (size_t i = 0; i < id_length; i++) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    if (isdigit((unsigned char)id_string[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  if (id_length >= 10) {
    return PREPARE_ID_TOO_LONG;
  }

  /* At most 9 digits here, so the conversion cannot overflow an int. */
  int id = atoi(id_string);
  if (id < 0) {
    return PREPARE_NEGATIVE_ID;
  }
  if (strlen(username) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }

  statement->row_to_insert.id = id;
  strcpy(statement->row_to_insert.username, username);
  strcpy(statement->row_to_insert.email, email);

  return PREPARE_SUCCESS;
}

/*
 * Parses "select [id|username|email]" and records which column(s) to print
 * in statement->row_to_select (ALL when no column is given).
 * Any other argument, or more than one argument, is a syntax error.
 */
PrepareResult prepare_select(InputBuffer* input_buffer,Statement* statement){
  statement->type = STATEMENT_SELECT;

  char* keyword = strtok(input_buffer->buffer, " ");
  (void)keyword;  /* only consumed to advance the tokenizer */
  char* column = strtok(NULL, " ");
  char* extra = strtok(NULL, " ");

  if (extra != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (column == NULL) {
    statement->row_to_select = ALL;
    return PREPARE_SUCCESS;
  }

  if (strcmp(column, "id") == 0) {
    statement->row_to_select = ID;
    return PREPARE_SUCCESS;
  }
  if (strcmp(column, "username") == 0) {
    statement->row_to_select = USERNAME;
    return PREPARE_SUCCESS;
  }
  if (strcmp(column, "email") == 0) {
    statement->row_to_select = EMAIL;
    return PREPARE_SUCCESS;
  }
  return PREPARE_SYNTAX_ERROR;
}

/*
 * Parses "delete <id> <username> <email>" into statement->row_to_delete.
 * The input buffer is tokenized in place by strtok().
 *
 * Returns PREPARE_SYNTAX_ERROR for a wrong number of arguments,
 * PREPARE_ILLEGAL_ID when the id contains a non-digit,
 * PREPARE_ID_TOO_LONG when the id has 10 or more digits,
 * PREPARE_STRING_TOO_LONG when username/email exceed their column size,
 * and PREPARE_SUCCESS otherwise.
 */
PrepareResult prepare_delete(InputBuffer* input_buffer,Statement* statement){
  statement->type = STATEMENT_DELETE;

  strtok(input_buffer->buffer, " ");  /* skip the "delete" keyword */
  char* id_string = strtok(NULL, " ");
  char* username = strtok(NULL, " ");
  char* email = strtok(NULL, " ");
  char* check = strtok(NULL, " ");
  if (check != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (id_string == NULL || username == NULL || email == NULL) {
    return PREPARE_SYNTAX_ERROR;
  }

  size_t id_length = strlen(id_string);
  for (size_t i = 0; i < id_length; i++) {
    /* <ctype.h> functions require a value representable as unsigned char. */
    if (isdigit((unsigned char)id_string[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  if (id_length >= 10) {
    return PREPARE_ID_TOO_LONG;
  }
  if (strlen(username) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }

  /* Converted only after validation: at most 9 digits, so no int overflow. */
  int id = atoi(id_string);

  statement->row_to_delete.id = id;
  strcpy(statement->row_to_delete.username, username);
  strcpy(statement->row_to_delete.email, email);

  return PREPARE_SUCCESS;
}

/*
 * Parses "update <id> <username> <email> <new_id> <new_username> <new_email>"
 * into statement->row_to_update_old and statement->row_to_update_new.
 * The input buffer is tokenized in place by strtok().
 *
 * Returns PREPARE_SYNTAX_ERROR for a wrong number of arguments,
 * PREPARE_ILLEGAL_ID when either id contains a non-digit,
 * PREPARE_ID_TOO_LONG when either id has 10 or more digits,
 * PREPARE_STRING_TOO_LONG when any string exceeds its column size,
 * and PREPARE_SUCCESS otherwise.
 */
PrepareResult prepare_update(InputBuffer* input_buffer,Statement* statement){
  statement->type = STATEMENT_UPDATE;

  strtok(input_buffer->buffer, " ");  /* skip the "update" keyword */
  char* id_string_old = strtok(NULL, " ");
  char* username_old = strtok(NULL, " ");
  char* email_old = strtok(NULL, " ");
  char* id_string_new = strtok(NULL, " ");
  char* username_new = strtok(NULL, " ");
  char* email_new = strtok(NULL, " ");
  char* check = strtok(NULL, " ");
  if (check != NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (id_string_old == NULL || username_old == NULL || email_old == NULL) {
    return PREPARE_SYNTAX_ERROR;
  }
  if (id_string_new == NULL || username_new == NULL || email_new == NULL) {
    return PREPARE_SYNTAX_ERROR;
  }

  size_t old_length = strlen(id_string_old);
  size_t new_length = strlen(id_string_new);
  /* <ctype.h> functions require a value representable as unsigned char. */
  for (size_t i = 0; i < old_length; i++) {
    if (isdigit((unsigned char)id_string_old[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  for (size_t i = 0; i < new_length; i++) {
    if (isdigit((unsigned char)id_string_new[i]) == 0) {
      return PREPARE_ILLEGAL_ID;
    }
  }
  if (old_length >= 10 || new_length >= 10) {
    return PREPARE_ID_TOO_LONG;
  }

  /* Converted only after validation: at most 9 digits, so no int overflow. */
  int id_old = atoi(id_string_old);
  int id_new = atoi(id_string_new);
  if (id_new < 0) {
    return PREPARE_NEGATIVE_ID;
  }
  if (strlen(username_old) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email_old) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(username_new) > COLUMN_USERNAME_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }
  if (strlen(email_new) > COLUMN_EMAIL_SIZE) {
    return PREPARE_STRING_TOO_LONG;
  }

  statement->row_to_update_old.id = id_old;
  strcpy(statement->row_to_update_old.username, username_old);
  strcpy(statement->row_to_update_old.email, email_old);
  statement->row_to_update_new.id = id_new;
  strcpy(statement->row_to_update_new.username, username_new);
  strcpy(statement->row_to_update_new.email, email_new);
  return PREPARE_SUCCESS;
}

/*
 * Dispatches on the first six characters of the input line to the matching
 * prepare_* parser; anything else is PREPARE_UNRECOGNIZED_STATEMENT.
 */
PrepareResult prepare_statement(InputBuffer* input_buffer,Statement* statement) {
  const char* text = input_buffer->buffer;

  if (strncmp(text, "insert", 6) == 0) {
    return prepare_insert(input_buffer, statement);
  }
  if (strncmp(text, "select", 6) == 0) {
    return prepare_select(input_buffer, statement);
  }
  if (strncmp(text, "delete", 6) == 0) {
    return prepare_delete(input_buffer, statement);
  }
  if (strncmp(text, "update", 6) == 0) {
    return prepare_update(input_buffer, statement);
  }
  return PREPARE_UNRECOGNIZED_STATEMENT;
}


/*
 * Appends statement->row_to_insert to the root leaf node.
 *
 * Returns EXECUTE_TABLE_FULL when the leaf has no free cell,
 * EXECUTE_PRIMARY_KEY_DUPLICATION when a stored row already has the same id,
 * and EXECUTE_SUCCESS otherwise.
 */
ExecuteResult execute_insert(Statement* statement, Table* table) {
  void* node = get_page(table->pager, table->root_page_num);
  if ((*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS)) {
    return EXECUTE_TABLE_FULL;
  }

  Row* row_to_insert = &(statement->row_to_insert);

  /* Scan every stored row for a duplicate id. */
  Row row;
  Cursor* cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    deserialize_row(cursor_value(cursor), &row);
    if (row.id == row_to_insert->id) {
      free(cursor);
      return EXECUTE_PRIMARY_KEY_DUPLICATION;
    }
    cursor_advance(cursor);  /* without this the scan would never terminate */
  }
  free(cursor);

  cursor = table_end(table);
  leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
  free(cursor);
  return EXECUTE_SUCCESS;
}
 
/*
 * Prints every row of the table, one per line, showing the column(s)
 * chosen by statement->row_to_select. Always returns EXECUTE_SUCCESS.
 */
ExecuteResult execute_select(Statement* statement, Table* table) {
  Row row;
  Cursor* cursor = table_start(table);

  for (; !(cursor->end_of_table); cursor_advance(cursor)) {
    deserialize_row(cursor_value(cursor), &row);
    switch (statement->row_to_select) {
      case ID:
        printf("(%d)\n", row.id);
        break;
      case USERNAME:
        printf("(%s)\n", row.username);
        break;
      case EMAIL:
        printf("(%s)\n", row.email);
        break;
      case ALL:
        printf("(%d, %s, %s)\n", row.id, row.username, row.email);
        break;
      default:
        break;
    }
  }

  free(cursor);
  return EXECUTE_SUCCESS;
}
 
/*
 * Deletes every row in the root leaf node whose id, username and email all
 * equal statement->row_to_delete.
 *
 * Surviving cells are compacted towards the front of the node (key and row
 * together) and the node's cell count is reduced accordingly.
 *
 * Returns EXECUTE_SUCCESS if at least one row was removed,
 * EXECUTE_NOT_FOUND otherwise.
 */
ExecuteResult execute_delete(Statement* statement, Table* table) {
  Row* target = &(statement->row_to_delete);
  void* node = get_page(table->pager, table->root_page_num);
  uint32_t num_cells = *leaf_node_num_cells(node);

  uint32_t kept = 0;  /* number of surviving cells == next write position */
  Row row;
  for (uint32_t i = 0; i < num_cells; i++) {
    deserialize_row(leaf_node_value(node, i), &row);
    bool matches = target->id == row.id &&
                   strcmp(target->username, row.username) == 0 &&
                   strcmp(target->email, row.email) == 0;
    if (matches) {
      continue;  /* dropped: later survivors overwrite this slot */
    }
    if (kept != i) {
      /* Move the whole cell, so the key stays paired with its row. */
      *leaf_node_key(node, kept) = *leaf_node_key(node, i);
      serialize_row(&row, leaf_node_value(node, kept));
    }
    kept++;
  }

  if (kept == num_cells) {
    return EXECUTE_NOT_FOUND;
  }
  *leaf_node_num_cells(node) = kept;
  return EXECUTE_SUCCESS;
}
 
/*
 * Replaces the first row equal to statement->row_to_update_old (id, username
 * and email all matching) with statement->row_to_update_new.
 *
 * Returns EXECUTE_PRIMARY_KEY_DUPLICATION if the new id is already used by a
 * row whose id differs from the old id, EXECUTE_SUCCESS when a row was
 * replaced, and EXECUTE_NOT_FOUND when no row matched.
 */
ExecuteResult execute_update(Statement* statement, Table* table) {
  Row* old_row = &(statement->row_to_update_old);
  Row* new_row = &(statement->row_to_update_new);
  Row row;

  /* Pass 1: make sure the new id does not collide with another row. */
  Cursor* cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    deserialize_row(cursor_value(cursor), &row);
    if (new_row->id == row.id && old_row->id != row.id) {
      free(cursor);
      return EXECUTE_PRIMARY_KEY_DUPLICATION;
    }
    cursor_advance(cursor);
  }
  free(cursor);

  /* Pass 2: find the row to replace and overwrite it in place. */
  cursor = table_start(table);
  while (!(cursor->end_of_table)) {
    deserialize_row(cursor_value(cursor), &row);
    if (old_row->id == row.id &&
        strcmp(old_row->username, row.username) == 0 &&
        strcmp(old_row->email, row.email) == 0) {
      row.id = new_row->id;
      strcpy(row.username, new_row->username);
      strcpy(row.email, new_row->email);

      /* Keep the cell key in step with the row id it was inserted under. */
      void* node = get_page(table->pager, cursor->page_num);
      *leaf_node_key(node, cursor->cell_num) = row.id;
      serialize_row(&row, cursor_value(cursor));

      free(cursor);
      return EXECUTE_SUCCESS;
    }
    cursor_advance(cursor);
  }
  free(cursor);
  return EXECUTE_NOT_FOUND;
}

/*
 * Runs a prepared statement against the table by dispatching on its type
 * and returns the result of the matching execute_* function.
 */
ExecuteResult execute_statement(Statement* statement, Table* table) {
  switch (statement->type) {
    case (STATEMENT_INSERT):
      return execute_insert(statement, table);
    case (STATEMENT_SELECT):
      return execute_select(statement, table);
    case (STATEMENT_DELETE):
      return execute_delete(statement, table);
    case (STATEMENT_UPDATE):
      return execute_update(statement, table);
  }

  /*
   * Only reached if type holds a value outside StatementType. Stop instead
   * of falling off the end of a non-void function.
   */
  printf("Error: Unknown statement type.\n");
  exit(EXIT_FAILURE);
}


/* Prints the interactive prompt (no trailing newline). */
void print_prompt() { 
  printf("sqlite> ");
}

/*
 * Reads one line from stdin into input_buffer (storage managed by getline)
 * and strips the trailing newline, if there is one.
 * input_length is set to the length of the stored line.
 * Exits the process when nothing could be read (error or end of input).
 */
void read_input(InputBuffer* input_buffer){
  ssize_t bytes_read = getline(&(input_buffer->buffer), &(input_buffer->buffer_length), stdin);

  if (bytes_read <= 0) {
    printf("Error: Wrong reading input.\n");
    exit(EXIT_FAILURE);
  }

  /* The final line of the input may end without a newline; keep its last byte. */
  if (input_buffer->buffer[bytes_read - 1] == '\n') {
    bytes_read -= 1;
    input_buffer->buffer[bytes_read] = 0;
  }
  input_buffer->input_length = bytes_read;
}

/* Frees the input buffer together with the line storage it owns. */
void close_input_buffer(InputBuffer* input_buffer){
  char* line = input_buffer->buffer;
  free(input_buffer);
  free(line);
}

/*
 * Allocates a cursor positioned at cell 0 of the root node. The cursor is
 * flagged end_of_table right away when the root holds no cells.
 * The caller owns the returned cursor and must free() it.
 */
Cursor* table_start(Table* table) {
  void* root_node = get_page(table->pager, table->root_page_num);
  uint32_t cell_count = *leaf_node_num_cells(root_node);

  Cursor* cursor = malloc(sizeof(Cursor));
  cursor->table = table;
  cursor->page_num = table->root_page_num;
  cursor->cell_num = 0;
  cursor->end_of_table = (cell_count == 0);
  return cursor;
}

/*
 * Allocates a cursor positioned one past the last cell of the root node,
 * i.e. where the next appended cell would go.
 * The caller owns the returned cursor and must free() it.
 */
Cursor* table_end(Table* table) {
  void* root_node = get_page(table->pager, table->root_page_num);

  Cursor* cursor = malloc(sizeof(Cursor));
  cursor->table = table;
  cursor->page_num = table->root_page_num;
  cursor->cell_num = *leaf_node_num_cells(root_node);
  cursor->end_of_table = true;
  return cursor;
}

/* Returns the address of the serialized row the cursor currently points at. */
void* cursor_value(Cursor* cursor) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  return leaf_node_value(node, cursor->cell_num);
}

/*
 * Moves the cursor to the next cell of its node and sets end_of_table once
 * it has moved past the last cell.
 */
void cursor_advance(Cursor* cursor) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  uint32_t cell_count = *leaf_node_num_cells(node);

  cursor->cell_num++;
  if (cursor->cell_num >= cell_count) {
    cursor->end_of_table = true;
  }
}

/*
 * Returns a pointer to the cell counter stored in a leaf node's header.
 * node: start of the page buffer that holds the leaf node.
 */
uint32_t* leaf_node_num_cells(void* node) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  return (uint32_t*)((char*)node + LEAF_NODE_NUM_CELLS_OFFSET);
}

/*
 * Returns the address of cell number cell_num inside a leaf node.
 * Cells start right after the leaf header and are LEAF_NODE_CELL_SIZE apart.
 * No bounds check is performed on cell_num.
 */
void* leaf_node_cell(void* node, uint32_t cell_num) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  return (char*)node + LEAF_NODE_HEADER_SIZE + cell_num * LEAF_NODE_CELL_SIZE;
}

/*
 * Returns a pointer to the key of cell cell_num; the key sits at the very
 * start of the cell.
 */
uint32_t* leaf_node_key(void* node, uint32_t cell_num) {
  void* cell_start = leaf_node_cell(node, cell_num);
  return cell_start;
}

/*
 * Returns the address of the serialized row stored in cell cell_num,
 * i.e. the bytes that follow the cell's key.
 */
void* leaf_node_value(void* node, uint32_t cell_num) {
  /* Arithmetic on void* is a GNU extension; step through a byte pointer. */
  return (char*)leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
}
/* Marks a leaf node as empty by resetting its cell counter to zero. */
void initialize_leaf_node(void* node) {
  uint32_t* cell_count = leaf_node_num_cells(node);
  *cell_count = 0;
}

/*
 * Inserts (key, value) into the leaf node at the cursor's cell position,
 * shifting later cells one slot to the right. Exits the process when the
 * node is already full, since splitting is not implemented yet.
 */
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
  void* node = get_page(cursor->table->pager, cursor->page_num);
  uint32_t* cell_count = leaf_node_num_cells(node);
  const uint32_t old_count = *cell_count;

  if (old_count >= LEAF_NODE_MAX_CELLS) {
    printf("Need to implement splitting a leaf node.\n");
    exit(EXIT_FAILURE);
  }

  const uint32_t slot = cursor->cell_num;

  /* Open a gap at slot: move cells right one position, last cell first. */
  uint32_t i = old_count;
  while (i > slot) {
    memcpy(leaf_node_cell(node, i), leaf_node_cell(node, i - 1),
           LEAF_NODE_CELL_SIZE);
    i--;
  }

  *cell_count = old_count + 1;
  *(leaf_node_key(node, slot)) = key;
  serialize_row(value, leaf_node_value(node, slot));
}
/* Prints the row/node layout constants, one "NAME: value" line each (".constants" meta command). */
void print_constants() {
  printf("ROW_SIZE: %d\n", ROW_SIZE);
  printf("COMMON_NODE_HEADER_SIZE: %d\n", COMMON_NODE_HEADER_SIZE);
  printf("LEAF_NODE_HEADER_SIZE: %d\n", LEAF_NODE_HEADER_SIZE);
  printf("LEAF_NODE_CELL_SIZE: %d\n", LEAF_NODE_CELL_SIZE);
  printf("LEAF_NODE_SPACE_FOR_CELLS: %d\n", LEAF_NODE_SPACE_FOR_CELLS);
  printf("LEAF_NODE_MAX_CELLS: %d\n", LEAF_NODE_MAX_CELLS);
}
/* Prints the number of cells in a leaf node followed by "index : key" for each cell. */
void print_leaf_node(void* node) {
  const uint32_t cell_count = *leaf_node_num_cells(node);
  printf("leaf (size %d)\n", cell_count);

  uint32_t index = 0;
  while (index < cell_count) {
    printf("  - %d : %d\n", index, *leaf_node_key(node, index));
    index++;
  }
}
/*
 * REPL entry point. Expects the database filename as the only argument,
 * then loops forever: prompt, read a line, run it as a meta command (lines
 * starting with '.') or as a statement, and report the outcome.
 * The loop is only left through the ".exit" meta command or a fatal error.
 */
int main(int argc, char* argv[]) {
  if (argc != 2) {
    printf("Must supply a database filename.\n");
    exit(EXIT_FAILURE);
  }

  Table* table = db_open(argv[1]);
  InputBuffer* input_buffer = new_input_buffer();

  for (;;) {
    print_prompt();
    read_input(input_buffer);

    /* Meta commands never reach the statement parser. */
    if (input_buffer->buffer[0] == '.') {
      switch (do_meta_command(input_buffer, table)) {
        case META_COMMAND_UNRECOGNIZED_COMMAND:
          printf("Error: Unrecognized command '%s'.\n", input_buffer->buffer);
          continue;
        case META_COMMAND_SUCCESS:
          continue;
      }
    }

    /* Parse; on any parse error report it and prompt again. */
    Statement statement;
    switch (prepare_statement(input_buffer, &statement)) {
      case PREPARE_SUCCESS:
        break;
      case PREPARE_SYNTAX_ERROR:
        printf("Error: Syntax error.\n");
        continue;
      case PREPARE_UNRECOGNIZED_STATEMENT:
        printf("Error: Unrecognized keyword at start of '%s'.\n",input_buffer->buffer);
        continue;
      case PREPARE_ILLEGAL_ID:
        printf("Error: ILLEGAL ID.\n");
        continue;
      case PREPARE_NEGATIVE_ID:
        printf("Error: ID must be positive.\n");
        continue;
      case PREPARE_ID_TOO_LONG:
        printf("Error: Id is too long.\n");
        continue;
      case PREPARE_STRING_TOO_LONG:
        printf("Error: String is too long.\n");
        continue;
    }

    /* Execute and report. */
    switch (execute_statement(&statement, table)) {
      case EXECUTE_SUCCESS:
        printf("Executed.\n");
        break;
      case EXECUTE_TABLE_FULL:
        printf("Error: Table full.\n");
        break;
      case EXECUTE_PRIMARY_KEY_DUPLICATION:
        printf("Error: Primary key duplication.\n");
        break;
      case EXECUTE_NOT_FOUND:
        printf("Error: Not found.\n");
        break;
    }
  }
}

以上就是所有内容,喜欢的点个赞吧!

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

weixin_51275728

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值