This commit is contained in:
Timothy Warren 2019-04-30 15:17:58 -04:00
parent 1e4a7defdc
commit 53c27d8cdf
2 changed files with 252 additions and 178 deletions

406
db.c
View File

@ -15,6 +15,7 @@ typedef struct InputBuffer_t InputBuffer;
enum ExecuteResult_t { enum ExecuteResult_t {
EXECUTE_SUCCESS, EXECUTE_SUCCESS,
EXECUTE_DUPLICATE_KEY,
EXECUTE_TABLE_FULL EXECUTE_TABLE_FULL
}; };
typedef enum ExecuteResult_t ExecuteResult; typedef enum ExecuteResult_t ExecuteResult;
@ -90,6 +91,10 @@ struct Cursor_t {
}; };
typedef struct Cursor_t Cursor; typedef struct Cursor_t Cursor;
/*
 * Write one row to stdout in the form "(id, username, email)",
 * terminated by a newline.
 */
void print_row(Row* row) {
  const char* username = row->username;
  const char* email = row->email;

  printf("(%d, %s, %s)\n", row->id, username, email);
}
enum NodeType_t { NODE_INTERNAL, NODE_LEAF }; enum NodeType_t { NODE_INTERNAL, NODE_LEAF };
typedef enum NodeType_t NodeType; typedef enum NodeType_t NodeType;
@ -122,6 +127,16 @@ const uint32_t LEAF_NODE_CELL_SIZE = LEAF_NODE_KEY_SIZE + LEAF_NODE_VALUE_SIZE;
const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE; const uint32_t LEAF_NODE_SPACE_FOR_CELLS = PAGE_SIZE - LEAF_NODE_HEADER_SIZE;
const uint32_t LEAF_NODE_MAX_CELLS = LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE; const uint32_t LEAF_NODE_MAX_CELLS = LEAF_NODE_SPACE_FOR_CELLS / LEAF_NODE_CELL_SIZE;
/*
 * Read the one-byte node-type tag stored NODE_TYPE_OFFSET bytes into
 * the given node and return it as a NodeType.
 */
NodeType get_node_type(void* node) {
  /* Cast before offsetting: arithmetic on void* is a GNU extension, not ISO C. */
  const uint8_t* tag = (const uint8_t*)node + NODE_TYPE_OFFSET;
  return (NodeType)*tag;
}
/*
 * Store `type` as a single byte NODE_TYPE_OFFSET bytes into the given node.
 */
void set_node_type(void* node, NodeType type) {
  /* Cast before offsetting: arithmetic on void* is a GNU extension, not ISO C. */
  uint8_t* tag = (uint8_t*)node + NODE_TYPE_OFFSET;
  *tag = (uint8_t)type;
}
/*
 * Return a pointer to the cell counter of a leaf node, located
 * LEAF_NODE_NUM_CELLS_OFFSET bytes into the node.
 */
uint32_t* leaf_node_num_cells(void* node) {
  /*
   * The byte offset is applied on a char*, then the result is converted
   * explicitly: returning a char* where uint32_t* is declared is an
   * incompatible-pointer conversion.
   */
  return (uint32_t*)((char*)node + LEAF_NODE_NUM_CELLS_OFFSET);
}
@ -138,10 +153,186 @@ void* leaf_node_value(void* node, uint32_t cell_num) {
return leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE; return leaf_node_cell(node, cell_num) + LEAF_NODE_KEY_SIZE;
} }
/*
 * Print the compile-time layout constants of the row and leaf-node
 * formats to stdout, one "NAME: value" pair per line.
 */
void print_constants() {
  /*
   * The constants are unsigned, so they are printed with %u through an
   * explicit cast instead of %d; the text produced is unchanged.
   */
  printf("ROW_SIZE: %u\n", (unsigned)ROW_SIZE);
  printf("COMMON_NODE_HEADER_SIZE: %u\n", (unsigned)COMMON_NODE_HEADER_SIZE);
  printf("LEAF_NODE_HEADER_SIZE: %u\n", (unsigned)LEAF_NODE_HEADER_SIZE);
  printf("LEAF_NODE_CELL_SIZE: %u\n", (unsigned)LEAF_NODE_CELL_SIZE);
  printf("LEAF_NODE_SPACE_FOR_CELLS: %u\n", (unsigned)LEAF_NODE_SPACE_FOR_CELLS);
  printf("LEAF_NODE_MAX_CELLS: %u\n", (unsigned)LEAF_NODE_MAX_CELLS);
}
/*
 * Return the in-memory copy of page `page_num`, loading it on first use.
 *
 * On a cache miss a PAGE_SIZE buffer is allocated and, when the page lies
 * within the file, filled from the file descriptor held by the pager. The
 * buffer is stored in pager->pages[] and pager->num_pages is raised if the
 * requested page lies beyond the current count.
 *
 * Exits the process on an out-of-range page number or a read error.
 */
void* get_page(Pager* pager, uint32_t page_num) {
  /*
   * Valid indices are 0 .. TABLE_MAX_PAGES - 1 (the range pager_open
   * initializes), so TABLE_MAX_PAGES itself must be rejected as well.
   * The message wording is kept as it was.
   */
  if (page_num >= TABLE_MAX_PAGES) {
    printf("Tried to fetch page number out of bounds. %u > %u\n",
           (unsigned)page_num, (unsigned)TABLE_MAX_PAGES);
    exit(EXIT_FAILURE);
  }

  if (pager->pages[page_num] == NULL) {
    // Cache miss. Allocate memory and load from file.
    void* page = malloc(PAGE_SIZE);
    uint32_t num_pages = pager->file_length / PAGE_SIZE;

    // We might save a partial page at the end of the file
    if (pager->file_length % PAGE_SIZE) {
      num_pages += 1;
    }

    if (page_num <= num_pages) {
      /* Compute the byte offset in off_t so it cannot wrap in 32 bits. */
      lseek(pager->file_descriptor, (off_t)page_num * PAGE_SIZE, SEEK_SET);
      ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
      if (bytes_read == -1) {
        printf("Error reading file: %d\n", errno);
        exit(EXIT_FAILURE);
      }
    }

    pager->pages[page_num] = page;

    if (page_num >= pager->num_pages) {
      pager->num_pages = page_num + 1;
    }
  }

  return pager->pages[page_num];
}
/*
 * Copy the id, username and email fields of `source` into the flat buffer
 * `destination`, at ID_OFFSET, USERNAME_OFFSET and EMAIL_OFFSET respectively.
 */
void serialize_row(Row* source, void* destination) {
  /* Offsets are applied on a char*: arithmetic on void* is a GNU extension. */
  char* out = destination;

  memcpy(out + ID_OFFSET, &(source->id), ID_SIZE);
  memcpy(out + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
}
/*
 * Fill the id, username and email fields of `destination` from the flat
 * buffer `source`, reading at ID_OFFSET, USERNAME_OFFSET and EMAIL_OFFSET.
 */
void deserialize_row(void* source, Row* destination) {
  /* Offsets are applied on a char*: arithmetic on void* is a GNU extension. */
  const char* in = source;

  memcpy(&(destination->id), in + ID_OFFSET, ID_SIZE);
  memcpy(&(destination->username), in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(&(destination->email), in + EMAIL_OFFSET, EMAIL_SIZE);
}
void initialize_leaf_node(void* node) { void initialize_leaf_node(void* node) {
set_node_type(node, NODE_LEAF);
*leaf_node_num_cells(node) = 0; *leaf_node_num_cells(node) = 0;
} }
/*
 * Binary-search the leaf node stored in page `page_num` for `key`.
 *
 * Returns a newly malloc'd Cursor whose cell_num is the index of the
 * matching cell, or, when the key is absent, the index at which it would
 * have to be inserted to keep the keys ordered. The caller owns the cursor.
 */
Cursor* leaf_node_find(Table* table, uint32_t page_num, uint32_t key) {
  void* node = get_page(table->pager, page_num);
  uint32_t num_cells = *leaf_node_num_cells(node);

  Cursor* cursor = malloc(sizeof(Cursor));
  cursor->table = table;
  cursor->page_num = page_num;

  // Binary search over the half-open range [min_index, one_past_max_index)
  uint32_t min_index = 0;
  uint32_t one_past_max_index = num_cells;
  while (one_past_max_index != min_index) {
    /* Midpoint written so the sum of the two bounds is never formed. */
    uint32_t index = min_index + (one_past_max_index - min_index) / 2;
    uint32_t key_at_index = *leaf_node_key(node, index);
    if (key == key_at_index) {
      min_index = index;
      break;
    }
    if (key < key_at_index) {
      one_past_max_index = index;
    } else {
      min_index = index + 1;
    }
  }

  cursor->cell_num = min_index;
  /*
   * Every field of the freshly allocated cursor is given a value; the
   * cursor is "at the end" when it points one past the last cell.
   */
  cursor->end_of_table = (min_index >= num_cells);
  return cursor;
}
/*
 * Locate `key` in the table.
 * The returned cursor points at the key when it exists, otherwise at the
 * slot where the key would have to be inserted.
 * Only a leaf root is supported; any other root type aborts the program.
 */
Cursor* table_find(Table* table, uint32_t key) {
  uint32_t root = table->root_page_num;

  if (get_node_type(get_page(table->pager, root)) != NODE_LEAF) {
    printf("Need to implement searching an internal node\n");
    exit(EXIT_FAILURE);
  }

  return leaf_node_find(table, root, key);
}
/*
 * Allocate a cursor positioned on the first cell of the root page.
 * end_of_table is set straight away when the root holds no cells.
 * The caller owns the returned cursor.
 */
Cursor* table_start(Table* table) {
  Cursor* start = malloc(sizeof(Cursor));
  start->table = table;
  start->page_num = table->root_page_num;
  start->cell_num = 0;

  uint32_t* cell_count =
      leaf_node_num_cells(get_page(table->pager, table->root_page_num));
  start->end_of_table = (*cell_count == 0);

  return start;
}
/*
 * Return a pointer to the value stored in the cell the cursor refers to.
 */
void* cursor_value(Cursor* cursor) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);

  return leaf_node_value(leaf, cursor->cell_num);
}
/*
 * Move the cursor to the next cell of its page and flag end_of_table
 * once it has stepped past the last cell.
 */
void cursor_advance(Cursor* cursor) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);

  cursor->cell_num++;

  uint32_t cell_count = *leaf_node_num_cells(leaf);
  if (!(cursor->cell_num < cell_count)) {
    cursor->end_of_table = true;
  }
}
/*
 * Open (creating if necessary) the database file and build a Pager for it.
 *
 * The pager records the descriptor, the file length and the number of
 * whole pages in the file, and starts with every page-cache slot empty.
 * Exits the process when the file cannot be opened or its length is not
 * a multiple of PAGE_SIZE.
 */
Pager* pager_open(const char* filename) {
  // Read/write access, create the file when missing, owner may read and write
  int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
  if (fd == -1) {
    printf("Unable to open file\n");
    exit(EXIT_FAILURE);
  }

  // Seeking to the end yields the file size
  off_t file_length = lseek(fd, 0, SEEK_END);

  Pager* pager = malloc(sizeof(Pager));
  pager->file_descriptor = fd;
  pager->file_length = file_length;
  pager->num_pages = (file_length / PAGE_SIZE);

  if (file_length % PAGE_SIZE != 0) {
    printf("Db file is not a whole number of pages. Corrupt file.\n");
    exit(EXIT_FAILURE);
  }

  // No page is cached yet
  uint32_t slot = 0;
  while (slot < TABLE_MAX_PAGES) {
    pager->pages[slot] = NULL;
    slot++;
  }

  return pager;
}
Table* db_open(const char* filename) {
Pager* pager = pager_open(filename);
Table* table = malloc(sizeof(Table));
table->pager = pager;
table->root_page_num = 0;
if (pager->num_pages == 0) {
// New database file. Initialize page 0 as leaf node.
void* root_node = get_page(pager, 0);
initialize_leaf_node(root_node);
}
return table;
}
InputBuffer* new_input_buffer() { InputBuffer* new_input_buffer() {
InputBuffer* input_buffer = malloc(sizeof(InputBuffer)); InputBuffer* input_buffer = malloc(sizeof(InputBuffer));
@ -156,24 +347,6 @@ void print_prompt() {
printf("db > "); printf("db > ");
} }
/*
 * Print the compile-time layout constants of the row and leaf-node
 * formats to stdout, one "NAME: value" pair per line.
 */
void print_constants() {
  /*
   * The constants are unsigned, so they are printed with %u through an
   * explicit cast instead of %d; the text produced is unchanged.
   */
  printf("ROW_SIZE: %u\n", (unsigned)ROW_SIZE);
  printf("COMMON_NODE_HEADER_SIZE: %u\n", (unsigned)COMMON_NODE_HEADER_SIZE);
  printf("LEAF_NODE_HEADER_SIZE: %u\n", (unsigned)LEAF_NODE_HEADER_SIZE);
  printf("LEAF_NODE_CELL_SIZE: %u\n", (unsigned)LEAF_NODE_CELL_SIZE);
  printf("LEAF_NODE_SPACE_FOR_CELLS: %u\n", (unsigned)LEAF_NODE_SPACE_FOR_CELLS);
  printf("LEAF_NODE_MAX_CELLS: %u\n", (unsigned)LEAF_NODE_MAX_CELLS);
}
/*
 * Print a leaf node to stdout: a header line with the number of cells,
 * followed by one "index : key" line per cell.
 */
void print_leaf_node(void* node) {
  uint32_t num_cells = *leaf_node_num_cells(node);

  /* Counts, indices and keys are unsigned, so %u is used rather than %d. */
  printf("leaf (size %u)\n", (unsigned)num_cells);
  for (uint32_t i = 0; i < num_cells; i++) {
    uint32_t key = *leaf_node_key(node, i);
    printf(" - %u : %u\n", (unsigned)i, (unsigned)key);
  }
}
void read_input(InputBuffer* input_buffer) { void read_input(InputBuffer* input_buffer) {
ssize_t bytes_read = getline( ssize_t bytes_read = getline(
&(input_buffer->buffer), &(input_buffer->buffer),
@ -239,6 +412,32 @@ void db_close(Table* table) {
free(pager); free(pager);
} }
/*
 * Print a leaf node to stdout: a header line with the number of cells,
 * followed by one "index : key" line per cell.
 */
void print_leaf_node(void* node) {
  uint32_t num_cells = *leaf_node_num_cells(node);

  /* Counts, indices and keys are unsigned, so %u is used rather than %d. */
  printf("leaf (size %u)\n", (unsigned)num_cells);
  for (uint32_t i = 0; i < num_cells; i++) {
    uint32_t key = *leaf_node_key(node, i);
    printf(" - %u : %u\n", (unsigned)i, (unsigned)key);
  }
}
/*
 * Execute a dot-prefixed meta command held in the input buffer.
 *
 *   .exit       closes the database and terminates the process
 *   .btree      prints the leaf node stored in page 0
 *   .constants  prints the layout constants
 *
 * Returns META_COMMAND_SUCCESS for a handled command and
 * META_COMMAND_UNRECOGNIZED_COMMAND for anything else.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
  const char* command = input_buffer->buffer;

  if (strcmp(command, ".exit") == 0) {
    db_close(table);
    exit(EXIT_SUCCESS);
  }

  if (strcmp(command, ".btree") == 0) {
    printf("Tree:\n");
    print_leaf_node(get_page(table->pager, 0));
    return META_COMMAND_SUCCESS;
  }

  if (strcmp(command, ".constants") == 0) {
    printf("Constants:\n");
    print_constants();
    return META_COMMAND_SUCCESS;
  }

  return META_COMMAND_UNRECOGNIZED_COMMAND;
}
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) { PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
statement->type = STATEMENT_INSERT; statement->type = STATEMENT_INSERT;
@ -281,57 +480,6 @@ PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement)
return PREPARE_UNRECOGNIZED_STATEMENT; return PREPARE_UNRECOGNIZED_STATEMENT;
} }
/*
 * Write one row to stdout in the form "(id, username, email)",
 * terminated by a newline.
 */
void print_row(Row* row) {
  const char* username = row->username;
  const char* email = row->email;

  printf("(%d, %s, %s)\n", row->id, username, email);
}
/*
 * Copy the id, username and email fields of `source` into the flat buffer
 * `destination`, at ID_OFFSET, USERNAME_OFFSET and EMAIL_OFFSET respectively.
 */
void serialize_row(Row* source, void* destination) {
  /* Offsets are applied on a char*: arithmetic on void* is a GNU extension. */
  char* out = destination;

  memcpy(out + ID_OFFSET, &(source->id), ID_SIZE);
  memcpy(out + USERNAME_OFFSET, &(source->username), USERNAME_SIZE);
  memcpy(out + EMAIL_OFFSET, &(source->email), EMAIL_SIZE);
}
/*
 * Fill the id, username and email fields of `destination` from the flat
 * buffer `source`, reading at ID_OFFSET, USERNAME_OFFSET and EMAIL_OFFSET.
 */
void deserialize_row(void* source, Row* destination) {
  /* Offsets are applied on a char*: arithmetic on void* is a GNU extension. */
  const char* in = source;

  memcpy(&(destination->id), in + ID_OFFSET, ID_SIZE);
  memcpy(&(destination->username), in + USERNAME_OFFSET, USERNAME_SIZE);
  memcpy(&(destination->email), in + EMAIL_OFFSET, EMAIL_SIZE);
}
/*
 * Return the in-memory copy of page `page_num`, loading it on first use.
 *
 * On a cache miss a PAGE_SIZE buffer is allocated and, when the page lies
 * within the file, filled from the file descriptor held by the pager. The
 * buffer is stored in pager->pages[] and pager->num_pages is raised if the
 * requested page lies beyond the current count.
 *
 * Exits the process on an out-of-range page number or a read error.
 */
void* get_page(Pager* pager, uint32_t page_num) {
  /*
   * Valid indices are 0 .. TABLE_MAX_PAGES - 1 (the range pager_open
   * initializes), so TABLE_MAX_PAGES itself must be rejected as well.
   * The message wording is kept as it was.
   */
  if (page_num >= TABLE_MAX_PAGES) {
    printf("Tried to fetch page number out of bounds. %u > %u\n",
           (unsigned)page_num, (unsigned)TABLE_MAX_PAGES);
    exit(EXIT_FAILURE);
  }

  if (pager->pages[page_num] == NULL) {
    // Cache miss. Allocate memory and load from file.
    void* page = malloc(PAGE_SIZE);
    uint32_t num_pages = pager->file_length / PAGE_SIZE;

    // We might save a partial page at the end of the file
    if (pager->file_length % PAGE_SIZE) {
      num_pages += 1;
    }

    if (page_num <= num_pages) {
      /* Compute the byte offset in off_t so it cannot wrap in 32 bits. */
      lseek(pager->file_descriptor, (off_t)page_num * PAGE_SIZE, SEEK_SET);
      ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
      if (bytes_read == -1) {
        printf("Error reading file: %d\n", errno);
        exit(EXIT_FAILURE);
      }
    }

    pager->pages[page_num] = page;

    if (page_num >= pager->num_pages) {
      pager->num_pages = page_num + 1;
    }
  }

  return pager->pages[page_num];
}
void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) { void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
void* node = get_page(cursor->table->pager, cursor->page_num); void* node = get_page(cursor->table->pager, cursor->page_num);
@ -355,56 +503,23 @@ void leaf_node_insert(Cursor* cursor, uint32_t key, Row* value) {
} }
} }
/*
 * Allocate a cursor positioned on the first cell of the root page.
 * end_of_table is set straight away when the root holds no cells.
 * The caller owns the returned cursor.
 */
Cursor* table_start(Table* table) {
  Cursor* start = malloc(sizeof(Cursor));
  start->table = table;
  start->page_num = table->root_page_num;
  start->cell_num = 0;

  uint32_t* cell_count =
      leaf_node_num_cells(get_page(table->pager, table->root_page_num));
  start->end_of_table = (*cell_count == 0);

  return start;
}
/*
 * Allocate a cursor positioned one past the last cell of the root page,
 * with end_of_table already set. The caller owns the returned cursor.
 */
Cursor* table_end(Table* table) {
  Cursor* tail = malloc(sizeof(Cursor));
  tail->table = table;
  tail->page_num = table->root_page_num;

  // The cell count of the root is also the index just past its last cell
  uint32_t* cell_count =
      leaf_node_num_cells(get_page(table->pager, table->root_page_num));
  tail->cell_num = *cell_count;
  tail->end_of_table = true;

  return tail;
}
/*
 * Return a pointer to the value stored in the cell the cursor refers to.
 */
void* cursor_value(Cursor* cursor) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);

  return leaf_node_value(leaf, cursor->cell_num);
}
/*
 * Move the cursor to the next cell of its page and flag end_of_table
 * once it has stepped past the last cell.
 */
void cursor_advance(Cursor* cursor) {
  void* leaf = get_page(cursor->table->pager, cursor->page_num);

  cursor->cell_num++;

  uint32_t cell_count = *leaf_node_num_cells(leaf);
  if (!(cursor->cell_num < cell_count)) {
    cursor->end_of_table = true;
  }
}
ExecuteResult execute_insert(Statement* statement, Table* table) { ExecuteResult execute_insert(Statement* statement, Table* table) {
void* node = get_page(table->pager, table->root_page_num); void* node = get_page(table->pager, table->root_page_num);
if ((*leaf_node_num_cells(node) >= LEAF_NODE_MAX_CELLS)) { uint32_t num_cells = (*leaf_node_num_cells(node));
if (num_cells >= LEAF_NODE_MAX_CELLS) {
return EXECUTE_TABLE_FULL; return EXECUTE_TABLE_FULL;
} }
Row* row_to_insert = &(statement->row_to_insert); Row* row_to_insert = &(statement->row_to_insert);
Cursor* cursor = table_end(table); uint32_t key_to_insert = row_to_insert->id;
Cursor* cursor = table_find(table, key_to_insert);
if (cursor->cell_num < num_cells) {
uint32_t key_at_index = *leaf_node_key(node, cursor->cell_num);
if (key_at_index == key_to_insert) {
return EXECUTE_DUPLICATE_KEY;
}
}
leaf_node_insert(cursor, row_to_insert->id, row_to_insert); leaf_node_insert(cursor, row_to_insert->id, row_to_insert);
@ -438,67 +553,6 @@ ExecuteResult execute_statement(Statement* statement, Table* table) {
} }
} }
/*
 * Open (creating if necessary) the database file and build a Pager for it.
 *
 * The pager records the descriptor, the file length and the number of
 * whole pages in the file, and starts with every page-cache slot empty.
 * Exits the process when the file cannot be opened or its length is not
 * a multiple of PAGE_SIZE.
 */
Pager* pager_open(const char* filename) {
  // Read/write access, create the file when missing, owner may read and write
  int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR);
  if (fd == -1) {
    printf("Unable to open file\n");
    exit(EXIT_FAILURE);
  }

  // Seeking to the end yields the file size
  off_t file_length = lseek(fd, 0, SEEK_END);

  Pager* pager = malloc(sizeof(Pager));
  pager->file_descriptor = fd;
  pager->file_length = file_length;
  pager->num_pages = (file_length / PAGE_SIZE);

  if (file_length % PAGE_SIZE != 0) {
    printf("Db file is not a whole number of pages. Corrupt file.\n");
    exit(EXIT_FAILURE);
  }

  // No page is cached yet
  uint32_t slot = 0;
  while (slot < TABLE_MAX_PAGES) {
    pager->pages[slot] = NULL;
    slot++;
  }

  return pager;
}
Table* db_open(const char* filename) {
Pager* pager = pager_open(filename);
Table* table = malloc(sizeof(Table));
table->pager = pager;
table->root_page_num = 0;
if (pager->num_pages == 0) {
// New database file. Initialize page 0 as leaf node.
void* root_node = get_page(pager, 0);
initialize_leaf_node(root_node);
}
return table;
}
/*
 * Execute a dot-prefixed meta command held in the input buffer.
 *
 *   .exit       closes the database and terminates the process
 *   .btree      prints the leaf node stored in page 0
 *   .constants  prints the layout constants
 *
 * Returns META_COMMAND_SUCCESS for a handled command and
 * META_COMMAND_UNRECOGNIZED_COMMAND for anything else.
 */
MetaCommandResult do_meta_command(InputBuffer* input_buffer, Table* table) {
  const char* command = input_buffer->buffer;

  if (strcmp(command, ".exit") == 0) {
    db_close(table);
    exit(EXIT_SUCCESS);
  }

  if (strcmp(command, ".btree") == 0) {
    printf("Tree:\n");
    print_leaf_node(get_page(table->pager, 0));
    return META_COMMAND_SUCCESS;
  }

  if (strcmp(command, ".constants") == 0) {
    printf("Constants:\n");
    print_constants();
    return META_COMMAND_SUCCESS;
  }

  return META_COMMAND_UNRECOGNIZED_COMMAND;
}
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (argc < 2) { if (argc < 2) {
printf("Must supply a database filename\n"); printf("Must supply a database filename\n");
@ -548,6 +602,10 @@ int main(int argc, char* argv[]) {
printf("Executed.\n"); printf("Executed.\n");
break; break;
case (EXECUTE_DUPLICATE_KEY):
printf("Error: Duplicate key.\n");
break;
case (EXECUTE_TABLE_FULL): case (EXECUTE_TABLE_FULL):
printf("Error: Table full.\n"); printf("Error: Table full.\n");
break; break;

View File

@ -142,11 +142,27 @@ describe 'database' do
"db > Executed.", "db > Executed.",
"db > Tree:", "db > Tree:",
"leaf (size 3)", "leaf (size 3)",
" - 0 : 3", " - 0 : 1",
" - 1 : 1", " - 1 : 2",
" - 2 : 2", " - 2 : 3",
"db > " "db > "
]) ])
end end
it 'prints an error message if there is a duplicate id' do
  # The same insert is issued twice; the second one must be rejected
  # and the following select must list the row only once.
  insert_command = "insert 1 user1 person1@example.com"
  commands = [insert_command, insert_command, "select", ".exit"]

  output = run_script(commands)

  expected_output = [
    "db > Executed.",
    "db > Error: Duplicate key.",
    "db > (1, user1, person1@example.com)",
    "Executed.",
    "db > ",
  ]
  expect(output).to match_array(expected_output)
end
end end