From 103ab099c18c3a3c0fa052271e978d6168596e6b Mon Sep 17 00:00:00 2001 From: Timothy Warren Date: Fri, 8 Sep 2017 16:58:42 -0400 Subject: [PATCH] First commit, partially through page 5 --- .gitignore | 142 +++++++++++++++++++ CMakeLists.txt | 7 + Gemfile | 2 + Makefile | 11 ++ db.c | 334 ++++++++++++++++++++++++++++++++++++++++++++ spec/main_spec.rb | 107 ++++++++++++++ spec/spec_helper.rb | 100 +++++++++++++ 7 files changed, 703 insertions(+) create mode 100644 .gitignore create mode 100644 CMakeLists.txt create mode 100644 Gemfile create mode 100644 Makefile create mode 100644 db.c create mode 100644 spec/main_spec.rb create mode 100644 spec/spec_helper.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c6b7eda --- /dev/null +++ b/.gitignore @@ -0,0 +1,142 @@ + +# Created by https://www.gitignore.io/api/ruby,macos,jetbrains+all + +### JetBrains+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and Webstorm +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff: +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/dictionaries + +# Sensitive or high-churn files: +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.xml +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml + +# Gradle: +.idea/**/gradle.xml +.idea/**/libraries + +# CMake +cmake-build-debug/ + +# Mongo Explorer plugin: +.idea/**/mongoSettings.xml + +## File-based project format: +*.iws + +## Plugin-specific files: + +# IntelliJ +/out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +### JetBrains+all Patch ### +# Ignores the whole idea folder +# See 
https://github.com/joeblau/gitignore.io/issues/186 and https://github.com/joeblau/gitignore.io/issues/360 + +.idea/ + +### macOS ### +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### Ruby ### +*.gem +*.rbc +/.config +/coverage/ +/InstalledFiles +/pkg/ +/spec/reports/ +/spec/examples.txt +/test/tmp/ +/test/version_tmp/ +/tmp/ + +# Used by dotenv library to load environment variables. +# .env + +## Specific to RubyMotion: +.dat* +.repl_history +build/ +*.bridgesupport +build-iPhoneOS/ +build-iPhoneSimulator/ + +## Specific to RubyMotion (use of CocoaPods): +# +# We recommend against adding the Pods directory to your .gitignore. 
However +# you should judge for yourself, the pros and cons are mentioned at: +# https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control +# +# vendor/Pods/ + +## Documentation cache and generated files: +/.yardoc/ +/_yardoc/ +/doc/ +/rdoc/ + +## Environment normalization: +/.bundle/ +/vendor/bundle +/lib/bundler/man/ + +# for a library or gem, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# Gemfile.lock +# .ruby-version +# .ruby-gemset + +# unless supporting rvm < 1.11.0 or doing something fancy, ignore this: +.rvmrc + +# End of https://www.gitignore.io/api/ruby,macos,jetbrains+all + +/db \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1a259a1 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.8) +project(sqlite_clone) + +set(CMAKE_C_STANDARD 99) + +set(SOURCE_FILES db.c) +add_executable(sqlite_clone ${SOURCE_FILES}) \ No newline at end of file diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..15aa5c2 --- /dev/null +++ b/Gemfile @@ -0,0 +1,2 @@ +source 'https://rubygems.org' +gem "rspec" \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..7f890fe --- /dev/null +++ b/Makefile @@ -0,0 +1,11 @@ +db: db.c + gcc db.c -o db + +run: db + ./db mydb.db + +clean: + rm -f db *.db + +test: db + rspec diff --git a/db.c b/db.c new file mode 100644 index 0000000..2a67162 --- /dev/null +++ b/db.c @@ -0,0 +1,334 @@ +#include +#include +#include +#include +#include +#include +#include + +struct InputBuffer_t { + char* buffer; + size_t buffer_length; + ssize_t input_length; +}; +typedef struct InputBuffer_t InputBuffer; + +enum ExecuteResult_t { + EXECUTE_SUCCESS, + EXECUTE_TABLE_FULL +}; +typedef enum ExecuteResult_t ExecuteResult; + +enum MetaCommandResult_t { + META_COMMAND_SUCCESS, + 
/*
 * db.c -- a tiny single-table, SQLite-like REPL whose rows live in
 * fixed-size pages that are loaded on demand from a database file.
 */

#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L /* getline() is POSIX.1-2008, not ISO C */
#endif

#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

/* One line of user input. `buffer` is allocated and grown by getline(). */
struct InputBuffer_t {
    char* buffer;
    size_t buffer_length;
    ssize_t input_length;
};
typedef struct InputBuffer_t InputBuffer;

enum ExecuteResult_t {
    EXECUTE_SUCCESS,
    EXECUTE_TABLE_FULL
};
typedef enum ExecuteResult_t ExecuteResult;

enum MetaCommandResult_t {
    META_COMMAND_SUCCESS,
    META_COMMAND_UNRECOGNIZED_COMMAND
};
typedef enum MetaCommandResult_t MetaCommandResult;

enum PrepareResult_t {
    PREPARE_SUCCESS,
    PREPARE_NEGATIVE_ID,
    PREPARE_STRING_TOO_LONG,
    PREPARE_SYNTAX_ERROR,
    PREPARE_UNRECOGNIZED_STATEMENT
};
typedef enum PrepareResult_t PrepareResult;

enum StatementType_t {
    STATEMENT_INSERT,
    STATEMENT_SELECT
};
typedef enum StatementType_t StatementType;

/*
 * Layout constants are enum constants, not `const uint32_t` objects: ISO C
 * requires integer constant expressions for array bounds inside structs and
 * for file-scope initializers, and a const-qualified object is not one.
 */
enum {
    COLUMN_USERNAME_SIZE = 32,
    COLUMN_EMAIL_SIZE = 255
};

/* In-memory row; each string field has one extra byte for the terminator. */
struct Row_t {
    uint32_t id;
    char username[COLUMN_USERNAME_SIZE + 1];
    char email[COLUMN_EMAIL_SIZE + 1];
};
typedef struct Row_t Row;

struct Statement_t {
    StatementType type;
    Row row_to_insert;  // only used by insert statement
};
typedef struct Statement_t Statement;

#define size_of_attribute(Struct, Attribute) sizeof(((Struct*)0)->Attribute)

enum {
    /* Serialized row: id, username, email packed back to back. */
    ID_SIZE = size_of_attribute(Row, id),
    USERNAME_SIZE = size_of_attribute(Row, username),
    EMAIL_SIZE = size_of_attribute(Row, email),
    ID_OFFSET = 0,
    USERNAME_OFFSET = ID_OFFSET + ID_SIZE,
    EMAIL_OFFSET = USERNAME_OFFSET + USERNAME_SIZE,
    ROW_SIZE = ID_SIZE + USERNAME_SIZE + EMAIL_SIZE,

    /* Rows never straddle a page, so each page has some unused tail bytes. */
    PAGE_SIZE = 4096,
    TABLE_MAX_PAGES = 100,
    ROWS_PER_PAGE = PAGE_SIZE / ROW_SIZE,
    TABLE_MAX_ROWS = ROWS_PER_PAGE * TABLE_MAX_PAGES
};

/* Page cache over the database file; a NULL slot means "not loaded yet". */
struct Pager_t {
    int file_descriptor;
    uint32_t file_length;
    void* pages[TABLE_MAX_PAGES];
};
typedef struct Pager_t Pager;

struct Table_t {
    Pager* pager;
    uint32_t num_rows;
};
typedef struct Table_t Table;

/* Allocates an empty input buffer; exits the process if allocation fails. */
InputBuffer* new_input_buffer(void) {
    InputBuffer* input_buffer = malloc(sizeof *input_buffer);
    if (input_buffer == NULL) {
        printf("Out of memory\n");
        exit(EXIT_FAILURE);
    }

    input_buffer->buffer = NULL;
    input_buffer->buffer_length = 0;
    input_buffer->input_length = 0;

    return input_buffer;
}

void print_prompt(void) {
    printf("db > ");
}

/*
 * Reads one line from stdin into `input_buffer`, without its trailing
 * newline. Exits with failure on EOF or read error.
 */
void read_input(InputBuffer* input_buffer) {
    ssize_t bytes_read = getline(
        &(input_buffer->buffer),
        &(input_buffer->buffer_length),
        stdin
    );

    if (bytes_read <= 0) {
        printf("Error reading input\n");
        exit(EXIT_FAILURE);
    }

    // Strip the newline only when there is one: the last line of piped input
    // may end at EOF without it, and must not lose its final character.
    if (input_buffer->buffer[bytes_read - 1] == '\n') {
        bytes_read -= 1;
        input_buffer->buffer[bytes_read] = '\0';
    }
    input_buffer->input_length = bytes_read;
}

/* Handles "." commands. ".exit" terminates the process and does not return. */
MetaCommandResult do_meta_command(InputBuffer* input_buffer) {
    if (strcmp(input_buffer->buffer, ".exit") == 0) {
        exit(EXIT_SUCCESS);
    }
    return META_COMMAND_UNRECOGNIZED_COMMAND;
}

/*
 * Parses "insert <id> <username> <email>" into `statement`.
 *
 * The input buffer is tokenized in place (strtok overwrites separators).
 * Returns PREPARE_SYNTAX_ERROR for missing fields or an id that is not a
 * decimal number fitting in uint32_t, PREPARE_NEGATIVE_ID for id < 0, and
 * PREPARE_STRING_TOO_LONG when a string exceeds its column size.
 */
PrepareResult prepare_insert(InputBuffer* input_buffer, Statement* statement) {
    statement->type = STATEMENT_INSERT;

    (void)strtok(input_buffer->buffer, " ");  // skip the "insert" keyword
    char* id_string = strtok(NULL, " ");
    char* username = strtok(NULL, " ");
    char* email = strtok(NULL, " ");

    if (id_string == NULL || username == NULL || email == NULL) {
        return PREPARE_SYNTAX_ERROR;
    }

    // strtoll instead of atoi: atoi cannot report garbage or overflow.
    errno = 0;
    char* id_end = NULL;
    long long id = strtoll(id_string, &id_end, 10);
    if (id_end == id_string || *id_end != '\0') {
        return PREPARE_SYNTAX_ERROR;
    }
    if (id < 0) {
        return PREPARE_NEGATIVE_ID;
    }
    if (errno == ERANGE || id > (long long)UINT32_MAX) {
        return PREPARE_SYNTAX_ERROR;
    }
    if (strlen(username) > COLUMN_USERNAME_SIZE) {
        return PREPARE_STRING_TOO_LONG;
    }
    if (strlen(email) > COLUMN_EMAIL_SIZE) {
        return PREPARE_STRING_TOO_LONG;
    }

    // Zero the row first: serialize_row() copies the full fixed-width
    // fields, so bytes after the terminators must not be stack garbage.
    memset(&statement->row_to_insert, 0, sizeof statement->row_to_insert);
    statement->row_to_insert.id = (uint32_t)id;
    strcpy(statement->row_to_insert.username, username);  // lengths checked above
    strcpy(statement->row_to_insert.email, email);

    return PREPARE_SUCCESS;
}

/* Classifies the input line by its leading keyword and fills `statement`. */
PrepareResult prepare_statement(InputBuffer* input_buffer, Statement* statement) {
    if (strncmp(input_buffer->buffer, "insert", 6) == 0) {
        return prepare_insert(input_buffer, statement);
    }
    if (strncmp(input_buffer->buffer, "select", 6) == 0) {
        statement->type = STATEMENT_SELECT;
        return PREPARE_SUCCESS;
    }

    return PREPARE_UNRECOGNIZED_STATEMENT;
}

void print_row(Row* row) {
    printf("(%" PRIu32 ", %s, %s)\n", row->id, row->username, row->email);
}

/* Packs `source` into ROW_SIZE bytes at `destination`. */
void serialize_row(Row* source, void* destination) {
    // Byte pointer: arithmetic on void* is a GNU extension, not ISO C.
    unsigned char* out = destination;
    memcpy(out + ID_OFFSET, &(source->id), ID_SIZE);
    memcpy(out + USERNAME_OFFSET, source->username, USERNAME_SIZE);
    memcpy(out + EMAIL_OFFSET, source->email, EMAIL_SIZE);
}

/* Unpacks ROW_SIZE bytes at `source` into `destination`. */
void deserialize_row(void* source, Row* destination) {
    const unsigned char* in = source;
    memcpy(&(destination->id), in + ID_OFFSET, ID_SIZE);
    memcpy(destination->username, in + USERNAME_OFFSET, USERNAME_SIZE);
    memcpy(destination->email, in + EMAIL_OFFSET, EMAIL_SIZE);
}

/*
 * Returns the cached page `page_num`, loading it from the file on first use.
 * Pages beyond the end of the file come back zero-filled. Exits the process
 * on an out-of-range page number, allocation failure, or I/O error.
 */
void* get_page(Pager* pager, uint32_t page_num) {
    // Valid indexes are 0 .. TABLE_MAX_PAGES - 1.
    if (page_num >= TABLE_MAX_PAGES) {
        printf("Tried to fetch page number out of bounds. %" PRIu32 " >= %d\n",
               page_num, TABLE_MAX_PAGES);
        exit(EXIT_FAILURE);
    }

    if (pager->pages[page_num] == NULL) {
        // Cache miss. Allocate zeroed memory and load from file.
        void* page = calloc(1, PAGE_SIZE);
        if (page == NULL) {
            printf("Out of memory\n");
            exit(EXIT_FAILURE);
        }

        uint32_t num_pages = pager->file_length / PAGE_SIZE;

        // We might save a partial page at the end of the file
        if (pager->file_length % PAGE_SIZE) {
            num_pages += 1;
        }

        // Only pages that exist in the file need to be read.
        if (page_num < num_pages) {
            off_t offset = (off_t)page_num * PAGE_SIZE;
            if (lseek(pager->file_descriptor, offset, SEEK_SET) == (off_t)-1) {
                printf("Error seeking: %d\n", errno);
                exit(EXIT_FAILURE);
            }
            ssize_t bytes_read = read(pager->file_descriptor, page, PAGE_SIZE);
            if (bytes_read == -1) {
                printf("Error reading file: %d\n", errno);
                exit(EXIT_FAILURE);
            }
        }

        pager->pages[page_num] = page;
    }

    return pager->pages[page_num];
}

/* Returns the address of row `row_num` inside its (possibly just loaded) page. */
void* row_slot(Table* table, uint32_t row_num) {
    uint32_t page_num = row_num / ROWS_PER_PAGE;
    unsigned char* page = get_page(table->pager, page_num);
    uint32_t row_offset = row_num % ROWS_PER_PAGE;
    uint32_t byte_offset = row_offset * ROW_SIZE;
    return page + byte_offset;
}

/* Appends the statement's row, or reports EXECUTE_TABLE_FULL. */
ExecuteResult execute_insert(Statement* statement, Table* table) {
    if (table->num_rows >= TABLE_MAX_ROWS) {
        return EXECUTE_TABLE_FULL;
    }

    Row* row_to_insert = &(statement->row_to_insert);

    serialize_row(row_to_insert, row_slot(table, table->num_rows));
    table->num_rows += 1;

    return EXECUTE_SUCCESS;
}

/* Prints every row of the table in insertion order. */
ExecuteResult execute_select(Statement* statement, Table* table) {
    (void)statement;  // select takes no arguments yet

    Row row;
    for (uint32_t i = 0; i < table->num_rows; i++) {
        deserialize_row(row_slot(table, i), &row);
        print_row(&row);
    }
    return EXECUTE_SUCCESS;
}
return EXECUTE_SUCCESS; +} + +ExecuteResult execute_statement(Statement* statement, Table* table) { + switch(statement->type) { + case (STATEMENT_INSERT): + return execute_insert(statement, table); + + case (STATEMENT_SELECT): + return execute_select(statement, table); + } +} + +Pager* pager_open(const char* filename) { + // Read/Write mode, Create non-existent file, user read and write permission + int fd = open(filename, O_RDWR | O_CREAT, S_IWUSR | S_IRUSR); + + if (fd == -1) { + printf("Unable to open file\n"); + exit(EXIT_FAILURE); + } + + off_t file_length = lseek(fd, 0, SEEK_END); + + Pager* pager = malloc(sizeof(Pager)); + pager->file_descriptor = fd; + pager->file_length = file_length; + + for (uint32_t i = 0; i < TABLE_MAX_PAGES; i++) { + pager->pages[i] = NULL; + } + + return pager; +} + +Table* db_open(const char* filename) { + Pager* pager = pager_open(filename); + uint32_t num_rows = pager->file_length / ROW_SIZE; + + Table* table = malloc(sizeof(Table)); + table->pager = pager; + table->num_rows = num_rows; + + return table; +} + +int main(int argc, char* argv[]) { + Table* table = new_table(); + InputBuffer* input_buffer = new_input_buffer(); + while (true) { + print_prompt(); + read_input(input_buffer); + + if (input_buffer->buffer[0] == '.') { + switch (do_meta_command(input_buffer)) { + case (META_COMMAND_SUCCESS): + continue; + + case (META_COMMAND_UNRECOGNIZED_COMMAND): + printf("Unrecognized command '%s'\n", input_buffer->buffer); + continue; + } + } + + Statement statement; + + switch(prepare_statement(input_buffer, &statement)) { + case (PREPARE_SUCCESS): + break; + case (PREPARE_NEGATIVE_ID): + printf("ID must be positive.\n"); + continue; + case (PREPARE_STRING_TOO_LONG): + printf("String is too long.\n"); + continue; + case (PREPARE_SYNTAX_ERROR): + printf("Syntax error. 
Could not parse statement.\n"); + continue; + case (PREPARE_UNRECOGNIZED_STATEMENT): + printf("Unrecognized keyword at start of '%s'.\n", input_buffer->buffer); + continue; + } + + switch(execute_statement(&statement, table)) { + case (EXECUTE_SUCCESS): + printf("Executed.\n"); + break; + + case (EXECUTE_TABLE_FULL): + printf("Error: Table full.\n"); + break; + } + } +} \ No newline at end of file diff --git a/spec/main_spec.rb b/spec/main_spec.rb new file mode 100644 index 0000000..bd02bc3 --- /dev/null +++ b/spec/main_spec.rb @@ -0,0 +1,107 @@ +describe 'database' do + def run_script(commands) + raw_output = nil + IO.popen("./db", "r+") do |pipe| + commands.each do |command| + pipe.puts command + end + + pipe.close_write + + # Read entire output + raw_output = pipe.gets(nil) + end + raw_output.split("\n") + end + + it 'inserts and retreives a row' do + result = run_script([ + "insert 1 user1 person1@example.com", + "select", + ".exit", + ]) + expect(result).to eq([ + "db > Executed.", + "db > (1, user1, person1@example.com)", + "Executed.", + "db > ", + ]) + end + + it 'prints error message when table is full' do + script = (1..1401).map do |i| + "insert #{i} user#{i} person#{i}@example.com" + end + script << ".exit" + result = run_script(script) + expect(result[-2]).to eq('db > Error: Table full.') + end + + it 'allows inserting strings that are the maximum length' do + long_username = "a"*32 + long_email = "a"*255 + script = [ + "insert 1 #{long_username} #{long_email}", + "select", + ".exit", + ] + result = run_script(script) + expect(result).to eq([ + "db > Executed.", + "db > (1, #{long_username}, #{long_email})", + "Executed.", + "db > ", + ]) + end + + it 'prints error message if strings are too long' do + long_username = "a"*33 + long_email = "a"*256 + script = [ + "insert 1 #{long_username} #{long_email}", + "select", + ".exit", + ] + result = run_script(script) + expect(result).to eq([ + "db > String is too long.", + "db > Executed.", + "db > ", + ]) + 
describe 'database' do
  # Database file handed to ./db for every example.
  def db_file
    "test.db"
  end

  # Start each example from an empty database so examples cannot see each
  # other's rows. The persistence example reopens the file itself.
  before do
    File.delete(db_file) if File.exist?(db_file)
  end

  # Feeds `commands` to a fresh ./db process, one per line, and returns
  # everything it printed as an array of lines.
  def run_script(commands)
    raw_output = nil
    IO.popen(["./db", db_file], "r+") do |pipe|
      commands.each do |command|
        pipe.puts command
      end

      pipe.close_write

      # Read entire output
      raw_output = pipe.gets(nil)
    end
    # gets(nil) returns nil when the process printed nothing at all.
    raw_output.to_s.split("\n")
  end

  it 'inserts and retrieves a row' do
    result = run_script([
      "insert 1 user1 person1@example.com",
      "select",
      ".exit",
    ])
    expect(result).to eq([
      "db > Executed.",
      "db > (1, user1, person1@example.com)",
      "Executed.",
      "db > ",
    ])
  end

  it 'prints error message when table is full' do
    script = (1..1401).map do |i|
      "insert #{i} user#{i} person#{i}@example.com"
    end
    script << ".exit"
    result = run_script(script)
    expect(result[-2]).to eq('db > Error: Table full.')
  end

  it 'allows inserting strings that are the maximum length' do
    long_username = "a"*32
    long_email = "a"*255
    script = [
      "insert 1 #{long_username} #{long_email}",
      "select",
      ".exit",
    ]
    result = run_script(script)
    expect(result).to eq([
      "db > Executed.",
      "db > (1, #{long_username}, #{long_email})",
      "Executed.",
      "db > ",
    ])
  end

  it 'prints error message if strings are too long' do
    long_username = "a"*33
    long_email = "a"*256
    script = [
      "insert 1 #{long_username} #{long_email}",
      "select",
      ".exit",
    ]
    result = run_script(script)
    expect(result).to eq([
      "db > String is too long.",
      "db > Executed.",
      "db > ",
    ])
  end

  it 'prints an error message if id is negative' do
    script = [
      "insert -1 cstack foo@bar.com",
      "select",
      ".exit",
    ]
    result = run_script(script)
    expect(result).to eq([
      "db > ID must be positive.",
      "db > Executed.",
      "db > ",
    ])
  end

  it 'keeps data after closing connection' do
    result1 = run_script([
      "insert 1 user1 person1@example.com",
      ".exit",
    ])
    expect(result1).to eq([
      "db > Executed.",
      "db > ",
    ])
    result2 = run_script([
      "select",
      ".exit",
    ])
    expect(result2).to eq([
      "db > (1, user1, person1@example.com)",
      "Executed.",
      "db > ",
    ])
  end

end
# Generated by `rspec --init`. By convention specs live under `spec/`, which
# RSpec puts on the `$LOAD_PATH`; the generated `.rspec` file passes
# `--require spec_helper`, so this file is loaded for every run.
#
# Keep it light: anything required here adds to the boot time of every run,
# even for a single spec file. Put heavyweight setup in a separate helper and
# require it only from the specs that need it.
#
# See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
RSpec.configure do |config|
  # rspec-expectations settings. Another assertion library (wrong,
  # stdlib/minitest assertions) could be plugged in here instead.
  config.expect_with(:rspec) do |expectations|
    # Include `chain`ed helper text in custom matcher descriptions and
    # failure messages, e.g. "be bigger than 2 and smaller than 4" rather
    # than just "be bigger than 2". Will default to `true` in RSpec 4.
    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
  end

  # rspec-mocks settings. Another test-double library (bogus, mocha) could
  # be selected here via `mock_with`.
  config.mock_with(:rspec) do |mocks|
    # Refuse to mock or stub methods that do not exist on the real object.
    # Will default to `true` in RSpec 4.
    mocks.verify_partial_doubles = true
  end

  # Shared context metadata is inherited by host groups and examples instead
  # of triggering implicit auto-inclusion in groups with matching metadata.
  # Will be the only behavior in RSpec 4; the option exists for RSpec 3
  # backwards compatibility.
  config.shared_context_metadata_behavior = :apply_to_host_groups

# The settings below are suggested to provide a good initial experience
# with RSpec, but feel free to customize to your heart's content.
=begin
  # This allows you to limit a spec run to individual examples or groups
  # you care about by tagging them with `:focus` metadata. When nothing
  # is tagged with `:focus`, all examples get run. RSpec also provides
  # aliases for `it`, `describe`, and `context` that include `:focus`
  # metadata: `fit`, `fdescribe` and `fcontext`, respectively.
  config.filter_run_when_matching :focus

  # Allows RSpec to persist some state between runs in order to support
  # the `--only-failures` and `--next-failure` CLI options. We recommend
  # you configure your source control system to ignore this file.
  config.example_status_persistence_file_path = "spec/examples.txt"

  # Limits the available syntax to the non-monkey patched syntax that is
  # recommended. For more details, see:
  #   - http://rspec.info/blog/2012/06/rspecs-new-expectation-syntax/
  #   - http://www.teaisaweso.me/blog/2013/05/27/rspecs-new-message-expectation-syntax/
  #   - http://rspec.info/blog/2014/05/notable-changes-in-rspec-3/#zero-monkey-patching-mode
  config.disable_monkey_patching!

  # This setting enables warnings. It's recommended, but in some cases may
  # be too noisy due to issues in dependencies.
  config.warnings = true

  # Many RSpec users commonly either run the entire suite or an individual
  # file, and it's useful to allow more verbose output when running an
  # individual spec file.
  if config.files_to_run.one?
    # Use the documentation formatter for detailed output,
    # unless a formatter has already been configured
    # (e.g. via a command-line flag).
    config.default_formatter = "doc"
  end

  # Print the 10 slowest examples and example groups at the
  # end of the spec run, to help surface which specs are running
  # particularly slow.
  config.profile_examples = 10

  # Run specs in random order to surface order dependencies. If you find an
  # order dependency and want to debug it, you can fix the order by providing
  # the seed, which is printed after each run.
  #     --seed 1234
  config.order = :random

  # Seed global randomization in this process using the `--seed` CLI option.
  # Setting this allows you to use `--seed` to deterministically reproduce
  # test failures related to randomization by passing the same `--seed` value
  # as the one that triggered the failure.
  Kernel.srand config.seed
=end
end