diff options
| -rw-r--r-- | Makefile | 16 | ||||
| -rw-r--r-- | source/btree_impl.h | 182 | ||||
| -rw-r--r-- | source/csv_decoder.h | 17 | ||||
| -rw-r--r-- | source/tb_db.c | 9 |
4 files changed, 146 insertions, 78 deletions
| @@ -1,13 +1,25 @@ | |||
| 1 | BIN = build/engine | 1 | BIN = build/engine |
| 2 | SRC = source/tb_db.c | 2 | SRC = source/tb_db.c |
| 3 | |||
| 4 | # CC = gcc | ||
| 3 | CC = clang | 5 | CC = clang |
| 4 | CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror | 6 | |
| 7 | COMPILER := $(shell $(CC) --version | grep -o "gcc\|clang" | head -1) | ||
| 8 | |||
| 9 | # check for compile optimizations per compiler | ||
| 10 | ifeq ($(COMPILER),gcc) | ||
| 11 | CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror -O0 | ||
| 12 | else ifeq ($(COMPILER),clang) | ||
| 13 | CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -O0 | ||
| 14 | else | ||
| 15 | CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -O0 | ||
| 16 | endif | ||
| 5 | 17 | ||
| 6 | $(BIN): $(SRC) | 18 | $(BIN): $(SRC) |
| 7 | mkdir -p build | 19 | mkdir -p build |
| 8 | $(CC) $(CFLAGS) $< -o $@ | 20 | $(CC) $(CFLAGS) $< -o $@ |
| 9 | 21 | ||
| 10 | run: | 22 | run: $(BIN) |
| 11 | $(BIN) | 23 | $(BIN) |
| 12 | 24 | ||
| 13 | .PHONY: clean | 25 | .PHONY: clean |
diff --git a/source/btree_impl.h b/source/btree_impl.h index 8eb0723..7dda6c9 100644 --- a/source/btree_impl.h +++ b/source/btree_impl.h | |||
| @@ -1,38 +1,46 @@ | |||
| 1 | /** | ||
| 2 | * >> api notes | ||
| 3 | * >> by Abdellah El Morabit | ||
| 4 | * | ||
| 5 | * - creation of a btree happens when doing an insertion on an | ||
| 6 | * empty btree where the root is null or nil | ||
| 7 | * | ||
| 8 | **/ | ||
| 9 | |||
| 1 | /* single header b-tree implementation by Abdellah El Morabit */ | 10 | /* single header b-tree implementation by Abdellah El Morabit */ |
| 2 | #ifndef B_TREE_H | 11 | #ifndef BTREE_H |
| 3 | #define B_TREE_H | 12 | #define BTREE_H |
| 4 | 13 | ||
| 5 | // maximum height of the tree the lower the lower the lower amount | 14 | // maximum height of the tree the lower the lower the lower amount |
| 6 | // of disk reads which translates into faster? | 15 | // of disk reads which translates into faster? |
| 7 | #if 0 | 16 | #if 0 |
| 8 | global_variable read_only s16 B_TREE_ORDER = 4; | 17 | global_variable read_only s16 btree_ORDER = 4; |
| 9 | #endif | 18 | #endif |
| 10 | #define B_TREE_ORDER 4 | 19 | #define BTREE_ORDER 4 |
| 11 | 20 | ||
| 12 | //- NOTE(nasr): defining a key to improve sorting | 21 | //- NOTE(nasr): defining a key to improve sorting |
| 13 | // i think saying that a key is a combination of the column + row is a good way of appraoching this | 22 | // i think saying that a key is a combination of the column + row is a good way of appraoching this |
| 14 | typedef struct key key; | 23 | typedef struct key key; |
| 15 | struct key | 24 | struct key |
| 16 | { | 25 | { |
| 17 | string8 header; | 26 | s32 header_index; //- col number |
| 18 | s32 row; | 27 | s32 row_index; |
| 19 | }; | 28 | }; |
| 20 | 29 | ||
| 21 | typedef struct b_tree_node b_tree_node; | 30 | typedef struct btree_node btree_node; |
| 22 | struct b_tree_node | 31 | struct btree_node |
| 23 | { | 32 | { |
| 24 | // store the key values of the sub nodes? if they are leaves? | 33 | // store the key values of the sub nodes? if they are leaves? |
| 25 | key keys[B_TREE_ORDER - 1]; | 34 | key keys[BTREE_ORDER - 1]; |
| 26 | // TODO(nasr): replace with something more generic? | 35 | // TODO(nasr): replace with something more generic? |
| 27 | // NOTE(nasr): cons of void * -> no type safety | 36 | // NOTE(nasr): cons of void * -> no type safety |
| 28 | // is there a way to still have some sort of that? | 37 | // is there a way to still have some sort of that? size not variable |
| 29 | // size not variable | 38 | void *payload_per_key[BTREE_ORDER - 1]; |
| 30 | void *payload_per_key[B_TREE_ORDER - 1]; | 39 | btree_node *parent; |
| 31 | b_tree_node *parent; | ||
| 32 | // handle to store children faster than linked list | 40 | // handle to store children faster than linked list |
| 33 | // because now we can iterate over the nodes instead of having cache misses | 41 | // because now we can iterate over the nodes instead of having cache misses |
| 34 | // when jumping through linked nodes | 42 | // when jumping through linked nodes |
| 35 | b_tree_node *children[B_TREE_ORDER]; | 43 | btree_node *children[BTREE_ORDER]; |
| 36 | // this shouldn't change things but could be a performance improvement on a bigger scale?? | 44 | // this shouldn't change things but could be a performance improvement on a bigger scale?? |
| 37 | s32 *max_children; | 45 | s32 *max_children; |
| 38 | // NOTE(nasr): reference count ::: check how many leaves are using this node | 46 | // NOTE(nasr): reference count ::: check how many leaves are using this node |
| @@ -46,120 +54,162 @@ struct b_tree_node | |||
| 46 | // this could solve memory location issues? | 54 | // this could solve memory location issues? |
| 47 | }; | 55 | }; |
| 48 | 56 | ||
| 49 | typedef struct b_tree b_tree; | 57 | |
| 50 | struct b_tree | 58 | typedef struct btree btree; |
| 59 | struct btree | ||
| 51 | { | 60 | { |
| 52 | // NOTE(nasr): make sure this always stays in memory | 61 | // NOTE(nasr): make sure this always stays in memory |
| 53 | // so that initial fetch never requires a disk read | 62 | // so that initial fetch never requires a disk read |
| 54 | b_tree_node *root; | 63 | btree_node *root; |
| 55 | }; | 64 | }; |
| 56 | 65 | ||
| 57 | #ifdef B_TREE_IMPLEMENTATION | 66 | |
| 67 | |||
| 68 | #ifdef BTREE_IMPLEMENTATION | ||
| 58 | /////////////////////////////////////////////////////////////////////////////// | 69 | /////////////////////////////////////////////////////////////////////////////// |
| 59 | //- b_tree.c | 70 | //- btree.c |
| 71 | |||
| 72 | |||
| 73 | internal b32 | ||
| 74 | is_nil_btree_node(btree_node *node) | ||
| 75 | { | ||
| 76 | // NOTE(nasr): looks like a useless function but could be usefull when we implement a nil_btree_node | ||
| 77 | return (node != NULL); | ||
| 78 | } | ||
| 60 | 79 | ||
| 61 | // TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees? | 80 | // TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees? |
| 62 | 81 | ||
| 63 | internal b_tree_node * | 82 | internal b8 |
| 64 | btree_node_alloc(mem_arena *arena) | 83 | key_compare(key destination_key, key source_key) |
| 65 | { | 84 | { |
| 66 | b_tree_node *node = PushStructZero(arena, b_tree_node); | 85 | s32 source_index = source_key.header_index + source_key.row_index; |
| 67 | node->leaf = 1; | 86 | s32 destination_index = destination_key.header_index + destination_key.row_index; |
| 68 | return node; | 87 | |
| 88 | if(destination_index > source_index) return 1; | ||
| 89 | else if(destination_index < source_index) return -1; | ||
| 90 | else return 0; | ||
| 69 | } | 91 | } |
| 70 | 92 | ||
| 71 | // NOTE(nasr): @return the index of the found element | 93 | // NOTE(nasr): @return the index of the found element |
| 94 | //- @param: start node could be the root node for all we care. but this enables optimizations in the future if we need them | ||
| 72 | internal s32 | 95 | internal s32 |
| 73 | btree_node_find_pos(string8 value, b_tree_node *node) | 96 | btree_find_ipos(key k, btree_node *start_node) |
| 74 | { | 97 | { |
| 75 | unused(value); | 98 | s32 index = 0; |
| 76 | unused(node); | 99 | for (btree_node *current_node = start_node; |
| 77 | 100 | index < current_node->key_count && key_compare(k, current_node->keys[0]) < 0; | |
| 78 | #if 0 | 101 | ++index); |
| 79 | s32 i = 0; | 102 | return index; |
| 80 | for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i); | ||
| 81 | return i; | ||
| 82 | #endif | ||
| 83 | 103 | ||
| 84 | return 0; | 104 | return 0; |
| 85 | 105 | ||
| 86 | } | 106 | } |
| 87 | 107 | ||
| 88 | internal void | ||
| 89 | b_tree_create(mem_arena *arena, b_tree *tree) | ||
| 90 | { | ||
| 91 | tree->root = btree_node_alloc(arena); | ||
| 92 | tree->root->leaf = 1; | ||
| 93 | tree->root->key_count = 0; | ||
| 94 | } | ||
| 95 | |||
| 96 | |||
| 97 | // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory | 108 | // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory |
| 98 | internal void * | 109 | internal void * |
| 99 | b_tree_search(b_tree_node *node, string8 key) | 110 | btree_search(btree_node *node, key key) |
| 100 | { | 111 | { |
| 101 | unused(node); | ||
| 102 | unused(key); | ||
| 103 | 112 | ||
| 104 | #if 0 | 113 | s32 index = btree_find_ipos(key, node); |
| 105 | s32 i = btree_node_find_pos(key, node); | ||
| 106 | 114 | ||
| 107 | if (i < node->key_count && string8_cmp(node->keys[i], key) == 0) | 115 | if (index < node->key_count && key_compare(node->keys[index], key) == 0) |
| 108 | { | 116 | { |
| 109 | return node->payload_per_key[i]; | 117 | return node->payload_per_key[index]; |
| 110 | } | 118 | } |
| 111 | if (node->leaf) | 119 | if (node->leaf) |
| 112 | { | 120 | { |
| 113 | return NULL; | 121 | return NULL; |
| 114 | } | 122 | } |
| 115 | return b_tree_search(node->children[i], key); | 123 | return btree_search(node->children[index], key); |
| 116 | #endif | ||
| 117 | 124 | ||
| 118 | return NULL; | 125 | return NULL; |
| 119 | } | 126 | } |
| 120 | 127 | ||
| 121 | 128 | // TODO(nasr): split node when key_count == btree_ORDER - 1 (node is full) | |
| 122 | // TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full) | 129 | // -- each node has max btree_ORDER - 1 keys |
| 130 | // | ||
| 123 | internal void | 131 | internal void |
| 124 | b_tree_insert(b_tree *tree, string8 key, void *payload) | 132 | btree_insert(mem_arena *arena, btree *tree, key key, void *payload) |
| 125 | { | 133 | { |
| 126 | unused(tree); | 134 | |
| 127 | unused(key); | 135 | if(is_nil_btree_node(tree->root)) |
| 128 | unused(payload); | 136 | { |
| 129 | #if 0 | 137 | tree->root = PushStructZero(arena, btree_node); |
| 130 | b_tree_node *current_node = tree->root; | 138 | tree->root->leaf = 1; |
| 139 | tree->root->key_count = 0; | ||
| 140 | |||
| 141 | } | ||
| 142 | |||
| 143 | btree_node *current_node = tree->root; | ||
| 144 | |||
| 131 | 145 | ||
| 132 | if (current_node->leaf) | 146 | if (current_node->leaf) |
| 133 | { | 147 | { |
| 134 | // find the insertion position and shift keys right to make room | 148 | // find the insertion position and shift keys right to make room |
| 135 | s32 i = btree_node_find_pos(key, current_node); | 149 | //- after: but what is this. we are expecting a new key.but we are looking for |
| 150 | //- its position in the tree is useless because it's non existent | ||
| 151 | |||
| 152 | s32 i = btree_find_ipos(key, current_node); | ||
| 153 | |||
| 136 | for (s32 j = current_node->key_count; j > i; --j) | 154 | for (s32 j = current_node->key_count; j > i; --j) |
| 137 | { | 155 | { |
| 138 | current_node->keys[j] = current_node->keys[j - 1]; | 156 | current_node->keys[j] = current_node->keys[j - 1]; |
| 139 | current_node->payload_per_key[j] = current_node->payload_per_key[j - 1]; | 157 | current_node->payload_per_key[j] = current_node->payload_per_key[j - 1]; |
| 140 | } | 158 | } |
| 159 | |||
| 141 | current_node->keys[i] = key; | 160 | current_node->keys[i] = key; |
| 142 | current_node->payload_per_key[i] = payload; | 161 | current_node->payload_per_key[i] = payload; |
| 143 | 162 | ||
| 144 | if(current_node->key_count < (B_TREE_ORDER - 1)) | 163 | if(current_node->key_count < (BTREE_ORDER - 1)) |
| 145 | { | 164 | { |
| 146 | current_node->key_count += 1; | 165 | current_node->key_count += 1; |
| 147 | } | 166 | } |
| 148 | else { | 167 | else { |
| 149 | // TODO(nasr): creating a new branch / tree? | 168 | // TODO(nasr): creating a new branch / tree? |
| 150 | // make a seperate function for this | 169 | // make a seperate function for this |
| 170 | s32 split_pos = current_node->parent->key_count / 2 - 1; | ||
| 171 | s32 updated_keycount_p = 0; | ||
| 172 | for(s32 index = 0; index <= split_pos; ++index) | ||
| 173 | { | ||
| 174 | ++updated_keycount_p; | ||
| 175 | } | ||
| 176 | |||
| 177 | current_node->parent->key_count = updated_keycount_p; | ||
| 178 | |||
| 151 | } | 179 | } |
| 152 | } | 180 | } |
| 153 | // TODO(nasr): internal node case walk down then split on the way back up | 181 | // TODO(nasr): internal node case walk down then split on the way back up |
| 154 | #endif | ||
| 155 | } | 182 | } |
| 156 | 183 | ||
| 184 | |||
| 157 | internal void | 185 | internal void |
| 158 | b_tree_write(b_tree *bt) | 186 | btree_write(btree *bt) |
| 159 | { | 187 | { |
| 160 | // TODO(nasr): write the b_tree to disk | 188 | |
| 161 | unused(bt); | 189 | unused(bt); |
| 190 | #if 0 | ||
| 191 | temp_arena *temp_arena = temp_arena_begin(arena); | ||
| 192 | btree_node *root = bt->root; | ||
| 193 | |||
| 194 | for(btree_node *bt = PushStruct(arena); root->next; next = root;) | ||
| 195 | { | ||
| 196 | |||
| 197 | } | ||
| 198 | |||
| 199 | temp_arena_end(temp); | ||
| 200 | #endif | ||
| 201 | |||
| 202 | } | ||
| 203 | |||
| 204 | // - load the entire db into memory. | ||
| 205 | #if 0 | ||
| 206 | internal void | ||
| 207 | btree_load(mem_arena *arena) | ||
| 208 | { | ||
| 209 | |||
| 210 | |||
| 162 | } | 211 | } |
| 212 | #endif | ||
| 163 | 213 | ||
| 164 | #endif /* B_TREE_IMPLEMENTATION */ | 214 | #endif /* BTREE_IMPLEMENTATION */ |
| 165 | #endif /* B_TREE_H */ | 215 | #endif /* BTREE_H */ |
diff --git a/source/csv_decoder.h b/source/csv_decoder.h index b754ef5..446f1da 100644 --- a/source/csv_decoder.h +++ b/source/csv_decoder.h | |||
| @@ -157,7 +157,7 @@ tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list | |||
| 157 | unused(token_list); | 157 | unused(token_list); |
| 158 | b32 finding_headers = TRUE; | 158 | b32 finding_headers = TRUE; |
| 159 | 159 | ||
| 160 | if(buffer.size < 0) return NULL; | 160 | if(buffer.size == 0) return NULL; |
| 161 | 161 | ||
| 162 | csv_token *tok = PushStruct(arena, csv_token); | 162 | csv_token *tok = PushStruct(arena, csv_token); |
| 163 | 163 | ||
| @@ -238,11 +238,10 @@ tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list | |||
| 238 | } | 238 | } |
| 239 | 239 | ||
| 240 | //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future | 240 | //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future |
| 241 | internal b_tree * | 241 | internal btree * |
| 242 | parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) | 242 | parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) |
| 243 | { | 243 | { |
| 244 | b_tree *tree = PushStructZero(arena, b_tree); | 244 | btree *tree = PushStructZero(arena, btree); |
| 245 | b_tree_create(arena, tree); | ||
| 246 | 245 | ||
| 247 | // iterate over the token list while the token is not nil | 246 | // iterate over the token list while the token is not nil |
| 248 | for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) | 247 | for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) |
| @@ -266,7 +265,6 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) | |||
| 266 | { | 265 | { |
| 267 | 266 | ||
| 268 | } | 267 | } |
| 269 | |||
| 270 | } | 268 | } |
| 271 | } | 269 | } |
| 272 | 270 | ||
| @@ -281,7 +279,14 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) | |||
| 281 | // NOTE(nasr): payload is the cten itself so the caller can reach | 279 | // NOTE(nasr): payload is the cten itself so the caller can reach |
| 282 | // row/col metadata without us having to copy it | 280 | // row/col metadata without us having to copy it |
| 283 | // NOTE(nasr): heh why do we void cast again? | 281 | // NOTE(nasr): heh why do we void cast again? |
| 284 | b_tree_insert(tree, ct->lexeme, (void *)ct); | 282 | |
| 283 | key k = { | ||
| 284 | .header_index = 1, | ||
| 285 | .row_index = 1, | ||
| 286 | }; | ||
| 287 | |||
| 288 | // btree_insert(arena, tree, (key)ct->lexeme, (void *)ct); | ||
| 289 | btree_insert(arena, tree, k, (void *)ct); | ||
| 285 | } | 290 | } |
| 286 | 291 | ||
| 287 | return tree; | 292 | return tree; |
diff --git a/source/tb_db.c b/source/tb_db.c index b992111..8aba614 100644 --- a/source/tb_db.c +++ b/source/tb_db.c | |||
| @@ -1,4 +1,4 @@ | |||
| 1 | #define B_TREE_IMPLEMENTATION | 1 | #define BTREE_IMPLEMENTATION |
| 2 | #define BASE_UNITY | 2 | #define BASE_UNITY |
| 3 | #include "base/base_include.h" | 3 | #include "base/base_include.h" |
| 4 | 4 | ||
| @@ -32,7 +32,7 @@ is_delimiter(u8 point) | |||
| 32 | return (point == ','); | 32 | return (point == ','); |
| 33 | } | 33 | } |
| 34 | 34 | ||
| 35 | #include "b_tree_impl.h" | 35 | #include "btree_impl.h" |
| 36 | #include "csv_decoder.h" | 36 | #include "csv_decoder.h" |
| 37 | 37 | ||
| 38 | typedef struct query_token query_token; | 38 | typedef struct query_token query_token; |
| @@ -162,6 +162,7 @@ int main(int count, char **value) | |||
| 162 | print("error reading from stdin"); | 162 | print("error reading from stdin"); |
| 163 | } | 163 | } |
| 164 | 164 | ||
| 165 | |||
| 165 | // TODO(nasr): extract this later in the future and make a string copy function/macro | 166 | // TODO(nasr): extract this later in the future and make a string copy function/macro |
| 166 | // @params (s32 lbuf_size , string8 lbuf_stringified) | 167 | // @params (s32 lbuf_size , string8 lbuf_stringified) |
| 167 | s32 lbuf_size = sizeof(lbuf) - 1; | 168 | s32 lbuf_size = sizeof(lbuf) - 1; |
| @@ -189,9 +190,9 @@ int main(int count, char **value) | |||
| 189 | assert_msg(tokens != NULL, "Tokens are NULL."); | 190 | assert_msg(tokens != NULL, "Tokens are NULL."); |
| 190 | 191 | ||
| 191 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); | 192 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); |
| 192 | b_tree *bt = parse_csv(global_arena, ctl, table); | 193 | btree *bt = parse_csv(global_arena, ctl, table); |
| 193 | 194 | ||
| 194 | b_tree_write(bt); | 195 | btree_write(bt); |
| 195 | } | 196 | } |
| 196 | 197 | ||
| 197 | // NOTE(nasr): not sure on how to approach the b-tree and the table format thing | 198 | // NOTE(nasr): not sure on how to approach the b-tree and the table format thing |
