diff options
Diffstat (limited to 'source/tb_db')
| -rw-r--r-- | source/tb_db/btree_impl.h | 209 | ||||
| -rw-r--r-- | source/tb_db/csv_decoder.h | 294 | ||||
| -rw-r--r-- | source/tb_db/tb_db.c | 198 |
3 files changed, 701 insertions, 0 deletions
diff --git a/source/tb_db/btree_impl.h b/source/tb_db/btree_impl.h new file mode 100644 index 0000000..b7015bd --- /dev/null +++ b/source/tb_db/btree_impl.h | |||
| @@ -0,0 +1,209 @@ | |||
| 1 | /** | ||
| 2 | * >> api notes | ||
| 3 | * >> by Abdellah El Morabit | ||
| 4 | * | ||
| 5 | * - creation of a btree happens when doing an insertion on an | ||
| 6 | * empty btree where the root is null or nil | ||
| 7 | * | ||
| 8 | **/ | ||
| 9 | |||
| 10 | /* single header b-tree implementation by Abdellah El Morabit */ | ||
| 11 | #ifndef BTREE_H | ||
| 12 | #define BTREE_H | ||
| 13 | |||
// BTREE_ORDER bounds the fan-out of each node: a HIGHER order means a
// shallower tree, and therefore fewer disk reads per lookup
| 16 | #if 0 | ||
| 17 | global_variable read_only s16 btree_ORDER = 4; | ||
| 18 | #endif | ||
| 19 | #define BTREE_ORDER 4 | ||
| 20 | |||
//- NOTE(nasr): defining a key to improve sorting
// i think saying that a key is a combination of the column + row is a good way of approaching this
typedef struct key key;
struct key
{
    s32 header_index; //- column number (index into the header row)
    s32 row_index;    //- row number within the table
};
| 29 | |||
typedef struct btree_node btree_node;
struct btree_node
{
    // sorted keys held by this node; a node stores at most BTREE_ORDER - 1
    key keys[BTREE_ORDER - 1];
    // payload_per_key[i] belongs to keys[i]
    // TODO(nasr): replace with something more generic?
    // NOTE(nasr): downside of void * is no type safety; is there a way to
    // keep some, given the payload size is fixed?
    void *payload_per_key[BTREE_ORDER - 1];
    btree_node *parent;
    // children as a fixed array rather than a linked list, so walking the
    // node is a contiguous scan instead of cache-missing pointer chases
    btree_node *children[BTREE_ORDER];
    // NOTE(review): declared but never read or written anywhere in this
    // file — looks vestigial; confirm before removing
    s32 *max_children;
    // NOTE(nasr): a reference count (how many leaves use this node) is not
    // needed yet — the arena allocator frees everything at once
    // s32 *refc;
    s32 key_count; // number of live entries in keys[] (0 .. BTREE_ORDER - 1)
    b32 leaf;      // nonzero when this node has no children

    // NOTE(nasr): do we hold a reference to the arena, or pass it in?
    // that could solve memory-location issues
};
| 56 | |||
| 57 | |||
typedef struct btree btree;
struct btree
{
    // NOTE(nasr): keep the root resident in memory so the initial fetch
    // never requires a disk read
    btree_node *root; // NULL until the first insertion creates it
};
| 65 | |||
| 66 | |||
| 67 | |||
| 68 | #ifdef BTREE_IMPLEMENTATION | ||
| 69 | /////////////////////////////////////////////////////////////////////////////// | ||
| 70 | //- btree.c | ||
| 71 | |||
| 72 | |||
| 73 | internal b32 | ||
| 74 | is_nil_btree_node(btree_node *node) | ||
| 75 | { | ||
| 76 | // NOTE(nasr): looks like a useless function but could be usefull when we implement a nil_btree_node | ||
| 77 | return (node == NULL); | ||
| 78 | } | ||
| 79 | |||
| 80 | // TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees? | ||
| 81 | |||
| 82 | internal b8 | ||
| 83 | key_compare(key destination_key, key source_key) | ||
| 84 | { | ||
| 85 | s32 source_index = source_key.header_index + source_key.row_index; | ||
| 86 | s32 destination_index = destination_key.header_index + destination_key.row_index; | ||
| 87 | |||
| 88 | if(destination_index > source_index) return 1; | ||
| 89 | else if(destination_index < source_index) return -1; | ||
| 90 | else return 0; | ||
| 91 | } | ||
| 92 | |||
| 93 | // NOTE(nasr): @return the index of the found element | ||
| 94 | //- @param: start node could be the root node for all we care. but this enables optimizations in the future if we need them | ||
| 95 | internal s32 | ||
| 96 | btree_find_ipos(key k, btree_node *start_node) | ||
| 97 | { | ||
| 98 | s32 index = 0; | ||
| 99 | // scan right while the node's key at [index] is strictly less than k | ||
| 100 | while (index < start_node->key_count && key_compare(start_node->keys[index], k) < 0) | ||
| 101 | { | ||
| 102 | ++index; | ||
| 103 | } | ||
| 104 | return index; | ||
| 105 | } | ||
| 106 | |||
| 107 | // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory | ||
| 108 | internal void * | ||
| 109 | btree_search(btree_node *node, key key) | ||
| 110 | { | ||
| 111 | s32 index = btree_find_ipos(key, node); | ||
| 112 | |||
| 113 | if (index < node->key_count && key_compare(node->keys[index], key) == 0) | ||
| 114 | { | ||
| 115 | return node->payload_per_key[index]; | ||
| 116 | } | ||
| 117 | if (node->leaf) | ||
| 118 | { | ||
| 119 | return NULL; | ||
| 120 | } | ||
| 121 | return btree_search(node->children[index], key); | ||
| 122 | } | ||
| 123 | |||
| 124 | // TODO(nasr): split node when key_count == btree_ORDER - 1 (node is full) | ||
| 125 | // -- each node has max btree_ORDER - 1 keys | ||
| 126 | // | ||
| 127 | internal void | ||
| 128 | btree_insert(mem_arena *arena, btree *tree, key key, void *payload) | ||
| 129 | { | ||
| 130 | if(is_nil_btree_node(tree->root)) | ||
| 131 | { | ||
| 132 | tree->root = PushStructZero(arena, btree_node); | ||
| 133 | tree->root->leaf = 1; | ||
| 134 | tree->root->key_count = 0; | ||
| 135 | } | ||
| 136 | |||
| 137 | btree_node *current_node = tree->root; | ||
| 138 | |||
| 139 | if (current_node->leaf) | ||
| 140 | { | ||
| 141 | // find the insertion position and shift keys right to make room | ||
| 142 | //- after: but what is this. we are expecting a new key.but we are looking for | ||
| 143 | //- its position in the tree is useless because it's non existent | ||
| 144 | |||
| 145 | s32 i = btree_find_ipos(key, current_node); | ||
| 146 | |||
| 147 | for (s32 j = current_node->key_count; j > i; --j) | ||
| 148 | { | ||
| 149 | current_node->keys[j] = current_node->keys[j - 1]; | ||
| 150 | current_node->payload_per_key[j] = current_node->payload_per_key[j - 1]; | ||
| 151 | } | ||
| 152 | |||
| 153 | current_node->keys[i] = key; | ||
| 154 | current_node->payload_per_key[i] = payload; | ||
| 155 | |||
| 156 | if(current_node->key_count < (BTREE_ORDER - 1)) | ||
| 157 | { | ||
| 158 | current_node->key_count += 1; | ||
| 159 | } | ||
| 160 | else { | ||
| 161 | // TODO(nasr): creating a new branch / tree? | ||
| 162 | // make a seperate function for this | ||
| 163 | // NOTE(nasr): only split through the parent when one actually exists; | ||
| 164 | // root splits need to be handled separately (promote mid key, create two children) | ||
| 165 | if(current_node->parent != NULL) | ||
| 166 | { | ||
| 167 | s32 split_pos = current_node->parent->key_count / 2 - 1; | ||
| 168 | s32 updated_keycount_p = 0; | ||
| 169 | for(s32 index = 0; index <= split_pos; ++index) | ||
| 170 | { | ||
| 171 | ++updated_keycount_p; | ||
| 172 | } | ||
| 173 | current_node->parent->key_count = updated_keycount_p; | ||
| 174 | } | ||
| 175 | } | ||
| 176 | } | ||
| 177 | // TODO(nasr): internal node case walk down then split on the way back up | ||
| 178 | } | ||
| 179 | |||
| 180 | |||
// TODO(nasr): stub — persisting the tree to disk is not implemented yet.
// the #if 0 sketch below does not compile as-is (it shadows `bt`, and
// `next` / `temp` are undeclared); kept only as a breadcrumb for the
// eventual implementation.
internal void
btree_write(btree *bt)
{
    unused(bt);
#if 0
    temp_arena *temp_arena = temp_arena_begin(arena);
    btree_node *root = bt->root;

    for(btree_node *bt = PushStruct(arena); root->next; next = root;)
    {

    }

    temp_arena_end(temp);
#endif
}
| 197 | |||
| 198 | // - load the entire db into memory. | ||
| 199 | #if 0 | ||
| 200 | internal void | ||
| 201 | btree_load(mem_arena *arena) | ||
| 202 | { | ||
| 203 | |||
| 204 | |||
| 205 | } | ||
| 206 | #endif | ||
| 207 | |||
| 208 | #endif /* BTREE_IMPLEMENTATION */ | ||
| 209 | #endif /* BTREE_H */ | ||
diff --git a/source/tb_db/csv_decoder.h b/source/tb_db/csv_decoder.h new file mode 100644 index 0000000..3d09dc6 --- /dev/null +++ b/source/tb_db/csv_decoder.h | |||
| @@ -0,0 +1,294 @@ | |||
| 1 | #ifndef ENGINE_LEXER_H | ||
| 2 | #define ENGINE_LEXER_H | ||
| 3 | |||
// per-token flags, OR-ed into csv_token.flags
enum csv_token_flags
{
    // set on the token that closes a row (the field right before '\n')
    // NOTE(review): starts at bit 2 — nothing in this file uses bits 0/1;
    // confirm whether they are reserved before renumbering
    FL = 1 << 2,
};

enum csv_token_type
{
    // values below 256 are reserved so a raw ascii byte can double as a type
    TOKEN_UNDEFINED = 255,
    TOKEN_IDENTIFIER,
    TOKEN_VALUE,
};
| 16 | |||
typedef struct csv_token csv_token;
struct csv_token
{
    string8 lexeme;             // field bytes; points into the source buffer, not a copy
    csv_token *next_token;      // next token in the stream, &nil_csv_token at the tail
    enum csv_token_type type;   // TOKEN_VALUE for cell fields
    enum csv_token_flags flags; // FL marks the token that ends a row
};
| 25 | |||
// NOTE(nasr): i don't think i'm going to use this — kept in case row-wise
// access turns out to be useful
typedef struct csv_row csv_row;
struct csv_row
{
    // array of `count` fields; each string8 points into the mmap buffer
    string8 *fields;
    s32 count;
};
| 34 | |||
| 35 | #if 0 | ||
| 36 | typedef struct csv_lntity csv_entity; | ||
| 37 | struct csv_entity | ||
| 38 | { | ||
| 39 | //- not needed because we use key header mapping i think | ||
| 40 | }; | ||
| 41 | #endif | ||
| 42 | |||
// one column name; headers form a singly linked list terminated by &nil_csv_header
typedef struct csv_header csv_header;
struct csv_header
{
    string8 payload;          // the column-name bytes
    csv_header *next_header;
};
| 49 | |||
// bookkeeping gathered while tokenizing: the header row (column names)
// plus running row/column counts for the data rows
typedef struct csv_table csv_table;
struct csv_table
{
    csv_header *header;  // first column name (first row of the file)
    s32 row_count;       // rows seen so far
    s32 header_count;    // columns counted while finding_headers was set
    b32 finding_headers; // nonzero while the first (header) row is being scanned
};
| 60 | |||
| 61 | |||
// token-stream handle: walk forward from start_token; append at end_token
typedef struct csv_token_list csv_token_list;
struct csv_token_list
{
    csv_token *start_token;
    csv_token *end_token;
};
| 68 | |||
// nil sentinels: terminators that let list-walking code test "is this the
// end?" without scattering NULL checks. the self-referential next pointers
// mean walking past the end stays parked on the sentinel.
// NOTE(review): these are read_only — never write through a pointer that
// still aliases one of them (e.g. an empty list whose end_token points at
// nil_csv_token); that is a write into read-only storage
read_only global_variable
csv_token nil_csv_token =
{
    .lexeme = {.data = NULL, .size = 0},
    .type = 0,
    .flags = 0,
    .next_token = &nil_csv_token,
};

read_only global_variable
csv_header nil_csv_header =
{
    .payload = {.data = NULL, .size = 0},
    .next_header = &nil_csv_header,
};

read_only global_variable
csv_token_list nil_csv_token_list =
{
    .start_token = &nil_csv_token,
    .end_token = &nil_csv_token,
};

read_only global_variable
csv_row nil_csv_row =
{
    // nil_string comes from the base layer
    .fields = &nil_string,
    .count = 0,
};

read_only global_variable
csv_table nil_csv_table =
{
    .header = &nil_csv_header,
    .row_count = 0,
};
| 105 | |||
| 106 | #endif /* ENGINE_LEXER_H */ | ||
| 107 | |||
| 108 | internal b32 | ||
| 109 | is_nil_csv_token(csv_token *token) | ||
| 110 | { | ||
| 111 | return ((token == NULL) || (token == &nil_csv_token)); | ||
| 112 | } | ||
| 113 | |||
| 114 | // TODO(nasr): segfaulting because end_token not allocated | ||
| 115 | internal void | ||
| 116 | csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token) | ||
| 117 | { | ||
| 118 | source_token_list->end_token->next_token = source_token; | ||
| 119 | source_token_list->end_token = source_token; | ||
| 120 | } | ||
| 121 | |||
| 122 | //- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother | ||
| 123 | internal void | ||
| 124 | csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source) | ||
| 125 | { | ||
| 126 | if(is_nil_csv_token(source->start_token)) return; | ||
| 127 | |||
| 128 | csv_token *source_ct = source->start_token; | ||
| 129 | csv_token *destination_et = destination->end_token; | ||
| 130 | |||
| 131 | // walk source and stitch each node onto destination's tail | ||
| 132 | for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token) | ||
| 133 | { | ||
| 134 | destination_et->next_token = source_ct; | ||
| 135 | destination_et = source_ct; | ||
| 136 | } | ||
| 137 | |||
| 138 | // destination_et now points at the last real source node (not the nil sentinel) | ||
| 139 | destination->end_token = destination_et; | ||
| 140 | } | ||
| 141 | |||
| 142 | #if 0 | ||
| 143 | internal csv_token_list * | ||
| 144 | parse_csv_row(string8 row_buffer) | ||
| 145 | { | ||
| 146 | // csv_token_list * | ||
| 147 | |||
| 148 | } | ||
| 149 | #endif | ||
| 150 | |||
| 151 | |||
// the lexer acts as a table builder from a csv file,
// parsing individual rows and columns;
// the next step is building the b-tree
| 155 | internal csv_token * | ||
| 156 | tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list) | ||
| 157 | { | ||
| 158 | unused(token_list); | ||
| 159 | |||
| 160 | if(buffer.size == 0) return NULL; | ||
| 161 | |||
| 162 | // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit | ||
| 163 | // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? | ||
| 164 | // forgot what the solution was | ||
| 165 | // TODO(nasr): check what the problem here was | ||
| 166 | |||
| 167 | // string size tracking across the loop not inside it | ||
| 168 | s32 start = 0; | ||
| 169 | |||
| 170 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) | ||
| 171 | { | ||
| 172 | u8 point = buffer.data[index]; | ||
| 173 | |||
| 174 | #if 0 | ||
| 175 | if(is_whitespace(point)) | ||
| 176 | { | ||
| 177 | warn("csv file is invalid, detected whitespace"); | ||
| 178 | return NULL; | ||
| 179 | } | ||
| 180 | #endif | ||
| 181 | |||
| 182 | if(point == ',') | ||
| 183 | { | ||
| 184 | // emit a token for the field that ended before this comma | ||
| 185 | csv_token *token = PushStructZero(arena, csv_token); | ||
| 186 | |||
| 187 | assert_msg(token != NULL, "did the push struct fail??"); | ||
| 188 | assert_msg(arena->current_position < arena->capacity, "no more arena size"); | ||
| 189 | |||
| 190 | token->lexeme = StringCast(&buffer.data[start], index - start); | ||
| 191 | token->type = TOKEN_VALUE; | ||
| 192 | token->next_token = &nil_csv_token; | ||
| 193 | csv_token_list_append_token(token_list, token); | ||
| 194 | |||
| 195 | start = index + 1; | ||
| 196 | |||
| 197 | if(table->finding_headers) | ||
| 198 | { | ||
| 199 | table->header_count++; | ||
| 200 | } | ||
| 201 | } | ||
| 202 | else if(point == '\n') | ||
| 203 | { | ||
| 204 | // emit a token for the field that ended at this newline | ||
| 205 | csv_token *token = PushStructZero(arena, csv_token); | ||
| 206 | token->lexeme = StringCast(&buffer.data[start], index - start); | ||
| 207 | token->type = TOKEN_VALUE; | ||
| 208 | token->flags |= FL; | ||
| 209 | token->next_token = &nil_csv_token; | ||
| 210 | |||
| 211 | assert_msg(token_list, "token list invalid"); | ||
| 212 | assert_msg(token, "you're tring to append an invalid token"); | ||
| 213 | |||
| 214 | csv_token_list_append_token(token_list, token); | ||
| 215 | |||
| 216 | start = index + 1; | ||
| 217 | |||
| 218 | if(table->finding_headers) | ||
| 219 | { | ||
| 220 | { | ||
| 221 | //- map new header token list to table headers | ||
| 222 | } | ||
| 223 | table->finding_headers = FALSE; | ||
| 224 | } | ||
| 225 | |||
| 226 | table->row_count++; | ||
| 227 | } | ||
| 228 | } | ||
| 229 | |||
| 230 | // NOTE(nasr): return the first token the caller can walk the list from token_list | ||
| 231 | return token_list->start_token; | ||
| 232 | } | ||
| 233 | |||
| 234 | //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future | ||
| 235 | internal btree * | ||
| 236 | parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) | ||
| 237 | { | ||
| 238 | btree *tree = PushStructZero(arena, btree); | ||
| 239 | |||
| 240 | s32 col_index = 0; | ||
| 241 | s32 row_index = 0; | ||
| 242 | |||
| 243 | // iterate over the token list while the token is not nil | ||
| 244 | for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) | ||
| 245 | { | ||
| 246 | { | ||
| 247 | //- are we parsing the first line tokens? | ||
| 248 | //- if so, do something :)) | ||
| 249 | if(ct->flags & FL) | ||
| 250 | { | ||
| 251 | // NOTE(nasr): FL marks end-of-line; advance row, reset col | ||
| 252 | row_index++; | ||
| 253 | col_index = 0; | ||
| 254 | |||
| 255 | // TODO(nasr): replace with nil header check function | ||
| 256 | // NOTE(nasr): == nil means header hasn't been set yet | ||
| 257 | if(table->header == &nil_csv_header || table->header == NULL) | ||
| 258 | { | ||
| 259 | #if 0 | ||
| 260 | // - no this should happen in the tokenization | ||
| 261 | table->headers->next = | ||
| 262 | #endif | ||
| 263 | } | ||
| 264 | else | ||
| 265 | { | ||
| 266 | |||
| 267 | } | ||
| 268 | |||
| 269 | // FL tokens are structural, no value to index | ||
| 270 | continue; | ||
| 271 | } | ||
| 272 | } | ||
| 273 | |||
| 274 | // skip non-value tokens, only index actual cell values | ||
| 275 | if (ct->type != TOKEN_VALUE) | ||
| 276 | { | ||
| 277 | col_index++; | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | |||
| 281 | // NOTE(nasr): payload is the cten itself so the caller can reach | ||
| 282 | // row/col metadata without us having to copy it | ||
| 283 | key k = { | ||
| 284 | .header_index = col_index, | ||
| 285 | .row_index = row_index, | ||
| 286 | }; | ||
| 287 | |||
| 288 | btree_insert(arena, tree, k, (void *)ct); | ||
| 289 | |||
| 290 | col_index++; | ||
| 291 | } | ||
| 292 | |||
| 293 | return tree; | ||
| 294 | } | ||
diff --git a/source/tb_db/tb_db.c b/source/tb_db/tb_db.c new file mode 100644 index 0000000..4ae247d --- /dev/null +++ b/source/tb_db/tb_db.c | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | #define BTREE_IMPLEMENTATION | ||
| 2 | #define BASE_UNITY | ||
| 3 | #include "../base/base_include.h" | ||
| 4 | |||
| 5 | internal b32 | ||
| 6 | is_alpha(u8 point) | ||
| 7 | { | ||
| 8 | return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_')); | ||
| 9 | } | ||
| 10 | |||
| 11 | internal b32 | ||
| 12 | is_digit(u8 point) | ||
| 13 | { | ||
| 14 | return (point >= '0' && point <= '9'); | ||
| 15 | } | ||
| 16 | |||
| 17 | internal b32 | ||
| 18 | is_alpha_num(u8 point) | ||
| 19 | { | ||
| 20 | return (is_alpha(point) || is_digit(point)); | ||
| 21 | } | ||
| 22 | |||
| 23 | internal b32 | ||
| 24 | is_whitespace(u8 point) | ||
| 25 | { | ||
| 26 | return (point == '\n' || point == '\r' || point == ' ' || point == '\t'); | ||
| 27 | } | ||
| 28 | |||
| 29 | internal b32 | ||
| 30 | is_delimiter(u8 point) | ||
| 31 | { | ||
| 32 | return (point == ','); | ||
| 33 | } | ||
| 34 | |||
| 35 | #include "btree_impl.h" | ||
| 36 | #include "csv_decoder.h" | ||
| 37 | |||
// one whitespace-delimited word of a repl query line
typedef struct query_token query_token;
struct query_token
{
    string8 lexeme;    // points into the repl input buffer, not a copy
    query_token *next; // next word, or &nil_query_token at the tail
};

// stream handle: walk forward from start_token; current_token is the append tail
typedef struct query_token_list query_token_list;
struct query_token_list
{
    query_token *start_token;
    query_token *current_token;
};
| 51 | |||
// nil sentinels for the query token stream; self-linked so walking past
// the end stays parked on the sentinel.
// NOTE(review): read_only — never write through a pointer aliasing these
read_only global_variable
query_token nil_query_token =
{
    .lexeme = {.data = NULL, .size = 0},
    .next = &nil_query_token
};


read_only global_variable
query_token_list nil_query_token_list =
{
    .start_token = &nil_query_token,
    .current_token = &nil_query_token,
};
| 66 | |||
| 67 | internal b32 | ||
| 68 | is_nil_query_token(query_token *token) | ||
| 69 | { | ||
| 70 | return (token == &nil_query_token) || (token == NULL); | ||
| 71 | } | ||
| 72 | |||
| 73 | internal b32 | ||
| 74 | is_nil_query_token_list(query_token *token) | ||
| 75 | { | ||
| 76 | return (token == &nil_query_token) || (token == NULL); | ||
| 77 | } | ||
| 78 | |||
| 79 | // takes on line of the repl input | ||
| 80 | // return a reference to the passed list | ||
| 81 | internal query_token_list * | ||
| 82 | query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) | ||
| 83 | { | ||
| 84 | b32 initialized = 0; | ||
| 85 | unused(initialized); | ||
| 86 | |||
| 87 | for (u64 index = 0; index < buffer->size; ++index) | ||
| 88 | { | ||
| 89 | u8 codepoint = buffer->data[index]; | ||
| 90 | |||
| 91 | if(codepoint == '\n' || codepoint == '\r') break; | ||
| 92 | |||
| 93 | s32 start = 0; | ||
| 94 | s32 end = 0; | ||
| 95 | |||
| 96 | if(is_whitespace(codepoint)) end = index; | ||
| 97 | |||
| 98 | // save the token | ||
| 99 | // TODO(nasr): work on the string macros cuz no work | ||
| 100 | { | ||
| 101 | query_token *new_token = PushStruct(arena, query_token); | ||
| 102 | |||
| 103 | //- initialize list | ||
| 104 | { | ||
| 105 | if(is_nil_query_token(list->start_token)) | ||
| 106 | { | ||
| 107 | list->start_token = new_token; | ||
| 108 | list->current_token = new_token; | ||
| 109 | } | ||
| 110 | else | ||
| 111 | { | ||
| 112 | //- all we need to do - we dont track parents or what ever. this is a token stream not a tree | ||
| 113 | list->current_token->next = new_token; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | s32 new_token_size = end - start; | ||
| 118 | |||
| 119 | new_token->lexeme = PushString(arena, new_token_size); | ||
| 120 | new_token->lexeme.data = &buffer->data[index]; | ||
| 121 | new_token->lexeme.size = new_token_size; | ||
| 122 | |||
| 123 | list->current_token->next = new_token; | ||
| 124 | |||
| 125 | start = index + 1; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | return list; | ||
| 130 | } | ||
| 131 | |||
| 132 | int main(int count, char **value) | ||
| 133 | { | ||
| 134 | #if 1 | ||
| 135 | unused(nil_query_token_list); | ||
| 136 | #endif | ||
| 137 | |||
| 138 | if(count < 2) value[1] = "./test/data.csv"; | ||
| 139 | |||
| 140 | local_persist b32 running = 1; | ||
| 141 | |||
| 142 | mem_arena *global_arena = arena_create(GiB(1)); | ||
| 143 | |||
| 144 | string8 buffer = load_file(global_arena, value[1]); | ||
| 145 | |||
| 146 | // NOTE(nasr): the use of tables is required for tracking headers etc. | ||
| 147 | // i think we can optimize this away in the future but for now its fine | ||
| 148 | csv_table *table = PushStructZero(global_arena, csv_table); | ||
| 149 | csv_token_list *token_list = PushStructZero(global_arena, csv_token_list); | ||
| 150 | |||
| 151 | token_list->start_token = &nil_csv_token; | ||
| 152 | token_list->end_token = &nil_csv_token; | ||
| 153 | |||
| 154 | csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list); | ||
| 155 | assert_msg(tokens != NULL, "tokens are null"); | ||
| 156 | |||
| 157 | // NOTE(nasr): token_list is now populated — pass it directly, not a fresh empty list | ||
| 158 | btree *bt = parse_csv(global_arena, token_list, table); | ||
| 159 | |||
| 160 | print("\nDatabase Engine\n"); | ||
| 161 | |||
| 162 | for(;;) | ||
| 163 | { | ||
| 164 | if(running) | ||
| 165 | { | ||
| 166 | u8 *lbuf = PushArray(global_arena, u8, 256); | ||
| 167 | s32 err = os_read(STDIN_FD, lbuf, 256); | ||
| 168 | |||
| 169 | if(err < 0) | ||
| 170 | { | ||
| 171 | print("error reading from stdin"); | ||
| 172 | } | ||
| 173 | |||
| 174 | // TODO(nasr): extract this later in the future and make a string copy function/macro | ||
| 175 | // @params (s32 lbuf_size, string8 lbuf_stringified) | ||
| 176 | // NOTE(nasr): use err (bytes read) not sizeof(lbuf*) — sizeof a pointer is always 8 | ||
| 177 | s32 lbuf_size = err; | ||
| 178 | string8 lbuf_stringified = PushString(global_arena, lbuf_size); | ||
| 179 | { | ||
| 180 | memcpy(lbuf_stringified.data, lbuf, lbuf_size); | ||
| 181 | lbuf_stringified.size = lbuf_size; | ||
| 182 | } | ||
| 183 | |||
| 184 | query_token_list *qtl = PushStructZero(global_arena, query_token_list); | ||
| 185 | |||
| 186 | |||
| 187 | query_tokenizer(global_arena, &lbuf_stringified, qtl); | ||
| 188 | |||
| 189 | // TODO(nasr): dispatch qtl against bt here | ||
| 190 | |||
| 191 | unused(bt); | ||
| 192 | |||
| 193 | sleep(1); | ||
| 194 | } | ||
| 195 | } | ||
| 196 | |||
| 197 | return 0; | ||
| 198 | } | ||
