diff options
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | source/b_tree_impl.h (renamed from source/b_tree.h) | 49 | ||||
| -rw-r--r-- | source/csv_decoder.h | 289 | ||||
| -rw-r--r-- | source/csv_reader.h | 183 | ||||
| -rw-r--r-- | source/tb_db.c (renamed from source/engine.c) | 28 |
5 files changed, 348 insertions, 206 deletions
| @@ -1,8 +1,7 @@ | |||
| 1 | BIN = build/engine | 1 | BIN = build/engine |
| 2 | SRC = source/engine.c | 2 | SRC = source/tb_db.c |
| 3 | CC = clang | 3 | CC = clang |
| 4 | CFLAGS = -Wall -Wextra -Wfloat-equal -Wswitch-default -Wswitch-enum \ | 4 | CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror |
| 5 | -Wno-unused-parameter -Wno-implicit-fallthrough -Wno-unused-function -g -Werror | ||
| 6 | 5 | ||
| 7 | $(BIN): $(SRC) | 6 | $(BIN): $(SRC) |
| 8 | mkdir -p build | 7 | mkdir -p build |
diff --git a/source/b_tree.h b/source/b_tree_impl.h index 0c4b1e1..8eb0723 100644 --- a/source/b_tree.h +++ b/source/b_tree_impl.h | |||
| @@ -4,16 +4,29 @@ | |||
| 4 | 4 | ||
| 5 | // maximum height of the tree the lower the lower the lower amount | 5 | // maximum height of the tree the lower the lower the lower amount |
| 6 | // of disk reads which translates into faster? | 6 | // of disk reads which translates into faster? |
| 7 | #define B_TREE_ORDER 4 | 7 | #if 0 |
| 8 | global_variable read_only s16 B_TREE_ORDER = 4; | ||
| 9 | #endif | ||
| 10 | #define B_TREE_ORDER 4 | ||
| 11 | |||
| 12 | //- NOTE(nasr): defining a key to improve sorting | ||
| 13 | // i think saying that a key is a combination of the column + row is a good way of appraoching this | ||
| 14 | typedef struct key key; | ||
| 15 | struct key | ||
| 16 | { | ||
| 17 | string8 header; | ||
| 18 | s32 row; | ||
| 19 | }; | ||
| 8 | 20 | ||
| 9 | typedef struct b_tree_node b_tree_node; | 21 | typedef struct b_tree_node b_tree_node; |
| 10 | struct b_tree_node | 22 | struct b_tree_node |
| 11 | { | 23 | { |
| 12 | // store the values | 24 | // store the key values of the sub nodes? if they are leaves? |
| 13 | string8 keys[B_TREE_ORDER - 1]; | 25 | key keys[B_TREE_ORDER - 1]; |
| 14 | // TODO(nasr): replace with something more generic? | 26 | // TODO(nasr): replace with something more generic? |
| 15 | // NOTE(nasr): cons of void * -> no type safety | 27 | // NOTE(nasr): cons of void * -> no type safety |
| 16 | // is there a way to still have some sort of that? | 28 | // is there a way to still have some sort of that? |
| 29 | // size not variable | ||
| 17 | void *payload_per_key[B_TREE_ORDER - 1]; | 30 | void *payload_per_key[B_TREE_ORDER - 1]; |
| 18 | b_tree_node *parent; | 31 | b_tree_node *parent; |
| 19 | // handle to store children faster than linked list | 32 | // handle to store children faster than linked list |
| @@ -27,6 +40,10 @@ struct b_tree_node | |||
| 27 | // s32 *refc; | 40 | // s32 *refc; |
| 28 | s32 key_count; | 41 | s32 key_count; |
| 29 | b32 leaf; | 42 | b32 leaf; |
| 43 | |||
| 44 | |||
| 45 | // NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference? | ||
| 46 | // this could solve memory location issues? | ||
| 30 | }; | 47 | }; |
| 31 | 48 | ||
| 32 | typedef struct b_tree b_tree; | 49 | typedef struct b_tree b_tree; |
| @@ -55,9 +72,17 @@ btree_node_alloc(mem_arena *arena) | |||
| 55 | internal s32 | 72 | internal s32 |
| 56 | btree_node_find_pos(string8 value, b_tree_node *node) | 73 | btree_node_find_pos(string8 value, b_tree_node *node) |
| 57 | { | 74 | { |
| 75 | unused(value); | ||
| 76 | unused(node); | ||
| 77 | |||
| 78 | #if 0 | ||
| 58 | s32 i = 0; | 79 | s32 i = 0; |
| 59 | for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i); | 80 | for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i); |
| 60 | return i; | 81 | return i; |
| 82 | #endif | ||
| 83 | |||
| 84 | return 0; | ||
| 85 | |||
| 61 | } | 86 | } |
| 62 | 87 | ||
| 63 | internal void | 88 | internal void |
| @@ -68,10 +93,15 @@ b_tree_create(mem_arena *arena, b_tree *tree) | |||
| 68 | tree->root->key_count = 0; | 93 | tree->root->key_count = 0; |
| 69 | } | 94 | } |
| 70 | 95 | ||
| 96 | |||
| 71 | // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory | 97 | // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory |
| 72 | internal void * | 98 | internal void * |
| 73 | b_tree_search(b_tree_node *node, string8 key) | 99 | b_tree_search(b_tree_node *node, string8 key) |
| 74 | { | 100 | { |
| 101 | unused(node); | ||
| 102 | unused(key); | ||
| 103 | |||
| 104 | #if 0 | ||
| 75 | s32 i = btree_node_find_pos(key, node); | 105 | s32 i = btree_node_find_pos(key, node); |
| 76 | 106 | ||
| 77 | if (i < node->key_count && string8_cmp(node->keys[i], key) == 0) | 107 | if (i < node->key_count && string8_cmp(node->keys[i], key) == 0) |
| @@ -83,12 +113,20 @@ b_tree_search(b_tree_node *node, string8 key) | |||
| 83 | return NULL; | 113 | return NULL; |
| 84 | } | 114 | } |
| 85 | return b_tree_search(node->children[i], key); | 115 | return b_tree_search(node->children[i], key); |
| 116 | #endif | ||
| 117 | |||
| 118 | return NULL; | ||
| 86 | } | 119 | } |
| 87 | 120 | ||
| 121 | |||
| 88 | // TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full) | 122 | // TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full) |
| 89 | internal void | 123 | internal void |
| 90 | b_tree_insert(mem_arena *arena, b_tree *tree, string8 key, void *payload) | 124 | b_tree_insert(b_tree *tree, string8 key, void *payload) |
| 91 | { | 125 | { |
| 126 | unused(tree); | ||
| 127 | unused(key); | ||
| 128 | unused(payload); | ||
| 129 | #if 0 | ||
| 92 | b_tree_node *current_node = tree->root; | 130 | b_tree_node *current_node = tree->root; |
| 93 | 131 | ||
| 94 | if (current_node->leaf) | 132 | if (current_node->leaf) |
| @@ -109,15 +147,18 @@ b_tree_insert(mem_arena *arena, b_tree *tree, string8 key, void *payload) | |||
| 109 | } | 147 | } |
| 110 | else { | 148 | else { |
| 111 | // TODO(nasr): creating a new branch / tree? | 149 | // TODO(nasr): creating a new branch / tree? |
| 150 | // make a seperate function for this | ||
| 112 | } | 151 | } |
| 113 | } | 152 | } |
| 114 | // TODO(nasr): internal node case walk down then split on the way back up | 153 | // TODO(nasr): internal node case walk down then split on the way back up |
| 154 | #endif | ||
| 115 | } | 155 | } |
| 116 | 156 | ||
| 117 | internal void | 157 | internal void |
| 118 | b_tree_write(b_tree *bt) | 158 | b_tree_write(b_tree *bt) |
| 119 | { | 159 | { |
| 120 | // TODO(nasr): write the b_tree to disk | 160 | // TODO(nasr): write the b_tree to disk |
| 161 | unused(bt); | ||
| 121 | } | 162 | } |
| 122 | 163 | ||
| 123 | #endif /* B_TREE_IMPLEMENTATION */ | 164 | #endif /* B_TREE_IMPLEMENTATION */ |
diff --git a/source/csv_decoder.h b/source/csv_decoder.h new file mode 100644 index 0000000..b754ef5 --- /dev/null +++ b/source/csv_decoder.h | |||
| @@ -0,0 +1,289 @@ | |||
| 1 | #ifndef ENGINE_LEXER_H | ||
| 2 | #define ENGINE_LEXER_H | ||
| 3 | |||
| 4 | enum csv_token_flags | ||
| 5 | { | ||
| 6 | FL = 1 << 2, | ||
| 7 | }; | ||
| 8 | |||
| 9 | enum csv_token_type | ||
| 10 | { | ||
| 11 | // first 255 tokens for ascii characters | ||
| 12 | TOKEN_UNDEFINED = 255, | ||
| 13 | TOKEN_IDENTIFIER, | ||
| 14 | TOKEN_VALUE, | ||
| 15 | }; | ||
| 16 | |||
| 17 | typedef struct csv_token csv_token; | ||
| 18 | struct csv_token | ||
| 19 | { | ||
| 20 | string8 lexeme; | ||
| 21 | csv_token *next_token; | ||
| 22 | enum csv_token_type type; | ||
| 23 | enum csv_token_flags flags; | ||
| 24 | }; | ||
| 25 | |||
| 26 | // NOTE(nasr): i dont think im going to use this. | ||
| 27 | typedef struct csv_row csv_row; | ||
| 28 | struct csv_row | ||
| 29 | { | ||
| 30 | // array of size col_count, points into mmap buffer | ||
| 31 | string8 *fields; | ||
| 32 | s32 count; | ||
| 33 | }; | ||
| 34 | |||
| 35 | #if 0 | ||
| 36 | typedef struct csv_lntity csv_entity; | ||
| 37 | struct csv_entity | ||
| 38 | { | ||
| 39 | //- not needed because we use key header mapping i think | ||
| 40 | }; | ||
| 41 | #endif | ||
| 42 | |||
| 43 | typedef struct csv_header csv_header; | ||
| 44 | struct csv_header | ||
| 45 | { | ||
| 46 | string8 payload; | ||
| 47 | csv_header *next_header; | ||
| 48 | }; | ||
| 49 | |||
| 50 | typedef struct csv_table csv_table; | ||
| 51 | struct csv_table | ||
| 52 | { | ||
| 53 | // first row, col names | ||
| 54 | // all data rows | ||
| 55 | csv_header *header; | ||
| 56 | s32 row_count; | ||
| 57 | s32 header_count; | ||
| 58 | }; | ||
| 59 | |||
| 60 | |||
| 61 | typedef struct csv_token_list csv_token_list; | ||
| 62 | struct csv_token_list | ||
| 63 | { | ||
| 64 | csv_token *start_token; | ||
| 65 | csv_token *end_token; | ||
| 66 | |||
| 67 | }; | ||
| 68 | |||
| 69 | read_only global_variable | ||
| 70 | csv_token nil_csv_token= | ||
| 71 | { | ||
| 72 | .lexeme = {.data = NULL, .size =0}, | ||
| 73 | .type = 0, | ||
| 74 | .flags = 0, | ||
| 75 | .next_token = &nil_csv_token, | ||
| 76 | |||
| 77 | }; | ||
| 78 | |||
| 79 | read_only global_variable | ||
| 80 | csv_header nil_csv_header = | ||
| 81 | { | ||
| 82 | .payload = {.data = NULL, .size = 0}, | ||
| 83 | .next_header = &nil_csv_header, | ||
| 84 | }; | ||
| 85 | |||
| 86 | read_only global_variable | ||
| 87 | csv_token_list nil_csv_token_list = | ||
| 88 | { | ||
| 89 | .start_token = &nil_csv_token, | ||
| 90 | .end_token = &nil_csv_token, | ||
| 91 | }; | ||
| 92 | |||
| 93 | |||
| 94 | read_only global_variable | ||
| 95 | csv_row nil_csv_row = | ||
| 96 | { | ||
| 97 | .fields = &nil_string, | ||
| 98 | .count = 0, | ||
| 99 | }; | ||
| 100 | |||
| 101 | read_only global_variable | ||
| 102 | csv_table nil_csv_table = | ||
| 103 | { | ||
| 104 | .header = &nil_csv_header, | ||
| 105 | .row_count = 0, | ||
| 106 | }; | ||
| 107 | |||
| 108 | #endif /* ENGINE_LEXER_H */ | ||
| 109 | |||
| 110 | internal b32 | ||
| 111 | is_nil_csv_token(csv_token *token) | ||
| 112 | { | ||
| 113 | return ((token == NULL) || (token == &nil_csv_token)); | ||
| 114 | } | ||
| 115 | |||
| 116 | internal void | ||
| 117 | csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token) | ||
| 118 | { | ||
| 119 | source_token_list->end_token->next_token = source_token; | ||
| 120 | source_token_list->end_token = source_token; | ||
| 121 | |||
| 122 | } | ||
| 123 | |||
| 124 | //- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother | ||
| 125 | internal void | ||
| 126 | csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source) | ||
| 127 | { | ||
| 128 | |||
| 129 | csv_token *source_ct = source->start_token; | ||
| 130 | csv_token *destination_end_ct = destination->end_token; | ||
| 131 | |||
| 132 | for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token) | ||
| 133 | { | ||
| 134 | destination_end_ct->next_token = source_ct; | ||
| 135 | } | ||
| 136 | |||
| 137 | destination->end_token = source_ct; | ||
| 138 | } | ||
| 139 | |||
| 140 | #if 0 | ||
| 141 | internal csv_token_list * | ||
| 142 | parse_csv_row(string8 row_buffer) | ||
| 143 | { | ||
| 144 | // csv_token_list * | ||
| 145 | |||
| 146 | } | ||
| 147 | #endif | ||
| 148 | |||
| 149 | |||
| 150 | // the lexer acts as a table builder from a csv file | ||
| 151 | // and parsing indivudal rows and columns | ||
| 152 | // the next step would be building a the b-tree | ||
| 153 | internal csv_token * | ||
| 154 | tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list) | ||
| 155 | { | ||
| 156 | |||
| 157 | unused(token_list); | ||
| 158 | b32 finding_headers = TRUE; | ||
| 159 | |||
| 160 | if(buffer.size < 0) return NULL; | ||
| 161 | |||
| 162 | csv_token *tok = PushStruct(arena, csv_token); | ||
| 163 | |||
| 164 | // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit | ||
| 165 | // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? | ||
| 166 | // forgot what the solution was | ||
| 167 | // TODO(nasr): check what the problem here was | ||
| 168 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) | ||
| 169 | { | ||
| 170 | u8 point = buffer.data[index]; | ||
| 171 | |||
| 172 | s32 start = 0; | ||
| 173 | s32 end = 0; | ||
| 174 | |||
| 175 | if(is_whitespace(point)) | ||
| 176 | { | ||
| 177 | warn("csv file is invalid, detected whitespace"); | ||
| 178 | return NULL; | ||
| 179 | } | ||
| 180 | |||
| 181 | |||
| 182 | if(point == '\n') | ||
| 183 | { | ||
| 184 | if(finding_headers) | ||
| 185 | { | ||
| 186 | #if 0 | ||
| 187 | string8 headers_buffer = {.data = &buffer.data[start], .size = end - start}; | ||
| 188 | #endif | ||
| 189 | finding_headers = FALSE; | ||
| 190 | |||
| 191 | { | ||
| 192 | //- map new header token list to table headers | ||
| 193 | } | ||
| 194 | } | ||
| 195 | #if 0 | ||
| 196 | else | ||
| 197 | { | ||
| 198 | |||
| 199 | } | ||
| 200 | #endif | ||
| 201 | |||
| 202 | |||
| 203 | table->row_count++; | ||
| 204 | } | ||
| 205 | else if(point == ',') | ||
| 206 | { | ||
| 207 | if (finding_headers) | ||
| 208 | { | ||
| 209 | table->header_count++; | ||
| 210 | } | ||
| 211 | } | ||
| 212 | |||
| 213 | switch(point) | ||
| 214 | { | ||
| 215 | case('\n'): | ||
| 216 | { | ||
| 217 | tok->flags |= FL; | ||
| 218 | break; | ||
| 219 | } | ||
| 220 | |||
| 221 | case(','): | ||
| 222 | { | ||
| 223 | end = index - 1; | ||
| 224 | start = index + 1; | ||
| 225 | break; | ||
| 226 | } | ||
| 227 | default: | ||
| 228 | { | ||
| 229 | break; | ||
| 230 | } | ||
| 231 | } | ||
| 232 | |||
| 233 | tok->lexeme = StringCast(&buffer.data[start], end - start); | ||
| 234 | tok->next_token = tok; | ||
| 235 | } | ||
| 236 | |||
| 237 | return tok; | ||
| 238 | } | ||
| 239 | |||
| 240 | //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future | ||
| 241 | internal b_tree * | ||
| 242 | parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) | ||
| 243 | { | ||
| 244 | b_tree *tree = PushStructZero(arena, b_tree); | ||
| 245 | b_tree_create(arena, tree); | ||
| 246 | |||
| 247 | // iterate over the token list while the token is not nil | ||
| 248 | for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) | ||
| 249 | { | ||
| 250 | |||
| 251 | //- TODO(nasr): check initizalization or something tomorrow | ||
| 252 | { | ||
| 253 | //- are we parsing the first line tokens? | ||
| 254 | //- if so, do something :)) | ||
| 255 | if(ct->flags & FL) | ||
| 256 | { | ||
| 257 | // TODO(nasr): replace with nil header check function | ||
| 258 | if(table->header != &nil_csv_header || table->header == NULL) | ||
| 259 | { | ||
| 260 | #if 0 | ||
| 261 | // - no this should happen in the tokenization | ||
| 262 | table->headers->next = | ||
| 263 | #endif | ||
| 264 | } | ||
| 265 | else | ||
| 266 | { | ||
| 267 | |||
| 268 | } | ||
| 269 | |||
| 270 | } | ||
| 271 | } | ||
| 272 | |||
| 273 | // TODO(nasr): fix this logic tomorrow | ||
| 274 | csv_token *ct = PushStruct(arena, csv_token); | ||
| 275 | // skip structural ctens, only index values | ||
| 276 | if (ct->type != TOKEN_VALUE) | ||
| 277 | { | ||
| 278 | continue; | ||
| 279 | } | ||
| 280 | |||
| 281 | // NOTE(nasr): payload is the cten itself so the caller can reach | ||
| 282 | // row/col metadata without us having to copy it | ||
| 283 | // NOTE(nasr): heh why do we void cast again? | ||
| 284 | b_tree_insert(tree, ct->lexeme, (void *)ct); | ||
| 285 | } | ||
| 286 | |||
| 287 | return tree; | ||
| 288 | } | ||
| 289 | |||
diff --git a/source/csv_reader.h b/source/csv_reader.h deleted file mode 100644 index f5205bf..0000000 --- a/source/csv_reader.h +++ /dev/null | |||
| @@ -1,183 +0,0 @@ | |||
| 1 | #ifndef ENGINE_LEXER_H | ||
| 2 | #define ENGINE_LEXER_H | ||
| 3 | |||
| 4 | typedef enum csv_token_flags csv_token_flags; | ||
| 5 | enum csv_token_flags | ||
| 6 | { | ||
| 7 | START_FL = 1 << 1, | ||
| 8 | END_FL = 1 << 2, | ||
| 9 | }; | ||
| 10 | |||
| 11 | typedef enum csv_token_type csv_token_type; | ||
| 12 | enum csv_token_type | ||
| 13 | { | ||
| 14 | // first 255 tokens for ascii characters | ||
| 15 | TOKEN_UNDEFINED = 255, | ||
| 16 | TOKEN_IDENTIFIER, | ||
| 17 | TOKEN_VALUE, | ||
| 18 | }; | ||
| 19 | |||
| 20 | typedef struct csv_token csv_token; | ||
| 21 | struct csv_token | ||
| 22 | { | ||
| 23 | string8 lexeme; | ||
| 24 | csv_token_type type; | ||
| 25 | csv_token_flags flags; | ||
| 26 | csv_token *next; | ||
| 27 | }; | ||
| 28 | |||
| 29 | // NOTE(nasr): i dont think im going to use this. | ||
| 30 | typedef struct csv_row csv_row; | ||
| 31 | struct csv_row | ||
| 32 | { | ||
| 33 | // array of size col_count, points into mmap buffer | ||
| 34 | string8 *fields; | ||
| 35 | s32 count; | ||
| 36 | }; | ||
| 37 | |||
| 38 | typedef struct csv_table csv_table; | ||
| 39 | struct csv_table | ||
| 40 | { | ||
| 41 | // first row, col names | ||
| 42 | // all data rows | ||
| 43 | string8 *headers; | ||
| 44 | csv_row *rows; | ||
| 45 | s32 col_count; | ||
| 46 | s32 row_count; | ||
| 47 | }; | ||
| 48 | |||
| 49 | |||
| 50 | typedef struct csv_token_list csv_token_list; | ||
| 51 | struct csv_token_list | ||
| 52 | { | ||
| 53 | csv_token *start_token; | ||
| 54 | csv_token *end_token; | ||
| 55 | |||
| 56 | }; | ||
| 57 | |||
| 58 | read_only global_variable | ||
| 59 | csv_token nil_csv_token= | ||
| 60 | { | ||
| 61 | .lexeme = {.data = NULL, .size =0}, | ||
| 62 | .type = (csv_token_type)0, | ||
| 63 | .flags = 0, | ||
| 64 | .next = &nil_csv_token, | ||
| 65 | |||
| 66 | }; | ||
| 67 | |||
| 68 | read_only global_variable | ||
| 69 | csv_token_list nil_csv_token_list = | ||
| 70 | { | ||
| 71 | .start_token = &nil_csv_token, | ||
| 72 | .end_token = &nil_csv_token, | ||
| 73 | }; | ||
| 74 | |||
| 75 | |||
| 76 | read_only global_variable | ||
| 77 | csv_row nil_csv_row = | ||
| 78 | { | ||
| 79 | .fields = &nil_string, | ||
| 80 | .count = 0, | ||
| 81 | }; | ||
| 82 | |||
| 83 | read_only global_variable | ||
| 84 | csv_table nil_csv_table = | ||
| 85 | { | ||
| 86 | .headers = &nil_string, | ||
| 87 | .rows = &nil_csv_row, | ||
| 88 | .col_count = 0, | ||
| 89 | .row_count = 0, | ||
| 90 | }; | ||
| 91 | |||
| 92 | #endif /* ENGINE_LEXER_H */ | ||
| 93 | |||
| 94 | // the lexer acts as a table builder from a csv file | ||
| 95 | // and parsing indivudal rows and columns | ||
| 96 | // the next step would be building a the b-tree | ||
| 97 | internal csv_token * | ||
| 98 | tokenize_csv(string8 buffer, mem_arena *arena) | ||
| 99 | { | ||
| 100 | b32 FL = TRUE; | ||
| 101 | |||
| 102 | if(buffer.size < 0) return NULL; | ||
| 103 | |||
| 104 | csv_token *tok = PushStruct(arena, csv_token); | ||
| 105 | |||
| 106 | // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit | ||
| 107 | // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? | ||
| 108 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) | ||
| 109 | { | ||
| 110 | u8 point = buffer.data[index]; | ||
| 111 | |||
| 112 | s32 start = 0; | ||
| 113 | s32 end = 0; | ||
| 114 | |||
| 115 | if(is_whitespace(point)) | ||
| 116 | { | ||
| 117 | warn("csv file is invalid, detected whitespace"); | ||
| 118 | return NULL; | ||
| 119 | } | ||
| 120 | |||
| 121 | switch(point) | ||
| 122 | { | ||
| 123 | case('\n'): | ||
| 124 | { | ||
| 125 | if(FL) tok->flags |= END_FL; | ||
| 126 | break; | ||
| 127 | } | ||
| 128 | |||
| 129 | case(','): | ||
| 130 | { | ||
| 131 | end = index - 1; | ||
| 132 | start = index + 1; | ||
| 133 | break; | ||
| 134 | } | ||
| 135 | default: | ||
| 136 | { | ||
| 137 | break; | ||
| 138 | } | ||
| 139 | } | ||
| 140 | |||
| 141 | tok->lexeme = StringCast(&buffer.data[start], end - start); | ||
| 142 | tok->next = tok; | ||
| 143 | } | ||
| 144 | |||
| 145 | return tok; | ||
| 146 | } | ||
| 147 | |||
| 148 | internal void | ||
| 149 | read_csv(string8 buffer) | ||
| 150 | { | ||
| 151 | // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data); | ||
| 152 | |||
| 153 | } | ||
| 154 | |||
| 155 | internal b_tree * | ||
| 156 | parse_csv(mem_arena *arena, csv_token_list *ctl) | ||
| 157 | { | ||
| 158 | b_tree *tree = PushStructZero(arena, b_tree); | ||
| 159 | b_tree_create(arena, tree); | ||
| 160 | |||
| 161 | //- TODO(nasr): check initizalization or something tomorrow | ||
| 162 | { | ||
| 163 | |||
| 164 | } | ||
| 165 | // TODO(nasr): fix this logic tomorrow | ||
| 166 | csv_token *ct = PushStruct(arena, csv_token); | ||
| 167 | |||
| 168 | for (;ct != NULL; ct = ct->next) | ||
| 169 | { | ||
| 170 | // skip structural ctens, only index values | ||
| 171 | if (ct->type != TOKEN_VALUE) | ||
| 172 | { | ||
| 173 | continue; | ||
| 174 | } | ||
| 175 | |||
| 176 | // NOTE(nasr): payload is the cten itself so the caller can reach | ||
| 177 | // row/col metadata without us having to copy it | ||
| 178 | // NOTE(nasr): heh why do we void cast again? | ||
| 179 | b_tree_insert(arena, tree, ct->lexeme, (void *)ct); | ||
| 180 | } | ||
| 181 | |||
| 182 | return tree; | ||
| 183 | } | ||
diff --git a/source/engine.c b/source/tb_db.c index 106f113..b992111 100644 --- a/source/engine.c +++ b/source/tb_db.c | |||
| @@ -1,6 +1,3 @@ | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 4 | #define B_TREE_IMPLEMENTATION | 1 | #define B_TREE_IMPLEMENTATION |
| 5 | #define BASE_UNITY | 2 | #define BASE_UNITY |
| 6 | #include "base/base_include.h" | 3 | #include "base/base_include.h" |
| @@ -33,11 +30,10 @@ internal b32 | |||
| 33 | is_delimiter(u8 point) | 30 | is_delimiter(u8 point) |
| 34 | { | 31 | { |
| 35 | return (point == ','); | 32 | return (point == ','); |
| 36 | |||
| 37 | } | 33 | } |
| 38 | 34 | ||
| 39 | #include "b_tree.h" | 35 | #include "b_tree_impl.h" |
| 40 | #include "csv_reader.h" | 36 | #include "csv_decoder.h" |
| 41 | 37 | ||
| 42 | typedef struct query_token query_token; | 38 | typedef struct query_token query_token; |
| 43 | struct query_token | 39 | struct query_token |
| @@ -80,7 +76,6 @@ is_nil_query_token_list(query_token *token) | |||
| 80 | return (token == &nil_query_token) || (token == NULL); | 76 | return (token == &nil_query_token) || (token == NULL); |
| 81 | } | 77 | } |
| 82 | 78 | ||
| 83 | |||
| 84 | // takes on line of the repl input | 79 | // takes on line of the repl input |
| 85 | // return a reference to the passed list | 80 | // return a reference to the passed list |
| 86 | internal query_token_list * | 81 | internal query_token_list * |
| @@ -90,7 +85,7 @@ query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) | |||
| 90 | unused(initialized); | 85 | unused(initialized); |
| 91 | 86 | ||
| 92 | for (u64 index = 0; index < buffer->size; ++index) | 87 | for (u64 index = 0; index < buffer->size; ++index) |
| 93 | { | 88 | { |
| 94 | u8 codepoint = buffer->data[index]; | 89 | u8 codepoint = buffer->data[index]; |
| 95 | 90 | ||
| 96 | if(codepoint == '\n' || codepoint == '\r') break; | 91 | if(codepoint == '\n' || codepoint == '\r') break; |
| @@ -98,10 +93,7 @@ query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) | |||
| 98 | s32 start = 0; | 93 | s32 start = 0; |
| 99 | s32 end = 0; | 94 | s32 end = 0; |
| 100 | 95 | ||
| 101 | if(is_whitespace(codepoint)) | 96 | if(is_whitespace(codepoint)) end = index; |
| 102 | { | ||
| 103 | end = index; | ||
| 104 | } | ||
| 105 | 97 | ||
| 106 | // save the token | 98 | // save the token |
| 107 | // TODO(nasr): work on the string macros cuz no work | 99 | // TODO(nasr): work on the string macros cuz no work |
| @@ -185,19 +177,23 @@ int main(int count, char **value) | |||
| 185 | } | 177 | } |
| 186 | 178 | ||
| 187 | { | 179 | { |
| 188 | read_csv(buffer); | ||
| 189 | 180 | ||
| 190 | csv_token *tokens = tokenize_csv(buffer, global_arena); | 181 | // NOTE(nasr): the use of tables is required for tracking headers etc. |
| 182 | // i think we can optimize this away in the future but for now its fine | ||
| 183 | csv_table *table = PushStruct(global_arena, csv_table); | ||
| 184 | |||
| 185 | csv_token_list *token_list = PushStruct(global_arena, csv_token_list); | ||
| 186 | |||
| 187 | csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list); | ||
| 191 | 188 | ||
| 192 | assert_msg(tokens != NULL, "Tokens are NULL."); | 189 | assert_msg(tokens != NULL, "Tokens are NULL."); |
| 193 | 190 | ||
| 194 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); | 191 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); |
| 195 | b_tree *bt = parse_csv(global_arena, ctl); | 192 | b_tree *bt = parse_csv(global_arena, ctl, table); |
| 196 | 193 | ||
| 197 | b_tree_write(bt); | 194 | b_tree_write(bt); |
| 198 | } | 195 | } |
| 199 | 196 | ||
| 200 | |||
| 201 | // NOTE(nasr): not sure on how to approach the b-tree and the table format thing | 197 | // NOTE(nasr): not sure on how to approach the b-tree and the table format thing |
| 202 | // we kind of want our table format i think? but i wouldnt be sure about the use case | 198 | // we kind of want our table format i think? but i wouldnt be sure about the use case |
| 203 | // so we stick to the regular b_tree for now. commenting out the tables. | 199 | // so we stick to the regular b_tree for now. commenting out the tables. |
