diff options
| author | nasr <nsrddyn@gmail.com> | 2026-03-14 21:45:30 +0000 |
|---|---|---|
| committer | nasr <nsrddyn@gmail.com> | 2026-03-14 21:45:30 +0000 |
| commit | ff2ac6f584a1b08c0e66ac65b9c8e8d68e79f124 (patch) | |
| tree | 63e9602c5710cd2550c38bf49e5f4a18c0c33437 | |
| parent | 53cd9c4c3408b5f2e54e891baf471c0d774ea2cd (diff) | |
feature(main): bugfixes
still a segfault when generating the token linked list
| -rw-r--r-- | source/csv_reader.h | 61 | ||||
| -rw-r--r-- | source/engine.c | 46 |
2 files changed, 65 insertions, 42 deletions
diff --git a/source/csv_reader.h b/source/csv_reader.h index 2b6f49c..7f5bf06 100644 --- a/source/csv_reader.h +++ b/source/csv_reader.h | |||
| @@ -8,7 +8,6 @@ enum token_flags | |||
| 8 | END_FL = 1 << 2, | 8 | END_FL = 1 << 2, |
| 9 | }; | 9 | }; |
| 10 | 10 | ||
| 11 | |||
| 12 | typedef enum token_type token_type; | 11 | typedef enum token_type token_type; |
| 13 | enum token_type | 12 | enum token_type |
| 14 | { | 13 | { |
| @@ -27,6 +26,7 @@ struct token | |||
| 27 | token *next; | 26 | token *next; |
| 28 | }; | 27 | }; |
| 29 | 28 | ||
| 29 | // NOTE(nasr): I don't think I'm going to use this. | ||
| 30 | typedef struct csv_row csv_row; | 30 | typedef struct csv_row csv_row; |
| 31 | struct csv_row | 31 | struct csv_row |
| 32 | { | 32 | { |
| @@ -62,8 +62,6 @@ csv_table nil_csv_table = | |||
| 62 | .row_count = 0, | 62 | .row_count = 0, |
| 63 | }; | 63 | }; |
| 64 | 64 | ||
| 65 | |||
| 66 | |||
| 67 | #endif /* ENGINE_LEXER_H */ | 65 | #endif /* ENGINE_LEXER_H */ |
| 68 | 66 | ||
| 69 | // the lexer acts as a table builder from a csv file | 67 | // the lexer acts as a table builder from a csv file |
| @@ -72,13 +70,16 @@ csv_table nil_csv_table = | |||
| 72 | internal token * | 70 | internal token * |
| 73 | tokenize_csv(string8 buffer, mem_arena *arena) | 71 | tokenize_csv(string8 buffer, mem_arena *arena) |
| 74 | { | 72 | { |
| 75 | |||
| 76 | b32 FL = TRUE; | 73 | b32 FL = TRUE; |
| 77 | 74 | ||
| 78 | if(buffer.size < 0) return NULL; | 75 | if(buffer.size < 0) return NULL; |
| 76 | |||
| 77 | token *tok = PushStruct(arena, token); | ||
| 78 | |||
| 79 | // URGENT(nasr): segfaulting because memcpy of string value doesn't work dammit | ||
| 80 | // NOPE, IT'S BECAUSE WE DON'T LOAD THE CSV OR SOMETHING??? | ||
| 79 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) | 81 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) |
| 80 | { | 82 | { |
| 81 | token *tok = PushStruct(arena, token); | ||
| 82 | u8 point = buffer.data[index]; | 83 | u8 point = buffer.data[index]; |
| 83 | 84 | ||
| 84 | s32 start = 0; | 85 | s32 start = 0; |
| @@ -86,35 +87,35 @@ tokenize_csv(string8 buffer, mem_arena *arena) | |||
| 86 | 87 | ||
| 87 | if(is_whitespace(point)) | 88 | if(is_whitespace(point)) |
| 88 | { | 89 | { |
| 89 | print("csv file is invalid"); | 90 | warn("csv file is invalid, detected whitespace"); |
| 90 | return NULL; | 91 | return NULL; |
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | switch(point) | 94 | switch(point) |
| 94 | { | 95 | { |
| 95 | case('\n'): | 96 | case('\n'): |
| 96 | { | 97 | { |
| 97 | if(FL) tok->flags |= END_FL; | 98 | if(FL) tok->flags |= END_FL; |
| 98 | break; | 99 | break; |
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | case(','): | 102 | case(','): |
| 102 | { | 103 | { |
| 103 | end = index - 1; | 104 | end = index - 1; |
| 104 | start = index + 1; | 105 | start = index + 1; |
| 105 | break; | 106 | break; |
| 106 | } | 107 | } |
| 107 | default: | 108 | default: |
| 108 | { | 109 | { |
| 109 | break; | 110 | break; |
| 110 | } | 111 | } |
| 111 | } | 112 | } |
| 112 | 113 | ||
| 113 | tok->lexeme = StringCast(&buffer.data[start], end - start); | 114 | tok->lexeme = StringCast(&buffer.data[start], end - start); |
| 114 | tok->next = tok; | 115 | tok->next = tok; |
| 115 | } | 116 | } |
| 116 | 117 | ||
| 117 | return NULL; | 118 | return tok; |
| 118 | } | 119 | } |
| 119 | 120 | ||
| 120 | internal void | 121 | internal void |
| @@ -124,18 +125,24 @@ read_csv(string8 buffer) | |||
| 124 | 125 | ||
| 125 | } | 126 | } |
| 126 | 127 | ||
| 127 | internal b_tree * | 128 | internal b_tree * |
| 128 | parse_csv(csv_token *tok, csv_table *table) | 129 | parse_csv(mem_arena *arena, token *tok) |
| 129 | { | 130 | { |
| 131 | b_tree *tree = PushStructZero(arena, b_tree); | ||
| 132 | b_tree_create(arena, tree); | ||
| 130 | 133 | ||
| 131 | 134 | for (; tok != NULL; tok = tok->next) | |
| 132 | for (;tok->next; tok = tok->next) | ||
| 133 | { | 135 | { |
| 134 | b_tree_node *current_btree_node = btree_node_alloc; | 136 | // skip structural tokens, only index values |
| 137 | if (tok->type != TOKEN_VALUE) | ||
| 138 | { | ||
| 139 | continue; | ||
| 140 | } | ||
| 135 | 141 | ||
| 136 | 142 | // NOTE(nasr): payload is the token itself so the caller can reach | |
| 143 | // row/col metadata without us having to copy it | ||
| 144 | b_tree_insert(arena, tree, tok->lexeme, (void *)tok); | ||
| 137 | } | 145 | } |
| 138 | 146 | ||
| 139 | return NULL; | 147 | return tree; |
| 140 | } | 148 | } |
| 141 | |||
diff --git a/source/engine.c b/source/engine.c index 1cfbab0..9797d8a 100644 --- a/source/engine.c +++ b/source/engine.c | |||
| @@ -40,7 +40,7 @@ is_delimiter(u8 point) | |||
| 40 | typedef struct query_token query_token; | 40 | typedef struct query_token query_token; |
| 41 | struct query_token | 41 | struct query_token |
| 42 | { | 42 | { |
| 43 | string8 *lexeme; | 43 | string8 lexeme; |
| 44 | query_token *next; | 44 | query_token *next; |
| 45 | }; | 45 | }; |
| 46 | 46 | ||
| @@ -72,8 +72,9 @@ query_tokenizer(mem_arena *arena, string8 *buffer) | |||
| 72 | 72 | ||
| 73 | s32 new_token_size = end - start; | 73 | s32 new_token_size = end - start; |
| 74 | 74 | ||
| 75 | tok->lexeme->data = &buffer->data[index]; | 75 | tok->lexeme = PushString(arena, new_token_size); |
| 76 | tok->lexeme->size = new_token_size; | 76 | tok->lexeme.data = &buffer->data[index]; |
| 77 | tok->lexeme.size = new_token_size; | ||
| 77 | 78 | ||
| 78 | tok->next = tok; | 79 | tok->next = tok; |
| 79 | start = index + 1; | 80 | start = index + 1; |
| @@ -83,21 +84,18 @@ query_tokenizer(mem_arena *arena, string8 *buffer) | |||
| 83 | return tok; | 84 | return tok; |
| 84 | } | 85 | } |
| 85 | 86 | ||
| 86 | int main(int c, char **v) | 87 | int main(int count, char **value) |
| 87 | { | 88 | { |
| 88 | 89 | if(count < 2) value[1] = "./test/data.csv"; | |
| 89 | if(c < 2) | ||
| 90 | { | ||
| 91 | print("bad file, setting default file\n"); | ||
| 92 | } | ||
| 93 | else v[1] = "./test/customers-10000.csv"; | ||
| 94 | 90 | ||
| 95 | local_persist b32 running = 1; | 91 | local_persist b32 running = 1; |
| 96 | 92 | ||
| 97 | mem_arena *global_arena = arena_create(MiB(30)); | 93 | mem_arena *global_arena = arena_create(MiB(30)); |
| 98 | csv_table *global_table = PushStruct(global_arena, csv_table); | ||
| 99 | 94 | ||
| 100 | string8 buffer = load_file(v[1]); | 95 | // NOTE(nasr): see note down below |
| 96 | // csv_table *global_table = PushStruct(global_arena, csv_table); | ||
| 97 | |||
| 98 | string8 buffer = load_file(global_arena, value[1]); | ||
| 101 | 99 | ||
| 102 | print("\nDatabase Engine\n"); | 100 | print("\nDatabase Engine\n"); |
| 103 | 101 | ||
| @@ -106,23 +104,41 @@ int main(int c, char **v) | |||
| 106 | if (running) | 104 | if (running) |
| 107 | { | 105 | { |
| 108 | { | 106 | { |
| 109 | u8 lbuf[256] = {}; | 107 | u8 *lbuf = PushArray(global_arena, u8, 256); |
| 110 | s32 err = os_read(STDIN_FD, lbuf, 256); | 108 | s32 err = os_read(STDIN_FD, lbuf, 256); |
| 109 | |||
| 111 | if(err < 0) | 110 | if(err < 0) |
| 112 | { | 111 | { |
| 113 | print("error reading from stdin"); | 112 | print("error reading from stdin"); |
| 114 | } | 113 | } |
| 115 | 114 | ||
| 116 | query_tokenizer(global_arena, &StringLit(lbuf)); | 115 | // TODO(nasr): extract this later and make a string copy function/macro |
| 116 | // @params (s32 lbuf_size , string8 lbuf_stringified) | ||
| 117 | s32 lbuf_size = sizeof(lbuf) - 1; | ||
| 118 | string8 lbuf_stringified = PushString(global_arena, lbuf_size); | ||
| 119 | { | ||
| 120 | memcpy(lbuf_stringified.data, lbuf, lbuf_size); | ||
| 121 | lbuf_stringified.size = sizeof(lbuf) - 1; | ||
| 122 | } | ||
| 117 | 123 | ||
| 124 | query_tokenizer(global_arena, &lbuf_stringified); | ||
| 118 | } | 125 | } |
| 119 | 126 | ||
| 120 | { | 127 | { |
| 121 | read_csv(buffer); | 128 | read_csv(buffer); |
| 122 | token *tokens = tokenize_csv(buffer, global_arena); | 129 | token *tokens = tokenize_csv(buffer, global_arena); |
| 123 | global_table = parse_csv(tokens, global_table); | 130 | |
| 131 | assert_msg(tokens != NULL, "Tokens are NULL."); | ||
| 132 | |||
| 133 | b_tree *bt = parse_csv(global_arena, tokens); | ||
| 134 | b_tree_write(bt); | ||
| 124 | } | 135 | } |
| 125 | 136 | ||
| 137 | |||
| 138 | // NOTE(nasr): not sure how to approach the b-tree and the table format thing | ||
| 139 | // we kind of want our own table format, I think? but I'm not sure about the use case | ||
| 140 | // so we stick to the regular b_tree for now. commenting out the tables. | ||
| 141 | |||
| 126 | sleep(1); | 142 | sleep(1); |
| 127 | } | 143 | } |
| 128 | } | 144 | } |
