diff options
| author | nasr <nsrddyn@gmail.com> | 2026-03-15 15:16:22 +0000 |
|---|---|---|
| committer | nasr <nsrddyn@gmail.com> | 2026-03-15 15:16:22 +0000 |
| commit | 2e258673171c2e4663a8b5d58e2ad174bb0ecd96 (patch) | |
| tree | 25f50ee09d67f5b6466c1ba7655c26c1806bce57 /source | |
| parent | ff2ac6f584a1b08c0e66ac65b9c8e8d68e79f124 (diff) | |
feature(main): nil tokens for both lexers :)
Diffstat (limited to 'source')
| -rw-r--r-- | source/csv_reader.h | 69 | ||||
| -rw-r--r-- | source/engine.c | 89 |
2 files changed, 128 insertions, 30 deletions
diff --git a/source/csv_reader.h b/source/csv_reader.h index 7f5bf06..f5205bf 100644 --- a/source/csv_reader.h +++ b/source/csv_reader.h | |||
| @@ -1,15 +1,15 @@ | |||
| 1 | #ifndef ENGINE_LEXER_H | 1 | #ifndef ENGINE_LEXER_H |
| 2 | #define ENGINE_LEXER_H | 2 | #define ENGINE_LEXER_H |
| 3 | 3 | ||
// Positional markers carried on a csv_token (see csv_token.flags).
typedef enum csv_token_flags csv_token_flags;
enum csv_token_flags
{
    // NOTE(review): bit 0 is deliberately unused here — flags begin at 1 << 1.
    START_FL = 1 << 1, // token opens a row/stream
    END_FL   = 1 << 2, // token closes a row/stream
};
| 10 | 10 | ||
| 11 | typedef enum token_type token_type; | 11 | typedef enum csv_token_type csv_token_type; |
| 12 | enum token_type | 12 | enum csv_token_type |
| 13 | { | 13 | { |
| 14 | // first 255 tokens for ascii characters | 14 | // first 255 tokens for ascii characters |
| 15 | TOKEN_UNDEFINED = 255, | 15 | TOKEN_UNDEFINED = 255, |
| @@ -17,13 +17,13 @@ enum token_type | |||
| 17 | TOKEN_VALUE, | 17 | TOKEN_VALUE, |
| 18 | }; | 18 | }; |
| 19 | 19 | ||
| 20 | typedef struct token token; | 20 | typedef struct csv_token csv_token; |
| 21 | struct token | 21 | struct csv_token |
| 22 | { | 22 | { |
| 23 | string8 lexeme; | 23 | string8 lexeme; |
| 24 | token_type type; | 24 | csv_token_type type; |
| 25 | token_flags flags; | 25 | csv_token_flags flags; |
| 26 | token *next; | 26 | csv_token *next; |
| 27 | }; | 27 | }; |
| 28 | 28 | ||
| 29 | // NOTE(nasr): i dont think im going to use this. | 29 | // NOTE(nasr): i dont think im going to use this. |
| @@ -46,6 +46,33 @@ struct csv_table | |||
| 46 | s32 row_count; | 46 | s32 row_count; |
| 47 | }; | 47 | }; |
| 48 | 48 | ||
| 49 | |||
| 50 | typedef struct csv_token_list csv_token_list; | ||
| 51 | struct csv_token_list | ||
| 52 | { | ||
| 53 | csv_token *start_token; | ||
| 54 | csv_token *end_token; | ||
| 55 | |||
| 56 | }; | ||
| 57 | |||
| 58 | read_only global_variable | ||
| 59 | csv_token nil_csv_token= | ||
| 60 | { | ||
| 61 | .lexeme = {.data = NULL, .size =0}, | ||
| 62 | .type = (csv_token_type)0, | ||
| 63 | .flags = 0, | ||
| 64 | .next = &nil_csv_token, | ||
| 65 | |||
| 66 | }; | ||
| 67 | |||
| 68 | read_only global_variable | ||
| 69 | csv_token_list nil_csv_token_list = | ||
| 70 | { | ||
| 71 | .start_token = &nil_csv_token, | ||
| 72 | .end_token = &nil_csv_token, | ||
| 73 | }; | ||
| 74 | |||
| 75 | |||
| 49 | read_only global_variable | 76 | read_only global_variable |
| 50 | csv_row nil_csv_row = | 77 | csv_row nil_csv_row = |
| 51 | { | 78 | { |
| @@ -67,14 +94,14 @@ csv_table nil_csv_table = | |||
| 67 | // the lexer acts as a table builder from a csv file | 94 | // the lexer acts as a table builder from a csv file |
| 68 | // and parsing individual rows and columns | 95 | // and parsing individual rows and columns |
| 69 | // the next step would be building the b-tree | 96 | // the next step would be building the b-tree |
| 70 | internal token * | 97 | internal csv_token * |
| 71 | tokenize_csv(string8 buffer, mem_arena *arena) | 98 | tokenize_csv(string8 buffer, mem_arena *arena) |
| 72 | { | 99 | { |
| 73 | b32 FL = TRUE; | 100 | b32 FL = TRUE; |
| 74 | 101 | ||
| 75 | if(buffer.size < 0) return NULL; | 102 | if(buffer.size < 0) return NULL; |
| 76 | 103 | ||
| 77 | token *tok = PushStruct(arena, token); | 104 | csv_token *tok = PushStruct(arena, csv_token); |
| 78 | 105 | ||
| 79 | // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit | 106 | // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit |
| 80 | // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? | 107 | // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? |
| @@ -126,22 +153,30 @@ read_csv(string8 buffer) | |||
| 126 | } | 153 | } |
| 127 | 154 | ||
| 128 | internal b_tree * | 155 | internal b_tree * |
| 129 | parse_csv(mem_arena *arena, token *tok) | 156 | parse_csv(mem_arena *arena, csv_token_list *ctl) |
| 130 | { | 157 | { |
| 131 | b_tree *tree = PushStructZero(arena, b_tree); | 158 | b_tree *tree = PushStructZero(arena, b_tree); |
| 132 | b_tree_create(arena, tree); | 159 | b_tree_create(arena, tree); |
| 133 | 160 | ||
| 134 | for (; tok != NULL; tok = tok->next) | 161 | //- TODO(nasr): check initialization or something tomorrow |
| 162 | { | ||
| 163 | |||
| 164 | } | ||
| 165 | // TODO(nasr): fix this logic tomorrow | ||
| 166 | csv_token *ct = PushStruct(arena, csv_token); | ||
| 167 | |||
| 168 | for (;ct != NULL; ct = ct->next) | ||
| 135 | { | 169 | { |
| 136 | // skip structural tokens, only index values | 170 | // skip structural tokens, only index values |
| 137 | if (tok->type != TOKEN_VALUE) | 171 | if (ct->type != TOKEN_VALUE) |
| 138 | { | 172 | { |
| 139 | continue; | 173 | continue; |
| 140 | } | 174 | } |
| 141 | 175 | ||
| 142 | // NOTE(nasr): payload is the token itself so the caller can reach | 176 | // NOTE(nasr): payload is the token itself so the caller can reach |
| 143 | // row/col metadata without us having to copy it | 177 | // row/col metadata without us having to copy it |
| 144 | b_tree_insert(arena, tree, tok->lexeme, (void *)tok); | 178 | // NOTE(nasr): heh why do we void cast again? |
| 179 | b_tree_insert(arena, tree, ct->lexeme, (void *)ct); | ||
| 145 | } | 180 | } |
| 146 | 181 | ||
| 147 | return tree; | 182 | return tree; |
diff --git a/source/engine.c b/source/engine.c index 9797d8a..106f113 100644 --- a/source/engine.c +++ b/source/engine.c | |||
| @@ -1,3 +1,6 @@ | |||
| 1 | |||
| 2 | |||
| 3 | |||
| 1 | #define B_TREE_IMPLEMENTATION | 4 | #define B_TREE_IMPLEMENTATION |
| 2 | #define BASE_UNITY | 5 | #define BASE_UNITY |
| 3 | #include "base/base_include.h" | 6 | #include "base/base_include.h" |
| @@ -33,7 +36,6 @@ is_delimiter(u8 point) | |||
| 33 | 36 | ||
| 34 | } | 37 | } |
| 35 | 38 | ||
| 36 | |||
| 37 | #include "b_tree.h" | 39 | #include "b_tree.h" |
| 38 | #include "csv_reader.h" | 40 | #include "csv_reader.h" |
| 39 | 41 | ||
| @@ -44,13 +46,48 @@ struct query_token | |||
| 44 | query_token *next; | 46 | query_token *next; |
| 45 | }; | 47 | }; |
| 46 | 48 | ||
| 49 | typedef struct query_token_list query_token_list; | ||
| 50 | struct query_token_list | ||
| 51 | { | ||
| 52 | query_token *start_token; | ||
| 53 | query_token *current_token; | ||
| 54 | }; | ||
| 55 | |||
| 56 | read_only global_variable | ||
| 57 | query_token nil_query_token = | ||
| 58 | { | ||
| 59 | .lexeme = {.data = NULL, .size = 0}, | ||
| 60 | .next = &nil_query_token | ||
| 61 | }; | ||
| 62 | |||
| 63 | |||
| 64 | read_only global_variable | ||
| 65 | query_token_list nil_query_token_list = | ||
| 66 | { | ||
| 67 | .start_token = &nil_query_token, | ||
| 68 | .current_token = &nil_query_token, | ||
| 69 | }; | ||
| 70 | |||
| 71 | internal b32 | ||
| 72 | is_nil_query_token(query_token *token) | ||
| 73 | { | ||
| 74 | return (token == &nil_query_token) || (token == NULL); | ||
| 75 | } | ||
| 76 | |||
| 77 | internal b32 | ||
| 78 | is_nil_query_token_list(query_token *token) | ||
| 79 | { | ||
| 80 | return (token == &nil_query_token) || (token == NULL); | ||
| 81 | } | ||
| 82 | |||
| 47 | 83 | ||
| 48 | // takes one line of the repl input | 84 | // takes one line of the repl input |
| 49 | internal query_token * | 85 | // return a reference to the passed list |
| 50 | query_tokenizer(mem_arena *arena, string8 *buffer) | 86 | internal query_token_list * |
| 87 | query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) | ||
| 51 | { | 88 | { |
| 52 | query_token *tok = PushStruct(arena, query_token); | 89 | b32 initialized = 0; |
| 53 | unused(tok); | 90 | unused(initialized); |
| 54 | 91 | ||
| 55 | for (u64 index = 0; index < buffer->size; ++index) | 92 | for (u64 index = 0; index < buffer->size; ++index) |
| 56 | { | 93 | { |
| @@ -69,23 +106,44 @@ query_tokenizer(mem_arena *arena, string8 *buffer) | |||
| 69 | // save the token | 106 | // save the token |
| 70 | // TODO(nasr): work on the string macros cuz no work | 107 | // TODO(nasr): work on the string macros cuz no work |
| 71 | { | 108 | { |
| 109 | query_token *new_token = PushStruct(arena, query_token); | ||
| 110 | |||
| 111 | //- initialize list | ||
| 112 | { | ||
| 113 | if(is_nil_query_token(list->start_token)) | ||
| 114 | { | ||
| 115 | list->start_token = new_token; | ||
| 116 | list->current_token = new_token; | ||
| 117 | } | ||
| 118 | else | ||
| 119 | { | ||
| 120 | //- all we need to do - we dont track parents or what ever. this is a token stream not a tree | ||
| 121 | list->current_token->next = new_token; | ||
| 122 | } | ||
| 123 | } | ||
| 72 | 124 | ||
| 73 | s32 new_token_size = end - start; | 125 | s32 new_token_size = end - start; |
| 74 | 126 | ||
| 75 | tok->lexeme = PushString(arena, new_token_size); | 127 | new_token->lexeme = PushString(arena, new_token_size); |
| 76 | tok->lexeme.data = &buffer->data[index]; | 128 | new_token->lexeme.data = &buffer->data[index]; |
| 77 | tok->lexeme.size = new_token_size; | 129 | new_token->lexeme.size = new_token_size; |
| 130 | |||
| 131 | list->current_token->next = new_token; | ||
| 78 | 132 | ||
| 79 | tok->next = tok; | ||
| 80 | start = index + 1; | 133 | start = index + 1; |
| 81 | } | 134 | } |
| 82 | } | 135 | } |
| 83 | 136 | ||
| 84 | return tok; | 137 | return list; |
| 85 | } | 138 | } |
| 86 | 139 | ||
| 87 | int main(int count, char **value) | 140 | int main(int count, char **value) |
| 88 | { | 141 | { |
| 142 | |||
| 143 | #if 1 | ||
| 144 | unused(nil_query_token_list); | ||
| 145 | #endif | ||
| 146 | |||
| 89 | if(count < 2) value[1] = "./test/data.csv"; | 147 | if(count < 2) value[1] = "./test/data.csv"; |
| 90 | 148 | ||
| 91 | local_persist b32 running = 1; | 149 | local_persist b32 running = 1; |
| @@ -121,16 +179,21 @@ int main(int count, char **value) | |||
| 121 | lbuf_stringified.size = sizeof(lbuf) - 1; | 179 | lbuf_stringified.size = sizeof(lbuf) - 1; |
| 122 | } | 180 | } |
| 123 | 181 | ||
| 124 | query_tokenizer(global_arena, &lbuf_stringified); | 182 | query_token_list *qtl = PushStruct(global_arena, query_token_list); |
| 183 | |||
| 184 | query_tokenizer(global_arena, &lbuf_stringified, qtl); | ||
| 125 | } | 185 | } |
| 126 | 186 | ||
| 127 | { | 187 | { |
| 128 | read_csv(buffer); | 188 | read_csv(buffer); |
| 129 | token *tokens = tokenize_csv(buffer, global_arena); | 189 | |
| 190 | csv_token *tokens = tokenize_csv(buffer, global_arena); | ||
| 130 | 191 | ||
| 131 | assert_msg(tokens != NULL, "Tokens are NULL."); | 192 | assert_msg(tokens != NULL, "Tokens are NULL."); |
| 132 | 193 | ||
| 133 | b_tree *bt = parse_csv(global_arena, tokens); | 194 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); |
| 195 | b_tree *bt = parse_csv(global_arena, ctl); | ||
| 196 | |||
| 134 | b_tree_write(bt); | 197 | b_tree_write(bt); |
| 135 | } | 198 | } |
| 136 | 199 | ||
