From 2e258673171c2e4663a8b5d58e2ad174bb0ecd96 Mon Sep 17 00:00:00 2001 From: nasr Date: Sun, 15 Mar 2026 15:16:22 +0000 Subject: feature(main): nil tokens for both lexers :) --- source/csv_reader.h | 69 +++++++++++++++++++++++++++++++---------- source/engine.c | 89 +++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 128 insertions(+), 30 deletions(-) diff --git a/source/csv_reader.h b/source/csv_reader.h index 7f5bf06..f5205bf 100644 --- a/source/csv_reader.h +++ b/source/csv_reader.h @@ -1,15 +1,15 @@ #ifndef ENGINE_LEXER_H #define ENGINE_LEXER_H -typedef enum token_flags token_flags; -enum token_flags +typedef enum csv_token_flags csv_token_flags; +enum csv_token_flags { START_FL = 1 << 1, END_FL = 1 << 2, }; -typedef enum token_type token_type; -enum token_type +typedef enum csv_token_type csv_token_type; +enum csv_token_type { // first 255 tokens for ascii characters TOKEN_UNDEFINED = 255, @@ -17,13 +17,13 @@ enum token_type TOKEN_VALUE, }; -typedef struct token token; -struct token +typedef struct csv_token csv_token; +struct csv_token { string8 lexeme; - token_type type; - token_flags flags; - token *next; + csv_token_type type; + csv_token_flags flags; + csv_token *next; }; // NOTE(nasr): i dont think im going to use this. @@ -46,6 +46,33 @@ struct csv_table s32 row_count; }; + +typedef struct csv_token_list csv_token_list; +struct csv_token_list +{ + csv_token *start_token; + csv_token *end_token; + +}; + +read_only global_variable +csv_token nil_csv_token= +{ + .lexeme = {.data = NULL, .size =0}, + .type = (csv_token_type)0, + .flags = 0, + .next = &nil_csv_token, + +}; + +read_only global_variable +csv_token_list nil_csv_token_list = +{ + .start_token = &nil_csv_token, + .end_token = &nil_csv_token, +}; + + read_only global_variable csv_row nil_csv_row = { @@ -67,14 +94,14 @@ csv_table nil_csv_table = // the lexer acts as a table builder from a csv file // and parsing indivudal rows and columns // the next step would be building a the b-tree -internal token * +internal csv_token * tokenize_csv(string8 buffer, mem_arena *arena) { b32 FL = TRUE; if(buffer.size < 0) return NULL; - token *tok = PushStruct(arena, token); + csv_token *tok = PushStruct(arena, csv_token); // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? @@ -126,22 +153,30 @@ read_csv(string8 buffer) } internal b_tree * -parse_csv(mem_arena *arena, token *tok) +parse_csv(mem_arena *arena, csv_token_list *ctl) { b_tree *tree = PushStructZero(arena, b_tree); b_tree_create(arena, tree); - for (; tok != NULL; tok = tok->next) + //- TODO(nasr): check initizalization or something tomorrow + { + + } + // TODO(nasr): fix this logic tomorrow + csv_token *ct = PushStruct(arena, csv_token); + + for (;ct != NULL; ct = ct->next) { - // skip structural tokens, only index values - if (tok->type != TOKEN_VALUE) + // skip structural ctens, only index values + if (ct->type != TOKEN_VALUE) { continue; } - // NOTE(nasr): payload is the token itself so the caller can reach + // NOTE(nasr): payload is the cten itself so the caller can reach // row/col metadata without us having to copy it - b_tree_insert(arena, tree, tok->lexeme, (void *)tok); + // NOTE(nasr): heh why do we void cast again? + b_tree_insert(arena, tree, ct->lexeme, (void *)ct); } return tree; diff --git a/source/engine.c b/source/engine.c index 9797d8a..106f113 100644 --- a/source/engine.c +++ b/source/engine.c @@ -1,3 +1,6 @@ + + + #define B_TREE_IMPLEMENTATION #define BASE_UNITY #include "base/base_include.h" @@ -33,7 +36,6 @@ is_delimiter(u8 point) } - #include "b_tree.h" #include "csv_reader.h" @@ -44,13 +46,48 @@ struct query_token query_token *next; }; +typedef struct query_token_list query_token_list; +struct query_token_list +{ + query_token *start_token; + query_token *current_token; +}; + +read_only global_variable +query_token nil_query_token = +{ + .lexeme = {.data = NULL, .size = 0}, + .next = &nil_query_token +}; + + +read_only global_variable +query_token_list nil_query_token_list = +{ + .start_token = &nil_query_token, + .current_token = &nil_query_token, +}; + +internal b32 +is_nil_query_token(query_token *token) +{ + return (token == &nil_query_token) || (token == NULL); +} + +internal b32 +is_nil_query_token_list(query_token *token) +{ + return (token == &nil_query_token) || (token == NULL); +} + // takes on line of the repl input -internal query_token * -query_tokenizer(mem_arena *arena, string8 *buffer) +// return a reference to the passed list +internal query_token_list * +query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) { - query_token *tok = PushStruct(arena, query_token); - unused(tok); + b32 initialized = 0; + unused(initialized); for (u64 index = 0; index < buffer->size; ++index) { @@ -69,23 +106,44 @@ query_tokenizer(mem_arena *arena, string8 *buffer) // save the token // TODO(nasr): work on the string macros cuz no work { + query_token *new_token = PushStruct(arena, query_token); + + //- initialize list + { + if(is_nil_query_token(list->start_token)) + { + list->start_token = new_token; + list->current_token = new_token; + } + else + { + //- all we need to do - we dont track parents or what ever. this is a token stream not a tree + list->current_token->next = new_token; + } + } s32 new_token_size = end - start; - tok->lexeme = PushString(arena, new_token_size); - tok->lexeme.data = &buffer->data[index]; - tok->lexeme.size = new_token_size; + new_token->lexeme = PushString(arena, new_token_size); + new_token->lexeme.data = &buffer->data[index]; + new_token->lexeme.size = new_token_size; + + list->current_token->next = new_token; - tok->next = tok; start = index + 1; } } - return tok; + return list; } int main(int count, char **value) { + +#if 1 + unused(nil_query_token_list); +#endif + if(count < 2) value[1] = "./test/data.csv"; local_persist b32 running = 1; @@ -121,16 +179,21 @@ int main(int count, char **value) lbuf_stringified.size = sizeof(lbuf) - 1; } - query_tokenizer(global_arena, &lbuf_stringified); + query_token_list *qtl = PushStruct(global_arena, query_token_list); + + query_tokenizer(global_arena, &lbuf_stringified, qtl); } { read_csv(buffer); - token *tokens = tokenize_csv(buffer, global_arena); + + csv_token *tokens = tokenize_csv(buffer, global_arena); assert_msg(tokens != NULL, "Tokens are NULL."); - b_tree *bt = parse_csv(global_arena, tokens); + csv_token_list *ctl = PushStruct(global_arena, csv_token_list); + b_tree *bt = parse_csv(global_arena, ctl); + b_tree_write(bt); } -- cgit v1.3