From ff2ac6f584a1b08c0e66ac65b9c8e8d68e79f124 Mon Sep 17 00:00:00 2001 From: nasr Date: Sat, 14 Mar 2026 21:45:30 +0000 Subject: feature(main): bugfixes still a segfault when generating the token linked list --- source/csv_reader.h | 61 +++++++++++++++++++++++++++++------------------------ source/engine.c | 46 +++++++++++++++++++++++++++------------- 2 files changed, 65 insertions(+), 42 deletions(-) (limited to 'source') diff --git a/source/csv_reader.h b/source/csv_reader.h index 2b6f49c..7f5bf06 100644 --- a/source/csv_reader.h +++ b/source/csv_reader.h @@ -8,7 +8,6 @@ enum token_flags END_FL = 1 << 2, }; - typedef enum token_type token_type; enum token_type { @@ -27,6 +26,7 @@ struct token token *next; }; +// NOTE(nasr): i dont think im going to use this. typedef struct csv_row csv_row; struct csv_row { @@ -62,8 +62,6 @@ csv_table nil_csv_table = .row_count = 0, }; - - #endif /* ENGINE_LEXER_H */ // the lexer acts as a table builder from a csv file @@ -72,13 +70,16 @@ csv_table nil_csv_table = internal token * tokenize_csv(string8 buffer, mem_arena *arena) { - b32 FL = TRUE; if(buffer.size < 0) return NULL; + + token *tok = PushStruct(arena, token); + + // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit + // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? for(s32 index = 0; buffer.data[index] != '\0'; ++index) { - token *tok = PushStruct(arena, token); u8 point = buffer.data[index]; s32 start = 0; @@ -86,35 +87,35 @@ tokenize_csv(string8 buffer, mem_arena *arena) if(is_whitespace(point)) { - print("csv file is invalid"); + warn("csv file is invalid, detected whitespace"); return NULL; } switch(point) { case('\n'): - { - if(FL) tok->flags |= END_FL; - break; - } + { + if(FL) tok->flags |= END_FL; + break; + } case(','): - { - end = index - 1; - start = index + 1; - break; - } + { + end = index - 1; + start = index + 1; + break; + } default: - { - break; - } + { + break; + } } tok->lexeme = StringCast(&buffer.data[start], end - start); tok->next = tok; } - return NULL; + return tok; } internal void @@ -124,18 +125,24 @@ read_csv(string8 buffer) } -internal b_tree * -parse_csv(csv_token *tok, csv_table *table) +internal b_tree * +parse_csv(mem_arena *arena, token *tok) { + b_tree *tree = PushStructZero(arena, b_tree); + b_tree_create(arena, tree); - - for (;tok->next; tok = tok->next) + for (; tok != NULL; tok = tok->next) { - b_tree_node *current_btree_node = btree_node_alloc; + // skip structural tokens, only index values + if (tok->type != TOKEN_VALUE) + { + continue; + } - + // NOTE(nasr): payload is the token itself so the caller can reach + // row/col metadata without us having to copy it + b_tree_insert(arena, tree, tok->lexeme, (void *)tok); } - return NULL; + return tree; } - diff --git a/source/engine.c b/source/engine.c index 1cfbab0..9797d8a 100644 --- a/source/engine.c +++ b/source/engine.c @@ -40,7 +40,7 @@ is_delimiter(u8 point) typedef struct query_token query_token; struct query_token { - string8 *lexeme; + string8 lexeme; query_token *next; }; @@ -72,8 +72,9 @@ query_tokenizer(mem_arena *arena, string8 *buffer) s32 new_token_size = end - start; - tok->lexeme->data = &buffer->data[index]; - tok->lexeme->size = new_token_size; + tok->lexeme = PushString(arena, new_token_size); + tok->lexeme.data = &buffer->data[index]; + tok->lexeme.size = new_token_size; tok->next = tok; start = index + 1; @@ -83,21 +84,18 @@ query_tokenizer(mem_arena *arena, string8 *buffer) return tok; } -int main(int c, char **v) +int main(int count, char **value) { - - if(c < 2) - { - print("bad file, setting default file\n"); - } - else v[1] = "./test/customers-10000.csv"; + if(count < 2) value[1] = "./test/data.csv"; local_persist b32 running = 1; mem_arena *global_arena = arena_create(MiB(30)); - csv_table *global_table = PushStruct(global_arena, csv_table); - string8 buffer = load_file(v[1]); + // NOTE(nasr): see note down below + // csv_table *global_table = PushStruct(global_arena, csv_table); + + string8 buffer = load_file(global_arena, value[1]); print("\nDatabase Engine\n"); @@ -106,23 +104,41 @@ int main(int c, char **v) if (running) { { - u8 lbuf[256] = {}; + u8 *lbuf = PushArray(global_arena, u8, 256); s32 err = os_read(STDIN_FD, lbuf, 256); + if(err < 0) { print("error reading from stdin"); } - query_tokenizer(global_arena, &StringLit(lbuf)); + // TODO(nasr): extract this later in the future and make a string copy function/macro + // @params (s32 lbuf_size , string8 lbuf_stringified) + s32 lbuf_size = sizeof(lbuf) - 1; + string8 lbuf_stringified = PushString(global_arena, lbuf_size); + { + memcpy(lbuf_stringified.data, lbuf, lbuf_size); + lbuf_stringified.size = sizeof(lbuf) - 1; + } + query_tokenizer(global_arena, &lbuf_stringified); } { read_csv(buffer); token *tokens = tokenize_csv(buffer, global_arena); - global_table = parse_csv(tokens, global_table); + + assert_msg(tokens != NULL, "Tokens are NULL."); + + b_tree *bt = parse_csv(global_arena, tokens); + b_tree_write(bt); } + + // NOTE(nasr): not sure on how to approach the b-tree and the table format thing + // we kind of want our table format i think? but i wouldnt be sure about the use case + // so we stick to the regular b_tree for now. commenting out the tables. + sleep(1); } } -- cgit v1.3