From 2c9057b8f009bd39d97a2d30cf71135cb07c5e4b Mon Sep 17 00:00:00 2001 From: nasr Date: Fri, 6 Mar 2026 18:53:23 +0000 Subject: improvement(main): worked on the lexer, close to finishing the tokenization csv's are simple --- source/engine/engine.c | 25 ++++++++---------- source/lexer/lexer.c | 70 ++++++++++++++++++++++++++++++++------------------ source/repl/repl.c | 22 +++++++--------- 3 files changed, 65 insertions(+), 52 deletions(-) diff --git a/source/engine/engine.c b/source/engine/engine.c index 05c143c..64b15bf 100644 --- a/source/engine/engine.c +++ b/source/engine/engine.c @@ -3,8 +3,7 @@ #include -#include "../lexer/lexer.h" -#include "../lexer/lexer.c" + #include "../parser/parser.h" #include "../parser/parser.c" @@ -15,24 +14,22 @@ #include "../storage/csv_reader.h" #include "../storage/csv_reader.c" +#include "../lexer/lexer.h" +#include "../lexer/lexer.c" + + int main(int c, char **v) { if(c < 2) return -999; + mem_arena *global_arena = arena_create(MiB(20)); + csv_table *global_table = PushStruct(global_arena, csv_table); + string8 buffer = load_file(v[1]); - // read_csv(buffer); - tokenize_csv(buffer); - - - // for(;;) - // { - // print("reading user input..."); - // // TODO(nasr): design a repl system - // - // sleep(1); - // } - // + read_csv(buffer); + tokenize_csv(buffer, global_table, global_arena); + return 0; } diff --git a/source/lexer/lexer.c b/source/lexer/lexer.c index 1c7ab38..948afd0 100644 --- a/source/lexer/lexer.c +++ b/source/lexer/lexer.c @@ -1,77 +1,97 @@ +// the lexer acts as a table builder from a csv file +// and parsing individual rows and columns +// the next step would be building the b-tree internal b32 is_alpha(u8 point) { - return ((point >= 'a' && point <= 'z') || - (point >= 'A' && point <= 'Z') || - (point == '_')); + return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_')); } internal b32 is_digit(u8 point) { - return (point >= '0' && point <= '9'); + return (point >= '0' && point 
<= '9'); } internal b32 is_alpha_num(u8 point) { - return (is_alpha(point) || is_digit(point)); + return (is_alpha(point) || is_digit(point)); } internal b32 is_whitespace(u8 point) { - return (point == '\n' || point == '\r' || - point == ' ' || point == '\t'); + return (point == '\n' || point == '\r' || point == ' ' || point == '\t'); } internal b32 is_delimiter(u8 point) { - return (point == ','); - } internal token * -tokenize_csv(string8 buffer) +tokenize_csv(string8 buffer, csv_table *global_table, mem_arena *arena) { i32 count = 0; string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); + b32 first_line = 1; if(buffer.size < 0) return NULL; for(i32 index = 0; - buffer.data[index] != '\0'; - ++index) + buffer.data[index] != '\0'; + ++index) { - string8 tokens = {0}; + csv_row *row = PushStruct(arena, csv_row); + string8 token = {0}; u8 point = buffer.data[index]; - if(is_whitespace(point)) continue; - u8 *start = &buffer.data; - - if(is_delimiter(point)) - { - - - } + u8 *start = buffer.data; + u8 *end = NULL; - u8 *end = start - 1; - - unused(start); - unused(end); + unused(row); switch (point) { + case '\n': + { + first_line = -1; + break; + } + case ',': + { + end = start - 1; + + if (first_line) + { + global_table->headers = &token; + ++global_table->headers; + break; + } + else + { + + break; + } + } default: { printf("point: %c\n", point); count++; + break; } } + token = (string8){ + .data = start, + .size = end - start, + }; + + **tokens = token; + ++*tokens; } printf("%d", count); diff --git a/source/repl/repl.c b/source/repl/repl.c index 4c57345..dd289d8 100644 --- a/source/repl/repl.c +++ b/source/repl/repl.c @@ -1,16 +1,12 @@ -#ifndef ENGINE_REPL_H -#define ENGINE_REPL_H - -typedef struct node node; -struct node -{ - -}; - -typedef struct btree btree; -struct btree +internal void +init_repl() { + for(;;) + { + print("reading user input..."); + // TODO(nasr): design a repl system -}; + sleep(1); + } -#endif /* ENGINE_H */ +} -- cgit v1.3