From d8c52d6c408a172f1210c77df3e3a9629ea68dc6 Mon Sep 17 00:00:00 2001 From: nasr Date: Thu, 5 Mar 2026 22:42:55 +0000 Subject: feature(main): helper functions for lexing refactor(main): helper script for testing to gitignore --- .gitignore | 2 + source/base/base_string.h | 9 ++++ source/engine/engine.c | 9 ++-- source/lexer/lexer.c | 74 ++++++++++++++++++++++++++++++- source/storage/csv_reader.c | 8 ---- source/storage/csv_reader.h | 8 ++-- tags | 105 +++++++++++++++++++++++++++++--------------- 7 files changed, 161 insertions(+), 54 deletions(-) diff --git a/.gitignore b/.gitignore index 8bebb0b..a0e4bdd 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,5 @@ idea/* /build /notes.txt /test +/tags +/helper.sh diff --git a/source/base/base_string.h b/source/base/base_string.h index 64a3162..189b38a 100644 --- a/source/base/base_string.h +++ b/source/base/base_string.h @@ -47,4 +47,13 @@ string8_append_char(string8 *buf, u8 c) buf->size += 1; } +read_only global_variable +string8 nil_string = +{ + + .data = NULL, + .size = 0, + +}; + #endif /* BASE_STRING_H */ diff --git a/source/engine/engine.c b/source/engine/engine.c index ada4ecb..05c143c 100644 --- a/source/engine/engine.c +++ b/source/engine/engine.c @@ -1,6 +1,8 @@ #define BASE_UNITY #include "../base/base_include.h" +#include + #include "../lexer/lexer.h" #include "../lexer/lexer.c" @@ -13,17 +15,14 @@ #include "../storage/csv_reader.h" #include "../storage/csv_reader.c" -#if 1 -#include -#endif - int main(int c, char **v) { if(c < 2) return -999; string8 buffer = load_file(v[1]); - read_csv(buffer); + // read_csv(buffer); + tokenize_csv(buffer); // for(;;) diff --git a/source/lexer/lexer.c b/source/lexer/lexer.c index 8182c5a..1c7ab38 100644 --- a/source/lexer/lexer.c +++ b/source/lexer/lexer.c @@ -1,10 +1,80 @@ +internal b32 +is_alpha(u8 point) +{ + return ((point >= 'a' && point <= 'z') || + (point >= 'A' && point <= 'Z') || + (point == '_')); +} + +internal b32 +is_digit(u8 point) +{ + return (point >= '0' && point <= '9'); +} + +internal b32 +is_alpha_num(u8 point) +{ + return (is_alpha(point) || is_digit(point)); +} + +internal b32 +is_whitespace(u8 point) +{ + return (point == '\n' || point == '\r' || + point == ' ' || point == '\t'); +} + +internal b32 +is_delimiter(u8 point) +{ + + return (point == ','); + +} + internal token * tokenize_csv(string8 buffer) { + i32 count = 0; + string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); + if(buffer.size < 0) return NULL; for(i32 index = 0; - buffer.data[index] != '\0' - ;) + buffer.data[index] != '\0'; + ++index) + { + string8 tokens = {0}; + + u8 point = buffer.data[index]; + if(is_whitespace(point)) continue; + + u8 *start = &buffer.data; + + if(is_delimiter(point)) + { + + + } + + u8 *end = start - 1; + + unused(start); + unused(end); + + switch (point) + { + + default: + { + printf("point: %c\n", point); + count++; + } + } + + } + + printf("%d", count); return NULL; } diff --git a/source/storage/csv_reader.c b/source/storage/csv_reader.c index a06e9c4..2fcbe04 100644 --- a/source/storage/csv_reader.c +++ b/source/storage/csv_reader.c @@ -1,15 +1,7 @@ -#define STD_TEST -#if defined(STD_TEST) -#include -#endif - internal void read_csv(string8 buffer) { -#if defined(STD_TEST) printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data); -#endif } - diff --git a/source/storage/csv_reader.h b/source/storage/csv_reader.h index 711499f..36e07a4 100644 --- a/source/storage/csv_reader.h +++ b/source/storage/csv_reader.h @@ -23,8 +23,8 @@ struct csv_table read_only global_variable csv_row nil_csv_row = { - .fields = {NULL, 0}, - .count = 0, + .fields = &nil_string, + .count = 0, }; @@ -32,8 +32,8 @@ csv_row nil_csv_row = read_only global_variable csv_table nil_csv_table = { - .string8 = {NULL, 0}, - .csv_row = &nil_csv_row, + .headers = &nil_string, + .rows = &nil_csv_row, .col_count = 0, .row_count = 0, }; diff --git a/tags b/tags index 1f8a45d..44c6eee 100644 --- a/tags +++ b/tags @@ -86,20 +86,23 @@ BASE_MEM_H source/base/base_mem.h /^#define BASE_MEM_H$/;" d BASE_OS_H source/base/base_os.h /^#define BASE_OS_H$/;" d BASE_STRING_H source/base/base_string.h /^#define BASE_STRING_H$/;" d BASE_TEST_H source/base/base_test.h /^#define BASE_TEST_H$/;" d -BASE_UNITY source/engine/engine_main.c /^#define BASE_UNITY$/;" d file: +BASE_UNITY source/engine/engine.c /^#define BASE_UNITY$/;" d file: BIN Makefile /^BIN = build\/engine$/;" m BLUE source/base/base_test.h /^#define BLUE /;" d +BTREE_H source/storage/b_tree.h /^#define BTREE_H$/;" d BUFF_DEFAULT source/base/base.h /^#define BUFF_DEFAULT /;" d BUFF_LARGE source/base/base.h /^#define BUFF_LARGE /;" d BUFF_SMALL source/base/base.h /^#define BUFF_SMALL /;" d +B_TREE_ORDER source/storage/b_tree.h /^#define B_TREE_ORDER /;" d CC Makefile /^CC = clang$/;" m CFLAGS Makefile /^CFLAGS = -Wall -Wextra -Wfloat-equal -Wswitch-default -Wswitch-enum \\$/;" m +CSV_READER_H source/storage/csv_reader.h /^#define CSV_READER_H$/;" d DEPRECATED source/base/base.h /^#define DEPRECATED /;" d -ENGINE_LEXER_H source/engine/engine_lexer.h /^#define ENGINE_LEXER_H$/;" d -ENGINE_REPL_H source/engine/engine_parser.c /^#define ENGINE_REPL_H$/;" d file: -ENGINE_REPL_H source/engine/engine_parser.h /^#define ENGINE_REPL_H$/;" d -ENGINE_REPL_H source/engine/engine_repl.c /^#define ENGINE_REPL_H$/;" d file: -ENGINE_REPL_H source/engine/engine_repl.h /^#define ENGINE_REPL_H$/;" d +ENGINE_LEXER_H source/lexer/lexer.h /^#define ENGINE_LEXER_H$/;" d +ENGINE_REPL_H source/parser/parser.c /^#define ENGINE_REPL_H$/;" d file: +ENGINE_REPL_H source/parser/parser.h /^#define ENGINE_REPL_H$/;" d +ENGINE_REPL_H source/repl/repl.c /^#define ENGINE_REPL_H$/;" d file: +ENGINE_REPL_H source/repl/repl.h /^#define ENGINE_REPL_H$/;" d ERR_INVALID source/base/base.h /^#define ERR_INVALID /;" d ERR_IO source/base/base.h /^#define ERR_IO /;" d ERR_OK source/base/base.h /^#define ERR_OK /;" d @@ -123,13 +126,16 @@ PushString source/base/base_string.h /^ #define PushString(/;" d PushStruct source/base/base_arena.h /^#define PushStruct(/;" d RED source/base/base_test.h /^#define RED /;" d RESET source/base/base_test.h /^#define RESET /;" d -SRC Makefile /^SRC = source\/engine\/engine_main.c$/;" m +SRC Makefile /^SRC = source\/engine\/engine.c$/;" m STACK_H source/base/base_stack.h /^#define STACK_H$/;" d +StringFmt source/base/base_string.h /^#define StringFmt /;" d StringLit source/base/base_string.h /^#define StringLit(/;" d -TOKEN_IDENTIFIER source/engine/engine_lexer.h /^ TOKEN_IDENTIFIER,$/;" e enum:token_type -TOKEN_UNDEFINED source/engine/engine_lexer.h /^ TOKEN_UNDEFINED = 255,$/;" e enum:token_type -TOKEN_VALUE source/engine/engine_lexer.h /^ TOKEN_VALUE,$/;" e enum:token_type +TOKEN_IDENTIFIER source/lexer/lexer.h /^ TOKEN_IDENTIFIER,$/;" e enum:token_type +TOKEN_UNDEFINED source/lexer/lexer.h /^ TOKEN_UNDEFINED = 255,$/;" e enum:token_type +TOKEN_VALUE source/lexer/lexer.h /^ TOKEN_VALUE,$/;" e enum:token_type TRUE source/base/base.h /^#define TRUE /;" d +ULLongFmt source/base/base_string.h /^#define ULLongFmt /;" d +ULongFmt source/base/base_string.h /^#define ULongFmt /;" d align source/base/base_mem.h /^align(u64 pointer, umm alignment)$/;" f typeref:typename:internal u64 arena source/base/base_arena.h /^ mem_arena *arena;$/;" m struct:temp_arena typeref:typename:mem_arena * arena_alloc source/base/base_arena.c /^arena_alloc(mem_arena *arena, u64 size)$/;" f typeref:typename:internal void * @@ -143,17 +149,25 @@ arena_resize_align source/base/base_arena.c /^arena_resize_align(mem_arena *aren b16 source/base/base.h /^typedef i16 b16;$/;" t typeref:typename:i16 b32 source/base/base.h /^typedef i32 b32;$/;" t typeref:typename:i32 b8 source/base/base.h /^typedef u8 b8;$/;" t typeref:typename:u8 +b_tree source/storage/b_tree.h /^struct b_tree$/;" s +b_tree source/storage/b_tree.h /^typedef struct b_tree b_tree;$/;" t typeref:struct:b_tree +b_tree_create source/storage/b_tree.c /^b_tree_create(mem_arena *arena, u16 order)$/;" f typeref:typename:internal void +b_tree_insert source/storage/b_tree.c /^b_tree_insert()$/;" f typeref:typename:internal void +b_tree_node source/storage/b_tree.h /^struct b_tree_node$/;" s +b_tree_node source/storage/b_tree.h /^typedef struct b_tree_node b_tree_node;$/;" t typeref:struct:b_tree_node +b_tree_search source/storage/b_tree.c /^b_tree_search(node *node)$/;" f typeref:typename:internal void +b_tree_write source/storage/b_tree.c /^b_tree_write()$/;" f typeref:typename:internal void base_position source/base/base_arena.h /^ u8 *base_position;$/;" m struct:mem_arena typeref:typename:u8 * base_position source/base/base_stack.h /^ u8 *base_position;$/;" m struct:mem_stack typeref:typename:u8 * breakpoint source/base/base.h /^#define breakpoint /;" d -btree source/engine/engine_parser.c /^struct btree$/;" s file: -btree source/engine/engine_parser.c /^typedef struct btree btree;$/;" t typeref:struct:btree file: -btree source/engine/engine_parser.h /^struct btree$/;" s -btree source/engine/engine_parser.h /^typedef struct btree btree;$/;" t typeref:struct:btree -btree source/engine/engine_repl.c /^struct btree$/;" s file: -btree source/engine/engine_repl.c /^typedef struct btree btree;$/;" t typeref:struct:btree file: -btree source/engine/engine_repl.h /^struct btree$/;" s -btree source/engine/engine_repl.h /^typedef struct btree btree;$/;" t typeref:struct:btree +btree source/parser/parser.c /^struct btree$/;" s file: +btree source/parser/parser.c /^typedef struct btree btree;$/;" t typeref:struct:btree file: +btree source/parser/parser.h /^struct btree$/;" s +btree source/parser/parser.h /^typedef struct btree btree;$/;" t typeref:struct:btree +btree source/repl/repl.c /^struct btree$/;" s file: +btree source/repl/repl.c /^typedef struct btree btree;$/;" t typeref:struct:btree file: +btree source/repl/repl.h /^struct btree$/;" s +btree source/repl/repl.h /^typedef struct btree btree;$/;" t typeref:struct:btree calculate_padding source/base/base_stack.c /^calculate_padding(u64 pointer, u8 alignment, u64 header_size)$/;" f typeref:typename:internal u8 capacity source/base/base_arena.h /^ u64 capacity;$/;" m struct:mem_arena typeref:typename:u64 capacity source/base/base_stack.h /^ u64 capacity;$/;" m struct:mem_stack typeref:typename:u64 @@ -161,19 +175,28 @@ check source/base/base_test.h /^#define check(/;" d checkpoint source/base/base_test.h /^#define checkpoint /;" d checkpoint_end_output source/base/base_test.h /^#define checkpoint_end_output /;" d checkpoint_output source/base/base_test.h /^#define checkpoint_output /;" d +children source/storage/b_tree.h /^ b_tree_node *children[B_TREE_ORDER];$/;" m struct:b_tree_node typeref:typename:b_tree_node * [] clean Makefile /^clean:$/;" t +col_count source/storage/csv_reader.h /^ i32 col_count;$/;" m struct:csv_table typeref:typename:i32 +count source/storage/csv_reader.h /^ i32 count;$/;" m struct:csv_row typeref:typename:i32 +csv_row source/storage/csv_reader.h /^struct csv_row$/;" s +csv_row source/storage/csv_reader.h /^typedef struct csv_row csv_row;$/;" t typeref:struct:csv_row +csv_table source/storage/csv_reader.h /^struct csv_table$/;" s +csv_table source/storage/csv_reader.h /^typedef struct csv_table csv_table;$/;" t typeref:struct:csv_table current_offset source/base/base_stack.h /^ u64 current_offset;$/;" m struct:mem_stack typeref:typename:u64 current_position source/base/base_arena.h /^ u64 current_position;$/;" m struct:mem_arena typeref:typename:u64 data source/base/base_string.h /^ u8 *data;$/;" m struct:string8 typeref:typename:u8 * database engine in c README.md /^# database engine in c$/;" c f32 source/base/base.h /^typedef float f32;$/;" t typeref:typename:float f64 source/base/base.h /^typedef double f64;$/;" t typeref:typename:double +fields source/storage/csv_reader.h /^ string8 *fields;$/;" m struct:csv_row typeref:typename:string8 * generate_hash source/base/base_hash.c /^generate_hash()$/;" f typeref:typename:internal u64 global_variable source/base/base.h /^#define global_variable /;" d hash source/base/bash_hash.h /^typedef struct hash hash;$/;" t typeref:struct:hash hash_map source/base/bash_hash.h /^struct hash_map $/;" s hash_map source/base/bash_hash.h /^typedef struct hash_map hash_map;$/;" t typeref:struct:hash_map header source/base/base_stack.h /^ mem_stack_header *header;$/;" m struct:mem_stack typeref:typename:mem_stack_header * +headers source/storage/csv_reader.h /^ string8 *headers;$/;" m struct:csv_table typeref:typename:string8 * i16 source/base/base.h /^typedef int16_t i16;$/;" t typeref:typename:int16_t i32 source/base/base.h /^typedef int32_t i32;$/;" t typeref:typename:int32_t i64 source/base/base.h /^typedef int64_t i64;$/;" t typeref:typename:int64_t @@ -181,30 +204,42 @@ i8 source/base/base.h /^typedef int8_t i8;$/;" t typeref:typename:int8_t input_read source/base/base_io.h /^input_read()$/;" f typeref:typename:internal void internal source/base/base.h /^#define internal /;" d is_pow source/base/base_mem.h /^is_pow(umm x)$/;" f typeref:typename:internal b8 -lexeme source/engine/engine_lexer.h /^ string8 lexeme;$/;" m struct:token typeref:typename:string8 +key_count source/storage/b_tree.h /^ i32 key_count;$/;" m struct:b_tree_node typeref:typename:i32 +keys source/storage/b_tree.h /^ string8 keys[B_TREE_ORDER - 1];$/;" m struct:b_tree_node typeref:typename:string8[] +leaf source/storage/b_tree.h /^ b32 leaf;$/;" m struct:b_tree_node typeref:typename:b32 +lexeme source/lexer/lexer.h /^ string8 lexeme;$/;" m struct:token typeref:typename:string8 +load_file source/base/base_os.h /^load_file(const char *path)$/;" f typeref:typename:internal string8 local_persist source/base/base.h /^#define local_persist /;" d -main source/engine/engine_main.c /^int main(int c, char **v)$/;" f typeref:typename:int +main source/engine/engine.c /^int main(int c, char **v)$/;" f typeref:typename:int mem_arena source/base/base_arena.h /^struct mem_arena$/;" s mem_arena source/base/base_arena.h /^typedef struct mem_arena mem_arena;$/;" t typeref:struct:mem_arena mem_stack source/base/base_stack.h /^struct mem_stack$/;" s mem_stack source/base/base_stack.h /^typedef struct mem_stack mem_stack;$/;" t typeref:struct:mem_stack mem_stack_header source/base/base_stack.h /^struct mem_stack_header$/;" s mem_stack_header source/base/base_stack.h /^typedef struct mem_stack_header mem_stack_header;$/;" t typeref:struct:mem_stack_header -node source/engine/engine_parser.c /^struct node$/;" s file: -node source/engine/engine_parser.c /^typedef struct node node;$/;" t typeref:struct:node file: -node source/engine/engine_parser.h /^struct node$/;" s -node source/engine/engine_parser.h /^typedef struct node node;$/;" t typeref:struct:node -node source/engine/engine_repl.c /^struct node$/;" s file: -node source/engine/engine_repl.c /^typedef struct node node;$/;" t typeref:struct:node file: -node source/engine/engine_repl.h /^struct node$/;" s -node source/engine/engine_repl.h /^typedef struct node node;$/;" t typeref:struct:node +nil_csv_row source/storage/csv_reader.h /^csv_row nil_csv_row =$/;" v typeref:typename:read_only global_variable csv_row +nil_csv_table source/storage/csv_reader.h /^csv_table nil_csv_table =$/;" v typeref:typename:read_only global_variable csv_table +nil_string source/base/base_string.h /^string8 nil_string =$/;" v typeref:typename:read_only global_variable string8 +node source/parser/parser.c /^struct node$/;" s file: +node source/parser/parser.c /^typedef struct node node;$/;" t typeref:struct:node file: +node source/parser/parser.h /^struct node$/;" s +node source/parser/parser.h /^typedef struct node node;$/;" t typeref:struct:node +node source/repl/repl.c /^struct node$/;" s file: +node source/repl/repl.c /^typedef struct node node;$/;" t typeref:struct:node file: +node source/repl/repl.h /^struct node$/;" s +node source/repl/repl.h /^typedef struct node node;$/;" t typeref:struct:node padding source/base/base_stack.h /^ u8 padding;$/;" m struct:mem_stack_header typeref:typename:u8 +parent source/storage/b_tree.h /^ b_tree_node *parent;$/;" m struct:b_tree_node typeref:typename:b_tree_node * previous_offset source/base/base_stack.h /^ u8 previous_offset;$/;" m struct:mem_stack_header typeref:typename:u8 previous_position source/base/base_arena.h /^ u64 previous_position;$/;" m struct:mem_arena typeref:typename:u64 -print source/base/base_os.h /^#define print(Format) print(/;" d print source/base/base_os.h /^print(const char *str)$/;" f typeref:typename:internal void +read_csv source/storage/csv_reader.c /^read_csv(string8 buffer)$/;" f typeref:typename:internal void read_only source/base/base.h /^#define read_only /;" d read_only source/base/base.h /^#define read_only$/;" d +root source/storage/b_tree.h /^ b_tree_node *root;$/;" m struct:b_tree typeref:typename:b_tree_node * +row_count source/storage/csv_reader.h /^ i32 row_count;$/;" m struct:csv_table typeref:typename:i32 +rows source/storage/b_tree.h /^ csv_row *rows[B_TREE_ORDER - 1];$/;" m struct:b_tree_node typeref:typename:csv_row * [] +rows source/storage/csv_reader.h /^ csv_row *rows;$/;" m struct:csv_table typeref:typename:csv_row * run Makefile /^run:$/;" t show source/base/base_test.h /^#define show /;" d size source/base/base_string.h /^ u64 size;$/;" m struct:string8 typeref:typename:u64 @@ -228,12 +263,12 @@ temp_arena_begin source/base/base_arena.c /^temp_arena_begin(mem_arena *arena)$/ temp_arena_end source/base/base_arena.c /^temp_arena_end(temp_arena temp)$/;" f typeref:typename:internal void temp_breakpoint source/base/base.h /^#define temp_breakpoint /;" d test source/base/base_test.h /^#define test(/;" d -token source/engine/engine_lexer.h /^struct token$/;" s -token source/engine/engine_lexer.h /^typedef struct token token;$/;" t typeref:struct:token -token_type source/engine/engine_lexer.h /^enum token_type$/;" g -token_type source/engine/engine_lexer.h /^typedef enum token_type token_type;$/;" t typeref:enum:token_type -tokenize source/engine/engine_lexer.c /^tokenize(string8 buffer)$/;" f typeref:typename:internal token * -type source/engine/engine_lexer.h /^ token_type type;$/;" m struct:token typeref:typename:token_type +token source/lexer/lexer.h /^struct token$/;" s +token source/lexer/lexer.h /^typedef struct token token;$/;" t typeref:struct:token +token_type source/lexer/lexer.h /^enum token_type$/;" g +token_type source/lexer/lexer.h /^typedef enum token_type token_type;$/;" t typeref:enum:token_type +tokenize_csv source/lexer/lexer.c /^tokenize_csv(string8 buffer)$/;" f typeref:typename:internal token * +type source/lexer/lexer.h /^ token_type type;$/;" m struct:token typeref:typename:token_type u16 source/base/base.h /^typedef uint16_t u16;$/;" t typeref:typename:uint16_t u32 source/base/base.h /^typedef uint32_t u32;$/;" t typeref:typename:uint32_t u64 source/base/base.h /^typedef uint64_t u64;$/;" t typeref:typename:uint64_t -- cgit v1.3