diff options
| author | nasr <nsrddyn@gmail.com> | 2026-03-16 19:20:23 +0000 |
|---|---|---|
| committer | nasr <nsrddyn@gmail.com> | 2026-03-16 19:20:23 +0000 |
| commit | 180ccc84aac07c7bee2b09a6e07f7406908409b9 (patch) | |
| tree | efa39665e41c3132626f2c08b2f3ae0d18adc17a /source/tb_db.c | |
| parent | 2e258673171c2e4663a8b5d58e2ad174bb0ecd96 (diff) | |
feature(main): lots of stuff see description
1. increased compile time warnings to help with some optimizations.
2. implemented CSV lexing helper functions that operate on token lists,
such as appending and concatenating lists with each other
3. realized that the b-tree design is faulty, so disabled it; it will be
refactored in the next approach
Diffstat (limited to 'source/tb_db.c')
| -rw-r--r-- | source/tb_db.c | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/source/tb_db.c b/source/tb_db.c new file mode 100644 index 0000000..b992111 --- /dev/null +++ b/source/tb_db.c | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | #define B_TREE_IMPLEMENTATION | ||
| 2 | #define BASE_UNITY | ||
| 3 | #include "base/base_include.h" | ||
| 4 | |||
| 5 | internal b32 | ||
| 6 | is_alpha(u8 point) | ||
| 7 | { | ||
| 8 | return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_')); | ||
| 9 | } | ||
| 10 | |||
| 11 | internal b32 | ||
| 12 | is_digit(u8 point) | ||
| 13 | { | ||
| 14 | return (point >= '0' && point <= '9'); | ||
| 15 | } | ||
| 16 | |||
| 17 | internal b32 | ||
| 18 | is_alpha_num(u8 point) | ||
| 19 | { | ||
| 20 | return (is_alpha(point) || is_digit(point)); | ||
| 21 | } | ||
| 22 | |||
| 23 | internal b32 | ||
| 24 | is_whitespace(u8 point) | ||
| 25 | { | ||
| 26 | return (point == '\n' || point == '\r' || point == ' ' || point == '\t'); | ||
| 27 | } | ||
| 28 | |||
| 29 | internal b32 | ||
| 30 | is_delimiter(u8 point) | ||
| 31 | { | ||
| 32 | return (point == ','); | ||
| 33 | } | ||
| 34 | |||
| 35 | #include "b_tree_impl.h" | ||
| 36 | #include "csv_decoder.h" | ||
| 37 | |||
| 38 | typedef struct query_token query_token; | ||
| 39 | struct query_token | ||
| 40 | { | ||
| 41 | string8 lexeme; | ||
| 42 | query_token *next; | ||
| 43 | }; | ||
| 44 | |||
| 45 | typedef struct query_token_list query_token_list; | ||
| 46 | struct query_token_list | ||
| 47 | { | ||
| 48 | query_token *start_token; | ||
| 49 | query_token *current_token; | ||
| 50 | }; | ||
| 51 | |||
| 52 | read_only global_variable | ||
| 53 | query_token nil_query_token = | ||
| 54 | { | ||
| 55 | .lexeme = {.data = NULL, .size = 0}, | ||
| 56 | .next = &nil_query_token | ||
| 57 | }; | ||
| 58 | |||
| 59 | |||
| 60 | read_only global_variable | ||
| 61 | query_token_list nil_query_token_list = | ||
| 62 | { | ||
| 63 | .start_token = &nil_query_token, | ||
| 64 | .current_token = &nil_query_token, | ||
| 65 | }; | ||
| 66 | |||
| 67 | internal b32 | ||
| 68 | is_nil_query_token(query_token *token) | ||
| 69 | { | ||
| 70 | return (token == &nil_query_token) || (token == NULL); | ||
| 71 | } | ||
| 72 | |||
| 73 | internal b32 | ||
| 74 | is_nil_query_token_list(query_token *token) | ||
| 75 | { | ||
| 76 | return (token == &nil_query_token) || (token == NULL); | ||
| 77 | } | ||
| 78 | |||
| 79 | // takes on line of the repl input | ||
| 80 | // return a reference to the passed list | ||
| 81 | internal query_token_list * | ||
| 82 | query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) | ||
| 83 | { | ||
| 84 | b32 initialized = 0; | ||
| 85 | unused(initialized); | ||
| 86 | |||
| 87 | for (u64 index = 0; index < buffer->size; ++index) | ||
| 88 | { | ||
| 89 | u8 codepoint = buffer->data[index]; | ||
| 90 | |||
| 91 | if(codepoint == '\n' || codepoint == '\r') break; | ||
| 92 | |||
| 93 | s32 start = 0; | ||
| 94 | s32 end = 0; | ||
| 95 | |||
| 96 | if(is_whitespace(codepoint)) end = index; | ||
| 97 | |||
| 98 | // save the token | ||
| 99 | // TODO(nasr): work on the string macros cuz no work | ||
| 100 | { | ||
| 101 | query_token *new_token = PushStruct(arena, query_token); | ||
| 102 | |||
| 103 | //- initialize list | ||
| 104 | { | ||
| 105 | if(is_nil_query_token(list->start_token)) | ||
| 106 | { | ||
| 107 | list->start_token = new_token; | ||
| 108 | list->current_token = new_token; | ||
| 109 | } | ||
| 110 | else | ||
| 111 | { | ||
| 112 | //- all we need to do - we dont track parents or what ever. this is a token stream not a tree | ||
| 113 | list->current_token->next = new_token; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | s32 new_token_size = end - start; | ||
| 118 | |||
| 119 | new_token->lexeme = PushString(arena, new_token_size); | ||
| 120 | new_token->lexeme.data = &buffer->data[index]; | ||
| 121 | new_token->lexeme.size = new_token_size; | ||
| 122 | |||
| 123 | list->current_token->next = new_token; | ||
| 124 | |||
| 125 | start = index + 1; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | return list; | ||
| 130 | } | ||
| 131 | |||
| 132 | int main(int count, char **value) | ||
| 133 | { | ||
| 134 | |||
| 135 | #if 1 | ||
| 136 | unused(nil_query_token_list); | ||
| 137 | #endif | ||
| 138 | |||
| 139 | if(count < 2) value[1] = "./test/data.csv"; | ||
| 140 | |||
| 141 | local_persist b32 running = 1; | ||
| 142 | |||
| 143 | mem_arena *global_arena = arena_create(MiB(30)); | ||
| 144 | |||
| 145 | // NOTE(nasr): see note down below | ||
| 146 | // csv_table *global_table = PushStruct(global_arena, csv_table); | ||
| 147 | |||
| 148 | string8 buffer = load_file(global_arena, value[1]); | ||
| 149 | |||
| 150 | print("\nDatabase Engine\n"); | ||
| 151 | |||
| 152 | for(;;) | ||
| 153 | { | ||
| 154 | if (running) | ||
| 155 | { | ||
| 156 | { | ||
| 157 | u8 *lbuf = PushArray(global_arena, u8, 256); | ||
| 158 | s32 err = os_read(STDIN_FD, lbuf, 256); | ||
| 159 | |||
| 160 | if(err < 0) | ||
| 161 | { | ||
| 162 | print("error reading from stdin"); | ||
| 163 | } | ||
| 164 | |||
| 165 | // TODO(nasr): extract this later in the future and make a string copy function/macro | ||
| 166 | // @params (s32 lbuf_size , string8 lbuf_stringified) | ||
| 167 | s32 lbuf_size = sizeof(lbuf) - 1; | ||
| 168 | string8 lbuf_stringified = PushString(global_arena, lbuf_size); | ||
| 169 | { | ||
| 170 | memcpy(lbuf_stringified.data, lbuf, lbuf_size); | ||
| 171 | lbuf_stringified.size = sizeof(lbuf) - 1; | ||
| 172 | } | ||
| 173 | |||
| 174 | query_token_list *qtl = PushStruct(global_arena, query_token_list); | ||
| 175 | |||
| 176 | query_tokenizer(global_arena, &lbuf_stringified, qtl); | ||
| 177 | } | ||
| 178 | |||
| 179 | { | ||
| 180 | |||
| 181 | // NOTE(nasr): the use of tables is required for tracking headers etc. | ||
| 182 | // i think we can optimize this away in the future but for now its fine | ||
| 183 | csv_table *table = PushStruct(global_arena, csv_table); | ||
| 184 | |||
| 185 | csv_token_list *token_list = PushStruct(global_arena, csv_token_list); | ||
| 186 | |||
| 187 | csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list); | ||
| 188 | |||
| 189 | assert_msg(tokens != NULL, "Tokens are NULL."); | ||
| 190 | |||
| 191 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); | ||
| 192 | b_tree *bt = parse_csv(global_arena, ctl, table); | ||
| 193 | |||
| 194 | b_tree_write(bt); | ||
| 195 | } | ||
| 196 | |||
| 197 | // NOTE(nasr): not sure on how to approach the b-tree and the table format thing | ||
| 198 | // we kind of want our table format i think? but i wouldnt be sure about the use case | ||
| 199 | // so we stick to the regular b_tree for now. commenting out the tables. | ||
| 200 | |||
| 201 | sleep(1); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | return 0; | ||
| 206 | } | ||
