diff options
Diffstat (limited to 'source/tb_db.c')
| -rw-r--r-- | source/tb_db.c | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/source/tb_db.c b/source/tb_db.c new file mode 100644 index 0000000..b992111 --- /dev/null +++ b/source/tb_db.c | |||
| @@ -0,0 +1,206 @@ | |||
| 1 | #define B_TREE_IMPLEMENTATION | ||
| 2 | #define BASE_UNITY | ||
| 3 | #include "base/base_include.h" | ||
| 4 | |||
| 5 | internal b32 | ||
| 6 | is_alpha(u8 point) | ||
| 7 | { | ||
| 8 | return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_')); | ||
| 9 | } | ||
| 10 | |||
| 11 | internal b32 | ||
| 12 | is_digit(u8 point) | ||
| 13 | { | ||
| 14 | return (point >= '0' && point <= '9'); | ||
| 15 | } | ||
| 16 | |||
| 17 | internal b32 | ||
| 18 | is_alpha_num(u8 point) | ||
| 19 | { | ||
| 20 | return (is_alpha(point) || is_digit(point)); | ||
| 21 | } | ||
| 22 | |||
| 23 | internal b32 | ||
| 24 | is_whitespace(u8 point) | ||
| 25 | { | ||
| 26 | return (point == '\n' || point == '\r' || point == ' ' || point == '\t'); | ||
| 27 | } | ||
| 28 | |||
| 29 | internal b32 | ||
| 30 | is_delimiter(u8 point) | ||
| 31 | { | ||
| 32 | return (point == ','); | ||
| 33 | } | ||
| 34 | |||
| 35 | #include "b_tree_impl.h" | ||
| 36 | #include "csv_decoder.h" | ||
| 37 | |||
| 38 | typedef struct query_token query_token; | ||
| 39 | struct query_token | ||
| 40 | { | ||
| 41 | string8 lexeme; | ||
| 42 | query_token *next; | ||
| 43 | }; | ||
| 44 | |||
| 45 | typedef struct query_token_list query_token_list; | ||
| 46 | struct query_token_list | ||
| 47 | { | ||
| 48 | query_token *start_token; | ||
| 49 | query_token *current_token; | ||
| 50 | }; | ||
| 51 | |||
| 52 | read_only global_variable | ||
| 53 | query_token nil_query_token = | ||
| 54 | { | ||
| 55 | .lexeme = {.data = NULL, .size = 0}, | ||
| 56 | .next = &nil_query_token | ||
| 57 | }; | ||
| 58 | |||
| 59 | |||
| 60 | read_only global_variable | ||
| 61 | query_token_list nil_query_token_list = | ||
| 62 | { | ||
| 63 | .start_token = &nil_query_token, | ||
| 64 | .current_token = &nil_query_token, | ||
| 65 | }; | ||
| 66 | |||
| 67 | internal b32 | ||
| 68 | is_nil_query_token(query_token *token) | ||
| 69 | { | ||
| 70 | return (token == &nil_query_token) || (token == NULL); | ||
| 71 | } | ||
| 72 | |||
| 73 | internal b32 | ||
| 74 | is_nil_query_token_list(query_token *token) | ||
| 75 | { | ||
| 76 | return (token == &nil_query_token) || (token == NULL); | ||
| 77 | } | ||
| 78 | |||
| 79 | // takes on line of the repl input | ||
| 80 | // return a reference to the passed list | ||
| 81 | internal query_token_list * | ||
| 82 | query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list) | ||
| 83 | { | ||
| 84 | b32 initialized = 0; | ||
| 85 | unused(initialized); | ||
| 86 | |||
| 87 | for (u64 index = 0; index < buffer->size; ++index) | ||
| 88 | { | ||
| 89 | u8 codepoint = buffer->data[index]; | ||
| 90 | |||
| 91 | if(codepoint == '\n' || codepoint == '\r') break; | ||
| 92 | |||
| 93 | s32 start = 0; | ||
| 94 | s32 end = 0; | ||
| 95 | |||
| 96 | if(is_whitespace(codepoint)) end = index; | ||
| 97 | |||
| 98 | // save the token | ||
| 99 | // TODO(nasr): work on the string macros cuz no work | ||
| 100 | { | ||
| 101 | query_token *new_token = PushStruct(arena, query_token); | ||
| 102 | |||
| 103 | //- initialize list | ||
| 104 | { | ||
| 105 | if(is_nil_query_token(list->start_token)) | ||
| 106 | { | ||
| 107 | list->start_token = new_token; | ||
| 108 | list->current_token = new_token; | ||
| 109 | } | ||
| 110 | else | ||
| 111 | { | ||
| 112 | //- all we need to do - we dont track parents or what ever. this is a token stream not a tree | ||
| 113 | list->current_token->next = new_token; | ||
| 114 | } | ||
| 115 | } | ||
| 116 | |||
| 117 | s32 new_token_size = end - start; | ||
| 118 | |||
| 119 | new_token->lexeme = PushString(arena, new_token_size); | ||
| 120 | new_token->lexeme.data = &buffer->data[index]; | ||
| 121 | new_token->lexeme.size = new_token_size; | ||
| 122 | |||
| 123 | list->current_token->next = new_token; | ||
| 124 | |||
| 125 | start = index + 1; | ||
| 126 | } | ||
| 127 | } | ||
| 128 | |||
| 129 | return list; | ||
| 130 | } | ||
| 131 | |||
| 132 | int main(int count, char **value) | ||
| 133 | { | ||
| 134 | |||
| 135 | #if 1 | ||
| 136 | unused(nil_query_token_list); | ||
| 137 | #endif | ||
| 138 | |||
| 139 | if(count < 2) value[1] = "./test/data.csv"; | ||
| 140 | |||
| 141 | local_persist b32 running = 1; | ||
| 142 | |||
| 143 | mem_arena *global_arena = arena_create(MiB(30)); | ||
| 144 | |||
| 145 | // NOTE(nasr): see note down below | ||
| 146 | // csv_table *global_table = PushStruct(global_arena, csv_table); | ||
| 147 | |||
| 148 | string8 buffer = load_file(global_arena, value[1]); | ||
| 149 | |||
| 150 | print("\nDatabase Engine\n"); | ||
| 151 | |||
| 152 | for(;;) | ||
| 153 | { | ||
| 154 | if (running) | ||
| 155 | { | ||
| 156 | { | ||
| 157 | u8 *lbuf = PushArray(global_arena, u8, 256); | ||
| 158 | s32 err = os_read(STDIN_FD, lbuf, 256); | ||
| 159 | |||
| 160 | if(err < 0) | ||
| 161 | { | ||
| 162 | print("error reading from stdin"); | ||
| 163 | } | ||
| 164 | |||
| 165 | // TODO(nasr): extract this later in the future and make a string copy function/macro | ||
| 166 | // @params (s32 lbuf_size , string8 lbuf_stringified) | ||
| 167 | s32 lbuf_size = sizeof(lbuf) - 1; | ||
| 168 | string8 lbuf_stringified = PushString(global_arena, lbuf_size); | ||
| 169 | { | ||
| 170 | memcpy(lbuf_stringified.data, lbuf, lbuf_size); | ||
| 171 | lbuf_stringified.size = sizeof(lbuf) - 1; | ||
| 172 | } | ||
| 173 | |||
| 174 | query_token_list *qtl = PushStruct(global_arena, query_token_list); | ||
| 175 | |||
| 176 | query_tokenizer(global_arena, &lbuf_stringified, qtl); | ||
| 177 | } | ||
| 178 | |||
| 179 | { | ||
| 180 | |||
| 181 | // NOTE(nasr): the use of tables is required for tracking headers etc. | ||
| 182 | // i think we can optimize this away in the future but for now its fine | ||
| 183 | csv_table *table = PushStruct(global_arena, csv_table); | ||
| 184 | |||
| 185 | csv_token_list *token_list = PushStruct(global_arena, csv_token_list); | ||
| 186 | |||
| 187 | csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list); | ||
| 188 | |||
| 189 | assert_msg(tokens != NULL, "Tokens are NULL."); | ||
| 190 | |||
| 191 | csv_token_list *ctl = PushStruct(global_arena, csv_token_list); | ||
| 192 | b_tree *bt = parse_csv(global_arena, ctl, table); | ||
| 193 | |||
| 194 | b_tree_write(bt); | ||
| 195 | } | ||
| 196 | |||
| 197 | // NOTE(nasr): not sure on how to approach the b-tree and the table format thing | ||
| 198 | // we kind of want our table format i think? but i wouldnt be sure about the use case | ||
| 199 | // so we stick to the regular b_tree for now. commenting out the tables. | ||
| 200 | |||
| 201 | sleep(1); | ||
| 202 | } | ||
| 203 | } | ||
| 204 | |||
| 205 | return 0; | ||
| 206 | } | ||
