From a9cb228861a6b0fad4d508c05c0614757a7f0a34 Mon Sep 17 00:00:00 2001
From: nasr <nsrddyn@gmail.com>
Date: Mon, 13 Apr 2026 14:58:49 +0200
Subject: refactor(main): refactor directory structure

---
 Makefile                   |   4 +-
 source/btree_impl.h        | 209 --------------------------------
 source/csv_decoder.h       | 294 ---------------------------------------------
 source/tb_db.c             | 198 ------------------------------
 source/tb_db/btree_impl.h  | 209 ++++++++++++++++++++++++++++++++
 source/tb_db/csv_decoder.h | 294 +++++++++++++++++++++++++++++++++++++++++++++
 source/tb_db/tb_db.c       | 198 ++++++++++++++++++++++++++++++
 7 files changed, 703 insertions(+), 703 deletions(-)
 delete mode 100644 source/btree_impl.h
 delete mode 100644 source/csv_decoder.h
 delete mode 100644 source/tb_db.c
 create mode 100644 source/tb_db/btree_impl.h
 create mode 100644 source/tb_db/csv_decoder.h
 create mode 100644 source/tb_db/tb_db.c

diff --git a/Makefile b/Makefile
index 29aa256..619c89f 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-BIN = build/engine
-SRC = source/tb_db.c
+BIN = build/tb_db
+SRC = source/tb_db/tb_db.c
 
 # CC = gcc
 CC = clang
diff --git a/source/btree_impl.h b/source/btree_impl.h
deleted file mode 100644
index b7015bd..0000000
--- a/source/btree_impl.h
+++ /dev/null
@@ -1,209 +0,0 @@
-/**
- * >> api notes
- * >> by Abdellah El Morabit
- *
- * - creation of a btree happens when doing an insertion on an
- *  empty btree where the root is null or nil
- *
- **/
-
-/* single header b-tree implementation by Abdellah El Morabit */
-#ifndef BTREE_H
-#define BTREE_H
-
-// maximum height of the tree the lower the lower the lower amount
-// of disk reads which translates into faster?
-#if 0
-global_variable read_only s16 btree_ORDER = 4;
-#endif
-#define BTREE_ORDER  4
-
-//- NOTE(nasr): defining a key to improve sorting
-//  i think saying that a key is a combination of the column + row is a good way of appraoching this
-typedef struct key key;
-struct key
-{
-    s32     header_index; //- col number
-    s32     row_index;
-};
-
-typedef struct btree_node btree_node;
-struct btree_node
-{
-    // store the key values of the sub nodes? if they are leaves?
-    key keys[BTREE_ORDER - 1];
-    // TODO(nasr): replace with something more generic?
-    // NOTE(nasr): cons of void * -> no type safety
-    // is there a way to still have some sort of that? size not variable
-    void *payload_per_key[BTREE_ORDER - 1];
-    btree_node *parent;
-    // handle to store children faster than linked list
-    // because now we can iterate over the nodes instead of having cache misses
-    // when jumping through linked nodes
-    btree_node *children[BTREE_ORDER];
-    // this shouldn't change things but could be a performance improvement on a bigger scale??
-    s32 *max_children;
-    // NOTE(nasr): reference count ::: check how many leaves are using this node
-    // also not needed for now because we don't free individual node because of arena allocator
-    // s32 *refc;
-    s32 key_count;
-    b32 leaf;
-
-
-    // NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference? 
-    // this could solve memory location issues?
-};
-
-
-typedef struct btree btree;
-struct btree
-{
-    // NOTE(nasr): make sure this always stays in memory
-    // so that initial fetch never requires a disk read
-    btree_node *root;
-};
-
-
-
-#ifdef BTREE_IMPLEMENTATION
-///////////////////////////////////////////////////////////////////////////////
-//- btree.c
-
-
-internal b32
-is_nil_btree_node(btree_node *node)
-{
-    // NOTE(nasr): looks like a useless function but could be usefull when we implement a nil_btree_node 
-    return (node == NULL);
-}
-
-// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
-
-internal b8 
-key_compare(key destination_key, key source_key)
-{
-    s32 source_index      = source_key.header_index      + source_key.row_index;
-    s32 destination_index = destination_key.header_index + destination_key.row_index;
-
-    if(destination_index > source_index) return 1;
-    else if(destination_index < source_index) return -1;
-    else return 0;
-}
-
-// NOTE(nasr): @return the index of the found element
-//- @param: start node could be the root node for all we care. but this enables optimizations in the future if we need them
-internal s32
-btree_find_ipos(key k, btree_node *start_node)
-{
-    s32 index = 0;
-    // scan right while the node's key at [index] is strictly less than k
-    while (index < start_node->key_count && key_compare(start_node->keys[index], k) < 0)
-    {
-        ++index;
-    }
-    return index;
-}
-
-// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
-internal void *
-btree_search(btree_node *node, key key)
-{
-    s32 index = btree_find_ipos(key, node);
-
-    if (index < node->key_count && key_compare(node->keys[index], key) == 0)
-    {
-        return node->payload_per_key[index];
-    }
-    if (node->leaf)
-    {
-        return NULL;
-    }
-    return btree_search(node->children[index], key);
-}
-
-// TODO(nasr): split node when key_count == btree_ORDER - 1 (node is full)
-// -- each node has max btree_ORDER - 1 keys
-//
-internal void
-btree_insert(mem_arena *arena, btree *tree, key key, void *payload)
-{
-    if(is_nil_btree_node(tree->root)) 
-    {
-        tree->root = PushStructZero(arena, btree_node);
-        tree->root->leaf = 1;
-        tree->root->key_count = 0;
-    }
-
-    btree_node *current_node = tree->root;
-
-    if (current_node->leaf)
-    {
-        // find the insertion position and shift keys right to make room
-        //- after: but what is this. we are expecting a new key.but we are looking for 
-        //- its position in the tree is useless because it's non existent
-
-        s32 i = btree_find_ipos(key, current_node);
-
-        for (s32 j = current_node->key_count; j > i; --j)
-        {
-            current_node->keys[j]            = current_node->keys[j - 1];
-            current_node->payload_per_key[j] = current_node->payload_per_key[j - 1];
-        }
-
-        current_node->keys[i]            = key;
-        current_node->payload_per_key[i] = payload;
-
-        if(current_node->key_count < (BTREE_ORDER - 1))
-        {
-            current_node->key_count += 1;
-        }
-        else {
-            // TODO(nasr): creating a new branch / tree?
-            // make a seperate function for this
-            // NOTE(nasr): only split through the parent when one actually exists;
-            // root splits need to be handled separately (promote mid key, create two children)
-            if(current_node->parent != NULL)
-            {
-                s32 split_pos = current_node->parent->key_count / 2 - 1;
-                s32 updated_keycount_p = 0;
-                for(s32 index = 0; index <= split_pos; ++index)
-                {
-                    ++updated_keycount_p;
-                }
-                current_node->parent->key_count = updated_keycount_p;
-            }
-        }
-    }
-    // TODO(nasr): internal node case walk down then split on the way back up
-}
-
-
-internal void
-btree_write(btree *bt)
-{
-    unused(bt);
-#if 0
-    temp_arena *temp_arena = temp_arena_begin(arena);
-    btree_node *root = bt->root;
-
-    for(btree_node *bt = PushStruct(arena); root->next; next = root;)
-    {
-        
-    }
-
-    temp_arena_end(temp);
-#endif
-}
-
-// - load the entire db into memory.
-#if 0
-internal void
-btree_load(mem_arena *arena)
-{
-
-
-}
-#endif
-
-#endif /* BTREE_IMPLEMENTATION */
-#endif /* BTREE_H */
diff --git a/source/csv_decoder.h b/source/csv_decoder.h
deleted file mode 100644
index 3d09dc6..0000000
--- a/source/csv_decoder.h
+++ /dev/null
@@ -1,294 +0,0 @@
-#ifndef ENGINE_LEXER_H
-#define ENGINE_LEXER_H
-
-enum csv_token_flags
-{
-    FL      = 1 << 2,
-};
-
-enum csv_token_type
-{
-    // first 255 tokens  for ascii characters
-    TOKEN_UNDEFINED = 255,
-    TOKEN_IDENTIFIER,
-    TOKEN_VALUE,
-};
-
-typedef struct csv_token csv_token;
-struct csv_token
-{
-    string8 lexeme;
-    csv_token *next_token;
-    enum csv_token_type type;
-    enum csv_token_flags flags;
-};
-
-// NOTE(nasr): i dont think im going to use this.
-typedef struct csv_row csv_row;
-struct csv_row
-{
-    // array of size col_count, points into mmap buffer
-    string8 *fields;
-    s32      count;
-};
-
-#if 0
-typedef struct csv_lntity csv_entity;
-struct csv_entity 
-{
-    //- not needed because we use key header mapping i think
-};
-#endif
-
-typedef struct csv_header csv_header;
-struct csv_header 
-{
-    string8  payload;
-    csv_header *next_header;
-};
-
-typedef struct csv_table csv_table;
-struct csv_table
-{
-    // first row, col names
-    // all data rows
-    csv_header  *header;
-    s32         row_count;
-    s32         header_count;
-    b32         finding_headers;
-};
-
-
-typedef struct csv_token_list csv_token_list;
-struct csv_token_list
-{
-    csv_token *start_token;
-    csv_token *end_token;
-};
-
-read_only global_variable
-csv_token nil_csv_token=
-{
-    .lexeme         = {.data = NULL, .size = 0},
-    .type           = 0,
-    .flags          = 0,
-    .next_token     = &nil_csv_token,
-};
-
-read_only global_variable
-csv_header nil_csv_header = 
-{
-    .payload =  {.data = NULL, .size = 0},
-    .next_header = &nil_csv_header,
-};
-
-read_only global_variable
-csv_token_list nil_csv_token_list =
-{
-    .start_token = &nil_csv_token,
-    .end_token   = &nil_csv_token,
-};
-
-read_only global_variable
-csv_row  nil_csv_row =
-{
-    .fields     = &nil_string,
-    .count      = 0,
-};
-
-read_only global_variable
-csv_table nil_csv_table =
-{
-    .header     = &nil_csv_header,
-    .row_count   = 0,
-};
-
-#endif /* ENGINE_LEXER_H */
-
-internal b32 
-is_nil_csv_token(csv_token *token)
-{
-    return ((token == NULL) || (token == &nil_csv_token)); 
-}
-
-// TODO(nasr): segfaulting because end_token not allocated
-internal void
-csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
-{
-    source_token_list->end_token->next_token = source_token;
-    source_token_list->end_token             = source_token;
-}
-
-//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
-internal void
-csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
-{
-    if(is_nil_csv_token(source->start_token)) return;
-
-    csv_token *source_ct      = source->start_token;
-    csv_token *destination_et = destination->end_token;
-
-    // walk source and stitch each node onto destination's tail
-    for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
-    {
-        destination_et->next_token = source_ct;
-        destination_et             = source_ct;
-    }
-
-    // destination_et now points at the last real source node (not the nil sentinel)
-    destination->end_token = destination_et;
-}
-
-#if 0
-internal csv_token_list *
-parse_csv_row(string8 row_buffer)
-{
-    // csv_token_list *
-
-}
-#endif
-
-
-// the lexer acts as a table builder from a csv  file
-// and parsing indivudal rows and columns
-// the next step would be building a the b-tree
-internal csv_token *
-tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
-{
-    unused(token_list);
-
-    if(buffer.size == 0) return NULL;
-
-    // URGENT(nasr): segfaulting because memcpy of strring value doesnt  work dammit
-    // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
-    // forgot what the solution was
-    // TODO(nasr): check what the problem here was
-
-    // string size tracking across the loop not inside it
-    s32 start = 0;
-
-    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
-    {
-        u8 point = buffer.data[index];
-
-#if 0
-        if(is_whitespace(point))
-        {
-            warn("csv file is invalid, detected whitespace");
-            return NULL;
-        }
-#endif
-
-        if(point == ',')
-        {
-            // emit a token for the field that ended before this comma
-            csv_token *token  = PushStructZero(arena, csv_token);
-
-            assert_msg(token != NULL, "did the push struct fail??");
-            assert_msg(arena->current_position < arena->capacity, "no more arena size");
-
-            token->lexeme     = StringCast(&buffer.data[start], index - start);
-            token->type       = TOKEN_VALUE;
-            token->next_token = &nil_csv_token;
-            csv_token_list_append_token(token_list, token);
-
-            start = index + 1;
-
-            if(table->finding_headers)
-            {
-                table->header_count++;
-            }
-        }
-        else if(point == '\n')
-        {
-            // emit a token for the field that ended at this newline
-            csv_token *token  = PushStructZero(arena, csv_token);
-            token->lexeme     = StringCast(&buffer.data[start], index - start);
-            token->type       = TOKEN_VALUE;
-            token->flags     |= FL;
-            token->next_token = &nil_csv_token;
-
-            assert_msg(token_list, "token list invalid");
-            assert_msg(token, "you're tring to append an invalid token");
-
-            csv_token_list_append_token(token_list, token);
-
-            start = index + 1;
-
-            if(table->finding_headers)
-            {
-                {
-                    //- map new header token list to table headers
-                }
-                table->finding_headers = FALSE;
-            }
-
-            table->row_count++;
-        }
-    }
-
-    // NOTE(nasr): return the first token the caller can walk the list from token_list
-    return token_list->start_token;
-}
-
-//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
-internal btree *
-parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
-{
-    btree *tree = PushStructZero(arena, btree);
-
-    s32 col_index = 0;
-    s32 row_index = 0;
-
-    // iterate over the token list while the token is not nil
-    for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
-    {
-        {
-            //- are we parsing the first line tokens?
-            //- if so, do something :))
-            if(ct->flags & FL) 
-            {
-                // NOTE(nasr): FL marks end-of-line; advance row, reset col
-                row_index++;
-                col_index = 0;
-
-                // TODO(nasr): replace with nil header check function
-                // NOTE(nasr): == nil means header hasn't been set yet
-                if(table->header == &nil_csv_header || table->header == NULL)
-                {
-#if 0
-                    // - no this should happen in the tokenization
-                    table->headers->next =     
-#endif
-                }
-                else 
-                {
-
-                }
-
-                // FL tokens are structural, no value to index
-                continue;
-            }
-        }
-
-        // skip non-value tokens, only index actual cell values
-        if (ct->type != TOKEN_VALUE)
-        {
-            col_index++;
-            continue;
-        }
-
-        // NOTE(nasr): payload is the cten itself so the caller can reach
-        // row/col metadata without us having to copy it
-        key k = {
-            .header_index = col_index,
-            .row_index    = row_index,
-        };
-
-        btree_insert(arena, tree, k, (void *)ct);
-
-        col_index++;
-    }
-
-    return tree;
-}
diff --git a/source/tb_db.c b/source/tb_db.c
deleted file mode 100644
index cfa26b1..0000000
--- a/source/tb_db.c
+++ /dev/null
@@ -1,198 +0,0 @@
-#define BTREE_IMPLEMENTATION
-#define BASE_UNITY
-#include "base/base_include.h"
-
-internal b32
-is_alpha(u8 point)
-{
-    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
-}
-
-internal b32
-is_digit(u8 point)
-{
-    return (point >= '0' && point <= '9');
-}
-
-internal b32
-is_alpha_num(u8 point)
-{
-    return (is_alpha(point) || is_digit(point));
-}
-
-internal b32
-is_whitespace(u8 point)
-{
-    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
-}
-
-internal b32
-is_delimiter(u8 point)
-{
-    return (point == ',');
-}
-
-#include "btree_impl.h"
-#include "csv_decoder.h"
-
-typedef struct query_token query_token;
-struct query_token
-{
-    string8      lexeme;
-    query_token  *next;
-};
-
-typedef struct query_token_list query_token_list;
-struct query_token_list
-{
-    query_token *start_token;
-    query_token *current_token;
-};
-
-read_only global_variable
-query_token nil_query_token =
-{
-    .lexeme = {.data = NULL, .size = 0},
-    .next = &nil_query_token
-};
-
-
-read_only global_variable
-query_token_list nil_query_token_list =
-{
-    .start_token = &nil_query_token,
-    .current_token = &nil_query_token,
-};
-
-internal b32
-is_nil_query_token(query_token *token)
-{
-    return (token == &nil_query_token) || (token == NULL);
-}
-
-internal b32
-is_nil_query_token_list(query_token *token)
-{
-    return (token == &nil_query_token) || (token == NULL);
-}
-
-// takes on line of the repl input
-// return a reference to the passed list
-internal query_token_list *
-query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
-{
-    b32 initialized = 0;
-    unused(initialized);
-
-    for (u64 index = 0; index < buffer->size; ++index)
-    {
-        u8 codepoint = buffer->data[index];
-
-        if(codepoint == '\n' || codepoint == '\r') break;
-
-        s32 start   = 0; 
-        s32 end     = 0; 
-
-        if(is_whitespace(codepoint)) end = index; 
-
-        // save the token
-        // TODO(nasr): work on the string macros cuz no work
-        {
-            query_token *new_token = PushStruct(arena, query_token);
-
-            //- initialize list
-            {
-                if(is_nil_query_token(list->start_token))
-                {
-                    list->start_token   = new_token;
-                    list->current_token = new_token; 
-                }
-                else 
-                {
-                    //- all we need to do - we dont track parents or what ever. this is a token stream not a tree
-                    list->current_token->next = new_token;
-                }
-            }
-
-            s32 new_token_size = end - start;
-
-            new_token->lexeme = PushString(arena, new_token_size);
-            new_token->lexeme.data = &buffer->data[index]; 
-            new_token->lexeme.size = new_token_size;
-
-            list->current_token->next = new_token;
-
-            start = index + 1;
-        }
-    }
-
-    return list;
-}
-
-int main(int count, char **value)
-{
-#if 1
-    unused(nil_query_token_list);
-#endif
-
-    if(count < 2) value[1] = "./test/data.csv";
-
-    local_persist b32 running = 1;
-
-    mem_arena *global_arena = arena_create(GiB(1));
-
-    string8 buffer = load_file(global_arena, value[1]);
-
-    // NOTE(nasr): the use of tables is required for tracking headers etc.
-    // i think we can optimize this away in the future but for now its fine
-    csv_table      *table      = PushStructZero(global_arena, csv_table);
-    csv_token_list *token_list = PushStructZero(global_arena, csv_token_list);
-
-    token_list->start_token = &nil_csv_token; 
-    token_list->end_token   = &nil_csv_token;   
-
-    csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list);
-    assert_msg(tokens != NULL, "tokens are null");
-
-    // NOTE(nasr): token_list is now populated — pass it directly, not a fresh empty list
-    btree *bt = parse_csv(global_arena, token_list, table);
-
-    print("\nDatabase Engine\n");
-
-    for(;;)
-    {
-        if(running)
-        {
-            u8  *lbuf     = PushArray(global_arena, u8, 256);
-            s32  err      = os_read(STDIN_FD, lbuf, 256);
-
-            if(err < 0)
-            {
-                print("error reading from stdin");
-            }
-
-            // TODO(nasr): extract this later in the future and make a string copy function/macro
-            // @params (s32 lbuf_size, string8 lbuf_stringified)
-            // NOTE(nasr): use err (bytes read) not sizeof(lbuf*) — sizeof a pointer is always 8
-            s32     lbuf_size       = err;
-            string8 lbuf_stringified = PushString(global_arena, lbuf_size);
-            {
-                memcpy(lbuf_stringified.data, lbuf, lbuf_size);
-                lbuf_stringified.size = lbuf_size;
-            }
-
-            query_token_list *qtl = PushStructZero(global_arena, query_token_list);
-
-
-            query_tokenizer(global_arena, &lbuf_stringified, qtl);
-
-            // TODO(nasr): dispatch qtl against bt here
-
-            unused(bt);
-
-            sleep(1);
-        }
-    }
-
-    return 0;
-}
diff --git a/source/tb_db/btree_impl.h b/source/tb_db/btree_impl.h
new file mode 100644
index 0000000..b7015bd
--- /dev/null
+++ b/source/tb_db/btree_impl.h
@@ -0,0 +1,209 @@
+/**
+ * >> api notes
+ * >> by Abdellah El Morabit
+ *
+ * - creation of a btree happens when doing an insertion on an
+ *  empty btree where the root is null or nil
+ *
+ **/
+
+/* single header b-tree implementation by Abdellah El Morabit */
+#ifndef BTREE_H
+#define BTREE_H
+
+// maximum height of the tree the lower the lower the lower amount
+// of disk reads which translates into faster?
+#if 0
+global_variable read_only s16 btree_ORDER = 4;
+#endif
+#define BTREE_ORDER  4
+
+//- NOTE(nasr): defining a key to improve sorting
+//  i think saying that a key is a combination of the column + row is a good way of appraoching this
+typedef struct key key;
+struct key
+{
+    s32     header_index; //- col number
+    s32     row_index;
+};
+
+typedef struct btree_node btree_node;
+struct btree_node
+{
+    // store the key values of the sub nodes? if they are leaves?
+    key keys[BTREE_ORDER - 1];
+    // TODO(nasr): replace with something more generic?
+    // NOTE(nasr): cons of void * -> no type safety
+    // is there a way to still have some sort of that? size not variable
+    void *payload_per_key[BTREE_ORDER - 1];
+    btree_node *parent;
+    // handle to store children faster than linked list
+    // because now we can iterate over the nodes instead of having cache misses
+    // when jumping through linked nodes
+    btree_node *children[BTREE_ORDER];
+    // this shouldn't change things but could be a performance improvement on a bigger scale??
+    s32 *max_children;
+    // NOTE(nasr): reference count ::: check how many leaves are using this node
+    // also not needed for now because we don't free individual node because of arena allocator
+    // s32 *refc;
+    s32 key_count;
+    b32 leaf;
+
+
+    // NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference? 
+    // this could solve memory location issues?
+};
+
+
+typedef struct btree btree;
+struct btree
+{
+    // NOTE(nasr): make sure this always stays in memory
+    // so that initial fetch never requires a disk read
+    btree_node *root;
+};
+
+
+
+#ifdef BTREE_IMPLEMENTATION
+///////////////////////////////////////////////////////////////////////////////
+//- btree.c
+
+
+internal b32
+is_nil_btree_node(btree_node *node)
+{
+    // NOTE(nasr): looks like a useless function but could be usefull when we implement a nil_btree_node 
+    return (node == NULL);
+}
+
+// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
+
+internal b8 
+key_compare(key destination_key, key source_key)
+{
+    s32 source_index      = source_key.header_index      + source_key.row_index;
+    s32 destination_index = destination_key.header_index + destination_key.row_index;
+
+    if(destination_index > source_index) return 1;
+    else if(destination_index < source_index) return -1;
+    else return 0;
+}
+
+// NOTE(nasr): @return the index of the found element
+//- @param: start node could be the root node for all we care. but this enables optimizations in the future if we need them
+internal s32
+btree_find_ipos(key k, btree_node *start_node)
+{
+    s32 index = 0;
+    // scan right while the node's key at [index] is strictly less than k
+    while (index < start_node->key_count && key_compare(start_node->keys[index], k) < 0)
+    {
+        ++index;
+    }
+    return index;
+}
+
+// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
+internal void *
+btree_search(btree_node *node, key key)
+{
+    s32 index = btree_find_ipos(key, node);
+
+    if (index < node->key_count && key_compare(node->keys[index], key) == 0)
+    {
+        return node->payload_per_key[index];
+    }
+    if (node->leaf)
+    {
+        return NULL;
+    }
+    return btree_search(node->children[index], key);
+}
+
+// TODO(nasr): split node when key_count == btree_ORDER - 1 (node is full)
+// -- each node has max btree_ORDER - 1 keys
+//
+internal void
+btree_insert(mem_arena *arena, btree *tree, key key, void *payload)
+{
+    if(is_nil_btree_node(tree->root)) 
+    {
+        tree->root = PushStructZero(arena, btree_node);
+        tree->root->leaf = 1;
+        tree->root->key_count = 0;
+    }
+
+    btree_node *current_node = tree->root;
+
+    if (current_node->leaf)
+    {
+        // find the insertion position and shift keys right to make room
+        //- after: but what is this. we are expecting a new key.but we are looking for 
+        //- its position in the tree is useless because it's non existent
+
+        s32 i = btree_find_ipos(key, current_node);
+
+        for (s32 j = current_node->key_count; j > i; --j)
+        {
+            current_node->keys[j]            = current_node->keys[j - 1];
+            current_node->payload_per_key[j] = current_node->payload_per_key[j - 1];
+        }
+
+        current_node->keys[i]            = key;
+        current_node->payload_per_key[i] = payload;
+
+        if(current_node->key_count < (BTREE_ORDER - 1))
+        {
+            current_node->key_count += 1;
+        }
+        else {
+            // TODO(nasr): creating a new branch / tree?
+            // make a seperate function for this
+            // NOTE(nasr): only split through the parent when one actually exists;
+            // root splits need to be handled separately (promote mid key, create two children)
+            if(current_node->parent != NULL)
+            {
+                s32 split_pos = current_node->parent->key_count / 2 - 1;
+                s32 updated_keycount_p = 0;
+                for(s32 index = 0; index <= split_pos; ++index)
+                {
+                    ++updated_keycount_p;
+                }
+                current_node->parent->key_count = updated_keycount_p;
+            }
+        }
+    }
+    // TODO(nasr): internal node case walk down then split on the way back up
+}
+
+
+internal void
+btree_write(btree *bt)
+{
+    unused(bt);
+#if 0
+    temp_arena *temp_arena = temp_arena_begin(arena);
+    btree_node *root = bt->root;
+
+    for(btree_node *bt = PushStruct(arena); root->next; next = root;)
+    {
+        
+    }
+
+    temp_arena_end(temp);
+#endif
+}
+
+// - load the entire db into memory.
+#if 0
+internal void
+btree_load(mem_arena *arena)
+{
+
+
+}
+#endif
+
+#endif /* BTREE_IMPLEMENTATION */
+#endif /* BTREE_H */
diff --git a/source/tb_db/csv_decoder.h b/source/tb_db/csv_decoder.h
new file mode 100644
index 0000000..3d09dc6
--- /dev/null
+++ b/source/tb_db/csv_decoder.h
@@ -0,0 +1,294 @@
+#ifndef ENGINE_LEXER_H
+#define ENGINE_LEXER_H
+
+enum csv_token_flags
+{
+    FL      = 1 << 2,
+};
+
+enum csv_token_type
+{
+    // first 255 tokens  for ascii characters
+    TOKEN_UNDEFINED = 255,
+    TOKEN_IDENTIFIER,
+    TOKEN_VALUE,
+};
+
+typedef struct csv_token csv_token;
+struct csv_token
+{
+    string8 lexeme;
+    csv_token *next_token;
+    enum csv_token_type type;
+    enum csv_token_flags flags;
+};
+
+// NOTE(nasr): i dont think im going to use this.
+typedef struct csv_row csv_row;
+struct csv_row
+{
+    // array of size col_count, points into mmap buffer
+    string8 *fields;
+    s32      count;
+};
+
+#if 0
+typedef struct csv_lntity csv_entity;
+struct csv_entity 
+{
+    //- not needed because we use key header mapping i think
+};
+#endif
+
+typedef struct csv_header csv_header;
+struct csv_header 
+{
+    string8  payload;
+    csv_header *next_header;
+};
+
+typedef struct csv_table csv_table;
+struct csv_table
+{
+    // first row, col names
+    // all data rows
+    csv_header  *header;
+    s32         row_count;
+    s32         header_count;
+    b32         finding_headers;
+};
+
+
+typedef struct csv_token_list csv_token_list;
+struct csv_token_list
+{
+    csv_token *start_token;
+    csv_token *end_token;
+};
+
+read_only global_variable
+csv_token nil_csv_token=
+{
+    .lexeme         = {.data = NULL, .size = 0},
+    .type           = 0,
+    .flags          = 0,
+    .next_token     = &nil_csv_token,
+};
+
+read_only global_variable
+csv_header nil_csv_header = 
+{
+    .payload =  {.data = NULL, .size = 0},
+    .next_header = &nil_csv_header,
+};
+
+read_only global_variable
+csv_token_list nil_csv_token_list =
+{
+    .start_token = &nil_csv_token,
+    .end_token   = &nil_csv_token,
+};
+
+read_only global_variable
+csv_row  nil_csv_row =
+{
+    .fields     = &nil_string,
+    .count      = 0,
+};
+
+read_only global_variable
+csv_table nil_csv_table =
+{
+    .header     = &nil_csv_header,
+    .row_count   = 0,
+};
+
+#endif /* ENGINE_LEXER_H */
+
+internal b32 
+is_nil_csv_token(csv_token *token)
+{
+    return ((token == NULL) || (token == &nil_csv_token)); 
+}
+
+// TODO(nasr): segfaulting because end_token not allocated
+internal void
+csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
+{
+    source_token_list->end_token->next_token = source_token;
+    source_token_list->end_token             = source_token;
+}
+
+//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
+internal void
+csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
+{
+    if(is_nil_csv_token(source->start_token)) return;
+
+    csv_token *source_ct      = source->start_token;
+    csv_token *destination_et = destination->end_token;
+
+    // walk source and stitch each node onto destination's tail
+    for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
+    {
+        destination_et->next_token = source_ct;
+        destination_et             = source_ct;
+    }
+
+    // destination_et now points at the last real source node (not the nil sentinel)
+    destination->end_token = destination_et;
+}
+
+#if 0
+internal csv_token_list *
+parse_csv_row(string8 row_buffer)
+{
+    // csv_token_list *
+
+}
+#endif
+
+
+// the lexer acts as a table builder from a csv  file
+// and parsing indivudal rows and columns
+// the next step would be building a the b-tree
+internal csv_token *
+tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
+{
+    unused(token_list);
+
+    if(buffer.size == 0) return NULL;
+
+    // URGENT(nasr): segfaulting because memcpy of strring value doesnt  work dammit
+    // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
+    // forgot what the solution was
+    // TODO(nasr): check what the problem here was
+
+    // string size tracking across the loop not inside it
+    s32 start = 0;
+
+    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
+    {
+        u8 point = buffer.data[index];
+
+#if 0
+        if(is_whitespace(point))
+        {
+            warn("csv file is invalid, detected whitespace");
+            return NULL;
+        }
+#endif
+
+        if(point == ',')
+        {
+            // emit a token for the field that ended before this comma
+            csv_token *token  = PushStructZero(arena, csv_token);
+
+            assert_msg(token != NULL, "did the push struct fail??");
+            assert_msg(arena->current_position < arena->capacity, "no more arena size");
+
+            token->lexeme     = StringCast(&buffer.data[start], index - start);
+            token->type       = TOKEN_VALUE;
+            token->next_token = &nil_csv_token;
+            csv_token_list_append_token(token_list, token);
+
+            start = index + 1;
+
+            if(table->finding_headers)
+            {
+                table->header_count++;
+            }
+        }
+        else if(point == '\n')
+        {
+            // emit a token for the field that ended at this newline
+            csv_token *token  = PushStructZero(arena, csv_token);
+            token->lexeme     = StringCast(&buffer.data[start], index - start);
+            token->type       = TOKEN_VALUE;
+            token->flags     |= FL;
+            token->next_token = &nil_csv_token;
+
+            assert_msg(token_list, "token list invalid");
+            assert_msg(token, "you're tring to append an invalid token");
+
+            csv_token_list_append_token(token_list, token);
+
+            start = index + 1;
+
+            if(table->finding_headers)
+            {
+                {
+                    //- map new header token list to table headers
+                }
+                table->finding_headers = FALSE;
+            }
+
+            table->row_count++;
+        }
+    }
+
+    // NOTE(nasr): return the first token the caller can walk the list from token_list
+    return token_list->start_token;
+}
+
+//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
+internal btree *
+parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
+{
+    btree *tree = PushStructZero(arena, btree);
+
+    s32 col_index = 0;
+    s32 row_index = 0;
+
+    // iterate over the token list while the token is not nil
+    for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
+    {
+        {
+            //- are we parsing the first line tokens?
+            //- if so, do something :))
+            if(ct->flags & FL) 
+            {
+                // NOTE(nasr): FL marks end-of-line; advance row, reset col
+                row_index++;
+                col_index = 0;
+
+                // TODO(nasr): replace with nil header check function
+                // NOTE(nasr): == nil means header hasn't been set yet
+                if(table->header == &nil_csv_header || table->header == NULL)
+                {
+#if 0
+                    // - no this should happen in the tokenization
+                    table->headers->next =     
+#endif
+                }
+                else 
+                {
+
+                }
+
+                // FL tokens are structural, no value to index
+                continue;
+            }
+        }
+
+        // skip non-value tokens, only index actual cell values
+        if (ct->type != TOKEN_VALUE)
+        {
+            col_index++;
+            continue;
+        }
+
+        // NOTE(nasr): payload is the cten itself so the caller can reach
+        // row/col metadata without us having to copy it
+        key k = {
+            .header_index = col_index,
+            .row_index    = row_index,
+        };
+
+        btree_insert(arena, tree, k, (void *)ct);
+
+        col_index++;
+    }
+
+    return tree;
+}
diff --git a/source/tb_db/tb_db.c b/source/tb_db/tb_db.c
new file mode 100644
index 0000000..4ae247d
--- /dev/null
+++ b/source/tb_db/tb_db.c
@@ -0,0 +1,198 @@
+#define BTREE_IMPLEMENTATION
+#define BASE_UNITY
+#include "../base/base_include.h"
+
+internal b32
+is_alpha(u8 point)
+{
+    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
+}
+
+internal b32
+is_digit(u8 point)
+{
+    return (point >= '0' && point <= '9');
+}
+
+internal b32
+is_alpha_num(u8 point)
+{
+    return (is_alpha(point) || is_digit(point));
+}
+
+internal b32
+is_whitespace(u8 point)
+{
+    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
+}
+
+internal b32
+is_delimiter(u8 point)
+{
+    return (point == ',');
+}
+
+#include "btree_impl.h"
+#include "csv_decoder.h"
+
+typedef struct query_token query_token;
+struct query_token
+{
+    string8      lexeme;
+    query_token  *next;
+};
+
+typedef struct query_token_list query_token_list;
+struct query_token_list
+{
+    query_token *start_token;
+    query_token *current_token;
+};
+
+read_only global_variable
+query_token nil_query_token =
+{
+    .lexeme = {.data = NULL, .size = 0},
+    .next = &nil_query_token
+};
+
+
+read_only global_variable
+query_token_list nil_query_token_list =
+{
+    .start_token = &nil_query_token,
+    .current_token = &nil_query_token,
+};
+
+internal b32
+is_nil_query_token(query_token *token)
+{
+    return (token == &nil_query_token) || (token == NULL);
+}
+
+internal b32
+is_nil_query_token_list(query_token *token)
+{
+    return (token == &nil_query_token) || (token == NULL);
+}
+
+// takes on line of the repl input
+// return a reference to the passed list
+internal query_token_list *
+query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
+{
+    b32 initialized = 0;
+    unused(initialized);
+
+    for (u64 index = 0; index < buffer->size; ++index)
+    {
+        u8 codepoint = buffer->data[index];
+
+        if(codepoint == '\n' || codepoint == '\r') break;
+
+        s32 start   = 0; 
+        s32 end     = 0; 
+
+        if(is_whitespace(codepoint)) end = index; 
+
+        // save the token
+        // TODO(nasr): work on the string macros cuz no work
+        {
+            query_token *new_token = PushStruct(arena, query_token);
+
+            //- initialize list
+            {
+                if(is_nil_query_token(list->start_token))
+                {
+                    list->start_token   = new_token;
+                    list->current_token = new_token; 
+                }
+                else 
+                {
+                    //- all we need to do - we dont track parents or what ever. this is a token stream not a tree
+                    list->current_token->next = new_token;
+                }
+            }
+
+            s32 new_token_size = end - start;
+
+            new_token->lexeme = PushString(arena, new_token_size);
+            new_token->lexeme.data = &buffer->data[index]; 
+            new_token->lexeme.size = new_token_size;
+
+            list->current_token->next = new_token;
+
+            start = index + 1;
+        }
+    }
+
+    return list;
+}
+
+int main(int count, char **value)
+{
+#if 1
+    unused(nil_query_token_list);
+#endif
+
+    if(count < 2) value[1] = "./test/data.csv";
+
+    local_persist b32 running = 1;
+
+    mem_arena *global_arena = arena_create(GiB(1));
+
+    string8 buffer = load_file(global_arena, value[1]);
+
+    // NOTE(nasr): the use of tables is required for tracking headers etc.
+    // i think we can optimize this away in the future but for now its fine
+    csv_table      *table      = PushStructZero(global_arena, csv_table);
+    csv_token_list *token_list = PushStructZero(global_arena, csv_token_list);
+
+    token_list->start_token = &nil_csv_token; 
+    token_list->end_token   = &nil_csv_token;   
+
+    csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list);
+    assert_msg(tokens != NULL, "tokens are null");
+
+    // NOTE(nasr): token_list is now populated — pass it directly, not a fresh empty list
+    btree *bt = parse_csv(global_arena, token_list, table);
+
+    print("\nDatabase Engine\n");
+
+    for(;;)
+    {
+        if(running)
+        {
+            u8  *lbuf     = PushArray(global_arena, u8, 256);
+            s32  err      = os_read(STDIN_FD, lbuf, 256);
+
+            if(err < 0)
+            {
+                print("error reading from stdin");
+            }
+
+            // TODO(nasr): extract this later in the future and make a string copy function/macro
+            // @params (s32 lbuf_size, string8 lbuf_stringified)
+            // NOTE(nasr): use err (bytes read) not sizeof(lbuf*) — sizeof a pointer is always 8
+            s32     lbuf_size       = err;
+            string8 lbuf_stringified = PushString(global_arena, lbuf_size);
+            {
+                memcpy(lbuf_stringified.data, lbuf, lbuf_size);
+                lbuf_stringified.size = lbuf_size;
+            }
+
+            query_token_list *qtl = PushStructZero(global_arena, query_token_list);
+
+
+            query_tokenizer(global_arena, &lbuf_stringified, qtl);
+
+            // TODO(nasr): dispatch qtl against bt here
+
+            unused(bt);
+
+            sleep(1);
+        }
+    }
+
+    return 0;
+}
-- 
cgit v1.3