From 854d66bb73cd131bda6452aac74aae3d9a77b91a Mon Sep 17 00:00:00 2001
From: nasr <nsrddyn@gmail.com>
Date: Mon, 9 Mar 2026 19:44:59 +0000
Subject: refactor(main): simplified the project. going towards a single header
 file project maybe...

---
 source/b_tree.c           |  66 ---------------------
 source/b_tree.h           |  44 --------------
 source/base/base_string.h |   3 +-
 source/btree.h            | 108 ++++++++++++++++++++++++++++++++++
 source/csv_parser.c       |  26 ---------
 source/csv_parser.h       |  40 -------------
 source/csv_reader.h       | 145 ++++++++++++++++++++++++++++++++++++++++++++++
 source/engine.c           | 101 +++++++++++++++++++++++++++++---
 source/lexer.c            |  80 -------------------------
 source/lexer.h            |  30 ----------
 source/query.c            |   0
 source/query.h            |   0
 12 files changed, 348 insertions(+), 295 deletions(-)
 delete mode 100644 source/b_tree.c
 delete mode 100644 source/b_tree.h
 create mode 100644 source/btree.h
 delete mode 100644 source/csv_parser.c
 delete mode 100644 source/csv_parser.h
 create mode 100644 source/csv_reader.h
 delete mode 100644 source/lexer.c
 delete mode 100644 source/lexer.h
 delete mode 100644 source/query.c
 delete mode 100644 source/query.h

diff --git a/source/b_tree.c b/source/b_tree.c
deleted file mode 100644
index e42f709..0000000
--- a/source/b_tree.c
+++ /dev/null
@@ -1,66 +0,0 @@
-
-// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
-
-internal b_tree_node *
-node_alloc(mem_arena *arena)
-{
-    b_tree_node *node = PushStructZero(arena, type);
-    node->leaf = 1;
-    return node;
-}
-
-// NOTE(nasr): @return the index of of the found element
-internal s32
-node_find_pos(mem_arena *arena, string8 value)
-{
-    s32 i = 0;
-    while (i < n->key_count && str8_cmp(n->keys[i], k) < 0)
-    {
-        ++i;
-    }
-
-    return i;
-}
-
-interal void
-b_tree_create(mem_arena *arena, b_tree *tree)
-{
-    tree->root = node_alloc(arena);
-    tree->root->leaf = 1;
-    tree->root->key_count = 0;
-}
-
-// NOTE(nasr): nodes that get  passed as parameters should've already been loaded into memory
-internal void
-b_tree_search(b_tree_node *node, string8 key)
-{
-    s32 found_index = node_find_pos(node, key);
-
-     if (found_index < n->key_count && string_compare(n->keys[i], key) == 0)
-     {
-         return n->rows[i];
-     }
-     if (n->leaf)
-     {
-         return NULL;
-
-     }
-
-     return b_tree_search(n->children[i], key);
-
-
-}
-
-internal void
-b_tree_insert()
-{
-
-}
-
-internal void
-b_tree_write()
-{
-    // TODO(nasr): write the b_tree to disk
-}
-
-
diff --git a/source/b_tree.h b/source/b_tree.h
deleted file mode 100644
index 0fc4d3c..0000000
--- a/source/b_tree.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef BTREE_H
-#define BTREE_H
-
-// maximum height  of the tree the lower the lower the lower amount
-// of disk reads which translates into faster?
-#define B_TREE_ORDER 4
-
-typedef struct b_tree_node b_tree_node;
-struct  b_tree_node
-{
-
-    // store the values
-    string8 keys[B_TREE_ORDER - 1];
-    csv_row *rows[B_TREE_ORDER - 1];
-
-    b_tree_node *parent;
-    // handle to store children faster than linked list
-    // because now we can iteratate over the nodes instead of having cache misses
-    // when jumping through linked nodes
-    b_tree_node *children[B_TREE_ORDER];
-
-    // NOTE(nasr): reference count ::: check how many leaves are using this node
-    // also not needed for now because we don't free individual node because of arena allocator
-    // s32 *refc;
-
-    s32 key_count;
-
-    b32 leaf;
-
-};
-
-typedef struct b_tree b_tree;
-struct b_tree
-{
-    // NOTE(nasr): make sure this always stays in memory
-    // so that initial fetch never requires a disk read
-    b_tree_node *root;
-
-};
-
-
-
-
-#endif /* BTREE_H */
diff --git a/source/base/base_string.h b/source/base/base_string.h
index 4e0d923..ef2cf16 100644
--- a/source/base/base_string.h
+++ b/source/base/base_string.h
@@ -12,7 +12,8 @@
         result; \
     })
 
-#define String8Cast(literal, literal_count) ( string8 ){( u8 * )( literal ),( u64 )( literal_count ) } 
+#define StringCast(data, size) (string8){(u8 *)(data), (u64)(size) } 
+#define StringPCast(data, size) (string8 *){(u8 *)(data), (u64)(size) } 
 
 #define StringFmt "%.*s"
 #define ULongFmt  "%lu"
diff --git a/source/btree.h b/source/btree.h
new file mode 100644
index 0000000..42c8e21
--- /dev/null
+++ b/source/btree.h
@@ -0,0 +1,108 @@
+#ifndef BTREE_H
+#define BTREE_H
+
+// maximum height  of the tree the lower the lower the lower amount
+// of disk reads which translates into faster?
+#define B_TREE_ORDER 4
+
+typedef struct b_tree_node b_tree_node;
+struct  b_tree_node
+{
+
+    // store the values
+    string8 keys[B_TREE_ORDER - 1];
+    csv_row *rows[B_TREE_ORDER - 1];
+
+    b_tree_node *parent;
+    // handle to store children faster than linked list
+    // because now we can iteratate over the nodes instead of having cache misses
+    // when jumping through linked nodes
+    b_tree_node *children[B_TREE_ORDER];
+
+    // NOTE(nasr): reference count ::: check how many leaves are using this node
+    // also not needed for now because we don't free individual node because of arena allocator
+    // s32 *refc;
+
+    s32 key_count;
+
+    b32 leaf;
+
+};
+
+typedef struct b_tree b_tree;
+struct b_tree
+{
+    // NOTE(nasr): make sure this always stays in memory
+    // so that initial fetch never requires a disk read
+    b_tree_node *root;
+
+};
+
+
+
+
+// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
+
+internal b_tree_node *
+node_alloc(mem_arena *arena)
+{
+    b_tree_node *node = PushStructZero(arena, type);
+    node->leaf = 1;
+    return node;
+}
+
+// NOTE(nasr): @return the index of of the found element
+internal s32
+node_find_pos(mem_arena *arena, string8 value)
+{
+    s32 i = 0;
+    while (i < n->key_count && str8_cmp(n->keys[i], k) < 0)
+    {
+        ++i;
+    }
+
+    return i;
+}
+
+interal void
+b_tree_create(mem_arena *arena, b_tree *tree)
+{
+    tree->root = node_alloc(arena);
+    tree->root->leaf = 1;
+    tree->root->key_count = 0;
+}
+
+// NOTE(nasr): nodes that get  passed as parameters should've already been loaded into memory
+internal void
+b_tree_search(b_tree_node *node, string8 key)
+{
+    s32 found_index = node_find_pos(node, key);
+
+     if (found_index < n->key_count && string_compare(n->keys[i], key) == 0)
+     {
+         return n->rows[i];
+     }
+     if (n->leaf)
+     {
+         return NULL;
+
+     }
+
+     return b_tree_search(n->children[i], key);
+
+
+}
+
+internal void
+b_tree_insert()
+{
+
+}
+
+internal void
+b_tree_write()
+{
+    // TODO(nasr): write the b_tree to disk
+}
+
+#endif /* BTREE_H */
diff --git a/source/csv_parser.c b/source/csv_parser.c
deleted file mode 100644
index 6679d2f..0000000
--- a/source/csv_parser.c
+++ /dev/null
@@ -1,26 +0,0 @@
-internal void
-strip_new_line(string8 buffer)
-{
-
-    for (u64 index = 0; index < buffer.size; index++)
-	{
-
-	}
-
-    return;
-
-}
-
-internal void
-read_csv(string8 buffer)
-{
-    printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
-
-}
-
-internal csv_table *
-parse_csv(token *tokens, csv_table *table)
-{
-
-    return NULL;
-}
diff --git a/source/csv_parser.h b/source/csv_parser.h
deleted file mode 100644
index 1aa96b9..0000000
--- a/source/csv_parser.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef CSV_PARSER_H
-#define CSV_PARSER_H
-
-typedef struct csv_row csv_row;
-struct csv_row
-{
-    // array of size col_count, points into mmap buffer
-    string8 *fields;
-    s32      count;
-};
-
-typedef struct csv_table csv_table;
-struct csv_table
-{
-    // first row, col names
-    // all data rows
-    string8  *headers;
-    csv_row  *rows;
-    s32       col_count;
-    s32       row_count;
-};
-
-read_only global_variable
-csv_row  nil_csv_row =
-{
-    .fields     = &nil_string,
-    .count      = 0,
-};
-
-read_only global_variable
-csv_table nil_csv_table =
-{
-    .headers     = &nil_string,
-    .rows        = &nil_csv_row,
-    .col_count   = 0,
-    .row_count   = 0,
-};
-
-
-#endif /* CSV_PARSER_H */
diff --git a/source/csv_reader.h b/source/csv_reader.h
new file mode 100644
index 0000000..6c8e625
--- /dev/null
+++ b/source/csv_reader.h
@@ -0,0 +1,145 @@
+#ifndef ENGINE_LEXER_H
+#define ENGINE_LEXER_H
+
+typedef enum token_flags token_flags;
+enum token_flags 
+{
+    START_FL    = 1 << 1,
+    END_FL      = 1 << 2,
+};
+
+
+typedef enum token_type token_type;
+enum token_type
+{
+    // first 255 tokens  for ascii characters
+    TOKEN_UNDEFINED = 255,
+    TOKEN_IDENTIFIER,
+    TOKEN_VALUE,
+};
+
+typedef struct token token;
+struct token
+{
+    string8 lexeme;
+    token_type type;
+    token_flags flags;
+    token *next;
+};
+
+typedef struct csv_row csv_row;
+struct csv_row
+{
+    // array of size col_count, points into mmap buffer
+    string8 *fields;
+    s32      count;
+};
+
+typedef struct csv_table csv_table;
+struct csv_table
+{
+    // first row, col names
+    // all data rows
+    string8  *headers;
+    csv_row  *rows;
+    s32       col_count;
+    s32       row_count;
+};
+
+read_only global_variable
+csv_row  nil_csv_row =
+{
+    .fields     = &nil_string,
+    .count      = 0,
+};
+
+read_only global_variable
+csv_table nil_csv_table =
+{
+    .headers     = &nil_string,
+    .rows        = &nil_csv_row,
+    .col_count   = 0,
+    .row_count   = 0,
+};
+
+
+// the lexer acts as a table builder from a csv  file
+// and parsing indivudal rows and columns
+// the next step would be building a the b-tree
+internal token *
+tokenize_csv(string8 buffer, mem_arena *arena)
+{
+
+    b32 FL = TRUE;
+
+    if(buffer.size < 0) return NULL;
+    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
+    {
+        token *tok = PushStruct(arena, token);
+        u8 point = buffer.data[index];
+
+        s32 start   = 0;
+        s32 end     = 0;
+
+        if(is_whitespace(point))
+        {
+            print("csv file is invalid");
+            return NULL;
+        }
+
+        switch(point)
+        {
+            case('\n'):
+                {
+                    if(FL) tok->flags |= END_FL;
+                    break;
+                }
+
+            case(','):
+                {
+                    end = index - 1;
+                    start = index + 1;
+                    break;
+                }
+            default:
+                {
+                    break;
+                }
+        }
+
+        tok->lexeme = StringCast(&buffer.data[start], end - start);
+        tok->next = tok;
+    }
+
+    return NULL;
+}
+
+internal void
+strip_new_line(string8 buffer)
+{
+
+    for (u64 index = 0; index < buffer.size; index++)
+	{
+
+	}
+
+    return;
+
+}
+
+internal void
+read_csv(string8 buffer)
+{
+    // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
+
+}
+
+internal csv_table *
+parse_csv(token *tokens, csv_table *table)
+{
+
+    return NULL;
+}
+
+
+#endif /* ENGINE_LEXER_H */
diff --git a/source/engine.c b/source/engine.c
index fe609e9..3527e27 100644
--- a/source/engine.c
+++ b/source/engine.c
@@ -1,17 +1,88 @@
 #define BASE_UNITY
 #include "base/base_include.h"
 
-#include <stdio.h>
+internal b32
+is_alpha(u8 point)
+{
+    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
+}
+
+internal b32
+is_digit(u8 point)
+{
+    return (point >= '0' && point <= '9');
+}
 
-#include "csv_parser.h"
-#include "lexer.h"
+internal b32
+is_alpha_num(u8 point)
+{
+    return (is_alpha(point) || is_digit(point));
+}
+
+internal b32
+is_whitespace(u8 point)
+{
+    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
+}
+
+internal b32
+is_delimiter(u8 point)
+{
+    return (point == ',');
+}
+
+#include "csv_reader.h"
+// #include "btree.h"
+
+typedef struct query_token query_token;
+struct query_token
+{
+    string8      *lexeme;
+    query_token  *next;
+};
+
+
+// takes on line of the repl input
+internal query_token *
+query_tokenizer(mem_arena *arena, string8 *buffer)
+{
+    query_token *tok = PushStruct(arena, query_token);
+    unused(tok);
 
-#include "lexer.c"
-#include "csv_parser.c"
+    for (u64 index = 0; index < buffer->size; ++index)
+    {            
+        u8 codepoint = buffer->data[index];
+
+        if(codepoint == '\n' || codepoint == '\r') break;
+
+        s32 start   = 0; 
+        s32 end     = 0; 
+
+        if(is_whitespace(codepoint))
+        {
+            end = index; 
+        }
+
+        s32 new_token_size = end - start;
+
+        tok->lexeme->data = &buffer->data[index]; 
+        tok->lexeme->size = new_token_size;
+
+        tok->next = tok;
+        start = index + 1;
+
+    }
+
+    return tok;
+}
 
 int main(int c, char **v)
 {
-    if(c < 2) return -999;
+    if(c < 2)
+    { 
+        print("bad file, setting default file\n");
+    }
+    else v[1] =  "./test/customers-10000.csv";
 
     local_persist b32 running = 1;
 
@@ -26,13 +97,27 @@ int main(int c, char **v)
     {
         if (running)
         {
-            // TODO(nasr): check for return key
             {
                 u8 line_buffer[256] = {};
-
                 s64 codepoint = os_read(STDIN_FD, line_buffer, 256);
                 unused(codepoint);
+                for(s32 index = 0; index < 256; ++index)
+                {
+                    if(line_buffer[index] == '\n')
+                    {
+                        print("exiting");
+                        return 0;
+                    }
+                    else if(line_buffer[index] == ' ')
+                    {
 
+                        print("TODO(nasr): ");
+                    }
+
+                }
+            }
+
+            {
                 read_csv(buffer);
                 token *tokens = tokenize_csv(buffer, global_arena);
                 global_table = parse_csv(tokens, global_table);
diff --git a/source/lexer.c b/source/lexer.c
deleted file mode 100644
index decfb7a..0000000
--- a/source/lexer.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// the lexer acts as a table builder from a csv  file
-// and parsing indivudal rows and columns
-// the next step would be building a the b-tree
-internal b32
-is_alpha(u8 point)
-{
-    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
-}
-
-internal b32
-is_digit(u8 point)
-{
-    return (point >= '0' && point <= '9');
-}
-
-internal b32
-is_alpha_num(u8 point)
-{
-    return (is_alpha(point) || is_digit(point));
-}
-
-internal b32
-is_whitespace(u8 point)
-{
-    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
-}
-
-internal b32
-is_delimiter(u8 point)
-{
-    return (point == ',');
-}
-
-internal token *
-tokenize_csv(string8 buffer, mem_arena *arena)
-{
-
-    b32 FL = TRUE;
-
-    if(buffer.size < 0) return NULL;
-    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
-    {
-        token *tok = PushStruct(arena, token);
-        u8 point = buffer.data[index];
-
-        s32 start   = 0;
-        s32 end     = 0;
-
-        if(is_whitespace(point))
-        {
-            print("csv file is invalid");
-            return NULL;
-        }
-
-        switch(point)
-        {
-            case('\n'):
-                {
-                    if(FL) tok->flags |= END_FL;
-                    break;
-                }
-
-            case(','):
-                {
-                    end = index - 1;
-                    start = index + 1;
-                    break;
-                }
-            default:
-                {
-                    break;
-                }
-        }
-
-        tok->lexeme = String8Cast(&buffer.data[start], end - start);
-        tok->next = tok;
-    }
-
-    return NULL;
-}
diff --git a/source/lexer.h b/source/lexer.h
deleted file mode 100644
index c950a58..0000000
--- a/source/lexer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef ENGINE_LEXER_H
-#define ENGINE_LEXER_H
-
-typedef enum token_flags token_flags;
-enum token_flags 
-{
-    START_FL    = 1 << 1,
-    END_FL      = 1 << 2,
-};
-
-
-typedef enum token_type token_type;
-enum token_type
-{
-    // first 255 tokens  for ascii characters
-    TOKEN_UNDEFINED = 255,
-    TOKEN_IDENTIFIER,
-    TOKEN_VALUE,
-};
-
-typedef struct token token;
-struct token
-{
-    string8 lexeme;
-    token_type type;
-    token_flags flags;
-    token *next;
-};
-
-#endif /* ENGINE_LEXER_H */
diff --git a/source/query.c b/source/query.c
deleted file mode 100644
index e69de29..0000000
diff --git a/source/query.h b/source/query.h
deleted file mode 100644
index e69de29..0000000
-- 
cgit v1.3