feature(main): lots of stuff see description

1. increased compile time warnings to help with some optimizations. 2. impelmeented csv lexing helper functions that do stuff on tokenlists like appending and concatenating lists with each other 3. realiszed that btree design in faulty so disabled it and will refactor it in the next approach
author: nasr <nsrddyn@gmail.com> 2026-03-16 19:20:23 +0000
committer: nasr <nsrddyn@gmail.com> 2026-03-16 19:20:23 +0000
commit: 180ccc84aac07c7bee2b09a6e07f7406908409b9 (patch)
tree: efa39665e41c3132626f2c08b2f3ae0d18adc17a
parent: 2e258673171c2e4663a8b5d58e2ad174bb0ecd96 (diff)
5 files changed, 348 insertions, 206 deletions
diff --git a/Makefile b/Makefile
index 0224a42..39e9b87 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,7 @@
 BIN = build/engine
-SRC = source/engine.c
+SRC = source/tb_db.c
 CC = clang
-CFLAGS = -Wall -Wextra -Wfloat-equal -Wswitch-default -Wswitch-enum \
+CFLAGS = -Wall -Wextra -Wpedantic  -Wno-unused-function -g -Werror
-         -Wno-unused-parameter -Wno-implicit-fallthrough -Wno-unused-function -g -Werror
 $(BIN): $(SRC)
        mkdir -p build
diff --git a/source/b_tree.h b/source/b_tree_impl.h
index 0c4b1e1..8eb0723 100644
--- a/source/b_tree.h
+++ b/source/b_tree_impl.h
@@ -4,16 +4,29 @@
 // maximum height of the tree the lower the lower the lower amount
 // of disk reads which translates into faster?
-#define B_TREE_ORDER 4
+#if 0
+global_variable read_only s16 B_TREE_ORDER = 4;
+#endif
+#define B_TREE_ORDER  4
+//- NOTE(nasr): defining a key to improve sorting
+//  i think saying that a key is a combination of the column + row is a good way of appraoching this
+typedef struct key key;
+struct key
+{
+    string8 header;
+    s32     row;
+};
 typedef struct b_tree_node b_tree_node;
 struct b_tree_node
 {
-    // store the values
+    // store the key values of the sub nodes? if they are leaves?
-    string8 keys[B_TREE_ORDER - 1];
+    key keys[B_TREE_ORDER - 1];
    // TODO(nasr): replace with something more generic?
    // NOTE(nasr): cons of void * -> no type safety
    // is there a way to still have some sort of that?
+    // size not variable
    void *payload_per_key[B_TREE_ORDER - 1];
    b_tree_node *parent;
    // handle to store children faster than linked list
@@ -27,6 +40,10 @@ struct b_tree_node
    // s32 *refc;
    s32 key_count;
    b32 leaf;
+    // NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference? 
+    // this could solve memory location issues?
 };
 typedef struct b_tree b_tree;
@@ -55,9 +72,17 @@ btree_node_alloc(mem_arena *arena)
 internal s32
 btree_node_find_pos(string8 value, b_tree_node *node)
 {
+    unused(value);
+    unused(node);
+#if 0
    s32 i = 0;
    for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);
    return i;
+#endif
+    return 0;
 }
 internal void
@@ -68,10 +93,15 @@ b_tree_create(mem_arena *arena, b_tree *tree)
    tree->root->key_count = 0;
 }
 // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
 internal void *
 b_tree_search(b_tree_node *node, string8 key)
 {
+    unused(node);
+    unused(key);
+#if 0
    s32 i = btree_node_find_pos(key, node);
    if (i < node->key_count && string8_cmp(node->keys[i], key) == 0)
@@ -83,12 +113,20 @@ b_tree_search(b_tree_node *node, string8 key)
        return NULL;
    }
    return b_tree_search(node->children[i], key);
+#endif
+    return NULL;
 }
 // TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full)
 internal void
-b_tree_insert(mem_arena *arena, b_tree *tree, string8 key, void *payload)
+b_tree_insert(b_tree *tree, string8 key, void *payload)
 {
+    unused(tree);
+    unused(key);
+    unused(payload);
+#if 0
    b_tree_node *current_node = tree->root;
    if (current_node->leaf)
@@ -109,15 +147,18 @@ b_tree_insert(mem_arena *arena, b_tree *tree, string8 key, void *payload)
        }
        else {
            // TODO(nasr): creating a new branch / tree?
+            // make a seperate function for this
        }
    }
    // TODO(nasr): internal node case walk down then split on the way back up
+#endif
 }
 internal void
 b_tree_write(b_tree *bt)
 {
    // TODO(nasr): write the b_tree to disk
+    unused(bt);
 }
 #endif /* B_TREE_IMPLEMENTATION */
diff --git a/source/csv_decoder.h b/source/csv_decoder.h
new file mode 100644
index 0000000..b754ef5
--- /dev/null
+++ b/source/csv_decoder.h
@@ -0,0 +1,289 @@
+#ifndef ENGINE_LEXER_H
+#define ENGINE_LEXER_H
+enum csv_token_flags
+{
+    FL      = 1 << 2,
+};
+enum csv_token_type
+{
+    // first 255 tokens  for ascii characters
+    TOKEN_UNDEFINED = 255,
+    TOKEN_IDENTIFIER,
+    TOKEN_VALUE,
+};
+typedef struct csv_token csv_token;
+struct csv_token
+{
+    string8 lexeme;
+    csv_token *next_token;
+    enum csv_token_type type;
+    enum csv_token_flags flags;
+};
+// NOTE(nasr): i dont think im going to use this.
+typedef struct csv_row csv_row;
+struct csv_row
+{
+    // array of size col_count, points into mmap buffer
+    string8 *fields;
+    s32      count;
+};
+#if 0
+typedef struct csv_lntity csv_entity;
+struct csv_entity 
+{
+    //- not needed because we use key header mapping i think
+};
+#endif
+typedef struct csv_header csv_header;
+struct csv_header 
+{
+    string8  payload;
+    csv_header *next_header;
+};
+typedef struct csv_table csv_table;
+struct csv_table
+{
+    // first row, col names
+    // all data rows
+    csv_header  *header;
+    s32         row_count;
+    s32         header_count;
+};
+typedef struct csv_token_list csv_token_list;
+struct csv_token_list
+{
+    csv_token *start_token;
+    csv_token *end_token;
+};
+read_only global_variable
+csv_token nil_csv_token=
+{
+    .lexeme         = {.data = NULL, .size =0},
+    .type           = 0,
+    .flags          = 0,
+    .next_token     = &nil_csv_token,
+};
+read_only global_variable
+csv_header nil_csv_header = 
+{
+    .payload =  {.data = NULL, .size = 0},
+    .next_header = &nil_csv_header,
+};
+read_only global_variable
+csv_token_list nil_csv_token_list =
+{
+    .start_token = &nil_csv_token,
+    .end_token   = &nil_csv_token,
+};
+read_only global_variable
+csv_row  nil_csv_row =
+{
+    .fields     = &nil_string,
+    .count      = 0,
+};
+read_only global_variable
+csv_table nil_csv_table =
+{
+    .header     = &nil_csv_header,
+    .row_count   = 0,
+};
+#endif /* ENGINE_LEXER_H */
+internal b32 
+is_nil_csv_token(csv_token *token)
+{
+    return ((token == NULL) || (token == &nil_csv_token)); 
+}
+internal void
+csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
+{
+    source_token_list->end_token->next_token = source_token;
+    source_token_list->end_token             = source_token;
+}
+//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
+internal void
+csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
+{
+    csv_token *source_ct = source->start_token;
+    csv_token *destination_end_ct = destination->end_token;
+     for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
+     {
+         destination_end_ct->next_token = source_ct;
+     }
+     destination->end_token = source_ct;
+}
+#if 0
+internal csv_token_list *
+parse_csv_row(string8 row_buffer)
+{
+    // csv_token_list *
+}
+#endif
+// the lexer acts as a table builder from a csv  file
+// and parsing indivudal rows and columns
+// the next step would be building a the b-tree
+internal csv_token *
+tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
+{
+    unused(token_list);
+    b32 finding_headers = TRUE;
+    if(buffer.size < 0) return NULL;
+    csv_token *tok = PushStruct(arena, csv_token);
+    // URGENT(nasr): segfaulting because memcpy of strring value doesnt  work dammit
+    // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
+    // forgot what the solution was
+    // TODO(nasr): check what the problem here was
+    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
+    {
+        u8 point = buffer.data[index];
+        s32 start   = 0;
+        s32 end     = 0;
+        if(is_whitespace(point))
+        {
+            warn("csv file is invalid, detected whitespace");
+            return NULL;
+        }
+        if(point == '\n') 
+        {
+            if(finding_headers)
+            {
+#if 0
+                string8 headers_buffer = {.data = &buffer.data[start], .size = end - start};
+#endif
+                finding_headers = FALSE;
+                {
+                    //- map new header token list to table headers
+                }
+            }
+#if 0
+            else
+            {
+            }
+#endif
+            table->row_count++;
+        }
+        else if(point == ',')
+        {
+            if (finding_headers)
+            {
+                table->header_count++;
+            }
+        }
+        switch(point)
+        {
+            case('\n'):
+                {
+                    tok->flags |= FL;
+                    break;
+                }
+            case(','):
+                {
+                    end = index - 1;
+                    start = index + 1;
+                    break;
+                }
+            default:
+                {
+                    break;
+                }
+        }
+        tok->lexeme         = StringCast(&buffer.data[start], end - start);
+        tok->next_token     = tok;
+    }
+    return tok;
+}
+//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
+internal b_tree *
+parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
+{
+    b_tree *tree = PushStructZero(arena, b_tree);
+    b_tree_create(arena, tree);
+    // iterate over the token list while the token is not nil
+    for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
+    {
+        //- TODO(nasr): check initizalization or something tomorrow
+        {
+            //- are we parsing the first line tokens?
+            //- if so, do something :))
+            if(ct->flags & FL) 
+            {
+                // TODO(nasr): replace with nil header check function
+                if(table->header != &nil_csv_header || table->header == NULL)
+                {
+#if 0
+                    // - no this should happen in the tokenization
+                    table->headers->next =     
+#endif
+                }
+                else 
+                {
+                }
+            }
+        }
+        // TODO(nasr): fix this logic tomorrow
+        csv_token *ct = PushStruct(arena, csv_token);
+        // skip structural ctens, only index values
+        if (ct->type != TOKEN_VALUE)
+        {
+            continue;
+        }
+        // NOTE(nasr): payload is the cten itself so the caller can reach
+        // row/col metadata without us having to copy it
+        // NOTE(nasr): heh why do we void cast again?
+        b_tree_insert(tree, ct->lexeme, (void *)ct);
+    }
+    return tree;
+}
diff --git a/source/csv_reader.h b/source/csv_reader.h
deleted file mode 100644
index f5205bf..0000000
--- a/source/csv_reader.h
+++ /dev/null
@@ -1,183 +0,0 @@
-#ifndef ENGINE_LEXER_H
-#define ENGINE_LEXER_H
-typedef enum csv_token_flags csv_token_flags;
-enum csv_token_flags
-{
-    START_FL    = 1 << 1,
-    END_FL      = 1 << 2,
-};
-typedef enum csv_token_type csv_token_type;
-enum csv_token_type
-{
-    // first 255 tokens  for ascii characters
-    TOKEN_UNDEFINED = 255,
-    TOKEN_IDENTIFIER,
-    TOKEN_VALUE,
-};
-typedef struct csv_token csv_token;
-struct csv_token
-{
-    string8 lexeme;
-    csv_token_type type;
-    csv_token_flags flags;
-    csv_token *next;
-};
-// NOTE(nasr): i dont think im going to use this.
-typedef struct csv_row csv_row;
-struct csv_row
-{
-    // array of size col_count, points into mmap buffer
-    string8 *fields;
-    s32      count;
-};
-typedef struct csv_table csv_table;
-struct csv_table
-{
-    // first row, col names
-    // all data rows
-    string8  *headers;
-    csv_row  *rows;
-    s32       col_count;
-    s32       row_count;
-};
-typedef struct csv_token_list csv_token_list;
-struct csv_token_list
-{
-    csv_token *start_token;
-    csv_token *end_token;
-};
-read_only global_variable
-csv_token nil_csv_token=
-{
-    .lexeme = {.data = NULL, .size =0},
-    .type   = (csv_token_type)0,
-    .flags  = 0,
-    .next   = &nil_csv_token,
-};
-read_only global_variable
-csv_token_list nil_csv_token_list =
-{
-    .start_token = &nil_csv_token,
-    .end_token   = &nil_csv_token,
-};
-read_only global_variable
-csv_row  nil_csv_row =
-{
-    .fields     = &nil_string,
-    .count      = 0,
-};
-read_only global_variable
-csv_table nil_csv_table =
-{
-    .headers     = &nil_string,
-    .rows        = &nil_csv_row,
-    .col_count   = 0,
-    .row_count   = 0,
-};
-#endif /* ENGINE_LEXER_H */
-// the lexer acts as a table builder from a csv  file
-// and parsing indivudal rows and columns
-// the next step would be building a the b-tree
-internal csv_token *
-tokenize_csv(string8 buffer, mem_arena *arena)
-{
-    b32 FL = TRUE;
-    if(buffer.size < 0) return NULL;
-    csv_token *tok = PushStruct(arena, csv_token);
-    // URGENT(nasr): segfaulting because memcpy of strring value doesnt  work dammit
-    // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
-    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
-    {
-        u8 point = buffer.data[index];
-        s32 start   = 0;
-        s32 end     = 0;
-        if(is_whitespace(point))
-        {
-            warn("csv file is invalid, detected whitespace");
-            return NULL;
-        }
-        switch(point)
-        {
-            case('\n'):
-            {
-                if(FL) tok->flags |= END_FL;
-                break;
-            }
-            case(','):
-            {
-                end = index - 1;
-                start = index + 1;
-                break;
-            }
-            default:
-            {
-                break;
-            }
-        }
-        tok->lexeme = StringCast(&buffer.data[start], end - start);
-        tok->next = tok;
-    }
-    return tok;
-}
-internal void
-read_csv(string8 buffer)
-{
-    // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
-}
-internal b_tree *
-parse_csv(mem_arena *arena, csv_token_list *ctl)
-{
-    b_tree *tree = PushStructZero(arena, b_tree);
-    b_tree_create(arena, tree);
-    //- TODO(nasr): check initizalization or something tomorrow
-    {
-    }
-    // TODO(nasr): fix this logic tomorrow
-    csv_token *ct = PushStruct(arena, csv_token);
-    for (;ct != NULL; ct = ct->next)
-    {
-        // skip structural ctens, only index values
-        if (ct->type != TOKEN_VALUE)
-        {
-            continue;
-        }
-        // NOTE(nasr): payload is the cten itself so the caller can reach
-        // row/col metadata without us having to copy it
-        // NOTE(nasr): heh why do we void cast again?
-        b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
-    }
-    return tree;
-}
diff --git a/source/engine.c b/source/tb_db.c
index 106f113..b992111 100644
--- a/source/engine.c
+++ b/source/tb_db.c
@@ -1,6 +1,3 @@
 #define B_TREE_IMPLEMENTATION
 #define BASE_UNITY
 #include "base/base_include.h"
@@ -33,11 +30,10 @@ internal b32
 is_delimiter(u8 point)
 {
    return (point == ',');
 }
-#include "b_tree.h"
+#include "b_tree_impl.h"
-#include "csv_reader.h"
+#include "csv_decoder.h"
 typedef struct query_token query_token;
 struct query_token
@@ -80,7 +76,6 @@ is_nil_query_token_list(query_token *token)
    return (token == &nil_query_token) || (token == NULL);
 }
 // takes on line of the repl input
 // return a reference to the passed list
 internal query_token_list *
@@ -90,7 +85,7 @@ query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
    unused(initialized);
    for (u64 index = 0; index < buffer->size; ++index)
-    {            
+    {
        u8 codepoint = buffer->data[index];
        if(codepoint == '\n' || codepoint == '\r') break;
@@ -98,10 +93,7 @@ query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
        s32 start   = 0; 
        s32 end     = 0; 
-        if(is_whitespace(codepoint))
+        if(is_whitespace(codepoint)) end = index; 
-        {
-            end = index; 
-        }
        // save the token
        // TODO(nasr): work on the string macros cuz no work
@@ -185,19 +177,23 @@ int main(int count, char **value)
            }
            {
-                read_csv(buffer);
-                csv_token *tokens = tokenize_csv(buffer, global_arena);
+                // NOTE(nasr): the use of tables is required for tracking headers etc.
+                // i think we can optimize this away in the future but for now its fine
+                csv_table *table = PushStruct(global_arena, csv_table);
+                csv_token_list *token_list = PushStruct(global_arena, csv_token_list);
+                csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list);
                assert_msg(tokens != NULL, "Tokens are NULL.");
                csv_token_list *ctl = PushStruct(global_arena, csv_token_list);
-                b_tree *bt = parse_csv(global_arena, ctl);
+                b_tree *bt = parse_csv(global_arena, ctl, table);
                b_tree_write(bt);
            }
            // NOTE(nasr): not sure on how to approach the b-tree and the  table format thing
            // we kind of want our table format i think? but i wouldnt be sure about the use case
            // so we stick to the regular b_tree for now. commenting out the tables.
author	nasr <nsrddyn@gmail.com>	2026-03-16 19:20:23 +0000
committer	nasr <nsrddyn@gmail.com>	2026-03-16 19:20:23 +0000
commit	180ccc84aac07c7bee2b09a6e07f7406908409b9 (patch)
tree	efa39665e41c3132626f2c08b2f3ae0d18adc17a
parent	2e258673171c2e4663a8b5d58e2ad174bb0ecd96 (diff)

diff --git a/Makefile b/Makefile index 0224a42..39e9b87 100644 --- a/Makefile +++ b/Makefile
@@ -1,8 +1,7 @@
1	BIN = build/engine	1	BIN = build/engine
2	SRC = source/engine.c	2	SRC = source/tb_db.c
3	CC = clang	3	CC = clang
4	CFLAGS = -Wall -Wextra -Wfloat-equal -Wswitch-default -Wswitch-enum \	4	CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror
5	-Wno-unused-parameter -Wno-implicit-fallthrough -Wno-unused-function -g -Werror
6		5
7	$(BIN): $(SRC)	6	$(BIN): $(SRC)
8	mkdir -p build	7	mkdir -p build


diff --git a/source/b_tree.h b/source/b_tree_impl.h index 0c4b1e1..8eb0723 100644 --- a/source/b_tree.h +++ b/source/b_tree_impl.h
@@ -4,16 +4,29 @@
4		4
5	// maximum height of the tree the lower the lower the lower amount	5	// maximum height of the tree the lower the lower the lower amount
6	// of disk reads which translates into faster?	6	// of disk reads which translates into faster?
7	#define B_TREE_ORDER 4	7	#if 0
		8	global_variable read_only s16 B_TREE_ORDER = 4;
		9	#endif
		10	#define B_TREE_ORDER 4
		11
		12	//- NOTE(nasr): defining a key to improve sorting
		13	// i think saying that a key is a combination of the column + row is a good way of appraoching this
		14	typedef struct key key;
		15	struct key
		16	{
		17	string8 header;
		18	s32 row;
		19	};
8		20
9	typedef struct b_tree_node b_tree_node;	21	typedef struct b_tree_node b_tree_node;
10	struct b_tree_node	22	struct b_tree_node
11	{	23	{
12	// store the values	24	// store the key values of the sub nodes? if they are leaves?
13	string8 keys[B_TREE_ORDER - 1];	25	key keys[B_TREE_ORDER - 1];
14	// TODO(nasr): replace with something more generic?	26	// TODO(nasr): replace with something more generic?
15	// NOTE(nasr): cons of void * -> no type safety	27	// NOTE(nasr): cons of void * -> no type safety
16	// is there a way to still have some sort of that?	28	// is there a way to still have some sort of that?
		29	// size not variable
17	void *payload_per_key[B_TREE_ORDER - 1];	30	void *payload_per_key[B_TREE_ORDER - 1];
18	b_tree_node *parent;	31	b_tree_node *parent;
19	// handle to store children faster than linked list	32	// handle to store children faster than linked list
@@ -27,6 +40,10 @@ struct b_tree_node
27	// s32 *refc;	40	// s32 *refc;
28	s32 key_count;	41	s32 key_count;
29	b32 leaf;	42	b32 leaf;
		43
		44
		45	// NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference?
		46	// this could solve memory location issues?
30	};	47	};
31		48
32	typedef struct b_tree b_tree;	49	typedef struct b_tree b_tree;
@@ -55,9 +72,17 @@ btree_node_alloc(mem_arena *arena)
55	internal s32	72	internal s32
56	btree_node_find_pos(string8 value, b_tree_node *node)	73	btree_node_find_pos(string8 value, b_tree_node *node)
57	{	74	{
		75	unused(value);
		76	unused(node);
		77
		78	#if 0
58	s32 i = 0;	79	s32 i = 0;
59	for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);	80	for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);
60	return i;	81	return i;
		82	#endif
		83
		84	return 0;
		85
61	}	86	}
62		87
63	internal void	88	internal void
@@ -68,10 +93,15 @@ b_tree_create(mem_arena arena, b_tree tree)
68	tree->root->key_count = 0;	93	tree->root->key_count = 0;
69	}	94	}
70		95
		96
71	// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory	97	// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
72	internal void *	98	internal void *
73	b_tree_search(b_tree_node *node, string8 key)	99	b_tree_search(b_tree_node *node, string8 key)
74	{	100	{
		101	unused(node);
		102	unused(key);
		103
		104	#if 0
75	s32 i = btree_node_find_pos(key, node);	105	s32 i = btree_node_find_pos(key, node);
76		106
77	if (i < node->key_count && string8_cmp(node->keys[i], key) == 0)	107	if (i < node->key_count && string8_cmp(node->keys[i], key) == 0)
@@ -83,12 +113,20 @@ b_tree_search(b_tree_node *node, string8 key)
83	return NULL;	113	return NULL;
84	}	114	}
85	return b_tree_search(node->children[i], key);	115	return b_tree_search(node->children[i], key);
		116	#endif
		117
		118	return NULL;
86	}	119	}
87		120
		121
88	// TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full)	122	// TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full)
89	internal void	123	internal void
90	b_tree_insert(mem_arena arena, b_tree tree, string8 key, void *payload)	124	b_tree_insert(b_tree tree, string8 key, void payload)
91	{	125	{
		126	unused(tree);
		127	unused(key);
		128	unused(payload);
		129	#if 0
92	b_tree_node *current_node = tree->root;	130	b_tree_node *current_node = tree->root;
93		131
94	if (current_node->leaf)	132	if (current_node->leaf)
@@ -109,15 +147,18 @@ b_tree_insert(mem_arena arena, b_tree tree, string8 key, void *payload)
109	}	147	}
110	else {	148	else {
111	// TODO(nasr): creating a new branch / tree?	149	// TODO(nasr): creating a new branch / tree?
		150	// make a seperate function for this
112	}	151	}
113	}	152	}
114	// TODO(nasr): internal node case walk down then split on the way back up	153	// TODO(nasr): internal node case walk down then split on the way back up
		154	#endif
115	}	155	}
116		156
117	internal void	157	internal void
118	b_tree_write(b_tree *bt)	158	b_tree_write(b_tree *bt)
119	{	159	{
120	// TODO(nasr): write the b_tree to disk	160	// TODO(nasr): write the b_tree to disk
		161	unused(bt);
121	}	162	}
122		163
123	#endif /* B_TREE_IMPLEMENTATION */	164	#endif /* B_TREE_IMPLEMENTATION */


diff --git a/source/csv_decoder.h b/source/csv_decoder.h new file mode 100644 index 0000000..b754ef5 --- /dev/null +++ b/source/csv_decoder.h
@@ -0,0 +1,289 @@
		1	#ifndef ENGINE_LEXER_H
		2	#define ENGINE_LEXER_H
		3
		4	enum csv_token_flags
		5	{
		6	FL = 1 << 2,
		7	};
		8
		9	enum csv_token_type
		10	{
		11	// first 255 tokens for ascii characters
		12	TOKEN_UNDEFINED = 255,
		13	TOKEN_IDENTIFIER,
		14	TOKEN_VALUE,
		15	};
		16
		17	typedef struct csv_token csv_token;
		18	struct csv_token
		19	{
		20	string8 lexeme;
		21	csv_token *next_token;
		22	enum csv_token_type type;
		23	enum csv_token_flags flags;
		24	};
		25
		26	// NOTE(nasr): i dont think im going to use this.
		27	typedef struct csv_row csv_row;
		28	struct csv_row
		29	{
		30	// array of size col_count, points into mmap buffer
		31	string8 *fields;
		32	s32 count;
		33	};
		34
		35	#if 0
		36	typedef struct csv_lntity csv_entity;
		37	struct csv_entity
		38	{
		39	//- not needed because we use key header mapping i think
		40	};
		41	#endif
		42
		43	typedef struct csv_header csv_header;
		44	struct csv_header
		45	{
		46	string8 payload;
		47	csv_header *next_header;
		48	};
		49
		50	typedef struct csv_table csv_table;
		51	struct csv_table
		52	{
		53	// first row, col names
		54	// all data rows
		55	csv_header *header;
		56	s32 row_count;
		57	s32 header_count;
		58	};
		59
		60
		61	typedef struct csv_token_list csv_token_list;
		62	struct csv_token_list
		63	{
		64	csv_token *start_token;
		65	csv_token *end_token;
		66
		67	};
		68
		69	read_only global_variable
		70	csv_token nil_csv_token=
		71	{
		72	.lexeme = {.data = NULL, .size =0},
		73	.type = 0,
		74	.flags = 0,
		75	.next_token = &nil_csv_token,
		76
		77	};
		78
		79	read_only global_variable
		80	csv_header nil_csv_header =
		81	{
		82	.payload = {.data = NULL, .size = 0},
		83	.next_header = &nil_csv_header,
		84	};
		85
		86	read_only global_variable
		87	csv_token_list nil_csv_token_list =
		88	{
		89	.start_token = &nil_csv_token,
		90	.end_token = &nil_csv_token,
		91	};
		92
		93
		94	read_only global_variable
		95	csv_row nil_csv_row =
		96	{
		97	.fields = &nil_string,
		98	.count = 0,
		99	};
		100
		101	read_only global_variable
		102	csv_table nil_csv_table =
		103	{
		104	.header = &nil_csv_header,
		105	.row_count = 0,
		106	};
		107
		108	#endif /* ENGINE_LEXER_H */
		109
		110	internal b32
		111	is_nil_csv_token(csv_token *token)
		112	{
		113	return ((token == NULL) \|\| (token == &nil_csv_token));
		114	}
		115
		116	internal void
		117	csv_token_list_append_token(csv_token_list source_token_list, csv_token source_token)
		118	{
		119	source_token_list->end_token->next_token = source_token;
		120	source_token_list->end_token = source_token;
		121
		122	}
		123
		124	//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
		125	internal void
		126	csv_token_list_concat_list(csv_token_list destination, csv_token_list source)
		127	{
		128
		129	csv_token *source_ct = source->start_token;
		130	csv_token *destination_end_ct = destination->end_token;
		131
		132	for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
		133	{
		134	destination_end_ct->next_token = source_ct;
		135	}
		136
		137	destination->end_token = source_ct;
		138	}
		139
		140	#if 0
		141	internal csv_token_list *
		142	parse_csv_row(string8 row_buffer)
		143	{
		144	// csv_token_list *
		145
		146	}
		147	#endif
		148
		149
		150	// the lexer acts as a table builder from a csv file
		151	// and parsing indivudal rows and columns
		152	// the next step would be building a the b-tree
		153	internal csv_token *
		154	tokenize_csv(string8 buffer, mem_arena arena, csv_table table, csv_token_list *token_list)
		155	{
		156
		157	unused(token_list);
		158	b32 finding_headers = TRUE;
		159
		160	if(buffer.size < 0) return NULL;
		161
		162	csv_token *tok = PushStruct(arena, csv_token);
		163
		164	// URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
		165	// NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
		166	// forgot what the solution was
		167	// TODO(nasr): check what the problem here was
		168	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
		169	{
		170	u8 point = buffer.data[index];
		171
		172	s32 start = 0;
		173	s32 end = 0;
		174
		175	if(is_whitespace(point))
		176	{
		177	warn("csv file is invalid, detected whitespace");
		178	return NULL;
		179	}
		180
		181
		182	if(point == '\n')
		183	{
		184	if(finding_headers)
		185	{
		186	#if 0
		187	string8 headers_buffer = {.data = &buffer.data[start], .size = end - start};
		188	#endif
		189	finding_headers = FALSE;
		190
		191	{
		192	//- map new header token list to table headers
		193	}
		194	}
		195	#if 0
		196	else
		197	{
		198
		199	}
		200	#endif
		201
		202
		203	table->row_count++;
		204	}
		205	else if(point == ',')
		206	{
		207	if (finding_headers)
		208	{
		209	table->header_count++;
		210	}
		211	}
		212
		213	switch(point)
		214	{
		215	case('\n'):
		216	{
		217	tok->flags \|= FL;
		218	break;
		219	}
		220
		221	case(','):
		222	{
		223	end = index - 1;
		224	start = index + 1;
		225	break;
		226	}
		227	default:
		228	{
		229	break;
		230	}
		231	}
		232
		233	tok->lexeme = StringCast(&buffer.data[start], end - start);
		234	tok->next_token = tok;
		235	}
		236
		237	return tok;
		238	}
		239
		240	//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
		241	internal b_tree *
		242	parse_csv(mem_arena arena, csv_token_list ctl, csv_table *table)
		243	{
		244	b_tree *tree = PushStructZero(arena, b_tree);
		245	b_tree_create(arena, tree);
		246
		247	// iterate over the token list while the token is not nil
		248	for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
		249	{
		250
		251	//- TODO(nasr): check initizalization or something tomorrow
		252	{
		253	//- are we parsing the first line tokens?
		254	//- if so, do something :))
		255	if(ct->flags & FL)
		256	{
		257	// TODO(nasr): replace with nil header check function
		258	if(table->header != &nil_csv_header \|\| table->header == NULL)
		259	{
		260	#if 0
		261	// - no this should happen in the tokenization
		262	table->headers->next =
		263	#endif
		264	}
		265	else
		266	{
		267
		268	}
		269
		270	}
		271	}
		272
		273	// TODO(nasr): fix this logic tomorrow
		274	csv_token *ct = PushStruct(arena, csv_token);
		275	// skip structural ctens, only index values
		276	if (ct->type != TOKEN_VALUE)
		277	{
		278	continue;
		279	}
		280
		281	// NOTE(nasr): payload is the cten itself so the caller can reach
		282	// row/col metadata without us having to copy it
		283	// NOTE(nasr): heh why do we void cast again?
		284	b_tree_insert(tree, ct->lexeme, (void *)ct);
		285	}
		286
		287	return tree;
		288	}
		289


diff --git a/source/csv_reader.h b/source/csv_reader.h deleted file mode 100644 index f5205bf..0000000 --- a/source/csv_reader.h +++ /dev/null
@@ -1,183 +0,0 @@
1	#ifndef ENGINE_LEXER_H
2	#define ENGINE_LEXER_H
3
4	typedef enum csv_token_flags csv_token_flags;
5	enum csv_token_flags
6	{
7	START_FL = 1 << 1,
8	END_FL = 1 << 2,
9	};
10
11	typedef enum csv_token_type csv_token_type;
12	enum csv_token_type
13	{
14	// first 255 tokens for ascii characters
15	TOKEN_UNDEFINED = 255,
16	TOKEN_IDENTIFIER,
17	TOKEN_VALUE,
18	};
19
20	typedef struct csv_token csv_token;
21	struct csv_token
22	{
23	string8 lexeme;
24	csv_token_type type;
25	csv_token_flags flags;
26	csv_token *next;
27	};
28
29	// NOTE(nasr): i dont think im going to use this.
30	typedef struct csv_row csv_row;
31	struct csv_row
32	{
33	// array of size col_count, points into mmap buffer
34	string8 *fields;
35	s32 count;
36	};
37
38	typedef struct csv_table csv_table;
39	struct csv_table
40	{
41	// first row, col names
42	// all data rows
43	string8 *headers;
44	csv_row *rows;
45	s32 col_count;
46	s32 row_count;
47	};
48
49
50	typedef struct csv_token_list csv_token_list;
51	struct csv_token_list
52	{
53	csv_token *start_token;
54	csv_token *end_token;
55
56	};
57
58	read_only global_variable
59	csv_token nil_csv_token=
60	{
61	.lexeme = {.data = NULL, .size =0},
62	.type = (csv_token_type)0,
63	.flags = 0,
64	.next = &nil_csv_token,
65
66	};
67
68	read_only global_variable
69	csv_token_list nil_csv_token_list =
70	{
71	.start_token = &nil_csv_token,
72	.end_token = &nil_csv_token,
73	};
74
75
76	read_only global_variable
77	csv_row nil_csv_row =
78	{
79	.fields = &nil_string,
80	.count = 0,
81	};
82
83	read_only global_variable
84	csv_table nil_csv_table =
85	{
86	.headers = &nil_string,
87	.rows = &nil_csv_row,
88	.col_count = 0,
89	.row_count = 0,
90	};
91
92	#endif /* ENGINE_LEXER_H */
93
94	// the lexer acts as a table builder from a csv file
95	// and parsing indivudal rows and columns
96	// the next step would be building a the b-tree
97	internal csv_token *
98	tokenize_csv(string8 buffer, mem_arena *arena)
99	{
100	b32 FL = TRUE;
101
102	if(buffer.size < 0) return NULL;
103
104	csv_token *tok = PushStruct(arena, csv_token);
105
106	// URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
107	// NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
108	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
109	{
110	u8 point = buffer.data[index];
111
112	s32 start = 0;
113	s32 end = 0;
114
115	if(is_whitespace(point))
116	{
117	warn("csv file is invalid, detected whitespace");
118	return NULL;
119	}
120
121	switch(point)
122	{
123	case('\n'):
124	{
125	if(FL) tok->flags \|= END_FL;
126	break;
127	}
128
129	case(','):
130	{
131	end = index - 1;
132	start = index + 1;
133	break;
134	}
135	default:
136	{
137	break;
138	}
139	}
140
141	tok->lexeme = StringCast(&buffer.data[start], end - start);
142	tok->next = tok;
143	}
144
145	return tok;
146	}
147
148	internal void
149	read_csv(string8 buffer)
150	{
151	// printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
152
153	}
154
155	internal b_tree *
156	parse_csv(mem_arena arena, csv_token_list ctl)
157	{
158	b_tree *tree = PushStructZero(arena, b_tree);
159	b_tree_create(arena, tree);
160
161	//- TODO(nasr): check initizalization or something tomorrow
162	{
163
164	}
165	// TODO(nasr): fix this logic tomorrow
166	csv_token *ct = PushStruct(arena, csv_token);
167
168	for (;ct != NULL; ct = ct->next)
169	{
170	// skip structural ctens, only index values
171	if (ct->type != TOKEN_VALUE)
172	{
173	continue;
174	}
175
176	// NOTE(nasr): payload is the cten itself so the caller can reach
177	// row/col metadata without us having to copy it
178	// NOTE(nasr): heh why do we void cast again?
179	b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
180	}
181
182	return tree;
183	}


diff --git a/source/engine.c b/source/tb_db.c index 106f113..b992111 100644 --- a/source/engine.c +++ b/source/tb_db.c
@@ -1,6 +1,3 @@
1
2
3
4	#define B_TREE_IMPLEMENTATION	1	#define B_TREE_IMPLEMENTATION
5	#define BASE_UNITY	2	#define BASE_UNITY
6	#include "base/base_include.h"	3	#include "base/base_include.h"
@@ -33,11 +30,10 @@ internal b32
33	is_delimiter(u8 point)	30	is_delimiter(u8 point)
34	{	31	{
35	return (point == ',');	32	return (point == ',');
36
37	}	33	}
38		34
39	#include "b_tree.h"	35	#include "b_tree_impl.h"
40	#include "csv_reader.h"	36	#include "csv_decoder.h"
41		37
42	typedef struct query_token query_token;	38	typedef struct query_token query_token;
43	struct query_token	39	struct query_token
@@ -80,7 +76,6 @@ is_nil_query_token_list(query_token *token)
80	return (token == &nil_query_token) \|\| (token == NULL);	76	return (token == &nil_query_token) \|\| (token == NULL);
81	}	77	}
82		78
83
84	// takes on line of the repl input	79	// takes on line of the repl input
85	// return a reference to the passed list	80	// return a reference to the passed list
86	internal query_token_list *	81	internal query_token_list *
@@ -90,7 +85,7 @@ query_tokenizer(mem_arena arena, string8 buffer, query_token_list *list)
90	unused(initialized);	85	unused(initialized);
91		86
92	for (u64 index = 0; index < buffer->size; ++index)	87	for (u64 index = 0; index < buffer->size; ++index)
93	{	88	{
94	u8 codepoint = buffer->data[index];	89	u8 codepoint = buffer->data[index];
95		90
96	if(codepoint == '\n' \|\| codepoint == '\r') break;	91	if(codepoint == '\n' \|\| codepoint == '\r') break;
@@ -98,10 +93,7 @@ query_tokenizer(mem_arena arena, string8 buffer, query_token_list *list)
98	s32 start = 0;	93	s32 start = 0;
99	s32 end = 0;	94	s32 end = 0;
100		95
101	if(is_whitespace(codepoint))	96	if(is_whitespace(codepoint)) end = index;
102	{
103	end = index;
104	}
105		97
106	// save the token	98	// save the token
107	// TODO(nasr): work on the string macros cuz no work	99	// TODO(nasr): work on the string macros cuz no work
@@ -185,19 +177,23 @@ int main(int count, char **value)
185	}	177	}
186		178
187	{	179	{
188	read_csv(buffer);
189		180
190	csv_token *tokens = tokenize_csv(buffer, global_arena);	181	// NOTE(nasr): the use of tables is required for tracking headers etc.
		182	// i think we can optimize this away in the future but for now its fine
		183	csv_table *table = PushStruct(global_arena, csv_table);
		184
		185	csv_token_list *token_list = PushStruct(global_arena, csv_token_list);
		186
		187	csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list);
191		188
192	assert_msg(tokens != NULL, "Tokens are NULL.");	189	assert_msg(tokens != NULL, "Tokens are NULL.");
193		190
194	csv_token_list *ctl = PushStruct(global_arena, csv_token_list);	191	csv_token_list *ctl = PushStruct(global_arena, csv_token_list);
195	b_tree *bt = parse_csv(global_arena, ctl);	192	b_tree *bt = parse_csv(global_arena, ctl, table);
196		193
197	b_tree_write(bt);	194	b_tree_write(bt);
198	}	195	}
199		196
200
201	// NOTE(nasr): not sure on how to approach the b-tree and the table format thing	197	// NOTE(nasr): not sure on how to approach the b-tree and the table format thing
202	// we kind of want our table format i think? but i wouldnt be sure about the use case	198	// we kind of want our table format i think? but i wouldnt be sure about the use case
203	// so we stick to the regular b_tree for now. commenting out the tables.	199	// so we stick to the regular b_tree for now. commenting out the tables.