diff options
| author | nasr <nsrddyn@gmail.com> | 2026-03-14 21:45:30 +0000 |
|---|---|---|
| committer | nasr <nsrddyn@gmail.com> | 2026-03-14 21:45:30 +0000 |
| commit | ff2ac6f584a1b08c0e66ac65b9c8e8d68e79f124 (patch) | |
| tree | 63e9602c5710cd2550c38bf49e5f4a18c0c33437 | |
| parent | 53cd9c4c3408b5f2e54e891baf471c0d774ea2cd (diff) | |
feature(main): bugfixes
still a segfault when generating the token linked list
| -rw-r--r-- | source/csv_reader.h | 61 | ||||
| -rw-r--r-- | source/engine.c | 46 |
2 files changed, 65 insertions, 42 deletions
diff --git a/source/csv_reader.h b/source/csv_reader.h index 2b6f49c..7f5bf06 100644 --- a/source/csv_reader.h +++ b/source/csv_reader.h | |||
| @@ -8,7 +8,6 @@ enum token_flags | |||
| 8 | END_FL = 1 << 2, | 8 | END_FL = 1 << 2, |
| 9 | }; | 9 | }; |
| 10 | 10 | ||
| 11 | |||
| 12 | typedef enum token_type token_type; | 11 | typedef enum token_type token_type; |
| 13 | enum token_type | 12 | enum token_type |
| 14 | { | 13 | { |
| @@ -27,6 +26,7 @@ struct token | |||
| 27 | token *next; | 26 | token *next; |
| 28 | }; | 27 | }; |
| 29 | 28 | ||
| 29 | // NOTE(nasr): I don't think I'm going to use this. | ||
| 30 | typedef struct csv_row csv_row; | 30 | typedef struct csv_row csv_row; |
| 31 | struct csv_row | 31 | struct csv_row |
| 32 | { | 32 | { |
| @@ -62,8 +62,6 @@ csv_table nil_csv_table = | |||
| 62 | .row_count = 0, | 62 | .row_count = 0, |
| 63 | }; | 63 | }; |
| 64 | 64 | ||
| 65 | |||
| 66 | |||
| 67 | #endif /* ENGINE_LEXER_H */ | 65 | #endif /* ENGINE_LEXER_H */ |
| 68 | 66 | ||
| 69 | // the lexer acts as a table builder from a csv file | 67 | // the lexer acts as a table builder from a csv file |
| @@ -72,13 +70,16 @@ csv_table nil_csv_table = | |||
| 72 | internal token * | 70 | internal token * |
| 73 | tokenize_csv(string8 buffer, mem_arena *arena) | 71 | tokenize_csv(string8 buffer, mem_arena *arena) |
| 74 | { | 72 | { |
| 75 | |||
| 76 | b32 FL = TRUE; | 73 | b32 FL = TRUE; |
| 77 | 74 | ||
| 78 | if(buffer.size < 0) return NULL; | 75 | if(buffer.size < 0) return NULL; |
| 76 | |||
| 77 | token *tok = PushStruct(arena, token); | ||
| 78 | |||
| 79 | // URGENT(nasr): segfaulting because memcpy of string value doesn't work dammit | ||
| 80 | // NOPE, IT'S BECAUSE WE DON'T LOAD THE CSV OR SOMETHING??? | ||
| 79 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) | 81 | for(s32 index = 0; buffer.data[index] != '\0'; ++index) |
| 80 | { | 82 | { |
| 81 | token *tok = PushStruct(arena, token); | ||
| 82 | u8 point = buffer.data[index]; | 83 | u8 point = buffer.data[index]; |
| 83 | 84 | ||
| 84 | s32 start = 0; | 85 | s32 start = 0; |
| @@ -86,35 +87,35 @@ tokenize_csv(string8 buffer, mem_arena *arena) | |||
| 86 | 87 | ||
| 87 | if(is_whitespace(point)) | 88 | if(is_whitespace(point)) |
| 88 | { | 89 | { |
| 89 | print("csv file is invalid"); | 90 | warn("csv file is invalid, detected whitespace"); |
| 90 | return NULL; | 91 | return NULL; |
| 91 | } | 92 | } |
| 92 | 93 | ||
| 93 | switch(point) | 94 | switch(point) |
| 94 | { | 95 | { |
| 95 | case('\n'): | 96 | case('\n'): |
| 96 | { | 97 | { |
| 97 | if(FL) tok->flags |= END_FL; | 98 | if(FL) tok->flags |= END_FL; |
| 98 | break; | 99 | break; |
| 99 | } | 100 | } |
| 100 | 101 | ||
| 101 | case(','): | 102 | case(','): |
| 102 | { | 103 | { |
| 103 | end = index - 1; | 104 | end = index - 1; |
| 104 | start = index + 1; | 105 | start = index + 1; |
| 105 | break; | 106 | break; |
| 106 | } | 107 | } |
| 107 | default: | 108 | default: |
| 108 | { | 109 | { |
| 109 | break; | 110 | break; |
| 110 | } | 111 | } |
| 111 | } | 112 | } |
| 112 | 113 | ||
| 113 | tok->lexeme = StringCast(&buffer.data[start], end - start); | 114 | tok->lexeme = StringCast(&buffer.data[start], end - start); |
| 114 | tok->next = tok; | 115 | tok->next = tok; |
| 115 | } | 116 | } |
| 116 | 117 | ||
| 117 | return NULL; | 118 | return tok; |
| 118 | } | 119 | } |
| 119 | 120 | ||
| 120 | internal void | 121 | internal void |
| @@ -124,18 +125,24 @@ read_csv(string8 buffer) | |||
| 124 | 125 | ||
| 125 | } | 126 | } |
| 126 | 127 | ||
| 127 | internal b_tree * | 128 | internal b_tree * |
| 128 | parse_csv(csv_token *tok, csv_table *table) | 129 | parse_csv(mem_arena *arena, token *tok) |
| 129 | { | 130 | { |
| 131 | b_tree *tree = PushStructZero(arena, b_tree); | ||
| 132 | b_tree_create(arena, tree); | ||
| 130 | 133 | ||
| 131 | 134 | for (; tok != NULL; tok = tok->next) | |
| 132 | for (;tok->next; tok = tok->next) | ||
| 133 | { | 135 | { |
| 134 | b_tree_node *current_btree_node = btree_node_alloc; | 136 | // skip structural tokens, only index values |
| 137 | if (tok->type != TOKEN_VALUE) | ||
| 138 | { | ||
| 139 | continue; | ||
| 140 | } | ||
| 135 | 141 | ||
| 136 | 142 | // NOTE(nasr): payload is the token itself so the caller can reach | |
| 143 | // row/col metadata without us having to copy it | ||
| 144 | b_tree_insert(arena, tree, tok->lexeme, (void *)tok); | ||
| 137 | } | 145 | } |
| 138 | 146 | ||
| 139 | return NULL; | 147 | return tree; |
| 140 | } | 148 | } |
| 141 | |||
diff --git a/source/engine.c b/source/engine.c index 1cfbab0..9797d8a 100644 --- a/source/engine.c +++ b/source/engine.c | |||
| @@ -40,7 +40,7 @@ is_delimiter(u8 point) | |||
| 40 | typedef struct query_token query_token; | 40 | typedef struct query_token query_token; |
| 41 | struct query_token | 41 | struct query_token |
| 42 | { | 42 | { |
| 43 | string8 *lexeme; | 43 | string8 lexeme; |
| 44 | query_token *next; | 44 | query_token *next; |
| 45 | }; | 45 | }; |
| 46 | 46 | ||
| @@ -72,8 +72,9 @@ query_tokenizer(mem_arena *arena, string8 *buffer) | |||
| 72 | 72 | ||
| 73 | s32 new_token_size = end - start; | 73 | s32 new_token_size = end - start; |
| 74 | 74 | ||
| 75 | tok->lexeme->data = &buffer->data[index]; | 75 | tok->lexeme = PushString(arena, new_token_size); |
| 76 | tok->lexeme->size = new_token_size; | 76 | tok->lexeme.data = &buffer->data[index]; |
| 77 | tok->lexeme.size = new_token_size; | ||
| 77 | 78 | ||
| 78 | tok->next = tok; | 79 | tok->next = tok; |
| 79 | start = index + 1; | 80 | start = index + 1; |
| @@ -83,21 +84,18 @@ query_tokenizer(mem_arena *arena, string8 *buffer) | |||
| 83 | return tok; | 84 | return tok; |
| 84 | } | 85 | } |
| 85 | 86 | ||
| 86 | int main(int c, char **v) | 87 | int main(int count, char **value) |
| 87 | { | 88 | { |
| 88 | 89 | if(count < 2) value[1] = "./test/data.csv"; | |
| 89 | if(c < 2) | ||
| 90 | { | ||
| 91 | print("bad file, setting default file\n"); | ||
| 92 | } | ||
| 93 | else v[1] = "./test/customers-10000.csv"; | ||
| 94 | 90 | ||
| 95 | local_persist b32 running = 1; | 91 | local_persist b32 running = 1; |
| 96 | 92 | ||
| 97 | mem_arena *global_arena = arena_create(MiB(30)); | 93 | mem_arena *global_arena = arena_create(MiB(30)); |
| 98 | csv_table *global_table = PushStruct(global_arena, csv_table); | ||
| 99 | 94 | ||
| 100 | string8 buffer = load_file(v[1]); | 95 | // NOTE(nasr): see note down below |
| 96 | // csv_table *global_table = PushStruct(global_arena, csv_table); | ||
| 97 | |||
| 98 | string8 buffer = load_file(global_arena, value[1]); | ||
| 101 | 99 | ||
| 102 | print("\nDatabase Engine\n"); | 100 | print("\nDatabase Engine\n"); |
| 103 | 101 | ||
| @@ -106,23 +104,41 @@ int main(int c, char **v) | |||
| 106 | if (running) | 104 | if (running) |
| 107 | { | 105 | { |
| 108 | { | 106 | { |
| 109 | u8 lbuf[256] = {}; | 107 | u8 *lbuf = PushArray(global_arena, u8, 256); |
| 110 | s32 err = os_read(STDIN_FD, lbuf, 256); | 108 | s32 err = os_read(STDIN_FD, lbuf, 256); |
| 109 | |||
| 111 | if(err < 0) | 110 | if(err < 0) |
| 112 | { | 111 | { |
| 113 | print("error reading from stdin"); | 112 | print("error reading from stdin"); |
| 114 | } | 113 | } |
| 115 | 114 | ||
| 116 | query_tokenizer(global_arena, &StringLit(lbuf)); | 115 | // TODO(nasr): extract this later and make a string copy function/macro |
| 116 | // @params (s32 lbuf_size , string8 lbuf_stringified) | ||
| 117 | s32 lbuf_size = sizeof(lbuf) - 1; | ||
| 118 | string8 lbuf_stringified = PushString(global_arena, lbuf_size); | ||
| 119 | { | ||
| 120 | memcpy(lbuf_stringified.data, lbuf, lbuf_size); | ||
| 121 | lbuf_stringified.size = sizeof(lbuf) - 1; | ||
| 122 | } | ||
| 117 | 123 | ||
| 124 | query_tokenizer(global_arena, &lbuf_stringified); | ||
| 118 | } | 125 | } |
| 119 | 126 | ||
| 120 | { | 127 | { |
| 121 | read_csv(buffer); | 128 | read_csv(buffer); |
| 122 | token *tokens = tokenize_csv(buffer, global_arena); | 129 | token *tokens = tokenize_csv(buffer, global_arena); |
| 123 | global_table = parse_csv(tokens, global_table); | 130 | |
| 131 | assert_msg(tokens != NULL, "Tokens are NULL."); | ||
| 132 | |||
| 133 | b_tree *bt = parse_csv(global_arena, tokens); | ||
| 134 | b_tree_write(bt); | ||
| 124 | } | 135 | } |
| 125 | 136 | ||
| 137 | |||
| 138 | // NOTE(nasr): not sure how to approach the b-tree and the table format thing | ||
| 139 | // we kind of want our own table format, I think? but I'm not sure about the use case | ||
| 140 | // so we stick to the regular b_tree for now. commenting out the tables. | ||
| 141 | |||
| 126 | sleep(1); | 142 | sleep(1); |
| 127 | } | 143 | } |
| 128 | } | 144 | } |
