5 files changed, 348 insertions, 206 deletions
diff --git a/Makefile b/Makefile
index 0224a42..39e9b87 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,7 @@
 BIN = build/engine
-SRC = source/engine.c
+SRC = source/tb_db.c
 CC = clang
-CFLAGS = -Wall -Wextra -Wfloat-equal -Wswitch-default -Wswitch-enum \
+CFLAGS = -Wall -Wextra -Wpedantic  -Wno-unused-function -g -Werror
-         -Wno-unused-parameter -Wno-implicit-fallthrough -Wno-unused-function -g -Werror
 $(BIN): $(SRC)
        mkdir -p build
diff --git a/source/b_tree.h b/source/b_tree_impl.h
index 0c4b1e1..8eb0723 100644
--- a/source/b_tree.h
+++ b/source/b_tree_impl.h
@@ -4,16 +4,29 @@
 // maximum height of the tree the lower the lower the lower amount
 // of disk reads which translates into faster?
-#define B_TREE_ORDER 4
+#if 0
+global_variable read_only s16 B_TREE_ORDER = 4;
+#endif
+#define B_TREE_ORDER  4
+//- NOTE(nasr): defining a key to improve sorting
+//  i think saying that a key is a combination of the column + row is a good way of approaching this
+typedef struct key key; // composite key type; b_tree_node stores these in its keys array
+struct key
+{
+    string8 header; // per the note above, the column part of the key (presumably the header name — confirm once keys are filled in)
+    s32     row; // per the note above, the row part of the key (presumably a row index — confirm once keys are filled in)
+};
 typedef struct b_tree_node b_tree_node;
 struct b_tree_node
 {
-    // store the values
+    // store the key values of the sub nodes? if they are leaves?
-    string8 keys[B_TREE_ORDER - 1];
+    key keys[B_TREE_ORDER - 1];
    // TODO(nasr): replace with something more generic?
    // NOTE(nasr): cons of void * -> no type safety
    // is there a way to still have some sort of that?
+    // size not variable
    void *payload_per_key[B_TREE_ORDER - 1];
    b_tree_node *parent;
    // handle to store children faster than linked list
@@ -27,6 +40,10 @@ struct b_tree_node
    // s32 *refc;
    s32 key_count;
    b32 leaf;
+    // NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference? 
+    // this could solve memory location issues?
 };
 typedef struct b_tree b_tree;
@@ -55,9 +72,17 @@ btree_node_alloc(mem_arena *arena)
 internal s32
 btree_node_find_pos(string8 value, b_tree_node *node)
 {
+    unused(value);
+    unused(node);
+#if 0
    s32 i = 0;
    for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);
    return i;
+#endif
+    return 0;
 }
 internal void
@@ -68,10 +93,15 @@ b_tree_create(mem_arena *arena, b_tree *tree)
    tree->root->key_count = 0;
 }
 // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
 internal void *
 b_tree_search(b_tree_node *node, string8 key)
 {
+    unused(node);
+    unused(key);
+#if 0
    s32 i = btree_node_find_pos(key, node);
    if (i < node->key_count && string8_cmp(node->keys[i], key) == 0)
@@ -83,12 +113,20 @@ b_tree_search(b_tree_node *node, string8 key)
        return NULL;
    }
    return b_tree_search(node->children[i], key);
+#endif
+    return NULL;
 }
 // TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full)
 internal void
-b_tree_insert(mem_arena *arena, b_tree *tree, string8 key, void *payload)
+b_tree_insert(b_tree *tree, string8 key, void *payload)
 {
+    unused(tree);
+    unused(key);
+    unused(payload);
+#if 0
    b_tree_node *current_node = tree->root;
    if (current_node->leaf)
@@ -109,15 +147,18 @@ b_tree_insert(mem_arena *arena, b_tree *tree, string8 key, void *payload)
        }
        else {
            // TODO(nasr): creating a new branch / tree?
+            // make a seperate function for this
        }
    }
    // TODO(nasr): internal node case walk down then split on the way back up
+#endif
 }
 internal void
 b_tree_write(b_tree *bt)
 {
    // TODO(nasr): write the b_tree to disk
+    unused(bt);
 }
 #endif /* B_TREE_IMPLEMENTATION */
diff --git a/source/csv_decoder.h b/source/csv_decoder.h
new file mode 100644
index 0000000..b754ef5
--- /dev/null
+++ b/source/csv_decoder.h
@@ -0,0 +1,289 @@
+#ifndef ENGINE_LEXER_H
+#define ENGINE_LEXER_H
+enum csv_token_flags // bit flags stored in csv_token.flags
+{
+    FL      = 1 << 2, // set on tokens by tokenize_csv; parse_csv tests it to spot first-line tokens
+};
+enum csv_token_type // kind tag stored in csv_token.type
+{
+    // first 255 tokens  for ascii characters
+    TOKEN_UNDEFINED = 255, // named kinds start at 255, after the range the comment above sets aside
+    TOKEN_IDENTIFIER,
+    TOKEN_VALUE, // the only kind parse_csv passes on to b_tree_insert
+};
+typedef struct csv_token csv_token; // one lexed piece of CSV input, chained through next_token
+struct csv_token
+{
+    string8 lexeme; // token text; tokenize_csv builds it from a pointer into the input buffer
+    csv_token *next_token; // next token in the chain; nil_csv_token links to itself
+    enum csv_token_type type; // parse_csv skips every token whose type is not TOKEN_VALUE
+    enum csv_token_flags flags; // bitwise OR of csv_token_flags values
+};
+// NOTE(nasr): i dont think im going to use this.
+typedef struct csv_row csv_row; // one row viewed as an array of field strings
+struct csv_row
+{
+    // array of size col_count, points into mmap buffer
+    string8 *fields;
+    s32      count; // NOTE(review): presumably the number of entries in fields — only nil_csv_row sets it in the visible code
+};
+#if 0
+// NOTE(review): disabled sketch. The typedef previously named the tag
+// `csv_lntity`, which did not match the struct below. An empty struct is also
+// rejected by -Wpedantic, so give it a member before enabling this block.
+typedef struct csv_entity csv_entity;
+struct csv_entity 
+{
+    //- not needed because we use key header mapping i think
+};
+#endif
+typedef struct csv_header csv_header; // node of a singly linked chain of column headers
+struct csv_header 
+{
+    string8  payload; // NOTE(review): presumably the column name text — nothing visible fills it in yet
+    csv_header *next_header; // next header in the chain; nil_csv_header links to itself
+};
+typedef struct csv_table csv_table; // bookkeeping that tokenize_csv updates while scanning a file
+struct csv_table
+{
+    // first row, col names
+    // all data rows
+    csv_header  *header; // head of the csv_header chain; nil_csv_table points it at nil_csv_header
+    s32         row_count; // number of lines counted by tokenize_csv
+    s32         header_count; // count accumulated by tokenize_csv while it is still on the header line
+};
+typedef struct csv_token_list csv_token_list; // head/tail handle over a csv_token chain
+struct csv_token_list
+{
+    csv_token *start_token; // first token; parse_csv starts iterating here
+    csv_token *end_token; // last token; csv_token_list_append_token links new tokens after it
+};
+read_only global_variable
+csv_token nil_csv_token= // sentinel token; is_nil_csv_token treats its address the same as NULL
+{
+    .lexeme         = {.data = NULL, .size =0},
+    .type           = 0,
+    .flags          = 0,
+    .next_token     = &nil_csv_token, // self link: following next_token from the sentinel stays on the sentinel
+};
+read_only global_variable
+csv_header nil_csv_header = // sentinel header; parse_csv compares table->header against its address
+{
+    .payload =  {.data = NULL, .size = 0},
+    .next_header = &nil_csv_header, // self link, mirroring nil_csv_token
+};
+read_only global_variable
+csv_token_list nil_csv_token_list = // empty-list value: both ends point at the nil token
+{
+    .start_token = &nil_csv_token,
+    .end_token   = &nil_csv_token,
+};
+read_only global_variable
+csv_row  nil_csv_row = // empty-row value with no fields
+{
+    .fields     = &nil_string, // nil_string is defined outside this header
+    .count      = 0,
+};
+// Empty-table value: no header chain and zero counts. Every field is spelled
+// out so the initializer keeps tracking the csv_table definition.
+read_only global_variable
+csv_table nil_csv_table =
+{
+    .header       = &nil_csv_header,
+    .row_count    = 0,
+    .header_count = 0,
+};
+#endif /* ENGINE_LEXER_H */
+// Reports whether `token` ends a chain: non-zero for a NULL pointer or for the
+// address of the shared nil_csv_token sentinel, zero for any other pointer.
+internal b32 
+is_nil_csv_token(csv_token *token)
+{
+    b32 is_null     = (token == NULL);
+    b32 is_sentinel = (token == &nil_csv_token);
+    return (is_null || is_sentinel);
+}
+// Appends `source_token` to the tail of `source_token_list`.
+//
+// An empty list (end_token is NULL or the nil sentinel) adopts the token as
+// both head and tail. The previous code wrote through end_token in that case,
+// i.e. through NULL or through the nil sentinel, which is declared read_only,
+// and it never set start_token.
+// A NULL list or a nil token is ignored.
+// The token's own next_token is left untouched.
+// NOTE(review): callers are presumably expected to have it point at
+// &nil_csv_token so iteration terminates — confirm.
+internal void
+csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
+{
+    if(source_token_list == NULL || is_nil_csv_token(source_token)) return;
+
+    if(is_nil_csv_token(source_token_list->end_token))
+    {
+        // first element: there is no real tail to link from yet
+        source_token_list->start_token = source_token;
+    }
+    else
+    {
+        source_token_list->end_token->next_token = source_token;
+    }
+    source_token_list->end_token = source_token;
+}
+//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to each other
+//
+// After the call `destination` covers its old tokens followed by every token
+// of `source`; `source` itself is not modified and now shares its nodes with
+// `destination`.
+// The previous version rewrote the same next_token link on every loop step, so
+// only the last source token stayed linked, and it then stored the nil/NULL
+// loop cursor as the new end_token.
+// A NULL argument or an empty source leaves `destination` unchanged.
+internal void
+csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
+{
+    if(destination == NULL || source == NULL) return;
+    if(is_nil_csv_token(source->start_token)) return;
+
+    if(is_nil_csv_token(destination->end_token))
+    {
+        // empty destination: adopt the source chain rather than writing
+        // through NULL or the nil sentinel
+        destination->start_token = source->start_token;
+    }
+    else
+    {
+        destination->end_token->next_token = source->start_token;
+    }
+
+    // keep the old tail if source carries no usable end_token
+    if(!is_nil_csv_token(source->end_token))
+    {
+        destination->end_token = source->end_token;
+    }
+}
+#if 0
+internal csv_token_list *
+parse_csv_row(string8 row_buffer)
+{
+    // csv_token_list *
+}
+#endif
+// Allocates one token for the bytes [start, end) of `buffer` and initializes
+// every field. The lexeme points into `buffer`; nothing is copied.
+// Header-row tokens get TOKEN_IDENTIFIER plus the FL flag, all others
+// TOKEN_VALUE.
+internal csv_token *
+csv_push_field_token(mem_arena *arena, string8 buffer, u64 start, u64 end, b32 is_header)
+{
+    csv_token *tok = PushStruct(arena, csv_token);
+    if(tok == NULL) return NULL;
+
+    tok->lexeme         = (string8){.data = &buffer.data[start], .size = end - start};
+    tok->next_token     = &nil_csv_token;
+    tok->type           = is_header ? TOKEN_IDENTIFIER : TOKEN_VALUE;
+    tok->flags          = 0;
+    if(is_header) tok->flags |= FL;
+
+    return tok;
+}
+
+// the lexer acts as a table builder from a csv file,
+// parsing individual rows and columns.
+// the next step would be building the b-tree
+//
+// Splits `buffer` on ',' and '\n' and returns the first token of a chain that
+// ends in &nil_csv_token, with one token per field (empty fields get a
+// zero-length lexeme).
+//
+// buffer     - CSV text; scanning stops at buffer.size or at the first '\0'
+// arena      - arena the tokens are pushed onto
+// table      - optional; on return header is &nil_csv_header (header mapping
+//              is not implemented yet), row_count is the number of non-blank
+//              lines including the header line, header_count is the number of
+//              fields on the first line
+// token_list - optional; receives the head and tail of the chain
+//
+// Returns NULL for an empty buffer, when no field was found, when allocation
+// fails, or when is_whitespace() flags a byte other than '\n' inside a field.
+// In those cases `table` holds zero counts and `token_list` is the empty list.
+//
+// What the previous version did:
+//  - allocated a single token and linked it to itself
+//  - reset start/end on every iteration, so the lexeme length could go negative
+//  - incremented the table counters without initializing them
+//  - ignored token_list
+//  - bounded the scan only by '\0'
+internal csv_token *
+tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
+{
+    csv_token *first_token  = NULL;
+    csv_token *last_token   = NULL;
+    b32 finding_headers     = TRUE;
+    b32 row_open            = FALSE; // TRUE once the current line has any byte or comma
+    s32 row_count           = 0;
+    s32 header_count        = 0;
+    u64 field_start         = 0;
+
+    // leave the outputs in a defined state on every exit path
+    if(token_list != NULL)
+    {
+        token_list->start_token = &nil_csv_token;
+        token_list->end_token   = &nil_csv_token;
+    }
+    if(table != NULL)
+    {
+        table->header       = &nil_csv_header;
+        table->row_count    = 0;
+        table->header_count = 0;
+    }
+
+    if(buffer.data == NULL || buffer.size == 0) return NULL;
+
+    for(u64 index = 0; ; ++index)
+    {
+        // the end of the buffer acts as a final newline so an unterminated
+        // last line is still emitted
+        b32 at_end     = (index >= buffer.size) || (buffer.data[index] == '\0');
+        u8  point      = at_end ? (u8)'\n' : buffer.data[index];
+        b32 ends_row   = (point == '\n');
+        b32 ends_field = ends_row || (point == ',');
+
+        if(!ends_field)
+        {
+            // delimiters are handled before this check so a newline is never
+            // rejected, whatever is_whitespace() reports for it
+            if(is_whitespace(point))
+            {
+                warn("csv file is invalid, detected whitespace");
+                return NULL;
+            }
+            row_open = TRUE;
+            continue;
+        }
+
+        // blank line (or nothing after the last newline): no token to emit
+        if(ends_row && !row_open)
+        {
+            if(at_end) break;
+            field_start = index + 1;
+            continue;
+        }
+
+        csv_token *tok = csv_push_field_token(arena, buffer, field_start, index, finding_headers);
+        if(tok == NULL) return NULL;
+
+        if(last_token == NULL) first_token = tok;
+        else                   last_token->next_token = tok;
+        last_token = tok;
+
+        if(finding_headers) header_count++;
+        field_start = index + 1;
+
+        if(ends_row)
+        {
+            row_count++;
+            finding_headers = FALSE;
+            row_open        = FALSE;
+        }
+        else
+        {
+            // a comma always opens another (possibly empty) field on this line
+            row_open = TRUE;
+        }
+
+        if(at_end) break;
+    }
+
+    if(first_token == NULL) return NULL;
+
+    if(token_list != NULL)
+    {
+        token_list->start_token = first_token;
+        token_list->end_token   = last_token;
+    }
+    if(table != NULL)
+    {
+        table->row_count    = row_count;
+        table->header_count = header_count;
+    }
+
+    return first_token;
+}
+//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
+// Builds a b-tree over the value tokens of an already tokenized CSV.
+//
+// arena - arena the tree is allocated from
+// ctl   - token list to walk; NULL or an empty list yields an empty tree
+// table - header bookkeeping; only inspected for first-line tokens, may be NULL
+//
+// Returns the tree created by b_tree_create. Every token whose type is
+// TOKEN_VALUE is passed to b_tree_insert with its lexeme as key and the token
+// itself as payload.
+//
+// The previous version declared a second `ct` inside the loop from PushStruct.
+// That shadowed the iterator, so the type test and the insert ran on a fresh
+// token instead of the one from the list.
+internal b_tree *
+parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
+{
+    b_tree *tree = PushStructZero(arena, b_tree);
+    b_tree_create(arena, tree);
+
+    if(ctl == NULL) return tree;
+
+    // iterate over the token list while the token is not nil
+    for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
+    {
+        //- are we parsing the first line tokens?
+        if(ct->flags & FL)
+        {
+            // TODO(nasr): replace with nil header check function
+            // NULL counts as "no header" here; the old condition sent NULL
+            // into the branch that is meant to extend an existing chain
+            b32 header_is_nil = (table == NULL)
+                             || (table->header == NULL)
+                             || (table->header == &nil_csv_header);
+            if(!header_is_nil)
+            {
+                //- TODO(nasr): extend the header chain
+                //  (no, this should happen in the tokenization)
+            }
+            else
+            {
+                //- TODO(nasr): no header chain yet
+            }
+        }
+
+        // skip structural tokens, only index values
+        if (ct->type != TOKEN_VALUE)
+        {
+            continue;
+        }
+
+        // NOTE(nasr): payload is the token itself so the caller can reach
+        // row/col metadata without us having to copy it
+        b_tree_insert(tree, ct->lexeme, (void *)ct);
+    }
+
+    return tree;
+}
diff --git a/source/csv_reader.h b/source/csv_reader.h
deleted file mode 100644
index f5205bf..0000000
--- a/source/csv_reader.h
+++ /dev/null
@@ -1,183 +0,0 @@
-#ifndef ENGINE_LEXER_H
-#define ENGINE_LEXER_H
-typedef enum csv_token_flags csv_token_flags;
-enum csv_token_flags
-{
-    START_FL    = 1 << 1,
-    END_FL      = 1 << 2,
-};
-typedef enum csv_token_type csv_token_type;
-enum csv_token_type
-{
-    // first 255 tokens  for ascii characters
-    TOKEN_UNDEFINED = 255,
-    TOKEN_IDENTIFIER,
-    TOKEN_VALUE,
-};
-typedef struct csv_token csv_token;
-struct csv_token
-{
-    string8 lexeme;
-    csv_token_type type;
-    csv_token_flags flags;
-    csv_token *next;
-};
-// NOTE(nasr): i dont think im going to use this.
-typedef struct csv_row csv_row;
-struct csv_row
-{
-    // array of size col_count, points into mmap buffer
-    string8 *fields;
-    s32      count;
-};
-typedef struct csv_table csv_table;
-struct csv_table
-{
-    // first row, col names
-    // all data rows
-    string8  *headers;
-    csv_row  *rows;
-    s32       col_count;
-    s32       row_count;
-};
-typedef struct csv_token_list csv_token_list;
-struct csv_token_list
-{
-    csv_token *start_token;
-    csv_token *end_token;
-};
-read_only global_variable
-csv_token nil_csv_token=
-{
-    .lexeme = {.data = NULL, .size =0},
-    .type   = (csv_token_type)0,
-    .flags  = 0,
-    .next   = &nil_csv_token,
-};
-read_only global_variable
-csv_token_list nil_csv_token_list =
-{
-    .start_token = &nil_csv_token,
-    .end_token   = &nil_csv_token,
-};
-read_only global_variable
-csv_row  nil_csv_row =
-{
-    .fields     = &nil_string,
-    .count      = 0,
-};
-read_only global_variable
-csv_table nil_csv_table =
-{
-    .headers     = &nil_string,
-    .rows        = &nil_csv_row,
-    .col_count   = 0,
-    .row_count   = 0,
-};
-#endif /* ENGINE_LEXER_H */
-// the lexer acts as a table builder from a csv  file
-// and parsing indivudal rows and columns
-// the next step would be building a the b-tree
-internal csv_token *
-tokenize_csv(string8 buffer, mem_arena *arena)
-{
-    b32 FL = TRUE;
-    if(buffer.size < 0) return NULL;
-    csv_token *tok = PushStruct(arena, csv_token);
-    // URGENT(nasr): segfaulting because memcpy of strring value doesnt  work dammit
-    // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
-    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
-    {
-        u8 point = buffer.data[index];
-        s32 start   = 0;
-        s32 end     = 0;
-        if(is_whitespace(point))
-        {
-            warn("csv file is invalid, detected whitespace");
-            return NULL;
-        }
-        switch(point)
-        {
-            case('\n'):
-            {
-                if(FL) tok->flags |= END_FL;
-                break;
-            }
-            case(','):
-            {
-                end = index - 1;
-                start = index + 1;
-                break;
-            }
-            default:
-            {
-                break;
-            }
-        }
-        tok->lexeme = StringCast(&buffer.data[start], end - start);
-        tok->next = tok;
-    }
-    return tok;
-}
-internal void
-read_csv(string8 buffer)
-{
-    // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
-}
-internal b_tree *
-parse_csv(mem_arena *arena, csv_token_list *ctl)
-{
-    b_tree *tree = PushStructZero(arena, b_tree);
-    b_tree_create(arena, tree);
-    //- TODO(nasr): check initizalization or something tomorrow
-    {
-    }
-    // TODO(nasr): fix this logic tomorrow
-    csv_token *ct = PushStruct(arena, csv_token);
-    for (;ct != NULL; ct = ct->next)
-    {
-        // skip structural ctens, only index values
-        if (ct->type != TOKEN_VALUE)
-        {
-            continue;
-        }
-        // NOTE(nasr): payload is the cten itself so the caller can reach
-        // row/col metadata without us having to copy it
-        // NOTE(nasr): heh why do we void cast again?
-        b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
-    }
-    return tree;
-}
diff --git a/source/engine.c b/source/tb_db.c
index 106f113..b992111 100644
--- a/source/engine.c
+++ b/source/tb_db.c
@@ -1,6 +1,3 @@
 #define B_TREE_IMPLEMENTATION
 #define BASE_UNITY
 #include "base/base_include.h"
@@ -33,11 +30,10 @@ internal b32
 is_delimiter(u8 point)
 {
    return (point == ',');
 }
-#include "b_tree.h"
+#include "b_tree_impl.h"
-#include "csv_reader.h"
+#include "csv_decoder.h"
 typedef struct query_token query_token;
 struct query_token
@@ -80,7 +76,6 @@ is_nil_query_token_list(query_token *token)
    return (token == &nil_query_token) || (token == NULL);
 }
 // takes on line of the repl input
 // return a reference to the passed list
 internal query_token_list *
@@ -90,7 +85,7 @@ query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
    unused(initialized);
    for (u64 index = 0; index < buffer->size; ++index)
-    {            
+    {
        u8 codepoint = buffer->data[index];
        if(codepoint == '\n' || codepoint == '\r') break;
@@ -98,10 +93,7 @@ query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
        s32 start   = 0; 
        s32 end     = 0; 
-        if(is_whitespace(codepoint))
+        if(is_whitespace(codepoint)) end = index; 
-        {
-            end = index; 
-        }
        // save the token
        // TODO(nasr): work on the string macros cuz no work
@@ -185,19 +177,23 @@ int main(int count, char **value)
            }
            {
-                read_csv(buffer);
-                csv_token *tokens = tokenize_csv(buffer, global_arena);
+                // NOTE(nasr): the use of tables is required for tracking headers etc.
+                // i think we can optimize this away in the future but for now its fine
+                csv_table *table = PushStruct(global_arena, csv_table);
+                csv_token_list *token_list = PushStruct(global_arena, csv_token_list);
+                csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list);
                assert_msg(tokens != NULL, "Tokens are NULL.");
                csv_token_list *ctl = PushStruct(global_arena, csv_token_list);
-                b_tree *bt = parse_csv(global_arena, ctl);
+                b_tree *bt = parse_csv(global_arena, ctl, table);
                b_tree_write(bt);
            }
            // NOTE(nasr): not sure on how to approach the b-tree and the  table format thing
            // we kind of want our table format i think? but i wouldnt be sure about the use case
            // so we stick to the regular b_tree for now. commenting out the tables.

diff --git a/Makefile b/Makefile index 0224a42..39e9b87 100644 --- a/Makefile +++ b/Makefile
@@ -1,8 +1,7 @@
1	BIN = build/engine	1	BIN = build/engine
2	SRC = source/engine.c	2	SRC = source/tb_db.c
3	CC = clang	3	CC = clang
4	CFLAGS = -Wall -Wextra -Wfloat-equal -Wswitch-default -Wswitch-enum \	4	CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror
5	-Wno-unused-parameter -Wno-implicit-fallthrough -Wno-unused-function -g -Werror
6		5
7	$(BIN): $(SRC)	6	$(BIN): $(SRC)
8	mkdir -p build	7	mkdir -p build


diff --git a/source/b_tree.h b/source/b_tree_impl.h index 0c4b1e1..8eb0723 100644 --- a/source/b_tree.h +++ b/source/b_tree_impl.h
@@ -4,16 +4,29 @@
4		4
5	// maximum height of the tree the lower the lower the lower amount	5	// maximum height of the tree the lower the lower the lower amount
6	// of disk reads which translates into faster?	6	// of disk reads which translates into faster?
7	#define B_TREE_ORDER 4	7	#if 0
		8	global_variable read_only s16 B_TREE_ORDER = 4;
		9	#endif
		10	#define B_TREE_ORDER 4
		11
		12	//- NOTE(nasr): defining a key to improve sorting
		13	// i think saying that a key is a combination of the column + row is a good way of appraoching this
		14	typedef struct key key;
		15	struct key
		16	{
		17	string8 header;
		18	s32 row;
		19	};
8		20
9	typedef struct b_tree_node b_tree_node;	21	typedef struct b_tree_node b_tree_node;
10	struct b_tree_node	22	struct b_tree_node
11	{	23	{
12	// store the values	24	// store the key values of the sub nodes? if they are leaves?
13	string8 keys[B_TREE_ORDER - 1];	25	key keys[B_TREE_ORDER - 1];
14	// TODO(nasr): replace with something more generic?	26	// TODO(nasr): replace with something more generic?
15	// NOTE(nasr): cons of void * -> no type safety	27	// NOTE(nasr): cons of void * -> no type safety
16	// is there a way to still have some sort of that?	28	// is there a way to still have some sort of that?
		29	// size not variable
17	void *payload_per_key[B_TREE_ORDER - 1];	30	void *payload_per_key[B_TREE_ORDER - 1];
18	b_tree_node *parent;	31	b_tree_node *parent;
19	// handle to store children faster than linked list	32	// handle to store children faster than linked list
@@ -27,6 +40,10 @@ struct b_tree_node
27	// s32 *refc;	40	// s32 *refc;
28	s32 key_count;	41	s32 key_count;
29	b32 leaf;	42	b32 leaf;
		43
		44
		45	// NOTE(nasr): do we hold the reference to the arena? or do we pass is it as a reference?
		46	// this could solve memory location issues?
30	};	47	};
31		48
32	typedef struct b_tree b_tree;	49	typedef struct b_tree b_tree;
@@ -55,9 +72,17 @@ btree_node_alloc(mem_arena *arena)
55	internal s32	72	internal s32
56	btree_node_find_pos(string8 value, b_tree_node *node)	73	btree_node_find_pos(string8 value, b_tree_node *node)
57	{	74	{
		75	unused(value);
		76	unused(node);
		77
		78	#if 0
58	s32 i = 0;	79	s32 i = 0;
59	for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);	80	for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);
60	return i;	81	return i;
		82	#endif
		83
		84	return 0;
		85
61	}	86	}
62		87
63	internal void	88	internal void
@@ -68,10 +93,15 @@ b_tree_create(mem_arena arena, b_tree tree)
68	tree->root->key_count = 0;	93	tree->root->key_count = 0;
69	}	94	}
70		95
		96
71	// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory	97	// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
72	internal void *	98	internal void *
73	b_tree_search(b_tree_node *node, string8 key)	99	b_tree_search(b_tree_node *node, string8 key)
74	{	100	{
		101	unused(node);
		102	unused(key);
		103
		104	#if 0
75	s32 i = btree_node_find_pos(key, node);	105	s32 i = btree_node_find_pos(key, node);
76		106
77	if (i < node->key_count && string8_cmp(node->keys[i], key) == 0)	107	if (i < node->key_count && string8_cmp(node->keys[i], key) == 0)
@@ -83,12 +113,20 @@ b_tree_search(b_tree_node *node, string8 key)
83	return NULL;	113	return NULL;
84	}	114	}
85	return b_tree_search(node->children[i], key);	115	return b_tree_search(node->children[i], key);
		116	#endif
		117
		118	return NULL;
86	}	119	}
87		120
		121
88	// TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full)	122	// TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full)
89	internal void	123	internal void
90	b_tree_insert(mem_arena arena, b_tree tree, string8 key, void *payload)	124	b_tree_insert(b_tree tree, string8 key, void payload)
91	{	125	{
		126	unused(tree);
		127	unused(key);
		128	unused(payload);
		129	#if 0
92	b_tree_node *current_node = tree->root;	130	b_tree_node *current_node = tree->root;
93		131
94	if (current_node->leaf)	132	if (current_node->leaf)
@@ -109,15 +147,18 @@ b_tree_insert(mem_arena arena, b_tree tree, string8 key, void *payload)
109	}	147	}
110	else {	148	else {
111	// TODO(nasr): creating a new branch / tree?	149	// TODO(nasr): creating a new branch / tree?
		150	// make a seperate function for this
112	}	151	}
113	}	152	}
114	// TODO(nasr): internal node case walk down then split on the way back up	153	// TODO(nasr): internal node case walk down then split on the way back up
		154	#endif
115	}	155	}
116		156
117	internal void	157	internal void
118	b_tree_write(b_tree *bt)	158	b_tree_write(b_tree *bt)
119	{	159	{
120	// TODO(nasr): write the b_tree to disk	160	// TODO(nasr): write the b_tree to disk
		161	unused(bt);
121	}	162	}
122		163
123	#endif /* B_TREE_IMPLEMENTATION */	164	#endif /* B_TREE_IMPLEMENTATION */


diff --git a/source/csv_decoder.h b/source/csv_decoder.h new file mode 100644 index 0000000..b754ef5 --- /dev/null +++ b/source/csv_decoder.h
@@ -0,0 +1,289 @@
		1	#ifndef ENGINE_LEXER_H
		2	#define ENGINE_LEXER_H
		3
		4	enum csv_token_flags
		5	{
		6	FL = 1 << 2,
		7	};
		8
		9	enum csv_token_type
		10	{
		11	// first 255 tokens for ascii characters
		12	TOKEN_UNDEFINED = 255,
		13	TOKEN_IDENTIFIER,
		14	TOKEN_VALUE,
		15	};
		16
		17	typedef struct csv_token csv_token;
		18	struct csv_token
		19	{
		20	string8 lexeme;
		21	csv_token *next_token;
		22	enum csv_token_type type;
		23	enum csv_token_flags flags;
		24	};
		25
		26	// NOTE(nasr): i dont think im going to use this.
		27	typedef struct csv_row csv_row;
		28	struct csv_row
		29	{
		30	// array of size col_count, points into mmap buffer
		31	string8 *fields;
		32	s32 count;
		33	};
		34
		35	#if 0
		36	typedef struct csv_lntity csv_entity;
		37	struct csv_entity
		38	{
		39	//- not needed because we use key header mapping i think
		40	};
		41	#endif
		42
		43	typedef struct csv_header csv_header;
		44	struct csv_header
		45	{
		46	string8 payload;
		47	csv_header *next_header;
		48	};
		49
		50	typedef struct csv_table csv_table;
		51	struct csv_table
		52	{
		53	// first row, col names
		54	// all data rows
		55	csv_header *header;
		56	s32 row_count;
		57	s32 header_count;
		58	};
		59
		60
		61	typedef struct csv_token_list csv_token_list;
		62	struct csv_token_list
		63	{
		64	csv_token *start_token;
		65	csv_token *end_token;
		66
		67	};
		68
		69	read_only global_variable
		70	csv_token nil_csv_token=
		71	{
		72	.lexeme = {.data = NULL, .size =0},
		73	.type = 0,
		74	.flags = 0,
		75	.next_token = &nil_csv_token,
		76
		77	};
		78
		79	read_only global_variable
		80	csv_header nil_csv_header =
		81	{
		82	.payload = {.data = NULL, .size = 0},
		83	.next_header = &nil_csv_header,
		84	};
		85
		86	read_only global_variable
		87	csv_token_list nil_csv_token_list =
		88	{
		89	.start_token = &nil_csv_token,
		90	.end_token = &nil_csv_token,
		91	};
		92
		93
		94	read_only global_variable
		95	csv_row nil_csv_row =
		96	{
		97	.fields = &nil_string,
		98	.count = 0,
		99	};
		100
		101	read_only global_variable
		102	csv_table nil_csv_table =
		103	{
		104	.header = &nil_csv_header,
		105	.row_count = 0,
		106	};
		107
		108	#endif /* ENGINE_LEXER_H */
		109
		110	internal b32
		111	is_nil_csv_token(csv_token *token)
		112	{
		113	return ((token == NULL) \|\| (token == &nil_csv_token));
		114	}
		115
		116	internal void
		117	csv_token_list_append_token(csv_token_list source_token_list, csv_token source_token)
		118	{
		119	source_token_list->end_token->next_token = source_token;
		120	source_token_list->end_token = source_token;
		121
		122	}
		123
		124	//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
		125	internal void
		126	csv_token_list_concat_list(csv_token_list destination, csv_token_list source)
		127	{
		128
		129	csv_token *source_ct = source->start_token;
		130	csv_token *destination_end_ct = destination->end_token;
		131
		132	for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
		133	{
		134	destination_end_ct->next_token = source_ct;
		135	}
		136
		137	destination->end_token = source_ct;
		138	}
		139
		140	#if 0
		141	internal csv_token_list *
		142	parse_csv_row(string8 row_buffer)
		143	{
		144	// csv_token_list *
		145
		146	}
		147	#endif
		148
		149
		150	// the lexer acts as a table builder from a csv file
		151	// and parsing indivudal rows and columns
		152	// the next step would be building a the b-tree
		153	internal csv_token *
		154	tokenize_csv(string8 buffer, mem_arena arena, csv_table table, csv_token_list *token_list)
		155	{
		156
		157	unused(token_list);
		158	b32 finding_headers = TRUE;
		159
		160	if(buffer.size < 0) return NULL;
		161
		162	csv_token *tok = PushStruct(arena, csv_token);
		163
		164	// URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
		165	// NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
		166	// forgot what the solution was
		167	// TODO(nasr): check what the problem here was
		168	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
		169	{
		170	u8 point = buffer.data[index];
		171
		172	s32 start = 0;
		173	s32 end = 0;
		174
		175	if(is_whitespace(point))
		176	{
		177	warn("csv file is invalid, detected whitespace");
		178	return NULL;
		179	}
		180
		181
		182	if(point == '\n')
		183	{
		184	if(finding_headers)
		185	{
		186	#if 0
		187	string8 headers_buffer = {.data = &buffer.data[start], .size = end - start};
		188	#endif
		189	finding_headers = FALSE;
		190
		191	{
		192	//- map new header token list to table headers
		193	}
		194	}
		195	#if 0
		196	else
		197	{
		198
		199	}
		200	#endif
		201
		202
		203	table->row_count++;
		204	}
		205	else if(point == ',')
		206	{
		207	if (finding_headers)
		208	{
		209	table->header_count++;
		210	}
		211	}
		212
		213	switch(point)
		214	{
		215	case('\n'):
		216	{
		217	tok->flags \|= FL;
		218	break;
		219	}
		220
		221	case(','):
		222	{
		223	end = index - 1;
		224	start = index + 1;
		225	break;
		226	}
		227	default:
		228	{
		229	break;
		230	}
		231	}
		232
		233	tok->lexeme = StringCast(&buffer.data[start], end - start);
		234	tok->next_token = tok;
		235	}
		236
		237	return tok;
		238	}
		239
		240	//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
		241	internal b_tree *
		242	parse_csv(mem_arena arena, csv_token_list ctl, csv_table *table)
		243	{
		244	b_tree *tree = PushStructZero(arena, b_tree);
		245	b_tree_create(arena, tree);
		246
		247	// iterate over the token list while the token is not nil
		248	for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
		249	{
		250
		251	//- TODO(nasr): check initizalization or something tomorrow
		252	{
		253	//- are we parsing the first line tokens?
		254	//- if so, do something :))
		255	if(ct->flags & FL)
		256	{
		257	// TODO(nasr): replace with nil header check function
		258	if(table->header != &nil_csv_header \|\| table->header == NULL)
		259	{
		260	#if 0
		261	// - no this should happen in the tokenization
		262	table->headers->next =
		263	#endif
		264	}
		265	else
		266	{
		267
		268	}
		269
		270	}
		271	}
		272
		273	// TODO(nasr): fix this logic tomorrow
		274	csv_token *ct = PushStruct(arena, csv_token);
		275	// skip structural ctens, only index values
		276	if (ct->type != TOKEN_VALUE)
		277	{
		278	continue;
		279	}
		280
		281	// NOTE(nasr): payload is the cten itself so the caller can reach
		282	// row/col metadata without us having to copy it
		283	// NOTE(nasr): heh why do we void cast again?
		284	b_tree_insert(tree, ct->lexeme, (void *)ct);
		285	}
		286
		287	return tree;
		288	}
		289


diff --git a/source/csv_reader.h b/source/csv_reader.h deleted file mode 100644 index f5205bf..0000000 --- a/source/csv_reader.h +++ /dev/null
@@ -1,183 +0,0 @@
1	#ifndef ENGINE_LEXER_H
2	#define ENGINE_LEXER_H
3
4	typedef enum csv_token_flags csv_token_flags;
5	enum csv_token_flags
6	{
7	START_FL = 1 << 1,
8	END_FL = 1 << 2,
9	};
10
11	typedef enum csv_token_type csv_token_type;
12	enum csv_token_type
13	{
14	// first 255 tokens for ascii characters
15	TOKEN_UNDEFINED = 255,
16	TOKEN_IDENTIFIER,
17	TOKEN_VALUE,
18	};
19
20	typedef struct csv_token csv_token;
21	struct csv_token
22	{
23	string8 lexeme;
24	csv_token_type type;
25	csv_token_flags flags;
26	csv_token *next;
27	};
28
29	// NOTE(nasr): i dont think im going to use this.
30	typedef struct csv_row csv_row;
31	struct csv_row
32	{
33	// array of size col_count, points into mmap buffer
34	string8 *fields;
35	s32 count;
36	};
37
38	typedef struct csv_table csv_table;
39	struct csv_table
40	{
41	// first row, col names
42	// all data rows
43	string8 *headers;
44	csv_row *rows;
45	s32 col_count;
46	s32 row_count;
47	};
48
49
50	typedef struct csv_token_list csv_token_list;
51	struct csv_token_list
52	{
53	csv_token *start_token;
54	csv_token *end_token;
55
56	};
57
58	read_only global_variable
59	csv_token nil_csv_token=
60	{
61	.lexeme = {.data = NULL, .size =0},
62	.type = (csv_token_type)0,
63	.flags = 0,
64	.next = &nil_csv_token,
65
66	};
67
68	read_only global_variable
69	csv_token_list nil_csv_token_list =
70	{
71	.start_token = &nil_csv_token,
72	.end_token = &nil_csv_token,
73	};
74
75
76	read_only global_variable
77	csv_row nil_csv_row =
78	{
79	.fields = &nil_string,
80	.count = 0,
81	};
82
83	read_only global_variable
84	csv_table nil_csv_table =
85	{
86	.headers = &nil_string,
87	.rows = &nil_csv_row,
88	.col_count = 0,
89	.row_count = 0,
90	};
91
92	#endif /* ENGINE_LEXER_H */
93
94	// the lexer acts as a table builder from a csv file
95	// and parsing indivudal rows and columns
96	// the next step would be building a the b-tree
97	internal csv_token *
98	tokenize_csv(string8 buffer, mem_arena *arena)
99	{
100	b32 FL = TRUE;
101
102	if(buffer.size < 0) return NULL;
103
104	csv_token *tok = PushStruct(arena, csv_token);
105
106	// URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
107	// NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
108	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
109	{
110	u8 point = buffer.data[index];
111
112	s32 start = 0;
113	s32 end = 0;
114
115	if(is_whitespace(point))
116	{
117	warn("csv file is invalid, detected whitespace");
118	return NULL;
119	}
120
121	switch(point)
122	{
123	case('\n'):
124	{
125	if(FL) tok->flags \|= END_FL;
126	break;
127	}
128
129	case(','):
130	{
131	end = index - 1;
132	start = index + 1;
133	break;
134	}
135	default:
136	{
137	break;
138	}
139	}
140
141	tok->lexeme = StringCast(&buffer.data[start], end - start);
142	tok->next = tok;
143	}
144
145	return tok;
146	}
147
148	internal void
149	read_csv(string8 buffer)
150	{
151	// printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
152
153	}
154
155	internal b_tree *
156	parse_csv(mem_arena arena, csv_token_list ctl)
157	{
158	b_tree *tree = PushStructZero(arena, b_tree);
159	b_tree_create(arena, tree);
160
161	//- TODO(nasr): check initizalization or something tomorrow
162	{
163
164	}
165	// TODO(nasr): fix this logic tomorrow
166	csv_token *ct = PushStruct(arena, csv_token);
167
168	for (;ct != NULL; ct = ct->next)
169	{
170	// skip structural ctens, only index values
171	if (ct->type != TOKEN_VALUE)
172	{
173	continue;
174	}
175
176	// NOTE(nasr): payload is the cten itself so the caller can reach
177	// row/col metadata without us having to copy it
178	// NOTE(nasr): heh why do we void cast again?
179	b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
180	}
181
182	return tree;
183	}


diff --git a/source/engine.c b/source/tb_db.c index 106f113..b992111 100644 --- a/source/engine.c +++ b/source/tb_db.c
@@ -1,6 +1,3 @@
1
2
3
4	#define B_TREE_IMPLEMENTATION	1	#define B_TREE_IMPLEMENTATION
5	#define BASE_UNITY	2	#define BASE_UNITY
6	#include "base/base_include.h"	3	#include "base/base_include.h"
@@ -33,11 +30,10 @@ internal b32
33	is_delimiter(u8 point)	30	is_delimiter(u8 point)
34	{	31	{
35	return (point == ',');	32	return (point == ',');
36
37	}	33	}
38		34
39	#include "b_tree.h"	35	#include "b_tree_impl.h"
40	#include "csv_reader.h"	36	#include "csv_decoder.h"
41		37
42	typedef struct query_token query_token;	38	typedef struct query_token query_token;
43	struct query_token	39	struct query_token
@@ -80,7 +76,6 @@ is_nil_query_token_list(query_token *token)
80	return (token == &nil_query_token) \|\| (token == NULL);	76	return (token == &nil_query_token) \|\| (token == NULL);
81	}	77	}
82		78
83
84	// takes on line of the repl input	79	// takes on line of the repl input
85	// return a reference to the passed list	80	// return a reference to the passed list
86	internal query_token_list *	81	internal query_token_list *
@@ -90,7 +85,7 @@ query_tokenizer(mem_arena arena, string8 buffer, query_token_list *list)
90	unused(initialized);	85	unused(initialized);
91		86
92	for (u64 index = 0; index < buffer->size; ++index)	87	for (u64 index = 0; index < buffer->size; ++index)
93	{	88	{
94	u8 codepoint = buffer->data[index];	89	u8 codepoint = buffer->data[index];
95		90
96	if(codepoint == '\n' \|\| codepoint == '\r') break;	91	if(codepoint == '\n' \|\| codepoint == '\r') break;
@@ -98,10 +93,7 @@ query_tokenizer(mem_arena arena, string8 buffer, query_token_list *list)
98	s32 start = 0;	93	s32 start = 0;
99	s32 end = 0;	94	s32 end = 0;
100		95
101	if(is_whitespace(codepoint))	96	if(is_whitespace(codepoint)) end = index;
102	{
103	end = index;
104	}
105		97
106	// save the token	98	// save the token
107	// TODO(nasr): work on the string macros cuz no work	99	// TODO(nasr): work on the string macros cuz no work
@@ -185,19 +177,23 @@ int main(int count, char **value)
185	}	177	}
186		178
187	{	179	{
188	read_csv(buffer);
189		180
190	csv_token *tokens = tokenize_csv(buffer, global_arena);	181	// NOTE(nasr): the use of tables is required for tracking headers etc.
		182	// i think we can optimize this away in the future but for now its fine
		183	csv_table *table = PushStruct(global_arena, csv_table);
		184
		185	csv_token_list *token_list = PushStruct(global_arena, csv_token_list);
		186
		187	csv_token *tokens = tokenize_csv(buffer, global_arena, table, token_list);
191		188
192	assert_msg(tokens != NULL, "Tokens are NULL.");	189	assert_msg(tokens != NULL, "Tokens are NULL.");
193		190
194	csv_token_list *ctl = PushStruct(global_arena, csv_token_list);	191	csv_token_list *ctl = PushStruct(global_arena, csv_token_list);
195	b_tree *bt = parse_csv(global_arena, ctl);	192	b_tree *bt = parse_csv(global_arena, ctl, table);
196		193
197	b_tree_write(bt);	194	b_tree_write(bt);
198	}	195	}
199		196
200
201	// NOTE(nasr): not sure on how to approach the b-tree and the table format thing	197	// NOTE(nasr): not sure on how to approach the b-tree and the table format thing
202	// we kind of want our table format i think? but i wouldnt be sure about the use case	198	// we kind of want our table format i think? but i wouldnt be sure about the use case
203	// so we stick to the regular b_tree for now. commenting out the tables.	199	// so we stick to the regular b_tree for now. commenting out the tables.