11 files changed, 283 insertions, 230 deletions
diff --git a/source/b_tree.h b/source/b_tree.h
deleted file mode 100644
index 0fc4d3c..0000000
--- a/source/b_tree.h
+++ /dev/null
@@ -1,44 +0,0 @@
-#ifndef BTREE_H
-#define BTREE_H
-// maximum height  of the tree the lower the lower the lower amount
-// of disk reads which translates into faster?
-#define B_TREE_ORDER 4
-typedef struct b_tree_node b_tree_node;
-struct  b_tree_node
-{
-    // store the values
-    string8 keys[B_TREE_ORDER - 1];
-    csv_row *rows[B_TREE_ORDER - 1];
-    b_tree_node *parent;
-    // handle to store children faster than linked list
-    // because now we can iteratate over the nodes instead of having cache misses
-    // when jumping through linked nodes
-    b_tree_node *children[B_TREE_ORDER];
-    // NOTE(nasr): reference count ::: check how many leaves are using this node
-    // also not needed for now because we don't free individual node because of arena allocator
-    // s32 *refc;
-    s32 key_count;
-    b32 leaf;
-};
-typedef struct b_tree b_tree;
-struct b_tree
-{
-    // NOTE(nasr): make sure this always stays in memory
-    // so that initial fetch never requires a disk read
-    b_tree_node *root;
-};
-#endif /* BTREE_H */
diff --git a/source/base/base_string.h b/source/base/base_string.h
index 4e0d923..ef2cf16 100644
--- a/source/base/base_string.h
+++ b/source/base/base_string.h
@@ -12,7 +12,8 @@
        result; \
    })
-#define String8Cast(literal, literal_count) ( string8 ){( u8 * )( literal ),( u64 )( literal_count ) } 
+#define StringCast(data, size) (string8){(u8 *)(data), (u64)(size) } 
+#define StringPCast(data, size) (string8 *){(u8 *)(data), (u64)(size) } 
 #define StringFmt "%.*s"
 #define ULongFmt  "%lu"
diff --git a/source/b_tree.c b/source/btree.h
index e42f709..42c8e21 100644
--- a/source/b_tree.c
+++ b/source/btree.h
@@ -1,3 +1,45 @@
+#ifndef BTREE_H
+#define BTREE_H
+// maximum height  of the tree the lower the lower the lower amount
+// of disk reads which translates into faster?
+#define B_TREE_ORDER 4
+typedef struct b_tree_node b_tree_node;
+struct  b_tree_node
+{
+    // store the values
+    string8 keys[B_TREE_ORDER - 1];
+    csv_row *rows[B_TREE_ORDER - 1];
+    b_tree_node *parent;
+    // handle to store children faster than linked list
+    // because now we can iteratate over the nodes instead of having cache misses
+    // when jumping through linked nodes
+    b_tree_node *children[B_TREE_ORDER];
+    // NOTE(nasr): reference count ::: check how many leaves are using this node
+    // also not needed for now because we don't free individual node because of arena allocator
+    // s32 *refc;
+    s32 key_count;
+    b32 leaf;
+};
+typedef struct b_tree b_tree;
+struct b_tree
+{
+    // NOTE(nasr): make sure this always stays in memory
+    // so that initial fetch never requires a disk read
+    b_tree_node *root;
+};
 // TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
@@ -63,4 +105,4 @@ b_tree_write()
    // TODO(nasr): write the b_tree to disk
 }
+#endif /* BTREE_H */
diff --git a/source/csv_parser.c b/source/csv_parser.c
deleted file mode 100644
index 6679d2f..0000000
--- a/source/csv_parser.c
+++ /dev/null
@@ -1,26 +0,0 @@
-internal void
-strip_new_line(string8 buffer)
-{
-    for (u64 index = 0; index < buffer.size; index++)
-        {
-        }
-    return;
-}
-internal void
-read_csv(string8 buffer)
-{
-    printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
-}
-internal csv_table *
-parse_csv(token *tokens, csv_table *table)
-{
-    return NULL;
-}
diff --git a/source/csv_parser.h b/source/csv_parser.h
deleted file mode 100644
index 1aa96b9..0000000
--- a/source/csv_parser.h
+++ /dev/null
@@ -1,40 +0,0 @@
-#ifndef CSV_PARSER_H
-#define CSV_PARSER_H
-typedef struct csv_row csv_row;
-struct csv_row
-{
-    // array of size col_count, points into mmap buffer
-    string8 *fields;
-    s32      count;
-};
-typedef struct csv_table csv_table;
-struct csv_table
-{
-    // first row, col names
-    // all data rows
-    string8  *headers;
-    csv_row  *rows;
-    s32       col_count;
-    s32       row_count;
-};
-read_only global_variable
-csv_row  nil_csv_row =
-{
-    .fields     = &nil_string,
-    .count      = 0,
-};
-read_only global_variable
-csv_table nil_csv_table =
-{
-    .headers     = &nil_string,
-    .rows        = &nil_csv_row,
-    .col_count   = 0,
-    .row_count   = 0,
-};
-#endif /* CSV_PARSER_H */
diff --git a/source/csv_reader.h b/source/csv_reader.h
new file mode 100644
index 0000000..6c8e625
--- /dev/null
+++ b/source/csv_reader.h
@@ -0,0 +1,145 @@
+#ifndef ENGINE_LEXER_H
+#define ENGINE_LEXER_H
+typedef enum token_flags token_flags;
+enum token_flags 
+{
+    START_FL    = 1 << 1,
+    END_FL      = 1 << 2,
+};
+typedef enum token_type token_type;
+enum token_type
+{
+    // first 255 tokens  for ascii characters
+    TOKEN_UNDEFINED = 255,
+    TOKEN_IDENTIFIER,
+    TOKEN_VALUE,
+};
+typedef struct token token;
+struct token
+{
+    string8 lexeme;
+    token_type type;
+    token_flags flags;
+    token *next;
+};
+typedef struct csv_row csv_row;
+struct csv_row
+{
+    // array of size col_count, points into mmap buffer
+    string8 *fields;
+    s32      count;
+};
+typedef struct csv_table csv_table;
+struct csv_table
+{
+    // first row, col names
+    // all data rows
+    string8  *headers;
+    csv_row  *rows;
+    s32       col_count;
+    s32       row_count;
+};
+read_only global_variable
+csv_row  nil_csv_row =
+{
+    .fields     = &nil_string,
+    .count      = 0,
+};
+read_only global_variable
+csv_table nil_csv_table =
+{
+    .headers     = &nil_string,
+    .rows        = &nil_csv_row,
+    .col_count   = 0,
+    .row_count   = 0,
+};
+// the lexer acts as a table builder from a csv  file
+// and parsing indivudal rows and columns
+// the next step would be building a the b-tree
+internal token *
+tokenize_csv(string8 buffer, mem_arena *arena)
+{
+    b32 FL = TRUE;
+    if(buffer.size < 0) return NULL;
+    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
+    {
+        token *tok = PushStruct(arena, token);
+        u8 point = buffer.data[index];
+        s32 start   = 0;
+        s32 end     = 0;
+        if(is_whitespace(point))
+        {
+            print("csv file is invalid");
+            return NULL;
+        }
+        switch(point)
+        {
+            case('\n'):
+                {
+                    if(FL) tok->flags |= END_FL;
+                    break;
+                }
+            case(','):
+                {
+                    end = index - 1;
+                    start = index + 1;
+                    break;
+                }
+            default:
+                {
+                    break;
+                }
+        }
+        tok->lexeme = StringCast(&buffer.data[start], end - start);
+        tok->next = tok;
+    }
+    return NULL;
+}
+internal void
+strip_new_line(string8 buffer)
+{
+    for (u64 index = 0; index < buffer.size; index++)
+        {
+        }
+    return;
+}
+internal void
+read_csv(string8 buffer)
+{
+    // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
+}
+internal csv_table *
+parse_csv(token *tokens, csv_table *table)
+{
+    return NULL;
+}
+#endif /* ENGINE_LEXER_H */
diff --git a/source/engine.c b/source/engine.c
index fe609e9..3527e27 100644
--- a/source/engine.c
+++ b/source/engine.c
@@ -1,17 +1,88 @@
 #define BASE_UNITY
 #include "base/base_include.h"
-#include <stdio.h>
+internal b32
+is_alpha(u8 point)
+{
+    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
+}
+internal b32
+is_digit(u8 point)
+{
+    return (point >= '0' && point <= '9');
+}
-#include "csv_parser.h"
+internal b32
-#include "lexer.h"
+is_alpha_num(u8 point)
+{
+    return (is_alpha(point) || is_digit(point));
+}
+internal b32
+is_whitespace(u8 point)
+{
+    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
+}
+internal b32
+is_delimiter(u8 point)
+{
+    return (point == ',');
+}
+#include "csv_reader.h"
+// #include "btree.h"
+typedef struct query_token query_token;
+struct query_token
+{
+    string8      *lexeme;
+    query_token  *next;
+};
+// takes on line of the repl input
+internal query_token *
+query_tokenizer(mem_arena *arena, string8 *buffer)
+{
+    query_token *tok = PushStruct(arena, query_token);
+    unused(tok);
-#include "lexer.c"
+    for (u64 index = 0; index < buffer->size; ++index)
-#include "csv_parser.c"
+    {            
+        u8 codepoint = buffer->data[index];
+        if(codepoint == '\n' || codepoint == '\r') break;
+        s32 start   = 0; 
+        s32 end     = 0; 
+        if(is_whitespace(codepoint))
+        {
+            end = index; 
+        }
+        s32 new_token_size = end - start;
+        tok->lexeme->data = &buffer->data[index]; 
+        tok->lexeme->size = new_token_size;
+        tok->next = tok;
+        start = index + 1;
+    }
+    return tok;
+}
 int main(int c, char **v)
 {
-    if(c < 2) return -999;
+    if(c < 2)
+    { 
+        print("bad file, setting default file\n");
+    }
+    else v[1] =  "./test/customers-10000.csv";
    local_persist b32 running = 1;
@@ -26,13 +97,27 @@ int main(int c, char **v)
    {
        if (running)
        {
-            // TODO(nasr): check for return key
            {
                u8 line_buffer[256] = {};
                s64 codepoint = os_read(STDIN_FD, line_buffer, 256);
                unused(codepoint);
+                for(s32 index = 0; index < 256; ++index)
+                {
+                    if(line_buffer[index] == '\n')
+                    {
+                        print("exiting");
+                        return 0;
+                    }
+                    else if(line_buffer[index] == ' ')
+                    {
+                        print("TODO(nasr): ");
+                    }
+                }
+            }
+            {
                read_csv(buffer);
                token *tokens = tokenize_csv(buffer, global_arena);
                global_table = parse_csv(tokens, global_table);
diff --git a/source/lexer.c b/source/lexer.c
deleted file mode 100644
index decfb7a..0000000
--- a/source/lexer.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// the lexer acts as a table builder from a csv  file
-// and parsing indivudal rows and columns
-// the next step would be building a the b-tree
-internal b32
-is_alpha(u8 point)
-{
-    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
-}
-internal b32
-is_digit(u8 point)
-{
-    return (point >= '0' && point <= '9');
-}
-internal b32
-is_alpha_num(u8 point)
-{
-    return (is_alpha(point) || is_digit(point));
-}
-internal b32
-is_whitespace(u8 point)
-{
-    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
-}
-internal b32
-is_delimiter(u8 point)
-{
-    return (point == ',');
-}
-internal token *
-tokenize_csv(string8 buffer, mem_arena *arena)
-{
-    b32 FL = TRUE;
-    if(buffer.size < 0) return NULL;
-    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
-    {
-        token *tok = PushStruct(arena, token);
-        u8 point = buffer.data[index];
-        s32 start   = 0;
-        s32 end     = 0;
-        if(is_whitespace(point))
-        {
-            print("csv file is invalid");
-            return NULL;
-        }
-        switch(point)
-        {
-            case('\n'):
-                {
-                    if(FL) tok->flags |= END_FL;
-                    break;
-                }
-            case(','):
-                {
-                    end = index - 1;
-                    start = index + 1;
-                    break;
-                }
-            default:
-                {
-                    break;
-                }
-        }
-        tok->lexeme = String8Cast(&buffer.data[start], end - start);
-        tok->next = tok;
-    }
-    return NULL;
-}
diff --git a/source/lexer.h b/source/lexer.h
deleted file mode 100644
index c950a58..0000000
--- a/source/lexer.h
+++ /dev/null
@@ -1,30 +0,0 @@
-#ifndef ENGINE_LEXER_H
-#define ENGINE_LEXER_H
-typedef enum token_flags token_flags;
-enum token_flags 
-{
-    START_FL    = 1 << 1,
-    END_FL      = 1 << 2,
-};
-typedef enum token_type token_type;
-enum token_type
-{
-    // first 255 tokens  for ascii characters
-    TOKEN_UNDEFINED = 255,
-    TOKEN_IDENTIFIER,
-    TOKEN_VALUE,
-};
-typedef struct token token;
-struct token
-{
-    string8 lexeme;
-    token_type type;
-    token_flags flags;
-    token *next;
-};
-#endif /* ENGINE_LEXER_H */
diff --git a/source/query.c b/source/query.c
deleted file mode 100644
index e69de29..0000000
--- a/source/query.c
+++ /dev/null
diff --git a/source/query.h b/source/query.h
deleted file mode 100644
index e69de29..0000000
--- a/source/query.h
+++ /dev/null

diff --git a/source/b_tree.h b/source/b_tree.h deleted file mode 100644 index 0fc4d3c..0000000 --- a/source/b_tree.h +++ /dev/null
@@ -1,44 +0,0 @@
1	#ifndef BTREE_H
2	#define BTREE_H
3
4	// maximum height of the tree the lower the lower the lower amount
5	// of disk reads which translates into faster?
6	#define B_TREE_ORDER 4
7
8	typedef struct b_tree_node b_tree_node;
9	struct b_tree_node
10	{
11
12	// store the values
13	string8 keys[B_TREE_ORDER - 1];
14	csv_row *rows[B_TREE_ORDER - 1];
15
16	b_tree_node *parent;
17	// handle to store children faster than linked list
18	// because now we can iteratate over the nodes instead of having cache misses
19	// when jumping through linked nodes
20	b_tree_node *children[B_TREE_ORDER];
21
22	// NOTE(nasr): reference count ::: check how many leaves are using this node
23	// also not needed for now because we don't free individual node because of arena allocator
24	// s32 *refc;
25
26	s32 key_count;
27
28	b32 leaf;
29
30	};
31
32	typedef struct b_tree b_tree;
33	struct b_tree
34	{
35	// NOTE(nasr): make sure this always stays in memory
36	// so that initial fetch never requires a disk read
37	b_tree_node *root;
38
39	};
40
41
42
43
44	#endif /* BTREE_H */


diff --git a/source/base/base_string.h b/source/base/base_string.h index 4e0d923..ef2cf16 100644 --- a/source/base/base_string.h +++ b/source/base/base_string.h
@@ -12,7 +12,8 @@
12	result; \	12	result; \
13	})	13	})
14		14
15	#define String8Cast(literal, literal_count) ( string8 ){( u8 * )( literal ),( u64 )( literal_count ) }	15	#define StringCast(data, size) (string8){(u8 *)(data), (u64)(size) }
		16	#define StringPCast(data, size) (string8 ){(u8 )(data), (u64)(size) }
16		17
17	#define StringFmt "%.*s"	18	#define StringFmt "%.*s"
18	#define ULongFmt "%lu"	19	#define ULongFmt "%lu"


diff --git a/source/b_tree.c b/source/btree.h index e42f709..42c8e21 100644 --- a/source/b_tree.c +++ b/source/btree.h
@@ -1,3 +1,45 @@
		1	#ifndef BTREE_H
		2	#define BTREE_H
		3
		4	// maximum height of the tree the lower the lower the lower amount
		5	// of disk reads which translates into faster?
		6	#define B_TREE_ORDER 4
		7
		8	typedef struct b_tree_node b_tree_node;
		9	struct b_tree_node
		10	{
		11
		12	// store the values
		13	string8 keys[B_TREE_ORDER - 1];
		14	csv_row *rows[B_TREE_ORDER - 1];
		15
		16	b_tree_node *parent;
		17	// handle to store children faster than linked list
		18	// because now we can iteratate over the nodes instead of having cache misses
		19	// when jumping through linked nodes
		20	b_tree_node *children[B_TREE_ORDER];
		21
		22	// NOTE(nasr): reference count ::: check how many leaves are using this node
		23	// also not needed for now because we don't free individual node because of arena allocator
		24	// s32 *refc;
		25
		26	s32 key_count;
		27
		28	b32 leaf;
		29
		30	};
		31
		32	typedef struct b_tree b_tree;
		33	struct b_tree
		34	{
		35	// NOTE(nasr): make sure this always stays in memory
		36	// so that initial fetch never requires a disk read
		37	b_tree_node *root;
		38
		39	};
		40
		41
		42
1		43
2	// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?	44	// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
3		45
@@ -63,4 +105,4 @@ b_tree_write()
63	// TODO(nasr): write the b_tree to disk	105	// TODO(nasr): write the b_tree to disk
64	}	106	}
65		107
66		108	#endif /* BTREE_H */


diff --git a/source/csv_parser.c b/source/csv_parser.c deleted file mode 100644 index 6679d2f..0000000 --- a/source/csv_parser.c +++ /dev/null
@@ -1,26 +0,0 @@
1	internal void
2	strip_new_line(string8 buffer)
3	{
4
5	for (u64 index = 0; index < buffer.size; index++)
6	{
7
8	}
9
10	return;
11
12	}
13
14	internal void
15	read_csv(string8 buffer)
16	{
17	printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
18
19	}
20
21	internal csv_table *
22	parse_csv(token tokens, csv_table table)
23	{
24
25	return NULL;
26	}


diff --git a/source/csv_parser.h b/source/csv_parser.h deleted file mode 100644 index 1aa96b9..0000000 --- a/source/csv_parser.h +++ /dev/null
@@ -1,40 +0,0 @@
1	#ifndef CSV_PARSER_H
2	#define CSV_PARSER_H
3
4	typedef struct csv_row csv_row;
5	struct csv_row
6	{
7	// array of size col_count, points into mmap buffer
8	string8 *fields;
9	s32 count;
10	};
11
12	typedef struct csv_table csv_table;
13	struct csv_table
14	{
15	// first row, col names
16	// all data rows
17	string8 *headers;
18	csv_row *rows;
19	s32 col_count;
20	s32 row_count;
21	};
22
23	read_only global_variable
24	csv_row nil_csv_row =
25	{
26	.fields = &nil_string,
27	.count = 0,
28	};
29
30	read_only global_variable
31	csv_table nil_csv_table =
32	{
33	.headers = &nil_string,
34	.rows = &nil_csv_row,
35	.col_count = 0,
36	.row_count = 0,
37	};
38
39
40	#endif /* CSV_PARSER_H */


diff --git a/source/csv_reader.h b/source/csv_reader.h new file mode 100644 index 0000000..6c8e625 --- /dev/null +++ b/source/csv_reader.h
@@ -0,0 +1,145 @@
		1	#ifndef ENGINE_LEXER_H
		2	#define ENGINE_LEXER_H
		3
		4	typedef enum token_flags token_flags;
		5	enum token_flags
		6	{
		7	START_FL = 1 << 1,
		8	END_FL = 1 << 2,
		9	};
		10
		11
		12	typedef enum token_type token_type;
		13	enum token_type
		14	{
		15	// first 255 tokens for ascii characters
		16	TOKEN_UNDEFINED = 255,
		17	TOKEN_IDENTIFIER,
		18	TOKEN_VALUE,
		19	};
		20
		21	typedef struct token token;
		22	struct token
		23	{
		24	string8 lexeme;
		25	token_type type;
		26	token_flags flags;
		27	token *next;
		28	};
		29
		30	typedef struct csv_row csv_row;
		31	struct csv_row
		32	{
		33	// array of size col_count, points into mmap buffer
		34	string8 *fields;
		35	s32 count;
		36	};
		37
		38	typedef struct csv_table csv_table;
		39	struct csv_table
		40	{
		41	// first row, col names
		42	// all data rows
		43	string8 *headers;
		44	csv_row *rows;
		45	s32 col_count;
		46	s32 row_count;
		47	};
		48
		49	read_only global_variable
		50	csv_row nil_csv_row =
		51	{
		52	.fields = &nil_string,
		53	.count = 0,
		54	};
		55
		56	read_only global_variable
		57	csv_table nil_csv_table =
		58	{
		59	.headers = &nil_string,
		60	.rows = &nil_csv_row,
		61	.col_count = 0,
		62	.row_count = 0,
		63	};
		64
		65
		66	// the lexer acts as a table builder from a csv file
		67	// and parsing indivudal rows and columns
		68	// the next step would be building a the b-tree
		69	internal token *
		70	tokenize_csv(string8 buffer, mem_arena *arena)
		71	{
		72
		73	b32 FL = TRUE;
		74
		75	if(buffer.size < 0) return NULL;
		76	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
		77	{
		78	token *tok = PushStruct(arena, token);
		79	u8 point = buffer.data[index];
		80
		81	s32 start = 0;
		82	s32 end = 0;
		83
		84	if(is_whitespace(point))
		85	{
		86	print("csv file is invalid");
		87	return NULL;
		88	}
		89
		90	switch(point)
		91	{
		92	case('\n'):
		93	{
		94	if(FL) tok->flags \|= END_FL;
		95	break;
		96	}
		97
		98	case(','):
		99	{
		100	end = index - 1;
		101	start = index + 1;
		102	break;
		103	}
		104	default:
		105	{
		106	break;
		107	}
		108	}
		109
		110	tok->lexeme = StringCast(&buffer.data[start], end - start);
		111	tok->next = tok;
		112	}
		113
		114	return NULL;
		115	}
		116
		117	internal void
		118	strip_new_line(string8 buffer)
		119	{
		120
		121	for (u64 index = 0; index < buffer.size; index++)
		122	{
		123
		124	}
		125
		126	return;
		127
		128	}
		129
		130	internal void
		131	read_csv(string8 buffer)
		132	{
		133	// printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
		134
		135	}
		136
		137	internal csv_table *
		138	parse_csv(token tokens, csv_table table)
		139	{
		140
		141	return NULL;
		142	}
		143
		144
		145	#endif /* ENGINE_LEXER_H */


diff --git a/source/engine.c b/source/engine.c index fe609e9..3527e27 100644 --- a/source/engine.c +++ b/source/engine.c
@@ -1,17 +1,88 @@
1	#define BASE_UNITY	1	#define BASE_UNITY
2	#include "base/base_include.h"	2	#include "base/base_include.h"
3		3
4	#include <stdio.h>	4	internal b32
		5	is_alpha(u8 point)
		6	{
		7	return ((point >= 'a' && point <= 'z') \|\| (point >= 'A' && point <= 'Z') \|\| (point == '_'));
		8	}
		9
		10	internal b32
		11	is_digit(u8 point)
		12	{
		13	return (point >= '0' && point <= '9');
		14	}
5		15
6	#include "csv_parser.h"	16	internal b32
7	#include "lexer.h"	17	is_alpha_num(u8 point)
		18	{
		19	return (is_alpha(point) \|\| is_digit(point));
		20	}
		21
		22	internal b32
		23	is_whitespace(u8 point)
		24	{
		25	return (point == '\n' \|\| point == '\r' \|\| point == ' ' \|\| point == '\t');
		26	}
		27
		28	internal b32
		29	is_delimiter(u8 point)
		30	{
		31	return (point == ',');
		32	}
		33
		34	#include "csv_reader.h"
		35	// #include "btree.h"
		36
		37	typedef struct query_token query_token;
		38	struct query_token
		39	{
		40	string8 *lexeme;
		41	query_token *next;
		42	};
		43
		44
		45	// takes on line of the repl input
		46	internal query_token *
		47	query_tokenizer(mem_arena arena, string8 buffer)
		48	{
		49	query_token *tok = PushStruct(arena, query_token);
		50	unused(tok);
8		51
9	#include "lexer.c"	52	for (u64 index = 0; index < buffer->size; ++index)
10	#include "csv_parser.c"	53	{
		54	u8 codepoint = buffer->data[index];
		55
		56	if(codepoint == '\n' \|\| codepoint == '\r') break;
		57
		58	s32 start = 0;
		59	s32 end = 0;
		60
		61	if(is_whitespace(codepoint))
		62	{
		63	end = index;
		64	}
		65
		66	s32 new_token_size = end - start;
		67
		68	tok->lexeme->data = &buffer->data[index];
		69	tok->lexeme->size = new_token_size;
		70
		71	tok->next = tok;
		72	start = index + 1;
		73
		74	}
		75
		76	return tok;
		77	}
11		78
12	int main(int c, char **v)	79	int main(int c, char **v)
13	{	80	{
14	if(c < 2) return -999;	81	if(c < 2)
		82	{
		83	print("bad file, setting default file\n");
		84	}
		85	else v[1] = "./test/customers-10000.csv";
15		86
16	local_persist b32 running = 1;	87	local_persist b32 running = 1;
17		88
@@ -26,13 +97,27 @@ int main(int c, char **v)
26	{	97	{
27	if (running)	98	if (running)
28	{	99	{
29	// TODO(nasr): check for return key
30	{	100	{
31	u8 line_buffer[256] = {};	101	u8 line_buffer[256] = {};
32
33	s64 codepoint = os_read(STDIN_FD, line_buffer, 256);	102	s64 codepoint = os_read(STDIN_FD, line_buffer, 256);
34	unused(codepoint);	103	unused(codepoint);
		104	for(s32 index = 0; index < 256; ++index)
		105	{
		106	if(line_buffer[index] == '\n')
		107	{
		108	print("exiting");
		109	return 0;
		110	}
		111	else if(line_buffer[index] == ' ')
		112	{
35		113
		114	print("TODO(nasr): ");
		115	}
		116
		117	}
		118	}
		119
		120	{
36	read_csv(buffer);	121	read_csv(buffer);
37	token *tokens = tokenize_csv(buffer, global_arena);	122	token *tokens = tokenize_csv(buffer, global_arena);
38	global_table = parse_csv(tokens, global_table);	123	global_table = parse_csv(tokens, global_table);


diff --git a/source/lexer.c b/source/lexer.c deleted file mode 100644 index decfb7a..0000000 --- a/source/lexer.c +++ /dev/null
@@ -1,80 +0,0 @@
1	// the lexer acts as a table builder from a csv file
2	// and parsing indivudal rows and columns
3	// the next step would be building a the b-tree
4	internal b32
5	is_alpha(u8 point)
6	{
7	return ((point >= 'a' && point <= 'z') \|\| (point >= 'A' && point <= 'Z') \|\| (point == '_'));
8	}
9
10	internal b32
11	is_digit(u8 point)
12	{
13	return (point >= '0' && point <= '9');
14	}
15
16	internal b32
17	is_alpha_num(u8 point)
18	{
19	return (is_alpha(point) \|\| is_digit(point));
20	}
21
22	internal b32
23	is_whitespace(u8 point)
24	{
25	return (point == '\n' \|\| point == '\r' \|\| point == ' ' \|\| point == '\t');
26	}
27
28	internal b32
29	is_delimiter(u8 point)
30	{
31	return (point == ',');
32	}
33
34	internal token *
35	tokenize_csv(string8 buffer, mem_arena *arena)
36	{
37
38	b32 FL = TRUE;
39
40	if(buffer.size < 0) return NULL;
41	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
42	{
43	token *tok = PushStruct(arena, token);
44	u8 point = buffer.data[index];
45
46	s32 start = 0;
47	s32 end = 0;
48
49	if(is_whitespace(point))
50	{
51	print("csv file is invalid");
52	return NULL;
53	}
54
55	switch(point)
56	{
57	case('\n'):
58	{
59	if(FL) tok->flags \|= END_FL;
60	break;
61	}
62
63	case(','):
64	{
65	end = index - 1;
66	start = index + 1;
67	break;
68	}
69	default:
70	{
71	break;
72	}
73	}
74
75	tok->lexeme = String8Cast(&buffer.data[start], end - start);
76	tok->next = tok;
77	}
78
79	return NULL;
80	}


diff --git a/source/lexer.h b/source/lexer.h deleted file mode 100644 index c950a58..0000000 --- a/source/lexer.h +++ /dev/null
@@ -1,30 +0,0 @@
1	#ifndef ENGINE_LEXER_H
2	#define ENGINE_LEXER_H
3
4	typedef enum token_flags token_flags;
5	enum token_flags
6	{
7	START_FL = 1 << 1,
8	END_FL = 1 << 2,
9	};
10
11
12	typedef enum token_type token_type;
13	enum token_type
14	{
15	// first 255 tokens for ascii characters
16	TOKEN_UNDEFINED = 255,
17	TOKEN_IDENTIFIER,
18	TOKEN_VALUE,
19	};
20
21	typedef struct token token;
22	struct token
23	{
24	string8 lexeme;
25	token_type type;
26	token_flags flags;
27	token *next;
28	};
29
30	#endif /* ENGINE_LEXER_H */


diff --git a/source/query.c b/source/query.c deleted file mode 100644 index e69de29..0000000 --- a/source/query.c +++ /dev/null


diff --git a/source/query.h b/source/query.h deleted file mode 100644 index e69de29..0000000 --- a/source/query.h +++ /dev/null