summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornasr <nsrddyn@gmail.com>2026-03-09 19:44:59 +0000
committernasr <nsrddyn@gmail.com>2026-03-09 19:44:59 +0000
commit854d66bb73cd131bda6452aac74aae3d9a77b91a (patch)
treefcc083e70f01aa170df6e0fedb8597f8c9081648
parente5d6d76e803a8ea7ae86cd8302826bbfc6bd57fa (diff)
refactor(main): simplified the project. going towards a single header file project maybe...
-rw-r--r--source/b_tree.h44
-rw-r--r--source/base/base_string.h3
-rw-r--r--source/btree.h (renamed from source/b_tree.c)44
-rw-r--r--source/csv_parser.c26
-rw-r--r--source/csv_parser.h40
-rw-r--r--source/csv_reader.h145
-rw-r--r--source/engine.c101
-rw-r--r--source/lexer.c80
-rw-r--r--source/lexer.h30
-rw-r--r--source/query.c0
-rw-r--r--source/query.h0
11 files changed, 283 insertions, 230 deletions
diff --git a/source/b_tree.h b/source/b_tree.h
deleted file mode 100644
index 0fc4d3c..0000000
--- a/source/b_tree.h
+++ /dev/null
@@ -1,44 +0,0 @@
1#ifndef BTREE_H
2#define BTREE_H
3
4// a higher order means a lower tree height, which means a lower amount
5// of disk reads — which translates into faster lookups
6#define B_TREE_ORDER 4
7
8typedef struct b_tree_node b_tree_node;
9struct b_tree_node
10{
11
12 // store the values
13 string8 keys[B_TREE_ORDER - 1];
14 csv_row *rows[B_TREE_ORDER - 1];
15
16 b_tree_node *parent;
17 // handle to store children faster than linked list
18 // because now we can iterate over the nodes instead of having cache misses
19 // when jumping through linked nodes
20 b_tree_node *children[B_TREE_ORDER];
21
22 // NOTE(nasr): reference count ::: check how many leaves are using this node
23 // also not needed for now because we don't free individual node because of arena allocator
24 // s32 *refc;
25
26 s32 key_count;
27
28 b32 leaf;
29
30};
31
32typedef struct b_tree b_tree;
33struct b_tree
34{
35 // NOTE(nasr): make sure this always stays in memory
36 // so that initial fetch never requires a disk read
37 b_tree_node *root;
38
39};
40
41
42
43
44#endif /* BTREE_H */
diff --git a/source/base/base_string.h b/source/base/base_string.h
index 4e0d923..ef2cf16 100644
--- a/source/base/base_string.h
+++ b/source/base/base_string.h
@@ -12,7 +12,8 @@
12 result; \ 12 result; \
13 }) 13 })
14 14
15#define String8Cast(literal, literal_count) ( string8 ){( u8 * )( literal ),( u64 )( literal_count ) } 15#define StringCast(data, size) (string8){(u8 *)(data), (u64)(size) }
16#define StringPCast(data, size) (string8 *){(u8 *)(data), (u64)(size) }
16 17
17#define StringFmt "%.*s" 18#define StringFmt "%.*s"
18#define ULongFmt "%lu" 19#define ULongFmt "%lu"
diff --git a/source/b_tree.c b/source/btree.h
index e42f709..42c8e21 100644
--- a/source/b_tree.c
+++ b/source/btree.h
@@ -1,3 +1,45 @@
1#ifndef BTREE_H
2#define BTREE_H
3
4// a higher order means a lower tree height, which means a lower amount
5// of disk reads — which translates into faster lookups
6#define B_TREE_ORDER 4
7
8typedef struct b_tree_node b_tree_node;
9struct b_tree_node
10{
11
12 // store the values
13 string8 keys[B_TREE_ORDER - 1];
14 csv_row *rows[B_TREE_ORDER - 1];
15
16 b_tree_node *parent;
17 // handle to store children faster than linked list
18 // because now we can iterate over the nodes instead of having cache misses
19 // when jumping through linked nodes
20 b_tree_node *children[B_TREE_ORDER];
21
22 // NOTE(nasr): reference count ::: check how many leaves are using this node
23 // also not needed for now because we don't free individual node because of arena allocator
24 // s32 *refc;
25
26 s32 key_count;
27
28 b32 leaf;
29
30};
31
32typedef struct b_tree b_tree;
33struct b_tree
34{
35 // NOTE(nasr): make sure this always stays in memory
36 // so that initial fetch never requires a disk read
37 b_tree_node *root;
38
39};
40
41
42
1 43
2// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees? 44// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
3 45
@@ -63,4 +105,4 @@ b_tree_write()
63 // TODO(nasr): write the b_tree to disk 105 // TODO(nasr): write the b_tree to disk
64} 106}
65 107
66 108#endif /* BTREE_H */
diff --git a/source/csv_parser.c b/source/csv_parser.c
deleted file mode 100644
index 6679d2f..0000000
--- a/source/csv_parser.c
+++ /dev/null
@@ -1,26 +0,0 @@
1internal void
2strip_new_line(string8 buffer)
3{
4
5 for (u64 index = 0; index < buffer.size; index++)
6 {
7
8 }
9
10 return;
11
12}
13
14internal void
15read_csv(string8 buffer)
16{
17 printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
18
19}
20
21internal csv_table *
22parse_csv(token *tokens, csv_table *table)
23{
24
25 return NULL;
26}
diff --git a/source/csv_parser.h b/source/csv_parser.h
deleted file mode 100644
index 1aa96b9..0000000
--- a/source/csv_parser.h
+++ /dev/null
@@ -1,40 +0,0 @@
1#ifndef CSV_PARSER_H
2#define CSV_PARSER_H
3
4typedef struct csv_row csv_row;
5struct csv_row
6{
7 // array of size col_count, points into mmap buffer
8 string8 *fields;
9 s32 count;
10};
11
12typedef struct csv_table csv_table;
13struct csv_table
14{
15 // first row, col names
16 // all data rows
17 string8 *headers;
18 csv_row *rows;
19 s32 col_count;
20 s32 row_count;
21};
22
23read_only global_variable
24csv_row nil_csv_row =
25{
26 .fields = &nil_string,
27 .count = 0,
28};
29
30read_only global_variable
31csv_table nil_csv_table =
32{
33 .headers = &nil_string,
34 .rows = &nil_csv_row,
35 .col_count = 0,
36 .row_count = 0,
37};
38
39
40#endif /* CSV_PARSER_H */
diff --git a/source/csv_reader.h b/source/csv_reader.h
new file mode 100644
index 0000000..6c8e625
--- /dev/null
+++ b/source/csv_reader.h
@@ -0,0 +1,145 @@
1#ifndef ENGINE_LEXER_H
2#define ENGINE_LEXER_H
3
4typedef enum token_flags token_flags;
5enum token_flags
6{
7 START_FL = 1 << 1,
8 END_FL = 1 << 2,
9};
10
11
12typedef enum token_type token_type;
13enum token_type
14{
15 // first 255 tokens for ascii characters
16 TOKEN_UNDEFINED = 255,
17 TOKEN_IDENTIFIER,
18 TOKEN_VALUE,
19};
20
21typedef struct token token;
22struct token
23{
24 string8 lexeme;
25 token_type type;
26 token_flags flags;
27 token *next;
28};
29
30typedef struct csv_row csv_row;
31struct csv_row
32{
33 // array of size col_count, points into mmap buffer
34 string8 *fields;
35 s32 count;
36};
37
38typedef struct csv_table csv_table;
39struct csv_table
40{
41 // first row, col names
42 // all data rows
43 string8 *headers;
44 csv_row *rows;
45 s32 col_count;
46 s32 row_count;
47};
48
49read_only global_variable
50csv_row nil_csv_row =
51{
52 .fields = &nil_string,
53 .count = 0,
54};
55
56read_only global_variable
57csv_table nil_csv_table =
58{
59 .headers = &nil_string,
60 .rows = &nil_csv_row,
61 .col_count = 0,
62 .row_count = 0,
63};
64
65
66// the lexer acts as a table builder from a csv file,
67// parsing individual rows and columns;
68// the next step would be building the b-tree
69internal token *
70tokenize_csv(string8 buffer, mem_arena *arena)
71{
72
73 b32 FL = TRUE;
74
75 if(buffer.size < 0) return NULL;
76 for(s32 index = 0; buffer.data[index] != '\0'; ++index)
77 {
78 token *tok = PushStruct(arena, token);
79 u8 point = buffer.data[index];
80
81 s32 start = 0;
82 s32 end = 0;
83
84 if(is_whitespace(point))
85 {
86 print("csv file is invalid");
87 return NULL;
88 }
89
90 switch(point)
91 {
92 case('\n'):
93 {
94 if(FL) tok->flags |= END_FL;
95 break;
96 }
97
98 case(','):
99 {
100 end = index - 1;
101 start = index + 1;
102 break;
103 }
104 default:
105 {
106 break;
107 }
108 }
109
110 tok->lexeme = StringCast(&buffer.data[start], end - start);
111 tok->next = tok;
112 }
113
114 return NULL;
115}
116
117internal void
118strip_new_line(string8 buffer)
119{
120
121 for (u64 index = 0; index < buffer.size; index++)
122 {
123
124 }
125
126 return;
127
128}
129
130internal void
131read_csv(string8 buffer)
132{
133 // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
134
135}
136
137internal csv_table *
138parse_csv(token *tokens, csv_table *table)
139{
140
141 return NULL;
142}
143
144
145#endif /* ENGINE_LEXER_H */
diff --git a/source/engine.c b/source/engine.c
index fe609e9..3527e27 100644
--- a/source/engine.c
+++ b/source/engine.c
@@ -1,17 +1,88 @@
1#define BASE_UNITY 1#define BASE_UNITY
2#include "base/base_include.h" 2#include "base/base_include.h"
3 3
4#include <stdio.h> 4internal b32
5is_alpha(u8 point)
6{
7 return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
8}
9
10internal b32
11is_digit(u8 point)
12{
13 return (point >= '0' && point <= '9');
14}
5 15
6#include "csv_parser.h" 16internal b32
7#include "lexer.h" 17is_alpha_num(u8 point)
18{
19 return (is_alpha(point) || is_digit(point));
20}
21
22internal b32
23is_whitespace(u8 point)
24{
25 return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
26}
27
28internal b32
29is_delimiter(u8 point)
30{
31 return (point == ',');
32}
33
34#include "csv_reader.h"
35// #include "btree.h"
36
37typedef struct query_token query_token;
38struct query_token
39{
40 string8 *lexeme;
41 query_token *next;
42};
43
44
45// takes on line of the repl input
46internal query_token *
47query_tokenizer(mem_arena *arena, string8 *buffer)
48{
49 query_token *tok = PushStruct(arena, query_token);
50 unused(tok);
8 51
9#include "lexer.c" 52 for (u64 index = 0; index < buffer->size; ++index)
10#include "csv_parser.c" 53 {
54 u8 codepoint = buffer->data[index];
55
56 if(codepoint == '\n' || codepoint == '\r') break;
57
58 s32 start = 0;
59 s32 end = 0;
60
61 if(is_whitespace(codepoint))
62 {
63 end = index;
64 }
65
66 s32 new_token_size = end - start;
67
68 tok->lexeme->data = &buffer->data[index];
69 tok->lexeme->size = new_token_size;
70
71 tok->next = tok;
72 start = index + 1;
73
74 }
75
76 return tok;
77}
11 78
12int main(int c, char **v) 79int main(int c, char **v)
13{ 80{
14 if(c < 2) return -999; 81 if(c < 2)
82 {
83 print("bad file, setting default file\n");
84 }
85 else v[1] = "./test/customers-10000.csv";
15 86
16 local_persist b32 running = 1; 87 local_persist b32 running = 1;
17 88
@@ -26,13 +97,27 @@ int main(int c, char **v)
26 { 97 {
27 if (running) 98 if (running)
28 { 99 {
29 // TODO(nasr): check for return key
30 { 100 {
31 u8 line_buffer[256] = {}; 101 u8 line_buffer[256] = {};
32
33 s64 codepoint = os_read(STDIN_FD, line_buffer, 256); 102 s64 codepoint = os_read(STDIN_FD, line_buffer, 256);
34 unused(codepoint); 103 unused(codepoint);
104 for(s32 index = 0; index < 256; ++index)
105 {
106 if(line_buffer[index] == '\n')
107 {
108 print("exiting");
109 return 0;
110 }
111 else if(line_buffer[index] == ' ')
112 {
35 113
114 print("TODO(nasr): ");
115 }
116
117 }
118 }
119
120 {
36 read_csv(buffer); 121 read_csv(buffer);
37 token *tokens = tokenize_csv(buffer, global_arena); 122 token *tokens = tokenize_csv(buffer, global_arena);
38 global_table = parse_csv(tokens, global_table); 123 global_table = parse_csv(tokens, global_table);
diff --git a/source/lexer.c b/source/lexer.c
deleted file mode 100644
index decfb7a..0000000
--- a/source/lexer.c
+++ /dev/null
@@ -1,80 +0,0 @@
1// the lexer acts as a table builder from a csv file,
2// parsing individual rows and columns;
3// the next step would be building the b-tree
4internal b32
5is_alpha(u8 point)
6{
7 return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
8}
9
10internal b32
11is_digit(u8 point)
12{
13 return (point >= '0' && point <= '9');
14}
15
16internal b32
17is_alpha_num(u8 point)
18{
19 return (is_alpha(point) || is_digit(point));
20}
21
22internal b32
23is_whitespace(u8 point)
24{
25 return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
26}
27
28internal b32
29is_delimiter(u8 point)
30{
31 return (point == ',');
32}
33
34internal token *
35tokenize_csv(string8 buffer, mem_arena *arena)
36{
37
38 b32 FL = TRUE;
39
40 if(buffer.size < 0) return NULL;
41 for(s32 index = 0; buffer.data[index] != '\0'; ++index)
42 {
43 token *tok = PushStruct(arena, token);
44 u8 point = buffer.data[index];
45
46 s32 start = 0;
47 s32 end = 0;
48
49 if(is_whitespace(point))
50 {
51 print("csv file is invalid");
52 return NULL;
53 }
54
55 switch(point)
56 {
57 case('\n'):
58 {
59 if(FL) tok->flags |= END_FL;
60 break;
61 }
62
63 case(','):
64 {
65 end = index - 1;
66 start = index + 1;
67 break;
68 }
69 default:
70 {
71 break;
72 }
73 }
74
75 tok->lexeme = String8Cast(&buffer.data[start], end - start);
76 tok->next = tok;
77 }
78
79 return NULL;
80}
diff --git a/source/lexer.h b/source/lexer.h
deleted file mode 100644
index c950a58..0000000
--- a/source/lexer.h
+++ /dev/null
@@ -1,30 +0,0 @@
1#ifndef ENGINE_LEXER_H
2#define ENGINE_LEXER_H
3
4typedef enum token_flags token_flags;
5enum token_flags
6{
7 START_FL = 1 << 1,
8 END_FL = 1 << 2,
9};
10
11
12typedef enum token_type token_type;
13enum token_type
14{
15 // first 255 tokens for ascii characters
16 TOKEN_UNDEFINED = 255,
17 TOKEN_IDENTIFIER,
18 TOKEN_VALUE,
19};
20
21typedef struct token token;
22struct token
23{
24 string8 lexeme;
25 token_type type;
26 token_flags flags;
27 token *next;
28};
29
30#endif /* ENGINE_LEXER_H */
diff --git a/source/query.c b/source/query.c
deleted file mode 100644
index e69de29..0000000
--- a/source/query.c
+++ /dev/null
diff --git a/source/query.h b/source/query.h
deleted file mode 100644
index e69de29..0000000
--- a/source/query.h
+++ /dev/null