summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source/csv_reader.h61
-rw-r--r--source/engine.c46
2 files changed, 65 insertions, 42 deletions
diff --git a/source/csv_reader.h b/source/csv_reader.h
index 2b6f49c..7f5bf06 100644
--- a/source/csv_reader.h
+++ b/source/csv_reader.h
@@ -8,7 +8,6 @@ enum token_flags
8 END_FL = 1 << 2, 8 END_FL = 1 << 2,
9}; 9};
10 10
11
12typedef enum token_type token_type; 11typedef enum token_type token_type;
13enum token_type 12enum token_type
14{ 13{
@@ -27,6 +26,7 @@ struct token
27 token *next; 26 token *next;
28}; 27};
29 28
29// NOTE(nasr): i dont think im going to use this.
30typedef struct csv_row csv_row; 30typedef struct csv_row csv_row;
31struct csv_row 31struct csv_row
32{ 32{
@@ -62,8 +62,6 @@ csv_table nil_csv_table =
62 .row_count = 0, 62 .row_count = 0,
63}; 63};
64 64
65
66
67#endif /* ENGINE_LEXER_H */ 65#endif /* ENGINE_LEXER_H */
68 66
69// the lexer acts as a table builder from a csv file 67// the lexer acts as a table builder from a csv file
@@ -72,13 +70,16 @@ csv_table nil_csv_table =
72internal token * 70internal token *
73tokenize_csv(string8 buffer, mem_arena *arena) 71tokenize_csv(string8 buffer, mem_arena *arena)
74{ 72{
75
76 b32 FL = TRUE; 73 b32 FL = TRUE;
77 74
78 if(buffer.size < 0) return NULL; 75 if(buffer.size < 0) return NULL;
76
77 token *tok = PushStruct(arena, token);
78
79 // URGENT(nasr): segfaulting because memcpy of string value doesn't work dammit
80 // NOPE, IT'S BECAUSE WE DON'T LOAD THE CSV OR SOMETHING???
79 for(s32 index = 0; buffer.data[index] != '\0'; ++index) 81 for(s32 index = 0; buffer.data[index] != '\0'; ++index)
80 { 82 {
81 token *tok = PushStruct(arena, token);
82 u8 point = buffer.data[index]; 83 u8 point = buffer.data[index];
83 84
84 s32 start = 0; 85 s32 start = 0;
@@ -86,35 +87,35 @@ tokenize_csv(string8 buffer, mem_arena *arena)
86 87
87 if(is_whitespace(point)) 88 if(is_whitespace(point))
88 { 89 {
89 print("csv file is invalid"); 90 warn("csv file is invalid, detected whitespace");
90 return NULL; 91 return NULL;
91 } 92 }
92 93
93 switch(point) 94 switch(point)
94 { 95 {
95 case('\n'): 96 case('\n'):
96 { 97 {
97 if(FL) tok->flags |= END_FL; 98 if(FL) tok->flags |= END_FL;
98 break; 99 break;
99 } 100 }
100 101
101 case(','): 102 case(','):
102 { 103 {
103 end = index - 1; 104 end = index - 1;
104 start = index + 1; 105 start = index + 1;
105 break; 106 break;
106 } 107 }
107 default: 108 default:
108 { 109 {
109 break; 110 break;
110 } 111 }
111 } 112 }
112 113
113 tok->lexeme = StringCast(&buffer.data[start], end - start); 114 tok->lexeme = StringCast(&buffer.data[start], end - start);
114 tok->next = tok; 115 tok->next = tok;
115 } 116 }
116 117
117 return NULL; 118 return tok;
118} 119}
119 120
120internal void 121internal void
@@ -124,18 +125,24 @@ read_csv(string8 buffer)
124 125
125} 126}
126 127
127internal b_tree * 128internal b_tree *
128parse_csv(csv_token *tok, csv_table *table) 129parse_csv(mem_arena *arena, token *tok)
129{ 130{
131 b_tree *tree = PushStructZero(arena, b_tree);
132 b_tree_create(arena, tree);
130 133
131 134 for (; tok != NULL; tok = tok->next)
132 for (;tok->next; tok = tok->next)
133 { 135 {
134 b_tree_node *current_btree_node = btree_node_alloc; 136 // skip structural tokens, only index values
137 if (tok->type != TOKEN_VALUE)
138 {
139 continue;
140 }
135 141
136 142 // NOTE(nasr): payload is the token itself so the caller can reach
143 // row/col metadata without us having to copy it
144 b_tree_insert(arena, tree, tok->lexeme, (void *)tok);
137 } 145 }
138 146
139 return NULL; 147 return tree;
140} 148}
141
diff --git a/source/engine.c b/source/engine.c
index 1cfbab0..9797d8a 100644
--- a/source/engine.c
+++ b/source/engine.c
@@ -40,7 +40,7 @@ is_delimiter(u8 point)
40typedef struct query_token query_token; 40typedef struct query_token query_token;
41struct query_token 41struct query_token
42{ 42{
43 string8 *lexeme; 43 string8 lexeme;
44 query_token *next; 44 query_token *next;
45}; 45};
46 46
@@ -72,8 +72,9 @@ query_tokenizer(mem_arena *arena, string8 *buffer)
72 72
73 s32 new_token_size = end - start; 73 s32 new_token_size = end - start;
74 74
75 tok->lexeme->data = &buffer->data[index]; 75 tok->lexeme = PushString(arena, new_token_size);
76 tok->lexeme->size = new_token_size; 76 tok->lexeme.data = &buffer->data[index];
77 tok->lexeme.size = new_token_size;
77 78
78 tok->next = tok; 79 tok->next = tok;
79 start = index + 1; 80 start = index + 1;
@@ -83,21 +84,18 @@ query_tokenizer(mem_arena *arena, string8 *buffer)
83 return tok; 84 return tok;
84} 85}
85 86
86int main(int c, char **v) 87int main(int count, char **value)
87{ 88{
88 89 if(count < 2) value[1] = "./test/data.csv";
89 if(c < 2)
90 {
91 print("bad file, setting default file\n");
92 }
93 else v[1] = "./test/customers-10000.csv";
94 90
95 local_persist b32 running = 1; 91 local_persist b32 running = 1;
96 92
97 mem_arena *global_arena = arena_create(MiB(30)); 93 mem_arena *global_arena = arena_create(MiB(30));
98 csv_table *global_table = PushStruct(global_arena, csv_table);
99 94
100 string8 buffer = load_file(v[1]); 95 // NOTE(nasr): see note down below
96 // csv_table *global_table = PushStruct(global_arena, csv_table);
97
98 string8 buffer = load_file(global_arena, value[1]);
101 99
102 print("\nDatabase Engine\n"); 100 print("\nDatabase Engine\n");
103 101
@@ -106,23 +104,41 @@ int main(int c, char **v)
106 if (running) 104 if (running)
107 { 105 {
108 { 106 {
109 u8 lbuf[256] = {}; 107 u8 *lbuf = PushArray(global_arena, u8, 256);
110 s32 err = os_read(STDIN_FD, lbuf, 256); 108 s32 err = os_read(STDIN_FD, lbuf, 256);
109
111 if(err < 0) 110 if(err < 0)
112 { 111 {
113 print("error reading from stdin"); 112 print("error reading from stdin");
114 } 113 }
115 114
116 query_tokenizer(global_arena, &StringLit(lbuf)); 115 // TODO(nasr): extract this later in the future and make a string copy function/macro
116 // @params (s32 lbuf_size , string8 lbuf_stringified)
117 s32 lbuf_size = sizeof(lbuf) - 1;
118 string8 lbuf_stringified = PushString(global_arena, lbuf_size);
119 {
120 memcpy(lbuf_stringified.data, lbuf, lbuf_size);
121 lbuf_stringified.size = sizeof(lbuf) - 1;
122 }
117 123
124 query_tokenizer(global_arena, &lbuf_stringified);
118 } 125 }
119 126
120 { 127 {
121 read_csv(buffer); 128 read_csv(buffer);
122 token *tokens = tokenize_csv(buffer, global_arena); 129 token *tokens = tokenize_csv(buffer, global_arena);
123 global_table = parse_csv(tokens, global_table); 130
131 assert_msg(tokens != NULL, "Tokens are NULL.");
132
133 b_tree *bt = parse_csv(global_arena, tokens);
134 b_tree_write(bt);
124 } 135 }
125 136
137
138 // NOTE(nasr): not sure how to approach the b-tree vs. table format question.
139 // we probably want our own table format, but the use case isn't clear yet,
140 // so we stick with the regular b_tree for now and comment out the tables.
141
126 sleep(1); 142 sleep(1);
127 } 143 }
128 } 144 }