summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authornasr <nsrddyn@gmail.com>2026-03-15 15:16:22 +0000
committernasr <nsrddyn@gmail.com>2026-03-15 15:16:22 +0000
commit2e258673171c2e4663a8b5d58e2ad174bb0ecd96 (patch)
tree25f50ee09d67f5b6466c1ba7655c26c1806bce57 /source
parentff2ac6f584a1b08c0e66ac65b9c8e8d68e79f124 (diff)
feature(main): nil tokens for both lexers :)
Diffstat (limited to 'source')
-rw-r--r--source/csv_reader.h69
-rw-r--r--source/engine.c89
2 files changed, 128 insertions, 30 deletions
diff --git a/source/csv_reader.h b/source/csv_reader.h
index 7f5bf06..f5205bf 100644
--- a/source/csv_reader.h
+++ b/source/csv_reader.h
@@ -1,15 +1,15 @@
1#ifndef ENGINE_LEXER_H 1#ifndef ENGINE_LEXER_H
2#define ENGINE_LEXER_H 2#define ENGINE_LEXER_H
3 3
4typedef enum token_flags token_flags; 4typedef enum csv_token_flags csv_token_flags;
5enum token_flags 5enum csv_token_flags
6{ 6{
7 START_FL = 1 << 1, 7 START_FL = 1 << 1,
8 END_FL = 1 << 2, 8 END_FL = 1 << 2,
9}; 9};
10 10
11typedef enum token_type token_type; 11typedef enum csv_token_type csv_token_type;
12enum token_type 12enum csv_token_type
13{ 13{
14 // first 255 tokens for ascii characters 14 // first 255 tokens for ascii characters
15 TOKEN_UNDEFINED = 255, 15 TOKEN_UNDEFINED = 255,
@@ -17,13 +17,13 @@ enum token_type
17 TOKEN_VALUE, 17 TOKEN_VALUE,
18}; 18};
19 19
20typedef struct token token; 20typedef struct csv_token csv_token;
21struct token 21struct csv_token
22{ 22{
23 string8 lexeme; 23 string8 lexeme;
24 token_type type; 24 csv_token_type type;
25 token_flags flags; 25 csv_token_flags flags;
26 token *next; 26 csv_token *next;
27}; 27};
28 28
29// NOTE(nasr): i dont think im going to use this. 29// NOTE(nasr): i dont think im going to use this.
@@ -46,6 +46,33 @@ struct csv_table
46 s32 row_count; 46 s32 row_count;
47}; 47};
48 48
49
50typedef struct csv_token_list csv_token_list;
51struct csv_token_list
52{
53 csv_token *start_token;
54 csv_token *end_token;
55
56};
57
58read_only global_variable
59csv_token nil_csv_token=
60{
61 .lexeme = {.data = NULL, .size =0},
62 .type = (csv_token_type)0,
63 .flags = 0,
64 .next = &nil_csv_token,
65
66};
67
68read_only global_variable
69csv_token_list nil_csv_token_list =
70{
71 .start_token = &nil_csv_token,
72 .end_token = &nil_csv_token,
73};
74
75
49read_only global_variable 76read_only global_variable
50csv_row nil_csv_row = 77csv_row nil_csv_row =
51{ 78{
@@ -67,14 +94,14 @@ csv_table nil_csv_table =
67// the lexer acts as a table builder from a csv file 94// the lexer acts as a table builder from a csv file
68// and parsing individual rows and columns 95// and parsing individual rows and columns
69// the next step would be building the b-tree 96// the next step would be building the b-tree
70internal token * 97internal csv_token *
71tokenize_csv(string8 buffer, mem_arena *arena) 98tokenize_csv(string8 buffer, mem_arena *arena)
72{ 99{
73 b32 FL = TRUE; 100 b32 FL = TRUE;
74 101
75 if(buffer.size < 0) return NULL; 102 if(buffer.size < 0) return NULL;
76 103
77 token *tok = PushStruct(arena, token); 104 csv_token *tok = PushStruct(arena, csv_token);
78 105
79 // URGENT(nasr): segfaulting because memcpy of string value doesn't work, dammit 106 // URGENT(nasr): segfaulting because memcpy of string value doesn't work, dammit
80 // NOPE, IT'S BECAUSE WE DON'T LOAD THE CSV OR SOMETHING??? 107 // NOPE, IT'S BECAUSE WE DON'T LOAD THE CSV OR SOMETHING???
@@ -126,22 +153,30 @@ read_csv(string8 buffer)
126} 153}
127 154
128internal b_tree * 155internal b_tree *
129parse_csv(mem_arena *arena, token *tok) 156parse_csv(mem_arena *arena, csv_token_list *ctl)
130{ 157{
131 b_tree *tree = PushStructZero(arena, b_tree); 158 b_tree *tree = PushStructZero(arena, b_tree);
132 b_tree_create(arena, tree); 159 b_tree_create(arena, tree);
133 160
134 for (; tok != NULL; tok = tok->next) 161 //- TODO(nasr): check initialization or something tomorrow
162 {
163
164 }
165 // TODO(nasr): fix this logic tomorrow
166 csv_token *ct = PushStruct(arena, csv_token);
167
168 for (;ct != NULL; ct = ct->next)
135 { 169 {
136 // skip structural tokens, only index values 170 // skip structural tokens, only index values
137 if (tok->type != TOKEN_VALUE) 171 if (ct->type != TOKEN_VALUE)
138 { 172 {
139 continue; 173 continue;
140 } 174 }
141 175
142 // NOTE(nasr): payload is the token itself so the caller can reach 176 // NOTE(nasr): payload is the token itself so the caller can reach
143 // row/col metadata without us having to copy it 177 // row/col metadata without us having to copy it
144 b_tree_insert(arena, tree, tok->lexeme, (void *)tok); 178 // NOTE(nasr): heh why do we void cast again?
179 b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
145 } 180 }
146 181
147 return tree; 182 return tree;
diff --git a/source/engine.c b/source/engine.c
index 9797d8a..106f113 100644
--- a/source/engine.c
+++ b/source/engine.c
@@ -1,3 +1,6 @@
1
2
3
1#define B_TREE_IMPLEMENTATION 4#define B_TREE_IMPLEMENTATION
2#define BASE_UNITY 5#define BASE_UNITY
3#include "base/base_include.h" 6#include "base/base_include.h"
@@ -33,7 +36,6 @@ is_delimiter(u8 point)
33 36
34} 37}
35 38
36
37#include "b_tree.h" 39#include "b_tree.h"
38#include "csv_reader.h" 40#include "csv_reader.h"
39 41
@@ -44,13 +46,48 @@ struct query_token
44 query_token *next; 46 query_token *next;
45}; 47};
46 48
49typedef struct query_token_list query_token_list;
50struct query_token_list
51{
52 query_token *start_token;
53 query_token *current_token;
54};
55
56read_only global_variable
57query_token nil_query_token =
58{
59 .lexeme = {.data = NULL, .size = 0},
60 .next = &nil_query_token
61};
62
63
64read_only global_variable
65query_token_list nil_query_token_list =
66{
67 .start_token = &nil_query_token,
68 .current_token = &nil_query_token,
69};
70
71internal b32
72is_nil_query_token(query_token *token)
73{
74 return (token == &nil_query_token) || (token == NULL);
75}
76
77internal b32
78is_nil_query_token_list(query_token *token)
79{
80 return (token == &nil_query_token) || (token == NULL);
81}
82
47 83
48// takes one line of the repl input 84// takes one line of the repl input
49internal query_token * 85// return a reference to the passed list
50query_tokenizer(mem_arena *arena, string8 *buffer) 86internal query_token_list *
87query_tokenizer(mem_arena *arena, string8 *buffer, query_token_list *list)
51{ 88{
52 query_token *tok = PushStruct(arena, query_token); 89 b32 initialized = 0;
53 unused(tok); 90 unused(initialized);
54 91
55 for (u64 index = 0; index < buffer->size; ++index) 92 for (u64 index = 0; index < buffer->size; ++index)
56 { 93 {
@@ -69,23 +106,44 @@ query_tokenizer(mem_arena *arena, string8 *buffer)
69 // save the token 106 // save the token
70 // TODO(nasr): work on the string macros cuz no work 107 // TODO(nasr): work on the string macros cuz no work
71 { 108 {
109 query_token *new_token = PushStruct(arena, query_token);
110
111 //- initialize list
112 {
113 if(is_nil_query_token(list->start_token))
114 {
115 list->start_token = new_token;
116 list->current_token = new_token;
117 }
118 else
119 {
120 //- all we need to do - we don't track parents or whatever. this is a token stream, not a tree
121 list->current_token->next = new_token;
122 }
123 }
72 124
73 s32 new_token_size = end - start; 125 s32 new_token_size = end - start;
74 126
75 tok->lexeme = PushString(arena, new_token_size); 127 new_token->lexeme = PushString(arena, new_token_size);
76 tok->lexeme.data = &buffer->data[index]; 128 new_token->lexeme.data = &buffer->data[index];
77 tok->lexeme.size = new_token_size; 129 new_token->lexeme.size = new_token_size;
130
131 list->current_token->next = new_token;
78 132
79 tok->next = tok;
80 start = index + 1; 133 start = index + 1;
81 } 134 }
82 } 135 }
83 136
84 return tok; 137 return list;
85} 138}
86 139
87int main(int count, char **value) 140int main(int count, char **value)
88{ 141{
142
143#if 1
144 unused(nil_query_token_list);
145#endif
146
89 if(count < 2) value[1] = "./test/data.csv"; 147 if(count < 2) value[1] = "./test/data.csv";
90 148
91 local_persist b32 running = 1; 149 local_persist b32 running = 1;
@@ -121,16 +179,21 @@ int main(int count, char **value)
121 lbuf_stringified.size = sizeof(lbuf) - 1; 179 lbuf_stringified.size = sizeof(lbuf) - 1;
122 } 180 }
123 181
124 query_tokenizer(global_arena, &lbuf_stringified); 182 query_token_list *qtl = PushStruct(global_arena, query_token_list);
183
184 query_tokenizer(global_arena, &lbuf_stringified, qtl);
125 } 185 }
126 186
127 { 187 {
128 read_csv(buffer); 188 read_csv(buffer);
129 token *tokens = tokenize_csv(buffer, global_arena); 189
190 csv_token *tokens = tokenize_csv(buffer, global_arena);
130 191
131 assert_msg(tokens != NULL, "Tokens are NULL."); 192 assert_msg(tokens != NULL, "Tokens are NULL.");
132 193
133 b_tree *bt = parse_csv(global_arena, tokens); 194 csv_token_list *ctl = PushStruct(global_arena, csv_token_list);
195 b_tree *bt = parse_csv(global_arena, ctl);
196
134 b_tree_write(bt); 197 b_tree_write(bt);
135 } 198 }
136 199