summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/csv_decoder.h144
1 files changed, 72 insertions, 72 deletions
diff --git a/source/csv_decoder.h b/source/csv_decoder.h
index 446f1da..3d09dc6 100644
--- a/source/csv_decoder.h
+++ b/source/csv_decoder.h
@@ -55,6 +55,7 @@ struct csv_table
55 csv_header *header; 55 csv_header *header;
56 s32 row_count; 56 s32 row_count;
57 s32 header_count; 57 s32 header_count;
58 b32 finding_headers;
58}; 59};
59 60
60 61
@@ -63,17 +64,15 @@ struct csv_token_list
63{ 64{
64 csv_token *start_token; 65 csv_token *start_token;
65 csv_token *end_token; 66 csv_token *end_token;
66
67}; 67};
68 68
69read_only global_variable 69read_only global_variable
70csv_token nil_csv_token= 70csv_token nil_csv_token=
71{ 71{
72 .lexeme = {.data = NULL, .size =0}, 72 .lexeme = {.data = NULL, .size = 0},
73 .type = 0, 73 .type = 0,
74 .flags = 0, 74 .flags = 0,
75 .next_token = &nil_csv_token, 75 .next_token = &nil_csv_token,
76
77}; 76};
78 77
79read_only global_variable 78read_only global_variable
@@ -90,7 +89,6 @@ csv_token_list nil_csv_token_list =
90 .end_token = &nil_csv_token, 89 .end_token = &nil_csv_token,
91}; 90};
92 91
93
94read_only global_variable 92read_only global_variable
95csv_row nil_csv_row = 93csv_row nil_csv_row =
96{ 94{
@@ -113,28 +111,32 @@ is_nil_csv_token(csv_token *token)
113 return ((token == NULL) || (token == &nil_csv_token)); 111 return ((token == NULL) || (token == &nil_csv_token));
114} 112}
115 113
114// TODO(nasr): segfaulting because end_token not allocated
116internal void 115internal void
117csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token) 116csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
118{ 117{
119 source_token_list->end_token->next_token = source_token; 118 source_token_list->end_token->next_token = source_token;
120 source_token_list->end_token = source_token; 119 source_token_list->end_token = source_token;
121
122} 120}
123 121
124//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother 122//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
125internal void 123internal void
126csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source) 124csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
127{ 125{
126 if(is_nil_csv_token(source->start_token)) return;
128 127
129 csv_token *source_ct = source->start_token; 128 csv_token *source_ct = source->start_token;
130 csv_token *destination_end_ct = destination->end_token; 129 csv_token *destination_et = destination->end_token;
131 130
132 for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token) 131 // walk source and stitch each node onto destination's tail
133 { 132 for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
134 destination_end_ct->next_token = source_ct; 133 {
135 } 134 destination_et->next_token = source_ct;
135 destination_et = source_ct;
136 }
136 137
137 destination->end_token = source_ct; 138 // destination_et now points at the last real source node (not the nil sentinel)
139 destination->end_token = destination_et;
138} 140}
139 141
140#if 0 142#if 0
@@ -153,88 +155,80 @@ parse_csv_row(string8 row_buffer)
153internal csv_token * 155internal csv_token *
154tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list) 156tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
155{ 157{
156
157 unused(token_list); 158 unused(token_list);
158 b32 finding_headers = TRUE;
159 159
160 if(buffer.size == 0) return NULL; 160 if(buffer.size == 0) return NULL;
161 161
162 csv_token *tok = PushStruct(arena, csv_token);
163
164 // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit 162 // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
165 // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? 163 // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
166 // forgot what the solution was 164 // forgot what the solution was
167 // TODO(nasr): check what the problem here was 165 // TODO(nasr): check what the problem here was
166
167 // string size tracking across the loop not inside it
168 s32 start = 0;
169
168 for(s32 index = 0; buffer.data[index] != '\0'; ++index) 170 for(s32 index = 0; buffer.data[index] != '\0'; ++index)
169 { 171 {
170 u8 point = buffer.data[index]; 172 u8 point = buffer.data[index];
171 173
172 s32 start = 0; 174#if 0
173 s32 end = 0;
174
175 if(is_whitespace(point)) 175 if(is_whitespace(point))
176 { 176 {
177 warn("csv file is invalid, detected whitespace"); 177 warn("csv file is invalid, detected whitespace");
178 return NULL; 178 return NULL;
179 } 179 }
180#endif
180 181
181 182 if(point == ',')
182 if(point == '\n')
183 { 183 {
184 if(finding_headers) 184 // emit a token for the field that ended before this comma
185 { 185 csv_token *token = PushStructZero(arena, csv_token);
186#if 0
187 string8 headers_buffer = {.data = &buffer.data[start], .size = end - start};
188#endif
189 finding_headers = FALSE;
190 186
191 { 187 assert_msg(token != NULL, "did the push struct fail??");
192 //- map new header token list to table headers 188 assert_msg(arena->current_position < arena->capacity, "no more arena size");
193 }
194 }
195#if 0
196 else
197 {
198 189
199 } 190 token->lexeme = StringCast(&buffer.data[start], index - start);
200#endif 191 token->type = TOKEN_VALUE;
192 token->next_token = &nil_csv_token;
193 csv_token_list_append_token(token_list, token);
201 194
195 start = index + 1;
202 196
203 table->row_count++; 197 if(table->finding_headers)
204 }
205 else if(point == ',')
206 {
207 if (finding_headers)
208 { 198 {
209 table->header_count++; 199 table->header_count++;
210 } 200 }
211 } 201 }
212 202 else if(point == '\n')
213 switch(point)
214 { 203 {
215 case('\n'): 204 // emit a token for the field that ended at this newline
216 { 205 csv_token *token = PushStructZero(arena, csv_token);
217 tok->flags |= FL; 206 token->lexeme = StringCast(&buffer.data[start], index - start);
218 break; 207 token->type = TOKEN_VALUE;
219 } 208 token->flags |= FL;
209 token->next_token = &nil_csv_token;
220 210
221 case(','): 211 assert_msg(token_list, "token list invalid");
222 { 212 assert_msg(token, "you're tring to append an invalid token");
223 end = index - 1; 213
224 start = index + 1; 214 csv_token_list_append_token(token_list, token);
225 break; 215
226 } 216 start = index + 1;
227 default: 217
218 if(table->finding_headers)
219 {
228 { 220 {
229 break; 221 //- map new header token list to table headers
230 } 222 }
231 } 223 table->finding_headers = FALSE;
224 }
232 225
233 tok->lexeme = StringCast(&buffer.data[start], end - start); 226 table->row_count++;
234 tok->next_token = tok; 227 }
235 } 228 }
236 229
237 return tok; 230 // NOTE(nasr): return the first token the caller can walk the list from token_list
231 return token_list->start_token;
238} 232}
239 233
240//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future 234//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
@@ -243,18 +237,24 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
243{ 237{
244 btree *tree = PushStructZero(arena, btree); 238 btree *tree = PushStructZero(arena, btree);
245 239
240 s32 col_index = 0;
241 s32 row_index = 0;
242
246 // iterate over the token list while the token is not nil 243 // iterate over the token list while the token is not nil
247 for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) 244 for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
248 { 245 {
249
250 //- TODO(nasr): check initizalization or something tomorrow
251 { 246 {
252 //- are we parsing the first line tokens? 247 //- are we parsing the first line tokens?
253 //- if so, do something :)) 248 //- if so, do something :))
254 if(ct->flags & FL) 249 if(ct->flags & FL)
255 { 250 {
251 // NOTE(nasr): FL marks end-of-line; advance row, reset col
252 row_index++;
253 col_index = 0;
254
256 // TODO(nasr): replace with nil header check function 255 // TODO(nasr): replace with nil header check function
257 if(table->header != &nil_csv_header || table->header == NULL) 256 // NOTE(nasr): == nil means header hasn't been set yet
257 if(table->header == &nil_csv_header || table->header == NULL)
258 { 258 {
259#if 0 259#if 0
260 // - no this should happen in the tokenization 260 // - no this should happen in the tokenization
@@ -265,30 +265,30 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
265 { 265 {
266 266
267 } 267 }
268
269 // FL tokens are structural, no value to index
270 continue;
268 } 271 }
269 } 272 }
270 273
271 // TODO(nasr): fix this logic tomorrow 274 // skip non-value tokens, only index actual cell values
272 csv_token *ct = PushStruct(arena, csv_token);
273 // skip structural ctens, only index values
274 if (ct->type != TOKEN_VALUE) 275 if (ct->type != TOKEN_VALUE)
275 { 276 {
277 col_index++;
276 continue; 278 continue;
277 } 279 }
278 280
279 // NOTE(nasr): payload is the cten itself so the caller can reach 281 // NOTE(nasr): payload is the cten itself so the caller can reach
280 // row/col metadata without us having to copy it 282 // row/col metadata without us having to copy it
281 // NOTE(nasr): heh why do we void cast again?
282
283 key k = { 283 key k = {
284 .header_index = 1, 284 .header_index = col_index,
285 .row_index = 1, 285 .row_index = row_index,
286 }; 286 };
287 287
288 // btree_insert(arena, tree, (key)ct->lexeme, (void *)ct);
289 btree_insert(arena, tree, k, (void *)ct); 288 btree_insert(arena, tree, k, (void *)ct);
289
290 col_index++;
290 } 291 }
291 292
292 return tree; 293 return tree;
293} 294}
294