From 0409a7cd12b00970609299fc9aa72635c43d1d31 Mon Sep 17 00:00:00 2001 From: nasr Date: Fri, 20 Mar 2026 18:26:22 +0000 Subject: feature(csv_decoder): fixed checking for , when pushing to the token list, proper looping -fixed string size tracking -state tracking in table. TODO(nasr): in the future. we don't like state tracking feature(csv_decoder): fixed checking for , when pushing to the token list, proper looping -fixed string size tracking -state tracking in table. TODO(nasr): in the future. we don't like state tracking --- source/csv_decoder.h | 144 +++++++++++++++++++++++++-------------------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/source/csv_decoder.h b/source/csv_decoder.h index 446f1da..3d09dc6 100644 --- a/source/csv_decoder.h +++ b/source/csv_decoder.h @@ -55,6 +55,7 @@ struct csv_table csv_header *header; s32 row_count; s32 header_count; + b32 finding_headers; }; @@ -63,17 +64,15 @@ struct csv_token_list { csv_token *start_token; csv_token *end_token; - }; read_only global_variable csv_token nil_csv_token= { - .lexeme = {.data = NULL, .size =0}, + .lexeme = {.data = NULL, .size = 0}, .type = 0, .flags = 0, .next_token = &nil_csv_token, - }; read_only global_variable @@ -90,7 +89,6 @@ csv_token_list nil_csv_token_list = .end_token = &nil_csv_token, }; - read_only global_variable csv_row nil_csv_row = { @@ -113,28 +111,32 @@ is_nil_csv_token(csv_token *token) return ((token == NULL) || (token == &nil_csv_token)); } +// TODO(nasr): segfaulting because end_token not allocated internal void csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token) { source_token_list->end_token->next_token = source_token; source_token_list->end_token = source_token; - } //- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother internal void csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source) { + if(is_nil_csv_token(source->start_token)) 
return; - csv_token *source_ct = source->start_token; - csv_token *destination_end_ct = destination->end_token; + csv_token *source_ct = source->start_token; + csv_token *destination_et = destination->end_token; - for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token) - { - destination_end_ct->next_token = source_ct; - } + // walk source and stitch each node onto destination's tail + for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token) + { + destination_et->next_token = source_ct; + destination_et = source_ct; + } - destination->end_token = source_ct; + // destination_et now points at the last real source node (not the nil sentinel) + destination->end_token = destination_et; } #if 0 @@ -153,88 +155,80 @@ parse_csv_row(string8 row_buffer) internal csv_token * tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list) { - unused(token_list); - b32 finding_headers = TRUE; if(buffer.size == 0) return NULL; - csv_token *tok = PushStruct(arena, csv_token); - // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING??? 
// forgot what the solution was // TODO(nasr): check what the problem here was + + // string size tracking across the loop not inside it + s32 start = 0; + for(s32 index = 0; buffer.data[index] != '\0'; ++index) { u8 point = buffer.data[index]; - s32 start = 0; - s32 end = 0; - +#if 0 if(is_whitespace(point)) { warn("csv file is invalid, detected whitespace"); return NULL; } +#endif - - if(point == '\n') + if(point == ',') { - if(finding_headers) - { -#if 0 - string8 headers_buffer = {.data = &buffer.data[start], .size = end - start}; -#endif - finding_headers = FALSE; + // emit a token for the field that ended before this comma + csv_token *token = PushStructZero(arena, csv_token); - { - //- map new header token list to table headers - } - } -#if 0 - else - { + assert_msg(token != NULL, "did the push struct fail??"); + assert_msg(arena->current_position < arena->capacity, "no more arena size"); - } -#endif + token->lexeme = StringCast(&buffer.data[start], index - start); + token->type = TOKEN_VALUE; + token->next_token = &nil_csv_token; + csv_token_list_append_token(token_list, token); + start = index + 1; - table->row_count++; - } - else if(point == ',') - { - if (finding_headers) + if(table->finding_headers) { table->header_count++; } } - - switch(point) + else if(point == '\n') { - case('\n'): - { - tok->flags |= FL; - break; - } + // emit a token for the field that ended at this newline + csv_token *token = PushStructZero(arena, csv_token); + token->lexeme = StringCast(&buffer.data[start], index - start); + token->type = TOKEN_VALUE; + token->flags |= FL; + token->next_token = &nil_csv_token; - case(','): - { - end = index - 1; - start = index + 1; - break; - } - default: + assert_msg(token_list, "token list invalid"); + assert_msg(token, "you're tring to append an invalid token"); + + csv_token_list_append_token(token_list, token); + + start = index + 1; + + if(table->finding_headers) + { { - break; + //- map new header token list to table headers } - } + 
table->finding_headers = FALSE; + } - tok->lexeme = StringCast(&buffer.data[start], end - start); - tok->next_token = tok; + table->row_count++; + } } - return tok; + // NOTE(nasr): return the first token the caller can walk the list from token_list + return token_list->start_token; } //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future @@ -243,18 +237,24 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) { btree *tree = PushStructZero(arena, btree); + s32 col_index = 0; + s32 row_index = 0; + // iterate over the token list while the token is not nil for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) { - - //- TODO(nasr): check initizalization or something tomorrow { //- are we parsing the first line tokens? //- if so, do something :)) if(ct->flags & FL) { + // NOTE(nasr): FL marks end-of-line; advance row, reset col + row_index++; + col_index = 0; + // TODO(nasr): replace with nil header check function - if(table->header != &nil_csv_header || table->header == NULL) + // NOTE(nasr): == nil means header hasn't been set yet + if(table->header == &nil_csv_header || table->header == NULL) { #if 0 // - no this should happen in the tokenization @@ -265,30 +265,30 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) { } + + // FL tokens are structural, no value to index + continue; } } - // TODO(nasr): fix this logic tomorrow - csv_token *ct = PushStruct(arena, csv_token); - // skip structural ctens, only index values + // skip non-value tokens, only index actual cell values if (ct->type != TOKEN_VALUE) { + col_index++; continue; } // NOTE(nasr): payload is the cten itself so the caller can reach // row/col metadata without us having to copy it - // NOTE(nasr): heh why do we void cast again? 
- key k = { - .header_index = 1, - .row_index = 1, + .header_index = col_index, + .row_index = row_index, }; - // btree_insert(arena, tree, (key)ct->lexeme, (void *)ct); btree_insert(arena, tree, k, (void *)ct); + + col_index++; } return tree; } - -- cgit v1.3