1 files changed, 72 insertions, 72 deletions
diff --git a/source/csv_decoder.h b/source/csv_decoder.h
index 446f1da..3d09dc6 100644
--- a/source/csv_decoder.h
+++ b/source/csv_decoder.h
@@ -55,6 +55,7 @@ struct csv_table
    csv_header  *header;
    s32         row_count;
    s32         header_count;
+    b32         finding_headers;
 };
@@ -63,17 +64,15 @@ struct csv_token_list
 {
    csv_token *start_token;
    csv_token *end_token;
 };
 read_only global_variable
 csv_token nil_csv_token=
 {
-    .lexeme         = {.data = NULL, .size =0},
+    .lexeme         = {.data = NULL, .size = 0},
    .type           = 0,
    .flags          = 0,
    .next_token     = &nil_csv_token,
 };
 read_only global_variable
@@ -90,7 +89,6 @@ csv_token_list nil_csv_token_list =
    .end_token   = &nil_csv_token,
 };
 read_only global_variable
 csv_row  nil_csv_row =
 {
@@ -113,28 +111,32 @@ is_nil_csv_token(csv_token *token)
    return ((token == NULL) || (token == &nil_csv_token)); 
 }
+// TODO(nasr): segfaulting because end_token not allocated
 internal void
 csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
 {
    source_token_list->end_token->next_token = source_token;
    source_token_list->end_token             = source_token;
 }
 //- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
 internal void
 csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
 {
+    if(is_nil_csv_token(source->start_token)) return;
-    csv_token *source_ct = source->start_token;
+    csv_token *source_ct      = source->start_token;
-    csv_token *destination_end_ct = destination->end_token;
+    csv_token *destination_et = destination->end_token;
-     for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
+    // walk source and stitch each node onto destination's tail
-     {
+    for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
-         destination_end_ct->next_token = source_ct;
+    {
-     }
+        destination_et->next_token = source_ct;
+        destination_et             = source_ct;
+    }
-     destination->end_token = source_ct;
+    // destination_et now points at the last real source node (not the nil sentinel)
+    destination->end_token = destination_et;
 }
 #if 0
@@ -153,88 +155,80 @@ parse_csv_row(string8 row_buffer)
 internal csv_token *
 tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
 {
    unused(token_list);
-    b32 finding_headers = TRUE;
    if(buffer.size == 0) return NULL;
-    csv_token *tok = PushStruct(arena, csv_token);
    // URGENT(nasr): segfaulting because memcpy of strring value doesnt  work dammit
    // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
    // forgot what the solution was
    // TODO(nasr): check what the problem here was
+    // string size tracking across the loop not inside it
+    s32 start = 0;
    for(s32 index = 0; buffer.data[index] != '\0'; ++index)
    {
        u8 point = buffer.data[index];
-        s32 start   = 0;
+#if 0
-        s32 end     = 0;
        if(is_whitespace(point))
        {
            warn("csv file is invalid, detected whitespace");
            return NULL;
        }
+#endif
+        if(point == ',')
-        if(point == '\n') 
        {
-            if(finding_headers)
+            // emit a token for the field that ended before this comma
-            {
+            csv_token *token  = PushStructZero(arena, csv_token);
-#if 0
-                string8 headers_buffer = {.data = &buffer.data[start], .size = end - start};
-#endif
-                finding_headers = FALSE;
-                {
+            assert_msg(token != NULL, "did the push struct fail??");
-                    //- map new header token list to table headers
+            assert_msg(arena->current_position < arena->capacity, "no more arena size");
-                }
-            }
-#if 0
-            else
-            {
-            }
+            token->lexeme     = StringCast(&buffer.data[start], index - start);
-#endif
+            token->type       = TOKEN_VALUE;
+            token->next_token = &nil_csv_token;
+            csv_token_list_append_token(token_list, token);
+            start = index + 1;
-            table->row_count++;
+            if(table->finding_headers)
-        }
-        else if(point == ',')
-        {
-            if (finding_headers)
            {
                table->header_count++;
            }
        }
+        else if(point == '\n')
-        switch(point)
        {
-            case('\n'):
+            // emit a token for the field that ended at this newline
-                {
+            csv_token *token  = PushStructZero(arena, csv_token);
-                    tok->flags |= FL;
+            token->lexeme     = StringCast(&buffer.data[start], index - start);
-                    break;
+            token->type       = TOKEN_VALUE;
-                }
+            token->flags     |= FL;
+            token->next_token = &nil_csv_token;
-            case(','):
+            assert_msg(token_list, "token list invalid");
-                {
+            assert_msg(token, "you're tring to append an invalid token");
-                    end = index - 1;
-                    start = index + 1;
+            csv_token_list_append_token(token_list, token);
-                    break;
-                }
+            start = index + 1;
-            default:
+            if(table->finding_headers)
+            {
                {
-                    break;
+                    //- map new header token list to table headers
                }
-        }
+                table->finding_headers = FALSE;
+            }
-        tok->lexeme         = StringCast(&buffer.data[start], end - start);
+            table->row_count++;
-        tok->next_token     = tok;
+        }
    }
-    return tok;
+    // NOTE(nasr): return the first token the caller can walk the list from token_list
+    return token_list->start_token;
 }
 //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
@@ -243,18 +237,24 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
 {
    btree *tree = PushStructZero(arena, btree);
+    s32 col_index = 0;
+    s32 row_index = 0;
    // iterate over the token list while the token is not nil
    for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
    {
-        //- TODO(nasr): check initizalization or something tomorrow
        {
            //- are we parsing the first line tokens?
            //- if so, do something :))
            if(ct->flags & FL) 
            {
+                // NOTE(nasr): FL marks end-of-line; advance row, reset col
+                row_index++;
+                col_index = 0;
                // TODO(nasr): replace with nil header check function
-                if(table->header != &nil_csv_header || table->header == NULL)
+                // NOTE(nasr): == nil means header hasn't been set yet
+                if(table->header == &nil_csv_header || table->header == NULL)
                {
 #if 0
                    // - no this should happen in the tokenization
@@ -265,30 +265,30 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
                {
                }
+                // FL tokens are structural, no value to index
+                continue;
            }
        }
-        // TODO(nasr): fix this logic tomorrow
+        // skip non-value tokens, only index actual cell values
-        csv_token *ct = PushStruct(arena, csv_token);
-        // skip structural ctens, only index values
        if (ct->type != TOKEN_VALUE)
        {
+            col_index++;
            continue;
        }
        // NOTE(nasr): payload is the cten itself so the caller can reach
        // row/col metadata without us having to copy it
-        // NOTE(nasr): heh why do we void cast again?
        key k = {
-            .header_index =  1,
+            .header_index = col_index,
-            .row_index    =  1,
+            .row_index    = row_index,
        };
-        // btree_insert(arena, tree, (key)ct->lexeme, (void *)ct);
        btree_insert(arena, tree, k, (void *)ct);
+        col_index++;
    }
    return tree;
 }

diff --git a/source/csv_decoder.h b/source/csv_decoder.h
index 446f1da..3d09dc6 100644
--- a/source/csv_decoder.h
+++ b/source/csv_decoder.h
@@ -55,6 +55,7 @@ struct csv_table
55	csv_header *header;	55	csv_header *header;
56	s32 row_count;	56	s32 row_count;
57	s32 header_count;	57	s32 header_count;
		58	b32 finding_headers;
58	};	59	};
59		60
60		61
@@ -63,17 +64,15 @@ struct csv_token_list
63	{	64	{
64	csv_token *start_token;	65	csv_token *start_token;
65	csv_token *end_token;	66	csv_token *end_token;
66
67	};	67	};
68		68
69	read_only global_variable	69	read_only global_variable
70	csv_token nil_csv_token=	70	csv_token nil_csv_token=
71	{	71	{
72	.lexeme = {.data = NULL, .size =0},	72	.lexeme = {.data = NULL, .size = 0},
73	.type = 0,	73	.type = 0,
74	.flags = 0,	74	.flags = 0,
75	.next_token = &nil_csv_token,	75	.next_token = &nil_csv_token,
76
77	};	76	};
78		77
79	read_only global_variable	78	read_only global_variable
@@ -90,7 +89,6 @@ csv_token_list nil_csv_token_list =
90	.end_token = &nil_csv_token,	89	.end_token = &nil_csv_token,
91	};	90	};
92		91
93
94	read_only global_variable	92	read_only global_variable
95	csv_row nil_csv_row =	93	csv_row nil_csv_row =
96	{	94	{
@@ -113,28 +111,32 @@ is_nil_csv_token(csv_token *token)
113	return ((token == NULL) || (token == &nil_csv_token));	111	return ((token == NULL) || (token == &nil_csv_token));
114	}	112	}
115		113
		114	// TODO(nasr): segfaulting because end_token not allocated
116	internal void	115	internal void
117	csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)	116	csv_token_list_append_token(csv_token_list *source_token_list, csv_token *source_token)
118	{	117	{
119	source_token_list->end_token->next_token = source_token;	118	source_token_list->end_token->next_token = source_token;
120	source_token_list->end_token = source_token;	119	source_token_list->end_token = source_token;
121
122	}	120	}
123		121
124	//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother	122	//- concatenate 2 token lists so we can handle parsing individual rows and concatenating them to eachother
125	internal void	123	internal void
126	csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)	124	csv_token_list_concat_list(csv_token_list *destination, csv_token_list *source)
127	{	125	{
		126	if(is_nil_csv_token(source->start_token)) return;
128		127
129	csv_token *source_ct = source->start_token;	128	csv_token *source_ct = source->start_token;
130	csv_token *destination_end_ct = destination->end_token;	129	csv_token *destination_et = destination->end_token;
131		130
132	for(;!is_nil_csv_token(source_ct); source_ct = source_ct->next_token)	131	// walk source and stitch each node onto destination's tail
133	{	132	for(; !is_nil_csv_token(source_ct); source_ct = source_ct->next_token)
134	destination_end_ct->next_token = source_ct;	133	{
135	}	134	destination_et->next_token = source_ct;
		135	destination_et = source_ct;
		136	}
136		137
137	destination->end_token = source_ct;	138	// destination_et now points at the last real source node (not the nil sentinel)
		139	destination->end_token = destination_et;
138	}	140	}
139		141
140	#if 0	142	#if 0
@@ -153,88 +155,80 @@ parse_csv_row(string8 row_buffer)
153	internal csv_token *	155	internal csv_token *
154	tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)	156	tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list *token_list)
155	{	157	{
156
157	unused(token_list);	158	unused(token_list);
158	b32 finding_headers = TRUE;
159		159
160	if(buffer.size == 0) return NULL;	160	if(buffer.size == 0) return NULL;
161		161
162	csv_token *tok = PushStruct(arena, csv_token);
163
164	// URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit	162	// URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
165	// NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???	163	// NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
166	// forgot what the solution was	164	// forgot what the solution was
167	// TODO(nasr): check what the problem here was	165	// TODO(nasr): check what the problem here was
		166
		167	// string size tracking across the loop not inside it
		168	s32 start = 0;
		169
168	for(s32 index = 0; buffer.data[index] != '\0'; ++index)	170	for(s32 index = 0; buffer.data[index] != '\0'; ++index)
169	{	171	{
170	u8 point = buffer.data[index];	172	u8 point = buffer.data[index];
171		173
172	s32 start = 0;	174	#if 0
173	s32 end = 0;
174
175	if(is_whitespace(point))	175	if(is_whitespace(point))
176	{	176	{
177	warn("csv file is invalid, detected whitespace");	177	warn("csv file is invalid, detected whitespace");
178	return NULL;	178	return NULL;
179	}	179	}
		180	#endif
180		181
181		182	if(point == ',')
182	if(point == '\n')
183	{	183	{
184	if(finding_headers)	184	// emit a token for the field that ended before this comma
185	{	185	csv_token *token = PushStructZero(arena, csv_token);
186	#if 0
187	string8 headers_buffer = {.data = &buffer.data[start], .size = end - start};
188	#endif
189	finding_headers = FALSE;
190		186
191	{	187	assert_msg(token != NULL, "did the push struct fail??");
192	//- map new header token list to table headers	188	assert_msg(arena->current_position < arena->capacity, "no more arena size");
193	}
194	}
195	#if 0
196	else
197	{
198		189
199	}	190	token->lexeme = StringCast(&buffer.data[start], index - start);
200	#endif	191	token->type = TOKEN_VALUE;
		192	token->next_token = &nil_csv_token;
		193	csv_token_list_append_token(token_list, token);
201		194
		195	start = index + 1;
202		196
203	table->row_count++;	197	if(table->finding_headers)
204	}
205	else if(point == ',')
206	{
207	if (finding_headers)
208	{	198	{
209	table->header_count++;	199	table->header_count++;
210	}	200	}
211	}	201	}
212		202	else if(point == '\n')
213	switch(point)
214	{	203	{
215	case('\n'):	204	// emit a token for the field that ended at this newline
216	{	205	csv_token *token = PushStructZero(arena, csv_token);
217	tok->flags |= FL;	206	token->lexeme = StringCast(&buffer.data[start], index - start);
218	break;	207	token->type = TOKEN_VALUE;
219	}	208	token->flags |= FL;
		209	token->next_token = &nil_csv_token;
220		210
221	case(','):	211	assert_msg(token_list, "token list invalid");
222	{	212	assert_msg(token, "you're tring to append an invalid token");
223	end = index - 1;	213
224	start = index + 1;	214	csv_token_list_append_token(token_list, token);
225	break;	215
226	}	216	start = index + 1;
227	default:	217
		218	if(table->finding_headers)
		219	{
228	{	220	{
229	break;	221	//- map new header token list to table headers
230	}	222	}
231	}	223	table->finding_headers = FALSE;
		224	}
232		225
233	tok->lexeme = StringCast(&buffer.data[start], end - start);	226	table->row_count++;
234	tok->next_token = tok;	227	}
235	}	228	}
236		229
237	return tok;	230	// NOTE(nasr): return the first token the caller can walk the list from token_list
		231	return token_list->start_token;
238	}	232	}
239		233
240	//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future	234	//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
@@ -243,18 +237,24 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
243	{	237	{
244	btree *tree = PushStructZero(arena, btree);	238	btree *tree = PushStructZero(arena, btree);
245		239
		240	s32 col_index = 0;
		241	s32 row_index = 0;
		242
246	// iterate over the token list while the token is not nil	243	// iterate over the token list while the token is not nil
247	for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)	244	for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
248	{	245	{
249
250	//- TODO(nasr): check initizalization or something tomorrow
251	{	246	{
252	//- are we parsing the first line tokens?	247	//- are we parsing the first line tokens?
253	//- if so, do something :))	248	//- if so, do something :))
254	if(ct->flags & FL)	249	if(ct->flags & FL)
255	{	250	{
		251	// NOTE(nasr): FL marks end-of-line; advance row, reset col
		252	row_index++;
		253	col_index = 0;
		254
256	// TODO(nasr): replace with nil header check function	255	// TODO(nasr): replace with nil header check function
257	if(table->header != &nil_csv_header || table->header == NULL)	256	// NOTE(nasr): == nil means header hasn't been set yet
		257	if(table->header == &nil_csv_header || table->header == NULL)
258	{	258	{
259	#if 0	259	#if 0
260	// - no this should happen in the tokenization	260	// - no this should happen in the tokenization
@@ -265,30 +265,30 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
265	{	265	{
266		266
267	}	267	}
		268
		269	// FL tokens are structural, no value to index
		270	continue;
268	}	271	}
269	}	272	}
270		273
271	// TODO(nasr): fix this logic tomorrow	274	// skip non-value tokens, only index actual cell values
272	csv_token *ct = PushStruct(arena, csv_token);
273	// skip structural ctens, only index values
274	if (ct->type != TOKEN_VALUE)	275	if (ct->type != TOKEN_VALUE)
275	{	276	{
		277	col_index++;
276	continue;	278	continue;
277	}	279	}
278		280
279	// NOTE(nasr): payload is the cten itself so the caller can reach	281	// NOTE(nasr): payload is the cten itself so the caller can reach
280	// row/col metadata without us having to copy it	282	// row/col metadata without us having to copy it
281	// NOTE(nasr): heh why do we void cast again?
282
283	key k = {	283	key k = {
284	.header_index = 1,	284	.header_index = col_index,
285	.row_index = 1,	285	.row_index = row_index,
286	};	286	};
287		287
288	// btree_insert(arena, tree, (key)ct->lexeme, (void *)ct);
289	btree_insert(arena, tree, k, (void *)ct);	288	btree_insert(arena, tree, k, (void *)ct);
		289
		290	col_index++;
290	}	291	}
291		292
292	return tree;	293	return tree;
293	}	294	}
294