summary | refs | log | tree | commit | diff
path: root/source/csv_reader.h
diff options
context:
space:
mode:
author: nasr <nsrddyn@gmail.com> 2026-03-16 19:20:23 +0000
committer: nasr <nsrddyn@gmail.com> 2026-03-16 19:20:23 +0000
commit: 180ccc84aac07c7bee2b09a6e07f7406908409b9 (patch)
tree: efa39665e41c3132626f2c08b2f3ae0d18adc17a /source/csv_reader.h
parent: 2e258673171c2e4663a8b5d58e2ad174bb0ecd96 (diff)
feature(main): lots of stuff see description
1. Increased compile-time warnings to help with some optimizations. 2. Implemented csv lexing helper functions that operate on token lists, such as appending to a list and concatenating lists with each other. 3. Realized that the b-tree design is faulty, so disabled it; it will be refactored in the next approach.
Diffstat (limited to 'source/csv_reader.h')
-rw-r--r--source/csv_reader.h183
1 files changed, 0 insertions, 183 deletions
diff --git a/source/csv_reader.h b/source/csv_reader.h
deleted file mode 100644
index f5205bf..0000000
--- a/source/csv_reader.h
+++ /dev/null
@@ -1,183 +0,0 @@
1#ifndef ENGINE_LEXER_H
2#define ENGINE_LEXER_H
3
// Bit flags stored in csv_token.flags.
// The typedef and the enumerator list are a single declaration: ISO C does not
// allow naming an enum type before its enumerators are declared, so the
// previous `typedef enum csv_token_flags csv_token_flags;` forward reference
// only compiled as a compiler extension.
// NOTE: the bits start at 1 << 1; bit 0 is currently unused.
typedef enum csv_token_flags
{
    START_FL = 1 << 1,
    END_FL   = 1 << 2,
} csv_token_flags;
10
// Kind of a csv_token.
// The typedef and the enumerator list are a single declaration: ISO C does not
// allow naming an enum type before its enumerators are declared, so the
// previous forward `typedef enum csv_token_type csv_token_type;` only compiled
// as a compiler extension.
typedef enum csv_token_type
{
    // first 255 tokens for ascii characters: the named kinds start at 255 so
    // lower values stay available for single character codes
    TOKEN_UNDEFINED = 255,
    TOKEN_IDENTIFIER,
    TOKEN_VALUE,
} csv_token_type;
19
20typedef struct csv_token csv_token;
21struct csv_token
22{
23 string8 lexeme;
24 csv_token_type type;
25 csv_token_flags flags;
26 csv_token *next;
27};
28
29// NOTE(nasr): i dont think im going to use this.
30typedef struct csv_row csv_row;
31struct csv_row
32{
33 // array of size col_count, points into mmap buffer
34 string8 *fields;
35 s32 count;
36};
37
38typedef struct csv_table csv_table;
39struct csv_table
40{
41 // first row, col names
42 // all data rows
43 string8 *headers;
44 csv_row *rows;
45 s32 col_count;
46 s32 row_count;
47};
48
49
50typedef struct csv_token_list csv_token_list;
51struct csv_token_list
52{
53 csv_token *start_token;
54 csv_token *end_token;
55
56};
57
58read_only global_variable
59csv_token nil_csv_token=
60{
61 .lexeme = {.data = NULL, .size =0},
62 .type = (csv_token_type)0,
63 .flags = 0,
64 .next = &nil_csv_token,
65
66};
67
68read_only global_variable
69csv_token_list nil_csv_token_list =
70{
71 .start_token = &nil_csv_token,
72 .end_token = &nil_csv_token,
73};
74
75
76read_only global_variable
77csv_row nil_csv_row =
78{
79 .fields = &nil_string,
80 .count = 0,
81};
82
83read_only global_variable
84csv_table nil_csv_table =
85{
86 .headers = &nil_string,
87 .rows = &nil_csv_row,
88 .col_count = 0,
89 .row_count = 0,
90};
91
92#endif /* ENGINE_LEXER_H */
93
94// the lexer acts as a table builder from a csv file
95// and parsing indivudal rows and columns
96// the next step would be building a the b-tree
97internal csv_token *
98tokenize_csv(string8 buffer, mem_arena *arena)
99{
100 b32 FL = TRUE;
101
102 if(buffer.size < 0) return NULL;
103
104 csv_token *tok = PushStruct(arena, csv_token);
105
106 // URGENT(nasr): segfaulting because memcpy of strring value doesnt work dammit
107 // NOPE ITS BEECAUSE WEE DONT LOAD CSV OR SOMTHING???
108 for(s32 index = 0; buffer.data[index] != '\0'; ++index)
109 {
110 u8 point = buffer.data[index];
111
112 s32 start = 0;
113 s32 end = 0;
114
115 if(is_whitespace(point))
116 {
117 warn("csv file is invalid, detected whitespace");
118 return NULL;
119 }
120
121 switch(point)
122 {
123 case('\n'):
124 {
125 if(FL) tok->flags |= END_FL;
126 break;
127 }
128
129 case(','):
130 {
131 end = index - 1;
132 start = index + 1;
133 break;
134 }
135 default:
136 {
137 break;
138 }
139 }
140
141 tok->lexeme = StringCast(&buffer.data[start], end - start);
142 tok->next = tok;
143 }
144
145 return tok;
146}
147
148internal void
149read_csv(string8 buffer)
150{
151 // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
152
153}
154
155internal b_tree *
156parse_csv(mem_arena *arena, csv_token_list *ctl)
157{
158 b_tree *tree = PushStructZero(arena, b_tree);
159 b_tree_create(arena, tree);
160
161 //- TODO(nasr): check initizalization or something tomorrow
162 {
163
164 }
165 // TODO(nasr): fix this logic tomorrow
166 csv_token *ct = PushStruct(arena, csv_token);
167
168 for (;ct != NULL; ct = ct->next)
169 {
170 // skip structural ctens, only index values
171 if (ct->type != TOKEN_VALUE)
172 {
173 continue;
174 }
175
176 // NOTE(nasr): payload is the cten itself so the caller can reach
177 // row/col metadata without us having to copy it
178 // NOTE(nasr): heh why do we void cast again?
179 b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
180 }
181
182 return tree;
183}