1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
|
#ifndef ENGINE_LEXER_H
#define ENGINE_LEXER_H
// Bit flags marking a token's position within a CSV row.
typedef enum csv_token_flags csv_token_flags;
enum csv_token_flags
{
// NOTE(review): bit 0 is unused — flags start at 1 << 1. Harmless, but
// 1 << 0 would be the conventional first flag.
START_FL = 1 << 1,
END_FL = 1 << 2,
};
// Kinds of token the lexer can produce.
typedef enum csv_token_type csv_token_type;
enum csv_token_type
{
// first 255 tokens for ascii characters
// NOTE(review): raw byte values span 0..255, so TOKEN_UNDEFINED = 255
// collides with the byte 0xFF; starting at 256 would be unambiguous.
// Confirm the input is 7-bit ASCII before relying on this layout.
TOKEN_UNDEFINED = 255,
TOKEN_IDENTIFIER,
TOKEN_VALUE,
};
// One lexed CSV cell, stored as a singly linked list node.
typedef struct csv_token csv_token;
struct csv_token
{
// view into the source buffer (no copy) — assumes the buffer (e.g. the
// mmapped file) outlives the token; TODO confirm lifetime at call sites
string8 lexeme;
csv_token_type type;
csv_token_flags flags;
// next token in lexing order; termination convention is set by the lexer
csv_token *next;
};
// NOTE(nasr): i dont think im going to use this.
// One parsed CSV row: an array of field views into the source buffer.
typedef struct csv_row csv_row;
struct csv_row
{
// array of size col_count, points into mmap buffer
string8 *fields;
// number of entries in `fields`
s32 count;
};
// A fully parsed CSV file: header row plus all data rows.
typedef struct csv_table csv_table;
struct csv_table
{
// first row, col names
// all data rows
string8 *headers;
csv_row *rows;
// headers has col_count entries; rows has row_count entries
s32 col_count;
s32 row_count;
};
// Head/tail handle for a lexed token list (tail enables O(1) append).
typedef struct csv_token_list csv_token_list;
struct csv_token_list
{
csv_token *start_token;
csv_token *end_token;
};
// Nil sentinel objects: shared read-only "empty" instances so code can
// return/point at a valid object instead of NULL. Never written to.
// Sentinel token; its `next` points at itself, so loops over a token
// list must compare against &nil_csv_token (or NULL), not just NULL.
read_only global_variable
csv_token nil_csv_token=
{
.lexeme = {.data = NULL, .size =0},
.type = (csv_token_type)0,
.flags = 0,
.next = &nil_csv_token,
};
// Empty token list: both ends point at the nil token.
read_only global_variable
csv_token_list nil_csv_token_list =
{
.start_token = &nil_csv_token,
.end_token = &nil_csv_token,
};
// Empty row: fields points at the (presumably project-wide) nil_string.
read_only global_variable
csv_row nil_csv_row =
{
.fields = &nil_string,
.count = 0,
};
// Empty table: zero columns/rows, members point at the other sentinels.
read_only global_variable
csv_table nil_csv_table =
{
.headers = &nil_string,
.rows = &nil_csv_row,
.col_count = 0,
.row_count = 0,
};
#endif /* ENGINE_LEXER_H */
// The lexer acts as a table builder from a CSV file, splitting the
// buffer into comma/newline-delimited tokens; the next step is
// building the b-tree over those tokens.
//
// Tokenizes `buffer` into an arena-allocated, NULL-terminated singly
// linked list of csv_token. Each lexeme points into `buffer` (no copy),
// so tokens must not outlive the underlying buffer. The first token
// carries START_FL; the last token of every row carries END_FL.
// Returns the list head, or NULL on empty/invalid input.
//
// Fixes vs. previous version: `buffer.size < 0` never fired for an
// unsigned size; only one token was ever allocated and `tok->next = tok`
// made a self-loop (the reported segfault/hang); `start`/`end` were
// reset to 0 each iteration, so lexemes were garbage.
internal csv_token *
tokenize_csv(string8 buffer, mem_arena *arena)
{
    if(buffer.data == NULL || buffer.size == 0) return NULL;

    csv_token *first = NULL; // head of the token list
    csv_token *last = NULL;  // tail, for O(1) append
    s32 start = 0;           // offset where the current field begins

    // Walk one index past the end so the final field (which has no
    // trailing delimiter) is still emitted; bound by size, not '\0'.
    for(s32 index = 0; index <= (s32)buffer.size; ++index)
    {
        u8 point = (index < (s32)buffer.size) ? buffer.data[index] : '\0';
        b32 is_delim = (point == ',' || point == '\n' || point == '\0');

        // Reject stray whitespace inside fields. Newlines are row
        // delimiters and are claimed by the is_delim check above, so
        // they never reach this rejection even if is_whitespace()
        // counts '\n' as whitespace.
        if(!is_delim && is_whitespace(point))
        {
            warn("csv file is invalid, detected whitespace");
            return NULL;
        }
        if(!is_delim) continue;

        // Don't emit a phantom empty token when the buffer ends right
        // after a delimiter (e.g. a trailing newline).
        if(point == '\0' && index == start && last != NULL) break;

        csv_token *tok = PushStruct(arena, csv_token);
        tok->lexeme = StringCast(&buffer.data[start], index - start);
        tok->type = TOKEN_VALUE;
        tok->flags = 0;
        tok->next = NULL;

        if(first == NULL)
        {
            tok->flags |= START_FL; // very first token of the buffer
            first = tok;
        }
        else
        {
            last->next = tok;
        }
        // Last field of a row (or of the whole buffer) closes the row.
        if(point == '\n' || point == '\0') tok->flags |= END_FL;

        last = tok;
        start = index + 1;
    }
    return first;
}
// Stub: intended to dump/inspect a loaded CSV buffer; the debug print
// below is currently disabled.
internal void
read_csv(string8 buffer)
{
    // printf("\nsize:%lu\ndata %s\n", buffer.size, buffer.data);
    (void)buffer; // suppress unused-parameter warning while stubbed
}
// Builds a b-tree index over the value tokens in `ctl`.
// Keys are the token lexemes; the payload is the token itself so the
// caller can reach row/col metadata without us having to copy it
// (hence the void* cast — b_tree_insert takes an opaque payload).
// Returns the (possibly empty) tree; never NULL.
//
// Fixes vs. previous version: the loop walked a freshly allocated,
// UNINITIALIZED token instead of ctl->start_token (the `ctl` parameter
// was never read), and had no guard against the nil sentinel, whose
// `next` points at itself and would spin forever.
internal b_tree *
parse_csv(mem_arena *arena, csv_token_list *ctl)
{
    b_tree *tree = PushStructZero(arena, b_tree);
    b_tree_create(arena, tree);

    // A missing list just yields an empty tree.
    if(ctl == NULL) return tree;

    // Stop on NULL *and* on the nil sentinel: nil_csv_token.next
    // points at itself, so NULL alone is not a safe terminator.
    for(csv_token *ct = ctl->start_token;
        ct != NULL && ct != &nil_csv_token;
        ct = ct->next)
    {
        // Skip structural tokens; only index values.
        if(ct->type != TOKEN_VALUE)
        {
            continue;
        }
        b_tree_insert(arena, tree, ct->lexeme, (void *)ct);
    }
    return tree;
}
|