summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authornasr <nsrddyn@gmail.com>2026-03-06 18:53:23 +0000
committernasr <nsrddyn@gmail.com>2026-03-06 18:53:23 +0000
commit2c9057b8f009bd39d97a2d30cf71135cb07c5e4b (patch)
tree64712280738eba2ce174aef50a380c9a3d862d35
parentd8c52d6c408a172f1210c77df3e3a9629ea68dc6 (diff)
improvement(main): worked on the lexer, close to finishing the tokenization
csv's are simple
-rw-r--r--source/engine/engine.c23
-rw-r--r--source/lexer/lexer.c70
-rw-r--r--source/repl/repl.c22
3 files changed, 64 insertions, 51 deletions
diff --git a/source/engine/engine.c b/source/engine/engine.c
index 05c143c..64b15bf 100644
--- a/source/engine/engine.c
+++ b/source/engine/engine.c
@@ -3,8 +3,7 @@
3 3
4#include <stdio.h> 4#include <stdio.h>
5 5
6#include "../lexer/lexer.h" 6
7#include "../lexer/lexer.c"
8 7
9#include "../parser/parser.h" 8#include "../parser/parser.h"
10#include "../parser/parser.c" 9#include "../parser/parser.c"
@@ -15,24 +14,22 @@
15#include "../storage/csv_reader.h" 14#include "../storage/csv_reader.h"
16#include "../storage/csv_reader.c" 15#include "../storage/csv_reader.c"
17 16
17#include "../lexer/lexer.h"
18#include "../lexer/lexer.c"
19
20
18 21
19int main(int c, char **v) 22int main(int c, char **v)
20{ 23{
21 if(c < 2) return -999; 24 if(c < 2) return -999;
22 25
23 string8 buffer = load_file(v[1]); 26 mem_arena *global_arena = arena_create(MiB(20));
24 // read_csv(buffer); 27 csv_table *global_table = PushStruct(global_arena, csv_table);
25 tokenize_csv(buffer);
26 28
29 string8 buffer = load_file(v[1]);
30 read_csv(buffer);
31 tokenize_csv(buffer, global_table, global_arena);
27 32
28 // for(;;)
29 // {
30 // print("reading user input...");
31 // // TODO(nasr): design a repl system
32 //
33 // sleep(1);
34 // }
35 //
36 33
37 return 0; 34 return 0;
38} 35}
diff --git a/source/lexer/lexer.c b/source/lexer/lexer.c
index 1c7ab38..948afd0 100644
--- a/source/lexer/lexer.c
+++ b/source/lexer/lexer.c
@@ -1,77 +1,97 @@
1// the lexer acts as a table builder from a csv file
2// and parses individual rows and columns
3// the next step would be building the b-tree
1internal b32 4internal b32
2is_alpha(u8 point) 5is_alpha(u8 point)
3{ 6{
4 return ((point >= 'a' && point <= 'z') || 7 return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
5 (point >= 'A' && point <= 'Z') ||
6 (point == '_'));
7} 8}
8 9
9internal b32 10internal b32
10is_digit(u8 point) 11is_digit(u8 point)
11{ 12{
12 return (point >= '0' && point <= '9'); 13 return (point >= '0' && point <= '9');
13} 14}
14 15
15internal b32 16internal b32
16is_alpha_num(u8 point) 17is_alpha_num(u8 point)
17{ 18{
18 return (is_alpha(point) || is_digit(point)); 19 return (is_alpha(point) || is_digit(point));
19} 20}
20 21
21internal b32 22internal b32
22is_whitespace(u8 point) 23is_whitespace(u8 point)
23{ 24{
24 return (point == '\n' || point == '\r' || 25 return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
25 point == ' ' || point == '\t');
26} 26}
27 27
28internal b32 28internal b32
29is_delimiter(u8 point) 29is_delimiter(u8 point)
30{ 30{
31
32 return (point == ','); 31 return (point == ',');
33
34} 32}
35 33
36internal token * 34internal token *
37tokenize_csv(string8 buffer) 35tokenize_csv(string8 buffer, csv_table *global_table, mem_arena *arena)
38{ 36{
39 i32 count = 0; 37 i32 count = 0;
40 string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); 38 string8 **tokens = PushArray(arena, string8 *, buffer.size / 10);
39 b32 first_line = 1;
41 40
42 if(buffer.size < 0) return NULL; 41 if(buffer.size < 0) return NULL;
43 for(i32 index = 0; 42 for(i32 index = 0;
44 buffer.data[index] != '\0'; 43 buffer.data[index] != '\0';
45 ++index) 44 ++index)
46 { 45 {
47 string8 tokens = {0}; 46 csv_row *row = PushStruct(arena, csv_row);
47 string8 token = {0};
48 48
49 u8 point = buffer.data[index]; 49 u8 point = buffer.data[index];
50 if(is_whitespace(point)) continue;
51
52 u8 *start = &buffer.data;
53
54 if(is_delimiter(point))
55 {
56
57
58 }
59 50
60 u8 *end = start - 1; 51 u8 *start = buffer.data;
52 u8 *end = NULL;
61 53
62 unused(start); 54 unused(row);
63 unused(end);
64 55
65 switch (point) 56 switch (point)
66 { 57 {
58 case '\n':
59 {
60 first_line = -1;
61 break;
62 }
63 case ',':
64 {
65 end = start - 1;
66
67 if (first_line)
68 {
69 global_table->headers = &token;
70 ++global_table->headers;
71 break;
72 }
73 else
74 {
75
76 break;
77 }
78 }
67 79
68 default: 80 default:
69 { 81 {
70 printf("point: %c\n", point); 82 printf("point: %c\n", point);
71 count++; 83 count++;
84 break;
72 } 85 }
73 } 86 }
74 87
88 token = (string8){
89 .data = start,
90 .size = end - start,
91 };
92
93 **tokens = token;
94 ++*tokens;
75 } 95 }
76 96
77 printf("%d", count); 97 printf("%d", count);
diff --git a/source/repl/repl.c b/source/repl/repl.c
index 4c57345..dd289d8 100644
--- a/source/repl/repl.c
+++ b/source/repl/repl.c
@@ -1,16 +1,12 @@
1#ifndef ENGINE_REPL_H 1internal void
2#define ENGINE_REPL_H 2init_repl()
3
4typedef struct node node;
5struct node
6{
7
8};
9
10typedef struct btree btree;
11struct btree
12{ 3{
4 for(;;)
5 {
6 print("reading user input...");
7 // TODO(nasr): design a repl system
13 8
14}; 9 sleep(1);
10 }
15 11
16#endif /* ENGINE_H */ 12}