diff options
| author | nasr <nsrddyn@gmail.com> | 2026-03-06 18:53:23 +0000 |
|---|---|---|
| committer | nasr <nsrddyn@gmail.com> | 2026-03-06 18:53:23 +0000 |
| commit | 2c9057b8f009bd39d97a2d30cf71135cb07c5e4b (patch) | |
| tree | 64712280738eba2ce174aef50a380c9a3d862d35 | |
| parent | d8c52d6c408a172f1210c77df3e3a9629ea68dc6 (diff) | |
improvement(main): worked on the lexer, close to finishing the tokenization
CSVs are simple
| -rw-r--r-- | source/engine/engine.c | 23 | ||||
| -rw-r--r-- | source/lexer/lexer.c | 70 | ||||
| -rw-r--r-- | source/repl/repl.c | 22 |
3 files changed, 64 insertions, 51 deletions
diff --git a/source/engine/engine.c b/source/engine/engine.c index 05c143c..64b15bf 100644 --- a/source/engine/engine.c +++ b/source/engine/engine.c | |||
| @@ -3,8 +3,7 @@ | |||
| 3 | 3 | ||
| 4 | #include <stdio.h> | 4 | #include <stdio.h> |
| 5 | 5 | ||
| 6 | #include "../lexer/lexer.h" | 6 | |
| 7 | #include "../lexer/lexer.c" | ||
| 8 | 7 | ||
| 9 | #include "../parser/parser.h" | 8 | #include "../parser/parser.h" |
| 10 | #include "../parser/parser.c" | 9 | #include "../parser/parser.c" |
| @@ -15,24 +14,22 @@ | |||
| 15 | #include "../storage/csv_reader.h" | 14 | #include "../storage/csv_reader.h" |
| 16 | #include "../storage/csv_reader.c" | 15 | #include "../storage/csv_reader.c" |
| 17 | 16 | ||
| 17 | #include "../lexer/lexer.h" | ||
| 18 | #include "../lexer/lexer.c" | ||
| 19 | |||
| 20 | |||
| 18 | 21 | ||
| 19 | int main(int c, char **v) | 22 | int main(int c, char **v) |
| 20 | { | 23 | { |
| 21 | if(c < 2) return -999; | 24 | if(c < 2) return -999; |
| 22 | 25 | ||
| 23 | string8 buffer = load_file(v[1]); | 26 | mem_arena *global_arena = arena_create(MiB(20)); |
| 24 | // read_csv(buffer); | 27 | csv_table *global_table = PushStruct(global_arena, csv_table); |
| 25 | tokenize_csv(buffer); | ||
| 26 | 28 | ||
| 29 | string8 buffer = load_file(v[1]); | ||
| 30 | read_csv(buffer); | ||
| 31 | tokenize_csv(buffer, global_table, global_arena); | ||
| 27 | 32 | ||
| 28 | // for(;;) | ||
| 29 | // { | ||
| 30 | // print("reading user input..."); | ||
| 31 | // // TODO(nasr): design a repl system | ||
| 32 | // | ||
| 33 | // sleep(1); | ||
| 34 | // } | ||
| 35 | // | ||
| 36 | 33 | ||
| 37 | return 0; | 34 | return 0; |
| 38 | } | 35 | } |
diff --git a/source/lexer/lexer.c b/source/lexer/lexer.c index 1c7ab38..948afd0 100644 --- a/source/lexer/lexer.c +++ b/source/lexer/lexer.c | |||
| @@ -1,77 +1,97 @@ | |||
| 1 | // the lexer acts as a table builder from a csv file | ||
| 2 | // and parsing individual rows and columns | ||
| 3 | // the next step would be building the b-tree | ||
| 1 | internal b32 | 4 | internal b32 |
| 2 | is_alpha(u8 point) | 5 | is_alpha(u8 point) |
| 3 | { | 6 | { |
| 4 | return ((point >= 'a' && point <= 'z') || | 7 | return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_')); |
| 5 | (point >= 'A' && point <= 'Z') || | ||
| 6 | (point == '_')); | ||
| 7 | } | 8 | } |
| 8 | 9 | ||
| 9 | internal b32 | 10 | internal b32 |
| 10 | is_digit(u8 point) | 11 | is_digit(u8 point) |
| 11 | { | 12 | { |
| 12 | return (point >= '0' && point <= '9'); | 13 | return (point >= '0' && point <= '9'); |
| 13 | } | 14 | } |
| 14 | 15 | ||
| 15 | internal b32 | 16 | internal b32 |
| 16 | is_alpha_num(u8 point) | 17 | is_alpha_num(u8 point) |
| 17 | { | 18 | { |
| 18 | return (is_alpha(point) || is_digit(point)); | 19 | return (is_alpha(point) || is_digit(point)); |
| 19 | } | 20 | } |
| 20 | 21 | ||
| 21 | internal b32 | 22 | internal b32 |
| 22 | is_whitespace(u8 point) | 23 | is_whitespace(u8 point) |
| 23 | { | 24 | { |
| 24 | return (point == '\n' || point == '\r' || | 25 | return (point == '\n' || point == '\r' || point == ' ' || point == '\t'); |
| 25 | point == ' ' || point == '\t'); | ||
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | internal b32 | 28 | internal b32 |
| 29 | is_delimiter(u8 point) | 29 | is_delimiter(u8 point) |
| 30 | { | 30 | { |
| 31 | |||
| 32 | return (point == ','); | 31 | return (point == ','); |
| 33 | |||
| 34 | } | 32 | } |
| 35 | 33 | ||
| 36 | internal token * | 34 | internal token * |
| 37 | tokenize_csv(string8 buffer) | 35 | tokenize_csv(string8 buffer, csv_table *global_table, mem_arena *arena) |
| 38 | { | 36 | { |
| 39 | i32 count = 0; | 37 | i32 count = 0; |
| 40 | string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); | 38 | string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); |
| 39 | b32 first_line = 1; | ||
| 41 | 40 | ||
| 42 | if(buffer.size < 0) return NULL; | 41 | if(buffer.size < 0) return NULL; |
| 43 | for(i32 index = 0; | 42 | for(i32 index = 0; |
| 44 | buffer.data[index] != '\0'; | 43 | buffer.data[index] != '\0'; |
| 45 | ++index) | 44 | ++index) |
| 46 | { | 45 | { |
| 47 | string8 tokens = {0}; | 46 | csv_row *row = PushStruct(arena, csv_row); |
| 47 | string8 token = {0}; | ||
| 48 | 48 | ||
| 49 | u8 point = buffer.data[index]; | 49 | u8 point = buffer.data[index]; |
| 50 | if(is_whitespace(point)) continue; | ||
| 51 | |||
| 52 | u8 *start = &buffer.data; | ||
| 53 | |||
| 54 | if(is_delimiter(point)) | ||
| 55 | { | ||
| 56 | |||
| 57 | |||
| 58 | } | ||
| 59 | 50 | ||
| 60 | u8 *end = start - 1; | 51 | u8 *start = buffer.data; |
| 52 | u8 *end = NULL; | ||
| 61 | 53 | ||
| 62 | unused(start); | 54 | unused(row); |
| 63 | unused(end); | ||
| 64 | 55 | ||
| 65 | switch (point) | 56 | switch (point) |
| 66 | { | 57 | { |
| 58 | case '\n': | ||
| 59 | { | ||
| 60 | first_line = -1; | ||
| 61 | break; | ||
| 62 | } | ||
| 63 | case ',': | ||
| 64 | { | ||
| 65 | end = start - 1; | ||
| 66 | |||
| 67 | if (first_line) | ||
| 68 | { | ||
| 69 | global_table->headers = &token; | ||
| 70 | ++global_table->headers; | ||
| 71 | break; | ||
| 72 | } | ||
| 73 | else | ||
| 74 | { | ||
| 75 | |||
| 76 | break; | ||
| 77 | } | ||
| 78 | } | ||
| 67 | 79 | ||
| 68 | default: | 80 | default: |
| 69 | { | 81 | { |
| 70 | printf("point: %c\n", point); | 82 | printf("point: %c\n", point); |
| 71 | count++; | 83 | count++; |
| 84 | break; | ||
| 72 | } | 85 | } |
| 73 | } | 86 | } |
| 74 | 87 | ||
| 88 | token = (string8){ | ||
| 89 | .data = start, | ||
| 90 | .size = end - start, | ||
| 91 | }; | ||
| 92 | |||
| 93 | **tokens = token; | ||
| 94 | ++*tokens; | ||
| 75 | } | 95 | } |
| 76 | 96 | ||
| 77 | printf("%d", count); | 97 | printf("%d", count); |
diff --git a/source/repl/repl.c b/source/repl/repl.c index 4c57345..dd289d8 100644 --- a/source/repl/repl.c +++ b/source/repl/repl.c | |||
| @@ -1,16 +1,12 @@ | |||
| 1 | #ifndef ENGINE_REPL_H | 1 | internal void |
| 2 | #define ENGINE_REPL_H | 2 | init_repl() |
| 3 | |||
| 4 | typedef struct node node; | ||
| 5 | struct node | ||
| 6 | { | ||
| 7 | |||
| 8 | }; | ||
| 9 | |||
| 10 | typedef struct btree btree; | ||
| 11 | struct btree | ||
| 12 | { | 3 | { |
| 4 | for(;;) | ||
| 5 | { | ||
| 6 | print("reading user input..."); | ||
| 7 | // TODO(nasr): design a repl system | ||
| 13 | 8 | ||
| 14 | }; | 9 | sleep(1); |
| 10 | } | ||
| 15 | 11 | ||
| 16 | #endif /* ENGINE_H */ | 12 | } |
