diff options
Diffstat (limited to 'source/lexer/lexer.c')
| -rw-r--r-- | source/lexer/lexer.c | 70 |
1 files changed, 45 insertions, 25 deletions
diff --git a/source/lexer/lexer.c b/source/lexer/lexer.c index 1c7ab38..948afd0 100644 --- a/source/lexer/lexer.c +++ b/source/lexer/lexer.c | |||
| @@ -1,77 +1,97 @@ | |||
| 1 | // the lexer acts as a table builder from a csv file | ||
| 2 | // and parsing individual rows and columns | ||
| 3 | // the next step would be building the b-tree | ||
| 1 | internal b32 | 4 | internal b32 |
| 2 | is_alpha(u8 point) | 5 | is_alpha(u8 point) |
| 3 | { | 6 | { |
| 4 | return ((point >= 'a' && point <= 'z') || | 7 | return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_')); |
| 5 | (point >= 'A' && point <= 'Z') || | ||
| 6 | (point == '_')); | ||
| 7 | } | 8 | } |
| 8 | 9 | ||
| 9 | internal b32 | 10 | internal b32 |
| 10 | is_digit(u8 point) | 11 | is_digit(u8 point) |
| 11 | { | 12 | { |
| 12 | return (point >= '0' && point <= '9'); | 13 | return (point >= '0' && point <= '9'); |
| 13 | } | 14 | } |
| 14 | 15 | ||
| 15 | internal b32 | 16 | internal b32 |
| 16 | is_alpha_num(u8 point) | 17 | is_alpha_num(u8 point) |
| 17 | { | 18 | { |
| 18 | return (is_alpha(point) || is_digit(point)); | 19 | return (is_alpha(point) || is_digit(point)); |
| 19 | } | 20 | } |
| 20 | 21 | ||
| 21 | internal b32 | 22 | internal b32 |
| 22 | is_whitespace(u8 point) | 23 | is_whitespace(u8 point) |
| 23 | { | 24 | { |
| 24 | return (point == '\n' || point == '\r' || | 25 | return (point == '\n' || point == '\r' || point == ' ' || point == '\t'); |
| 25 | point == ' ' || point == '\t'); | ||
| 26 | } | 26 | } |
| 27 | 27 | ||
| 28 | internal b32 | 28 | internal b32 |
| 29 | is_delimiter(u8 point) | 29 | is_delimiter(u8 point) |
| 30 | { | 30 | { |
| 31 | |||
| 32 | return (point == ','); | 31 | return (point == ','); |
| 33 | |||
| 34 | } | 32 | } |
| 35 | 33 | ||
| 36 | internal token * | 34 | internal token * |
| 37 | tokenize_csv(string8 buffer) | 35 | tokenize_csv(string8 buffer, csv_table *global_table, mem_arena *arena) |
| 38 | { | 36 | { |
| 39 | i32 count = 0; | 37 | i32 count = 0; |
| 40 | string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); | 38 | string8 **tokens = PushArray(arena, string8 *, buffer.size / 10); |
| 39 | b32 first_line = 1; | ||
| 41 | 40 | ||
| 42 | if(buffer.size < 0) return NULL; | 41 | if(buffer.size < 0) return NULL; |
| 43 | for(i32 index = 0; | 42 | for(i32 index = 0; |
| 44 | buffer.data[index] != '\0'; | 43 | buffer.data[index] != '\0'; |
| 45 | ++index) | 44 | ++index) |
| 46 | { | 45 | { |
| 47 | string8 tokens = {0}; | 46 | csv_row *row = PushStruct(arena, csv_row); |
| 47 | string8 token = {0}; | ||
| 48 | 48 | ||
| 49 | u8 point = buffer.data[index]; | 49 | u8 point = buffer.data[index]; |
| 50 | if(is_whitespace(point)) continue; | ||
| 51 | |||
| 52 | u8 *start = &buffer.data; | ||
| 53 | |||
| 54 | if(is_delimiter(point)) | ||
| 55 | { | ||
| 56 | |||
| 57 | |||
| 58 | } | ||
| 59 | 50 | ||
| 60 | u8 *end = start - 1; | 51 | u8 *start = buffer.data; |
| 52 | u8 *end = NULL; | ||
| 61 | 53 | ||
| 62 | unused(start); | 54 | unused(row); |
| 63 | unused(end); | ||
| 64 | 55 | ||
| 65 | switch (point) | 56 | switch (point) |
| 66 | { | 57 | { |
| 58 | case '\n': | ||
| 59 | { | ||
| 60 | first_line = -1; | ||
| 61 | break; | ||
| 62 | } | ||
| 63 | case ',': | ||
| 64 | { | ||
| 65 | end = start - 1; | ||
| 66 | |||
| 67 | if (first_line) | ||
| 68 | { | ||
| 69 | global_table->headers = &token; | ||
| 70 | ++global_table->headers; | ||
| 71 | break; | ||
| 72 | } | ||
| 73 | else | ||
| 74 | { | ||
| 75 | |||
| 76 | break; | ||
| 77 | } | ||
| 78 | } | ||
| 67 | 79 | ||
| 68 | default: | 80 | default: |
| 69 | { | 81 | { |
| 70 | printf("point: %c\n", point); | 82 | printf("point: %c\n", point); |
| 71 | count++; | 83 | count++; |
| 84 | break; | ||
| 72 | } | 85 | } |
| 73 | } | 86 | } |
| 74 | 87 | ||
| 88 | token = (string8){ | ||
| 89 | .data = start, | ||
| 90 | .size = end - start, | ||
| 91 | }; | ||
| 92 | |||
| 93 | **tokens = token; | ||
| 94 | ++*tokens; | ||
| 75 | } | 95 | } |
| 76 | 96 | ||
| 77 | printf("%d", count); | 97 | printf("%d", count); |
