From 2c9057b8f009bd39d97a2d30cf71135cb07c5e4b Mon Sep 17 00:00:00 2001
From: nasr <nsrddyn@gmail.com>
Date: Fri, 6 Mar 2026 18:53:23 +0000
Subject: improvement(main): worked on the lexer, close to finishing the
 tokenization

csv's are simple
---
 source/lexer/lexer.c | 70 +++++++++++++++++++++++++++++++++-------------------
 1 file changed, 45 insertions(+), 25 deletions(-)

(limited to 'source/lexer/lexer.c')

diff --git a/source/lexer/lexer.c b/source/lexer/lexer.c
index 1c7ab38..948afd0 100644
--- a/source/lexer/lexer.c
+++ b/source/lexer/lexer.c
@@ -1,77 +1,97 @@
+// the lexer acts as a table builder for a csv file,
+// parsing individual rows and columns
+// the next step would be building the b-tree
 internal b32
 is_alpha(u8 point)
 {
-  return ((point >= 'a' && point <= 'z') ||
-          (point >= 'A' && point <= 'Z') ||
-          (point == '_'));
+    return ((point >= 'a' && point <= 'z') || (point >= 'A' && point <= 'Z') || (point == '_'));
 }
 
 internal b32
 is_digit(u8 point)
 {
-  return (point >= '0' && point <= '9');
+    return (point >= '0' && point <= '9');
 }
 
 internal b32
 is_alpha_num(u8 point)
 {
-  return (is_alpha(point) || is_digit(point));
+    return (is_alpha(point) || is_digit(point));
 }
 
 internal b32
 is_whitespace(u8 point)
 {
-  return (point == '\n' || point == '\r' ||
-          point == ' ' || point == '\t');
+    return (point == '\n' || point == '\r' || point == ' ' || point == '\t');
 }
 
 internal b32
 is_delimiter(u8 point)
 {
-
     return (point == ',');
-
 }
 
 internal token *
-tokenize_csv(string8 buffer)
+tokenize_csv(string8 buffer, csv_table *global_table, mem_arena *arena)
 {
     i32 count = 0;
     string8 **tokens = PushArray(arena, string8 *, buffer.size / 10);
+    b32 first_line = 1;
 
     if(buffer.size < 0) return NULL;
     for(i32 index = 0;
-         buffer.data[index] != '\0';
-         ++index)
+        buffer.data[index] != '\0';
+        ++index)
     {
-        string8 tokens = {0};
+        csv_row *row = PushStruct(arena, csv_row);
+        string8 token = {0};
 
         u8 point = buffer.data[index];
-        if(is_whitespace(point)) continue;
 
-        u8 *start = &buffer.data;
-
-        if(is_delimiter(point))
-        {
-
-
-        }
+        u8 *start = buffer.data;
+        u8 *end = NULL;
 
-        u8 *end = start - 1;
-
-        unused(start);
-        unused(end);
+        unused(row);
 
         switch (point)
         {
+            case '\n':
+                {
+                    first_line = -1;
+                    break;
+                }
+            case ',':
+                {
+                    end = start - 1;
+
+                    if (first_line)
+                    {
+                        global_table->headers = &token;
+                        ++global_table->headers;
+                        break;
+                    }
+                    else
+                    {
+
+                        break;
+                    }
+                }
 
             default:
                 {
                     printf("point: %c\n", point);
                     count++;
+                    break;
                 }
         }
 
+        token = (string8){
+            .data = start,
+            .size = end - start,
+        };
+
+        **tokens = token;
+        ++*tokens;
     }
 
     printf("%d", count);
-- 
cgit v1.3