From dd5586abec207dd4acd16d51ce0d392c03e5e957 Mon Sep 17 00:00:00 2001 From: nasr Date: Thu, 26 Mar 2026 22:35:30 +0100 Subject: feature(main): init feature(main): init --- .gitignore | 2 + build.sh | 52 ++++++ source/base/base.h | 73 ++++++++ source/base/base_arena.c | 138 +++++++++++++++ source/base/base_arena.h | 26 +++ source/base/base_hash.c | 12 ++ source/base/base_include.h | 27 +++ source/base/base_io.h | 11 ++ source/base/base_mem.c | 0 source/base/base_mem.h | 26 +++ source/base/base_os.h | 38 ++++ source/base/base_stack.c | 187 ++++++++++++++++++++ source/base/base_stack.h | 22 +++ source/base/base_string.h | 59 +++++++ source/base/base_test.h | 74 ++++++++ source/base/bash_hash.h | 15 ++ source/fajr/fajr_main.c | 41 +++++ source/fajr/fajr_main.h | 6 + source/fajr_lexer/fajr_lexer.c | 274 +++++++++++++++++++++++++++++ source/fajr_lexer/fajr_lexer.h | 119 +++++++++++++ source/fajr_parser/fajr_parser.c | 364 +++++++++++++++++++++++++++++++++++++++ source/fajr_parser/fajr_parser.h | 60 +++++++ tests/main.fajr | 6 + 23 files changed, 1632 insertions(+) create mode 100755 .gitignore create mode 100755 build.sh create mode 100755 source/base/base.h create mode 100755 source/base/base_arena.c create mode 100755 source/base/base_arena.h create mode 100644 source/base/base_hash.c create mode 100755 source/base/base_include.h create mode 100644 source/base/base_io.h create mode 100644 source/base/base_mem.c create mode 100644 source/base/base_mem.h create mode 100644 source/base/base_os.h create mode 100755 source/base/base_stack.c create mode 100755 source/base/base_stack.h create mode 100644 source/base/base_string.h create mode 100644 source/base/base_test.h create mode 100644 source/base/bash_hash.h create mode 100644 source/fajr/fajr_main.c create mode 100644 source/fajr/fajr_main.h create mode 100644 source/fajr_lexer/fajr_lexer.c create mode 100644 source/fajr_lexer/fajr_lexer.h create mode 100644 source/fajr_parser/fajr_parser.c create mode 100644 
#!/bin/bash
#
# build.sh - builds the fajr compiler into ./build.
#
# Usage: ./build.sh [OPTION ...]
#   CLEAN        remove everything inside ./build first
#   ASAN         add AddressSanitizer flags
#   RELEASE      optimised build (turns DEBUG off)
#   DEBUG        debug symbols (default)
#   TARGET_FAJR  build the compiler target
#   fajr         same as TARGET_FAJR
# With no arguments the compiler target is built in debug mode.

set -eu

DIR="$(dirname "$(readlink -f "$0")")"
cd "$DIR"

BUILD="build"
mkdir -p "$BUILD"

COMPILER="clang"
CLEAN=0
ASAN=0
DEBUG=1
RELEASE=0
TARGET_FAJR=0
fajr=0

[ "$#" = 0 ] && TARGET_FAJR=1

# Options are matched against a fixed list instead of being passed to `eval`:
# evaluating raw arguments would run arbitrary shell code and could overwrite
# unrelated variables such as PATH.
for ARG in "$@"; do
    case "$ARG" in
        CLEAN)       CLEAN=1 ;;
        ASAN)        ASAN=1 ;;
        DEBUG)       DEBUG=1 ;;
        RELEASE)     RELEASE=1 ;;
        TARGET_FAJR) TARGET_FAJR=1 ;;
        fajr)        fajr=1 ;;
        *)           echo "warning: ignoring unknown option '$ARG'" >&2 ;;
    esac
done

[ "$RELEASE" = 1 ] && DEBUG=0

# compile SOURCE OUT [EXTRA]
#   Compiles SOURCE (plus optional EXTRA inputs/flags) into $BUILD/OUT.
compile() {
    local SOURCE="$1"
    local OUT="$2"
    local EXTRA="${3:-}"

    local COMMON="-I$DIR -I$DIR/source -D_GNU_SOURCE=1 -fno-threadsafe-statics -nostdinc++"
    local WARNINGS="-Wall -Wextra -Wconversion -Wno-sign-conversion -Wno-unused-parameter -Wno-missing-field-initializers -Wno-main-return-type -Wno-unused-variable -Wno-unused-but-set-variable -Wno-switch -Wno-unused-function"
    local CLANG="-fno-omit-frame-pointer -fdiagnostics-absolute-paths -Wno-null-dereference"
    local LINKER="-lm -ldl -lpthread"

    local FLAGS="$COMMON $WARNINGS $CLANG"

    [ "$DEBUG" = 1 ] && FLAGS="$FLAGS -g -ggdb3"
    [ "$RELEASE" = 1 ] && FLAGS="$FLAGS -O3"
    [ "$ASAN" = 1 ] && FLAGS="$FLAGS -fsanitize=address -fsanitize-trap -Isource"

    echo "Building $OUT..."
    # The flag variables are intentionally unquoted so they split into words.
    $COMPILER $FLAGS $SOURCE $EXTRA $LINKER -o "$BUILD/$OUT"
}

if [ "$CLEAN" = 1 ]; then
    rm -rf "$BUILD"/*
    echo "Cleaned $BUILD"
fi

if [ "$TARGET_FAJR" = 1 ] || [ "$fajr" = 1 ]; then
    compile "./source/fajr/fajr_main.c" "compiler"
fi

echo "Done."
diff --git a/source/base/base.h b/source/base/base.h new file mode 100755 index 0000000..ef23391 --- /dev/null +++ b/source/base/base.h @@ -0,0 +1,73 @@ +#ifndef BASE_H +#define BASE_H + +/* assert an expression and output the file and the line */ + +#define internal static +#define global_variable static +#define local_persist static + +#define ERR_OK 0 +#define ERR_IO 1 +#define ERR_PARSE 2 +#define ERR_PERM 3 +#define ERR_INVALID 4 + +#define KiB(n) (((u64)(n)) << 10) +#define MiB(n) (((u64)(n)) << 20) +#define GiB(n) (((u64)(n)) << 30) + +#define unused(x) (void)(x) + +#define PATH_MAX_LEN 128 +#define BUFF_SMALL 128 +#define BUFF_DEFAULT 256 +#define BUFF_LARGE 512 + +#define NIL 0 + +#define DEPRECATED __attribute__((__deprecated__)) + +#if defined(__arm__) || defined(__aarch64__) +#define breakpoint __asm__ volatile("brk #0"); +#define temp_breakpoint __asm__ volatile("udf #0"); +#elif defined(__i386__) || defined(__x86_64__) +#define breakpoint __asm__ volatile("int3"); +#endif + +#define MemCpy(dest, src, len) memcpy((dest), (src), (len)) +#define MemSet(dest, len) memset((dest), (0), (len)) + +#if COMPILER_MSVC || (COMPILER_CLANG && OS_WINDOWS) +#pragma section(".rdata$", read) +#define read_only __declspec(allocate(".rdata$")) +#elif (COMPILER_CLANG && OS_LINUX) +#define read_only __attribute__((section(".rodata"))) +#else +#define read_only +#endif + +typedef uint64_t u64; +typedef uint32_t u32; +typedef uint16_t u16; +typedef uint8_t u8; + +typedef int8_t i8; +typedef int16_t i16; +typedef int32_t i32; +typedef int64_t i64; + +typedef float f32; +typedef double f64; + +typedef i32 b32; +typedef i16 b16; +typedef u8 b8; + +typedef uintptr_t umm; +typedef intptr_t smm; + +#define TRUE (0 == 0) +#define FALSE (0 != 0) + +#endif diff --git a/source/base/base_arena.c b/source/base/base_arena.c new file mode 100755 index 0000000..5855e5e --- /dev/null +++ b/source/base/base_arena.c @@ -0,0 +1,138 @@ +internal mem_arena * +arena_create(u64 capacity) +{ + 
mem_arena *arena = (mem_arena *)mmap( + /* kernel decides where to throw the arena */ + NULL, + capacity + sizeof(mem_arena), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + -1, + 0); + + if (arena == MAP_FAILED) + { + return NULL; + } + + arena->capacity = capacity; + arena->base_position = (u8 *)arena + sizeof(mem_arena); + arena->current_position = 0; + arena->previous_position = 0; + + return arena; +} + +internal void +arena_destroy(mem_arena *arena) +{ + if (!arena) + { + return; + } + munmap(arena, arena->capacity + sizeof(mem_arena)); +} +internal void * +arena_alloc(mem_arena *arena, u64 size, b32 zero) +{ + if (!arena) + { + return NULL; + } + u64 aligned = Align(arena->current_position, ARENA_ALIGN); + u64 new_pos = aligned + size; + if (new_pos > arena->capacity) + { + return NULL; + } + + void *out = arena->base_position + aligned; + + arena->previous_position = arena->current_position; + arena->current_position = aligned + size; + + if (zero) MemSet(out, size); + + return out; +} + +internal void +arena_pop(mem_arena *arena, u64 size) +{ + size = MIN(size, arena->current_position); + arena->current_position -= size; +} + +internal void +arena_pop_to(mem_arena *arena, u64 pos) +{ + u64 size = pos < arena->current_position ? 
arena->current_position - pos : 0; + arena_pop(arena, size); +} + +internal void +arena_clear(mem_arena *arena) +{ + arena->current_position = 0; +} + +internal mem_arena * +arena_resize_align(mem_arena *arena, void *old_memory, u64 new_size, u64 old_size, umm alignment) +{ + u8 *old_mem = (u8 *)old_memory; + + if (!is_pow(alignment)) + { + Align(arena->current_position, alignment); + } + + if (old_memory == NULL || old_size == 0) + { + return (mem_arena *)arena_alloc(arena, new_size, 0); + } + else if ((old_mem >= arena->base_position && old_mem < arena->base_position + arena->capacity)) + { + if ((arena->base_position + arena->previous_position) == old_memory) + { + arena->current_position = arena->previous_position + new_size; + if (new_size > old_size) + { + MemSet(&arena->current_position, new_size - old_size); + } + return (mem_arena *)old_memory; + } + else + { + void *new_memory = arena_alloc(arena, new_size, 0); + umm copy_size = old_size < new_size ? old_size : new_size; + memmove(new_memory, old_mem, copy_size); + } + } + else + { + check(0); + } + return NULL; +} + +internal mem_arena * +arena_resize(mem_arena *arena, void *old_memory, u64 new_size, u64 old_size) +{ + return arena_resize_align(arena, old_memory, new_size, old_size, ARENA_ALIGN); +} + +internal temp_arena +temp_arena_begin(mem_arena *arena) +{ + temp_arena t; + t.arena = arena; + t.start_position = arena->current_position; + + return t; +} + +internal void +temp_arena_end(temp_arena temp) +{ + temp.arena->current_position = temp.start_position; +} diff --git a/source/base/base_arena.h b/source/base/base_arena.h new file mode 100755 index 0000000..2818ae4 --- /dev/null +++ b/source/base/base_arena.h @@ -0,0 +1,26 @@ +#ifndef BASE_ARENA_H +#define BASE_ARENA_H + +#define Align(pointer, alignment) align((u64)(pointer), (umm)(alignment)) +#define PushStruct(arena, type) (type *)arena_alloc((arena), sizeof(type), 0) +#define PushStructZero(arena, type) (type *)arena_alloc((arena), 
sizeof(type), 1) +#define PushArray(arena, type, len) (type *)arena_alloc((arena), sizeof(type) * (len), 0) +#define PushArrayZero(arena, type, len) (type *)arena_alloc((arena), sizeof(type) * (len), 1) + +typedef struct mem_arena mem_arena; +struct mem_arena +{ + u64 current_position; + u64 previous_position; + u64 capacity; + u8 *base_position; +}; + +typedef struct temp_arena temp_arena; +struct temp_arena +{ + mem_arena *arena; + u64 start_position; +}; + +#endif /* BASE_ARENA_H */ diff --git a/source/base/base_hash.c b/source/base/base_hash.c new file mode 100644 index 0000000..1964441 --- /dev/null +++ b/source/base/base_hash.c @@ -0,0 +1,12 @@ + +internal u64 +generate_hash() +{ + + + +} + +internal hash_map +make_hash_map + diff --git a/source/base/base_include.h b/source/base/base_include.h new file mode 100755 index 0000000..40ae5ea --- /dev/null +++ b/source/base/base_include.h @@ -0,0 +1,27 @@ +#ifndef BASE_INCLUDE_H +#define BASE_INCLUDE_H + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "base.h" +#include "base_mem.h" +#include "base_arena.h" +#include "base_stack.h" +#include "base_test.h" +#include "base_string.h" +#include "base_os.h" + +#ifdef BASE_UNITY + +#include "base_arena.c" +#include "base_stack.c" + +#endif +#endif diff --git a/source/base/base_io.h b/source/base/base_io.h new file mode 100644 index 0000000..ece4d7c --- /dev/null +++ b/source/base/base_io.h @@ -0,0 +1,11 @@ +#ifndef BASE_IO_H +#define BASE_IO_H + +internal void +input_read() +{ + + +} + +#endif /* BASE_IO_H */ diff --git a/source/base/base_mem.c b/source/base/base_mem.c new file mode 100644 index 0000000..e69de29 diff --git a/source/base/base_mem.h b/source/base/base_mem.h new file mode 100644 index 0000000..2778fce --- /dev/null +++ b/source/base/base_mem.h @@ -0,0 +1,26 @@ +#ifndef BASE_MEM_H +#define BASE_MEM_H + +#define ARENA_ALIGN (2 * sizeof(void *)) +#define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) + +internal inline b8 +is_pow(umm x) +{ + return (x & (x - 1)) == 0; +} + +internal inline u64 +align(u64 pointer, umm alignment) +{ + if ((alignment & (alignment - 1)) == 0) + { + return pointer; + } + + return (pointer + alignment - 1) & ~(alignment - 1); +} + + +#endif diff --git a/source/base/base_os.h b/source/base/base_os.h new file mode 100644 index 0000000..23587c6 --- /dev/null +++ b/source/base/base_os.h @@ -0,0 +1,38 @@ +#ifndef BASE_OS_H +#define BASE_OS_H + +internal void +print(const char *str) +{ + i32 len = 0; + while (str[len]) len++; + write(STDOUT_FILENO, str, len); +} + +internal string8 +load_file(const char *path) +{ + string8 result = {0}; + struct stat sbuf = {0}; + + i32 file = open(path, O_RDONLY); + if(file == -1) return result; + + if(fstat(file, &sbuf) == -1) + { + print("error: fstat failed"); + close(file); + return result; + } + + result.size = (u64)sbuf.st_size; + if(result.size != 0) + { + result.data = (u8 *)mmap(0, result.size, PROT_READ, MAP_PRIVATE, file, 0); + } + + close(file); + return result; +} + +#endif /* BASE_OS_H */ diff --git a/source/base/base_stack.c b/source/base/base_stack.c new file mode 100755 index 0000000..9c1218a --- /dev/null +++ b/source/base/base_stack.c @@ -0,0 +1,187 @@ +internal mem_stack * +stack_create(u64 capacity) +{ + mem_stack *stack = (mem_stack *)mmap( + 0, + capacity + sizeof(mem_stack), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, + -1, + 0); + + if (stack == MAP_FAILED) + { + return NULL; + } + + stack->capacity = capacity; + stack->base_position = (u8 *)stack + sizeof(mem_stack); + stack->current_offset = 0; + + return stack; +} + +internal u8 +calculate_padding(u64 pointer, u8 alignment, u64 header_size) +{ + u8 modulo, padding; + + if (!is_pow(alignment)) + { + return 0; + } + + modulo = pointer & (u8)(alignment - 1); + + padding = 0; + + if (0 == modulo) + { + padding = alignment - modulo; + } + + if (padding < 
header_size) + { + header_size -= padding; + + if ((header_size & (alignment - 1)) != 0) + { + padding += alignment * (1 + (header_size / alignment)); + } + else + { + padding += alignment * (header_size / alignment); + } + } + + return padding; +} + +internal mem_stack * +stack_push_align(mem_stack *stack, u64 size, u8 alignment) +{ + u8 padding = 0; + + if (!is_pow(alignment)) + { + return (0); + } + + if (alignment > 128) + { + alignment = 128; + } + + u64 current_address = (u64)stack->base_position + stack->current_offset; + padding = calculate_padding(current_address, alignment, sizeof(mem_stack_header)); + + if (stack->current_offset + padding + size > stack->capacity) + { + return 0; + } + + stack->current_offset += padding; + + u64 next_address = current_address + (u64)padding; + mem_stack_header *header = (mem_stack_header *)(next_address - sizeof(mem_stack_header)); + header->padding = padding; + + stack->current_offset += size; + + return MemSet((void *)next_address, size); +} +internal void * +stack_push(mem_stack *stack, u64 size) +{ + return stack_push_align(stack, size, ARENA_ALIGN); +} + +internal void +stack_pop(mem_stack *stack, void *pointer) +{ + if (pointer != NULL) + { + u64 start, end, current_address; + mem_stack_header *header; + u64 prev_offset; + + start = (u64)stack->base_position; + end = start + (u64)stack->capacity; + current_address = (u64)pointer; + + if (!(start <= current_address && current_address < end)) + { + if (0 && "Out of bounds memory address passed to stack allocator (free)") + { + return; + } + return; + } + + if (current_address >= start + (u64)stack->base_position) + { + return; + } + + header = (mem_stack_header *)(current_address - sizeof(mem_stack_header)); + prev_offset = (size_t)(current_address - (u64)header->padding - start); + stack->current_offset = prev_offset; + } +} + +internal mem_stack * +stack_resize_align(mem_stack *stack, void *pointer, u64 old_size, u64 new_size, u8 alignment) +{ + if (pointer == 
NULL) + { + return stack_push_align(stack, new_size, alignment); + } + else if (new_size == 0) + { + stack_pop(stack, pointer); + return NULL; + } + + u64 start, end, current_address; + u64 min_size = old_size < new_size ? old_size : new_size; + void *new_pointer; + + start = (u64)stack->base_position; + end = start + (u64)stack->capacity; + current_address = (u64)pointer; + if (!(start <= current_address && current_address < end)) + { + return NULL; + } + + if (current_address >= start + (u64)stack->current_offset) + { + return NULL; + } + + if (old_size == new_size) + { + return pointer; + } + + new_pointer = stack_push_align(stack, new_size, alignment); + memmove(new_pointer, pointer, min_size); + return new_pointer; +} + +internal void +stack_pop_all(mem_stack *stack) +{ + stack->current_offset = 0; +} + +internal void +stack_destroy(mem_stack *stack) +{ + if (!stack) + { + return; + } + + munmap(stack, stack->capacity + sizeof(mem_stack)); +} diff --git a/source/base/base_stack.h b/source/base/base_stack.h new file mode 100755 index 0000000..54d61d3 --- /dev/null +++ b/source/base/base_stack.h @@ -0,0 +1,22 @@ +#ifndef STACK_H +#define STACK_H + +typedef struct mem_stack_header mem_stack_header; +struct mem_stack_header +{ + u8 padding; + u8 previous_offset; +}; + + +typedef struct mem_stack mem_stack; +struct mem_stack +{ + mem_stack_header *header; + + u64 current_offset; + u64 capacity; + u8 *base_position; +}; + +#endif diff --git a/source/base/base_string.h b/source/base/base_string.h new file mode 100644 index 0000000..189b38a --- /dev/null +++ b/source/base/base_string.h @@ -0,0 +1,59 @@ +#ifndef BASE_STRING_H +#define BASE_STRING_H + +#include + +#define StringLit(string) \ + (string8){ .data = (u8 *)(string), .size = (sizeof(string) - 1) } + + #define PushString(arena, size) \ + (string8){ (u8 *)PushArray((arena), u8, (size)), (u64)(size) } + +#define StringFmt "%.*s" +#define ULongFmt "%lu" +#define ULLongFmt "%llu" + +typedef struct string8 string8; 
+struct string8 +{ + u8 *data; + u64 size; +}; + +internal b8 +string8_cmp(string8 a, string8 b) +{ + if (a.size != b.size) return 0; + return (b8)(memcmp(a.data, b.data, a.size) == 0); +} + +internal u64 +string8_to_u64(u8 *buf, umm len) +{ + u64 value = 0; + for (umm i = 0; i < len; ++i) + { + u8 c = buf[i]; + if (c < '0' || c > '9') break; + value = value * 10 + (c - '0'); + } + return value; +} + +internal void +string8_append_char(string8 *buf, u8 c) +{ + buf->data[buf->size] = c; + buf->size += 1; +} + +read_only global_variable +string8 nil_string = +{ + + .data = NULL, + .size = 0, + +}; + +#endif /* BASE_STRING_H */ diff --git a/source/base/base_test.h b/source/base/base_test.h new file mode 100644 index 0000000..412797b --- /dev/null +++ b/source/base/base_test.h @@ -0,0 +1,74 @@ +#ifndef BASE_TEST_H +#define BASE_TEST_H + +#define RED "\x1b[31m" +#define GREEN "\x1b[32m" +#define RESET "\x1b[0m" +#define BLUE "\x1b[34m" + +#define LEN(s) (sizeof(s) - 1) + +internal void +write_int(i32 num) +{ + + if (num < 0) + { + write(STDERR_FILENO, "-", 1); + num = -num; + } + if (num >= 10) + write_int(num / 10); + char digit = '0' + (num % 10); + + write(STDERR_FILENO, &digit, 1); +} + +#define show \ + do \ + { \ + write(STDOUT_FILENO, __FILE__, sizeof(__FILE__) - 1); \ + write(STDOUT_FILENO, ":", 1); \ + write(STDOUT_FILENO, __func__, sizeof(__func__) - 1); \ + write(STDOUT_FILENO, ":", 1); \ + write_int(__LINE__); \ + write(STDOUT_FILENO, "\n", 1); \ + } while (0) + +#define test(expr) \ + { \ + if ((expr) != 0) \ + { \ + write(STDERR_FILENO, "[FAILED] ", LEN("[FAILED] ")); \ + show; \ + _exit(1); \ + } \ + } + +#define check(expr) \ + { \ + if ((expr) != 0) \ + { \ + write(STDERR_FILENO, RED "[ERROR] ", LEN(RED "[ERROR] ")); \ + show; \ + write(STDERR_FILENO, RESET, LEN(RESET)); \ + _exit(1); \ + } \ + else \ + { \ + write(STDERR_FILENO, GREEN "[SUCCESS] ", LEN(GREEN "[SUCCESS] ")); \ + show; \ + write(STDERR_FILENO, RESET, LEN(RESET)); \ + } \ + } + +#define 
checkpoint_output "<>\n" +#define checkpoint_end_output "^^^^^^^^^^^^^^\n\n\n" +#define checkpoint \ + { \ + write(STDERR_FILENO, BLUE checkpoint_output, LEN(BLUE checkpoint_output)); \ + show; \ + write(STDERR_FILENO, BLUE checkpoint_end_output, LEN(BLUE checkpoint_end_output)); \ + } + +#endif /* BASE_TEST_H */ diff --git a/source/base/bash_hash.h b/source/base/bash_hash.h new file mode 100644 index 0000000..2c286a2 --- /dev/null +++ b/source/base/bash_hash.h @@ -0,0 +1,15 @@ +##ifndef HEADER_H +#define HEADER_H + +typedef struct hash_map hash_map; +typedef struct hash hash; + + +struct hash_map +{ + + +}; + + +#endif /* HEADER_H */ diff --git a/source/fajr/fajr_main.c b/source/fajr/fajr_main.c new file mode 100644 index 0000000..fc7be4a --- /dev/null +++ b/source/fajr/fajr_main.c @@ -0,0 +1,41 @@ +#define BASE_UNITY +#include "base/base_include.h" +#include "fajr_lexer/fajr_lexer.h" +#include "fajr_parser/fajr_parser.h" + +#include "fajr_lexer/fajr_lexer.c" +#include "fajr_parser/fajr_parser.c" + +int +main(int argc, char **argv) +{ + mem_arena *global = arena_create(GiB(1)); + if (argc < 2) { + print("Usage: program \n"); + return 1; + } + + i32 fd = open(argv[1], O_RDONLY); + if (fd < 0) { + print("Error: Cannot open file\n"); + return 1; + } + + u8 input[4096]; + u64 bytes_read; + bytes_read = read(fd, input, sizeof(input)); + + string8 buffer = {0}; + + buffer.data = input; + buffer.size = bytes_read; + + token_list *List = PushStruct(global, token_list); + concrete_syntax_tree *Tree = PushStruct(global, concrete_syntax_tree); + + Lex(&buffer, global, List); + Parse(global, List, Tree); + + close(fd); + return 0; +} diff --git a/source/fajr/fajr_main.h b/source/fajr/fajr_main.h new file mode 100644 index 0000000..835ab65 --- /dev/null +++ b/source/fajr/fajr_main.h @@ -0,0 +1,6 @@ +#ifndef FAJR_MAIN_H +#define FAJR_MAIN_H + + + +#endif /* FAJR_MAIN_H */ diff --git a/source/fajr_lexer/fajr_lexer.c b/source/fajr_lexer/fajr_lexer.c new file mode 100644 index 
0000000..3a98ca9 --- /dev/null +++ b/source/fajr_lexer/fajr_lexer.c @@ -0,0 +1,274 @@ +internal inline b32 +IsAlpha(u8 Character) +{ + return ((Character >= 'a' && Character <= 'z') || + (Character >= 'A' && Character <= 'Z') || + (Character == '_')); +} + +internal inline b32 +IsDigit(u8 Character) +{ + return (Character >= '0' && Character <= '9'); +} + +internal b32 +IsDelimiter(u8 Character) +{ + for(i32 Index = 0; Index < (i32)sizeof(Delimiters); ++Index) + { + if(Delimiters[Index] == Character) + { + return 1; + } + } + return 0; +} + +internal inline b32 +IsNilTokenNode(token_node *TokenNode) +{ + return TokenNode == &nil_token_node || TokenNode == NULL; +} + +internal inline b32 +IsNilToken(token *Token) +{ + return Token == &nil_token || Token == NULL; +} + +internal inline b32 +IsWhiteSpace(u8 Character) +{ + return (Character == '\n' || Character == '\r' || + Character == ' ' || Character == '\t'); +} + +internal inline void +ParseCStyleComment(u8 Buffer[]) +{ + // TODO(nasr): handle c style comments + // couuld be usefull for function information visualiszation + // so think of a way to link themn to functions and variables? + // some sort of meta data per thing? + // and then we can do a visualization if the str8.count of the metadata thing is bigger then 0 + // we should a visualization thing for the thing + // if the thing is less then 0, we dont do anything? + + // TODO(nasr): while doingn this we could also add in some editor specific anotations ? 
+} + +internal inline void +ParseCPPStyleComment(u8 Buffer[]) +{ + // TODO(nasr): +} + +internal inline b32 +Is_TokenBreak(u8 Character) +{ + return (IsWhiteSpace(Character) || IsDelimiter(Character)); +} + +internal token_list * +Lex(string8 *Buffer, mem_arena *Arena, token_list *List) +{ + b32 Initialized = 0; + i32 Line = 1; + i32 Column = 1; + + for(u64 TextIndex = 0; TextIndex < Buffer->size; TextIndex++) + { + u8 Character = Buffer->data[TextIndex]; + + if(Character == '\r' || Character == '\n') + { + if(Character == '\r' && + (TextIndex + 1 < Buffer->size) && + Buffer->data[TextIndex + 1] == '\n') + { + TextIndex++; + } + + ++TextIndex; + ++Line; + + // NOTE(nasr): reset the column to the beginning of the line + Column = 1; + continue; + } + + if(IsWhiteSpace(Character)) + { + ++Column; + continue; + } + + token_node *TokenNode = PushStruct(Arena, token_node); + token *Token = PushStruct(Arena, token); + TokenNode->Next = &nil_token_node; + TokenNode->Previous = &nil_token_node; + TokenNode->Token = Token; + Token->Line = Line; + Token->Column = Column; + Token->ByteOffset = (u64)TextIndex; + Token->Flags = FlagNone; + + u64 TokenStart = TextIndex; + u64 TokenEnd = TextIndex; + + if(Character > 126) + { + Token->Type = TokenUnwantedChild; + TokenEnd = TextIndex + 1; + } + else if(IsAlpha(Character)) + { + while((TextIndex + 1 < Buffer->size) && + (IsAlpha(Buffer->data[TextIndex + 1]) || IsDigit(Buffer->data[TextIndex + 1]))) + { + ++TextIndex; + } + + // TODO(nasr): build a lexeme + TokenEnd = TextIndex + 1; + string8 Lexeme = { + .data = (u8 *)Buffer->data, + .size = (u64)Buffer->data + } + ; + + // TODO(nasr): handle functions + if(string8_cmp(Lexeme, StringLit("func"), 0)) + Token->Type = TokenIf; + else if(string8_cmp(Lexeme, StringLit("if"), 0)) + Token->Type = TokenElse; + else if(string8_cmp(Lexeme, StringLit("return"), 0)) + Token->Type = TokenReturn; + else if(string8_cmp(Lexeme, StringLit("while"), 0)) + Token->Type = TokenWhile; + else 
if(string8_cmp(Lexeme, StringLit("for"), 0)) + Token->Type = TokenFor; + else if(string8_cmp(Lexeme, StringLit("break"), 0)) + Token->Type = TokenBreak; + else if(string8_cmp(Lexeme, StringLit("continue"), 0)) + Token->Type = TokenContinue; + else + Token->Type = TokenIdentifier; + } + else if(IsDigit(Character)) + { + while((TextIndex + 1 < Buffer->size) && + IsDigit(Buffer->data[TextIndex + 1])) + { + ++TextIndex; + } + + TokenEnd = TextIndex + 1; + Token->Type = TokenNumber; + } + + else + { + u8 Next = (TextIndex + 1 < Buffer->size) ? Buffer->data[TextIndex + 1] : 0; + + switch(Character) + { + case '=': + { + if(Next == '=') + { + Token->Type = TokenDoubleEqual; + TextIndex++; + } + else + { + Token->Type = (token_type)'='; + } + } + break; + + case '>': + { + if(Next == '=') + { + Token->Type = TokenGreaterEqual; + TextIndex++; + } + else if(Next == '>') + { + Token->Type = TokenRightShift; + TextIndex++; + } + else + { + Token->Type = (token_type)'>'; + } + } + break; + + case '<': + { + if(Next == '=') + { + Token->Type = TokenLesserEqual; + TextIndex++; + } + else if(Next == '<') + { + Token->Type = TokenLeftShift; + TextIndex++; + } + else + { + Token->Type = (token_type)'<'; + } + } + break; + + case '"': + { + while(Buffer->data[TextIndex + 1] != '"' && Buffer->data[TextIndex + 1] != '\0') + { + ++TextIndex; + if(Buffer->data[TextIndex + 1] == '\\') + + ++TextIndex; + } + + TokenStart += 1; + Token->Type = TokenString; + } + break; + default: + { + Token->Type = (token_type)Character; + } + break; + } + } + + TokenEnd = TextIndex + 1; + + Token->Lexeme.data = (u8 *)&Buffer->data[TokenStart]; + Token->Lexeme.size = (u64)(TokenEnd - TokenStart); + Column += (i32)Token->Lexeme.size; + + //Log("Token: \t%.lu*s\n", Token->Lexeme.Size, Token->Lexeme.Data); + + if(!Initialized) + { + Initialized = 1; + List->Root = TokenNode; + List->Current = TokenNode; + } + else + { + TokenNode->Previous = List->Current; + List->Current->Next = TokenNode; + List->Current = 
TokenNode; + } + } + + return List; +} diff --git a/source/fajr_lexer/fajr_lexer.h b/source/fajr_lexer/fajr_lexer.h new file mode 100644 index 0000000..754b89a --- /dev/null +++ b/source/fajr_lexer/fajr_lexer.h @@ -0,0 +1,119 @@ +#ifndef FAJR_LEXER_H +#define FAJR_LEXER_H + +typedef enum token_type token_type; +enum token_type +{ + TokenUndefined = 256, + TokenIdentifier, + TokenIdentifierAssignmentValue, + TokenValue, + TokenString, + TokenNumber, + TokenDoubleEqual, + TokenGreaterEqual, + TokenLesserEqual, + TokenParam, + TokenFunc, + TokenReturn, + TokenIf, + TokenElse, + TokenFor, + TokenWhile, + TokenBreak, + TokenContinue, + TokenExpression, + TokenFuncBody, + TokenUnwantedChild, + TokenNewLine, + TokenRightShift, + TokenLeftShift, + TokenStar, +}; + +typedef struct Tokenizer Tokenizer; +struct Tokenizer +{ + i32 Line; + i32 Column; +}; + +typedef enum token_flags token_flags; +enum token_flags +{ + FlagNone = (0), + FlagConstant = (1 << 0), + FlagGlobal = (1 << 1), + FlagsValue = (1 << 2), + FlagDefinition = (1 << 3), + FlagComparison = (1 << 4), + FlagDeprecated = (1 << 5), + FlagDirty = (1 << 6), +}; + +typedef struct token token; +struct token +{ + string8 Lexeme; + token_type Type; + token_flags Flags; + u64 ByteOffset; + i32 Column; + i32 Line; + + string8 MetaData; +}; + +typedef struct token_node token_node; +struct token_node +{ + token_node *Next; + token_node *Previous; + token *Token; +}; + +typedef struct token_list token_list; +struct token_list +{ + token_node *Root; + token_node *Current; +}; + +typedef struct lexer lexer; +struct lexer +{ + u8 *Text; + u64 TextCount; + u8 *EndOfFile; + u8 *UndefinedTokens; +}; + +global_variable const u8 Delimiters[] = +{ + '{', + '}', + '(', + ')', + '[', + ']', + ';', +}; + +read_only global_variable token nil_token = +{ + .Lexeme = {NULL, 0}, + .Type = TokenUndefined, + .Flags = FlagNone, + .ByteOffset = 0, + .Column = 0, + .Line = 0, +}; + +read_only global_variable token_node nil_token_node = +{ + .Next 
= &nil_token_node, + .Previous = &nil_token_node, + .Token = NULL, +}; + +#endif // FAJR_LEXER_H diff --git a/source/fajr_parser/fajr_parser.c b/source/fajr_parser/fajr_parser.c new file mode 100644 index 0000000..cfd1fb2 --- /dev/null +++ b/source/fajr_parser/fajr_parser.c @@ -0,0 +1,364 @@ +internal syntax_node * +PeekToOffset(syntax_node *Node, i32 PeekOffset, b32 findChild) +{ + if(PeekOffset < 0) + { + for(i32 PeekCount = 0; PeekCount < PeekOffset; PeekCount++) + { + if(!Node || !Node->Next) return &nil_syntax_node; + Node = Node->Next; + } + } + else if(PeekOffset > 0) + { + for(i32 PeekCount = 0; PeekCount < PeekOffset; PeekCount++) + { + if(!Node || !Node->First) return &nil_syntax_node; + Node = Node->First; + } + } + else if(PeekOffset == 0) + { + return Node; + } + + return &nil_syntax_node; +} + +internal void +NodePushChild(concrete_syntax_tree *Tree, syntax_node *Node) +{ + Node->Parent = Tree->Current; + + if(Tree->Current->First == &nil_syntax_node) + { + Tree->Current->First = Node; + Tree->Current->Last = Node; + } + else + { + Tree->Current->Last->Next = Node; + Tree->Current->Last = Node; + } +} + +internal void +NodePushNext(concrete_syntax_tree *Tree, syntax_node *Node) +{ + Node->Parent = Tree->Current; + + if(Tree->Current->Next == &nil_syntax_node) Tree->Current->Next = Node; + else Tree->Current->Last->Next = Node; +} + +internal inline b32 +IsNilSyntaxNode(syntax_node *Node) +{ + return Node == &nil_syntax_node || Node == NULL; +} + +internal void +DisownNode(concrete_syntax_tree *Tree, syntax_node *Node) +{ + Node->First = &nil_syntax_node; + Node->Last = &nil_syntax_node; + Node->Parent = &nil_syntax_node; + Node->Next = &nil_syntax_node; + Node->Token = &nil_token; + Node->Type = SyntaxNodeUnwanted; +} + +internal void +CopyNode(syntax_node *Dest, syntax_node *Src) +{ + Src->First = Dest->First; + Src->Parent = Dest->Parent; + Src->Last = Dest->Last; + Src->Next = Dest->Next; + Src->Token = Dest->Token; + Src->Type = Dest->Type; +} + 
+/*
+ * Link Node into the tree relative to ParentNode, so a node can be inserted
+ * at any given spot, hopefully allowing for code changes without breaking.
+ *
+ * Tree       - tree that is handed on to NodePushNext.
+ * Node       - node to insert.
+ * ParentNode - anchor the node is attached to.
+ * asChild    - non-zero: append Node to the anchor's child list;
+ *              zero: splice Node in directly after the anchor as a sibling.
+ *
+ * Does nothing when Node or ParentNode is NULL or the nil sentinel.
+ */
+internal void
+AdoptNode(concrete_syntax_tree *Tree, syntax_node *Node, syntax_node *ParentNode, b32 asChild)
+{
+    // There is nothing to attach to when the anchor is nil, and the nil
+    // sentinel (declared read_only) must never be written through.
+    if(Node == NULL || ParentNode == NULL) return;
+    if(IsNilSyntaxNode(Node) || IsNilSyntaxNode(ParentNode)) return;
+
+    if(asChild)
+    {
+        // Append at the tail of the child list; an empty list has no live Last.
+        if(ParentNode->Last == NULL || IsNilSyntaxNode(ParentNode->Last))
+        {
+            ParentNode->First = Node;
+        }
+        else
+        {
+            ParentNode->Last->Next = Node;
+        }
+
+        ParentNode->Last = Node;
+        Node->Parent = ParentNode;
+    }
+    else
+    {
+        // Splice in as the next sibling and keep whatever followed the anchor.
+        Node->Next = ParentNode->Next;
+        Node->Parent = ParentNode->Parent;
+        ParentNode->Next = Node;
+
+        // If the anchor was the tail of its parent's child list, Node is the new tail.
+        if(Node->Parent != NULL &&
+           !IsNilSyntaxNode(Node->Parent) &&
+           Node->Parent->Last == ParentNode)
+        {
+            Node->Parent->Last = Node;
+        }
+    }
+
+    // NOTE(review): NodePushNext is defined outside this chunk; confirm it does
+    // not relink Node a second time now that the links are set up here.
+    NodePushNext(Tree, Node);
+}
+
+
+/*
+ * Set FlagDirty on Token. Parse uses this for tokens that show up where they
+ * are not allowed and for tokens it does not recognise.
+ */
+internal inline void
+Ground(token *Token)
+{
+    Token->Flags = (token_flags)(Token->Flags | FlagDirty);
+}
+
+/*
+ * Walk the token list once and build the syntax tree in Tree.
+ *
+ * Every token gets a fresh syntax_node allocated from Arena whose four links
+ * start out pointing at nil_syntax_node. The first node becomes both
+ * Tree->Root and Tree->Current. After that the token type decides whether the
+ * node is pushed with NodePushChild, whether Tree->Current moves onto the new
+ * node, or whether Tree->Current climbs back to a parent. Tokens that are out
+ * of place are flagged through Ground().
+ *
+ * Arena - arena the syntax nodes are allocated from.
+ * List  - token list; iteration stops at NULL or &nil_token_node.
+ * Tree  - tree to fill in.
+ *
+ * Returns Tree. When Tree or List is NULL nothing is parsed.
+ */
+internal concrete_syntax_tree *
+Parse(mem_arena *Arena, token_list *List, concrete_syntax_tree *Tree)
+{
+    if(Tree == NULL || List == NULL)
+    {
+        return Tree;
+    }
+
+    for(token_node *TokenNode = List->Root;
+        TokenNode != &nil_token_node && TokenNode != NULL;
+        TokenNode = TokenNode->Next)
+    {
+        token *Token = TokenNode->Token;
+        syntax_node *SyntaxNode;
+
+        {
+            SyntaxNode = PushStruct(Arena, syntax_node);
+            SyntaxNode->Token = Token;
+            SyntaxNode->First = &nil_syntax_node;
+            SyntaxNode->Last = &nil_syntax_node;
+            SyntaxNode->Next = &nil_syntax_node;
+            SyntaxNode->Parent = &nil_syntax_node;
+        }
+
+
+        // The very first node seeds the tree.
+        if(IsNilSyntaxNode(Tree->Root))
+        {
+            Tree->Root = SyntaxNode;
+            Tree->Current = SyntaxNode;
+        }
+
+        switch((token_type)Token->Type)
+        {
+            case TokenIdentifier:
+            {
+                // Look ahead for `identifier = value` and retag the value token.
+                // Both lookahead nodes are checked before use: the list may end
+                // right after the identifier or right after the '='.
+                token_node *NextNode = TokenNode->Next;
+
+                if(NextNode != NULL &&
+                   !IsNilTokenNode(NextNode) &&
+                   NextNode->Token->Type == (token_type)'=')
+                {
+                    token_node *ValueNode = NextNode->Next;
+
+                    if(ValueNode != NULL &&
+                       !IsNilTokenNode(ValueNode) &&
+                       ValueNode->Token->Type != TokenIdentifierAssignmentValue &&
+                       ValueNode->Token->Type != TokenValue)
+                    {
+                        ValueNode->Token->Type = TokenIdentifierAssignmentValue;
+                    }
+                }
+
+                // The root node is already in the tree; do not push it again.
+                if(Tree->Current != SyntaxNode)
+                {
+                    NodePushChild(Tree, SyntaxNode);
+                }
+            }
+            break;
+
+            case TokenIdentifierAssignmentValue:
+            {
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            case TokenNumber:
+            case TokenString:
+            {
+                // A literal directly under '=' is an assignment value; it is
+                // flagged when the '=' does not hang off an identifier.
+                if(Tree->Current && Tree->Current->Token->Type == (token_type)'=')
+                {
+                    Token->Type = TokenIdentifierAssignmentValue;
+
+                    if(Tree->Current->Parent && Tree->Current->Parent->Token->Type != TokenIdentifier)
+                    {
+                        Ground(Token);
+                    }
+                }
+
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            case TokenDoubleEqual:
+            {
+                NodePushChild(Tree, SyntaxNode);
+
+                if(!IsNilSyntaxNode(Tree->Current->Parent))
+                {
+                    // NOTE(review): this rewires the enclosing node rather than the
+                    // new `==` node, and First ends up pointing at the parent;
+                    // confirm the intended shape. The node is deliberately not
+                    // made its own parent: a self-parent makes every upward walk
+                    // (see the ')' case) spin forever.
+                    Tree->Current->First = Tree->Current->Parent;
+                    Tree->Current->Last = Tree->Current->Next;
+                }
+            }
+            break;
+
+            case TokenGreaterEqual:
+            case TokenLesserEqual:
+            case(token_type)'<':
+            case(token_type)'>':
+            {
+                NodePushChild(Tree, SyntaxNode);
+                Tree->Current = SyntaxNode;
+            }
+            break;
+
+            case(token_type)'(':
+            {
+                // Only write through Current when it is a real node.
+                if(Tree->Current && !IsNilSyntaxNode(Tree->Current))
+                {
+                    Tree->Current->First = PeekToOffset(Tree->Current, 1, 0);
+                }
+
+                // Syntax nodes for the tokens that follow do not exist yet, so the
+                // closing paren has to be looked for in the token list.
+                b32 Closed = 0;
+
+                for(token_node *Ahead = TokenNode->Next;
+                    Ahead != NULL && Ahead != &nil_token_node;
+                    Ahead = Ahead->Next)
+                {
+                    if(Ahead->Token->Type == (token_type)')')
+                    {
+                        Closed = 1;
+                        break;
+                    }
+                }
+
+                if(!Closed)
+                {
+                    print("Forgot to close paran");
+                }
+            }
+            break;
+
+            case(token_type)')':
+            {
+                // Climb until the node that opened the parens (or nil) is reached.
+                // NOTE(review): the '(' case never pushes a node, so this walk
+                // currently always ends on nil; confirm the intent.
+                while(Tree->Current &&
+                      Tree->Current != &nil_syntax_node &&
+                      Tree->Current->Token->Type != (token_type)'(')
+                {
+                    Tree->Current = Tree->Current->Parent;
+                }
+
+                if(Tree->Current && Tree->Current->Parent)
+                {
+                    Tree->Current = Tree->Current->Parent;
+                }
+            }
+            break;
+
+            case(token_type)'{':
+            {
+                // The assignment does not depend on how far we can peek, so it is
+                // done once instead of in a loop whose exit relies on PeekToOffset
+                // eventually returning NULL.
+                if(Tree->Current &&
+                   !IsNilSyntaxNode(Tree->Current) &&
+                   PeekToOffset(Tree->Current, 0, 0))
+                {
+                    Tree->Current->First = Tree->Current->Next;
+                    // TODO(nasr): was doing something here
+                }
+            }
+            break;
+
+            case(token_type)'}':
+            {
+                // End of a body: step back out to the enclosing node.
+                if(Tree->Current && Tree->Current->Parent)
+                {
+                    Tree->Current = Tree->Current->Parent;
+                }
+            }
+            break;
+
+            case(token_type)';':
+            {
+                // End of a statement: close the current node and step out. Current
+                // may already be the nil sentinel here, which must not be written.
+                if(Tree->Current && !IsNilSyntaxNode(Tree->Current))
+                {
+                    Tree->Current->Last = Tree->Current;
+                    Tree->Current = Tree->Current->Parent;
+                }
+            }
+            break;
+
+            case TokenFunc:
+            {
+                // TODO(nasr): define the function body
+                NodePushChild(Tree, SyntaxNode);
+                Tree->Current = SyntaxNode;
+            }
+            break;
+
+            case TokenReturn:
+            {
+                // A return whose enclosing node is not a function is flagged.
+                if(Tree->Current &&
+                   Tree->Current->Parent &&
+                   Tree->Current->Parent->Token->Type != TokenFunc)
+                {
+                    Ground(Token);
+                }
+
+                NodePushChild(Tree, SyntaxNode);
+                Tree->Current = SyntaxNode;
+            }
+            break;
+
+            case TokenIf:
+            {
+                NodePushChild(Tree, SyntaxNode);
+                Tree->Current = SyntaxNode;
+            }
+            break;
+
+            case TokenElse:
+            {
+                // TODO(nasr): handle no body
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            case TokenWhile:
+            case TokenFor:
+            {
+                NodePushChild(Tree, SyntaxNode);
+                Tree->Current = SyntaxNode;
+            }
+            break;
+
+            case TokenBreak:
+            {
+                // Same null checks as the TokenReturn case; without an enclosing
+                // node the break is treated as being outside any loop.
+                token_type Type = TokenUndefined;
+
+                if(Tree->Current && Tree->Current->Parent)
+                {
+                    Type = Tree->Current->Parent->Token->Type;
+                }
+
+                if(Type != TokenFor && Type != TokenWhile)
+                {
+                    Ground(Token);
+                    print("Break statement not allowed here");
+                }
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            case TokenContinue:
+            {
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            case TokenExpression:
+            case TokenParam:
+            {
+                NodePushChild(Tree, SyntaxNode);
+                Tree->Current = SyntaxNode;
+            }
+            break;
+
+            case TokenStar:
+            {
+                // TODO(nasr): once we get to better visualizations i think
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            case TokenUndefined:
+            {
+                Ground(Token);
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+
+            default:
+            {
+                // Anything the parser does not know yet is kept in the tree but flagged.
+                Ground(Token);
+                NodePushChild(Tree, SyntaxNode);
+            }
+            break;
+        }
+    }
+
+    return Tree;
+}
diff --git a/source/fajr_parser/fajr_parser.h b/source/fajr_parser/fajr_parser.h
new file mode 100644
index 0000000..d83dde2
--- /dev/null
+++ b/source/fajr_parser/fajr_parser.h
@@ -0,0 +1,75 @@
+#ifndef EDITOR_PARSER_H
+#define EDITOR_PARSER_H
+
+// Classification slot stored in syntax_node.Type.
+typedef enum syntax_node_type syntax_node_type;
+enum syntax_node_type
+{
+    SyntaxNodeLiteral,
+    SyntaxNodeIdentifier,
+    SyntaxNodeBinary,
+
+    SyntaxNodeAssignment,
+    SyntaxNodeReturn,
+    SyntaxNodeFunction,
+    SyntaxNodeUnwanted,
+};
+
+// One node of the syntax tree; Parse creates one per lexer token.
+// NOTE(review): First/Last/Parent/Next look like first child, last child, parent
+// and next sibling; NodePushChild, which maintains them, is outside this view.
+typedef struct syntax_node syntax_node;
+struct syntax_node
+{
+    syntax_node *First;
+    syntax_node *Last;
+    syntax_node *Parent;
+    syntax_node *Next;
+
+    token *Token;
+
+    syntax_node_type Type;
+};
+
+// Tree handle: Root is the first node Parse created, Current is the node Parse
+// is positioned on as it moves onto new nodes and back up to their parents.
+typedef struct concrete_syntax_tree concrete_syntax_tree;
+struct concrete_syntax_tree
+{
+    syntax_node *Root;
+    syntax_node *Current;
+};
+
+// Sentinel used in place of NULL: every link points back at the sentinel itself
+// and Token points at nil_token, so reading through a nil node is always valid.
+// It is declared read_only, so code should presumably never write through it.
+// TODO(nasr): implement this later together with file handling
+read_only global_variable
+syntax_node nil_syntax_node =
+{
+.First = &nil_syntax_node,
+.Last = &nil_syntax_node,
+.Parent = &nil_syntax_node,
+.Next = &nil_syntax_node,
+.Token = &nil_token,
+};
+
+// Empty tree: Root and Current both refer to the nil sentinel.
+read_only global_variable
+concrete_syntax_tree nil_concrete_syntax_tree =
+{
+.Root = &nil_syntax_node,
+.Current = &nil_syntax_node,
+};
+
+// Advance Node (an lvalue pointer, modified in place) along ->Next until it is
+// NULL, equals &NilNode, or its token has type WantedType.
+// The type parameter must not be called Type: the preprocessor would also
+// substitute it into the ->Type member access and break every expansion.
+// The loop owns an empty braced body so the macro is safe inside if/else.
+#define PeekForward(Node, WantedType, NilNode) \
+    for(; (Node) && (Node) != &(NilNode) && (Node)->Token->Type != (WantedType); (Node) = (Node)->Next) \
+    { \
+    }
+
+#endif // EDITOR_PARSER_H
diff --git a/tests/main.fajr b/tests/main.fajr
new file mode 100644
index 0000000..88e2bb0
--- /dev/null
+++ b/tests/main.fajr
@@ -0,0 +1,6 @@
+module file;
+
+void main()
+{
+    s32 value = 0;
+}
-- 
cgit v1.3