From 62e8946f4f2d1759f2d73318209a523f853aa534 Mon Sep 17 00:00:00 2001 From: nasr Date: Wed, 18 Mar 2026 22:18:47 +0000 Subject: feature(btree_implementation): rethinnking the insertion logic using a key struct --- Makefile | 16 ++++- source/btree_impl.h | 182 ++++++++++++++++++++++++++++++++------------------- source/csv_decoder.h | 17 +++-- source/tb_db.c | 9 +-- 4 files changed, 146 insertions(+), 78 deletions(-) diff --git a/Makefile b/Makefile index 39e9b87..29aa256 100644 --- a/Makefile +++ b/Makefile @@ -1,13 +1,25 @@ BIN = build/engine SRC = source/tb_db.c + +# CC = gcc CC = clang -CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror + +COMPILER := $(shell $(CC) --version | grep -o "gcc\|clang" | head -1) + +# check for compile optimizations per compiler +ifeq ($(COMPILER),gcc) + CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -Werror -O0 +else ifeq ($(COMPILER),clang) + CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -O0 +else + CFLAGS = -Wall -Wextra -Wpedantic -Wno-unused-function -g -O0 +endif $(BIN): $(SRC) mkdir -p build $(CC) $(CFLAGS) $< -o $@ -run: +run: $(BIN) $(BIN) .PHONY: clean diff --git a/source/btree_impl.h b/source/btree_impl.h index 8eb0723..7dda6c9 100644 --- a/source/btree_impl.h +++ b/source/btree_impl.h @@ -1,38 +1,46 @@ +/** + * >> api notes + * >> by Abdellah El Morabit + * + * - creation of a btree happens when doing an insertion on an + * empty btree where the root is null or nil + * + **/ + /* single header b-tree implementation by Abdellah El Morabit */ -#ifndef B_TREE_H -#define B_TREE_H +#ifndef BTREE_H +#define BTREE_H // maximum height of the tree the lower the lower the lower amount // of disk reads which translates into faster? #if 0 -global_variable read_only s16 B_TREE_ORDER = 4; +global_variable read_only s16 btree_ORDER = 4; #endif -#define B_TREE_ORDER 4 +#define BTREE_ORDER 4 //- NOTE(nasr): defining a key to improve sorting // i think saying that a key is a combination of the column + row is a good way of appraoching this typedef struct key key; struct key { - string8 header; - s32 row; + s32 header_index; //- col number + s32 row_index; }; -typedef struct b_tree_node b_tree_node; -struct b_tree_node +typedef struct btree_node btree_node; +struct btree_node { // store the key values of the sub nodes? if they are leaves? - key keys[B_TREE_ORDER - 1]; + key keys[BTREE_ORDER - 1]; // TODO(nasr): replace with something more generic? // NOTE(nasr): cons of void * -> no type safety - // is there a way to still have some sort of that? - // size not variable - void *payload_per_key[B_TREE_ORDER - 1]; - b_tree_node *parent; + // is there a way to still have some sort of that? size not variable + void *payload_per_key[BTREE_ORDER - 1]; + btree_node *parent; // handle to store children faster than linked list // because now we can iterate over the nodes instead of having cache misses // when jumping through linked nodes - b_tree_node *children[B_TREE_ORDER]; + btree_node *children[BTREE_ORDER]; // this shouldn't change things but could be a performance improvement on a bigger scale?? s32 *max_children; // NOTE(nasr): reference count ::: check how many leaves are using this node @@ -46,120 +54,162 @@ struct b_tree_node // this could solve memory location issues? }; -typedef struct b_tree b_tree; -struct b_tree + +typedef struct btree btree; +struct btree { // NOTE(nasr): make sure this always stays in memory // so that initial fetch never requires a disk read - b_tree_node *root; + btree_node *root; }; -#ifdef B_TREE_IMPLEMENTATION + + +#ifdef BTREE_IMPLEMENTATION /////////////////////////////////////////////////////////////////////////////// -//- b_tree.c +//- btree.c + + +internal b32 +is_nil_btree_node(btree_node *node) +{ + // NOTE(nasr): looks like a useless function but could be usefull when we implement a nil_btree_node + return (node != NULL); +} // TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees? -internal b_tree_node * -btree_node_alloc(mem_arena *arena) +internal b8 +key_compare(key destination_key, key source_key) { - b_tree_node *node = PushStructZero(arena, b_tree_node); - node->leaf = 1; - return node; + s32 source_index = source_key.header_index + source_key.row_index; + s32 destination_index = destination_key.header_index + destination_key.row_index; + + if(destination_index > source_index) return 1; + else if(destination_index < source_index) return -1; + else return 0; } // NOTE(nasr): @return the index of the found element +//- @param: start node could be the root node for all we care. but this enables optimizations in the future if we need them internal s32 -btree_node_find_pos(string8 value, b_tree_node *node) +btree_find_ipos(key k, btree_node *start_node) { - unused(value); - unused(node); - -#if 0 - s32 i = 0; - for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i); - return i; -#endif + s32 index = 0; + for (btree_node *current_node = start_node; + index < current_node->key_count && key_compare(k, current_node->keys[0]) < 0; + ++index); + return index; return 0; } -internal void -b_tree_create(mem_arena *arena, b_tree *tree) -{ - tree->root = btree_node_alloc(arena); - tree->root->leaf = 1; - tree->root->key_count = 0; -} - - // NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory internal void * -b_tree_search(b_tree_node *node, string8 key) +btree_search(btree_node *node, key key) { - unused(node); - unused(key); -#if 0 - s32 i = btree_node_find_pos(key, node); + s32 index = btree_find_ipos(key, node); - if (i < node->key_count && string8_cmp(node->keys[i], key) == 0) + if (index < node->key_count && key_compare(node->keys[index], key) == 0) { - return node->payload_per_key[i]; + return node->payload_per_key[index]; } if (node->leaf) { return NULL; } - return b_tree_search(node->children[i], key); -#endif + return btree_search(node->children[index], key); return NULL; } - -// TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full) +// TODO(nasr): split node when key_count == btree_ORDER - 1 (node is full) +// -- each node has max btree_ORDER - 1 keys +// internal void -b_tree_insert(b_tree *tree, string8 key, void *payload) +btree_insert(mem_arena *arena, btree *tree, key key, void *payload) { - unused(tree); - unused(key); - unused(payload); -#if 0 - b_tree_node *current_node = tree->root; + + if(is_nil_btree_node(tree->root)) + { + tree->root = PushStructZero(arena, btree_node); + tree->root->leaf = 1; + tree->root->key_count = 0; + + } + + btree_node *current_node = tree->root; + if (current_node->leaf) { // find the insertion position and shift keys right to make room - s32 i = btree_node_find_pos(key, current_node); + //- after: but what is this. we are expecting a new key.but we are looking for + //- its position in the tree is useless because it's non existent + + s32 i = btree_find_ipos(key, current_node); + for (s32 j = current_node->key_count; j > i; --j) { current_node->keys[j] = current_node->keys[j - 1]; current_node->payload_per_key[j] = current_node->payload_per_key[j - 1]; } + current_node->keys[i] = key; current_node->payload_per_key[i] = payload; - if(current_node->key_count < (B_TREE_ORDER - 1)) + if(current_node->key_count < (BTREE_ORDER - 1)) { current_node->key_count += 1; } else { // TODO(nasr): creating a new branch / tree? // make a seperate function for this + s32 split_pos = current_node->parent->key_count / 2 - 1; + s32 updated_keycount_p = 0; + for(s32 index = 0; index <= split_pos; ++index) + { + ++updated_keycount_p; + } + + current_node->parent->key_count = updated_keycount_p; + } } // TODO(nasr): internal node case walk down then split on the way back up -#endif } + internal void -b_tree_write(b_tree *bt) +btree_write(btree *bt) { - // TODO(nasr): write the b_tree to disk + unused(bt); +#if 0 + temp_arena *temp_arena = temp_arena_begin(arena); + btree_node *root = bt->root; + + for(btree_node *bt = PushStruct(arena); root->next; next = root;) + { + + } + + temp_arena_end(temp); +#endif + +} + +// - load the entire db into memory. +#if 0 +internal void +btree_load(mem_arena *arena) +{ + + } +#endif -#endif /* B_TREE_IMPLEMENTATION */ -#endif /* B_TREE_H */ +#endif /* BTREE_IMPLEMENTATION */ +#endif /* BTREE_H */ diff --git a/source/csv_decoder.h b/source/csv_decoder.h index b754ef5..446f1da 100644 --- a/source/csv_decoder.h +++ b/source/csv_decoder.h @@ -157,7 +157,7 @@ tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list unused(token_list); b32 finding_headers = TRUE; - if(buffer.size < 0) return NULL; + if(buffer.size == 0) return NULL; csv_token *tok = PushStruct(arena, csv_token); @@ -238,11 +238,10 @@ tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list } //- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future -internal b_tree * +internal btree * parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) { - b_tree *tree = PushStructZero(arena, b_tree); - b_tree_create(arena, tree); + btree *tree = PushStructZero(arena, btree); // iterate over the token list while the token is not nil for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) @@ -266,7 +265,6 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) { } - } } @@ -281,7 +279,14 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) // NOTE(nasr): payload is the cten itself so the caller can reach // row/col metadata without us having to copy it // NOTE(nasr): heh why do we void cast again? - b_tree_insert(tree, ct->lexeme, (void *)ct); + + key k = { + .header_index = 1, + .row_index = 1, + }; + + // btree_insert(arena, tree, (key)ct->lexeme, (void *)ct); + btree_insert(arena, tree, k, (void *)ct); } return tree; diff --git a/source/tb_db.c b/source/tb_db.c index b992111..8aba614 100644 --- a/source/tb_db.c +++ b/source/tb_db.c @@ -1,4 +1,4 @@ -#define B_TREE_IMPLEMENTATION +#define BTREE_IMPLEMENTATION #define BASE_UNITY #include "base/base_include.h" @@ -32,7 +32,7 @@ is_delimiter(u8 point) return (point == ','); } -#include "b_tree_impl.h" +#include "btree_impl.h" #include "csv_decoder.h" typedef struct query_token query_token; @@ -162,6 +162,7 @@ int main(int count, char **value) print("error reading from stdin"); } + // TODO(nasr): extract this later in the future and make a string copy function/macro // @params (s32 lbuf_size , string8 lbuf_stringified) s32 lbuf_size = sizeof(lbuf) - 1; @@ -189,9 +190,9 @@ int main(int count, char **value) assert_msg(tokens != NULL, "Tokens are NULL."); csv_token_list *ctl = PushStruct(global_arena, csv_token_list); - b_tree *bt = parse_csv(global_arena, ctl, table); + btree *bt = parse_csv(global_arena, ctl, table); - b_tree_write(bt); + btree_write(bt); } // NOTE(nasr): not sure on how to approach the b-tree and the table format thing -- cgit v1.3