summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
authornasr <nsrddyn@gmail.com>2026-03-18 22:18:47 +0000
committernasr <nsrddyn@gmail.com>2026-03-18 22:18:47 +0000
commit62e8946f4f2d1759f2d73318209a523f853aa534 (patch)
tree23a6cd7d71929a37e4121b8c90f5d03c5683555c /source
parentfaaebe4f754d2bf138803e94424935de12a20235 (diff)
feature(btree_implementation): rethinnking the insertion logic using a key struct
Diffstat (limited to 'source')
-rw-r--r--source/btree_impl.h182
-rw-r--r--source/csv_decoder.h17
-rw-r--r--source/tb_db.c9
3 files changed, 132 insertions, 76 deletions
diff --git a/source/btree_impl.h b/source/btree_impl.h
index 8eb0723..7dda6c9 100644
--- a/source/btree_impl.h
+++ b/source/btree_impl.h
@@ -1,38 +1,46 @@
1/**
2 * >> api notes
3 * >> by Abdellah El Morabit
4 *
5 * - creation of a btree happens when doing an insertion on an
6 * empty btree where the root is null or nil
7 *
8 **/
9
1/* single header b-tree implementation by Abdellah El Morabit */ 10/* single header b-tree implementation by Abdellah El Morabit */
2#ifndef B_TREE_H 11#ifndef BTREE_H
3#define B_TREE_H 12#define BTREE_H
4 13
5// maximum height of the tree the lower the lower the lower amount 14// maximum height of the tree the lower the lower the lower amount
6// of disk reads which translates into faster? 15// of disk reads which translates into faster?
7#if 0 16#if 0
8global_variable read_only s16 B_TREE_ORDER = 4; 17global_variable read_only s16 btree_ORDER = 4;
9#endif 18#endif
10#define B_TREE_ORDER 4 19#define BTREE_ORDER 4
11 20
12//- NOTE(nasr): defining a key to improve sorting 21//- NOTE(nasr): defining a key to improve sorting
13// i think saying that a key is a combination of the column + row is a good way of appraoching this 22// i think saying that a key is a combination of the column + row is a good way of appraoching this
14typedef struct key key; 23typedef struct key key;
15struct key 24struct key
16{ 25{
17 string8 header; 26 s32 header_index; //- col number
18 s32 row; 27 s32 row_index;
19}; 28};
20 29
21typedef struct b_tree_node b_tree_node; 30typedef struct btree_node btree_node;
22struct b_tree_node 31struct btree_node
23{ 32{
24 // store the key values of the sub nodes? if they are leaves? 33 // store the key values of the sub nodes? if they are leaves?
25 key keys[B_TREE_ORDER - 1]; 34 key keys[BTREE_ORDER - 1];
26 // TODO(nasr): replace with something more generic? 35 // TODO(nasr): replace with something more generic?
27 // NOTE(nasr): cons of void * -> no type safety 36 // NOTE(nasr): cons of void * -> no type safety
28 // is there a way to still have some sort of that? 37 // is there a way to still have some sort of that? size not variable
29 // size not variable 38 void *payload_per_key[BTREE_ORDER - 1];
30 void *payload_per_key[B_TREE_ORDER - 1]; 39 btree_node *parent;
31 b_tree_node *parent;
32 // handle to store children faster than linked list 40 // handle to store children faster than linked list
33 // because now we can iterate over the nodes instead of having cache misses 41 // because now we can iterate over the nodes instead of having cache misses
34 // when jumping through linked nodes 42 // when jumping through linked nodes
35 b_tree_node *children[B_TREE_ORDER]; 43 btree_node *children[BTREE_ORDER];
36 // this shouldn't change things but could be a performance improvement on a bigger scale?? 44 // this shouldn't change things but could be a performance improvement on a bigger scale??
37 s32 *max_children; 45 s32 *max_children;
38 // NOTE(nasr): reference count ::: check how many leaves are using this node 46 // NOTE(nasr): reference count ::: check how many leaves are using this node
@@ -46,120 +54,162 @@ struct b_tree_node
46 // this could solve memory location issues? 54 // this could solve memory location issues?
47}; 55};
48 56
49typedef struct b_tree b_tree; 57
50struct b_tree 58typedef struct btree btree;
59struct btree
51{ 60{
52 // NOTE(nasr): make sure this always stays in memory 61 // NOTE(nasr): make sure this always stays in memory
53 // so that initial fetch never requires a disk read 62 // so that initial fetch never requires a disk read
54 b_tree_node *root; 63 btree_node *root;
55}; 64};
56 65
57#ifdef B_TREE_IMPLEMENTATION 66
67
68#ifdef BTREE_IMPLEMENTATION
58/////////////////////////////////////////////////////////////////////////////// 69///////////////////////////////////////////////////////////////////////////////
59//- b_tree.c 70//- btree.c
71
72
73internal b32
74is_nil_btree_node(btree_node *node)
75{
76 // NOTE(nasr): looks like a useless function but could be usefull when we implement a nil_btree_node
77 return (node != NULL);
78}
60 79
61// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees? 80// TODO(nasr): 1. splitting the tree when getting too big? (horizontally) 2. joining trees?
62 81
63internal b_tree_node * 82internal b8
64btree_node_alloc(mem_arena *arena) 83key_compare(key destination_key, key source_key)
65{ 84{
66 b_tree_node *node = PushStructZero(arena, b_tree_node); 85 s32 source_index = source_key.header_index + source_key.row_index;
67 node->leaf = 1; 86 s32 destination_index = destination_key.header_index + destination_key.row_index;
68 return node; 87
88 if(destination_index > source_index) return 1;
89 else if(destination_index < source_index) return -1;
90 else return 0;
69} 91}
70 92
71// NOTE(nasr): @return the index of the found element 93// NOTE(nasr): @return the index of the found element
94//- @param: start node could be the root node for all we care. but this enables optimizations in the future if we need them
72internal s32 95internal s32
73btree_node_find_pos(string8 value, b_tree_node *node) 96btree_find_ipos(key k, btree_node *start_node)
74{ 97{
75 unused(value); 98 s32 index = 0;
76 unused(node); 99 for (btree_node *current_node = start_node;
77 100 index < current_node->key_count && key_compare(k, current_node->keys[0]) < 0;
78#if 0 101 ++index);
79 s32 i = 0; 102 return index;
80 for (; i < node->key_count && string8_cmp(node->keys[i], value) < 0; ++i);
81 return i;
82#endif
83 103
84 return 0; 104 return 0;
85 105
86} 106}
87 107
88internal void
89b_tree_create(mem_arena *arena, b_tree *tree)
90{
91 tree->root = btree_node_alloc(arena);
92 tree->root->leaf = 1;
93 tree->root->key_count = 0;
94}
95
96
97// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory 108// NOTE(nasr): nodes that get passed as parameters should've already been loaded into memory
98internal void * 109internal void *
99b_tree_search(b_tree_node *node, string8 key) 110btree_search(btree_node *node, key key)
100{ 111{
101 unused(node);
102 unused(key);
103 112
104#if 0 113 s32 index = btree_find_ipos(key, node);
105 s32 i = btree_node_find_pos(key, node);
106 114
107 if (i < node->key_count && string8_cmp(node->keys[i], key) == 0) 115 if (index < node->key_count && key_compare(node->keys[index], key) == 0)
108 { 116 {
109 return node->payload_per_key[i]; 117 return node->payload_per_key[index];
110 } 118 }
111 if (node->leaf) 119 if (node->leaf)
112 { 120 {
113 return NULL; 121 return NULL;
114 } 122 }
115 return b_tree_search(node->children[i], key); 123 return btree_search(node->children[index], key);
116#endif
117 124
118 return NULL; 125 return NULL;
119} 126}
120 127
121 128// TODO(nasr): split node when key_count == btree_ORDER - 1 (node is full)
122// TODO(nasr): split node when key_count == B_TREE_ORDER - 1 (node is full) 129// -- each node has max btree_ORDER - 1 keys
130//
123internal void 131internal void
124b_tree_insert(b_tree *tree, string8 key, void *payload) 132btree_insert(mem_arena *arena, btree *tree, key key, void *payload)
125{ 133{
126 unused(tree); 134
127 unused(key); 135 if(is_nil_btree_node(tree->root))
128 unused(payload); 136 {
129#if 0 137 tree->root = PushStructZero(arena, btree_node);
130 b_tree_node *current_node = tree->root; 138 tree->root->leaf = 1;
139 tree->root->key_count = 0;
140
141 }
142
143 btree_node *current_node = tree->root;
144
131 145
132 if (current_node->leaf) 146 if (current_node->leaf)
133 { 147 {
134 // find the insertion position and shift keys right to make room 148 // find the insertion position and shift keys right to make room
135 s32 i = btree_node_find_pos(key, current_node); 149 //- after: but what is this. we are expecting a new key.but we are looking for
150 //- its position in the tree is useless because it's non existent
151
152 s32 i = btree_find_ipos(key, current_node);
153
136 for (s32 j = current_node->key_count; j > i; --j) 154 for (s32 j = current_node->key_count; j > i; --j)
137 { 155 {
138 current_node->keys[j] = current_node->keys[j - 1]; 156 current_node->keys[j] = current_node->keys[j - 1];
139 current_node->payload_per_key[j] = current_node->payload_per_key[j - 1]; 157 current_node->payload_per_key[j] = current_node->payload_per_key[j - 1];
140 } 158 }
159
141 current_node->keys[i] = key; 160 current_node->keys[i] = key;
142 current_node->payload_per_key[i] = payload; 161 current_node->payload_per_key[i] = payload;
143 162
144 if(current_node->key_count < (B_TREE_ORDER - 1)) 163 if(current_node->key_count < (BTREE_ORDER - 1))
145 { 164 {
146 current_node->key_count += 1; 165 current_node->key_count += 1;
147 } 166 }
148 else { 167 else {
149 // TODO(nasr): creating a new branch / tree? 168 // TODO(nasr): creating a new branch / tree?
150 // make a seperate function for this 169 // make a seperate function for this
170 s32 split_pos = current_node->parent->key_count / 2 - 1;
171 s32 updated_keycount_p = 0;
172 for(s32 index = 0; index <= split_pos; ++index)
173 {
174 ++updated_keycount_p;
175 }
176
177 current_node->parent->key_count = updated_keycount_p;
178
151 } 179 }
152 } 180 }
153 // TODO(nasr): internal node case walk down then split on the way back up 181 // TODO(nasr): internal node case walk down then split on the way back up
154#endif
155} 182}
156 183
184
157internal void 185internal void
158b_tree_write(b_tree *bt) 186btree_write(btree *bt)
159{ 187{
160 // TODO(nasr): write the b_tree to disk 188
161 unused(bt); 189 unused(bt);
190#if 0
191 temp_arena *temp_arena = temp_arena_begin(arena);
192 btree_node *root = bt->root;
193
194 for(btree_node *bt = PushStruct(arena); root->next; next = root;)
195 {
196
197 }
198
199 temp_arena_end(temp);
200#endif
201
202}
203
204// - load the entire db into memory.
205#if 0
206internal void
207btree_load(mem_arena *arena)
208{
209
210
162} 211}
212#endif
163 213
164#endif /* B_TREE_IMPLEMENTATION */ 214#endif /* BTREE_IMPLEMENTATION */
165#endif /* B_TREE_H */ 215#endif /* BTREE_H */
diff --git a/source/csv_decoder.h b/source/csv_decoder.h
index b754ef5..446f1da 100644
--- a/source/csv_decoder.h
+++ b/source/csv_decoder.h
@@ -157,7 +157,7 @@ tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list
157 unused(token_list); 157 unused(token_list);
158 b32 finding_headers = TRUE; 158 b32 finding_headers = TRUE;
159 159
160 if(buffer.size < 0) return NULL; 160 if(buffer.size == 0) return NULL;
161 161
162 csv_token *tok = PushStruct(arena, csv_token); 162 csv_token *tok = PushStruct(arena, csv_token);
163 163
@@ -238,11 +238,10 @@ tokenize_csv(string8 buffer, mem_arena *arena, csv_table *table, csv_token_list
238} 238}
239 239
240//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future 240//- NOTE(nasr): I don't know why we are still using that dumb table but we'll remove it in the future
241internal b_tree * 241internal btree *
242parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table) 242parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
243{ 243{
244 b_tree *tree = PushStructZero(arena, b_tree); 244 btree *tree = PushStructZero(arena, btree);
245 b_tree_create(arena, tree);
246 245
247 // iterate over the token list while the token is not nil 246 // iterate over the token list while the token is not nil
248 for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token) 247 for (csv_token *ct = ctl->start_token; !is_nil_csv_token(ct); ct = ct->next_token)
@@ -266,7 +265,6 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
266 { 265 {
267 266
268 } 267 }
269
270 } 268 }
271 } 269 }
272 270
@@ -281,7 +279,14 @@ parse_csv(mem_arena *arena, csv_token_list *ctl, csv_table *table)
281 // NOTE(nasr): payload is the cten itself so the caller can reach 279 // NOTE(nasr): payload is the cten itself so the caller can reach
282 // row/col metadata without us having to copy it 280 // row/col metadata without us having to copy it
283 // NOTE(nasr): heh why do we void cast again? 281 // NOTE(nasr): heh why do we void cast again?
284 b_tree_insert(tree, ct->lexeme, (void *)ct); 282
283 key k = {
284 .header_index = 1,
285 .row_index = 1,
286 };
287
288 // btree_insert(arena, tree, (key)ct->lexeme, (void *)ct);
289 btree_insert(arena, tree, k, (void *)ct);
285 } 290 }
286 291
287 return tree; 292 return tree;
diff --git a/source/tb_db.c b/source/tb_db.c
index b992111..8aba614 100644
--- a/source/tb_db.c
+++ b/source/tb_db.c
@@ -1,4 +1,4 @@
1#define B_TREE_IMPLEMENTATION 1#define BTREE_IMPLEMENTATION
2#define BASE_UNITY 2#define BASE_UNITY
3#include "base/base_include.h" 3#include "base/base_include.h"
4 4
@@ -32,7 +32,7 @@ is_delimiter(u8 point)
32 return (point == ','); 32 return (point == ',');
33} 33}
34 34
35#include "b_tree_impl.h" 35#include "btree_impl.h"
36#include "csv_decoder.h" 36#include "csv_decoder.h"
37 37
38typedef struct query_token query_token; 38typedef struct query_token query_token;
@@ -162,6 +162,7 @@ int main(int count, char **value)
162 print("error reading from stdin"); 162 print("error reading from stdin");
163 } 163 }
164 164
165
165 // TODO(nasr): extract this later in the future and make a string copy function/macro 166 // TODO(nasr): extract this later in the future and make a string copy function/macro
166 // @params (s32 lbuf_size , string8 lbuf_stringified) 167 // @params (s32 lbuf_size , string8 lbuf_stringified)
167 s32 lbuf_size = sizeof(lbuf) - 1; 168 s32 lbuf_size = sizeof(lbuf) - 1;
@@ -189,9 +190,9 @@ int main(int count, char **value)
189 assert_msg(tokens != NULL, "Tokens are NULL."); 190 assert_msg(tokens != NULL, "Tokens are NULL.");
190 191
191 csv_token_list *ctl = PushStruct(global_arena, csv_token_list); 192 csv_token_list *ctl = PushStruct(global_arena, csv_token_list);
192 b_tree *bt = parse_csv(global_arena, ctl, table); 193 btree *bt = parse_csv(global_arena, ctl, table);
193 194
194 b_tree_write(bt); 195 btree_write(bt);
195 } 196 }
196 197
197 // NOTE(nasr): not sure on how to approach the b-tree and the table format thing 198 // NOTE(nasr): not sure on how to approach the b-tree and the table format thing