summaryrefslogtreecommitdiff
path: root/source/fajr_lexer
diff options
context:
space:
mode:
Diffstat (limited to 'source/fajr_lexer')
-rw-r--r--source/fajr_lexer/fajr_lexer.c274
-rw-r--r--source/fajr_lexer/fajr_lexer.h119
2 files changed, 393 insertions, 0 deletions
diff --git a/source/fajr_lexer/fajr_lexer.c b/source/fajr_lexer/fajr_lexer.c
new file mode 100644
index 0000000..3a98ca9
--- /dev/null
+++ b/source/fajr_lexer/fajr_lexer.c
@@ -0,0 +1,274 @@
1internal inline b32
2IsAlpha(u8 Character)
3{
4 return ((Character >= 'a' && Character <= 'z') ||
5 (Character >= 'A' && Character <= 'Z') ||
6 (Character == '_'));
7}
8
9internal inline b32
10IsDigit(u8 Character)
11{
12 return (Character >= '0' && Character <= '9');
13}
14
15internal b32
16IsDelimiter(u8 Character)
17{
18 for(i32 Index = 0; Index < (i32)sizeof(Delimiters); ++Index)
19 {
20 if(Delimiters[Index] == Character)
21 {
22 return 1;
23 }
24 }
25 return 0;
26}
27
28internal inline b32
29IsNilTokenNode(token_node *TokenNode)
30{
31 return TokenNode == &nil_token_node || TokenNode == NULL;
32}
33
34internal inline b32
35IsNilToken(token *Token)
36{
37 return Token == &nil_token || Token == NULL;
38}
39
40internal inline b32
41IsWhiteSpace(u8 Character)
42{
43 return (Character == '\n' || Character == '\r' ||
44 Character == ' ' || Character == '\t');
45}
46
47internal inline void
48ParseCStyleComment(u8 Buffer[])
49{
50 // TODO(nasr): handle c style comments
51 // couuld be usefull for function information visualiszation
52 // so think of a way to link themn to functions and variables?
53 // some sort of meta data per thing?
54 // and then we can do a visualization if the str8.count of the metadata thing is bigger then 0
55 // we should a visualization thing for the thing
56 // if the thing is less then 0, we dont do anything?
57
58 // TODO(nasr): while doingn this we could also add in some editor specific anotations ?
59}
60
61internal inline void
62ParseCPPStyleComment(u8 Buffer[])
63{
64 // TODO(nasr):
65}
66
67internal inline b32
68Is_TokenBreak(u8 Character)
69{
70 return (IsWhiteSpace(Character) || IsDelimiter(Character));
71}
72
73internal token_list *
74Lex(string8 *Buffer, mem_arena *Arena, token_list *List)
75{
76 b32 Initialized = 0;
77 i32 Line = 1;
78 i32 Column = 1;
79
80 for(u64 TextIndex = 0; TextIndex < Buffer->size; TextIndex++)
81 {
82 u8 Character = Buffer->data[TextIndex];
83
84 if(Character == '\r' || Character == '\n')
85 {
86 if(Character == '\r' &&
87 (TextIndex + 1 < Buffer->size) &&
88 Buffer->data[TextIndex + 1] == '\n')
89 {
90 TextIndex++;
91 }
92
93 ++TextIndex;
94 ++Line;
95
96 // NOTE(nasr): reset the column to the beginning of the line
97 Column = 1;
98 continue;
99 }
100
101 if(IsWhiteSpace(Character))
102 {
103 ++Column;
104 continue;
105 }
106
107 token_node *TokenNode = PushStruct(Arena, token_node);
108 token *Token = PushStruct(Arena, token);
109 TokenNode->Next = &nil_token_node;
110 TokenNode->Previous = &nil_token_node;
111 TokenNode->Token = Token;
112 Token->Line = Line;
113 Token->Column = Column;
114 Token->ByteOffset = (u64)TextIndex;
115 Token->Flags = FlagNone;
116
117 u64 TokenStart = TextIndex;
118 u64 TokenEnd = TextIndex;
119
120 if(Character > 126)
121 {
122 Token->Type = TokenUnwantedChild;
123 TokenEnd = TextIndex + 1;
124 }
125 else if(IsAlpha(Character))
126 {
127 while((TextIndex + 1 < Buffer->size) &&
128 (IsAlpha(Buffer->data[TextIndex + 1]) || IsDigit(Buffer->data[TextIndex + 1])))
129 {
130 ++TextIndex;
131 }
132
133 // TODO(nasr): build a lexeme
134 TokenEnd = TextIndex + 1;
135 string8 Lexeme = {
136 .data = (u8 *)Buffer->data,
137 .size = (u64)Buffer->data
138 }
139 ;
140
141 // TODO(nasr): handle functions
142 if(string8_cmp(Lexeme, StringLit("func"), 0))
143 Token->Type = TokenIf;
144 else if(string8_cmp(Lexeme, StringLit("if"), 0))
145 Token->Type = TokenElse;
146 else if(string8_cmp(Lexeme, StringLit("return"), 0))
147 Token->Type = TokenReturn;
148 else if(string8_cmp(Lexeme, StringLit("while"), 0))
149 Token->Type = TokenWhile;
150 else if(string8_cmp(Lexeme, StringLit("for"), 0))
151 Token->Type = TokenFor;
152 else if(string8_cmp(Lexeme, StringLit("break"), 0))
153 Token->Type = TokenBreak;
154 else if(string8_cmp(Lexeme, StringLit("continue"), 0))
155 Token->Type = TokenContinue;
156 else
157 Token->Type = TokenIdentifier;
158 }
159 else if(IsDigit(Character))
160 {
161 while((TextIndex + 1 < Buffer->size) &&
162 IsDigit(Buffer->data[TextIndex + 1]))
163 {
164 ++TextIndex;
165 }
166
167 TokenEnd = TextIndex + 1;
168 Token->Type = TokenNumber;
169 }
170
171 else
172 {
173 u8 Next = (TextIndex + 1 < Buffer->size) ? Buffer->data[TextIndex + 1] : 0;
174
175 switch(Character)
176 {
177 case '=':
178 {
179 if(Next == '=')
180 {
181 Token->Type = TokenDoubleEqual;
182 TextIndex++;
183 }
184 else
185 {
186 Token->Type = (token_type)'=';
187 }
188 }
189 break;
190
191 case '>':
192 {
193 if(Next == '=')
194 {
195 Token->Type = TokenGreaterEqual;
196 TextIndex++;
197 }
198 else if(Next == '>')
199 {
200 Token->Type = TokenRightShift;
201 TextIndex++;
202 }
203 else
204 {
205 Token->Type = (token_type)'>';
206 }
207 }
208 break;
209
210 case '<':
211 {
212 if(Next == '=')
213 {
214 Token->Type = TokenLesserEqual;
215 TextIndex++;
216 }
217 else if(Next == '<')
218 {
219 Token->Type = TokenLeftShift;
220 TextIndex++;
221 }
222 else
223 {
224 Token->Type = (token_type)'<';
225 }
226 }
227 break;
228
229 case '"':
230 {
231 while(Buffer->data[TextIndex + 1] != '"' && Buffer->data[TextIndex + 1] != '\0')
232 {
233 ++TextIndex;
234 if(Buffer->data[TextIndex + 1] == '\\')
235
236 ++TextIndex;
237 }
238
239 TokenStart += 1;
240 Token->Type = TokenString;
241 }
242 break;
243 default:
244 {
245 Token->Type = (token_type)Character;
246 }
247 break;
248 }
249 }
250
251 TokenEnd = TextIndex + 1;
252
253 Token->Lexeme.data = (u8 *)&Buffer->data[TokenStart];
254 Token->Lexeme.size = (u64)(TokenEnd - TokenStart);
255 Column += (i32)Token->Lexeme.size;
256
257 //Log("Token: \t%.lu*s\n", Token->Lexeme.Size, Token->Lexeme.Data);
258
259 if(!Initialized)
260 {
261 Initialized = 1;
262 List->Root = TokenNode;
263 List->Current = TokenNode;
264 }
265 else
266 {
267 TokenNode->Previous = List->Current;
268 List->Current->Next = TokenNode;
269 List->Current = TokenNode;
270 }
271 }
272
273 return List;
274}
diff --git a/source/fajr_lexer/fajr_lexer.h b/source/fajr_lexer/fajr_lexer.h
new file mode 100644
index 0000000..754b89a
--- /dev/null
+++ b/source/fajr_lexer/fajr_lexer.h
@@ -0,0 +1,119 @@
1#ifndef FAJR_LEXER_H
2#define FAJR_LEXER_H
3
// Token categories produced by the lexer. The enum starts at 256 so any
// single ASCII character can be used directly as a token type via a
// (token_type)Character cast without colliding with these named values.
typedef enum token_type token_type;
enum token_type
{
    TokenUndefined = 256,
    TokenIdentifier,
    TokenIdentifierAssignmentValue,
    TokenValue,
    TokenString,        // string literal; the lexer excludes the surrounding quotes from the lexeme
    TokenNumber,        // integer literal, digits only
    TokenDoubleEqual,   // ==
    TokenGreaterEqual,  // >=
    TokenLesserEqual,   // <=
    TokenParam,
    TokenFunc,
    TokenReturn,
    TokenIf,
    TokenElse,
    TokenFor,
    TokenWhile,
    TokenBreak,
    TokenContinue,
    TokenExpression,
    TokenFuncBody,
    TokenUnwantedChild, // byte outside the printable ASCII range (> 126)
    TokenNewLine,
    TokenRightShift,    // >>
    TokenLeftShift,     // <<
    TokenStar,
};
33
// Scanning cursor position (1-based line and column).
// NOTE(review): not referenced by the visible lexer code, which tracks
// Line/Column in locals inside Lex() — confirm whether this is still needed.
typedef struct Tokenizer Tokenizer;
struct Tokenizer
{
    i32 Line;
    i32 Column;
};
40
// Per-token attribute bits; combine with bitwise OR. The lexer currently
// initializes tokens to FlagNone; the other bits are presumably set by later
// passes — TODO confirm.
typedef enum token_flags token_flags;
enum token_flags
{
    FlagNone = (0),
    FlagConstant = (1 << 0),
    FlagGlobal = (1 << 1),
    FlagsValue = (1 << 2),
    FlagDefinition = (1 << 3),
    FlagComparison = (1 << 4),
    FlagDeprecated = (1 << 5),
    FlagDirty = (1 << 6),
};
53
// One lexed token. Lexeme is a view into the source buffer (not a copy), so
// it is only valid for the lifetime of the buffer passed to Lex().
typedef struct token token;
struct token
{
    string8 Lexeme;     // slice of the source text for this token
    token_type Type;
    token_flags Flags;
    u64 ByteOffset;     // byte index of the token's first character in the source
    i32 Column;         // 1-based column where the token starts
    i32 Line;           // 1-based line where the token starts

    string8 MetaData;   // NOTE(review): not written by the visible lexer code — confirm producer
};
66
// Doubly-linked list node wrapping a token. End-of-list is marked by the
// nil_token_node sentinel rather than NULL (see IsNilTokenNode).
typedef struct token_node token_node;
struct token_node
{
    token_node *Next;
    token_node *Previous;
    token *Token;
};
74
// Token stream produced by Lex(): Root is the first node, Current the last
// node appended (used as the append cursor while lexing).
typedef struct token_list token_list;
struct token_list
{
    token_node *Root;
    token_node *Current;
};
81
// Lexer state bundle.
// NOTE(review): not referenced by the visible lexer code; Lex() takes a
// string8 buffer directly — confirm whether this struct is still in use.
typedef struct lexer lexer;
struct lexer
{
    u8 *Text;
    u64 TextCount;
    u8 *EndOfFile;
    u8 *UndefinedTokens;
};
90
// Characters that break a token on their own (see IsDelimiter and
// Is_TokenBreak). IsDelimiter uses sizeof(Delimiters) as the element count,
// which is valid because the element type is u8.
global_variable const u8 Delimiters[] =
{
    '{',
    '}',
    '(',
    ')',
    '[',
    ']',
    ';',
};
101
// Shared read-only sentinel token; IsNilToken() treats both its address and
// NULL as "no token".
read_only global_variable token nil_token =
{
    .Lexeme = {NULL, 0},
    .Type = TokenUndefined,
    .Flags = FlagNone,
    .ByteOffset = 0,
    .Column = 0,
    .Line = 0,
};
111
// Shared read-only sentinel list node; Next/Previous point back at the
// sentinel itself so list walks terminate without NULL checks. Token is left
// NULL, which IsNilToken() also treats as nil.
read_only global_variable token_node nil_token_node =
{
    .Next = &nil_token_node,
    .Previous = &nil_token_node,
    .Token = NULL,
};
118
119#endif // FAJR_LEXER_H