Let's go: we can parse "return zero", the most useful program ever

author: Carson Fleming <cflems@cflems.net> 2026-03-26 16:21:29 -0400
committer: Carson Fleming <cflems@cflems.net> 2026-03-26 16:22:00 -0400
commit: 7d9fb2c733c8c64f6f74eefa0eea35b36be102cd (patch)
tree: 16b6cded5f9611e0ff1948395578845c9688b926 /lexer.c
parent: 68db110d34611fc8bb79035d3a11bba07dea43f3 (diff)
download: ccc-7d9fb2c733c8c64f6f74eefa0eea35b36be102cd.tar.gz
1 files changed, 93 insertions, 57 deletions
diff --git a/lexer.c b/lexer.c
index 2fc6885..9d0e596 100644
--- a/lexer.c
+++ b/lexer.c
@@ -7,12 +7,14 @@
 
 static FILE* file = NULL;
 static int lookahead;
+static const char* PATH;
 static unsigned long LINE, COL;
 
 #define LEXER_PANIC(format, ...) {\
     fprintf(\
         stderr,\
-        "ccc: lexer error: line %lu, column %lu: " format "\n",\
+        "ccc: lexer error: %s: line %lu, column %lu: " format "\n",\
+        PATH,\
         LINE,\
         COL __VA_OPT__(,)\
         __VA_ARGS__);\
@@ -27,6 +29,7 @@ void lexer_load(const char* path) {
     if (file == NULL) CCC_PANIC;
 
     lookahead = fgetc(file);
+    PATH = path;
     LINE = 1;
     COL = 1;
 }
@@ -42,9 +45,15 @@ bool lexer_peek(struct token* p_token) {
 
     long orig_offset = ftell(file);
     int orig_lookahead = lookahead;
+    unsigned long orig_line = LINE, orig_col = COL;
+
     bool rv = lexer_pop(p_token);
+
+    LINE = orig_line;
+    COL = orig_col;
     lookahead = orig_lookahead;
     fseek(file, orig_offset, SEEK_SET);
+
     return rv;
 }
 
@@ -79,8 +88,11 @@ static void lex_ident(struct token* p_token, char ic) {
 
     buf[len] = 0;
     *p_token = (struct token) {
-        .type = IDENTIFIER,
-        .data.identifier = strndup(buf, sizeof(buf) - 1),
+        .type = TK_IDENT,
+        .data.ident = strndup(buf, sizeof(buf) - 1),
+        .PATH = PATH,
+        .LINE = LINE,
+        .COL = COL,
     };
 }
 
@@ -110,8 +122,11 @@ static void lex_float_lit(
     }
 
     *p_token = (struct token) {
-        .type = FLOAT_LIT,
+        .type = TK_FLOAT_LIT,
         .data.float_lit = iv,
+        .PATH = PATH,
+        .LINE = LINE,
+        .COL = COL,
     };
 }
 
@@ -145,8 +160,11 @@ static void lex_int_lit(struct token* p_token, int_lit_t iv) {
     }
 
     *p_token = (struct token) {
-        .type = INT_LIT,
+        .type = TK_INT_LIT,
         .data.int_lit = iv,
+        .PATH = PATH,
+        .LINE = LINE,
+        .COL = COL,
     };
 }
 
@@ -185,8 +203,11 @@ static void lex_char_lit(struct token* p_token) {
             "expected end of char literal, not \"%c\"", close_quote);
 
     *p_token = (struct token) {
-        .type = CHAR_LIT,
+        .type = TK_CHAR_LIT,
         .data.char_lit = c,
+        .PATH = PATH,
+        .LINE = LINE,
+        .COL = COL,
     };
 }
 
@@ -194,8 +215,11 @@ static void lex_str_lit(struct token* p_token) {
     if (lookahead == '"') {
         consume_char();
         *p_token = (struct token) {
-            .type = STR_LIT,
+            .type = TK_STR_LIT,
             .data.str_lit = strdup(""),
+            .PATH = PATH,
+            .LINE = LINE,
+            .COL = COL,
         };
         return;
     }
@@ -223,75 +247,83 @@ static void lex_str_lit(struct token* p_token) {
     buf[len] = 0;
 
     *p_token = (struct token) {
-        .type = STR_LIT,
+        .type = TK_STR_LIT,
         .data.str_lit = strndup(buf, sizeof(buf) - 1),
+        .PATH = PATH,
+        .LINE = LINE,
+        .COL = COL,
     };
 }
 
 static enum token_type two_char_operator_type(char c) {
-    if (c == '!' && lookahead == '=') return NEQ;
-    if (c == '^' && lookahead == '=') return XEQ;
-    if (c == '&' && lookahead == '=') return AND_EQ;
-    if (c == '&' && lookahead == '&') return LOG_AND;
-    if (c == '*' && lookahead == '=') return MUL_EQ;
-    if (c == '-' && lookahead == '=') return NEG_EQ;
-    if (c == '-' && lookahead == '>') return ARROW;
-    if (c == '=' && lookahead == '=') return TEST_EQ;
-    if (c == '+' && lookahead == '=') return PLUS_EQ;
-    if (c == '|' && lookahead == '|') return LOG_PIPE;
-    if (c == '|' && lookahead == '=') return PIPE_EQ;
-    if (c == '/' && lookahead == '=') return DIV_EQ;
-    if (c == '%' && lookahead == '=') return MOD_EQ;
-    if (c == '<' && lookahead == '=') return LEQ;
-    if (c == '>' && lookahead == '=') return GEQ;
-    if (c == '<' && lookahead == '<') return SHL;
-    if (c == '>' && lookahead == '>') return SHR;
-    return NOT_FOUND;
+    if (c == '!' && lookahead == '=') return TK_NEQ;
+    if (c == '^' && lookahead == '=') return TK_XEQ;
+    if (c == '&' && lookahead == '=') return TK_AND_EQ;
+    if (c == '&' && lookahead == '&') return TK_LOG_AND;
+    if (c == '*' && lookahead == '=') return TK_MUL_EQ;
+    if (c == '-' && lookahead == '=') return TK_NEG_EQ;
+    if (c == '-' && lookahead == '>') return TK_ARROW;
+    if (c == '=' && lookahead == '=') return TK_TEST_EQ;
+    if (c == '+' && lookahead == '=') return TK_PLUS_EQ;
+    if (c == '|' && lookahead == '|') return TK_LOG_PIPE;
+    if (c == '|' && lookahead == '=') return TK_PIPE_EQ;
+    if (c == '/' && lookahead == '=') return TK_DIV_EQ;
+    if (c == '%' && lookahead == '=') return TK_MOD_EQ;
+    if (c == '<' && lookahead == '=') return TK_LEQ;
+    if (c == '>' && lookahead == '=') return TK_GEQ;
+    if (c == '<' && lookahead == '<') return TK_SHL;
+    if (c == '>' && lookahead == '>') return TK_SHR;
+    return TK_NOT_FOUND;
 }
 
 static bool lex_complex_operator(struct token* p_token, char c) {
     enum token_type type = two_char_operator_type(c);
-    if (type == NOT_FOUND) return false;
+    if (type == TK_NOT_FOUND) return false;
     consume_char();
-    if (type == SHL && lookahead == '=') {
+    if (type == TK_SHL && lookahead == '=') {
         consume_char();
-        type = SHL_EQ;
+        type = TK_SHL_EQ;
     }
-    if (type == SHR && lookahead == '=') {
+    if (type == TK_SHR && lookahead == '=') {
         consume_char();
-        type = SHR_EQ;
+        type = TK_SHR_EQ;
     }
-    *p_token = (struct token) {.type = type};
+    *p_token = (struct token) {
+        .type = type,
+        .PATH = PATH,
+        .LINE = LINE,
+        .COL = COL,
+    };
     return type;
 }
 
 static enum token_type lex_simple_operator(char c) {
     switch (c) {
-        case '#': return HASHTAG;
-        case '(': return LPAREN;
-        case ')': return RPAREN;
-        case '{': return LCURLY;
-        case '}': return RCURLY;
-        case '[': return LSQUARE;
-        case ']': return RSQUARE;
-        case ':': return COLON;
-        case ';': return SEMI;
-        case ',': return COMMA;
-        case '.': return DOT;
-        case '?': return QMARK;
-        case '!': return NOT;
-        case '^': return XOR;
-        case '&': return AMP;
-        case '*': return STAR;
-        case '-': return NEG;
-        case '=': return ASSIGN;
-        case '+': return PLUS;
-        case '\\': return BSLASH;
-        case '|': return PIPE;
-        case '/': return DIV;
-        case '%': return MOD;
-        case '<': return LT;
-        case '>': return GT;
+        case '#': return TK_HASHTAG;
+        case '(': return TK_LPAREN;
+        case ')': return TK_RPAREN;
+        case '{': return TK_LCURLY;
+        case '}': return TK_RCURLY;
+        case '[': return TK_LSQUARE;
+        case ']': return TK_RSQUARE;
+        case ':': return TK_COLON;
+        case ';': return TK_SEMI;
+        case ',': return TK_COMMA;
+        case '.': return TK_DOT;
+        case '?': return TK_QMARK;
+        case '!': return TK_NOT;
+        case '^': return TK_XOR;
+        case '&': return TK_AMP;
+        case '*': return TK_STAR;
+        case '-': return TK_NEG;
+        case '=': return TK_ASSIGN;
+        case '+': return TK_PLUS;
+        case '\\': return TK_BSLASH;
+        case '|': return TK_PIPE;
+        case '/': return TK_DIV;
+        case '%': return TK_MOD;
+        case '<': return TK_LT;
+        case '>': return TK_GT;
     }
     LEXER_PANIC("unexpected token %c", c);
 }
@@ -337,3 +369,7 @@ bool lexer_pop(struct token* p_token) {
 
     return true;
 }
+
+bool lexer_eof() {
+    return lookahead == EOF;
+}
author	Carson Fleming <cflems@cflems.net>	2026-03-26 16:21:29 -0400
committer	Carson Fleming <cflems@cflems.net>	2026-03-26 16:22:00 -0400
commit	7d9fb2c733c8c64f6f74eefa0eea35b36be102cd (patch)
tree	16b6cded5f9611e0ff1948395578845c9688b926 /lexer.c
parent	68db110d34611fc8bb79035d3a11bba07dea43f3 (diff)
download	ccc-7d9fb2c733c8c64f6f74eefa0eea35b36be102cd.tar.gz