summaryrefslogtreecommitdiff
path: root/lexer.c
diff options
context:
space:
mode:
authorCarson Fleming <cflems@cflems.net>2026-03-26 16:21:29 -0400
committerCarson Fleming <cflems@cflems.net>2026-03-26 16:22:00 -0400
commit7d9fb2c733c8c64f6f74eefa0eea35b36be102cd (patch)
tree16b6cded5f9611e0ff1948395578845c9688b926 /lexer.c
parent68db110d34611fc8bb79035d3a11bba07dea43f3 (diff)
downloadccc-7d9fb2c733c8c64f6f74eefa0eea35b36be102cd.tar.gz
let's go we can parse return zero most useful program ever
Diffstat (limited to 'lexer.c')
-rw-r--r--lexer.c150
1 files changed, 93 insertions, 57 deletions
diff --git a/lexer.c b/lexer.c
index 2fc6885..9d0e596 100644
--- a/lexer.c
+++ b/lexer.c
@@ -7,12 +7,14 @@
static FILE* file = NULL;
static int lookahead;
+static const char* PATH;
static unsigned long LINE, COL;
#define LEXER_PANIC(format, ...) {\
fprintf(\
stderr,\
- "ccc: lexer error: line %lu, column %lu: " format "\n",\
+ "ccc: lexer error: %s: line %lu, column %lu: " format "\n",\
+ PATH,\
LINE,\
COL __VA_OPT__(,)\
__VA_ARGS__);\
@@ -27,6 +29,7 @@ void lexer_load(const char* path) {
if (file == NULL) CCC_PANIC;
lookahead = fgetc(file);
+ PATH = path;
LINE = 1;
COL = 1;
}
@@ -42,9 +45,15 @@ bool lexer_peek(struct token* p_token) {
long orig_offset = ftell(file);
int orig_lookahead = lookahead;
+ unsigned long orig_line = LINE, orig_col = COL;
+
bool rv = lexer_pop(p_token);
+
+ LINE = orig_line;
+ COL = orig_col;
lookahead = orig_lookahead;
fseek(file, orig_offset, SEEK_SET);
+
return rv;
}
@@ -79,8 +88,11 @@ static void lex_ident(struct token* p_token, char ic) {
buf[len] = 0;
*p_token = (struct token) {
- .type = IDENTIFIER,
- .data.identifier = strndup(buf, sizeof(buf) - 1),
+ .type = TK_IDENT,
+ .data.ident = strndup(buf, sizeof(buf) - 1),
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
};
}
@@ -110,8 +122,11 @@ static void lex_float_lit(
}
*p_token = (struct token) {
- .type = FLOAT_LIT,
+ .type = TK_FLOAT_LIT,
.data.float_lit = iv,
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
};
}
@@ -145,8 +160,11 @@ static void lex_int_lit(struct token* p_token, int_lit_t iv) {
}
*p_token = (struct token) {
- .type = INT_LIT,
+ .type = TK_INT_LIT,
.data.int_lit = iv,
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
};
}
@@ -185,8 +203,11 @@ static void lex_char_lit(struct token* p_token) {
"expected end of char literal, not \"%c\"", close_quote);
*p_token = (struct token) {
- .type = CHAR_LIT,
+ .type = TK_CHAR_LIT,
.data.char_lit = c,
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
};
}
@@ -194,8 +215,11 @@ static void lex_str_lit(struct token* p_token) {
if (lookahead == '"') {
consume_char();
*p_token = (struct token) {
- .type = STR_LIT,
+ .type = TK_STR_LIT,
.data.str_lit = strdup(""),
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
};
return;
}
@@ -223,75 +247,83 @@ static void lex_str_lit(struct token* p_token) {
buf[len] = 0;
*p_token = (struct token) {
- .type = STR_LIT,
+ .type = TK_STR_LIT,
.data.str_lit = strndup(buf, sizeof(buf) - 1),
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
};
}
static enum token_type two_char_operator_type(char c) {
- if (c == '!' && lookahead == '=') return NEQ;
- if (c == '^' && lookahead == '=') return XEQ;
- if (c == '&' && lookahead == '=') return AND_EQ;
- if (c == '&' && lookahead == '&') return LOG_AND;
- if (c == '*' && lookahead == '=') return MUL_EQ;
- if (c == '-' && lookahead == '=') return NEG_EQ;
- if (c == '-' && lookahead == '>') return ARROW;
- if (c == '=' && lookahead == '=') return TEST_EQ;
- if (c == '+' && lookahead == '=') return PLUS_EQ;
- if (c == '|' && lookahead == '|') return LOG_PIPE;
- if (c == '|' && lookahead == '=') return PIPE_EQ;
- if (c == '/' && lookahead == '=') return DIV_EQ;
- if (c == '%' && lookahead == '=') return MOD_EQ;
- if (c == '<' && lookahead == '=') return LEQ;
- if (c == '>' && lookahead == '=') return GEQ;
- if (c == '<' && lookahead == '<') return SHL;
- if (c == '>' && lookahead == '>') return SHR;
- return NOT_FOUND;
+ if (c == '!' && lookahead == '=') return TK_NEQ;
+ if (c == '^' && lookahead == '=') return TK_XEQ;
+ if (c == '&' && lookahead == '=') return TK_AND_EQ;
+ if (c == '&' && lookahead == '&') return TK_LOG_AND;
+ if (c == '*' && lookahead == '=') return TK_MUL_EQ;
+ if (c == '-' && lookahead == '=') return TK_NEG_EQ;
+ if (c == '-' && lookahead == '>') return TK_ARROW;
+ if (c == '=' && lookahead == '=') return TK_TEST_EQ;
+ if (c == '+' && lookahead == '=') return TK_PLUS_EQ;
+ if (c == '|' && lookahead == '|') return TK_LOG_PIPE;
+ if (c == '|' && lookahead == '=') return TK_PIPE_EQ;
+ if (c == '/' && lookahead == '=') return TK_DIV_EQ;
+ if (c == '%' && lookahead == '=') return TK_MOD_EQ;
+ if (c == '<' && lookahead == '=') return TK_LEQ;
+ if (c == '>' && lookahead == '=') return TK_GEQ;
+ if (c == '<' && lookahead == '<') return TK_SHL;
+ if (c == '>' && lookahead == '>') return TK_SHR;
+ return TK_NOT_FOUND;
}
static bool lex_complex_operator(struct token* p_token, char c) {
enum token_type type = two_char_operator_type(c);
- if (type == NOT_FOUND) return false;
+ if (type == TK_NOT_FOUND) return false;
consume_char();
- if (type == SHL && lookahead == '=') {
+ if (type == TK_SHL && lookahead == '=') {
consume_char();
- type = SHL_EQ;
+ type = TK_SHL_EQ;
}
- if (type == SHR && lookahead == '=') {
+ if (type == TK_SHR && lookahead == '=') {
consume_char();
- type = SHR_EQ;
+ type = TK_SHR_EQ;
}
- *p_token = (struct token) {.type = type};
+ *p_token = (struct token) {
+ .type = type,
+ .PATH = PATH,
+ .LINE = LINE,
+ .COL = COL,
+ };
return type;
}
static enum token_type lex_simple_operator(char c) {
switch (c) {
- case '#': return HASHTAG;
- case '(': return LPAREN;
- case ')': return RPAREN;
- case '{': return LCURLY;
- case '}': return RCURLY;
- case '[': return LSQUARE;
- case ']': return RSQUARE;
- case ':': return COLON;
- case ';': return SEMI;
- case ',': return COMMA;
- case '.': return DOT;
- case '?': return QMARK;
- case '!': return NOT;
- case '^': return XOR;
- case '&': return AMP;
- case '*': return STAR;
- case '-': return NEG;
- case '=': return ASSIGN;
- case '+': return PLUS;
- case '\\': return BSLASH;
- case '|': return PIPE;
- case '/': return DIV;
- case '%': return MOD;
- case '<': return LT;
- case '>': return GT;
+ case '#': return TK_HASHTAG;
+ case '(': return TK_LPAREN;
+ case ')': return TK_RPAREN;
+ case '{': return TK_LCURLY;
+ case '}': return TK_RCURLY;
+ case '[': return TK_LSQUARE;
+ case ']': return TK_RSQUARE;
+ case ':': return TK_COLON;
+ case ';': return TK_SEMI;
+ case ',': return TK_COMMA;
+ case '.': return TK_DOT;
+ case '?': return TK_QMARK;
+ case '!': return TK_NOT;
+ case '^': return TK_XOR;
+ case '&': return TK_AMP;
+ case '*': return TK_STAR;
+ case '-': return TK_NEG;
+ case '=': return TK_ASSIGN;
+ case '+': return TK_PLUS;
+ case '\\': return TK_BSLASH;
+ case '|': return TK_PIPE;
+ case '/': return TK_DIV;
+ case '%': return TK_MOD;
+ case '<': return TK_LT;
+ case '>': return TK_GT;
}
LEXER_PANIC("unexpected token %c", c);
}
@@ -337,3 +369,7 @@ bool lexer_pop(struct token* p_token) {
return true;
}
+
+bool lexer_eof() {
+ return lookahead == EOF;
+}