summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarson Fleming <cflems@cflems.net>2026-03-27 11:27:08 -1000
committerCarson Fleming <cflems@cflems.net>2026-03-27 11:27:08 -1000
commit414a608c36b2d8f208ad0223219736d7582948ae (patch)
treeeeeb284023236a4ee53bb4a78608c3cd1e3992bb
parentfca3bf239cfdf03c4479f5d0c14a21c1fd96ea3e (diff)
downloadccc-414a608c36b2d8f208ad0223219736d7582948ae.tar.gz
fix some stuff
-rw-r--r--ccc.h2
-rw-r--r--codegen.c3
-rw-r--r--lexer.c49
-rw-r--r--lexer.h1
-rw-r--r--main.c17
-rw-r--r--parser.c101
-rw-r--r--test/weird.c2
7 files changed, 92 insertions, 83 deletions
diff --git a/ccc.h b/ccc.h
index 36f80f9..80cbc0a 100644
--- a/ccc.h
+++ b/ccc.h
@@ -2,6 +2,6 @@
#define CCC_H
#define CCC_PANIC { perror("ccc"); exit(1); }
-
+#define PTR_SIZE 8
#endif
diff --git a/codegen.c b/codegen.c
index ab3660b..d0905e2 100644
--- a/codegen.c
+++ b/codegen.c
@@ -81,13 +81,14 @@ static void emit_expr(
break;
case EXPR_VAR_REF:
emit_var_ref(outfile, &node->as._var_ref, storage);
+ break;
}
}
static void emit_stmt(FILE* outfile, const struct stmt_node* node);
static unsigned long long get_type_size(const struct type_node* type) {
- if (type->ptr_level > 0) return 8;
+ if (type->ptr_level > 0) return PTR_SIZE;
struct type_def type_def;
if (!scope_get_type(scope, &type_def, type->name))
diff --git a/lexer.c b/lexer.c
index 9d0e596..f5072a1 100644
--- a/lexer.c
+++ b/lexer.c
@@ -10,6 +10,8 @@ static int lookahead;
static const char* PATH;
static unsigned long LINE, COL;
+static struct token tok = {.type = TK_NOT_FOUND};
+
#define LEXER_PANIC(format, ...) {\
fprintf(\
stderr,\
@@ -21,6 +23,8 @@ static unsigned long LINE, COL;
exit(1);\
}
+static void lexer_advance();
+
void lexer_load(const char* path) {
if (file != NULL) {
fclose(file);
@@ -32,6 +36,7 @@ void lexer_load(const char* path) {
PATH = path;
LINE = 1;
COL = 1;
+ lexer_advance();
}
void lexer_close() {
@@ -41,20 +46,9 @@ void lexer_close() {
}
bool lexer_peek(struct token* p_token) {
- if (file == NULL) return false;
-
- long orig_offset = ftell(file);
- int orig_lookahead = lookahead;
- unsigned long orig_line = LINE, orig_col = COL;
-
- bool rv = lexer_pop(p_token);
-
- LINE = orig_line;
- COL = orig_col;
- lookahead = orig_lookahead;
- fseek(file, orig_offset, SEEK_SET);
-
- return rv;
+ if (tok.type == TK_NOT_FOUND) return false;
+ if (p_token != NULL) *p_token = tok;
+ return true;
}
#define is_whitespace(c) (c == ' ' || c == '\t' || c == '\n')
@@ -328,7 +322,7 @@ static enum token_type lex_simple_operator(char c) {
LEXER_PANIC("unexpected token %c", c);
}
-bool lexer_pop(struct token* p_token) {
+static bool lexer_read() {
if (file == NULL) return false;
// consume all whitespace and comments preceding the next token
@@ -355,21 +349,28 @@ bool lexer_pop(struct token* p_token) {
}
if (is_numeric(c))
- lex_int_lit(p_token, c - '0');
+ lex_int_lit(&tok, c - '0');
else if (c == '.' && is_numeric(lookahead))
- lex_float_lit(p_token, 10, 0);
+ lex_float_lit(&tok, 10, 0);
else if (is_ident_legal(c))
- lex_ident(p_token, c);
+ lex_ident(&tok, c);
else if (c == '\'')
- lex_char_lit(p_token);
+ lex_char_lit(&tok);
else if (c == '"')
- lex_str_lit(p_token);
- else if (!lex_complex_operator(p_token, c))
- p_token->type = lex_simple_operator(c);
+ lex_str_lit(&tok);
+ else if (!lex_complex_operator(&tok, c))
+ tok.type = lex_simple_operator(c);
return true;
}
-bool lexer_eof() {
- return lookahead == EOF;
+static void lexer_advance() {
+ if (!lexer_read()) tok.type = TK_NOT_FOUND;
+}
+
+bool lexer_pop(struct token* p_token) {
+ if (tok.type == TK_NOT_FOUND) return false;
+ if (p_token != NULL) *p_token = tok;
+ lexer_advance();
+ return true;
}
diff --git a/lexer.h b/lexer.h
index acb8eb5..aefca82 100644
--- a/lexer.h
+++ b/lexer.h
@@ -77,6 +77,5 @@ void lexer_load(const char* path);
void lexer_close();
bool lexer_peek(struct token* p_token);
bool lexer_pop(struct token* p_token);
-bool lexer_eof();
#endif
diff --git a/main.c b/main.c
index e2aca11..5c26dd3 100644
--- a/main.c
+++ b/main.c
@@ -3,6 +3,7 @@
#include "codegen.h"
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
void test_lexer(int argc, char** argv) {
struct token token;
@@ -37,15 +38,17 @@ void test_lexer(int argc, char** argv) {
}
void test_parser(int argc, char** argv) {
- struct root_node* root;
- struct root_node** p_cur = &root;
for (int i = 1; i < argc; i++) {
- *p_cur = parse(argv[i]);
- p_cur = &((*p_cur)->next);
- }
+ struct root_node* root = parse(argv[i]);
+ unsigned int fn_sz = strlen(argv[i]);
+ char outfile[fn_sz + 1];
+ strcpy(outfile, argv[i]);
+ outfile[fn_sz - 1] = 's';
+ outfile[fn_sz] = 0;
- emit_code(root, "test/simple.s");
- ast_destroy(root);
+ emit_code(root, outfile);
+ ast_destroy(root);
+ }
}
int main(int argc, char** argv) {
diff --git a/parser.c b/parser.c
index 699f345..d4af5a0 100644
--- a/parser.c
+++ b/parser.c
@@ -33,6 +33,12 @@ static void unexpected_token(enum token_type expected) {
PARSER_PANIC("unexpected token");
}
+/* TODO: reorganize the lexer to make peek cheaper */
+static void peek_or_panic() {
+ if (!lexer_peek(&tok))
+ PARSER_PANIC("unexpected EOF");
+}
+
static void expect(enum token_type expected) {
if (!lexer_pop(&tok))
PARSER_PANIC("unexpected EOF");
@@ -40,46 +46,38 @@ static void expect(enum token_type expected) {
if (tok.type != expected) unexpected_token(expected);
}
-static void peek_or_panic() {
- if (!lexer_peek(&tok))
- PARSER_PANIC("unexpected EOF");
-}
+static void expect_kw(const char* kw) {
+ if (!lexer_pop(&tok))
+ PARSER_PANIC("unexpected EOF, expected %s", kw);
-/* "handle" indicates that we've peeked already */
-static void handle_expr(struct expr_node* p_node);
-static void handle_stmt(struct stmt_node* p_node);
+ if (tok.type != TK_IDENT)
+ PARSER_PANIC("unexpected token, expected %s", kw);
+
+ if (strcmp(kw, tok.data.ident) != 0)
+ PARSER_PANIC(
+ "unexpected identifier %s, expected %s", tok.data.ident, kw);
+
+ /* string won't go in the AST, discard it */
+ free(tok.data.ident);
+ tok.data.ident = NULL;
+}
-static void handle_type(struct type_node* p_node) {
+static void parse_type(struct type_node* p_node) {
/* TODO: need some concept of known types in scope */
/* TODO: modifiers, void rules, arrays, etc. */
/* TODO: struct, union, enum */
+ expect(TK_IDENT);
p_node->name = tok.data.ident;
peek_or_panic();
p_node->ptr_level = 0;
while (tok.type == TK_STAR) {
- p_node->ptr_level++;
expect(TK_STAR);
+ p_node->ptr_level++;
+ peek_or_panic();
}
}
-static void parse_return(struct return_node* p_node) {
- expect(TK_IDENT);
- if (strcmp(tok.data.ident, "return") != 0)
- PARSER_PANIC("unexpected token %s; expected: return", tok.data.ident);
-
- if (!lexer_peek(&tok))
- PARSER_PANIC("unexpected EOF in return statement");
-
- if (tok.type == TK_SEMI) {
- p_node->ret_val = NULL;
- return;
- }
-
- p_node->ret_val = protected_alloc(sizeof(struct expr_node));
- handle_expr(p_node->ret_val);
-}
-
static void parse_int_lit(struct int_lit_node* p_node) {
expect(TK_INT_LIT);
p_node->val = tok.data.int_lit;
@@ -90,7 +88,8 @@ static void parse_var_ref(struct var_ref_node* p_node) {
p_node->ident = tok.data.ident;
}
-static void handle_expr(struct expr_node* p_node) {
+static void parse_expr(struct expr_node* p_node) {
+ peek_or_panic();
switch (tok.type) {
case TK_SEMI:
p_node->type = EXPR_EMPTY;
@@ -108,31 +107,45 @@ static void handle_expr(struct expr_node* p_node) {
}
}
-static void handle_var_decl(struct var_decl_node* p_node) {
- handle_type(&p_node->type);
+static void parse_var_decl(struct var_decl_node* p_node) {
+ parse_type(&p_node->type);
expect(TK_IDENT);
p_node->ident = tok.data.ident;
}
+static void parse_stmt(struct stmt_node* p_node);
+
+static void parse_return(struct return_node* p_node) {
+ expect_kw("return");
+
+ peek_or_panic();
+ if (tok.type == TK_SEMI) {
+ p_node->ret_val = NULL;
+ return;
+ }
+
+ p_node->ret_val = protected_alloc(sizeof(struct expr_node));
+ parse_expr(p_node->ret_val);
+}
+
static void parse_group(struct group_node* p_node) {
expect(TK_LCURLY);
struct stmt_node** pp_node = &p_node->body_head;
for (;;) {
- if (!lexer_peek(&tok))
- PARSER_PANIC("unexpected EOF in statement group");
-
+ peek_or_panic();
if (tok.type == TK_RCURLY) break;
*pp_node = protected_alloc(sizeof(struct stmt_node));
- handle_stmt(*pp_node);
+ parse_stmt(*pp_node);
pp_node = &((*pp_node)->next);
}
expect(TK_RCURLY);
}
-static void handle_stmt(struct stmt_node* p_node) {
+static void parse_stmt(struct stmt_node* p_node) {
+ peek_or_panic();
switch (tok.type) {
case TK_LCURLY:
p_node->type = STMT_GROUP;
@@ -145,45 +158,37 @@ static void handle_stmt(struct stmt_node* p_node) {
break;
} else if (scope_get_type(scope, NULL, tok.data.ident)) {
p_node->type = STMT_VAR_DECL;
- handle_var_decl(&p_node->as._var_decl);
+ parse_var_decl(&p_node->as._var_decl);
break;
}
default:
p_node->type = STMT_EXPR;
- handle_expr(&p_node->as._expr);
+ parse_expr(&p_node->as._expr);
}
expect(TK_SEMI);
}
static void parse_arg_list(struct var_decl_node** pp_arg) {
for (;;) {
- expect(TK_IDENT);
-
*pp_arg = protected_alloc(sizeof(struct var_decl_node));
- handle_var_decl(*pp_arg);
+ parse_var_decl(*pp_arg);
pp_arg = &((*pp_arg)->next);
- if (!lexer_peek(&tok))
- PARSER_PANIC("unexpected EOF in argument list");
-
+ peek_or_panic();
if (tok.type == TK_RPAREN) break;
expect(TK_COMMA);
}
}
static void parse_fn_decl(struct fn_decl_node* p_node) {
- expect(TK_IDENT);
- handle_type(&p_node->return_type);
+ parse_type(&p_node->return_type);
expect(TK_IDENT);
p_node->name = tok.data.ident;
expect(TK_LPAREN);
- if (!lexer_peek(&tok))
- PARSER_PANIC("unexpected EOF in function declaration");
-
-
+ peek_or_panic();
if (tok.type != TK_RPAREN) parse_arg_list(&p_node->args_head);
expect(TK_RPAREN);
diff --git a/test/weird.c b/test/weird.c
index 74a74be..bc49f3f 100644
--- a/test/weird.c
+++ b/test/weird.c
@@ -1,3 +1,3 @@
-int main(int argc) {
+int main(int argc, char** argv) {
return argc;
}