diff options
| author | Carson Fleming <cflems@cflems.net> | 2026-03-29 08:28:28 -1000 |
|---|---|---|
| committer | Carson Fleming <cflems@cflems.net> | 2026-03-29 08:28:28 -1000 |
| commit | 7cf2065be92855b5b1db31a4bb7afbb4af29a817 (patch) | |
| tree | 773e9df00d46934f548a2d76dbe6e61aec9b21c9 | |
| parent | 50495e8f815d3d5f92b3d36369acc52a6d2ea9c4 (diff) | |
| download | ccc-master.tar.gz | |
| -rw-r--r-- | ast.c | 22 | ||||
| -rw-r--r-- | ast.h | 14 | ||||
| -rw-r--r-- | codegen.c | 256 | ||||
| -rw-r--r-- | parser.c | 81 | ||||
| -rw-r--r-- | register.c | 2 | ||||
| -rw-r--r-- | register.h | 2 | ||||
| -rw-r--r-- | scope.c | 2 | ||||
| -rw-r--r-- | scope.h | 12 |
8 files changed, 280 insertions, 111 deletions
@@ -4,13 +4,7 @@ static void expr_destroy(struct expr_node* node); static void stmt_destroy(struct stmt_node* node); -static void type_destroy(struct type_node* node) { - free(node->name); -} - static void var_decl_destroy(struct var_decl_node* node) { - type_destroy(&node->type); - free(node->ident); } @@ -46,8 +40,6 @@ static void group_destroy(struct group_node* node) { } static void fn_decl_destroy(struct fn_decl_node* node) { - type_destroy(&node->return_type); - free(node->name); struct var_decl_node* args_node = node->args_head; @@ -72,6 +64,17 @@ static void str_lit_destroy(struct str_lit_node* node) { free(node->val); } +static void call_destroy(struct call_node* node) { + /* don't destroy node->called_fn, this is owned by its declaration */ + struct expr_node* args_node = node->args_head; + while (args_node != NULL) { + struct expr_node* next = args_node->next; + expr_destroy(args_node); + free(args_node); + args_node = next; + } +} + static void expr_destroy(struct expr_node* node) { switch (node->type) { case EXPR_INT_LIT: @@ -87,6 +90,9 @@ static void expr_destroy(struct expr_node* node) { case EXPR_ASSIGN: assign_destroy(&node->as._assign); break; + case EXPR_CALL: + call_destroy(&node->as._call); + break; } } @@ -1,6 +1,8 @@ #ifndef AST_H #define AST_H +#include "scope.h" + struct stmt_node; struct expr_node; @@ -8,8 +10,8 @@ struct type_node { bool _unsigned; bool _short; bool _long; + struct type_def def; unsigned char ptr_level; - char* name; }; struct int_lit_node { @@ -56,6 +58,12 @@ struct assign_node { struct expr_node* rval; }; +struct call_node { + /* TODO: eventually this could also be a function pointer */ + struct fn_decl_node* called_fn; + struct expr_node* args_head; +}; + struct expr_node { enum { EXPR_INT_LIT, @@ -64,6 +72,7 @@ struct expr_node { EXPR_STR_LIT, EXPR_VAR_REF, EXPR_ASSIGN, + EXPR_CALL, } type; union { struct int_lit_node _int_lit; @@ -72,7 +81,10 @@ struct expr_node { struct str_lit_node _str_lit; struct var_ref_node _var_ref; struct assign_node _assign; + struct call_node _call; } as; + + struct expr_node* next; }; struct group_node { @@ -4,6 +4,7 @@ #include "register.h" #include <stdlib.h> #include <stdio.h> +#include <string.h> #define CGEN_PANIC(format, ...) {\ fprintf(\ @@ -19,12 +20,15 @@ struct lval_def { }; static const struct storage_location RV_LOC = { - .type = REGISTER, + .type = STO_REG, .reg = &RAX, }; + +#define RETURN_LABEL_FMT "%s@coda" #define FULL_REG_SZ 8 static struct scope* scope; +static const struct fn_decl_node* active_fn; static void emit_storage_loc( FILE* outfile, @@ -32,37 +36,64 @@ static void emit_storage_loc( unsigned long long sz ) { switch (loc->type) { - case REGISTER: + case STO_LABEL: + fprintf(outfile, "%s", loc->label); + break; + case STO_FN: + fprintf(outfile, "%s", loc->decl->name); + break; + case STO_REG: if (sz > 4) fprintf(outfile, "%s", loc->reg->qword); else if (sz > 2) fprintf(outfile, "%s", loc->reg->dword); else if (sz > 1) fprintf(outfile, "%s", loc->reg->word); else fprintf(outfile, "%s", loc->reg->byte); break; - case JMP_LABEL: - fprintf(outfile, "%s", loc->label); - break; - case BP_OFFSET: - if (loc->offset < 0) - fprintf(outfile, "[rbp + %lld]", -loc->offset); - else if (loc->offset > 0) - fprintf(outfile, "[rbp - %lld]", loc->offset); + case STO_STACK: + if (loc->bp_offset < 0) + fprintf(outfile, "[rbp + %lld]", -loc->bp_offset); + else if (loc->bp_offset > 0) + fprintf(outfile, "[rbp - %lld]", loc->bp_offset); else fprintf(outfile, "[rbp]"); break; - case IMMEDIATE: + case STO_IMM: fprintf(outfile, "%llu", loc->value); break; } } +static bool locs_equal( + const struct storage_location* a, + const struct storage_location* b +) { + if (a->type != b->type) return false; + + switch (a->type) { + case STO_IMM: + return a->value == b->value; + case STO_REG: + return strcmp(a->reg->qword, b->reg->qword) == 0; + case STO_STACK: + return a->bp_offset == b->bp_offset; + case STO_LABEL: + return strcmp(a->label, b->label) == 0; + case STO_FN: + return a->decl == b->decl; + } + CGEN_PANIC("unhandled storage type case"); +} + static void emit_mov( FILE* outfile, const struct lval_def* dst, const struct storage_location* src ) { + /* first optimization: if dst == src, emit nothing */ + if (locs_equal(&dst->loc, src)) return; + switch (dst->loc.type) { - case REGISTER: - if (src->type == REGISTER && dst->sz < 4) { + case STO_REG: + if (src->type == STO_REG && dst->sz < 4) { fprintf(outfile, "\tmovzx "); emit_storage_loc(outfile, &dst->loc, FULL_REG_SZ); } else { @@ -73,8 +104,8 @@ static void emit_mov( fprintf(outfile, ", "); emit_storage_loc(outfile, src, dst->sz); break; - case BP_OFFSET: - if (src->type == BP_OFFSET) { + case STO_STACK: + if (src->type == STO_STACK) { /* `mov mem, mem` is illegal in x86_64 */ emit_mov( outfile, @@ -88,7 +119,7 @@ static void emit_mov( } fprintf(outfile, "\tmov "); - if (src->type == IMMEDIATE) { + if (src->type == STO_IMM) { /* must specify size to move immediates into memory*/ if (dst->sz > 4) fprintf(outfile, "qword "); else if (dst->sz > 2) fprintf(outfile, "dword "); @@ -100,15 +131,39 @@ static void emit_mov( fprintf(outfile, ", "); emit_storage_loc(outfile, src, dst->sz); break; - case JMP_LABEL: + case STO_LABEL: CGEN_PANIC("can't move value into label %s", dst->loc.label); - case IMMEDIATE: + case STO_FN: + CGEN_PANIC( + "can't move value into function %s", dst->loc.decl->name); + case STO_IMM: CGEN_PANIC( "can't move value into immediate value %lld", dst->loc.value); } fprintf(outfile, "\n"); } +static unsigned long long get_type_size(const struct type_node* type) { + if (type->ptr_level > 0) return PTR_SIZE; + return type->def.sz; +} + +static struct lval_def make_stack_lval( + FILE* outfile, + const struct type_node* type +) { + unsigned long long type_sz = get_type_size(type); + fprintf(outfile, "\tsub rsp, %llu\n", type_sz); + scope->bp_offset += type_sz; + return (struct lval_def) { + .loc = { + .type = STO_STACK, + .bp_offset = scope->bp_offset, + }, + .sz = type_sz, + }; +} + static void emit_expr( FILE* outfile, const struct expr_node* node, @@ -124,7 +179,7 @@ static void emit_int_lit( outfile, dst, &(struct storage_location) { - .type = IMMEDIATE, + .type = STO_IMM, .value = node->val, }); } @@ -149,7 +204,7 @@ static void emit_char_lit( outfile, dst, &(struct storage_location) { - .type = IMMEDIATE, + .type = STO_IMM, .value = (unsigned long long) node->val }); } @@ -185,32 +240,15 @@ static void emit_var_ref( static void emit_stmt(FILE* outfile, const struct stmt_node* node); -static unsigned long long get_type_size(const struct type_node* type) { - if (type->ptr_level > 0) return PTR_SIZE; - - struct type_def type_def; - if (!scope_get_type(scope, &type_def, type->name)) - CGEN_PANIC("size of type %s is not known", type->name); - - return type_def.sz; -} - static struct var_def emit_var_decl( FILE* outfile, const struct var_decl_node* node ) { - unsigned long long type_sz = get_type_size(&node->type); - - fprintf(outfile, "\tsub rsp, %llu\n", type_sz); - scope->bp_offset += type_sz; - + struct lval_def var_dst = make_stack_lval(outfile, &node->type); struct var_def var_def = { .name = node->ident, - .loc = { - .type = BP_OFFSET, - .offset = scope->bp_offset, - }, - .sz = type_sz, + .loc = var_dst.loc, + .sz = var_dst.sz, }; scope_define_var(scope, var_def); return var_def; @@ -242,6 +280,70 @@ static void emit_assignment( if (dst != NULL) emit_mov(outfile, dst, &lval_def.loc); } +static void emit_call( + FILE* outfile, + const struct call_node* node, + const struct lval_def* dst +) { + unsigned long long orig_bp_offset = scope->bp_offset; + unsigned long long arg_bp_offset = orig_bp_offset; + + struct var_decl_node* arg_decl = node->called_fn->args_head; + struct expr_node* arg_eval = node->args_head; + while (arg_decl != NULL && arg_eval != NULL) { + struct lval_def arg_dst = make_stack_lval(outfile, &arg_decl->type); + emit_expr(outfile, arg_eval, &arg_dst); + + arg_decl = arg_decl->next; + arg_eval = arg_eval->next; + } + + if (arg_decl != NULL) + CGEN_PANIC("too many arguments to function %s", node->called_fn->name); + if (arg_eval != NULL) + CGEN_PANIC("missing arguments to function %s", node->called_fn->name); + + unsigned char arg_regnum = 0; + arg_decl = node->called_fn->args_head; + while (arg_decl != NULL) { + unsigned long long type_sz = get_type_size(&arg_decl->type); + arg_bp_offset += type_sz; + + struct lval_def arg_dst; + if (arg_regnum < CC_N_REGS) + arg_dst = (struct lval_def) { + .loc = (struct storage_location) { + .type = STO_REG, + .reg = CALLING_CONV[arg_regnum++], + }, + .sz = type_sz, + }; + else + arg_dst = make_stack_lval(outfile, &arg_decl->type); + + emit_mov(outfile, &arg_dst, &(struct storage_location) { + .type = STO_STACK, + .bp_offset = arg_bp_offset, + }); + arg_decl = arg_decl->next; + } + + fprintf(outfile, "\tcall %s\n", node->called_fn->name); + if (dst != NULL) { + if (node->called_fn->return_type.def.sz == 0) + CGEN_PANIC("can't assign the result of a void function"); + + emit_mov(outfile, dst, &RV_LOC); + } + + /* pop our argument temporaries off the stack */ + scope->bp_offset = orig_bp_offset; + if (orig_bp_offset > 0) + fprintf(outfile, "\tlea rsp, [rbp - %llu]\n", orig_bp_offset); + else + fprintf(outfile, "\tmov rsp, rbp\n"); +} + static void emit_expr( FILE* outfile, const struct expr_node* node, @@ -265,22 +367,34 @@ static void emit_expr( break; case EXPR_ASSIGN: emit_assignment(outfile, &node->as._assign, dst); + break; + case EXPR_CALL: + emit_call(outfile, &node->as._call, dst); + break; } } static void emit_return(FILE* outfile, const struct return_node* node) { - if (node->ret_val != NULL) + if (active_fn == NULL) CGEN_PANIC("must be inside a function to return"); + + if (node->ret_val != NULL) { + if (active_fn->return_type.def.sz == 0) + CGEN_PANIC( + "returning a value from void function %s", active_fn->name); + emit_expr( outfile, node->ret_val, &(struct lval_def) { .loc = RV_LOC, - .sz = FULL_REG_SZ /* TODO: need to pull this from return type */ + .sz = active_fn->return_type.def.sz, }); + } else if (active_fn->return_type.def.sz > 0) { + CGEN_PANIC( + "non-void function %s should return a value", active_fn->name); + } - fprintf(outfile, "\tmov rsp, rbp\n"); - fprintf(outfile, "\tpop rbp\n"); - fprintf(outfile, "\tret\n"); + fprintf(outfile, "\tjmp " RETURN_LABEL_FMT "\n", active_fn->name); } static void emit_group(FILE* outfile, const struct group_node* node) { @@ -322,6 +436,13 @@ static void emit_stmt(FILE* outfile, const struct stmt_node* node) { } static void emit_fn_decl(FILE* outfile, const struct fn_decl_node* node) { + if (active_fn != NULL) + CGEN_PANIC( + "can't define function %s inside function %s", + node->name, + active_fn->name); + active_fn = node; + fprintf(outfile, "%s:\n", node->name); fprintf(outfile, "\tpush rbp\n"); fprintf(outfile, "\tmov rbp, rsp\n"); @@ -330,42 +451,32 @@ static void emit_fn_decl(FILE* outfile, const struct fn_decl_node* node) { scope->bp_offset = 0; long long spilled_bp_ofs = -16; // return address + old bp - unsigned long long arg_regnum = 0; + unsigned char arg_regnum = 0; struct var_decl_node* args_node = node->args_head; while (args_node != NULL) { - unsigned long long type_sz = get_type_size(&args_node->type); - - scope->bp_offset += type_sz; - struct var_def var_def = { - .name = args_node->ident, - .loc = { - .type = BP_OFFSET, - .offset = scope->bp_offset, - }, - .sz = type_sz, - }; - scope_define_var(scope, var_def); + struct lval_def arg_dst = make_stack_lval(outfile, &args_node->type); + scope_define_var( + scope, + (struct var_def) { + .name = args_node->ident, + .loc = arg_dst.loc, + .sz = arg_dst.sz, + }); struct storage_location arg_src; if (arg_regnum < CC_N_REGS) { arg_src = (struct storage_location) { - .type = REGISTER, + .type = STO_REG, .reg = CALLING_CONV[arg_regnum++] }; } else { arg_src = (struct storage_location) { - .type = BP_OFFSET, - .offset = spilled_bp_ofs, + .type = STO_STACK, + .bp_offset = spilled_bp_ofs, }; - spilled_bp_ofs -= type_sz; + spilled_bp_ofs -= arg_dst.sz; } - emit_mov( - outfile, - &(struct lval_def) { - .loc = var_def.loc, - .sz = type_sz, - }, - &arg_src); + emit_mov(outfile, &arg_dst, &arg_src); args_node = args_node->next; } @@ -374,9 +485,11 @@ static void emit_fn_decl(FILE* outfile, const struct fn_decl_node* node) { scope_pop(&scope); + fprintf(outfile, RETURN_LABEL_FMT ":\n", node->name); fprintf(outfile, "\tmov rsp, rbp\n"); fprintf(outfile, "\tpop rbp\n"); fprintf(outfile, "\tret\n"); + active_fn = NULL; } static void emit_root_node(FILE* outfile, const struct root_node* node) { @@ -403,9 +516,10 @@ void emit_code(const struct root_node* ast, const char* path) { scope_define_var(scope, (struct var_def) { .name = fn_name, .loc = { - .type = JMP_LABEL, + .type = STO_LABEL, .label = fn_name, }, + /* sz ignored, not relevant to functions */ }); fprintf(outfile, "global %s\n", fn_name); } @@ -419,7 +533,7 @@ void emit_code(const struct root_node* ast, const char* path) { node = ast; while (node != NULL) { emit_root_node(outfile, node); - fprintf(outfile, "\n"); + if (node->next != NULL) fprintf(outfile, "\n"); node = node->next; } @@ -63,11 +63,14 @@ static void expect_kw(const char* kw) { } static void parse_type(struct type_node* p_node) { - /* TODO: need some concept of known types in scope */ /* TODO: modifiers, void rules, arrays, etc. */ /* TODO: struct, union, enum */ expect(TK_IDENT); - p_node->name = tok.data.ident; + struct type_def type_def; + if (!scope_get_type(scope, &type_def, tok.data.ident)) + PARSER_PANIC("unknown type name: %s", tok.data.ident); + + p_node->def = type_def; peek_or_panic(); p_node->ptr_level = 0; @@ -118,9 +121,6 @@ static void expr_to_lval(struct lval_node* l_node, struct expr_node* e_node) { } static void parse_expr_assign(struct expr_node* p_node) { - peek_or_panic(); - if (tok.type != TK_ASSIGN) return; - expr_to_lval(&p_node->as._assign.lval, p_node); p_node->type = EXPR_ASSIGN; p_node->as._assign.rval = protected_alloc(sizeof(struct expr_node)); @@ -129,6 +129,44 @@ static void parse_expr_assign(struct expr_node* p_node) { parse_expr(p_node->as._assign.rval); } +static void parse_arg_evals(struct expr_node** pp_arg) { + for (;;) { + *pp_arg = protected_alloc(sizeof(struct expr_node)); + parse_expr(*pp_arg); + pp_arg = &((*pp_arg)->next); + + peek_or_panic(); + if (tok.type == TK_RPAREN) break; + expect(TK_COMMA); + } +} + +static void parse_expr_call(struct expr_node* p_node) { + switch (p_node->type) { + case EXPR_VAR_REF: + struct var_def var_def; + if (!scope_get_var(scope, &var_def, p_node->as._var_ref.ident)) + PARSER_PANIC( + "%s is not a known function", p_node->as._var_ref.ident); + + if (var_def.loc.type != STO_FN) + PARSER_PANIC("called object is not a function"); + + p_node->as._call.called_fn = var_def.loc.decl; + break; + default: + PARSER_PANIC("expression is not callable"); + } + + p_node->type = EXPR_CALL; + p_node->as._call.args_head = NULL; + + expect(TK_LPAREN); + peek_or_panic(); + if (tok.type != TK_RPAREN) parse_arg_evals(&p_node->as._call.args_head); + expect(TK_RPAREN); +} + static void parse_expr(struct expr_node* p_node) { peek_or_panic(); switch (tok.type) { @@ -146,27 +184,16 @@ static void parse_expr(struct expr_node* p_node) { case TK_IDENT: p_node->type = EXPR_VAR_REF; parse_var_ref(&p_node->as._var_ref); - - peek_or_panic(); - if (tok.type == TK_ASSIGN) { - p_node->type = EXPR_ASSIGN; - p_node->as._assign = (struct assign_node) { - .lval = { - .type = LVAL_VAR_REF, - .as._var_ref = p_node->as._var_ref, - }, - .rval = protected_alloc(sizeof(struct expr_node)), - }; - - expect(TK_ASSIGN); - parse_expr(p_node->as._assign.rval); - } break; default: PARSER_PANIC("expected expression"); } - parse_expr_assign(p_node); + peek_or_panic(); + if (tok.type == TK_ASSIGN) + parse_expr_assign(p_node); + else if (tok.type == TK_LPAREN) + parse_expr_call(p_node); } static void parse_var_decl(struct var_decl_node* p_node) { @@ -259,7 +286,7 @@ static void parse_stmt(struct stmt_node* p_node) { expect(TK_SEMI); } -static void parse_arg_list(struct var_decl_node** pp_arg) { +static void parse_arg_decls(struct var_decl_node** pp_arg) { for (;;) { *pp_arg = protected_alloc(sizeof(struct var_decl_node)); parse_var_decl(*pp_arg); @@ -280,11 +307,19 @@ static void parse_fn_decl(struct fn_decl_node* p_node) { expect(TK_LPAREN); peek_or_panic(); - if (tok.type != TK_RPAREN) parse_arg_list(&p_node->args_head); + if (tok.type != TK_RPAREN) parse_arg_decls(&p_node->args_head); expect(TK_RPAREN); parse_group(&p_node->body); + + scope_define_var(scope, (struct var_def) { + .name = p_node->name, + .loc = { + .type = STO_FN, + .decl = p_node, + }, + }); } static bool parse_root(struct root_node* p_node) { @@ -44,5 +44,5 @@ const struct reg R8 = { }; const struct reg* const CALLING_CONV[] = {&RDI, &RSI, &RDX, &R10, &R9, &R8}; -const unsigned long long CC_N_REGS = +const unsigned char CC_N_REGS = sizeof(CALLING_CONV) / sizeof(const struct reg* const); @@ -17,6 +17,6 @@ extern const struct reg R9; extern const struct reg R8; extern const struct reg* const CALLING_CONV[]; -extern const unsigned long long CC_N_REGS; +extern const unsigned char CC_N_REGS; #endif @@ -2,7 +2,7 @@ #include <stdlib.h> #include <stdio.h> #include <string.h> -#define DEFAULT_SIZE 4 +#define DEFAULT_SIZE 16 static void scope_init(struct scope* scope) { scope->types = calloc(DEFAULT_SIZE, sizeof(struct type_def*)); @@ -3,16 +3,18 @@ struct storage_location { enum { - REGISTER, - JMP_LABEL, - BP_OFFSET, - IMMEDIATE, + STO_REG, + STO_LABEL, + STO_STACK, + STO_IMM, + STO_FN, } type; union { const struct reg* reg; const char* label; - long long offset; + long long bp_offset; unsigned long long value; + struct fn_decl_node* decl; }; }; |
