summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCarson Fleming <cflems@cflems.net>2026-03-29 08:28:28 -1000
committerCarson Fleming <cflems@cflems.net>2026-03-29 08:28:28 -1000
commit7cf2065be92855b5b1db31a4bb7afbb4af29a817 (patch)
tree773e9df00d46934f548a2d76dbe6e61aec9b21c9
parent50495e8f815d3d5f92b3d36369acc52a6d2ea9c4 (diff)
downloadccc-master.tar.gz
calling functions and some optimizationsHEADmaster
-rw-r--r--ast.c22
-rw-r--r--ast.h14
-rw-r--r--codegen.c256
-rw-r--r--parser.c81
-rw-r--r--register.c2
-rw-r--r--register.h2
-rw-r--r--scope.c2
-rw-r--r--scope.h12
8 files changed, 280 insertions, 111 deletions
diff --git a/ast.c b/ast.c
index 8c3461f..7837cf8 100644
--- a/ast.c
+++ b/ast.c
@@ -4,13 +4,7 @@
static void expr_destroy(struct expr_node* node);
static void stmt_destroy(struct stmt_node* node);
-static void type_destroy(struct type_node* node) {
- free(node->name);
-}
-
static void var_decl_destroy(struct var_decl_node* node) {
- type_destroy(&node->type);
-
free(node->ident);
}
@@ -46,8 +40,6 @@ static void group_destroy(struct group_node* node) {
}
static void fn_decl_destroy(struct fn_decl_node* node) {
- type_destroy(&node->return_type);
-
free(node->name);
struct var_decl_node* args_node = node->args_head;
@@ -72,6 +64,17 @@ static void str_lit_destroy(struct str_lit_node* node) {
free(node->val);
}
+static void call_destroy(struct call_node* node) {
+ /* don't destroy node->called_fn, this is owned by its declaration */
+ struct expr_node* args_node = node->args_head;
+ while (args_node != NULL) {
+ struct expr_node* next = args_node->next;
+ expr_destroy(args_node);
+ free(args_node);
+ args_node = next;
+ }
+}
+
static void expr_destroy(struct expr_node* node) {
switch (node->type) {
case EXPR_INT_LIT:
@@ -87,6 +90,9 @@ static void expr_destroy(struct expr_node* node) {
case EXPR_ASSIGN:
assign_destroy(&node->as._assign);
break;
+ case EXPR_CALL:
+ call_destroy(&node->as._call);
+ break;
}
}
diff --git a/ast.h b/ast.h
index 5ae0d12..2eabb02 100644
--- a/ast.h
+++ b/ast.h
@@ -1,6 +1,8 @@
#ifndef AST_H
#define AST_H
+#include "scope.h"
+
struct stmt_node;
struct expr_node;
@@ -8,8 +10,8 @@ struct type_node {
bool _unsigned;
bool _short;
bool _long;
+ struct type_def def;
unsigned char ptr_level;
- char* name;
};
struct int_lit_node {
@@ -56,6 +58,12 @@ struct assign_node {
struct expr_node* rval;
};
+struct call_node {
+ /* TODO: eventually this could also be a function pointer */
+ struct fn_decl_node* called_fn;
+ struct expr_node* args_head;
+};
+
struct expr_node {
enum {
EXPR_INT_LIT,
@@ -64,6 +72,7 @@ struct expr_node {
EXPR_STR_LIT,
EXPR_VAR_REF,
EXPR_ASSIGN,
+ EXPR_CALL,
} type;
union {
struct int_lit_node _int_lit;
@@ -72,7 +81,10 @@ struct expr_node {
struct str_lit_node _str_lit;
struct var_ref_node _var_ref;
struct assign_node _assign;
+ struct call_node _call;
} as;
+
+ struct expr_node* next;
};
struct group_node {
diff --git a/codegen.c b/codegen.c
index d0c4b27..216d41a 100644
--- a/codegen.c
+++ b/codegen.c
@@ -4,6 +4,7 @@
#include "register.h"
#include <stdlib.h>
#include <stdio.h>
+#include <string.h>
#define CGEN_PANIC(format, ...) {\
fprintf(\
@@ -19,12 +20,15 @@ struct lval_def {
};
static const struct storage_location RV_LOC = {
- .type = REGISTER,
+ .type = STO_REG,
.reg = &RAX,
};
+
+#define RETURN_LABEL_FMT "%s@coda"
#define FULL_REG_SZ 8
static struct scope* scope;
+static const struct fn_decl_node* active_fn;
static void emit_storage_loc(
FILE* outfile,
@@ -32,37 +36,64 @@ static void emit_storage_loc(
unsigned long long sz
) {
switch (loc->type) {
- case REGISTER:
+ case STO_LABEL:
+ fprintf(outfile, "%s", loc->label);
+ break;
+ case STO_FN:
+ fprintf(outfile, "%s", loc->decl->name);
+ break;
+ case STO_REG:
if (sz > 4) fprintf(outfile, "%s", loc->reg->qword);
else if (sz > 2) fprintf(outfile, "%s", loc->reg->dword);
else if (sz > 1) fprintf(outfile, "%s", loc->reg->word);
else fprintf(outfile, "%s", loc->reg->byte);
break;
- case JMP_LABEL:
- fprintf(outfile, "%s", loc->label);
- break;
- case BP_OFFSET:
- if (loc->offset < 0)
- fprintf(outfile, "[rbp + %lld]", -loc->offset);
- else if (loc->offset > 0)
- fprintf(outfile, "[rbp - %lld]", loc->offset);
+ case STO_STACK:
+ if (loc->bp_offset < 0)
+ fprintf(outfile, "[rbp + %lld]", -loc->bp_offset);
+ else if (loc->bp_offset > 0)
+ fprintf(outfile, "[rbp - %lld]", loc->bp_offset);
else
fprintf(outfile, "[rbp]");
break;
- case IMMEDIATE:
+ case STO_IMM:
fprintf(outfile, "%llu", loc->value);
break;
}
}
+static bool locs_equal(
+ const struct storage_location* a,
+ const struct storage_location* b
+) {
+ if (a->type != b->type) return false;
+
+ switch (a->type) {
+ case STO_IMM:
+ return a->value == b->value;
+ case STO_REG:
+ return strcmp(a->reg->qword, b->reg->qword) == 0;
+ case STO_STACK:
+ return a->bp_offset == b->bp_offset;
+ case STO_LABEL:
+ return strcmp(a->label, b->label) == 0;
+ case STO_FN:
+ return a->decl == b->decl;
+ }
+ CGEN_PANIC("unhandled storage type case");
+}
+
static void emit_mov(
FILE* outfile,
const struct lval_def* dst,
const struct storage_location* src
) {
+ /* first optimization: if dst == src, emit nothing */
+ if (locs_equal(&dst->loc, src)) return;
+
switch (dst->loc.type) {
- case REGISTER:
- if (src->type == REGISTER && dst->sz < 4) {
+ case STO_REG:
+ if (src->type == STO_REG && dst->sz < 4) {
fprintf(outfile, "\tmovzx ");
emit_storage_loc(outfile, &dst->loc, FULL_REG_SZ);
} else {
@@ -73,8 +104,8 @@ static void emit_mov(
fprintf(outfile, ", ");
emit_storage_loc(outfile, src, dst->sz);
break;
- case BP_OFFSET:
- if (src->type == BP_OFFSET) {
+ case STO_STACK:
+ if (src->type == STO_STACK) {
/* `mov mem, mem` is illegal in x86_64 */
emit_mov(
outfile,
@@ -88,7 +119,7 @@ static void emit_mov(
}
fprintf(outfile, "\tmov ");
- if (src->type == IMMEDIATE) {
+ if (src->type == STO_IMM) {
/* must specify size to move immediates into memory*/
if (dst->sz > 4) fprintf(outfile, "qword ");
else if (dst->sz > 2) fprintf(outfile, "dword ");
@@ -100,15 +131,39 @@ static void emit_mov(
fprintf(outfile, ", ");
emit_storage_loc(outfile, src, dst->sz);
break;
- case JMP_LABEL:
+ case STO_LABEL:
CGEN_PANIC("can't move value into label %s", dst->loc.label);
- case IMMEDIATE:
+ case STO_FN:
+ CGEN_PANIC(
+ "can't move value into function %s", dst->loc.decl->name);
+ case STO_IMM:
CGEN_PANIC(
"can't move value into immediate value %lld", dst->loc.value);
}
fprintf(outfile, "\n");
}
+static unsigned long long get_type_size(const struct type_node* type) {
+ if (type->ptr_level > 0) return PTR_SIZE;
+ return type->def.sz;
+}
+
+static struct lval_def make_stack_lval(
+ FILE* outfile,
+ const struct type_node* type
+) {
+ unsigned long long type_sz = get_type_size(type);
+ fprintf(outfile, "\tsub rsp, %llu\n", type_sz);
+ scope->bp_offset += type_sz;
+ return (struct lval_def) {
+ .loc = {
+ .type = STO_STACK,
+ .bp_offset = scope->bp_offset,
+ },
+ .sz = type_sz,
+ };
+}
+
static void emit_expr(
FILE* outfile,
const struct expr_node* node,
@@ -124,7 +179,7 @@ static void emit_int_lit(
outfile,
dst,
&(struct storage_location) {
- .type = IMMEDIATE,
+ .type = STO_IMM,
.value = node->val,
});
}
@@ -149,7 +204,7 @@ static void emit_char_lit(
outfile,
dst,
&(struct storage_location) {
- .type = IMMEDIATE,
+ .type = STO_IMM,
.value = (unsigned long long) node->val
});
}
@@ -185,32 +240,15 @@ static void emit_var_ref(
static void emit_stmt(FILE* outfile, const struct stmt_node* node);
-static unsigned long long get_type_size(const struct type_node* type) {
- if (type->ptr_level > 0) return PTR_SIZE;
-
- struct type_def type_def;
- if (!scope_get_type(scope, &type_def, type->name))
- CGEN_PANIC("size of type %s is not known", type->name);
-
- return type_def.sz;
-}
-
static struct var_def emit_var_decl(
FILE* outfile,
const struct var_decl_node* node
) {
- unsigned long long type_sz = get_type_size(&node->type);
-
- fprintf(outfile, "\tsub rsp, %llu\n", type_sz);
- scope->bp_offset += type_sz;
-
+ struct lval_def var_dst = make_stack_lval(outfile, &node->type);
struct var_def var_def = {
.name = node->ident,
- .loc = {
- .type = BP_OFFSET,
- .offset = scope->bp_offset,
- },
- .sz = type_sz,
+ .loc = var_dst.loc,
+ .sz = var_dst.sz,
};
scope_define_var(scope, var_def);
return var_def;
@@ -242,6 +280,70 @@ static void emit_assignment(
if (dst != NULL) emit_mov(outfile, dst, &lval_def.loc);
}
+static void emit_call(
+ FILE* outfile,
+ const struct call_node* node,
+ const struct lval_def* dst
+) {
+ unsigned long long orig_bp_offset = scope->bp_offset;
+ unsigned long long arg_bp_offset = orig_bp_offset;
+
+ struct var_decl_node* arg_decl = node->called_fn->args_head;
+ struct expr_node* arg_eval = node->args_head;
+ while (arg_decl != NULL && arg_eval != NULL) {
+ struct lval_def arg_dst = make_stack_lval(outfile, &arg_decl->type);
+ emit_expr(outfile, arg_eval, &arg_dst);
+
+ arg_decl = arg_decl->next;
+ arg_eval = arg_eval->next;
+ }
+
+ if (arg_decl != NULL)
+ CGEN_PANIC("too many arguments to function %s", node->called_fn->name);
+ if (arg_eval != NULL)
+ CGEN_PANIC("missing arguments to function %s", node->called_fn->name);
+
+ unsigned char arg_regnum = 0;
+ arg_decl = node->called_fn->args_head;
+ while (arg_decl != NULL) {
+ unsigned long long type_sz = get_type_size(&arg_decl->type);
+ arg_bp_offset += type_sz;
+
+ struct lval_def arg_dst;
+ if (arg_regnum < CC_N_REGS)
+ arg_dst = (struct lval_def) {
+ .loc = (struct storage_location) {
+ .type = STO_REG,
+ .reg = CALLING_CONV[arg_regnum++],
+ },
+ .sz = type_sz,
+ };
+ else
+ arg_dst = make_stack_lval(outfile, &arg_decl->type);
+
+ emit_mov(outfile, &arg_dst, &(struct storage_location) {
+ .type = STO_STACK,
+ .bp_offset = arg_bp_offset,
+ });
+ arg_decl = arg_decl->next;
+ }
+
+ fprintf(outfile, "\tcall %s\n", node->called_fn->name);
+ if (dst != NULL) {
+ if (node->called_fn->return_type.def.sz == 0)
+ CGEN_PANIC("can't assign the result of a void function");
+
+ emit_mov(outfile, dst, &RV_LOC);
+ }
+
+ /* pop our argument temporaries off the stack */
+ scope->bp_offset = orig_bp_offset;
+ if (orig_bp_offset > 0)
+ fprintf(outfile, "\tlea rsp, [rbp - %llu]\n", orig_bp_offset);
+ else
+ fprintf(outfile, "\tmov rsp, rbp\n");
+}
+
static void emit_expr(
FILE* outfile,
const struct expr_node* node,
@@ -265,22 +367,34 @@ static void emit_expr(
break;
case EXPR_ASSIGN:
emit_assignment(outfile, &node->as._assign, dst);
+ break;
+ case EXPR_CALL:
+ emit_call(outfile, &node->as._call, dst);
+ break;
}
}
static void emit_return(FILE* outfile, const struct return_node* node) {
- if (node->ret_val != NULL)
+ if (active_fn == NULL) CGEN_PANIC("must be inside a function to return");
+
+ if (node->ret_val != NULL) {
+ if (active_fn->return_type.def.sz == 0)
+ CGEN_PANIC(
+ "returning a value from void function %s", active_fn->name);
+
emit_expr(
outfile,
node->ret_val,
&(struct lval_def) {
.loc = RV_LOC,
- .sz = FULL_REG_SZ /* TODO: need to pull this from return type */
+ .sz = active_fn->return_type.def.sz,
});
+ } else if (active_fn->return_type.def.sz > 0) {
+ CGEN_PANIC(
+ "non-void function %s should return a value", active_fn->name);
+ }
- fprintf(outfile, "\tmov rsp, rbp\n");
- fprintf(outfile, "\tpop rbp\n");
- fprintf(outfile, "\tret\n");
+ fprintf(outfile, "\tjmp " RETURN_LABEL_FMT "\n", active_fn->name);
}
static void emit_group(FILE* outfile, const struct group_node* node) {
@@ -322,6 +436,13 @@ static void emit_stmt(FILE* outfile, const struct stmt_node* node) {
}
static void emit_fn_decl(FILE* outfile, const struct fn_decl_node* node) {
+ if (active_fn != NULL)
+ CGEN_PANIC(
+ "can't define function %s inside function %s",
+ node->name,
+ active_fn->name);
+ active_fn = node;
+
fprintf(outfile, "%s:\n", node->name);
fprintf(outfile, "\tpush rbp\n");
fprintf(outfile, "\tmov rbp, rsp\n");
@@ -330,42 +451,32 @@ static void emit_fn_decl(FILE* outfile, const struct fn_decl_node* node) {
scope->bp_offset = 0;
long long spilled_bp_ofs = -16; // return address + old bp
- unsigned long long arg_regnum = 0;
+ unsigned char arg_regnum = 0;
struct var_decl_node* args_node = node->args_head;
while (args_node != NULL) {
- unsigned long long type_sz = get_type_size(&args_node->type);
-
- scope->bp_offset += type_sz;
- struct var_def var_def = {
- .name = args_node->ident,
- .loc = {
- .type = BP_OFFSET,
- .offset = scope->bp_offset,
- },
- .sz = type_sz,
- };
- scope_define_var(scope, var_def);
+ struct lval_def arg_dst = make_stack_lval(outfile, &args_node->type);
+ scope_define_var(
+ scope,
+ (struct var_def) {
+ .name = args_node->ident,
+ .loc = arg_dst.loc,
+ .sz = arg_dst.sz,
+ });
struct storage_location arg_src;
if (arg_regnum < CC_N_REGS) {
arg_src = (struct storage_location) {
- .type = REGISTER,
+ .type = STO_REG,
.reg = CALLING_CONV[arg_regnum++]
};
} else {
arg_src = (struct storage_location) {
- .type = BP_OFFSET,
- .offset = spilled_bp_ofs,
+ .type = STO_STACK,
+ .bp_offset = spilled_bp_ofs,
};
- spilled_bp_ofs -= type_sz;
+ spilled_bp_ofs -= arg_dst.sz;
}
- emit_mov(
- outfile,
- &(struct lval_def) {
- .loc = var_def.loc,
- .sz = type_sz,
- },
- &arg_src);
+ emit_mov(outfile, &arg_dst, &arg_src);
args_node = args_node->next;
}
@@ -374,9 +485,11 @@ static void emit_fn_decl(FILE* outfile, const struct fn_decl_node* node) {
scope_pop(&scope);
+ fprintf(outfile, RETURN_LABEL_FMT ":\n", node->name);
fprintf(outfile, "\tmov rsp, rbp\n");
fprintf(outfile, "\tpop rbp\n");
fprintf(outfile, "\tret\n");
+ active_fn = NULL;
}
static void emit_root_node(FILE* outfile, const struct root_node* node) {
@@ -403,9 +516,10 @@ void emit_code(const struct root_node* ast, const char* path) {
scope_define_var(scope, (struct var_def) {
.name = fn_name,
.loc = {
- .type = JMP_LABEL,
+ .type = STO_LABEL,
.label = fn_name,
},
+ /* sz ignored, not relevant to functions */
});
fprintf(outfile, "global %s\n", fn_name);
}
@@ -419,7 +533,7 @@ void emit_code(const struct root_node* ast, const char* path) {
node = ast;
while (node != NULL) {
emit_root_node(outfile, node);
- fprintf(outfile, "\n");
+ if (node->next != NULL) fprintf(outfile, "\n");
node = node->next;
}
diff --git a/parser.c b/parser.c
index 157ffb1..f9c7047 100644
--- a/parser.c
+++ b/parser.c
@@ -63,11 +63,14 @@ static void expect_kw(const char* kw) {
}
static void parse_type(struct type_node* p_node) {
- /* TODO: need some concept of known types in scope */
/* TODO: modifiers, void rules, arrays, etc. */
/* TODO: struct, union, enum */
expect(TK_IDENT);
- p_node->name = tok.data.ident;
+ struct type_def type_def;
+ if (!scope_get_type(scope, &type_def, tok.data.ident))
+ PARSER_PANIC("unknown type name: %s", tok.data.ident);
+
+ p_node->def = type_def;
peek_or_panic();
p_node->ptr_level = 0;
@@ -118,9 +121,6 @@ static void expr_to_lval(struct lval_node* l_node, struct expr_node* e_node) {
}
static void parse_expr_assign(struct expr_node* p_node) {
- peek_or_panic();
- if (tok.type != TK_ASSIGN) return;
-
expr_to_lval(&p_node->as._assign.lval, p_node);
p_node->type = EXPR_ASSIGN;
p_node->as._assign.rval = protected_alloc(sizeof(struct expr_node));
@@ -129,6 +129,44 @@ static void parse_expr_assign(struct expr_node* p_node) {
parse_expr(p_node->as._assign.rval);
}
+static void parse_arg_evals(struct expr_node** pp_arg) {
+ for (;;) {
+ *pp_arg = protected_alloc(sizeof(struct expr_node));
+ parse_expr(*pp_arg);
+ pp_arg = &((*pp_arg)->next);
+
+ peek_or_panic();
+ if (tok.type == TK_RPAREN) break;
+ expect(TK_COMMA);
+ }
+}
+
+static void parse_expr_call(struct expr_node* p_node) {
+ switch (p_node->type) {
+ case EXPR_VAR_REF:
+ struct var_def var_def;
+ if (!scope_get_var(scope, &var_def, p_node->as._var_ref.ident))
+ PARSER_PANIC(
+ "%s is not a known function", p_node->as._var_ref.ident);
+
+ if (var_def.loc.type != STO_FN)
+ PARSER_PANIC("called object is not a function");
+
+ p_node->as._call.called_fn = var_def.loc.decl;
+ break;
+ default:
+ PARSER_PANIC("expression is not callable");
+ }
+
+ p_node->type = EXPR_CALL;
+ p_node->as._call.args_head = NULL;
+
+ expect(TK_LPAREN);
+ peek_or_panic();
+ if (tok.type != TK_RPAREN) parse_arg_evals(&p_node->as._call.args_head);
+ expect(TK_RPAREN);
+}
+
static void parse_expr(struct expr_node* p_node) {
peek_or_panic();
switch (tok.type) {
@@ -146,27 +184,16 @@ static void parse_expr(struct expr_node* p_node) {
case TK_IDENT:
p_node->type = EXPR_VAR_REF;
parse_var_ref(&p_node->as._var_ref);
-
- peek_or_panic();
- if (tok.type == TK_ASSIGN) {
- p_node->type = EXPR_ASSIGN;
- p_node->as._assign = (struct assign_node) {
- .lval = {
- .type = LVAL_VAR_REF,
- .as._var_ref = p_node->as._var_ref,
- },
- .rval = protected_alloc(sizeof(struct expr_node)),
- };
-
- expect(TK_ASSIGN);
- parse_expr(p_node->as._assign.rval);
- }
break;
default:
PARSER_PANIC("expected expression");
}
- parse_expr_assign(p_node);
+ peek_or_panic();
+ if (tok.type == TK_ASSIGN)
+ parse_expr_assign(p_node);
+ else if (tok.type == TK_LPAREN)
+ parse_expr_call(p_node);
}
static void parse_var_decl(struct var_decl_node* p_node) {
@@ -259,7 +286,7 @@ static void parse_stmt(struct stmt_node* p_node) {
expect(TK_SEMI);
}
-static void parse_arg_list(struct var_decl_node** pp_arg) {
+static void parse_arg_decls(struct var_decl_node** pp_arg) {
for (;;) {
*pp_arg = protected_alloc(sizeof(struct var_decl_node));
parse_var_decl(*pp_arg);
@@ -280,11 +307,19 @@ static void parse_fn_decl(struct fn_decl_node* p_node) {
expect(TK_LPAREN);
peek_or_panic();
- if (tok.type != TK_RPAREN) parse_arg_list(&p_node->args_head);
+ if (tok.type != TK_RPAREN) parse_arg_decls(&p_node->args_head);
expect(TK_RPAREN);
parse_group(&p_node->body);
+
+ scope_define_var(scope, (struct var_def) {
+ .name = p_node->name,
+ .loc = {
+ .type = STO_FN,
+ .decl = p_node,
+ },
+ });
}
static bool parse_root(struct root_node* p_node) {
diff --git a/register.c b/register.c
index ca095ea..fe1a718 100644
--- a/register.c
+++ b/register.c
@@ -44,5 +44,5 @@ const struct reg R8 = {
};
const struct reg* const CALLING_CONV[] = {&RDI, &RSI, &RDX, &R10, &R9, &R8};
-const unsigned long long CC_N_REGS =
+const unsigned char CC_N_REGS =
sizeof(CALLING_CONV) / sizeof(const struct reg* const);
diff --git a/register.h b/register.h
index 323b668..943e839 100644
--- a/register.h
+++ b/register.h
@@ -17,6 +17,6 @@ extern const struct reg R9;
extern const struct reg R8;
extern const struct reg* const CALLING_CONV[];
-extern const unsigned long long CC_N_REGS;
+extern const unsigned char CC_N_REGS;
#endif
diff --git a/scope.c b/scope.c
index 17bc22b..a0d4477 100644
--- a/scope.c
+++ b/scope.c
@@ -2,7 +2,7 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
-#define DEFAULT_SIZE 4
+#define DEFAULT_SIZE 16
static void scope_init(struct scope* scope) {
scope->types = calloc(DEFAULT_SIZE, sizeof(struct type_def*));
diff --git a/scope.h b/scope.h
index 52ae6b1..4d6cc7f 100644
--- a/scope.h
+++ b/scope.h
@@ -3,16 +3,18 @@
struct storage_location {
enum {
- REGISTER,
- JMP_LABEL,
- BP_OFFSET,
- IMMEDIATE,
+ STO_REG,
+ STO_LABEL,
+ STO_STACK,
+ STO_IMM,
+ STO_FN,
} type;
union {
const struct reg* reg;
const char* label;
- long long offset;
+ long long bp_offset;
unsigned long long value;
+ struct fn_decl_node* decl;
};
};