#include "parser.h" #include "lexer.h" #include "scope.h" #include #include #include #define PARSER_PANIC(format, ...) {\ fprintf(\ stderr,\ "ccc: parse error: %s: line %lu, column %lu: " format "\n",\ tok.PATH,\ tok.LINE,\ tok.COL __VA_OPT__(,)\ __VA_ARGS__);\ exit(1);\ } static struct token tok; static struct scope* scope; static void* protected_alloc(size_t sz) { void* ptr = calloc(1, sz); if (ptr == NULL) { fprintf(stderr, "ccc: out of memory\n"); exit(1); } return ptr; } static void unexpected_token(enum token_type expected) { /* TODO: print what token was expected */ PARSER_PANIC("unexpected token"); } /* TODO: reorganize the lexer to make peek cheaper */ static void peek_or_panic() { if (!lexer_peek(&tok)) PARSER_PANIC("unexpected EOF"); } static void expect(enum token_type expected) { if (!lexer_pop(&tok)) PARSER_PANIC("unexpected EOF"); if (tok.type != expected) unexpected_token(expected); } static void expect_kw(const char* kw) { if (!lexer_pop(&tok)) PARSER_PANIC("unexpected EOF, expected %s", kw); if (tok.type != TK_IDENT) PARSER_PANIC("unexpected token, expected %s", kw); if (strcmp(kw, tok.data.ident) != 0) PARSER_PANIC( "unexpected identifier %s, expected %s", tok.data.ident, kw); /* string won't go in the AST, discard it */ free(tok.data.ident); tok.data.ident = NULL; } static void parse_type(struct type_node* p_node) { /* TODO: modifiers, void rules, arrays, etc. */ /* TODO: struct, union, enum */ expect(TK_IDENT); struct type_def type_def; if (!scope_get_type(scope, &type_def, tok.data.ident)) PARSER_PANIC("unknown type name: %s", tok.data.ident); p_node->def = type_def; peek_or_panic(); p_node->ptr_level = 0; while (tok.type == TK_STAR) { expect(TK_STAR); p_node->ptr_level++; peek_or_panic(); } } static void parse_expr(struct expr_node* p_node); static void parse_literal(struct expr_node* p_node) { peek_or_panic(); switch (tok.type) { case TK_INT_LIT: expect(TK_INT_LIT); p_node->type = EXPR_INT_LIT; p_node->as._int_lit.val = tok.data.int_lit; break; case TK_CHAR_LIT: expect(TK_CHAR_LIT); p_node->type = EXPR_CHAR_LIT; p_node->as._char_lit.val = tok.data.char_lit; break; default: PARSER_PANIC("invalid literal type"); } } static void parse_var_ref(struct var_ref_node* p_node) { expect(TK_IDENT); p_node->ident = tok.data.ident; } static void expr_to_lval(struct lval_node* l_node, struct expr_node* e_node) { switch (e_node->type) { case EXPR_VAR_REF: *l_node = (struct lval_node) { .type = LVAL_VAR_REF, .as._var_ref = e_node->as._var_ref, }; return; default: PARSER_PANIC("expression is not assignable"); } } static void parse_expr_assign(struct expr_node* p_node) { expr_to_lval(&p_node->as._assign.lval, p_node); p_node->type = EXPR_ASSIGN; p_node->as._assign.rval = protected_alloc(sizeof(struct expr_node)); expect(TK_ASSIGN); parse_expr(p_node->as._assign.rval); } static void parse_arg_evals(struct expr_node** pp_arg) { for (;;) { *pp_arg = protected_alloc(sizeof(struct expr_node)); parse_expr(*pp_arg); pp_arg = &((*pp_arg)->next); peek_or_panic(); if (tok.type == TK_RPAREN) break; expect(TK_COMMA); } } static void parse_expr_call(struct expr_node* p_node) { switch (p_node->type) { case EXPR_VAR_REF: struct var_def var_def; if (!scope_get_var(scope, &var_def, p_node->as._var_ref.ident)) PARSER_PANIC( "%s is not a known function", p_node->as._var_ref.ident); if (var_def.loc.type != STO_FN) PARSER_PANIC("called object is not a function"); p_node->as._call.called_fn = var_def.loc.decl; break; default: PARSER_PANIC("expression is not callable"); } p_node->type = EXPR_CALL; p_node->as._call.args_head = NULL; expect(TK_LPAREN); peek_or_panic(); if (tok.type != TK_RPAREN) parse_arg_evals(&p_node->as._call.args_head); expect(TK_RPAREN); } static void parse_expr(struct expr_node* p_node) { peek_or_panic(); switch (tok.type) { case TK_LPAREN: expect(TK_LPAREN); parse_expr(p_node); expect(TK_RPAREN); break; case TK_INT_LIT: case TK_CHAR_LIT: case TK_FLOAT_LIT: case TK_STR_LIT: parse_literal(p_node); break; case TK_IDENT: p_node->type = EXPR_VAR_REF; parse_var_ref(&p_node->as._var_ref); break; default: PARSER_PANIC("expected expression"); } peek_or_panic(); if (tok.type == TK_ASSIGN) parse_expr_assign(p_node); else if (tok.type == TK_LPAREN) parse_expr_call(p_node); } static void parse_var_decl(struct var_decl_node* p_node) { parse_type(&p_node->type); expect(TK_IDENT); p_node->ident = tok.data.ident; } static void parse_stmt(struct stmt_node* p_node); static void parse_return(struct return_node* p_node) { expect_kw("return"); peek_or_panic(); if (tok.type == TK_SEMI) { p_node->ret_val = NULL; return; } p_node->ret_val = protected_alloc(sizeof(struct expr_node)); parse_expr(p_node->ret_val); } static void parse_group(struct group_node* p_node) { expect(TK_LCURLY); struct stmt_node** pp_node = &p_node->body_head; for (;;) { peek_or_panic(); if (tok.type == TK_RCURLY) break; *pp_node = protected_alloc(sizeof(struct stmt_node)); parse_stmt(*pp_node); pp_node = &((*pp_node)->next); } expect(TK_RCURLY); } static void parse_stmt_assign(struct stmt_node* p_node) { peek_or_panic(); if (tok.type != TK_ASSIGN) return; switch (p_node->type) { case STMT_VAR_DECL: p_node->as._expr.as._assign.lval = (struct lval_node) { .type = LVAL_VAR_DECL, .as._var_decl = p_node->as._var_decl, }; break; default: return; } p_node->type = STMT_EXPR; p_node->as._expr.type = EXPR_ASSIGN; p_node->as._expr.as._assign.rval = protected_alloc(sizeof(struct expr_node)); expect(TK_ASSIGN); parse_expr(p_node->as._expr.as._assign.rval); } static void parse_stmt(struct stmt_node* p_node) { peek_or_panic(); switch (tok.type) { case TK_SEMI: p_node->type = STMT_EMPTY; break; case TK_LCURLY: p_node->type = STMT_GROUP; parse_group(&p_node->as._group); return; case TK_IDENT: if (strcmp(tok.data.ident, "return") == 0) { p_node->type = STMT_RETURN; parse_return(&p_node->as._return); break; } else if (scope_get_type(scope, NULL, tok.data.ident)) { p_node->type = STMT_VAR_DECL; parse_var_decl(&p_node->as._var_decl); break; } default: p_node->type = STMT_EXPR; parse_expr(&p_node->as._expr); } parse_stmt_assign(p_node); expect(TK_SEMI); } static void parse_arg_decls(struct var_decl_node** pp_arg) { for (;;) { *pp_arg = protected_alloc(sizeof(struct var_decl_node)); parse_var_decl(*pp_arg); pp_arg = &((*pp_arg)->next); peek_or_panic(); if (tok.type == TK_RPAREN) break; expect(TK_COMMA); } } static void parse_fn_decl(struct fn_decl_node* p_node) { parse_type(&p_node->return_type); expect(TK_IDENT); p_node->name = tok.data.ident; expect(TK_LPAREN); peek_or_panic(); if (tok.type != TK_RPAREN) parse_arg_decls(&p_node->args_head); expect(TK_RPAREN); parse_group(&p_node->body); scope_define_var(scope, (struct var_def) { .name = p_node->name, .loc = { .type = STO_FN, .decl = p_node, }, }); } static bool parse_root(struct root_node* p_node) { if (!lexer_peek(&tok)) return false; p_node->type = ROOT_FN_DECL; parse_fn_decl(&p_node->as._fn_decl); return true; } struct root_node* parse(const char* path) { lexer_load(path); scope_push(&scope); scope_install_default_types(scope); struct root_node* root; struct root_node** p_node = &root; for (;;) { *p_node = protected_alloc(sizeof(struct root_node)); if (!parse_root(*p_node)) { free(*p_node); *p_node = NULL; break; } p_node = &((*p_node)->next); } scope_pop(&scope); lexer_close(); return root; }