diff options
| author | Carson Fleming <cflems@cflems.net> | 2026-03-27 11:27:08 -1000 |
|---|---|---|
| committer | Carson Fleming <cflems@cflems.net> | 2026-03-27 11:27:08 -1000 |
| commit | 414a608c36b2d8f208ad0223219736d7582948ae (patch) | |
| tree | eeeb284023236a4ee53bb4a78608c3cd1e3992bb | |
| parent | fca3bf239cfdf03c4479f5d0c14a21c1fd96ea3e (diff) | |
| download | ccc-414a608c36b2d8f208ad0223219736d7582948ae.tar.gz | |
fix some stuff
| -rw-r--r-- | ccc.h | 2 | ||||
| -rw-r--r-- | codegen.c | 3 | ||||
| -rw-r--r-- | lexer.c | 49 | ||||
| -rw-r--r-- | lexer.h | 1 | ||||
| -rw-r--r-- | main.c | 17 | ||||
| -rw-r--r-- | parser.c | 101 | ||||
| -rw-r--r-- | test/weird.c | 2 |
7 files changed, 92 insertions, 83 deletions
@@ -2,6 +2,6 @@ #define CCC_H #define CCC_PANIC { perror("ccc"); exit(1); } - +#define PTR_SIZE 8 #endif @@ -81,13 +81,14 @@ static void emit_expr( break; case EXPR_VAR_REF: emit_var_ref(outfile, &node->as._var_ref, storage); + break; } } static void emit_stmt(FILE* outfile, const struct stmt_node* node); static unsigned long long get_type_size(const struct type_node* type) { - if (type->ptr_level > 0) return 8; + if (type->ptr_level > 0) return PTR_SIZE; struct type_def type_def; if (!scope_get_type(scope, &type_def, type->name)) @@ -10,6 +10,8 @@ static int lookahead; static const char* PATH; static unsigned long LINE, COL; +static struct token tok = {.type = TK_NOT_FOUND}; + #define LEXER_PANIC(format, ...) {\ fprintf(\ stderr,\ @@ -21,6 +23,8 @@ static unsigned long LINE, COL; exit(1);\ } +static void lexer_advance(); + void lexer_load(const char* path) { if (file != NULL) { fclose(file); @@ -32,6 +36,7 @@ void lexer_load(const char* path) { PATH = path; LINE = 1; COL = 1; + lexer_advance(); } void lexer_close() { @@ -41,20 +46,9 @@ void lexer_close() { } bool lexer_peek(struct token* p_token) { - if (file == NULL) return false; - - long orig_offset = ftell(file); - int orig_lookahead = lookahead; - unsigned long orig_line = LINE, orig_col = COL; - - bool rv = lexer_pop(p_token); - - LINE = orig_line; - COL = orig_col; - lookahead = orig_lookahead; - fseek(file, orig_offset, SEEK_SET); - - return rv; + if (tok.type == TK_NOT_FOUND) return false; + if (p_token != NULL) *p_token = tok; + return true; } #define is_whitespace(c) (c == ' ' || c == '\t' || c == '\n') @@ -328,7 +322,7 @@ static enum token_type lex_simple_operator(char c) { LEXER_PANIC("unexpected token %c", c); } -bool lexer_pop(struct token* p_token) { +static bool lexer_read() { if (file == NULL) return false; // consume all whitespace and comments preceding the next token @@ -355,21 +349,28 @@ bool lexer_pop(struct token* p_token) { } if (is_numeric(c)) - lex_int_lit(p_token, c - '0'); + lex_int_lit(&tok, c - '0'); else if (c == '.' && is_numeric(lookahead)) - lex_float_lit(p_token, 10, 0); + lex_float_lit(&tok, 10, 0); else if (is_ident_legal(c)) - lex_ident(p_token, c); + lex_ident(&tok, c); else if (c == '\'') - lex_char_lit(p_token); + lex_char_lit(&tok); else if (c == '"') - lex_str_lit(p_token); - else if (!lex_complex_operator(p_token, c)) - p_token->type = lex_simple_operator(c); + lex_str_lit(&tok); + else if (!lex_complex_operator(&tok, c)) + tok.type = lex_simple_operator(c); return true; } -bool lexer_eof() { - return lookahead == EOF; +static void lexer_advance() { + if (!lexer_read()) tok.type = TK_NOT_FOUND; +} + +bool lexer_pop(struct token* p_token) { + if (tok.type == TK_NOT_FOUND) return false; + if (p_token != NULL) *p_token = tok; + lexer_advance(); + return true; } @@ -77,6 +77,5 @@ void lexer_load(const char* path); void lexer_close(); bool lexer_peek(struct token* p_token); bool lexer_pop(struct token* p_token); -bool lexer_eof(); #endif @@ -3,6 +3,7 @@ #include "codegen.h" #include <stdlib.h> #include <stdio.h> +#include <string.h> void test_lexer(int argc, char** argv) { struct token token; @@ -37,15 +38,17 @@ void test_lexer(int argc, char** argv) { } void test_parser(int argc, char** argv) { - struct root_node* root; - struct root_node** p_cur = &root; for (int i = 1; i < argc; i++) { - *p_cur = parse(argv[i]); - p_cur = &((*p_cur)->next); - } + struct root_node* root = parse(argv[i]); + unsigned int fn_sz = strlen(argv[i]); + char outfile[fn_sz + 1]; + strcpy(outfile, argv[i]); + outfile[fn_sz - 1] = 's'; + outfile[fn_sz] = 0; - emit_code(root, "test/simple.s"); - ast_destroy(root); + emit_code(root, outfile); + ast_destroy(root); + } } int main(int argc, char** argv) { @@ -33,6 +33,12 @@ static void unexpected_token(enum token_type expected) { PARSER_PANIC("unexpected token"); } +/* TODO: reorganize the lexer to make peek cheaper */ +static void peek_or_panic() { + if (!lexer_peek(&tok)) + PARSER_PANIC("unexpected EOF"); +} + static void expect(enum token_type expected) { if (!lexer_pop(&tok)) PARSER_PANIC("unexpected EOF"); @@ -40,46 +46,38 @@ static void expect(enum token_type expected) { if (tok.type != expected) unexpected_token(expected); } -static void peek_or_panic() { - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF"); -} +static void expect_kw(const char* kw) { + if (!lexer_pop(&tok)) + PARSER_PANIC("unexpected EOF, expected %s", kw); -/* "handle" indicates that we've peeked already */ -static void handle_expr(struct expr_node* p_node); -static void handle_stmt(struct stmt_node* p_node); + if (tok.type != TK_IDENT) + PARSER_PANIC("unexpected token, expected %s", kw); + + if (strcmp(kw, tok.data.ident) != 0) + PARSER_PANIC( + "unexpected identifier %s, expected %s", tok.data.ident, kw); + + /* string won't go in the AST, discard it */ + free(tok.data.ident); + tok.data.ident = NULL; +} -static void handle_type(struct type_node* p_node) { +static void parse_type(struct type_node* p_node) { /* TODO: need some concept of known types in scope */ /* TODO: modifiers, void rules, arrays, etc. */ /* TODO: struct, union, enum */ + expect(TK_IDENT); p_node->name = tok.data.ident; peek_or_panic(); p_node->ptr_level = 0; while (tok.type == TK_STAR) { - p_node->ptr_level++; expect(TK_STAR); + p_node->ptr_level++; + peek_or_panic(); } } -static void parse_return(struct return_node* p_node) { - expect(TK_IDENT); - if (strcmp(tok.data.ident, "return") != 0) - PARSER_PANIC("unexpected token %s; expected: return", tok.data.ident); - - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in return statement"); - - if (tok.type == TK_SEMI) { - p_node->ret_val = NULL; - return; - } - - p_node->ret_val = protected_alloc(sizeof(struct expr_node)); - handle_expr(p_node->ret_val); -} - static void parse_int_lit(struct int_lit_node* p_node) { expect(TK_INT_LIT); p_node->val = tok.data.int_lit; @@ -90,7 +88,8 @@ static void parse_var_ref(struct var_ref_node* p_node) { p_node->ident = tok.data.ident; } -static void handle_expr(struct expr_node* p_node) { +static void parse_expr(struct expr_node* p_node) { + peek_or_panic(); switch (tok.type) { case TK_SEMI: p_node->type = EXPR_EMPTY; @@ -108,31 +107,45 @@ static void handle_expr(struct expr_node* p_node) { } } -static void handle_var_decl(struct var_decl_node* p_node) { - handle_type(&p_node->type); +static void parse_var_decl(struct var_decl_node* p_node) { + parse_type(&p_node->type); expect(TK_IDENT); p_node->ident = tok.data.ident; } +static void parse_stmt(struct stmt_node* p_node); + +static void parse_return(struct return_node* p_node) { + expect_kw("return"); + + peek_or_panic(); + if (tok.type == TK_SEMI) { + p_node->ret_val = NULL; + return; + } + + p_node->ret_val = protected_alloc(sizeof(struct expr_node)); + parse_expr(p_node->ret_val); +} + static void parse_group(struct group_node* p_node) { expect(TK_LCURLY); struct stmt_node** pp_node = &p_node->body_head; for (;;) { - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in statement group"); - + peek_or_panic(); if (tok.type == TK_RCURLY) break; *pp_node = protected_alloc(sizeof(struct stmt_node)); - handle_stmt(*pp_node); + parse_stmt(*pp_node); pp_node = &((*pp_node)->next); } expect(TK_RCURLY); } -static void handle_stmt(struct stmt_node* p_node) { +static void parse_stmt(struct stmt_node* p_node) { + peek_or_panic(); switch (tok.type) { case TK_LCURLY: p_node->type = STMT_GROUP; @@ -145,45 +158,37 @@ static void handle_stmt(struct stmt_node* p_node) { break; } else if (scope_get_type(scope, NULL, tok.data.ident)) { p_node->type = STMT_VAR_DECL; - handle_var_decl(&p_node->as._var_decl); + parse_var_decl(&p_node->as._var_decl); break; } default: p_node->type = STMT_EXPR; - handle_expr(&p_node->as._expr); + parse_expr(&p_node->as._expr); } expect(TK_SEMI); } static void parse_arg_list(struct var_decl_node** pp_arg) { for (;;) { - expect(TK_IDENT); - *pp_arg = protected_alloc(sizeof(struct var_decl_node)); - handle_var_decl(*pp_arg); + parse_var_decl(*pp_arg); pp_arg = &((*pp_arg)->next); - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in argument list"); - + peek_or_panic(); if (tok.type == TK_RPAREN) break; expect(TK_COMMA); } } static void parse_fn_decl(struct fn_decl_node* p_node) { - expect(TK_IDENT); - handle_type(&p_node->return_type); + parse_type(&p_node->return_type); expect(TK_IDENT); p_node->name = tok.data.ident; expect(TK_LPAREN); - if (!lexer_peek(&tok)) - PARSER_PANIC("unexpected EOF in function declaration"); - - + peek_or_panic(); if (tok.type != TK_RPAREN) parse_arg_list(&p_node->args_head); expect(TK_RPAREN); diff --git a/test/weird.c b/test/weird.c index 74a74be..bc49f3f 100644 --- a/test/weird.c +++ b/test/weird.c @@ -1,3 +1,3 @@ -int main(int argc) { +int main(int argc, char** argv) { return argc; } |
