From ef4e0881be7f34ca5c7bc366e629020a144ef0de Mon Sep 17 00:00:00 2001
From: Carson Fleming
Date: Sun, 15 Mar 2026 21:54:35 -0400
Subject: floats

---
 lexer.c  | 73 +++++++++++++++++++++++++++++++++++++++++++---------------------
 main.c   |  3 ++-
 makefile |  2 ++
 3 files changed, 53 insertions(+), 25 deletions(-)
 create mode 100644 makefile

diff --git a/lexer.c b/lexer.c
index a4ffd89..f472acc 100644
--- a/lexer.c
+++ b/lexer.c
@@ -54,6 +54,8 @@ bool lexer_peek(struct token* p_token) {
 #define is_alpha(c) (is_lower_alpha(c) || is_upper_alpha(c))
 #define is_numeric(c) ('0' <= c && c <= '9')
 #define is_alphanumeric(c) (is_alpha(c) || is_numeric(c))
+#define is_hexadecimal(c) \
+    (is_numeric(c) || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F'))
 #define is_ident_legal(c) (is_alphanumeric(c) || c == '_' || c == '$')
 
 static int consume_char() {
@@ -82,39 +84,55 @@ static void lex_ident(struct token* p_token, char ic) {
     };
 }
 
+static unsigned char digit_val(int c, unsigned char base) {
+    unsigned char c_val;
+    if (is_numeric(c)) c_val = c - '0';
+    else if (is_lower_alpha(c)) c_val = c - 'a' + 10;
+    else c_val = c - 'A' + 10;
+
+    if (c_val >= base)
+        LEXER_PANIC("invalid digit in base %hhu: %c", base, c);
+    return c_val;
+}
+
 static void lex_float_lit(
     struct token* p_token,
     unsigned char base,
-    double iv
+    float_lit_t iv
 ) {
-    LEXER_PANIC("floating point literals are not implemented");
+    if (consume_char() != '.')
+        LEXER_PANIC("sanity error, float literal without decimal point");
+    float_lit_t exp = 1.0;
+    while (is_hexadecimal(lookahead)) {
+        int c = consume_char();
+        exp /= base;
+        iv += digit_val(c, base) * exp;
+    }
+
+    *p_token = (struct token) {
+        .type = FLOAT_LIT,
+        .data.float_lit = iv,
+    };
 }
 
 static void lex_int_lit(struct token* p_token, int_lit_t iv) {
     unsigned char base = 10;
 
-    /* TODO: exponentiation, 2e10 f.e. */
     if (iv == 0) {
         if (lookahead == 'x' || lookahead == 'X' ||
             lookahead == 'b' || lookahead == 'B') {
             base = (lookahead == 'x' || lookahead == 'X') ? 16 : 2;
             int suffix = consume_char();
-            if (!is_alphanumeric(lookahead))
+            if (!is_hexadecimal(lookahead))
                 LEXER_PANIC("invalid suffix on integer constant: %c", suffix);
-        } else base = 8;
+        } else if (is_hexadecimal(lookahead)) base = 8;
     }
 
-    while (is_alphanumeric(lookahead)) {
+    while (is_hexadecimal(lookahead)) {
         int c = consume_char();
-        int_lit_t c_val;
-
-        if (is_numeric(c)) c_val = c - '0';
-        else if (is_lower_alpha(c)) c_val = c - 'a' + 10;
-        else c_val = c - 'A' + 10;
-
-        if (c_val >= base)
-            LEXER_PANIC("invalid digit in base %hhu: %c", base, c);
-
+        unsigned char c_val = digit_val(c, base);
+        if (base < 15 && (c == 'e' || c == 'E'))
+            LEXER_PANIC("exponentiation is not implemented");
         if (ckd_mul(&iv, iv, base))
             LEXER_PANIC("integer literal will overflow");
         if (ckd_add(&iv, iv, c_val))
@@ -122,8 +140,7 @@ static void lex_int_lit(struct token* p_token, int_lit_t iv) {
     }
 
     if (lookahead == '.') {
-        consume_char();
-        lex_float_lit(p_token, base, iv);
+        lex_float_lit(p_token, base, (float_lit_t) iv);
         return;
     }
 
@@ -134,13 +151,21 @@ static void lex_int_lit(struct token* p_token, int_lit_t iv) {
 }
 
 static char replace_escape_sequence(char c) {
-    if (c == '\'') return '\'';
-    else if (c == '\"') return '\"';
-    else if (c == '\\') return '\\';
-    else if (c == 'r') return '\r';
-    else if (c == 'n') return '\n';
-    else if (c == 't') return '\t';
-    else LEXER_PANIC("escape sequence not implemented");
+    switch (c) {
+        case '\'': return '\'';
+        case '\"': return '\"';
+        case '\\': return '\\';
+        case '?': return '?';
+        case 'r': return '\r';
+        case 'n': return '\n';
+        case 't': return '\t';
+        case 'v': return '\v';
+        case 'a': return '\a';
+        case 'b': return '\b';
+        case 'f': return '\f';
+        /* TODO: numeric escape sequences, e.g. \xff */
+        default: LEXER_PANIC("escape sequence not implemented");
+    }
 }
 
 static void lex_char_lit(struct token* p_token) {
diff --git a/main.c b/main.c
index d2a6ef5..6fc75e9 100644
--- a/main.c
+++ b/main.c
@@ -4,7 +4,7 @@
 
 int main(int argc, char** argv) {
     if (argc < 2) {
-        fprintf(stderr, "ccc: no input files");
+        fprintf(stderr, "ccc: no input files\n");
         return 1;
     }
 
@@ -12,6 +12,7 @@ int main(int argc, char** argv) {
     for (int i = 1; i < argc; i++) {
         lexer_load(argv[i]);
         while (lexer_pop(&token)) {
+            printf("[%s]: ", argv[i]);
             switch (token.type) {
                 case IDENTIFIER:
                     printf("got identifier: %s\n", token.data.identifier);
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..9cab3aa
--- /dev/null
+++ b/makefile
@@ -0,0 +1,2 @@
+all:
+	gcc -std=c23 -g -O0 -Wall -Werror *.c -o ccc
-- 
cgit v1.2.3