#include "ccc.h"
#include "lexer.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdckdint.h>

/*
 * Report a fatal lexer error and terminate the process.
 *
 * Prints "ccc: lexer error: line <LINE>, column <COL>: <message>" to stderr,
 * where <message> is the printf-style `format` expanded with the optional
 * trailing arguments, then calls exit(1). `format` must be a string literal
 * because it is concatenated with the message prefix at compile time.
 *
 * The body is wrapped in do { ... } while (0) so that the macro behaves as
 * one statement: `if (x) LEXER_PANIC("..."); else ...` parses as intended.
 * Every use must therefore be followed by a semicolon.
 */
#define LEXER_PANIC(format, ...) do {\
    fprintf(\
        stderr,\
        "ccc: lexer error: line %lu, column %lu: " format "\n",\
        LINE,\
        COL __VA_OPT__(,)\
        __VA_ARGS__);\
    exit(1);\
} while (0)

/* Lexer state shared by every function in this file. */

/* Currently loaded source stream; NULL while no file is loaded. */
static FILE* file = NULL;
/* Next unconsumed character of `file`; an fgetc() result, so possibly EOF. */
static int lookahead;
/* Position counters printed by LEXER_PANIC; lexer_load() resets both to 1. */
static unsigned long LINE;
static unsigned long COL;

/*
 * Open the source file at `path` and make it the lexer's current input.
 *
 * A previously loaded file is closed first. If the new file cannot be
 * opened, CCC_PANIC is invoked. On success the first character is read into
 * the lookahead slot and the position counters restart at line 1, column 1.
 */
void lexer_load(const char* path) {
    if (file) fclose(file);

    file = fopen(path, "r");
    if (!file) CCC_PANIC;

    LINE = COL = 1;
    lookahead = fgetc(file);
}

/*
 * Close the current source file, if any, and mark the lexer as unloaded.
 * Calling this while nothing is loaded does nothing.
 */
void lexer_close() {
    if (file != NULL) {
        fclose(file);
        file = NULL;
    }
}

bool lexer_peek(struct token* p_token) {
    if (file == NULL) return false;

    long orig_offset = ftell(file);
    int orig_lookahead = lookahead;
    bool rv = lexer_pop(p_token);
    lookahead = orig_lookahead;
    fseek(file, orig_offset, SEEK_SET);
    return rv;
}

/*
 * ASCII character classification helpers.
 *
 * Every use of the parameter is parenthesized so that an expression argument
 * (e.g. `a & b` or `x ? y : z`) is classified as a whole instead of being
 * torn apart by operator precedence. The argument is still evaluated more
 * than once, so it must not have side effects.
 */

/* Space, tab, newline, plus carriage return, vertical tab and form feed so
   that CRLF line endings and the remaining C white-space characters are
   skipped rather than reported as unexpected tokens. */
#define is_whitespace(c) \
    ((c) == ' ' || (c) == '\t' || (c) == '\n' \
        || (c) == '\r' || (c) == '\v' || (c) == '\f')
#define is_lower_alpha(c) ('a' <= (c) && (c) <= 'z')
#define is_upper_alpha(c) ('A' <= (c) && (c) <= 'Z')
#define is_alpha(c) (is_lower_alpha(c) || is_upper_alpha(c))
#define is_numeric(c) ('0' <= (c) && (c) <= '9')
#define is_alphanumeric(c) (is_alpha(c) || is_numeric(c))
/* Any digit that is valid in base 16: 0-9, a-f, A-F. */
#define is_hexadecimal(c) \
    (is_numeric(c) || ('a' <= (c) && (c) <= 'f') || ('A' <= (c) && (c) <= 'F'))
/* Characters allowed inside an identifier: letters, digits, '_' and '$'. */
#define is_ident_legal(c) (is_alphanumeric(c) || (c) == '_' || (c) == '$')

/*
 * Advance the input by one character.
 *
 * Returns the character that was waiting in the lookahead slot (possibly
 * EOF), refills the slot from the file and bumps the column counter.
 */
static int consume_char() {
    const int consumed = lookahead;
    COL += 1;
    lookahead = fgetc(file);
    return consumed;
}

/*
 * Lex an identifier whose first character `ic` has already been consumed.
 *
 * Keeps consuming while the lookahead is a legal identifier character and
 * stores an IDENTIFIER token in `*p_token`. The token's text is a freshly
 * allocated copy (strndup); nothing in this function frees it.
 *
 * Panics if the identifier does not fit in the local buffer or if the copy
 * cannot be allocated.
 */
static void lex_ident(struct token* p_token, char ic) {
    char buf[1024] = {ic};
    size_t len = 1;

    while (is_ident_legal(lookahead)) {
        int c = consume_char();
        /* keep one byte free for the terminating NUL */
        if (len >= sizeof(buf) - 1)
            LEXER_PANIC(
                "identifier exceeds maximum size (%zu)", sizeof(buf) - 1);
        buf[len++] = c;
    }
    buf[len] = 0;

    char* name = strndup(buf, len);
    if (name == NULL)
        LEXER_PANIC("out of memory while lexing identifier");

    *p_token = (struct token) {
        .type = IDENTIFIER,
        .data.identifier = name,
    };
}

/*
 * Convert the digit character `c` to its numeric value in `base`.
 *
 * '0'-'9' map to 0-9 and letters (either case) map to 10 and up. Panics if
 * the resulting value is not a valid digit in `base`. The callers in this
 * file only pass characters accepted by is_hexadecimal().
 */
static unsigned char digit_val(int c, unsigned char base) {
    /* character that would represent the value zero in c's digit family */
    int zero_point;
    if (is_numeric(c)) zero_point = '0';
    else if (is_lower_alpha(c)) zero_point = 'a' - 10;
    else zero_point = 'A' - 10;

    unsigned char value = c - zero_point;
    if (value >= base)
        LEXER_PANIC("invalid digit in base %hhu: %c", base, c);
    return value;
}

/*
 * Lex the fractional part of a floating-point literal.
 *
 * `iv` is the already-parsed integer part and `base` the radix of the
 * literal. Each fractional digit contributes digit * base^-k to the value.
 * Stores a FLOAT_LIT token in `*p_token`.
 *
 * The decimal point may or may not have been consumed by the caller:
 * lex_int_lit() hands over with '.' still in the lookahead, whereas
 * lexer_pop() has already consumed it for literals such as ".5". Both are
 * accepted; consuming unconditionally would swallow the first fractional
 * digit in the second case.
 *
 * Panics (via digit_val) on a digit that is invalid in `base`.
 */
static void lex_float_lit(
    struct token* p_token,
    unsigned char base,
    float_lit_t iv
) {
    if (lookahead == '.') consume_char();

    /* weight of the digit position currently being read */
    float_lit_t place = 1.0;
    while (is_hexadecimal(lookahead)) {
        int c = consume_char();
        place /= base;
        iv += digit_val(c, base) * place;
    }

    *p_token = (struct token) {
        .type = FLOAT_LIT,
        .data.float_lit = iv,
    };
}

/*
 * Lex an integer literal whose first digit has already been consumed.
 *
 * `iv` is the value of that first digit. A leading 0 selects the radix:
 * "0x"/"0X" is base 16, "0b"/"0B" is base 2, and a 0 followed directly by
 * another digit is base 8; everything else is base 10. Digits are
 * accumulated with overflow checking. If a '.' follows the digits, lexing
 * continues in lex_float_lit() with the integer part converted to
 * float_lit_t; otherwise an INT_LIT token is stored in `*p_token`.
 *
 * Panics on a radix prefix without digits, on an 'e'/'E' exponent marker
 * (not implemented), on a digit that is invalid in the radix, and on
 * overflow.
 */
static void lex_int_lit(struct token* p_token, int_lit_t iv) {
    unsigned char base = 10;

    if (iv == 0) {
        if (lookahead == 'x' || lookahead == 'X'
                || lookahead == 'b' || lookahead == 'B') {
            base = (lookahead == 'x' || lookahead == 'X') ? 16 : 2;
            int suffix = consume_char();
            if (!is_hexadecimal(lookahead))
                LEXER_PANIC("invalid suffix on integer constant: %c", suffix);
        } else if (is_hexadecimal(lookahead)) base = 8;
    }

    while (is_hexadecimal(lookahead)) {
        int c = consume_char();
        /* Must run before digit_val(): 'e' is never a valid digit below
           base 15, so digit_val() would panic with a misleading
           "invalid digit" message and this check would be unreachable. */
        if (base < 15 && (c == 'e' || c == 'E'))
            LEXER_PANIC("exponentiation is not implemented");
        unsigned char c_val = digit_val(c, base);
        if (ckd_mul(&iv, iv, base))
            LEXER_PANIC("integer literal will overflow");
        if (ckd_add(&iv, iv, c_val))
            LEXER_PANIC("integer literal will overflow");
    }

    if (lookahead == '.') {
        lex_float_lit(p_token, base, (float_lit_t) iv);
        return;
    }

    *p_token = (struct token) {
        .type = INT_LIT,
        .data.int_lit = iv,
    };
}

/*
 * Translate the character following a backslash into the character the
 * escape sequence stands for (e.g. 'n' becomes a newline).
 *
 * Only the simple one-character escapes are supported; anything else is a
 * fatal lexer error.
 */
static char replace_escape_sequence(char c) {
    static const struct {
        char letter;
        char value;
    } escapes[] = {
        {'\'', '\''},
        {'\"', '\"'},
        {'\\', '\\'},
        {'?', '?'},
        {'r', '\r'},
        {'n', '\n'},
        {'t', '\t'},
        {'v', '\v'},
        {'a', '\a'},
        {'b', '\b'},
        {'f', '\f'},
    };

    for (unsigned int i = 0; i < sizeof(escapes) / sizeof(escapes[0]); i++) {
        if (escapes[i].letter == c) return escapes[i].value;
    }

    /* TODO: numeric escape sequences, e.g. \xff */
    LEXER_PANIC("escape sequence not implemented");
}

static void lex_char_lit(struct token* p_token) {
    int c = consume_char();
    if (c == EOF) LEXER_PANIC("unexpected EOF in char literal");

    if (c == '\\') {
        c = consume_char();
        if (c == EOF) LEXER_PANIC("unexpected EOF in char literal");
        c = replace_escape_sequence(c);
    }

    int close_quote = consume_char();
    if (close_quote == EOF) LEXER_PANIC("unexpected EOF in char literal");
    if (close_quote != '\'')
        LEXER_PANIC(
            "expected end of char literal, not \"%c\"", close_quote);

    *p_token = (struct token) {
        .type = CHAR_LIT,
        .data.char_lit = c,
    };
}

/*
 * Lex a string literal whose opening double quote has already been
 * consumed.
 *
 * Characters are collected up to the closing quote, with backslash escapes
 * translated by replace_escape_sequence(). Stores a STR_LIT token in
 * `*p_token`; the text is a freshly allocated copy (strndup) that nothing
 * in this function frees. An empty literal yields an empty string.
 *
 * Panics on EOF or a raw newline before the closing quote, on a literal
 * that does not fit in the local buffer, and on allocation failure.
 */
static void lex_str_lit(struct token* p_token) {
    char buf[65536];
    size_t len = 0;

    for (;;) {
        int c = consume_char();
        if (c == '"') break;
        if (c == EOF || c == '\n') LEXER_PANIC("unterminated string literal");

        if (c == '\\') {
            c = consume_char();
            if (c == EOF) LEXER_PANIC("unterminated string literal");
            c = replace_escape_sequence(c);
        }

        /* keep one byte free for the terminating NUL */
        if (len >= sizeof(buf) - 1)
            LEXER_PANIC(
                "string literal exceeds maximum length (%zu)",
                sizeof(buf) - 1);
        buf[len++] = c;
    }
    buf[len] = 0;

    char* text = strndup(buf, len);
    if (text == NULL)
        LEXER_PANIC("out of memory while lexing string literal");

    *p_token = (struct token) {
        .type = STR_LIT,
        .data.str_lit = text,
    };
}

/*
 * Classify a two-character operator.
 *
 * `c` is the operator's first character (already consumed); the second
 * character is taken from the lookahead, which is only inspected, never
 * consumed. Returns the matching token type, or NOT_FOUND when `c` plus
 * the lookahead do not form a two-character operator.
 */
static enum token_type two_char_operator_type(char c) {
    const int eq = (lookahead == '=');

    switch (c) {
        /* operators whose only two-character form ends in '=' */
        case '!': return eq ? NEQ : NOT_FOUND;
        case '^': return eq ? XEQ : NOT_FOUND;
        case '*': return eq ? MUL_EQ : NOT_FOUND;
        case '=': return eq ? TEST_EQ : NOT_FOUND;
        case '+': return eq ? PLUS_EQ : NOT_FOUND;
        case '/': return eq ? DIV_EQ : NOT_FOUND;
        case '%': return eq ? MOD_EQ : NOT_FOUND;

        /* operators with a second two-character form */
        case '&':
            if (eq) return AND_EQ;
            return lookahead == '&' ? LOG_AND : NOT_FOUND;
        case '-':
            if (eq) return NEG_EQ;
            return lookahead == '>' ? ARROW : NOT_FOUND;
        case '|':
            if (eq) return PIPE_EQ;
            return lookahead == '|' ? LOG_PIPE : NOT_FOUND;
        case '<':
            if (eq) return LEQ;
            return lookahead == '<' ? SHL : NOT_FOUND;
        case '>':
            if (eq) return GEQ;
            return lookahead == '>' ? SHR : NOT_FOUND;

        default:
            return NOT_FOUND;
    }
}

static bool lex_complex_operator(struct token* p_token, char c) {
    enum token_type type = two_char_operator_type(c);
    if (type == NOT_FOUND) return false;
    consume_char();
    if (type == SHL && lookahead == '=') {
        consume_char();
        type = SHL_EQ;
    }
    if (type == SHR && lookahead == '=') {
        consume_char();
        type = SHR_EQ;
    }
    *p_token = (struct token) {.type = type};
    return type;
}

static enum token_type lex_simple_operator(char c) {
    switch (c) {
        case '#': return HASHTAG;
        case '(': return LPAREN;
        case ')': return RPAREN;
        case '{': return LCURLY;
        case '}': return RCURLY;
        case '[': return LSQUARE;
        case ']': return RSQUARE;
        case ':': return COLON;
        case ';': return SEMI;
        case ',': return COMMA;
        case '.': return DOT;
        case '?': return QMARK;
        case '!': return NOT;
        case '^': return XOR;
        case '&': return AMP;
        case '*': return STAR;
        case '-': return NEG;
        case '=': return ASSIGN;
        case '+': return PLUS;
        case '\\': return BSLASH;
        case '|': return PIPE;
        case '/': return DIV;
        case '%': return MOD;
        case '<': return LT;
        case '>': return GT;
    }
    LEXER_PANIC("unexpected token %c", c);
}

bool lexer_pop(struct token* p_token) {
    if (file == NULL) return false;

    // consume all whitespace and comments preceding the next token
    int c;
    for (;;) {
        c = consume_char();
        if (c == EOF) return false;
        else if (c == '/' && lookahead == '/') { // one of these
            while (lookahead != EOF && lookahead != '\n') consume_char();
        }
        else if (c == '/' && lookahead == '*') {
            consume_char(); /* consume the * */
            int c = consume_char();
            while (c != EOF && (c != '*' || lookahead != '/'))
                c = consume_char();
            if (c == EOF) LEXER_PANIC("unterminated /* comment");
            consume_char(); /* consume the final slash */
        }
        else if (c == '\n') {
            LINE++;
            COL = 1;
        }
        else if (!is_whitespace(c)) break;
    }
    
    if (is_numeric(c))
        lex_int_lit(p_token, c - '0');
    else if (c == '.' && is_numeric(lookahead))
        lex_float_lit(p_token, 10, 0);
    else if (is_ident_legal(c))
        lex_ident(p_token, c);
    else if (c == '\'')
        lex_char_lit(p_token);
    else if (c == '"')
        lex_str_lit(p_token);
    else if (!lex_complex_operator(p_token, c))
        p_token->type = lex_simple_operator(c);

    return true;
}