const std = @import("std");
const Error = @import("./Error.zig");
const Token = @import("./Token.zig");
const ziglyph = @import("ziglyph");
const Normalizer = ziglyph.Normalizer;
const Grapheme = ziglyph.Grapheme;
const CodePoint = ziglyph.CodePoint;
const GraphemeIterator = Grapheme.GraphemeIterator;
const letter = ziglyph.letter; // or const letter = ziglyph.letter;
const number = ziglyph.number; // or const number = ziglyph.number;

const Self = @This();

/// Allocator used for every allocation the lexer makes; also used by `deinit`.
allocator: std.mem.Allocator,
/// Lexer-owned copy of the NFKC-normalized input. The graphemes in `source`
/// are produced by iterating over this buffer, so it must stay alive for as
/// long as `source` is used. Freed by `deinit`.
normalized: []const u8 = &.{},
/// Grapheme clusters of the normalized input, in order. Freed by `deinit`.
source: []Grapheme,
/// Index into `source` of the first grapheme of the token being scanned.
start: u32,
/// Index into `source` of the next grapheme to consume.
index: u32,
/// Line number attached to tokens and diagnostics; initialised to 1.
line: u32,

/// Builds a lexer over `source`.
///
/// The input is NFKC-normalized and split into grapheme clusters. The
/// normalized text is copied into a buffer owned by the lexer, so the caller
/// may release `source` as soon as this function returns.
///
/// Returns an error if normalization, grapheme iteration, or any allocation
/// fails. On success the caller must eventually call `deinit`.
pub fn init(allocator: std.mem.Allocator, source: []const u8) !Self {
    var normalizer = try Normalizer.init(allocator);
    defer normalizer.deinit();

    var got_nfkc = try normalizer.nfkc(allocator, source);
    defer got_nfkc.deinit();

    // The normalizer result is released when this function returns, so the
    // graphemes must not be built over `got_nfkc.slice` directly: keep a copy
    // whose lifetime matches the lexer's.
    const normalized = try allocator.dupe(u8, got_nfkc.slice);
    errdefer allocator.free(normalized);

    var graphemes = std.ArrayList(Grapheme).init(allocator);
    defer graphemes.deinit();

    var iter = try GraphemeIterator.init(normalized);
    while (iter.next()) |grapheme| {
        try graphemes.append(grapheme);
    }

    return .{
        .allocator = allocator,
        .normalized = normalized,
        .source = try graphemes.toOwnedSlice(),
        .line = 1,
        .start = 0,
        .index = 0,
    };
}

/// Releases the grapheme slice and the normalized text owned by the lexer.
/// Tokens previously returned by `scanTokens` are not touched.
pub fn deinit(self: *Self) void {
    self.allocator.free(self.source);
    self.allocator.free(self.normalized);
}

/// Scans the remaining input and returns the resulting tokens, terminated by
/// an `.eof` token whose lexeme is the empty string literal.
///
/// The returned slice is allocated with `self.allocator` and handed to the
/// caller. If scanning fails part-way, the lexemes of the tokens collected so
/// far and the list itself are released before the error is returned.
pub fn scanTokens(self: *Self) ![]const Token {
    var tokens = std.ArrayList(Token).init(self.allocator);
    // Only needed on failure: a successful `toOwnedSlice` leaves the list empty.
    errdefer {
        for (tokens.items) |scanned| self.allocator.free(scanned.lexeme);
        tokens.deinit();
    }

    while (true) {
        if (self.isAtEnd()) break;
        // Every token starts where the previous one stopped.
        self.start = self.index;
        try self.scanToken(&tokens);
    }

    const eof: Token = .{
        .token_type = .eof,
        .lexeme = "",
        .line = self.line,
        .literal = null,
    };
    try tokens.append(eof);

    return tokens.toOwnedSlice();
}

/// Returns true once `index` has moved past the last grapheme of `source`.
fn isAtEnd(self: *Self) bool {
    const has_more = self.index < self.source.len;
    return !has_more;
}

/// Decodes the leading code point of `grapheme`.
///
/// A grapheme cluster may consist of several code points (for example "\r\n",
/// or a base character followed by combining marks), whereas
/// `std.unicode.utf8Decode` expects exactly one encoded code point. Only the
/// first code point of the cluster is decoded; any following code points are
/// ignored here.
///
/// Returns `error.Utf8InvalidStartByte` for an empty grapheme or an invalid
/// lead byte, `error.Utf8ExpectedContinuation` if the cluster is shorter than
/// its lead byte announces, and otherwise whatever `std.unicode.utf8Decode`
/// reports for the leading sequence.
fn codepointize(grapheme: Grapheme) !u21 {
    const bytes = grapheme.bytes;
    if (bytes.len == 0) return error.Utf8InvalidStartByte;

    // The lead byte tells how many bytes make up the first code point.
    const first_len = try std.unicode.utf8ByteSequenceLength(bytes[0]);
    if (first_len > bytes.len) return error.Utf8ExpectedContinuation;

    return std.unicode.utf8Decode(bytes[0..first_len]);
}

/// Returns the code point of the grapheme at `index` without consuming it,
/// or 0 when the input is exhausted.
inline fn peek(self: *Self) !u21 {
    if (!self.isAtEnd()) {
        return codepointize(self.source[self.index]);
    }
    return 0;
}

/// Consumes the next grapheme only if it decodes to `expected`.
/// Returns whether a grapheme was consumed; at end of input nothing is
/// consumed and the result is false.
fn match(self: *Self, expected: u21) !bool {
    const matched = !self.isAtEnd() and (try self.peek()) == expected;
    if (matched) self.index += 1;
    return matched;
}

/// Consumes the grapheme at `index` and returns its code point.
/// Performs no end-of-input check itself; it indexes `source` directly.
fn advance(self: *Self) !u21 {
    // The cursor moves forward whether or not decoding succeeds.
    defer self.index += 1;
    return codepointize(self.source[self.index]);
}

/// Builds a token of `token_type` covering the graphemes in `start..index`.
///
/// The lexeme is a freshly allocated concatenation of the bytes of those
/// graphemes, allocated with `self.allocator`. The token records the current
/// `line` and the given `literal`.
fn createToken(self: *Self, token_type: Token.Type, literal: ?Token.Literal) !Token {
    const span = self.source[self.start..self.index];

    // First pass: size the lexeme so it can be allocated exactly once.
    var byte_count: usize = 0;
    for (span) |grapheme| byte_count += grapheme.bytes.len;

    const lexeme = try self.allocator.alloc(u8, byte_count);

    // Second pass: copy each grapheme's bytes into place.
    var offset: usize = 0;
    for (span) |grapheme| {
        const piece = grapheme.bytes;
        @memcpy(lexeme[offset..][0..piece.len], piece);
        offset += piece.len;
    }

    return .{
        .token_type = token_type,
        .literal = literal,
        .lexeme = lexeme,
        .line = self.line,
    };
}

/// One row of the keyword table: a reserved word and the token type it maps to.
const KeywordEntry = struct {
    /// Token type emitted when an identifier's spelling equals `text`.
    kw: Token.Type,
    /// Exact spelling of the keyword, compared byte-for-byte.
    text: []const u8,
};

/// Reserved words recognised by `identOrKeyword`. An identifier whose spelling
/// matches none of these entries is emitted as `.identifier`.
const keywords = [_]KeywordEntry{
    .{ .kw = .kw_var, .text = "var" },
};

/// Scans the rest of an identifier whose first code point `ini` has already
/// been consumed, then classifies it.
///
/// Graphemes are consumed while their code point is alphanumeric. The collected
/// code points are rendered as UTF-8 and compared against the `keywords` table:
/// a match yields that keyword's token type, anything else yields
/// `.identifier`. The token itself is built by `createToken`.
fn identOrKeyword(self: *Self, ini: u21) !Token {
    // Phase 1: gather the identifier's code points.
    var codepoints = std.ArrayList(u21).init(self.allocator);
    defer codepoints.deinit();

    try codepoints.append(ini);
    while (true) {
        const upcoming = try self.peek();
        if (!ziglyph.isAlphaNum(upcoming)) break;
        try codepoints.append(try self.advance());
    }

    // Phase 2: render them as UTF-8 so they can be compared to keyword text.
    var spelling = std.ArrayList(u8).init(self.allocator);
    const out = spelling.writer();
    defer spelling.deinit();

    for (codepoints.items) |cp| try out.print("{u}", .{cp});

    // Phase 3: keyword lookup, defaulting to a plain identifier.
    var token_type: Token.Type = .identifier;
    for (keywords) |entry| {
        if (std.mem.eql(u8, spelling.items, entry.text)) {
            token_type = entry.kw;
            break;
        }
    }
    return self.createToken(token_type, null);
}

/// Classifies the token that begins with the already-consumed code point `cp`.
///
/// Returns the finished token, or null when `cp` starts nothing this lexer
/// recognises. Two-character operators consume their second character via
/// `match`; `//` consumes up to (not including) the next '\n' or the end of
/// input and yields a `.comment` token; letters are handed to `identOrKeyword`.
fn identifyToken(self: *Self, cp: u21) !?Token {
    const token_type: Token.Type = switch (cp) {
        // Single-character tokens.
        '(' => .left_paren,
        ')' => .right_paren,
        '{' => .left_brace,
        '}' => .right_brace,
        ',' => .comma,
        '.' => .dot,
        '-' => .minus,
        '+' => .plus,
        ';' => .semicolon,
        '*' => .star,

        // One- or two-character operators.
        '!' => if (try self.match('=')) .bang_equal else .bang,
        '=' => if (try self.match('=')) .equal_equal else .equal,
        '>' => if (try self.match('=')) .greater_equal else .greater,
        '<' => if (try self.match('=')) .less_equal else .less,

        // Division or a line comment.
        '/' => slash: {
            if (!try self.match('/')) break :slash .slash;
            while (true) {
                if (try self.peek() == '\n') break;
                if (self.isAtEnd()) break;
                _ = try self.advance();
            }
            break :slash .comment;
        },

        else => {
            if (!ziglyph.isLetter(cp)) return null;
            return try self.identOrKeyword(cp);
        },
    };

    return try self.createToken(token_type, null);
}

/// Scans one token starting at `index` and appends it to `list`.
///
/// Code points that start no known token are skipped until a token is found
/// or the input ends; the skipped run is then reported once through
/// `Error.report`. If the input ends before any token is found, nothing is
/// appended.
///
/// Returns any error from decoding, allocation, `Error.report`, or
/// `list.append`. A token that was created but could not be appended has its
/// lexeme freed before the error is returned.
fn scanToken(self: *Self, list: *std.ArrayList(Token)) !void {
    var c = try self.advance();

    var token = try self.identifyToken(c);
    // The lexeme is heap-allocated by `createToken`. Until the token is handed
    // to `list`, this function owns it and must free it on failure. The
    // expression runs at scope exit, so it sees the final value of `token`.
    errdefer if (token) |pending| self.allocator.free(pending.lexeme);

    // UTF-8 rendering of the unrecognised code points skipped so far.
    var unknown = std.ArrayList(u8).init(self.allocator);
    defer unknown.deinit();
    const unknown_writer = unknown.writer();

    while (token == null) {
        try unknown_writer.print("{u}", .{c});
        self.start = self.index; // Move up the start of the token (the previous start is unknown)
        if (self.isAtEnd()) break;
        c = try self.advance();
        token = try self.identifyToken(c);
    }

    if (unknown.items.len > 0) {
        try Error.report(self.line, "lexer", "Unknown codepoints '{}'", .{std.zig.fmtEscapes(unknown.items)});
    }

    if (token) |atoken| {
        try list.append(atoken);
    }
}