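// These snippets appear to use the ziglyph library (the Normalizer /
// GraphemeIterator / Grapheme API below matches it); a plausible import
// header for them, as an assumption rather than something the original
// shows. `Token`, `Self`, and `Error` are likewise assumed to be the
// author's own declarations elsewhere in the file.
const std = @import("std");
const ziglyph = @import("ziglyph");
const Normalizer = ziglyph.Normalizer;
const Grapheme = ziglyph.Grapheme;
const GraphemeIterator = ziglyph.GraphemeIterator;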
for (try scanner.scanTokens()) |token| {
    std.debug.print("{d}:{d} '{?s}' : {any}\n", .{
        token.source_start,
        token.source_end(),
        token.data,
        token.token_type,
    });
}
const tokens = try scanner.scanTokens();
defer {
    // The caller owns both the token slice and each token's lexeme.
    for (tokens) |token| {
        allocator.free(token.lexeme);
    }
    allocator.free(tokens);
}
for (tokens) |token| {
    std.debug.print("[line {d}] {any}\n", .{ token.line, token });
}
pub const Type = enum {
    // single-char tokens
    left_paren,
    right_paren,
    left_brace,
    right_brace,
    comma,
    dot,
    minus,
    plus,
    semicolon,
    slash,
    star,
    // one or two char tokens
    bang,
    bang_equal,
    equal,
    equal_equal,
    greater,
    greater_equal,
    less,
    less_equal,
    // literals
    identifier,
    string,
    number,
    // end-of-file marker
    eof,
};
pub fn source_end(self: *const Self) u32 {
    // `data` holds the token's source bytes; the end offset is start + byte length.
    return self.source_start + @as(u32, @truncate((self.data orelse &[_]u8{}).len));
}
pub fn format(value: Self, comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
    _ = fmt;
    _ = options;
    try writer.print("{any} '{s}' {?any}", .{ value.token_type, value.lexeme, value.literal });
}
pub fn init(allocator: std.mem.Allocator) !Self {
    return .{ .allocator = allocator };
}
pub fn init(allocator: std.mem.Allocator, source: []const u8) !Self {
    var normalizer = try Normalizer.init(allocator);
    defer normalizer.deinit();
    var got_nfkc = try normalizer.nfkc(allocator, source);
    defer got_nfkc.deinit();
    // Each Grapheme's `bytes` slice points into the buffer it was iterated
    // from, and `got_nfkc.deinit()` frees that buffer. Keep an owned copy
    // alive for the scanner's lifetime and iterate over the copy instead.
    // (`normalized` is an assumed field on the scanner struct, which the
    // snippets never show; it should be freed alongside `source`.)
    const normalized = try allocator.dupe(u8, got_nfkc.slice);
    errdefer allocator.free(normalized);
    var graphemes = std.ArrayList(Grapheme).init(allocator);
    defer graphemes.deinit();
    var iter = try GraphemeIterator.init(normalized);
    while (iter.next()) |grapheme| {
        try graphemes.append(grapheme);
    }
    return .{
        .allocator = allocator,
        .normalized = normalized,
        .source = try graphemes.toOwnedSlice(),
        .line = 1,
        .start = 0,
        .index = 0,
    };
}
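// No deinit appears in the snippets; a sketch of one, assuming the fields
// used above (`normalized` is the owned copy introduced in init, `source`
// the owned grapheme slice):
pub fn deinit(self: *Self) void {
    self.allocator.free(self.source);
    self.allocator.free(self.normalized);
}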
pub fn scanTokens(self: *Self) ![]const Token {
    _ = self;
    // Placeholder implementation: a single hard-coded identifier token.
    return &[_]Token{.{
        .token_type = .identifier,
        .data = "deez",
        .source_start = 0,
    }};
}
pub fn scanTokens(self: *Self) ![]Token {
    var list = std.ArrayList(Token).init(self.allocator);
    defer {
        // On an error return this frees every lexeme appended so far; on
        // success `toOwnedSlice` has already emptied the list, so nothing
        // is double-freed.
        for (list.items) |token| {
            self.allocator.free(token.lexeme);
        }
        list.deinit();
    }
    while (!self.isAtEnd()) {
        self.start = self.index;
        try self.scanToken(&list);
    }
    try list.append(.{
        .token_type = .eof,
        .lexeme = "",
        .line = self.line,
        .literal = null,
    });
    return list.toOwnedSlice();
}
fn isAtEnd(self: *Self) bool {
    return self.index >= self.source.len;
}
inline fn peek(self: *Self) Grapheme {
    // Caller must ensure !isAtEnd(); indexing past the end trips a
    // bounds-check panic.
    return self.source[self.index];
}
fn advance(self: *Self) Grapheme {
    const grapheme = self.source[self.index];
    self.index += 1;
    return grapheme;
}
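// The one-or-two character tokens in Token.Type (bang_equal, equal_equal,
// and so on) need single-codepoint lookahead. A sketch of a conditional
// advance built on the helpers above; `match` is a hypothetical name, not
// something the original shows:
fn match(self: *Self, expected: u21) bool {
    if (self.isAtEnd()) return false;
    if (codepointize(self.peek()) != expected) return false;
    self.index += 1;
    return true;
}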
fn addToken(self: *Self, list: *std.ArrayList(Token), token_type: Token.Type, literal: ?Token.Literal) !void {
    // Rebuild the lexeme's bytes from the grapheme range [start, index).
    var text = std.ArrayList(u8).init(self.allocator);
    defer text.deinit();
    for (self.source[self.start..self.index]) |grapheme| {
        try text.appendSlice(grapheme.bytes);
    }
    // Take ownership of the bytes before appending so the lexeme is not
    // leaked if the append itself fails.
    const lexeme = try text.toOwnedSlice();
    errdefer self.allocator.free(lexeme);
    try list.append(.{
        .token_type = token_type,
        .literal = literal,
        .lexeme = lexeme,
        .line = self.line,
    });
}
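// `Token.Literal` is referenced above but never defined in these snippets.
// A plausible shape for Lox's two literal kinds, as an assumption only:
pub const Literal = union(enum) {
    string: []const u8,
    number: f64,
};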
fn codepointize(grapheme: Grapheme) u21 {
    const bytes = grapheme.bytes;
    switch (bytes.len) {
        0 => unreachable,
        1 => return @as(u21, bytes[0]), // single-byte UTF-8, i.e. ASCII
        else => @panic("Unsupported grapheme length"),
    }
}
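// If multi-byte codepoints should scan too, one variant decodes the
// cluster's first codepoint with std.unicode instead of panicking
// (`codepointizeUtf8` is a hypothetical name, not from the original):
fn codepointizeUtf8(grapheme: Grapheme) !u21 {
    const bytes = grapheme.bytes;
    const len = try std.unicode.utf8ByteSequenceLength(bytes[0]);
    return std.unicode.utf8Decode(bytes[0..len]);
}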
fn scanToken(self: *Self, list: *std.ArrayList(Token)) !void {
    const c = self.advance();
    // We do not support combining characters. But we don't have to, as we
    // have already merged the necessary characters using the Unicode NFKC
    // process, so...
    const cp = codepointize(c);
    switch (cp) {
        '(' => try self.addToken(list, .left_paren, null),
        else => {
            try Error.report(self.line, "lexer", "Unknown codepoint {u}", .{cp});
            return error.UnknownCharacter;
        },
    }
}
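// End-to-end sketch of the pieces above, assuming they all live on one
// `Scanner` struct (the containing declaration's name is never shown) and
// that the deinit sketched earlier exists:
test "scan a left paren" {
    const allocator = std.testing.allocator;
    var scanner = try Scanner.init(allocator, "(");
    defer scanner.deinit();
    const tokens = try scanner.scanTokens();
    defer {
        for (tokens) |token| allocator.free(token.lexeme);
        allocator.free(tokens);
    }
    try std.testing.expectEqual(Token.Type.left_paren, tokens[0].token_type);
    try std.testing.expectEqual(Token.Type.eof, tokens[1].token_type);
}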