// (removed: non-Rust extraction artifact that preceded the file)
/// Returns the sum of `left` and `right`.
///
/// Cargo-template helper kept for smoke-testing the crate wiring.
pub fn add(left: usize, right: usize) -> usize {
    let sum = left + right;
    sum
}
pub mod lexer {
use logos::{Lexer, Logos};
use std::num::ParseIntError;
// Use tiny winnow parsers to parse intra-element stuff
use winnow::{
ascii::hex_digit1,
combinator::{alt, delimited, preceded, repeat},
token::{any, one_of},
PResult, Parser,
};
#[derive(thiserror::Error, Clone, PartialEq, Default, Debug)]
pub enum LexError {
#[default]
#[error("unknown")]
Unknown,
}
/// Parses the interior of a `|...|` literal identifier into its characters.
///
/// Per R7RS a `<symbol element>` is one of:
/// * an inline hex escape `\xH...;`,
/// * a mnemonic escape `\a`, `\b`, `\t`, `\n`, `\r`,
/// * the escaped vertical bar `\|`,
/// * any other character, taken verbatim.
fn symbol_element_parse(input: &mut &str) -> PResult<Vec<char>> {
    repeat(
        0..,
        alt((
            // Inline hex escape. Values that are not a valid Unicode scalar
            // become U+FFFD; an overflowing u32 parse rejects this branch
            // and the input falls through to the `any` fallback.
            delimited(r"\x", hex_digit1, r";").try_map(|s: &str| {
                Ok::<_, ParseIntError>(
                    char::from_u32(u32::from_str_radix(s, 16)?)
                        .unwrap_or(char::REPLACEMENT_CHARACTER),
                )
            }),
            // Mnemonic escapes, plus `\|` so a vertical bar can appear
            // inside the `|...|` delimiters (mirrors the string parser).
            preceded(r"\", one_of(['a', 'b', 't', 'n', 'r', '|'])).map(|c| match c {
                'a' => '\x07',
                'b' => '\x08',
                't' => '\t',
                'n' => '\n',
                'r' => '\r',
                '|' => '|',
                _ => unreachable!(r"Scheme R7RS symbols only handle \a, \b, \t, \n, \r, and \|"),
            }),
            // Anything else is kept verbatim.
            any,
        )),
    )
    .parse_next(input)
}
/// Logos callback: converts a lexed `|...|` identifier slice into its
/// unescaped symbol text.
///
/// Strips the delimiting vertical bars, then expands escapes via
/// `symbol_element_parse`. Any parse failure maps to `LexError::Unknown`.
fn parse_to_symbol_elements(lex: &mut Lexer<Token>) -> Result<String, LexError> {
    let slice = lex.slice();
    // The token regex guarantees a leading and a trailing `|`.
    let target = &slice[1..slice.len() - 1];
    symbol_element_parse
        .parse(target)
        .map(|chars: Vec<char>| chars.into_iter().collect())
        // Previously this branch carried a stray `dbg!` that printed the
        // error's type name to stderr on every lex failure; removed.
        // TODO A proper lex error (for ident symbols!)
        .map_err(|_| LexError::Unknown)
}
/// Parses the interior of a `"..."` string literal into its characters.
///
/// Handles, per R7RS string syntax:
/// * inline hex escapes `\xH...;`,
/// * mnemonic escapes `\a \b \t \n \r \" \\ \|`,
/// * the line-continuation escape `\<ws>*<newline><ws>*`, which
///   contributes no characters,
/// * any other character verbatim (including bare newlines).
///
/// Bug fix: the line-continuation branch previously lacked the leading
/// backslash, so it stripped *unescaped* newlines (which R7RS keeps
/// literal) while leaving the `\` of a real continuation in the output.
fn string_element_parse(input: &mut &str) -> PResult<Vec<char>> {
    repeat(
        0..,
        alt((
            delimited(r"\x", hex_digit1, r";")
                .try_map(|s: &str| {
                    Ok::<_, ParseIntError>(
                        char::from_u32(u32::from_str_radix(s, 16)?)
                            .unwrap_or(char::REPLACEMENT_CHARACTER),
                    )
                })
                .map(Some),
            preceded(r"\", one_of(['a', 'b', 't', 'n', 'r', '"', '\\', '|']))
                .map(|c| match c {
                    'a' => 7 as char,
                    'b' => 8 as char,
                    't' => 9 as char,
                    'n' => 0xa as char,
                    'r' => 0xd as char,
                    '"' => 0x22 as char,
                    '\\' => 0x5c as char,
                    '|' => 0x7c as char,
                    _ => unreachable!(
                        r#"Scheme R7RS strings only handle \a, \b, \t, \n, \r, \", \\, and \|"#
                    ),
                })
                .map(Some),
            // Line continuation: `\`, optional intraline whitespace, a
            // newline, optional intraline whitespace — elided entirely.
            // The leading `\` is required; a bare newline stays literal
            // and is handled by the `any` fallback below.
            preceded(
                r"\",
                delimited(
                    repeat::<_, _, (), _, _>(0.., one_of([' ', '\t', '\r'])),
                    '\n',
                    repeat::<_, _, (), _, _>(0.., one_of([' ', '\t', '\r'])),
                ),
            )
            .void()
            .map(|_| None),
            any.map(Some),
        )),
    )
    .map(|vc: Vec<Option<char>>| vc.into_iter().flatten().collect())
    .parse_next(input)
}
/// Logos callback: converts a lexed `"..."` string token into its
/// unescaped contents.
///
/// Strips the delimiting quotes, then expands escapes via
/// `string_element_parse`. Any parse failure maps to `LexError::Unknown`.
fn parse_to_string(lex: &mut Lexer<Token>) -> Result<String, LexError> {
    let slice = lex.slice();
    // The token regex guarantees a leading and a trailing `"`.
    let target = &slice[1..slice.len() - 1];
    string_element_parse
        .parse(target)
        .map(|chars: Vec<char>| chars.into_iter().collect())
        // Previously this branch carried a stray `dbg!` that printed the
        // error's type name to stderr on every lex failure; removed.
        // TODO A proper lex error
        .map_err(|_| LexError::Unknown)
}
/// A single R7RS Scheme lexical token.
///
/// Whitespace and comment-delimiter tokens are emitted (not skipped) so a
/// later pass can handle line/datum/block comments itself.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Logos)]
#[logos(error = LexError)]
pub enum Token {
    #[token("(")]
    LParen,
    #[token(")")]
    RParen,
    /// Intraline whitespace; newlines are lexed separately as `LineEnd`.
    #[regex(r"[ \t\r]+")]
    Whitespace,
    /// Plain identifiers, or `|...|` literal identifiers whose escapes are
    /// expanded by `parse_to_symbol_elements`. The escape class includes
    /// `|` so the R7RS `\|` escape is accepted inside `|...|` (it was
    /// previously rejected, truncating the token at the escaped bar).
    #[regex(r"[A-Za-z!$%&*+\-./:<=>?@^_-][A-Za-z0-9!$%&*+\-./:<=>?@^_-]*", |lex| lex.slice().to_string())]
    #[regex(r"\|(?:\\x[0-9a-fA-F]+;|\\[abtnr|]|[^|])*\|", parse_to_symbol_elements)]
    Identifier(String),
    /// String literals; escapes expanded by `parse_to_string`.
    #[regex(
        r#""(?:\\x[0-9a-fA-F]+;|\\[abtnr"\\|]|\\[ \t\r]*\n[ \t\r]*|[^"])*""#,
        parse_to_string
    )]
    String(String),
    // TODO lexers for num, complex, real
    #[regex(r"[+-]?[0-9][0-9_]*", priority = 3)]
    #[regex(r"[+-]?[0-9][0-9_]*e[+-]?[0-9_]+")]
    Integer,
    #[regex(r"[+-]?[0-9][0-9_]*\.[0-9]?[0-9_]*")]
    #[regex(r"[+-]?[0-9][0-9_]*\.[0-9]?[0-9_]*e[+-]?[0-9_]+")]
    #[regex(r"[+-]?\.[0-9][0-9_]*")]
    #[regex(r"[+-]?\.[0-9][0-9_]*e[+-]?[0-9_]+")]
    Decimal,
    #[token("+inf.0")]
    InfinityPos,
    // NOTE(review): variant name has a typo ("Inifinity"); kept as-is
    // because renaming would break existing pattern matches on it.
    #[token("-inf.0")]
    InifinityNeg,
    #[token("+nan.0")]
    NotANumberPos,
    #[token("-nan.0")]
    NotANumberNeg,
    #[token("+i")]
    ImaginaryPos,
    #[token("-i")]
    ImaginaryNeg,
    #[token(".", priority = 3)]
    Dot,
    #[token("'")]
    QuoteSymbol,
    #[token("`")]
    QuasiquoteSymbol,
    #[token(",")]
    UnquoteSymbol,
    #[token(",@")]
    UnquoteSplicingSymbol,
    /// `;` — comment runs to end of line; consumer pairs this with `LineEnd`.
    #[token(";")]
    LineCommentStart,
    /// One or more consecutive newlines.
    #[regex(r"\n+")]
    LineEnd,
    #[token("#;")]
    DatumCommentStart,
    #[token("#|")]
    BlockCommentStart,
    #[token("|#")]
    BlockCommentEnd,
}
pub fn lexer(input: &str) -> Lexer<'_, Token> {
Token::lexer(input)
}
#[cfg(test)]
mod tests {
use super::Token;
use assert2::assert;
use logos::Logos;
/// Two spellings of the same literal identifier — mnemonic escapes
/// (`\t`) vs inline hex escapes (`\x9;`) — must lex to equal tokens
/// even though their source slices differ.
///
/// (Renamed from `literal_identfier_equivalence` to fix the typo; test
/// functions have no callers, so the rename is safe.)
#[test]
fn literal_identifier_equivalence() {
    let source = r"|\t\t||\x9;\x9;|";
    let mut lexer = Token::lexer(source);
    let tok1 = lexer.next();
    let tok1_slice = lexer.slice();
    let tok2 = lexer.next();
    let tok2_slice = lexer.slice();
    // Same unescaped identifier (two tab characters)...
    assert!(tok1 == tok2);
    // ...lexed from two different source spellings.
    assert!(tok1_slice != tok2_slice);
}
/// `|H\x65;llo|` lexes to the identifier `Hello` (the hex escape `\x65;`
/// expands to `e`), and the token spans the entire input slice.
#[test]
fn lex_literal_identifier() {
    let source = r"|H\x65;llo|";
    let mut lex = Token::lexer(source);
    let expected = Token::Identifier(String::from("Hello"));
    assert!(lex.next() == Some(Ok(expected)));
    assert!(lex.span() == (0..source.len()));
    assert!(lex.slice() == "|H\\x65;llo|");
}
#[test]
fn lex_identifier_or_number() {
let source = "3let -i";
let mut lexer = Token::lexer(source);
assert!(lexer.next() == Some(Ok(Token::Integer)));
assert!(lexer.span() == (0..1));
assert!(lexer.slice() == "3");
assert!(lexer.next() == Some(Ok(Token::Identifier("let".to_string()))));
assert!(lexer.span() == (1..4));
assert!(lexer.slice() == "let");
assert!(lexer.next() == Some(Ok(Token::Whitespace)));
assert!(lexer.span() == (4..5));
assert!(lexer.slice() == " ");