// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.
#ifndef _ALIAS_LEXER_H_
#define _ALIAS_LEXER_H_
#include <tabula.h>
#include <alias/data_structure/vector.h>
#include <alias/utils.h>
struct alias_lexer_Character {
char32_t character;
uint32_t location;
};
struct alias_lexer_Token {
uint32_t kind;
uint32_t location_start;
uint32_t location_end;
};
struct alias_lexer_File {
const char * name;
const char * buffer;
uint32_t buffer_size;
uint32_t location_start;
uint32_t location_end;
alias_Vector(uint32_t) newline_locations;
};
struct alias_lexer_Files {
alias_MemoryCB mcb;
alias_Vector(struct alias_lexer_File) files;
uint32_t location_top;
};
void alias_lexer_Files_initialize(struct alias_lexer_Files * files, alias_MemoryCB mcb);
uint32_t alias_lexer_Files_create_buffer(struct alias_lexer_Files * files, const char * name, const char * buffer, size_t buffer_size);
struct alias_lexer_SetMatch {
const char32_t * start;
const char32_t * middle;
const char32_t * end;
uint32_t token;
};
struct alias_lexer_ExactMatch {
const char32_t * exact;
uint32_t token;
};
struct alias_lexer_Settings {
alias_MemoryCB mcb;
bool newline_splicing;
uint32_t whitespace;
uint32_t newline;
uint32_t hash_line_comment;
uint32_t c_line_comment;
uint32_t c_block_comment;
uint32_t c_character;
uint32_t c_string;
const char32_t * identifier_start;
const char32_t * identifier_middle;
uint32_t identifier;
uint32_t num_keywords;
struct alias_lexer_ExactMatch * keywords;
uint32_t num_punctuation;
struct alias_lexer_ExactMatch * punctuation;
uint32_t num_remove_newline_before;
uint32_t * remove_newline_before;
uint32_t num_remove_newline_after;
uint32_t * remove_newline_after;
uint32_t num_remove;
uint32_t * remove;
};
struct alias_Lexer {
struct alias_lexer_Settings settings;
struct alias_lexer_Files * files;
// source character decoding
struct {
uint32_t file;
mbstate_t mbstate;
uint32_t offset;
} phase_0;
// newline source normalization
struct alias_lexer_Character phase_1[2];
// newline splicing
struct alias_lexer_Character phase_2[2];
// tokenize
alias_Vector(struct alias_lexer_Character) phase_3;
// <some token> NEWLINE → <some token>
// NEWLINE <some token> → <some token>
struct alias_lexer_Token phase_4[2];
// <some token> ->
// <internal token> ->
struct alias_lexer_Token phase_5[1];
};
void alias_Lexer_initialize(struct alias_Lexer * lexer, const struct alias_lexer_Settings * settings, struct alias_lexer_Files * files, uint32_t file);
struct alias_lexer_Token alias_Lexer_next_token(struct alias_Lexer * lexer);
#endif