//===- TokenLexer.h - Lex from a token buffer -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the TokenLexer interface.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_CLANG_LEX_TOKENLEXER_H
#define LLVM_CLANG_LEX_TOKENLEXER_H
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/ArrayRef.h"
namespace clang {
// Forward declarations. Within this header these types appear only as
// pointers, references, template arguments, or parameters of functions that
// are declared but not defined here, so their complete definitions are not
// needed at this point.
class MacroArgs;
class MacroInfo;
class Preprocessor;
class Token;
class VAOptExpansionContext;
/// TokenLexer - This implements a lexer that returns tokens from a macro body
/// or token stream instead of lexing from a character buffer. This is used for
/// macro expansion and _Pragma handling, for example.
///
/// A TokenLexer is non-copyable (copy construction and copy assignment are
/// deleted) and its destructor calls destroy(). Most member functions are only
/// declared here; their definitions live outside this header.
class TokenLexer {
// Preprocessor is a friend, so it can read and modify the private state
// declared below directly.
friend class Preprocessor;
/// The macro we are expanding from. This is null if expanding a token stream.
MacroInfo *Macro = nullptr;
/// The actual arguments specified for a function-like macro, or null. The
/// TokenLexer owns the pointed-to object.
MacroArgs *ActualArgs = nullptr;
/// The current preprocessor object we are expanding for.
Preprocessor &PP;
/// This is the pointer to an array of tokens that the macro is
/// defined to, with arguments expanded for function-like macros. If this is
/// a token stream, these are the tokens we are returning. This points into
/// the macro definition we are lexing from, a cache buffer that is owned by
/// the preprocessor, or some other buffer that we may or may not own
/// (depending on OwnsTokens).
/// Note that if it points into Preprocessor's cache buffer, the Preprocessor
/// may update the pointer as needed.
///
/// This member (like most of the ones below) has no default member
/// initializer and is not set in the constructors' initializer lists; the
/// constructors call Init(), which presumably assigns it -- NOTE(review):
/// confirm against the Init() definitions.
const Token *Tokens;
/// This is the length of the Tokens array.
unsigned NumTokens;
/// This is the index of the next token that Lex will return.
unsigned CurTokenIdx;
/// The source location range where this macro was expanded.
SourceLocation ExpandLocStart, ExpandLocEnd;
/// Source location pointing at the source location entry chunk that
/// was reserved for the current macro expansion.
SourceLocation MacroExpansionStart;
/// The offset of the macro expansion in the
/// "source location address space".
unsigned MacroStartSLocOffset;
/// Location of the macro definition.
SourceLocation MacroDefStart;
/// Length of the macro definition.
unsigned MacroDefLength;
/// Lexical information about the expansion point of the macro: the identifier
/// that the macro expanded from had these properties.
bool AtStartOfLine : 1;
bool HasLeadingSpace : 1;
/// When this is true, the next token appended to the
/// output list during function argument expansion will get a leading space,
/// regardless of whether it had one to begin with or not. This is used for
/// placemarker support. If still true after function argument expansion, the
/// leading space will be applied to the first token following the macro
/// expansion.
bool NextTokGetsSpace : 1;
/// This is true if this TokenLexer allocated the Tokens
/// array, and thus needs to free it when destroyed. For simple object-like
/// macros (for example) we just point into the token buffer of the macro
/// definition, we don't make a copy of it.
///
/// This is the only bit-field that both constructors initialize explicitly
/// (to false) before calling Init() -- presumably so that cleanup code never
/// observes an indeterminate ownership flag; NOTE(review): confirm against
/// destroy().
bool OwnsTokens : 1;
/// This is true when tokens lexed from the TokenLexer
/// should not be subject to further macro expansion.
bool DisableMacroExpansion : 1;
/// When true, the produced tokens have Token::IsReinjected flag set.
/// See the flag documentation for details.
bool IsReinject : 1;
public:
/// Create a TokenLexer for the specified macro with the specified actual
/// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
/// ILEnd specifies the location of the ')' for a function-like macro or the
/// identifier for an object-like macro.
///
/// The constructor only binds PP and clears OwnsTokens; all remaining setup
/// is delegated to Init(Tok, ILEnd, MI, ActualArgs).
TokenLexer(Token &Tok, SourceLocation ILEnd, MacroInfo *MI,
MacroArgs *ActualArgs, Preprocessor &pp)
: PP(pp), OwnsTokens(false) {
Init(Tok, ILEnd, MI, ActualArgs);
}
/// Create a TokenLexer for the specified token stream. If 'OwnsTokens' is
/// specified, this takes ownership of the tokens and delete[]'s them when
/// the token lexer is empty.
///
/// Note that the OwnsTokens member is first initialized to false and the
/// caller's \p ownsTokens value is then forwarded to Init() together with the
/// other arguments.
TokenLexer(const Token *TokArray, unsigned NumToks, bool DisableExpansion,
bool ownsTokens, bool isReinject, Preprocessor &pp)
: PP(pp), OwnsTokens(false) {
Init(TokArray, NumToks, DisableExpansion, ownsTokens, isReinject);
}
// Copying is disabled. The class documents ownership of ActualArgs and
// (optionally) of the Tokens array, and the destructor calls destroy().
TokenLexer(const TokenLexer &) = delete;
TokenLexer &operator=(const TokenLexer &) = delete;
/// Runs destroy() on destruction.
~TokenLexer() { destroy(); }
/// Initialize this TokenLexer to expand from the specified macro
/// with the specified argument information. Note that this function takes
/// ownership of the Actuals pointer. ELEnd specifies the location of the
/// ')' for a function-like macro or the identifier for an object-like macro.
void Init(Token &Tok, SourceLocation ELEnd, MacroInfo *MI,
MacroArgs *Actuals);
/// Initialize this TokenLexer with the specified token stream.
///
/// DisableMacroExpansion is true when macro expansion of tokens lexed from
/// this stream should be disabled.
///
/// NOTE(review): this comment previously stated that the function never takes
/// ownership of the token array, which conflicts with the OwnsTokens
/// parameter and with the documentation of the token-stream constructor that
/// forwards to it. Presumably ownership of TokArray is taken exactly when
/// OwnsTokens is true, and IsReinject populates the member of the same
/// name -- confirm against the definition.
void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion,
bool OwnsTokens, bool IsReinject);
/// If the next token lexed will pop this macro off the
/// expansion stack, return 2. If the next unexpanded token is a '(', return
/// 1, otherwise return 0.
unsigned isNextTokenLParen() const;
/// Lex and return a token from this macro stream.
///
/// The token is written to \p Tok. NOTE(review): the meaning of the bool
/// result is not stated in this header; see the definition.
bool Lex(Token &Tok);
/// isParsingPreprocessorDirective - Return true if we are in the middle of a
/// preprocessor directive.
bool isParsingPreprocessorDirective() const;
private:
/// Cleanup routine invoked by the destructor. NOTE(review): presumably
/// releases the Tokens array when OwnsTokens is set and the owned ActualArgs
/// object; the definition is not visible in this header -- confirm.
void destroy();
/// Return true if the next lex call will pop this macro off the include
/// stack.
///
/// Concretely, this is the case when CurTokenIdx has reached NumTokens, i.e.
/// there is no token left in the Tokens array for Lex to return.
bool isAtEnd() const {
return CurTokenIdx == NumTokens;
}
/// Concatenates the next (sub-)sequence of tokens in \p TokenStream separated
/// by '##' starting with LHSTok - stopping when we encounter a token that is
/// neither '##' nor preceded by '##'. Places the result back into \p LHSTok
/// and sets \p CurIdx to point to the token following the last one that was
/// pasted.
///
/// Also performs the MSVC extension wide-literal token pasting involved with:
/// \code L #macro-arg. \endcode
///
/// \param[in,out] LHSTok - Contains the token to the left of '##' in \p
/// TokenStream upon entry and will contain the resulting concatenated Token
/// upon exit.
///
/// \param[in] TokenStream - The stream of Tokens we are lexing from.
///
/// \param[in,out] CurIdx - Upon entry, \p TokenStream[\p CurIdx] must equal
/// '##' (with the exception of the MSVC extension mentioned above). Upon
/// exit, it is set to the index of the token following the last token that
/// was concatenated together.
///
/// \returns If this returns true, the caller should immediately return the
/// token.
bool pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream,
unsigned int &CurIdx);
/// Calls pasteTokens above, passing in the '*this' object's Tokens and
/// CurTokenIdx data members.
bool pasteTokens(Token &Tok);
/// Takes the tail sequence of tokens within \p ResultToks that represent
/// the just expanded __VA_OPT__ tokens (possibly zero tokens) and transforms
/// them into a string. \p VCtx is used to determine which token represents
/// the first __VA_OPT__ replacement token.
///
/// \param[in,out] ResultToks - Contains the current Replacement Tokens
/// (prior to rescanning and token pasting), the tail end of which represents
/// the tokens just expanded through __VA_OPT__ processing. These (sub)
/// sequence of tokens are folded into one stringified token.
///
/// \param[in] VCtx - contains relevant contextual information about the
/// state of the tokens around and including the __VA_OPT__ token, necessary
/// for stringification.
///
/// \param[in] VAOPTClosingParenLoc - presumably the location of the ')' that
/// closes the __VA_OPT__(...) being stringified; NOTE(review): confirm
/// against the definition.
///
/// NOTE(review): SmallVectorImpl is not declared or included directly in this
/// header; presumably it is provided transitively by llvm/ADT/ArrayRef.h --
/// confirm.
void stringifyVAOPTContents(SmallVectorImpl<Token> &ResultToks,
const VAOptExpansionContext &VCtx,
SourceLocation VAOPTClosingParenLoc);
/// Expand the arguments of a function-like macro so that we can quickly
/// return preexpanded tokens from Tokens.
void ExpandFunctionArguments();
/// In microsoft compatibility mode, /##/ pastes
/// together to form a comment that comments out everything in the current
/// macro, other active macros, and anything left on the current physical
/// source line of the expanded buffer. Handle this by returning the
/// first token on the next line.
void HandleMicrosoftCommentPaste(Token &Tok, SourceLocation OpLoc);
/// If \p loc is a FileID and points inside the current macro
/// definition, returns the appropriate source location pointing at the
/// macro expansion source location entry.
SourceLocation getExpansionLocForMacroDefLoc(SourceLocation loc) const;
/// Creates SLocEntries and updates the locations of macro argument
/// tokens to their new expanded locations.
///
/// \param ArgIdSpellLoc the location of the macro argument id inside the
/// macro definition.
///
/// \param begin_tokens, end_tokens the tokens to update; presumably a
/// half-open [begin, end) range -- NOTE(review): confirm against the
/// definition.
void updateLocForMacroArgTokens(SourceLocation ArgIdSpellLoc,
Token *begin_tokens, Token *end_tokens);
/// Remove comma ahead of __VA_ARGS__, if present, according to compiler
/// dialect settings. Returns true if the comma is removed.
///
/// Note that the \p Macro and \p PP parameters have the same names as the
/// data members Macro and PP, so inside the definition they hide those
/// members.
bool MaybeRemoveCommaBeforeVaArgs(SmallVectorImpl<Token> &ResultToks,
bool HasPasteOperator,
MacroInfo *Macro, unsigned MacroArgNo,
Preprocessor &PP);
/// NOTE(review): undocumented in this header. Judging by the name it
/// presumably transfers the expansion point's start-of-line / leading-space
/// state (see AtStartOfLine and HasLeadingSpace) onto \p Result -- confirm
/// against the definition.
void PropagateLineStartLeadingSpaceInfo(Token &Result);
};
} // namespace clang
#endif // LLVM_CLANG_LEX_TOKENLEXER_H