#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/OperatorKinds.h"
#include "clang/Basic/Specifiers.h"
#include "clang/Basic/TargetBuiltins.h"
#include "clang/Basic/TokenKinds.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdio>
#include <cstring>
#include <string>
using namespace clang;
// Compile-time check: twice the largest builtin ID must be strictly below
// 2^ObjCOrBuiltinIDBits (written here as 2 << (bits - 1)).
// NOTE(review): presumably this guarantees the ObjCOrBuiltinID bit-field can
// hold every ID it needs to store — confirm against the header.
static_assert(2 * LargestBuiltinID < (2 << (ObjCOrBuiltinIDBits - 1)),
"Insufficient ObjCOrBuiltinID Bits");
// The destructors of the two lookup interfaces are defined out-of-line in this
// file, using the compiler-generated behavior.
IdentifierIterator::~IdentifierIterator() = default;
IdentifierInfoLookup::~IdentifierInfoLookup() = default;
namespace {

/// An identifier iterator that has nothing to iterate over: every call to
/// Next() yields an empty StringRef.
class EmptyLookupIterator : public IdentifierIterator {
public:
  StringRef Next() override { return {}; }
};

} // namespace
/// Default implementation: returns a newly heap-allocated iterator that
/// produces no identifiers.
///
/// \returns a raw pointer to a fresh EmptyLookupIterator.
/// NOTE(review): presumably the caller takes ownership of it — confirm.
IdentifierIterator *IdentifierInfoLookup::getIdentifiers() {
  IdentifierIterator *Empty = new EmptyLookupIterator();
  return Empty;
}
/// Creates an identifier table without registering any keywords.
///
/// \param ExternalLookup stored as-is in the ExternalLookup member.
IdentifierTable::IdentifierTable(IdentifierInfoLookup *ExternalLookup)
    : HashTable(8192), // Initial size handed to the underlying hash table.
      ExternalLookup(ExternalLookup) {}
/// Creates an identifier table and immediately populates it with the keywords
/// selected by \p LangOpts.
///
/// Delegates to the single-argument constructor, then calls AddKeywords().
IdentifierTable::IdentifierTable(const LangOptions &LangOpts,
                                 IdentifierInfoLookup *ExternalLookup)
    : IdentifierTable(ExternalLookup) {
  AddKeywords(LangOpts);
}
namespace {

/// Bit flags describing the language modes in which a keyword is recognized.
/// They are combined into the `Flags` argument consumed by getKeywordStatus()
/// and AddKeyword().
enum {
  KEYC99        = 0x1,       // Enabled when LangOpts.C99 is set.
  KEYCXX        = 0x2,       // Enabled when LangOpts.CPlusPlus is set.
  KEYCXX11      = 0x4,       // Enabled when LangOpts.CPlusPlus11 is set.
  KEYGNU        = 0x8,       // Extension when LangOpts.GNUKeywords is set.
  KEYMS         = 0x10,      // Extension when LangOpts.MicrosoftExt is set.
  BOOLSUPPORT   = 0x20,      // Enabled when LangOpts.Bool is set.
  KEYALTIVEC    = 0x40,      // Enabled when LangOpts.AltiVec is set.
  KEYNOCXX      = 0x80,      // Enabled when LangOpts.CPlusPlus is NOT set.
  KEYBORLAND    = 0x100,     // Extension when LangOpts.Borland is set.
  KEYOPENCLC    = 0x200,     // Enabled for OpenCL without OpenCLCPlusPlus.
  KEYC11        = 0x400,     // Enabled when LangOpts.C11 is set.
  KEYNOMS18     = 0x800,     // Skipped in MSVCCompat mode older than MSVC2015.
  KEYNOOPENCL   = 0x1000,    // Skipped when LangOpts.OpenCL is set.
  WCHARSUPPORT  = 0x2000,    // Enabled when LangOpts.WChar is set.
  HALFSUPPORT   = 0x4000,    // Enabled when LangOpts.Half is set.
  CHAR8SUPPORT  = 0x8000,    // Enabled when LangOpts.Char8 is set.
  KEYCONCEPTS   = 0x10000,   // Enabled when LangOpts.CPlusPlus20 is set.
  KEYOBJC       = 0x20000,   // Enabled when LangOpts.ObjC is set.
  KEYZVECTOR    = 0x40000,   // Enabled when LangOpts.ZVector is set.
  KEYCOROUTINES = 0x80000,   // Enabled when LangOpts.Coroutines is set.
  KEYMODULES    = 0x100000,  // Enabled when LangOpts.ModulesTS is set.
  KEYCXX20      = 0x200000,  // Enabled when LangOpts.CPlusPlus20 is set.
  KEYOPENCLCXX  = 0x400000,  // Enabled when LangOpts.OpenCLCPlusPlus is set.
  KEYMSCOMPAT   = 0x800000,  // Enabled when LangOpts.MSVCCompat is set.
  KEYSYCL       = 0x1000000, // Enabled when LangOpts.isSYCL() is true.
  KEYCUDA       = 0x2000000, // Enabled when LangOpts.CUDA is set.

  // The highest single flag bit.
  KEYMAX = KEYCUDA,

  // Any of the C++ standard-version flags.
  KEYALLCXX = KEYCXX | KEYCXX11 | KEYCXX20,

  // Every flag bit up to and including KEYMAX, except the two "exclusion"
  // flags KEYNOMS18 and KEYNOOPENCL.
  KEYALL = (KEYMAX | (KEYMAX - 1)) & ~(KEYNOMS18 | KEYNOOPENCL)
};

/// The outcome of evaluating a keyword's flags against the language options.
enum KeywordStatus {
  KS_Disabled,  // The keyword is not recognized.
  KS_Extension, // The keyword is recognized and marked as an extension token.
  KS_Enabled,   // The keyword is recognized.
  KS_Future     // Kept as an identifier but marked future-compat keyword.
};

} // namespace
/// Translates a keyword's flag mask into a KeywordStatus for the given
/// language options.
///
/// The tests below are evaluated strictly in order and the first one that
/// matches decides the result, so a keyword carrying several flags gets the
/// status of the earliest matching test (for example, a keyword flagged both
/// KEYC99 and KEYGNU is KS_Enabled, not KS_Extension, when C99 is on).
///
/// \param LangOpts the language options to evaluate against.
/// \param Flags a bitwise OR of the KEY*/...SUPPORT flag constants.
/// \returns KS_Enabled, KS_Extension, KS_Future, or KS_Disabled when no test
/// matches.
static KeywordStatus getKeywordStatus(const LangOptions &LangOpts,
unsigned Flags) {
// Exactly KEYALL (not merely a superset/subset) means "always a keyword".
if (Flags == KEYALL) return KS_Enabled;
// Language-standard based enabling.
if (LangOpts.CPlusPlus && (Flags & KEYCXX)) return KS_Enabled;
if (LangOpts.CPlusPlus11 && (Flags & KEYCXX11)) return KS_Enabled;
if (LangOpts.CPlusPlus20 && (Flags & KEYCXX20)) return KS_Enabled;
if (LangOpts.C99 && (Flags & KEYC99)) return KS_Enabled;
// GNU, Microsoft-extension and Borland keywords are reported as extensions;
// MSVC-compatibility keywords are reported as plainly enabled.
if (LangOpts.GNUKeywords && (Flags & KEYGNU)) return KS_Extension;
if (LangOpts.MicrosoftExt && (Flags & KEYMS)) return KS_Extension;
if (LangOpts.MSVCCompat && (Flags & KEYMSCOMPAT)) return KS_Enabled;
if (LangOpts.Borland && (Flags & KEYBORLAND)) return KS_Extension;
// Feature-specific enabling.
if (LangOpts.Bool && (Flags & BOOLSUPPORT)) return KS_Enabled;
if (LangOpts.Half && (Flags & HALFSUPPORT)) return KS_Enabled;
if (LangOpts.WChar && (Flags & WCHARSUPPORT)) return KS_Enabled;
if (LangOpts.Char8 && (Flags & CHAR8SUPPORT)) return KS_Enabled;
if (LangOpts.AltiVec && (Flags & KEYALTIVEC)) return KS_Enabled;
if (LangOpts.ZVector && (Flags & KEYZVECTOR)) return KS_Enabled;
// KEYOPENCLC applies only to OpenCL that is not OpenCL C++; OpenCL C++ has
// its own flag.
if (LangOpts.OpenCL && !LangOpts.OpenCLCPlusPlus && (Flags & KEYOPENCLC))
return KS_Enabled;
if (LangOpts.OpenCLCPlusPlus && (Flags & KEYOPENCLCXX)) return KS_Enabled;
if (!LangOpts.CPlusPlus && (Flags & KEYNOCXX)) return KS_Enabled;
if (LangOpts.C11 && (Flags & KEYC11)) return KS_Enabled;
if (LangOpts.ObjC && (Flags & KEYOBJC)) return KS_Enabled;
if (LangOpts.CPlusPlus20 && (Flags & KEYCONCEPTS)) return KS_Enabled;
if (LangOpts.Coroutines && (Flags & KEYCOROUTINES)) return KS_Enabled;
if (LangOpts.ModulesTS && (Flags & KEYMODULES)) return KS_Enabled;
// In C++ mode, a keyword tied to some C++ standard flag that was not enabled
// by any test above is reported as a future keyword.
if (LangOpts.CPlusPlus && (Flags & KEYALLCXX)) return KS_Future;
// Likewise for CHAR8SUPPORT keywords in C++ modes before C++20.
if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus20 && (Flags & CHAR8SUPPORT))
return KS_Future;
// These two are only reached when none of the KS_Future tests above matched.
if (LangOpts.isSYCL() && (Flags & KEYSYCL))
return KS_Enabled;
if (LangOpts.CUDA && (Flags & KEYCUDA))
return KS_Enabled;
return KS_Disabled;
}
/// Registers \p Keyword in \p Table if it is applicable under \p LangOpts.
///
/// \param Keyword the spelling to register.
/// \param TokenCode the token kind to associate with the spelling.
/// \param Flags bitwise OR of the keyword flag constants.
/// \param LangOpts the active language options.
/// \param Table the identifier table receiving the keyword.
static void AddKeyword(StringRef Keyword,
                       tok::TokenKind TokenCode, unsigned Flags,
                       const LangOptions &LangOpts, IdentifierTable &Table) {
  // KEYNOMS18 keywords are left out in MSVC-compatibility mode unless the
  // emulated compiler is at least MSVC2015.
  const bool ExcludedByMSVCVersion =
      LangOpts.MSVCCompat && (Flags & KEYNOMS18) &&
      !LangOpts.isCompatibleWithMSVC(LangOptions::MSVC2015);
  // KEYNOOPENCL keywords are left out whenever OpenCL is enabled.
  const bool ExcludedByOpenCL = LangOpts.OpenCL && (Flags & KEYNOOPENCL);
  if (ExcludedByMSVCVersion || ExcludedByOpenCL)
    return;

  const KeywordStatus Status = getKeywordStatus(LangOpts, Flags);
  if (Status == KS_Disabled)
    return;

  // A future keyword is entered as a plain identifier and merely flagged.
  const bool IsFuture = (Status == KS_Future);
  IdentifierInfo &Entry =
      Table.get(Keyword, IsFuture ? tok::identifier : TokenCode);
  Entry.setIsExtensionToken(Status == KS_Extension);
  Entry.setIsFutureCompatKeyword(IsFuture);
}
/// Registers \p Keyword in \p Table with token kind \p TokenCode and marks the
/// resulting entry as a C++ operator keyword.
static void AddCXXOperatorKeyword(StringRef Keyword,
                                  tok::TokenKind TokenCode,
                                  IdentifierTable &Table) {
  Table.get(Keyword, TokenCode).setIsCPlusPlusOperatorKeyword();
}
/// Looks up (or creates) the entry for \p Name in \p Table and records
/// \p ObjCID as its Objective-C keyword ID.
static void AddObjCKeyword(StringRef Name,
                           tok::ObjCKeywordKind ObjCID,
                           IdentifierTable &Table) {
  IdentifierInfo &Entry = Table.get(Name);
  Entry.setObjCKeywordID(ObjCID);
}
/// Populates this table with the keywords applicable to \p LangOpts.
///
/// The bulk of the work is done by including TokenKinds.def with the macros
/// below defined, so each entry in that file expands to one of the Add*
/// helper calls.
void IdentifierTable::AddKeywords(const LangOptions &LangOpts) {
// KEYWORD: register the spelling NAME with token kind tok::kw_NAME.
#define KEYWORD(NAME, FLAGS) \
AddKeyword(StringRef(#NAME), tok::kw_ ## NAME, \
FLAGS, LangOpts, *this);
// ALIAS: register the string NAME as another spelling of tok::kw_TOK.
#define ALIAS(NAME, TOK, FLAGS) \
AddKeyword(StringRef(NAME), tok::kw_ ## TOK, \
FLAGS, LangOpts, *this);
// CXX_KEYWORD_OPERATOR: only registered when CXXOperatorNames is enabled.
#define CXX_KEYWORD_OPERATOR(NAME, ALIAS) \
if (LangOpts.CXXOperatorNames) \
AddCXXOperatorKeyword(StringRef(#NAME), tok::ALIAS, *this);
// OBJC_AT_KEYWORD: only registered when Objective-C is enabled.
#define OBJC_AT_KEYWORD(NAME) \
if (LangOpts.ObjC) \
AddObjCKeyword(StringRef(#NAME), tok::objc_##NAME, *this);
// TESTING_KEYWORD entries expand to nothing here.
#define TESTING_KEYWORD(NAME, FLAGS)
#include "clang/Basic/TokenKinds.def"
// Keywords controlled by individual language options; they are added with
// KEYALL so that getKeywordStatus() reports them as enabled.
if (LangOpts.ParseUnknownAnytype)
AddKeyword("__unknown_anytype", tok::kw___unknown_anytype, KEYALL,
LangOpts, *this);
if (LangOpts.DeclSpecKeyword)
AddKeyword("__declspec", tok::kw___declspec, KEYALL, LangOpts, *this);
// "__ieee128" is registered with the same token kind as __float128.
if (LangOpts.IEEE128)
AddKeyword("__ieee128", tok::kw___float128, KEYALL, LangOpts, *this);
// Unconditionally create the "import" identifier and flag it as the
// modules-import identifier.
get("import").setModulesImport(true);
}
/// Returns the keyword status of token kind \p K under \p LangOpts.
///
/// The switch is generated from TokenKinds.def: every KEYWORD entry becomes a
/// case that evaluates that keyword's flags with getKeywordStatus(). Token
/// kinds that have no KEYWORD entry yield KS_Disabled.
static KeywordStatus getTokenKwStatus(const LangOptions &LangOpts,
tok::TokenKind K) {
switch (K) {
#define KEYWORD(NAME, FLAGS) \
case tok::kw_##NAME: return getKeywordStatus(LangOpts, FLAGS);
#include "clang/Basic/TokenKinds.def"
default: return KS_Disabled;
}
}
bool IdentifierInfo::isKeyword(const LangOptions &LangOpts) const {
switch (getTokenKwStatus(LangOpts, getTokenID())) {
case KS_Enabled:
case KS_Extension:
return true;
default:
return false;
}
}
bool IdentifierInfo::isCPlusPlusKeyword(const LangOptions &LangOpts) const {
if (!LangOpts.CPlusPlus || !isKeyword(LangOpts))
return false;
LangOptions LangOptsNoCPP = LangOpts;
LangOptsNoCPP.CPlusPlus = false;
LangOptsNoCPP.CPlusPlus11 = false;
LangOptsNoCPP.CPlusPlus20 = false;
return !isKeyword(LangOptsNoCPP);
}
/// Classifies this identifier's name as reserved or not.
///
/// Names of length 0 or 1 are never reported as reserved. Names starting with
/// an underscore are classified by their second character; any other name is
/// reserved only in C++ and only if it contains "__".
ReservedIdentifierStatus
IdentifierInfo::isReserved(const LangOptions &LangOpts) const {
  const StringRef Spelling = getName();
  if (Spelling.size() <= 1)
    return ReservedIdentifierStatus::NotReserved;

  if (Spelling[0] != '_') {
    const bool HasInnerDoubleUnderscore =
        LangOpts.CPlusPlus && Spelling.contains("__");
    return HasInnerDoubleUnderscore
               ? ReservedIdentifierStatus::ContainsDoubleUnderscore
               : ReservedIdentifierStatus::NotReserved;
  }

  // The name starts with '_'; the second character decides the category.
  const char Second = Spelling[1];
  if (Second == '_')
    return ReservedIdentifierStatus::StartsWithDoubleUnderscore;
  if (Second >= 'A' && Second <= 'Z')
    return ReservedIdentifierStatus::
        StartsWithUnderscoreFollowedByCapitalLetter;
  return ReservedIdentifierStatus::StartsWithUnderscoreAtGlobalScope;
}
/// Returns the name with all leading underscores removed when it begins with
/// "__" or with '_' followed by an uppercase ASCII letter; otherwise returns
/// the name unchanged.
StringRef IdentifierInfo::deuglifiedName() const {
  const StringRef Spelling = getName();
  if (Spelling.size() < 2 || Spelling.front() != '_')
    return Spelling;

  const char Second = Spelling[1];
  const bool LooksReserved =
      Second == '_' || ('A' <= Second && Second <= 'Z');
  return LooksReserved ? Spelling.ltrim('_') : Spelling;
}
/// Returns the preprocessor keyword kind matching this identifier's spelling,
/// or tok::pp_not_keyword if it is not one of the directives listed below.
///
/// The lookup hashes the name's length together with its first and third
/// characters, switches on that hash, and then confirms the candidate with a
/// memcmp against the full directive name.
tok::PPKeywordKind IdentifierInfo::getPPKeywordID() const {
// HASH: the length shifted left by five bits, plus the sum of the first and
// third characters' offsets from 'a', masked to the low five bits.
#define HASH(LEN, FIRST, THIRD) \
(LEN << 5) + (((FIRST-'a') + (THIRD-'a')) & 31)
// CASE: on a hash match, compare the first LEN bytes of the name with the
// spelling of NAME; a mismatch means the identifier is not a directive.
#define CASE(LEN, FIRST, THIRD, NAME) \
case HASH(LEN, FIRST, THIRD): \
return memcmp(Name, #NAME, LEN) ? tok::pp_not_keyword : tok::pp_ ## NAME
unsigned Len = getLength();
// No directive below is shorter than two characters.
if (Len < 2) return tok::pp_not_keyword;
const char *Name = getNameStart();
// NOTE(review): for a two-character name, Name[2] is one past the last
// character; the "if" entry below expects '\0' there, so this presumably
// relies on the name being NUL-terminated — confirm.
switch (HASH(Len, Name[0], Name[2])) {
default: return tok::pp_not_keyword;
CASE( 2, 'i', '\0', if);
CASE( 4, 'e', 'i', elif);
CASE( 4, 'e', 's', else);
CASE( 4, 'l', 'n', line);
CASE( 4, 's', 'c', sccs);
CASE( 5, 'e', 'd', endif);
CASE( 5, 'e', 'r', error);
CASE( 5, 'i', 'e', ident);
CASE( 5, 'i', 'd', ifdef);
CASE( 5, 'u', 'd', undef);
CASE( 6, 'a', 's', assert);
CASE( 6, 'd', 'f', define);
CASE( 6, 'i', 'n', ifndef);
CASE( 6, 'i', 'p', import);
CASE( 6, 'p', 'a', pragma);
CASE( 7, 'd', 'f', defined);
CASE( 7, 'e', 'i', elifdef);
CASE( 7, 'i', 'c', include);
CASE( 7, 'w', 'r', warning);
CASE( 8, 'e', 'i', elifndef);
CASE( 8, 'u', 'a', unassert);
CASE(12, 'i', 'c', include_next);
CASE(14, '_', 'p', __public_macro);
CASE(15, '_', 'p', __private_macro);
CASE(16, '_', 'i', __include_macros);
// The helper macros are local to this function.
#undef CASE
#undef HASH
}
}
/// Prints statistics about the identifier table to stderr: the number of
/// identifiers, an empty-bucket figure (bucket count minus identifier count),
/// the hash density, the average and maximum identifier length, followed by
/// the statistics of the table's allocator.
void IdentifierTable::PrintStats() const {
  const unsigned NumBuckets = HashTable.getNumBuckets();
  const unsigned NumIdentifiers = HashTable.getNumItems();
  const unsigned NumEmptyBuckets = NumBuckets - NumIdentifiers;
  unsigned TotalIdentifierLength = 0;
  unsigned MaxIdentifierLength = 0;

  for (const auto &Entry : HashTable) {
    const unsigned IdLen = Entry.getKeyLength();
    TotalIdentifierLength += IdLen;
    if (MaxIdentifierLength < IdLen)
      MaxIdentifierLength = IdLen;
  }

  // An empty table has no meaningful average; report 0 instead of dividing
  // zero by zero.
  const double AverageIdentifierLength =
      NumIdentifiers == 0
          ? 0.0
          : TotalIdentifierLength / static_cast<double>(NumIdentifiers);

  // All counters are unsigned, so they are printed with %u.
  fprintf(stderr, "\n*** Identifier Table Stats:\n");
  fprintf(stderr, "# Identifiers:   %u\n", NumIdentifiers);
  fprintf(stderr, "# Empty Buckets: %u\n", NumEmptyBuckets);
  fprintf(stderr, "Hash density (#identifiers per bucket): %f\n",
          NumIdentifiers / static_cast<double>(NumBuckets));
  fprintf(stderr, "Ave identifier length: %f\n", AverageIdentifierLength);
  fprintf(stderr, "Max identifier length: %u\n", MaxIdentifierLength);

  // Also report how much memory the table's allocator is using.
  HashTable.getAllocator().PrintStats();
}
/// Hashes a selector by hashing its opaque pointer representation with the
/// DenseMapInfo specialization for void pointers.
unsigned llvm::DenseMapInfo<clang::Selector>::getHashValue(clang::Selector S) {
  const void *Opaque = S.getAsOpaquePtr();
  return DenseMapInfo<void *>::getHashValue(Opaque);
}
namespace clang {
/// A selector made of more than one keyword.
///
/// The keyword IdentifierInfo pointers are not a declared member: they are
/// stored in the memory immediately following the object (`this + 1`), so the
/// object must be created in storage large enough to hold them as well.
/// Instances are folding-set nodes, profiled by their keyword pointers.
class alignas(IdentifierInfoAlignment) MultiKeywordSelector
: public detail::DeclarationNameExtra,
public llvm::FoldingSetNode {
// Private constructor that only records the keyword count and leaves the
// trailing keyword storage untouched.
MultiKeywordSelector(unsigned nKeys) : DeclarationNameExtra(nKeys) {}
public:
/// Records \p nKeys and copies the \p nKeys pointers from \p IIV into the
/// storage directly after this object.
MultiKeywordSelector(unsigned nKeys, IdentifierInfo **IIV)
: DeclarationNameExtra(nKeys) {
assert((nKeys > 1) && "not a multi-keyword selector");
// The keyword array begins right after the object itself.
IdentifierInfo **KeyInfo = reinterpret_cast<IdentifierInfo **>(this + 1);
for (unsigned i = 0; i != nKeys; ++i)
KeyInfo[i] = IIV[i];
}
/// Returns the selector's spelling; defined out of line.
std::string getName() const;
using DeclarationNameExtra::getNumArgs;
using keyword_iterator = IdentifierInfo *const *;
/// Iterator to the first keyword pointer in the trailing storage.
keyword_iterator keyword_begin() const {
return reinterpret_cast<keyword_iterator>(this + 1);
}
/// Iterator one past the last keyword pointer.
keyword_iterator keyword_end() const {
return keyword_begin() + getNumArgs();
}
/// Returns the keyword stored in slot \p i, which must be less than
/// getNumArgs().
IdentifierInfo *getIdentifierInfoForSlot(unsigned i) const {
assert(i < getNumArgs() && "getIdentifierInfoForSlot(): illegal index");
return keyword_begin()[i];
}
/// Profiles a keyword list: the count followed by each keyword pointer.
static void Profile(llvm::FoldingSetNodeID &ID, keyword_iterator ArgTys,
unsigned NumArgs) {
ID.AddInteger(NumArgs);
for (unsigned i = 0; i != NumArgs; ++i)
ID.AddPointer(ArgTys[i]);
}
/// Profiles this selector using its own keyword list.
void Profile(llvm::FoldingSetNodeID &ID) {
Profile(ID, keyword_begin(), getNumArgs());
}
};
}
/// Returns true if this selector has exactly as many arguments as \p Names
/// has entries and the name of every slot equals the corresponding entry.
///
/// \param Names the expected slot names; must not be empty.
bool Selector::isKeywordSelector(ArrayRef<StringRef> Names) const {
  assert(!Names.empty() && "must have >= 1 selector slots");
  if (getNumArgs() != Names.size())
    return false;

  unsigned Slot = 0;
  for (StringRef Expected : Names) {
    if (getNameForSlot(Slot) != Expected)
      return false;
    ++Slot;
  }
  return true;
}
/// Returns true if this is a unary selector whose first slot is named
/// \p Name.
bool Selector::isUnarySelector(StringRef Name) const {
  if (!isUnarySelector())
    return false;
  return getNameForSlot(0) == Name;
}
/// Returns the number of arguments this selector takes: 0 when the flag is at
/// most ZeroArg, 1 when it is OneArg, and otherwise the count stored in the
/// multi-keyword selector.
unsigned Selector::getNumArgs() const {
  const unsigned Flag = getIdentifierInfoFlag();
  if (Flag > ZeroArg) {
    if (Flag == OneArg)
      return 1;
    // Anything else is backed by a MultiKeywordSelector that knows its count.
    return getMultiKeywordSelector()->getNumArgs();
  }
  return 0;
}
/// Returns the IdentifierInfo for slot \p argIndex.
///
/// For selectors that are not multi-keyword, only slot 0 is valid and the
/// selector's single IdentifierInfo is returned.
IdentifierInfo *Selector::getIdentifierInfoForSlot(unsigned argIndex) const {
  if (getIdentifierInfoFlag() >= MultiArg)
    return getMultiKeywordSelector()->getIdentifierInfoForSlot(argIndex);

  assert(argIndex == 0 && "illegal keyword index");
  return getAsIdentifierInfo();
}
/// Returns the name of slot \p argIndex, or an empty StringRef when the slot
/// has no IdentifierInfo.
StringRef Selector::getNameForSlot(unsigned int argIndex) const {
  if (IdentifierInfo *SlotInfo = getIdentifierInfoForSlot(argIndex))
    return SlotInfo->getName();
  return StringRef();
}
std::string MultiKeywordSelector::getName() const {
SmallString<256> Str;
llvm::raw_svector_ostream OS(Str);
for (keyword_iterator I = keyword_begin(), E = keyword_end(); I != E; ++I) {
if (*I)
OS << (*I)->getName();
OS << ':';
}
return std::string(OS.str());
}
std::string Selector::getAsString() const {
if (InfoPtr == 0)
return "<null selector>";
if (getIdentifierInfoFlag() < MultiArg) {
IdentifierInfo *II = getAsIdentifierInfo();
if (getNumArgs() == 0) {
assert(II && "If the number of arguments is 0 then II is guaranteed to "
"not be null.");
return std::string(II->getName());
}
if (!II)
return ":";
return II->getName().str() + ":";
}
return getMultiKeywordSelector()->getName();
}
/// Writes the selector's string form, as produced by getAsString(), to
/// \p OS.
void Selector::print(llvm::raw_ostream &OS) const {
  const std::string Spelling = getAsString();
  OS << Spelling;
}
/// Prints the selector to the LLVM error stream.
LLVM_DUMP_METHOD void Selector::dump() const {
  llvm::raw_ostream &Err = llvm::errs();
  print(Err);
}
/// Returns true if \p name begins with \p word and the word ends there: the
/// name is either exactly \p word, or the character right after the prefix is
/// not a lowercase letter.
static bool startsWithWord(StringRef name, StringRef word) {
  if (!name.startswith(word))
    return false;
  // The prefix matched, so name is at least as long as word.
  const size_t wordLength = word.size();
  return name.size() == wordLength || !isLowercase(name[wordLength]);
}
/// Determines the Objective-C method family of \p sel from the name of its
/// first slot.
///
/// Exact unary names and the performSelector names are checked first, on the
/// name as written. Otherwise leading underscores are dropped and the name is
/// matched against the word-prefix families (alloc, copy, init, mutableCopy,
/// new).
///
/// \returns the matching family, or OMF_None.
ObjCMethodFamily Selector::getMethodFamilyImpl(Selector sel) {
  IdentifierInfo *firstSlot = sel.getIdentifierInfoForSlot(0);
  if (!firstSlot)
    return OMF_None;

  StringRef name = firstSlot->getName();

  // Families that require a unary selector with an exact name.
  if (sel.isUnarySelector()) {
    if (name == "autorelease")
      return OMF_autorelease;
    if (name == "dealloc")
      return OMF_dealloc;
    if (name == "finalize")
      return OMF_finalize;
    if (name == "release")
      return OMF_release;
    if (name == "retain")
      return OMF_retain;
    if (name == "retainCount")
      return OMF_retainCount;
    if (name == "self")
      return OMF_self;
    if (name == "initialize")
      return OMF_initialize;
  }

  const bool isPerformSelector = name == "performSelector" ||
                                 name == "performSelectorInBackground" ||
                                 name == "performSelectorOnMainThread";
  if (isPerformSelector)
    return OMF_performSelector;

  // The remaining families ignore any leading underscores.
  name = name.ltrim('_');
  if (name.empty())
    return OMF_None;

  switch (name.front()) {
  case 'a':
    return startsWithWord(name, "alloc") ? OMF_alloc : OMF_None;
  case 'c':
    return startsWithWord(name, "copy") ? OMF_copy : OMF_None;
  case 'i':
    return startsWithWord(name, "init") ? OMF_init : OMF_None;
  case 'm':
    return startsWithWord(name, "mutableCopy") ? OMF_mutableCopy : OMF_None;
  case 'n':
    return startsWithWord(name, "new") ? OMF_new : OMF_None;
  default:
    return OMF_None;
  }
}
/// Determines the instancetype family of \p sel from the leading word of its
/// first slot's name.
///
/// \returns the matching family, or OIT_None when the first slot is missing,
/// its name is empty, or no word matches.
ObjCInstanceTypeFamily Selector::getInstTypeMethodFamily(Selector sel) {
  IdentifierInfo *firstSlot = sel.getIdentifierInfoForSlot(0);
  if (!firstSlot)
    return OIT_None;

  const StringRef name = firstSlot->getName();
  if (name.empty())
    return OIT_None;

  // Each candidate word has a distinct spelling, so at most one can match.
  if (startsWithWord(name, "array"))
    return OIT_Array;
  if (startsWithWord(name, "default"))
    return OIT_ReturnsSelf;
  if (startsWithWord(name, "dictionary"))
    return OIT_Dictionary;
  if (startsWithWord(name, "shared"))
    return OIT_ReturnsSelf;
  if (startsWithWord(name, "standard"))
    return OIT_Singleton;
  if (startsWithWord(name, "init"))
    return OIT_Init;
  return OIT_None;
}
/// Determines whether the first slot of \p sel names one of the known
/// NSString format methods.
///
/// \returns SFF_NSString for appendFormat, initWithFormat,
/// localizedStringWithFormat, stringByAppendingFormat and stringWithFormat;
/// SFF_None otherwise, including when the first slot is missing or its name
/// is empty.
ObjCStringFormatFamily Selector::getStringFormatFamilyImpl(Selector sel) {
  IdentifierInfo *first = sel.getIdentifierInfoForSlot(0);
  if (!first)
    return SFF_None;

  StringRef name = first->getName();
  // front() must not be called on an empty string; getInstTypeMethodFamily
  // guards against an empty name in the same way.
  if (name.empty())
    return SFF_None;

  // Switch on the first character to narrow down the candidates.
  switch (name.front()) {
  case 'a':
    if (name == "appendFormat")
      return SFF_NSString;
    break;
  case 'i':
    if (name == "initWithFormat")
      return SFF_NSString;
    break;
  case 'l':
    if (name == "localizedStringWithFormat")
      return SFF_NSString;
    break;
  case 's':
    if (name == "stringByAppendingFormat" || name == "stringWithFormat")
      return SFF_NSString;
    break;
  default:
    break;
  }
  return SFF_None;
}
namespace {

/// The state behind SelectorTable's opaque Impl pointer.
struct SelectorTableImpl {
  /// The set of unique multi-keyword selectors.
  llvm::FoldingSet<MultiKeywordSelector> Table;
  /// The allocator that multi-keyword selectors are created from.
  llvm::BumpPtrAllocator Allocator;
};

} // namespace
/// Reinterprets the opaque pointer \p P as the SelectorTableImpl it points
/// to.
static SelectorTableImpl &getSelectorTableImpl(void *P) {
  auto *TableImpl = static_cast<SelectorTableImpl *>(P);
  return *TableImpl;
}
/// Builds the setter name for a property: "set" followed by \p Name with its
/// first character uppercased (e.g. "foo" becomes "setFoo").
///
/// \param Name the property name. If it is empty the result is just "set".
/// \returns the setter name.
SmallString<64>
SelectorTable::constructSetterName(StringRef Name) {
  SmallString<64> SetterName("set");
  SetterName += Name;
  // Index 3 only exists when Name contributed at least one character;
  // indexing it for an empty Name would be out of range.
  if (!Name.empty())
    SetterName[3] = toUppercase(SetterName[3]);
  return SetterName;
}
/// Builds the unary selector for the setter of the property \p Name.
///
/// The setter's spelling comes from constructSetterName(); it is interned in
/// \p Idents and then turned into a unary selector through \p SelTable.
Selector
SelectorTable::constructSetterSelector(IdentifierTable &Idents,
                                       SelectorTable &SelTable,
                                       const IdentifierInfo *Name) {
  IdentifierInfo &SetterIdent =
      Idents.get(constructSetterName(Name->getName()));
  return SelTable.getUnarySelector(&SetterIdent);
}
/// Recovers a property name from a setter selector: drops the leading "set"
/// from the first slot's name and lowercases the character that follows.
///
/// \param Sel a selector whose first slot name starts with "set".
std::string SelectorTable::getPropertyNameFromSetterSelector(Selector Sel) {
  const StringRef SetterName = Sel.getNameForSlot(0);
  assert(SetterName.startswith("set") && "invalid setter name");

  const char FirstLetter = toLowercase(SetterName[3]);
  const StringRef Remainder = SetterName.drop_front(4);
  return (Twine(FirstLetter) + Remainder).str();
}
/// Returns the total memory reported by the selector table's allocator.
size_t SelectorTable::getTotalMemory() const {
  return getSelectorTableImpl(Impl).Allocator.getTotalMemory();
}
/// Returns the selector made of the \p nKeys keywords in \p IIV.
///
/// With fewer than two keys the selector is built directly from IIV[0].
/// Otherwise the keyword list is looked up in the folding set; if it is not
/// there yet, a new MultiKeywordSelector is created from the table's
/// allocator and inserted.
Selector SelectorTable::getSelector(unsigned nKeys, IdentifierInfo **IIV) {
  if (nKeys < 2)
    return Selector(IIV[0], nKeys);

  SelectorTableImpl &TableImpl = getSelectorTableImpl(Impl);

  // Reuse the existing selector for this keyword list if there is one.
  llvm::FoldingSetNodeID Key;
  MultiKeywordSelector::Profile(Key, IIV, nKeys);
  void *InsertPos = nullptr;
  MultiKeywordSelector *Existing =
      TableImpl.Table.FindNodeOrInsertPos(Key, InsertPos);
  if (Existing)
    return Selector(Existing);

  // The keyword pointers are stored right after the object, so allocate room
  // for both in one chunk.
  const unsigned NumBytes =
      sizeof(MultiKeywordSelector) + nKeys * sizeof(IdentifierInfo *);
  void *Storage =
      TableImpl.Allocator.Allocate(NumBytes, alignof(MultiKeywordSelector));
  MultiKeywordSelector *Created =
      new (Storage) MultiKeywordSelector(nKeys, IIV);
  TableImpl.Table.InsertNode(Created, InsertPos);
  return Selector(Created);
}
/// Creates an empty selector table, allocating its implementation object.
SelectorTable::SelectorTable() : Impl(new SelectorTableImpl()) {}
/// Destroys the implementation object created by the constructor.
SelectorTable::~SelectorTable() {
  SelectorTableImpl *Owned = &getSelectorTableImpl(Impl);
  delete Owned;
}
/// Returns the spelling of the overloaded operator \p Operator.
///
/// The cases are generated from OperatorKinds.def: each OVERLOADED_OPERATOR
/// entry returns its Spelling.
///
/// \returns the spelling, or nullptr for OO_None and
/// NUM_OVERLOADED_OPERATORS.
const char *clang::getOperatorSpelling(OverloadedOperatorKind Operator) {
switch (Operator) {
case OO_None:
case NUM_OVERLOADED_OPERATORS:
return nullptr;
#define OVERLOADED_OPERATOR(Name,Spelling,Token,Unary,Binary,MemberOnly) \
case OO_##Name: return Spelling;
#include "clang/Basic/OperatorKinds.def"
}
// Only reached if Operator holds a value that none of the cases handles.
llvm_unreachable("Invalid OverloadedOperatorKind!");
}
/// Returns the keyword spelling for a nullability kind.
///
/// \param kind the nullability kind to spell.
/// \param isContextSensitive when true, the context-sensitive spelling is
/// returned (e.g. "nonnull"); otherwise the underscore-prefixed one (e.g.
/// "_Nonnull"). NullableResult has no context-sensitive spelling.
StringRef clang::getNullabilitySpelling(NullabilityKind kind,
                                        bool isContextSensitive) {
  switch (kind) {
  case NullabilityKind::NonNull:
    if (isContextSensitive)
      return "nonnull";
    return "_Nonnull";

  case NullabilityKind::Nullable:
    if (isContextSensitive)
      return "nullable";
    return "_Nullable";

  case NullabilityKind::NullableResult:
    assert(!isContextSensitive &&
           "_Nullable_result isn't supported as context-sensitive keyword");
    return "_Nullable_result";

  case NullabilityKind::Unspecified:
    if (isContextSensitive)
      return "null_unspecified";
    return "_Null_unspecified";
  }
  llvm_unreachable("Unknown nullability kind.");
}