#include "llvm/DebugInfo/PDB/Native/GSIStreamBuilder.h"
#include "llvm/DebugInfo/CodeView/RecordName.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolSerializer.h"
#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Native/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Native/Hash.h"
#include "llvm/DebugInfo/PDB/Native/RawTypes.h"
#include "llvm/Support/BinaryItemStream.h"
#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/xxhash.h"
#include <algorithm>
#include <vector>
using namespace llvm;
using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::codeview;
// State and serialization logic for one GSI hash table. This file creates two
// instances: PSH (fed from Publics) and GSH (fed from Globals).
struct llvm::pdb::GSIHashStreamBuilder {
// Total size in bytes of the symbol records associated with this table.
uint32_t RecordByteSize = 0;
// One entry per hashed record, grouped by bucket; filled by finalizeBuckets().
std::vector<PSHashRecord> HashRecords;
// One bit per bucket, 32 buckets per word; finalizeBuckets() sets the bit of
// every non-empty bucket. Not initialized until finalizeBuckets() runs.
std::array<support::ulittle32_t, (IPHR_HASH + 32) / 32> HashBitmap;
// One entry per non-empty bucket, in bucket order; appended to by
// finalizeBuckets().
std::vector<support::ulittle32_t> HashBuckets;
// Number of bytes commit() writes: header + HashRecords + HashBitmap +
// HashBuckets.
uint32_t calculateSerializedLength() const;
// Writes the header followed by HashRecords, HashBitmap and HashBuckets.
Error commit(BinaryStreamWriter &Writer);
// NOTE(review): the definitions of finalizePublicBuckets/finalizeGlobalBuckets
// visible in this file belong to GSIStreamBuilder, not to this struct; no
// definition of these two members is visible here - confirm they are needed.
void finalizePublicBuckets();
void finalizeGlobalBuckets(uint32_t RecordZeroOffset);
// Buckets, orders and indexes the given records; fills HashRecords,
// HashBitmap and HashBuckets.
void finalizeBuckets(uint32_t RecordZeroOffset,
MutableArrayRef<BulkPublic> Globals);
};
// Key traits that hash and compare CVSymbol values by their raw record bytes.
struct llvm::pdb::SymbolDenseMapInfo {
  // Sentinel for an unused slot: a default-constructed CVSymbol.
  static CVSymbol getEmptyKey() {
    static CVSymbol EmptyKey;
    return EmptyKey;
  }

  // Sentinel for an erased slot: wraps the ArrayRef<uint8_t> tombstone key.
  static CVSymbol getTombstoneKey() {
    static CVSymbol TombstoneKey(
        DenseMapInfo<ArrayRef<uint8_t>>::getTombstoneKey());
    return TombstoneKey;
  }

  // Hashes the record bytes with xxHash64 and narrows the result to unsigned.
  static unsigned getHashValue(const CVSymbol &Val) {
    const auto Hash64 = xxHash64(Val.RecordData);
    return static_cast<unsigned>(Hash64);
  }

  // Two symbols are equal when their RecordData compare equal.
  static bool isEqual(const CVSymbol &LHS, const CVSymbol &RHS) {
    const bool SameBytes = (LHS.RecordData == RHS.RecordData);
    return SameBytes;
  }
};
namespace {
// Fixed-size leading part of a public symbol record as written by
// serializePublic(): the record prefix followed by the public symbol header.
// serializePublic() writes the name bytes immediately after this struct.
// NOTE(review): LLVM_PACKED_START/END presumably remove padding so that
// sizeof(PublicSym32Layout) is the exact on-disk size - confirm.
LLVM_PACKED_START
struct PublicSym32Layout {
RecordPrefix Prefix;
PublicSym32Header Pub;
};
LLVM_PACKED_END
}
// Returns the serialized size in bytes of the record for Pub: the fixed
// layout, the name (clamped so the record stays within MaxRecordLength), one
// terminating NUL byte, all rounded up to a multiple of 4.
static uint32_t sizeOfPublic(const BulkPublic &Pub) {
  // Longest name that still leaves room for the fixed part and the NUL.
  const uint32_t MaxNameLen =
      uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1);
  const uint32_t ClampedLen = std::min<uint32_t>(Pub.NameLen, MaxNameLen);
  const auto UnalignedSize = sizeof(PublicSym32Layout) + ClampedLen + 1;
  return alignTo(UnalignedSize, 4);
}
// Serializes Pub as an S_PUB32 record into Mem and returns a CVSymbol that
// views those bytes. Mem must provide at least sizeOfPublic(Pub) bytes; the
// returned symbol refers to Mem and does not own it.
static CVSymbol serializePublic(uint8_t *Mem, const BulkPublic &Pub) {
  const uint32_t MaxNameLen =
      uint32_t(MaxRecordLength - sizeof(PublicSym32Layout) - 1);
  const uint32_t NameBytes = std::min(Pub.NameLen, MaxNameLen);
  const size_t RecordSize =
      alignTo(sizeof(PublicSym32Layout) + NameBytes + 1, 4);
  assert(RecordSize == sizeOfPublic(Pub));

  // Fixed-size header at the start of the buffer.
  auto *Fixed = reinterpret_cast<PublicSym32Layout *>(Mem);
  Fixed->Prefix.RecordKind = static_cast<uint16_t>(codeview::S_PUB32);
  // The stored length is the record size minus 2.
  Fixed->Prefix.RecordLen = static_cast<uint16_t>(RecordSize - 2);
  Fixed->Pub.Flags = Pub.Flags;
  Fixed->Pub.Offset = Pub.Offset;
  Fixed->Pub.Segment = Pub.Segment;

  // The name follows the fixed header; everything after the name (the NUL
  // terminator and the alignment padding) is zero-filled.
  char *NameDst = reinterpret_cast<char *>(Mem) + sizeof(PublicSym32Layout);
  memcpy(NameDst, Pub.Name, NameBytes);
  const size_t TailBytes = RecordSize - sizeof(PublicSym32Layout) - NameBytes;
  memset(NameDst + NameBytes, 0, TailBytes);

  return CVSymbol(makeArrayRef(Mem, RecordSize));
}
// Returns the number of bytes commit() emits: the GSIHashHeader followed by
// the hash records, the bucket bitmap and the bucket offsets.
uint32_t GSIHashStreamBuilder::calculateSerializedLength() const {
  const auto HeaderBytes = sizeof(GSIHashHeader);
  const auto RecordBytes = HashRecords.size() * sizeof(PSHashRecord);
  const auto BitmapBytes = HashBitmap.size() * sizeof(uint32_t);
  const auto BucketBytes = HashBuckets.size() * sizeof(uint32_t);
  return static_cast<uint32_t>(HeaderBytes + RecordBytes + BitmapBytes +
                               BucketBytes);
}
// Writes this hash table to Writer: header, hash records, bitmap, then bucket
// offsets. Returns the first write error, or success.
Error GSIHashStreamBuilder::commit(BinaryStreamWriter &Writer) {
  const auto RecordBytes = HashRecords.size() * sizeof(PSHashRecord);
  // Combined byte size of the bitmap words and the bucket offsets (4 bytes
  // per entry in each).
  const auto BucketTableBytes = (HashBitmap.size() + HashBuckets.size()) * 4;

  GSIHashHeader Header;
  Header.VerSignature = GSIHashHeader::HdrSignature;
  Header.VerHdr = GSIHashHeader::HdrVersion;
  Header.HrSize = RecordBytes;
  Header.NumBuckets = BucketTableBytes;

  if (Error Err = Writer.writeObject(Header))
    return Err;
  if (Error Err = Writer.writeArray(makeArrayRef(HashRecords)))
    return Err;
  if (Error Err = Writer.writeArray(makeArrayRef(HashBitmap)))
    return Err;
  return Writer.writeArray(makeArrayRef(HashBuckets));
}
// Returns true when every character of S has a value below 0x80 (an empty
// string therefore counts as ASCII).
static bool isAsciiString(StringRef S) {
  for (char C : S) {
    if (unsigned(C) >= 0x80)
      return false;
  }
  return true;
}
// Three-way comparison used to order names inside a hash bucket.
//  - Strings of different length order by length (shorter first).
//  - Equal-length strings where either one contains a non-ASCII byte are
//    compared bytewise with memcmp.
//  - Otherwise both are ASCII and are compared case-insensitively.
// Returns a negative, zero or positive value.
static int gsiRecordCmp(StringRef S1, StringRef S2) {
  const size_t LS = S1.size();
  const size_t RS = S2.size();
  if (LS != RS)
    return (LS > RS) - (LS < RS);

  if (LLVM_UNLIKELY(!isAsciiString(S1) || !isAsciiString(S2)))
    return memcmp(S1.data(), S2.data(), LS);

  // Compare against S2 itself rather than S2.data(): going through the raw
  // pointer would rebuild a StringRef with strlen, which requires S2 to be
  // NUL-terminated and discards the length that was just checked.
  return S1.compare_insensitive(S2);
}
// Builds the publics hash table (PSH) from Publics, passing 0 as the
// record-zero offset.
void GSIStreamBuilder::finalizePublicBuckets() {
PSH->finalizeBuckets(0, Publics);
}
void GSIStreamBuilder::finalizeGlobalBuckets(uint32_t RecordZeroOffset) {
std::vector<BulkPublic> Records;
Records.resize(Globals.size());
uint32_t SymOffset = RecordZeroOffset;
for (size_t I = 0, E = Globals.size(); I < E; ++I) {
StringRef Name = getSymbolName(Globals[I]);
Records[I].Name = Name.data();
Records[I].NameLen = Name.size();
Records[I].SymOffset = SymOffset;
SymOffset += Globals[I].length();
}
GSH->finalizeBuckets(RecordZeroOffset, Records);
}
// Builds this hash table from Records: assigns every record to a bucket, lays
// the records out bucket by bucket in HashRecords, sorts each bucket, and then
// fills HashBitmap and HashBuckets.
//
// Records: the entries to index; Name and SymOffset are read and the bucket
// index of each entry is set here.
// RecordZeroOffset: not read anywhere in this body.
//
// HashBuckets is only appended to (never cleared here), so a second call on
// the same object would append after the entries of the first call.
void GSIHashStreamBuilder::finalizeBuckets(
uint32_t RecordZeroOffset, MutableArrayRef<BulkPublic> Records) {
// Step 1: compute every record's bucket index via parallelFor. Each iteration
// only touches its own Records[I].
// NOTE(review): hashStringV1 receives the raw Name pointer rather than
// getName(); presumably this relies on Name being NUL-terminated - confirm.
parallelFor(0, Records.size(), [&](size_t I) {
Records[I].setBucketIdx(hashStringV1(Records[I].Name) % IPHR_HASH);
});
// Step 2: count the records per bucket, then turn the counts into start
// positions with an exclusive prefix sum (BucketStarts[b] = number of records
// in buckets 0..b-1).
uint32_t BucketStarts[IPHR_HASH] = {0};
for (const BulkPublic &P : Records)
++BucketStarts[P.BucketIdx];
uint32_t Sum = 0;
for (uint32_t &B : BucketStarts) {
uint32_t Size = B;
B = Sum;
Sum += Size;
}
// Step 3: place every record into its bucket's next free slot. Off
// temporarily holds the record's index into Records (replaced in step 4);
// CRef is always 1. After this loop BucketCursors[b] is one past the last
// slot of bucket b.
HashRecords.resize(Records.size());
uint32_t BucketCursors[IPHR_HASH];
memcpy(BucketCursors, BucketStarts, sizeof(BucketCursors));
for (int I = 0, E = Records.size(); I < E; ++I) {
uint32_t HashIdx = BucketCursors[Records[I].BucketIdx]++;
HashRecords[HashIdx].Off = I;
HashRecords[HashIdx].CRef = 1;
}
// Step 4: sort each non-empty bucket via parallelFor. Buckets occupy disjoint
// ranges of HashRecords, so each iteration works on its own slice.
parallelFor(0, IPHR_HASH, [&](size_t I) {
auto B = HashRecords.begin() + BucketStarts[I];
auto E = HashRecords.begin() + BucketCursors[I];
if (B == E)
return;
// Order by gsiRecordCmp on the names; equal names fall back to SymOffset so
// the resulting order does not depend on the sort implementation.
auto BucketCmp = [Records](const PSHashRecord &LHash,
const PSHashRecord &RHash) {
const BulkPublic &L = Records[uint32_t(LHash.Off)];
const BulkPublic &R = Records[uint32_t(RHash.Off)];
assert(L.BucketIdx == R.BucketIdx);
int Cmp = gsiRecordCmp(L.getName(), R.getName());
if (Cmp != 0)
return Cmp < 0;
return L.SymOffset < R.SymOffset;
};
llvm::sort(B, E, BucketCmp);
// Sorting is done: replace the temporary Records index with the record's
// SymOffset plus one.
for (PSHashRecord &HRec : make_range(B, E))
HRec.Off = Records[uint32_t(HRec.Off)].SymOffset + 1;
});
// Step 5: build the bitmap and the bucket offsets. The bitmap has at least as
// many bits as there are buckets, so bit positions at or beyond IPHR_HASH are
// skipped, as are empty buckets.
for (uint32_t I = 0; I < HashBitmap.size(); ++I) {
uint32_t Word = 0;
for (uint32_t J = 0; J < 32; ++J) {
uint32_t BucketIdx = I * 32 + J;
if (BucketIdx >= IPHR_HASH ||
BucketStarts[BucketIdx] == BucketCursors[BucketIdx])
continue;
Word |= (1U << J);
// The stored value is the index of the bucket's first hash record scaled by
// 12.
// NOTE(review): 12 is presumably the per-record size assumed by the reference
// PDB implementation's offset calculation - confirm against that source.
const int SizeOfHROffsetCalc = 12;
ulittle32_t ChainStartOff =
ulittle32_t(BucketStarts[BucketIdx] * SizeOfHROffsetCalc);
HashBuckets.push_back(ChainStartOff);
}
HashBitmap[I] = Word;
}
}
// Stores the MSF builder reference and creates the two hash table builders:
// PSH (used in this file for the publics table) and GSH (used for the globals
// table).
GSIStreamBuilder::GSIStreamBuilder(msf::MSFBuilder &Msf)
: Msf(Msf), PSH(std::make_unique<GSIHashStreamBuilder>()),
GSH(std::make_unique<GSIHashStreamBuilder>()) {}
// Defaulted destructor, defined out of line in this file.
// NOTE(review): presumably placed here because GSIHashStreamBuilder, the type
// owned through PSH/GSH, is only defined in this file - confirm against the
// header.
GSIStreamBuilder::~GSIStreamBuilder() = default;
// Size in bytes of the publics hash stream: the PublicsStreamHeader, the
// serialized publics hash table, and one 32-bit address map entry per public.
uint32_t GSIStreamBuilder::calculatePublicsHashStreamSize() const {
  const auto HeaderBytes = sizeof(PublicsStreamHeader);
  const uint32_t HashTableBytes = PSH->calculateSerializedLength();
  const auto AddrMapBytes = Publics.size() * sizeof(uint32_t);
  return static_cast<uint32_t>(HeaderBytes + HashTableBytes + AddrMapBytes);
}
// Size in bytes of the globals hash stream, which consists solely of the
// serialized globals hash table (GSH).
uint32_t GSIStreamBuilder::calculateGlobalsHashStreamSize() const {
return GSH->calculateSerializedLength();
}
// Finalizes both hash tables and reserves three MSF streams, in this order:
// globals hash stream, publics hash stream, symbol record stream. The
// resulting stream indices are stored in GlobalsStreamIndex,
// PublicsStreamIndex and RecordStreamIndex. Returns the first addStream
// failure, or success.
Error GSIStreamBuilder::finalizeMsfLayout() {
  // The tables must be built before their serialized sizes are queried.
  // Global record offsets start right after the public records.
  finalizePublicBuckets();
  finalizeGlobalBuckets(PSH->RecordByteSize);

  Expected<uint32_t> GlobalsIdx =
      Msf.addStream(calculateGlobalsHashStreamSize());
  if (!GlobalsIdx)
    return GlobalsIdx.takeError();
  GlobalsStreamIndex = *GlobalsIdx;

  Expected<uint32_t> PublicsIdx =
      Msf.addStream(calculatePublicsHashStreamSize());
  if (!PublicsIdx)
    return PublicsIdx.takeError();
  PublicsStreamIndex = *PublicsIdx;

  // The record stream holds the public records followed by the globals.
  const uint32_t SymbolRecordBytes = PSH->RecordByteSize + GSH->RecordByteSize;
  Expected<uint32_t> RecordsIdx = Msf.addStream(SymbolRecordBytes);
  if (!RecordsIdx)
    return RecordsIdx.takeError();
  RecordStreamIndex = *RecordsIdx;

  return Error::success();
}
void GSIStreamBuilder::addPublicSymbols(std::vector<BulkPublic> &&PublicsIn) {
assert(Publics.empty() && PSH->RecordByteSize == 0 &&
"publics can only be added once");
Publics = std::move(PublicsIn);
parallelSort(Publics, [](const BulkPublic &L, const BulkPublic &R) {
return L.getName() < R.getName();
});
uint32_t SymOffset = 0;
for (BulkPublic &Pub : Publics) {
Pub.SymOffset = SymOffset;
SymOffset += sizeOfPublic(Pub);
}
PSH->RecordByteSize = SymOffset;
}
// Adds a ProcRefSym global: serializes it and records it via
// serializeAndAddGlobal().
void GSIStreamBuilder::addGlobalSymbol(const ProcRefSym &Sym) {
serializeAndAddGlobal(Sym);
}
// Adds a DataSym global: serializes it and records it via
// serializeAndAddGlobal().
void GSIStreamBuilder::addGlobalSymbol(const DataSym &Sym) {
serializeAndAddGlobal(Sym);
}
// Adds a ConstantSym global: serializes it and records it via
// serializeAndAddGlobal().
void GSIStreamBuilder::addGlobalSymbol(const ConstantSym &Sym) {
serializeAndAddGlobal(Sym);
}
// Serializes a typed symbol record into memory obtained from the MSF
// allocator (PDB container format) and adds the serialized record as a
// global.
template <typename T>
void GSIStreamBuilder::serializeAndAddGlobal(const T &Symbol) {
  // The serializer is handed a local copy, so the caller's object is never
  // passed to it directly.
  T MutableSym(Symbol);
  auto Serialized = SymbolSerializer::writeOneSymbol(
      MutableSym, Msf.getAllocator(), CodeViewContainer::Pdb);
  addGlobalSymbol(Serialized);
}
// Appends an already-serialized global record and accounts for its size.
// S_UDT and S_CONSTANT records are de-duplicated through GlobalsSeen: a
// record that was already inserted is dropped.
void GSIStreamBuilder::addGlobalSymbol(const codeview::CVSymbol &Symbol) {
  const auto Kind = Symbol.kind();
  const bool Deduplicated = (Kind == S_UDT) || (Kind == S_CONSTANT);

  // insert() is only attempted for the de-duplicated kinds; .second is false
  // when the record was already present.
  if (Deduplicated && !GlobalsSeen.insert(Symbol).second)
    return;

  Globals.push_back(Symbol);
  GSH->RecordByteSize += Symbol.length();
}
// Serializes each public in order and writes its bytes to Writer. One scratch
// buffer is reused for all records. Stops at and returns the first write
// error.
static Error writePublics(BinaryStreamWriter &Writer,
                          ArrayRef<BulkPublic> Publics) {
  std::vector<uint8_t> Scratch;
  for (const BulkPublic &Entry : Publics) {
    const uint32_t RecordBytes = sizeOfPublic(Entry);
    Scratch.resize(RecordBytes);
    serializePublic(Scratch.data(), Entry);
    if (Error Err = Writer.writeBytes(Scratch))
      return Err;
  }
  return Error::success();
}
// Writes the given symbol records to Writer by exposing them as a
// little-endian BinaryItemStream and writing that stream in one call.
static Error writeRecords(BinaryStreamWriter &Writer,
                          ArrayRef<CVSymbol> Records) {
  BinaryItemStream<CVSymbol> Items(support::endianness::little);
  Items.setItems(Records);
  return Writer.writeStreamRef(BinaryStreamRef(Items));
}
// Writes the symbol record stream: all public records first, then all global
// records. Returns the first error encountered, or success.
Error GSIStreamBuilder::commitSymbolRecordStream(
    WritableBinaryStreamRef Stream) {
  BinaryStreamWriter Writer(Stream);
  if (Error Err = writePublics(Writer, Publics))
    return Err;
  return writeRecords(Writer, Globals);
}
// Builds the address map for the publics: the SymOffset of every public,
// ordered by (Segment, Offset) with the name as the final tie-breaker so the
// result is deterministic.
static std::vector<support::ulittle32_t>
computeAddrMap(ArrayRef<BulkPublic> Publics) {
  // Start from the identity permutation of indices into Publics.
  std::vector<ulittle32_t> AddrMap;
  AddrMap.reserve(Publics.size());
  for (int Idx = 0, Count = Publics.size(); Idx < Count; ++Idx)
    AddrMap.push_back(ulittle32_t(Idx));

  auto ByAddress = [Publics](const ulittle32_t &LhsIdx,
                             const ulittle32_t &RhsIdx) {
    const BulkPublic &Lhs = Publics[LhsIdx];
    const BulkPublic &Rhs = Publics[RhsIdx];
    const bool SameSegment = (Lhs.Segment == Rhs.Segment);
    if (!SameSegment)
      return Lhs.Segment < Rhs.Segment;
    const bool SameOffset = (Lhs.Offset == Rhs.Offset);
    if (!SameOffset)
      return Lhs.Offset < Rhs.Offset;
    return Lhs.getName() < Rhs.getName();
  };
  parallelSort(AddrMap, ByAddress);

  // Replace each index with the offset of the public it refers to.
  for (ulittle32_t &Slot : AddrMap)
    Slot = Publics[Slot].SymOffset;
  return AddrMap;
}
// Writes the publics hash stream: the PublicsStreamHeader, the publics hash
// table (PSH), then the address map. All thunk- and section-related header
// fields are written as zero. Returns the first write error, or success.
Error GSIStreamBuilder::commitPublicsHashStream(
    WritableBinaryStreamRef Stream) {
  BinaryStreamWriter Writer(Stream);

  PublicsStreamHeader Header;
  // Sizes of the two variable-length parts that follow the header.
  Header.SymHash = PSH->calculateSerializedLength();
  Header.AddrMap = Publics.size() * 4;
  // Nothing is emitted for thunks or sections.
  Header.NumThunks = 0;
  Header.SizeOfThunk = 0;
  Header.ISectThunkTable = 0;
  memset(Header.Padding, 0, sizeof(Header.Padding));
  Header.OffThunkTable = 0;
  Header.NumSections = 0;

  if (Error Err = Writer.writeObject(Header))
    return Err;
  if (Error Err = PSH->commit(Writer))
    return Err;

  std::vector<support::ulittle32_t> AddrMap = computeAddrMap(Publics);
  assert(AddrMap.size() == Publics.size());
  return Writer.writeArray(makeArrayRef(AddrMap));
}
// Writes the globals hash table (GSH) to Stream through a BinaryStreamWriter
// and returns the Error produced by GSH->commit().
Error GSIStreamBuilder::commitGlobalsHashStream(
WritableBinaryStreamRef Stream) {
BinaryStreamWriter Writer(Stream);
return GSH->commit(Writer);
}
// Writes all three streams into Buffer according to Layout. The streams are
// opened in the order globals hash, publics hash, symbol records, and written
// in the order symbol records, globals hash, publics hash. Returns the first
// error, or success.
Error GSIStreamBuilder::commit(const msf::MSFLayout &Layout,
                               WritableBinaryStreamRef Buffer) {
  auto GlobalsHashStream = WritableMappedBlockStream::createIndexedStream(
      Layout, Buffer, getGlobalsStreamIndex(), Msf.getAllocator());
  auto PublicsHashStream = WritableMappedBlockStream::createIndexedStream(
      Layout, Buffer, getPublicsStreamIndex(), Msf.getAllocator());
  auto SymbolRecordStream = WritableMappedBlockStream::createIndexedStream(
      Layout, Buffer, getRecordStreamIndex(), Msf.getAllocator());

  if (Error Err = commitSymbolRecordStream(*SymbolRecordStream))
    return Err;
  if (Error Err = commitGlobalsHashStream(*GlobalsHashStream))
    return Err;
  return commitPublicsHashStream(*PublicsHashStream);
}