#include <algorithm>
#include <cstdint>
#include <memory>
#include <type_traits>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
#include "llvm/Object/Binary.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/MemProf.h"
#include "llvm/ProfileData/MemProfData.inc"
#include "llvm/ProfileData/RawMemProfReader.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Path.h"
#define DEBUG_TYPE "memprof"
namespace llvm {
namespace memprof {
namespace {
/// Reads one value of type \p T from \p Ptr by reinterpreting the pointer.
///
/// \p T must be a POD type (checked at compile time). In asserts-enabled
/// builds the address is checked to be a multiple of sizeof(T).
template <class T = uint64_t> inline T alignedRead(const char *Ptr) {
  static_assert(std::is_pod<T>::value, "Not a pod type.");
  assert(reinterpret_cast<size_t>(Ptr) % sizeof(T) == 0 && "Unaligned Read");
  const T *const Typed = reinterpret_cast<const T *>(Ptr);
  return *Typed;
}
/// Validates the framing of a raw memprof buffer before any of it is parsed.
///
/// The buffer may contain several profiles laid out back to back; each starts
/// with a Header whose TotalSize field is the distance to the next profile.
///
/// \returns success when every header has the expected version and the
/// TotalSize fields tile the buffer exactly; otherwise an InstrProfError:
///   - empty_raw_profile   the buffer has no bytes,
///   - bad_magic           the buffer does not start with the memprof magic,
///   - truncated           fewer bytes remain than a Header needs,
///   - unsupported_version a header carries a different raw version,
///   - malformed           a TotalSize is smaller than a Header or runs past
///                         the end of the buffer.
Error checkBuffer(const MemoryBuffer &Buffer) {
  // Test for emptiness before the magic: hasFormat() rejects anything shorter
  // than the magic value, so the opposite order would report an empty file as
  // bad_magic and leave this branch unreachable.
  if (Buffer.getBufferSize() == 0)
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (!RawMemProfReader::hasFormat(Buffer))
    return make_error<InstrProfError>(instrprof_error::bad_magic);

  if (Buffer.getBufferSize() < sizeof(Header))
    return make_error<InstrProfError>(instrprof_error::truncated);

  const char *const End = Buffer.getBufferEnd();
  const char *Next = Buffer.getBufferStart();
  while (Next < End) {
    // Never dereference a Header that does not fully fit in what is left.
    const uint64_t Remaining = static_cast<uint64_t>(End - Next);
    if (Remaining < sizeof(Header))
      return make_error<InstrProfError>(instrprof_error::truncated);

    auto *H = reinterpret_cast<const Header *>(Next);
    if (H->Version != MEMPROF_RAW_VERSION)
      return make_error<InstrProfError>(instrprof_error::unsupported_version);

    // A TotalSize below sizeof(Header) (in particular 0) would stall or
    // mis-step this loop; one above Remaining would move Next past the buffer.
    // Rejecting both also guarantees the sizes add up to the buffer size.
    if (H->TotalSize < sizeof(Header) || H->TotalSize > Remaining)
      return make_error<InstrProfError>(instrprof_error::malformed);

    Next += H->TotalSize;
  }
  return Error::success();
}
/// Decodes the segment table located at \p Ptr.
///
/// Layout read here: a little-endian 64-bit entry count, immediately followed
/// by that many SegmentEntry structs stored back to back. No bounds checking
/// is performed on the count.
llvm::SmallVector<SegmentEntry> readSegmentEntries(const char *Ptr) {
  using namespace support;

  // readNext advances Ptr past the count, leaving it on the first entry.
  const uint64_t Count = endian::readNext<uint64_t, little, unaligned>(Ptr);
  const auto *Cursor = reinterpret_cast<const SegmentEntry *>(Ptr);

  llvm::SmallVector<SegmentEntry> Segments;
  for (uint64_t Remaining = Count; Remaining != 0; --Remaining, ++Cursor)
    Segments.push_back(*Cursor);
  return Segments;
}
/// Decodes the memory-info-block section located at \p Ptr.
///
/// Layout read here: a little-endian 64-bit entry count, then for each entry a
/// little-endian 64-bit id followed by one MemInfoBlock struct. The result
/// keeps the entries in file order as (id, block) pairs.
llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>>
readMemInfoBlocks(const char *Ptr) {
  using namespace support;

  const uint64_t Count = endian::readNext<uint64_t, little, unaligned>(Ptr);

  llvm::SmallVector<std::pair<uint64_t, MemInfoBlock>> Blocks;
  for (uint64_t Remaining = Count; Remaining != 0; --Remaining) {
    // readNext moves Ptr past the id so it points at the block payload.
    const uint64_t Key = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const auto *Block = reinterpret_cast<const MemInfoBlock *>(Ptr);
    Blocks.emplace_back(Key, *Block);
    Ptr += sizeof(MemInfoBlock);
  }
  return Blocks;
}
/// Decodes the call-stack section located at \p Ptr.
///
/// Layout read here: a little-endian 64-bit stack count, then for each stack a
/// 64-bit stack id, a 64-bit PC count and that many 64-bit PCs. If the same id
/// appears more than once, the later stack replaces the earlier one.
CallStackMap readStackInfo(const char *Ptr) {
  using namespace support;

  const uint64_t StackCount =
      endian::readNext<uint64_t, little, unaligned>(Ptr);

  CallStackMap Stacks;
  for (uint64_t StackIdx = 0; StackIdx != StackCount; ++StackIdx) {
    const uint64_t Id = endian::readNext<uint64_t, little, unaligned>(Ptr);
    const uint64_t PCCount = endian::readNext<uint64_t, little, unaligned>(Ptr);

    SmallVector<uint64_t> PCs;
    for (uint64_t PCIdx = 0; PCIdx != PCCount; ++PCIdx) {
      const uint64_t PC = endian::readNext<uint64_t, little, unaligned>(Ptr);
      PCs.push_back(PC);
    }
    Stacks[Id] = PCs;
  }
  return Stacks;
}
bool mergeStackMap(const CallStackMap &From, CallStackMap &To) {
for (const auto &IdStack : From) {
auto I = To.find(IdStack.first);
if (I == To.end()) {
To[IdStack.first] = IdStack.second;
} else {
if (IdStack.second != I->second)
return true;
}
}
return false;
}
/// Wraps \p E with a leading string error carrying \p Context, so the joined
/// error lists the context first and the original error second.
Error report(Error E, const StringRef Context) {
  Error ContextError = createStringError(inconvertibleErrorCode(), Context);
  return joinErrors(std::move(ContextError), std::move(E));
}
bool isRuntimePath(const StringRef Path) {
return StringRef(llvm::sys::path::convert_to_slash(Path))
.contains("memprof/memprof_");
}
std::string getBuildIdString(const SegmentEntry &Entry) {
constexpr size_t Size = sizeof(Entry.BuildId) / sizeof(uint8_t);
constexpr uint8_t Zeros[Size] = {0};
if (memcmp(Entry.BuildId, Zeros, Size) == 0)
return "<None>";
std::string Str;
raw_string_ostream OS(Str);
for (size_t I = 0; I < Size; I++) {
OS << format_hex_no_prefix(Entry.BuildId[I], 2);
}
return OS.str();
}
}
/// Builds a reader for the raw profile at \p Path, symbolized against
/// \p ProfiledBinary.
///
/// \param Path            profile to load; forwarded to
///                        MemoryBuffer::getFileOrSTDIN.
/// \param ProfiledBinary  path of the binary that produced the profile; must
///                        not be empty.
/// \param KeepName        forwarded to the RawMemProfReader constructor.
/// \returns the initialized reader, or an error when the profile cannot be
/// read, fails checkBuffer, the binary path is empty, the binary cannot be
/// opened, or initialize() fails.
Expected<std::unique_ptr<RawMemProfReader>>
RawMemProfReader::create(const Twine &Path, const StringRef ProfiledBinary,
                         bool KeepName) {
  // Materialize the path once for error messages. Twine::getSingleStringRef()
  // is only valid for a Twine made of a single string and asserts on a
  // concatenation, while getFileOrSTDIN happily accepts any Twine.
  const std::string ProfilePath = Path.str();

  auto BufferOr = MemoryBuffer::getFileOrSTDIN(Path);
  if (std::error_code EC = BufferOr.getError())
    return report(errorCodeToError(EC), ProfilePath);

  std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOr.get());
  if (Error E = checkBuffer(*Buffer))
    return report(std::move(E), ProfilePath);

  if (ProfiledBinary.empty())
    return report(
        errorCodeToError(make_error_code(std::errc::invalid_argument)),
        "Path to profiled binary is empty!");

  auto BinaryOr = llvm::object::createBinary(ProfiledBinary);
  if (!BinaryOr) {
    return report(BinaryOr.takeError(), ProfiledBinary);
  }

  std::unique_ptr<RawMemProfReader> Reader(
      new RawMemProfReader(std::move(BinaryOr.get()), KeepName));
  if (Error E = Reader->initialize(std::move(Buffer))) {
    return std::move(E);
  }
  // The explicit move is needed for the conversion to Expected<...>.
  return std::move(Reader);
}
/// Returns true when the file at \p Path (opened via
/// MemoryBuffer::getFileOrSTDIN) starts with the raw memprof magic. A file
/// that cannot be opened is reported as not having the format.
bool RawMemProfReader::hasFormat(const StringRef Path) {
  auto MaybeBuffer = MemoryBuffer::getFileOrSTDIN(Path);
  if (MaybeBuffer.getError())
    return false;
  const std::unique_ptr<MemoryBuffer> Owned = std::move(MaybeBuffer.get());
  return hasFormat(*Owned);
}
/// Returns true when \p Buffer is at least eight bytes long and its first
/// 64-bit word equals MEMPROF_RAW_MAGIC_64.
bool RawMemProfReader::hasFormat(const MemoryBuffer &Buffer) {
  const bool HasRoomForMagic = Buffer.getBufferSize() >= sizeof(uint64_t);
  // Short-circuit so the buffer is only read when the magic fits.
  return HasRoomForMagic &&
         alignedRead(Buffer.getBufferStart()) == MEMPROF_RAW_MAGIC_64;
}
/// Writes the profile to \p OS as YAML-style text: a summary, the segment
/// table, and one record per function obtained by iterating over this reader.
void RawMemProfReader::printYAML(raw_ostream &OS) {
  // Only functions with at least one allocation site count towards the
  // summary totals.
  uint64_t FunctionsWithAllocs = 0;
  uint64_t TotalAllocSites = 0;
  for (const auto &GuidAndRecord : FunctionProfileData) {
    const size_t SiteCount = GuidAndRecord.second.AllocSites.size();
    if (SiteCount == 0)
      continue;
    ++FunctionsWithAllocs;
    TotalAllocSites += SiteCount;
  }

  OS << "MemprofProfile:\n";

  OS << " Summary:\n"
     << " Version: " << MEMPROF_RAW_VERSION << "\n"
     << " NumSegments: " << SegmentInfo.size() << "\n"
     << " NumMibInfo: " << TotalAllocSites << "\n"
     << " NumAllocFunctions: " << FunctionsWithAllocs << "\n"
     << " NumStackOffsets: " << StackMap.size() << "\n";

  OS << " Segments:\n";
  for (const auto &Segment : SegmentInfo) {
    OS << " -\n"
       << " BuildId: " << getBuildIdString(Segment) << "\n"
       << " Start: 0x" << llvm::utohexstr(Segment.Start) << "\n"
       << " End: 0x" << llvm::utohexstr(Segment.End) << "\n"
       << " Offset: 0x" << llvm::utohexstr(Segment.Offset) << "\n";
  }

  OS << " Records:\n";
  for (const auto &GuidAndRecord : *this) {
    OS << " -\n"
       << " FunctionGUID: " << GuidAndRecord.first << "\n";
    GuidAndRecord.second.print(OS);
  }
}
/// Prepares the reader: validates the profiled binary, creates the
/// symbolizer, then reads, symbolizes and indexes the raw profile held in
/// \p DataBuffer.
///
/// The binary must be a 64-bit little-endian ELF file for an x86 target whose
/// first PT_LOAD program header has a non-zero virtual address (a zero
/// address is rejected as position independent code).
///
/// \returns success, or an error describing the first failed step. Errors
/// about the binary are prefixed with its file name via report().
Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
  const StringRef FileName = Binary.getBinary()->getFileName();

  auto *ElfObject = dyn_cast<object::ELFObjectFileBase>(Binary.getBinary());
  if (!ElfObject) {
    return report(make_error<StringError>(Twine("Not an ELF file: "),
                                          inconvertibleErrorCode()),
                  FileName);
  }

  // The program-header inspection below is written against the ELF64LE
  // layout. Use a checked cast so any other ELF flavour is reported as an
  // error instead of being reinterpreted.
  auto *Elf64LEObject = dyn_cast<object::ELF64LEObjectFile>(ElfObject);
  if (!Elf64LEObject)
    return report(
        make_error<StringError>(Twine("Unsupported ELF format, expected a "
                                      "64-bit little-endian file"),
                                inconvertibleErrorCode()),
        FileName);

  const llvm::object::ELF64LEFile &ElfFile = Elf64LEObject->getELFFile();
  auto PHdrsOr = ElfFile.program_headers();
  if (!PHdrsOr)
    // Take the underlying error so the failed Expected is consumed, and keep
    // its detail in the returned message.
    return report(
        joinErrors(
            make_error<StringError>(Twine("Could not read program headers: "),
                                    inconvertibleErrorCode()),
            PHdrsOr.takeError()),
        FileName);

  // Locate the first loadable segment without running off the end of the
  // program header table when the binary has none.
  const auto FirstLoadHeader =
      std::find_if(PHdrsOr->begin(), PHdrsOr->end(), [](const auto &PHdr) {
        return PHdr.p_type == llvm::ELF::PT_LOAD;
      });
  if (FirstLoadHeader == PHdrsOr->end())
    return report(
        make_error<StringError>(Twine("No PT_LOAD program header found"),
                                inconvertibleErrorCode()),
        FileName);

  if (FirstLoadHeader->p_vaddr == 0)
    return report(
        make_error<StringError>(Twine("Unsupported position independent code"),
                                inconvertibleErrorCode()),
        FileName);

  auto Triple = ElfObject->makeTriple();
  if (!Triple.isX86())
    return report(make_error<StringError>(Twine("Unsupported target: ") +
                                              Triple.getArchName(),
                                          inconvertibleErrorCode()),
                  FileName);

  auto *Object = cast<object::ObjectFile>(Binary.getBinary());
  std::unique_ptr<DIContext> Context = DWARFContext::create(
      *Object, DWARFContext::ProcessDebugRelocations::Process);

  auto SOFOr = symbolize::SymbolizableObjectFile::create(
      Object, std::move(Context), false);
  if (!SOFOr)
    return report(SOFOr.takeError(), FileName);
  Symbolizer = std::move(SOFOr.get());

  // Order matters: the raw data must be loaded before it can be symbolized,
  // and symbolized before it is mapped to per-function records.
  if (Error E = readRawProfile(std::move(DataBuffer)))
    return E;

  if (Error E = symbolizeAndFilterStackFrames())
    return E;

  return mapRawProfileToRecords();
}
/// Converts the per-callstack profile data into per-function records stored
/// in FunctionProfileData.
///
/// For every entry of CallstackProfileData the stack's addresses are expanded
/// into symbolized frame ids; the resulting call stack is attached as an
/// allocation site, and the frame lists seen are collected as call sites for
/// the functions they mention.
///
/// \returns a malformed InstrProfError when a profiled stack id is missing
/// from StackMap, success otherwise.
Error RawMemProfReader::mapRawProfileToRecords() {
  // Call sites are tracked as pointers to the frame vectors owned by
  // SymbolizedFrame; the SetVector de-duplicates them per function.
  using LocationPtr = const llvm::SmallVector<FrameId> *;
  llvm::DenseMap<GlobalValue::GUID, llvm::SetVector<LocationPtr>>
      PerFunctionCallSites;
  for (const auto &Entry : CallstackProfileData) {
    const uint64_t StackId = Entry.first;
    auto It = StackMap.find(StackId);
    if (It == StackMap.end())
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof callstack record does not contain id: " + Twine(StackId));
    // Flattened list of frame ids for this stack, in address order.
    llvm::SmallVector<FrameId> Callstack;
    Callstack.reserve(It->getSecond().size());
    llvm::ArrayRef<uint64_t> Addresses = It->getSecond();
    for (size_t I = 0; I < Addresses.size(); I++) {
      const uint64_t Address = Addresses[I];
      assert(SymbolizedFrame.count(Address) > 0 &&
             "Address not found in SymbolizedFrame map");
      const SmallVector<FrameId> &Frames = SymbolizedFrame[Address];
      assert(!idToFrame(Frames.back()).IsInlineFrame &&
             "The last frame should not be inlined");
      for (size_t J = 0; J < Frames.size(); J++) {
        // The very first frame of the stack is not recorded as a call site.
        // NOTE(review): presumably because it is the allocation call itself,
        // which is covered by the AllocSites loop below -- confirm.
        if (I == 0 && J == 0)
          continue;
        const GlobalValue::GUID Guid = idToFrame(Frames[J]).Function;
        PerFunctionCallSites[Guid].insert(&Frames);
      }
      Callstack.append(Frames.begin(), Frames.end());
    }
    // Attach the allocation to the function of the first frame and, while the
    // frames are inline frames, to each following function up to and
    // including the first non-inline frame. The loop has no bound of its own;
    // it relies on reaching a non-inline frame (see the assert above).
    for (size_t I = 0; ; I++) {
      const Frame &F = idToFrame(Callstack[I]);
      auto Result =
          FunctionProfileData.insert({F.Function, IndexedMemProfRecord()});
      IndexedMemProfRecord &Record = Result.first->second;
      Record.AllocSites.emplace_back(Callstack, Entry.second);
      if (!F.IsInlineFrame)
        break;
    }
  }
  // Copy the collected call-site frame lists into each function's record,
  // creating the record when the function has no allocation sites.
  for (auto I = PerFunctionCallSites.begin(), E = PerFunctionCallSites.end();
       I != E; I++) {
    const GlobalValue::GUID Id = I->first;
    auto Result = FunctionProfileData.insert({Id, IndexedMemProfRecord()});
    IndexedMemProfRecord &Record = Result.first->second;
    for (LocationPtr Loc : I->getSecond()) {
      Record.CallSites.push_back(*Loc);
    }
  }
  return Error::success();
}
/// Symbolizes every address in StackMap and drops the ones that should not be
/// kept.
///
/// Each distinct address is symbolized once (including inlined frames) and
/// its frames are stored in SymbolizedFrame / IdToFrame. Addresses whose
/// first frame has no function name, or whose file looks like a memprof
/// runtime source (isRuntimePath), are removed from their call stacks. Stacks
/// left empty are erased from both StackMap and CallstackProfileData.
///
/// \returns the symbolizer's error if symbolization fails, a malformed
/// InstrProfError if no stack survives, success otherwise.
Error RawMemProfReader::symbolizeAndFilterStackFrames() {
  const DILineInfoSpecifier Specifier(
      DILineInfoSpecifier::FileLineInfoKind::RawValue,
      DILineInfoSpecifier::FunctionNameKind::LinkageName);
  // Ids of stacks that end up empty; erased after the loop so StackMap is not
  // modified while it is being iterated.
  llvm::SmallVector<uint64_t> EntriesToErase;
  // Addresses rejected so far; remembered across stacks so they are neither
  // symbolized again nor kept in later stacks.
  llvm::DenseSet<uint64_t> AllVAddrsToDiscard;
  for (auto &Entry : StackMap) {
    for (const uint64_t VAddr : Entry.getSecond()) {
      // Skip addresses that were already symbolized or already rejected.
      if (SymbolizedFrame.count(VAddr) > 0 ||
          AllVAddrsToDiscard.contains(VAddr))
        continue;
      Expected<DIInliningInfo> DIOr = Symbolizer->symbolizeInlinedCode(
          getModuleOffset(VAddr), Specifier, false);
      if (!DIOr)
        return DIOr.takeError();
      DIInliningInfo DI = DIOr.get();
      // Only the first frame decides whether the whole address is discarded.
      if (DI.getFrame(0).FunctionName == DILineInfo::BadString ||
          isRuntimePath(DI.getFrame(0).FileName)) {
        AllVAddrsToDiscard.insert(VAddr);
        continue;
      }
      for (size_t I = 0, NumFrames = DI.getNumberOfFrames(); I < NumFrames;
           I++) {
        const auto &DIFrame = DI.getFrame(I);
        const uint64_t Guid =
            IndexedMemProfRecord::getGUID(DIFrame.FunctionName);
        // The line is stored relative to the function's start line; every
        // frame except the last one for this address is flagged as inline.
        const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                      I != NumFrames - 1);
        if (KeepSymbolName)
          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
        const FrameId Hash = F.hash();
        IdToFrame.insert({Hash, F});
        SymbolizedFrame[VAddr].push_back(Hash);
      }
    }
    // Remove rejected addresses from this stack, including ones rejected
    // while processing earlier stacks.
    auto &CallStack = Entry.getSecond();
    llvm::erase_if(CallStack, [&AllVAddrsToDiscard](const uint64_t A) {
      return AllVAddrsToDiscard.contains(A);
    });
    if (CallStack.empty())
      EntriesToErase.push_back(Entry.getFirst());
  }
  // Drop emptied stacks together with their profile data.
  for (const uint64_t Id : EntriesToErase) {
    StackMap.erase(Id);
    CallstackProfileData.erase(Id);
  }
  if (StackMap.empty())
    return make_error<InstrProfError>(
        instrprof_error::malformed,
        "no entries in callstack map after symbolization");
  return Error::success();
}
/// Parses every profile stored back to back in \p DataBuffer and accumulates
/// the result in SegmentInfo, CallstackProfileData and StackMap.
///
/// Each profile starts with a Header giving the offsets of its segment,
/// memory-info-block and stack sections and its total size. Blocks with the
/// same id are merged; segment tables must match across profiles, and so must
/// call stacks that share an id.
///
/// \returns a malformed InstrProfError on a segment or call-stack mismatch,
/// success otherwise.
Error RawMemProfReader::readRawProfile(
    std::unique_ptr<MemoryBuffer> DataBuffer) {
  const char *Cursor = DataBuffer->getBufferStart();
  while (Cursor < DataBuffer->getBufferEnd()) {
    const auto *ProfileHeader =
        reinterpret_cast<const memprof::Header *>(Cursor);

    // Segment table: identical for every profile in the file.
    const llvm::SmallVector<SegmentEntry> Segments =
        readSegmentEntries(Cursor + ProfileHeader->SegmentOffset);
    const bool HavePreviousSegments = !SegmentInfo.empty();
    if (HavePreviousSegments && SegmentInfo != Segments) {
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile has different segment information");
    }
    SegmentInfo.assign(Segments.begin(), Segments.end());

    // Memory info blocks: merge into an existing block or start a new one.
    for (const auto &IdAndBlock :
         readMemInfoBlocks(Cursor + ProfileHeader->MIBOffset)) {
      const bool Known = CallstackProfileData.count(IdAndBlock.first) != 0;
      if (!Known) {
        CallstackProfileData[IdAndBlock.first] = IdAndBlock.second;
        continue;
      }
      CallstackProfileData[IdAndBlock.first].Merge(IdAndBlock.second);
    }

    // Call stacks: adopt the first map wholesale, merge the following ones.
    const CallStackMap Stacks =
        readStackInfo(Cursor + ProfileHeader->StackOffset);
    if (StackMap.empty()) {
      StackMap = Stacks;
    } else if (mergeStackMap(Stacks, StackMap)) {
      return make_error<InstrProfError>(
          instrprof_error::malformed,
          "memprof raw profile got different call stack for same id");
    }

    Cursor += ProfileHeader->TotalSize;
  }
  return Error::success();
}
/// Wraps \p VirtualAddress in an object::SectionedAddress; the address value
/// itself is passed through unchanged.
///
/// Inside LLVM_DEBUG the address is additionally asserted to fall within one
/// of the entries of SegmentInfo (Start exclusive, End inclusive).
object::SectionedAddress
RawMemProfReader::getModuleOffset(const uint64_t VirtualAddress) {
  LLVM_DEBUG({
    SegmentEntry *Match = nullptr;
    for (auto &Segment : SegmentInfo) {
      const bool AboveStart = VirtualAddress > Segment.Start;
      const bool WithinEnd = VirtualAddress <= Segment.End;
      if (AboveStart && WithinEnd)
        Match = &Segment;
    }
    assert(Match && "Could not find a segment entry");
  });
  return object::SectionedAddress{VirtualAddress};
}
/// Stores the next (GUID, MemProfRecord) pair in \p GuidRecord and advances
/// the internal iterator.
///
/// Frame ids of the indexed record are resolved to frames; when
/// KeepSymbolName is set each frame also receives its symbol name from
/// GuidToSymbolName.
///
/// \returns empty_raw_profile when there are no records at all, eof once the
/// iterator has reached the end, success otherwise.
Error RawMemProfReader::readNextRecord(GuidMemProfRecordPair &GuidRecord) {
  if (FunctionProfileData.empty())
    return make_error<InstrProfError>(instrprof_error::empty_raw_profile);

  if (Iter == FunctionProfileData.end())
    return make_error<InstrProfError>(instrprof_error::eof);

  // Resolves a frame id, attaching the symbol name only when requested.
  auto ResolveFrame = [this](const FrameId Id) {
    Frame Resolved = this->idToFrame(Id);
    if (this->KeepSymbolName) {
      auto NameIt = this->GuidToSymbolName.find(Resolved.Function);
      assert(NameIt != this->GuidToSymbolName.end());
      Resolved.SymbolName = NameIt->getSecond();
    }
    return Resolved;
  };

  const IndexedMemProfRecord &Indexed = Iter->second;
  GuidRecord = {Iter->first, MemProfRecord(Indexed, ResolveFrame)};
  Iter++;
  return Error::success();
}
} }