#include "ProfileGenerator.h"
#include "ErrorHandling.h"
#include "PerfReader.h"
#include "ProfiledBinary.h"
#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
#include "llvm/ProfileData/ProfileCommon.h"
#include <algorithm>
#include <float.h>
#include <unordered_set>
#include <utility>
// Command-line options consumed by the profile generators in this file.

// Path of the profile file to write; the option is marked cl::Required.
cl::opt<std::string> OutputFilename("output", cl::value_desc("output"),
cl::Required,
cl::desc("Output profile file"));
// "-o" is registered as an alias of "--output".
static cl::alias OutputA("o", cl::desc("Alias for --output"),
cl::aliasopt(OutputFilename));
// Encoding of the written profile. NOTE(review): the initial value is
// SPF_Ext_Binary although the help text of "binary" says "(default)".
static cl::opt<SampleProfileFormat> OutputFormat(
"format", cl::desc("Format of output profile"), cl::init(SPF_Ext_Binary),
cl::values(
clEnumValN(SPF_Binary, "binary", "Binary encoding (default)"),
clEnumValN(SPF_Compact_Binary, "compbinary", "Compact binary encoding"),
clEnumValN(SPF_Ext_Binary, "extbinary", "Extensible binary encoding"),
clEnumValN(SPF_Text, "text", "Text encoding"),
clEnumValN(SPF_GCC, "gcc",
"GCC encoding (only meaningful for -sample)")));
// Hidden flag; write() only honors it when the format is SPF_Ext_Binary.
cl::opt<bool> UseMD5(
"use-md5", cl::init(false), cl::Hidden,
cl::desc("Use md5 to represent function names in the output profile (only "
"meaningful for -extbinary)"));
// Hidden flag; when set with SPF_Ext_Binary, write() attaches a symbol list
// populated from DWARF to the writer.
static cl::opt<bool> PopulateProfileSymbolList(
"populate-profile-symbol-list", cl::init(false), cl::Hidden,
cl::desc("Populate profile symbol list (only meaningful for -extbinary)"));
// Hidden flag; selects the "all binary functions" branch of
// preprocessRangeCounter() when zero-filling ranges.
static cl::opt<bool> FillZeroForAllFuncs(
"fill-zero-for-all-funcs", cl::init(false), cl::Hidden,
cl::desc("Attribute all functions' range with zero count "
"even it's not hit by any samples."));
// Hidden option whose storage is CSProfileGenerator::MaxCompressionSize.
static cl::opt<int32_t, true> RecursionCompression(
"compress-recursion",
cl::desc("Compressing recursion by deduplicating adjacent frame "
"sequences up to the specified size. -1 means no size limit."),
cl::Hidden,
cl::location(llvm::sampleprof::CSProfileGenerator::MaxCompressionSize));
// Enables cold-profile trimming in both generators. No cl::init is given.
static cl::opt<bool>
TrimColdProfile("trim-cold-profile",
cl::desc("If the total count of the profile is smaller "
"than threshold, it will be trimmed."));
// On by default; CSProfileGenerator::postProcessProfiles() turns it off when
// the pre-inliner runs and the flag was not given explicitly.
static cl::opt<bool> CSProfMergeColdContext(
"csprof-merge-cold-context", cl::init(true),
cl::desc("If the total count of context profile is smaller than "
"the threshold, it will be merged into context-less base "
"profile."));
// Passed to the context trimmer when merging cold context profiles.
static cl::opt<uint32_t> CSProfMaxColdContextDepth(
"csprof-max-cold-context-depth", cl::init(1),
cl::desc("Keep the last K contexts while merging cold profile. 1 means the "
"context-less base profile"));
// Option whose storage is CSProfileGenerator::MaxContextDepth.
static cl::opt<int, true> CSProfMaxContextDepth(
"csprof-max-context-depth",
cl::desc("Keep the last K contexts while merging profile. -1 means no "
"depth limit."),
cl::location(llvm::sampleprof::CSProfileGenerator::MaxContextDepth));
// Density below this value triggers the "more samples" warning in
// showDensitySuggestion().
static cl::opt<double> HotFunctionDensityThreshold(
"hot-function-density-threshold", llvm::cl::init(1000),
llvm::cl::desc(
"specify density threshold for hot functions (default: 1000)"),
llvm::cl::Optional);
// When set, showDensitySuggestion() prints the computed density to outs().
static cl::opt<bool> ShowDensity("show-density", llvm::cl::init(false),
llvm::cl::desc("show profile density details"),
llvm::cl::Optional);
// When set, the updateFunctionSamples() implementations also recompute total
// samples.
static cl::opt<bool> UpdateTotalSamples(
"update-total-samples", llvm::cl::init(false),
llvm::cl::desc(
"Update total samples by accumulating all its body samples."),
llvm::cl::Optional);
// Options defined in another translation unit.
extern cl::opt<int> ProfileSummaryCutoffHot;
extern cl::opt<bool> UseContextLessSummary;
// Hidden, on by default; makes CSProfileGenerator::postProcessProfiles() run
// CSProfileConverter over the final profile map.
static cl::opt<bool> GenCSNestedProfile(
"gen-cs-nested-profile", cl::Hidden, cl::init(true),
cl::desc("Generate nested function profiles for CSSPGO"));
using namespace llvm;
using namespace sampleprof;
namespace llvm {
namespace sampleprof {
// Storage for --compress-recursion (bound via cl::location); starts at -1.
int32_t CSProfileGenerator::MaxCompressionSize = -1;
// Storage for --csprof-max-context-depth (bound via cl::location); starts at
// -1.
int CSProfileGenerator::MaxContextDepth = -1;
// Assigned from Binary->useFSDiscriminator() by the create() factories.
bool ProfileGeneratorBase::UseFSDiscriminator = false;
// Factory for a generator driven by raw sample counters. Picks the
// context-sensitive generator when ProfileIsCS is set (exiting with an error
// if the binary uses FS discriminators), otherwise the regular one, and
// publishes the binary's FS-discriminator setting to the global flags.
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary,
                             const ContextSampleCounterMap *SampleCounters,
                             bool ProfileIsCS) {
  if (ProfileIsCS && Binary->useFSDiscriminator())
    exitWithError("FS discriminator is not supported in CS profile.");

  std::unique_ptr<ProfileGeneratorBase> Result;
  if (ProfileIsCS)
    Result.reset(new CSProfileGenerator(Binary, SampleCounters));
  else
    Result.reset(new ProfileGenerator(Binary, SampleCounters));

  ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
  FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
  return Result;
}
// Factory for a generator driven by an already-built profile map. The
// context-sensitive generator receives Profiles as an lvalue; the regular
// generator receives it through std::move. Also publishes the binary's
// FS-discriminator setting to the global flags.
std::unique_ptr<ProfileGeneratorBase>
ProfileGeneratorBase::create(ProfiledBinary *Binary, SampleProfileMap &Profiles,
                             bool ProfileIsCS) {
  if (ProfileIsCS && Binary->useFSDiscriminator())
    exitWithError("FS discriminator is not supported in CS profile.");

  std::unique_ptr<ProfileGeneratorBase> Result;
  if (ProfileIsCS)
    Result.reset(new CSProfileGenerator(Binary, Profiles));
  else
    Result.reset(new ProfileGenerator(Binary, std::move(Profiles)));

  ProfileGeneratorBase::UseFSDiscriminator = Binary->useFSDiscriminator();
  FunctionSamples::ProfileIsFS = Binary->useFSDiscriminator();
  return Result;
}
void ProfileGeneratorBase::write(std::unique_ptr<SampleProfileWriter> Writer,
SampleProfileMap &ProfileMap) {
ProfileSymbolList SymbolList;
if (PopulateProfileSymbolList && OutputFormat == SPF_Ext_Binary) {
Binary->populateSymbolListFromDWARF(SymbolList);
Writer->setProfileSymbolList(&SymbolList);
}
if (std::error_code EC = Writer->write(ProfileMap))
exitWithError(std::move(EC));
}
// Creates a writer for --output/--format, applies --use-md5 when the format
// allows it (warning otherwise), and writes this generator's ProfileMap.
void ProfileGeneratorBase::write() {
  auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
  if (std::error_code EC = WriterOrErr.getError())
    exitWithError(EC, OutputFilename);

  if (UseMD5) {
    if (OutputFormat == SPF_Ext_Binary)
      WriterOrErr.get()->setUseMD5();
    else
      WithColor::warning() << "-use-md5 is ignored. Specify "
                              "--format=extbinary to enable it\n";
  }

  write(std::move(WriterOrErr.get()), ProfileMap);
}
// Emits user-facing diagnostics about the computed profile density: a warning
// when it is exactly zero or below --hot-function-density-threshold, and,
// with --show-density, the density value itself on stdout.
void ProfileGeneratorBase::showDensitySuggestion(double Density) {
  if (Density == 0.0) {
    WithColor::warning() << "The --profile-summary-cutoff-hot option may be "
                            "set too low. Please check your command.\n";
  } else if (Density < HotFunctionDensityThreshold) {
    WithColor::warning()
        << "AutoFDO is estimated to optimize better with "
        << format("%.1f", HotFunctionDensityThreshold / Density)
        << "x more samples. Please consider increasing sampling rate or "
           "profiling for longer duration to get more samples.\n";
  }

  if (!ShowDensity)
    return;

  // The cutoff value is divided by 10000 and printed followed by '%'.
  outs() << "Minimum profile density for hot functions with top "
         << format("%.2f",
                   static_cast<double>(ProfileSummaryCutoffHot.getValue()) /
                       10000)
         << "% total samples: " << format("%.1f", Density) << "\n";
}
double ProfileGeneratorBase::calculateDensity(const SampleProfileMap &Profiles,
uint64_t HotCntThreshold) {
double Density = DBL_MAX;
std::vector<const FunctionSamples *> HotFuncs;
for (auto &I : Profiles) {
auto &FuncSamples = I.second;
if (FuncSamples.getTotalSamples() < HotCntThreshold)
continue;
HotFuncs.emplace_back(&FuncSamples);
}
for (auto *FuncSamples : HotFuncs) {
auto *Func = Binary->getBinaryFunction(FuncSamples->getName());
if (!Func)
continue;
uint64_t FuncSize = Func->getFuncSize();
if (FuncSize == 0)
continue;
Density =
std::min(Density, static_cast<double>(FuncSamples->getTotalSamples()) /
FuncSize);
}
return Density == DBL_MAX ? 0.0 : Density;
}
// Splits the possibly overlapping, inclusive [begin, end] ranges in Ranges
// into non-overlapping ranges stored in DisjointRanges. Each output range
// carries the sum of the counts of all input ranges covering it.
//
// Every range start and end becomes a boundary point. The points are visited
// in ascending address order while a running count of currently open ranges
// is maintained.
void ProfileGeneratorBase::findDisjointRanges(RangeSample &DisjointRanges,
const RangeSample &Ranges) {
struct BoundaryPoint {
// Sum of counts of ranges beginning here; UINT64_MAX means "no range begins
// at this address".
uint64_t BeginCount = UINT64_MAX;
// Sum of counts of ranges ending here; UINT64_MAX means "no range ends at
// this address".
uint64_t EndCount = UINT64_MAX;
// Set when a zero-count range begins at this point.
bool IsZeroRangeBegin = false;
// Set when a zero-count range ends at this point.
bool IsZeroRangeEnd = false;
void addBeginCount(uint64_t Count) {
// First contribution replaces the sentinel before accumulating.
if (BeginCount == UINT64_MAX)
BeginCount = 0;
BeginCount += Count;
}
void addEndCount(uint64_t Count) {
// First contribution replaces the sentinel before accumulating.
if (EndCount == UINT64_MAX)
EndCount = 0;
EndCount += Count;
}
};
// Ordered map, so the loop below visits addresses in ascending order.
std::map<uint64_t, BoundaryPoint> Boundaries;
for (const auto &Item : Ranges) {
assert(Item.first.first <= Item.first.second &&
"Invalid instruction range");
auto &BeginPoint = Boundaries[Item.first.first];
auto &EndPoint = Boundaries[Item.first.second];
uint64_t Count = Item.second;
BeginPoint.addBeginCount(Count);
EndPoint.addEndCount(Count);
// Zero-count ranges are flagged so the sweep can tell "inside a zero range"
// apart from "no open range at all".
if (Count == 0) {
BeginPoint.IsZeroRangeBegin = true;
EndPoint.IsZeroRangeEnd = true;
}
}
// UINT64_MAX means there is no open range starting before the next boundary.
uint64_t BeginAddress = UINT64_MAX;
// Adjusted by the zero-range begin/end flags of each visited point.
int ZeroRangeDepth = 0;
// Running sum of counts of the ranges open at the current position.
uint64_t Count = 0;
for (const auto &Item : Boundaries) {
uint64_t Address = Item.first;
const BoundaryPoint &Point = Item.second;
if (Point.BeginCount != UINT64_MAX) {
// A range begins here: close the pending piece just before this address.
if (BeginAddress != UINT64_MAX)
DisjointRanges[{BeginAddress, Address - 1}] = Count;
Count += Point.BeginCount;
BeginAddress = Address;
ZeroRangeDepth += Point.IsZeroRangeBegin;
}
if (Point.EndCount != UINT64_MAX) {
assert((BeginAddress != UINT64_MAX) &&
"First boundary point cannot be 'end' point");
// A range ends here: the pending piece includes this address.
DisjointRanges[{BeginAddress, Address}] = Count;
assert(Count >= Point.EndCount && "Mismatched live ranges");
Count -= Point.EndCount;
BeginAddress = Address + 1;
ZeroRangeDepth -= Point.IsZeroRangeEnd;
// Nothing remains open (neither counted nor zero ranges): invalidate
// BeginAddress so no piece is emitted for the gap up to the next begin.
if (Count == 0 && ZeroRangeDepth == 0)
BeginAddress = UINT64_MAX;
}
}
}
// Raises the body sample count recorded at LeafLoc's (line offset, base
// discriminator) to at least Count scaled by the duplication factor. If the
// existing count is already larger, the profile is left untouched.
void ProfileGeneratorBase::updateBodySamplesforFunctionProfile(
    FunctionSamples &FunctionProfile, const SampleContextFrame &LeafLoc,
    uint64_t Count) {
  const auto &Loc = LeafLoc.Location;
  const uint32_t BaseDisc = getBaseDiscriminator(Loc.Discriminator);
  Count *= getDuplicationFactor(Loc.Discriminator);

  ErrorOr<uint64_t> Existing =
      FunctionProfile.findSamplesAt(Loc.LineOffset, BaseDisc);
  const uint64_t Previous = Existing ? Existing.get() : 0;
  if (Previous > Count)
    return;

  // Adding only the difference lifts the stored count to Count.
  FunctionProfile.addBodySamples(Loc.LineOffset, BaseDisc, Count - Previous);
}
// Calls updateTotalSamples() on every function profile in ProfileMap.
void ProfileGeneratorBase::updateTotalSamples() {
  for (auto &Entry : ProfileMap)
    Entry.second.updateTotalSamples();
}
// Calls updateCallsiteSamples() on every function profile in ProfileMap.
void ProfileGeneratorBase::updateCallsiteSamples() {
  for (auto &Entry : ProfileMap)
    Entry.second.updateCallsiteSamples();
}
// Refreshes callsite samples for all profiles, then, only when
// --update-total-samples is set, refreshes total samples as well.
void ProfileGeneratorBase::updateFunctionSamples() {
  updateCallsiteSamples();

  if (!UpdateTotalSamples)
    return;
  updateTotalSamples();
}
// Gathers the set of profiled functions and hands it to the binary. The raw
// sample counters are tried first; the LLVM profile is consulted only when
// the raw path reports failure.
void ProfileGeneratorBase::collectProfiledFunctions() {
  std::unordered_set<const BinaryFunction *> ProfiledFunctions;

  // Short-circuit keeps the second collector from running after a success.
  const bool Collected = collectFunctionsFromRawProfile(ProfiledFunctions) ||
                         collectFunctionsFromLLVMProfile(ProfiledFunctions);
  if (!Collected)
    llvm_unreachable("Unsupported input profile");

  Binary->setProfiledFunctions(ProfiledFunctions);
}
// Collects every function referenced by the raw sample counters into
// ProfiledFunctions. Returns false when there are no sample counters, true
// otherwise.
//
// For each counter entry the following locations are looked up:
//   - every address of an address-based context key,
//   - the start offset of every sampled range,
//   - both the source and the target offset of every sampled branch.
bool ProfileGeneratorBase::collectFunctionsFromRawProfile(
    std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
  if (!SampleCounters)
    return false;

  // Records the function whose range contains Offset, if there is one.
  auto RecordFunctionAt = [&](uint64_t Offset) {
    if (FuncRange *FRange = Binary->findFuncRangeForOffset(Offset))
      ProfiledFunctions.insert(FRange->Func);
  };

  for (const auto &CI : *SampleCounters) {
    if (const auto *CtxKey = dyn_cast<AddrBasedCtxKey>(CI.first.getPtr())) {
      for (auto Addr : CtxKey->Context)
        RecordFunctionAt(Binary->virtualAddrToOffset(Addr));
    }

    for (const auto &Item : CI.second.RangeCounter)
      RecordFunctionAt(Item.first.first);

    for (const auto &Item : CI.second.BranchCounter) {
      // Key is (source, target). The target must come from .second; reading
      // .first twice would never record the function being branched to.
      RecordFunctionAt(Item.first.first);
      RecordFunctionAt(Item.first.second);
    }
  }
  return true;
}
// Adds the binary function for every profile in ProfileMap whose name
// resolves in the binary. Always returns true.
bool ProfileGenerator::collectFunctionsFromLLVMProfile(
    std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
  for (const auto &Entry : ProfileMap) {
    auto *Func = Binary->getBinaryFunction(Entry.first.getName());
    if (Func)
      ProfiledFunctions.insert(Func);
  }
  return true;
}
// Adds the binary function for every context-trie node that has a non-empty
// function name resolving in the binary. Always returns true.
bool CSProfileGenerator::collectFunctionsFromLLVMProfile(
    std::unordered_set<const BinaryFunction *> &ProfiledFunctions) {
  for (auto *Node : ContextTracker) {
    if (Node->getFuncName().empty())
      continue;
    auto *Func = Binary->getBinaryFunction(Node->getFuncName());
    if (Func)
      ProfiledFunctions.insert(Func);
  }
  return true;
}
// Returns the top-level profile keyed by FuncName, creating it (and setting
// its context) when it does not exist yet.
FunctionSamples &
ProfileGenerator::getTopLevelFunctionProfile(StringRef FuncName) {
  SampleContext Context(FuncName);
  auto Inserted = ProfileMap.emplace(Context, FunctionSamples());
  FunctionSamples &Profile = Inserted.first->second;

  // Only a freshly created profile needs its context assigned.
  if (Inserted.second)
    Profile.setContext(Context);
  return Profile;
}
// Drives non-CS profile generation: collect profiled functions, decode
// pseudo probes if the binary uses them, build the profile from the sample
// counters (probe-based or line-number-based), then post-process.
void ProfileGenerator::generateProfile() {
  collectProfiledFunctions();

  if (Binary->usePseudoProbes())
    Binary->decodePseudoProbe();

  // Without sample counters there is nothing to generate from.
  if (SampleCounters && Binary->usePseudoProbes())
    generateProbeBasedProfile();
  else if (SampleCounters)
    generateLineNumBasedProfile();

  postProcessProfiles();
}
// Post-processing pipeline for the non-CS profile. The order matters: the
// summary pass sets the thresholds that the later steps read.
void ProfileGenerator::postProcessProfiles() {
  // 1. Summary and hot/cold thresholds.
  computeSummaryAndThreshold(ProfileMap);
  // 2. Optional trimming of cold profiles.
  trimColdProfiles(ProfileMap, ColdCountThreshold);
  // 3. Density computation and reporting.
  calculateAndShowDensity(ProfileMap);
}
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
uint64_t ColdCntThreshold) {
if (!TrimColdProfile)
return;
std::vector<SampleContext> ColdProfiles;
for (const auto &I : ProfileMap) {
if (I.second.getTotalSamples() < ColdCntThreshold)
ColdProfiles.emplace_back(I.first);
}
for (const auto &I : ColdProfiles)
ProfileMap.erase(I);
}
void ProfileGenerator::generateLineNumBasedProfile() {
assert(SampleCounters->size() == 1 &&
"Must have one entry for profile generation.");
const SampleCounter &SC = SampleCounters->begin()->second;
populateBodySamplesForAllFunctions(SC.RangeCounter);
populateBoundarySamplesForAllFunctions(SC.BranchCounter);
updateFunctionSamples();
}
void ProfileGenerator::generateProbeBasedProfile() {
assert(SampleCounters->size() == 1 &&
"Must have one entry for profile generation.");
FunctionSamples::ProfileIsProbeBased = true;
const SampleCounter &SC = SampleCounters->begin()->second;
populateBodySamplesWithProbesForAllFunctions(SC.RangeCounter);
populateBoundarySamplesWithProbesForAllFunctions(SC.BranchCounter);
updateFunctionSamples();
}
void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
const RangeSample &RangeCounter) {
ProbeCounterMap ProbeCounter;
extractProbesFromRange(preprocessRangeCounter(RangeCounter), ProbeCounter,
false);
for (const auto &PI : ProbeCounter) {
const MCDecodedPseudoProbe *Probe = PI.first;
uint64_t Count = PI.second;
SampleContextFrameVector FrameVec;
Binary->getInlineContextForProbe(Probe, FrameVec, true);
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, Count);
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
if (Probe->isEntry())
FunctionProfile.addHeadSamples(Count);
}
}
void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
const BranchSample &BranchCounters) {
for (const auto &Entry : BranchCounters) {
uint64_t SourceOffset = Entry.first.first;
uint64_t TargetOffset = Entry.first.second;
uint64_t Count = Entry.second;
assert(Count != 0 && "Unexpected zero weight branch");
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
const MCDecodedPseudoProbe *CallProbe =
Binary->getCallProbeForAddr(SourceAddress);
if (CallProbe == nullptr)
continue;
SampleContextFrameVector FrameVec;
Binary->getInlineContextForProbe(CallProbe, FrameVec, true);
if (!FrameVec.empty()) {
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, 0);
FunctionProfile.addCalledTargetSamples(
FrameVec.back().Location.LineOffset, 0, CalleeName, Count);
}
}
}
// Walks FrameVec from the outermost frame to the leaf, creating nested
// (inlinee) profiles as needed, and adds Count to the total samples of every
// profile on the path. With pseudo probes, each visited profile also gets its
// function hash set from the binary's function descriptor. Returns the leaf
// profile. FrameVec[0] is read unconditionally.
FunctionSamples &ProfileGenerator::getLeafProfileAndAddTotalSamples(
    const SampleContextFrameVector &FrameVec, uint64_t Count) {
  // Shared per-frame bookkeeping: total samples plus (probe mode) the hash.
  auto Account = [&](FunctionSamples &Profile) {
    Profile.addTotalSamples(Count);
    if (Binary->usePseudoProbes()) {
      const auto *FuncDesc =
          Binary->getFuncDescForGUID(Function::getGUID(Profile.getName()));
      Profile.setFunctionHash(FuncDesc->FuncHash);
    }
  };

  FunctionSamples *Current = &getTopLevelFunctionProfile(FrameVec[0].FuncName);
  Account(*Current);

  for (size_t Idx = 1; Idx < FrameVec.size(); ++Idx) {
    // The callsite is the location inside the caller frame.
    const auto &CallerLoc = FrameVec[Idx - 1].Location;
    LineLocation Callsite(CallerLoc.LineOffset,
                          getBaseDiscriminator(CallerLoc.Discriminator));

    FunctionSamplesMap &Inlinees = Current->functionSamplesAt(Callsite);
    auto Inserted =
        Inlinees.emplace(FrameVec[Idx].FuncName.str(), FunctionSamples());
    if (Inserted.second) {
      SampleContext Context(FrameVec[Idx].FuncName);
      Inserted.first->second.setContext(Context);
    }

    Current = &Inserted.first->second;
    Account(*Current);
  }
  return *Current;
}
// Returns disjoint ranges built from RangeCounter plus zero-count entries for
// function ranges: all ranges of all binary functions with
// --fill-zero-for-all-funcs, otherwise the ranges the binary reports for each
// sampled range's start offset.
RangeSample
ProfileGenerator::preprocessRangeCounter(const RangeSample &RangeCounter) {
  RangeSample Seeded(RangeCounter.begin(), RangeCounter.end());

  if (FillZeroForAllFuncs) {
    for (auto &FuncEntry : Binary->getAllBinaryFunctions())
      for (auto &FuncRange : FuncEntry.second.Ranges) {
        // "+= 0" creates the entry if missing without changing a real count.
        Seeded[{FuncRange.first, FuncRange.second - 1}] += 0;
      }
  } else {
    for (const auto &Sampled : RangeCounter) {
      const uint64_t StartOffset = Sampled.first.first;
      for (const auto &FuncRange : Binary->getRangesForOffset(StartOffset))
        Seeded[{FuncRange.first, FuncRange.second - 1}] += 0;
    }
  }

  RangeSample Result;
  findDisjointRanges(Result, Seeded);
  return Result;
}
void ProfileGenerator::populateBodySamplesForAllFunctions(
const RangeSample &RangeCounter) {
for (const auto &Range : preprocessRangeCounter(RangeCounter)) {
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
uint64_t Count = Range.second;
InstructionPointer IP(Binary, RangeBegin, true);
if (IP.Address > RangeEnd)
continue;
do {
uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(Offset);
if (!FrameVec.empty()) {
FunctionSamples &FunctionProfile = getLeafProfileAndAddTotalSamples(
FrameVec, Count * Binary->getInstSize(Offset));
updateBodySamplesforFunctionProfile(FunctionProfile, FrameVec.back(),
Count);
}
} while (IP.advance() && IP.Address <= RangeEnd);
}
}
// Returns the canonical name of the function whose range starts at
// TargetOffset, provided that range is flagged as a function entry. Returns
// an empty StringRef otherwise.
StringRef ProfileGeneratorBase::getCalleeNameForOffset(uint64_t TargetOffset) {
  auto *FRange = Binary->findFuncRangeForStartOffset(TargetOffset);
  if (FRange && FRange->IsFuncEntry)
    return FunctionSamples::getCanonicalFnName(FRange->getFuncName());
  return StringRef();
}
void ProfileGenerator::populateBoundarySamplesForAllFunctions(
const BranchSample &BranchCounters) {
for (const auto &Entry : BranchCounters) {
uint64_t SourceOffset = Entry.first.first;
uint64_t TargetOffset = Entry.first.second;
uint64_t Count = Entry.second;
assert(Count != 0 && "Unexpected zero weight branch");
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
const SampleContextFrameVector &FrameVec =
Binary->getFrameLocationStack(SourceOffset);
if (!FrameVec.empty()) {
FunctionSamples &FunctionProfile =
getLeafProfileAndAddTotalSamples(FrameVec, 0);
FunctionProfile.addCalledTargetSamples(
FrameVec.back().Location.LineOffset,
getBaseDiscriminator(FrameVec.back().Location.Discriminator),
CalleeName, Count);
}
FunctionSamples &CalleeProfile = getTopLevelFunctionProfile(CalleeName);
CalleeProfile.addHeadSamples(Count);
}
}
// Computes the density of Profiles against HotCountThreshold and reports it
// through showDensitySuggestion().
void ProfileGeneratorBase::calculateAndShowDensity(
    const SampleProfileMap &Profiles) {
  showDensitySuggestion(calculateDensity(Profiles, HotCountThreshold));
}
// Returns the FunctionSamples attached to ContextNode. When the node has
// none, a new one is appended to FSamplesList, named after the node's
// function and attached. When WasLeafInlined is true, the ContextWasInlined
// attribute is set on the profile's context, for new and existing profiles
// alike.
FunctionSamples *
CSProfileGenerator::getOrCreateFunctionSamples(ContextTrieNode *ContextNode,
                                               bool WasLeafInlined) {
  FunctionSamples *Profile = ContextNode->getFunctionSamples();
  if (Profile == nullptr) {
    FSamplesList.emplace_back();
    Profile = &FSamplesList.back();
    Profile->setName(ContextNode->getFuncName());
    ContextNode->setFunctionSamples(Profile);
  }

  if (WasLeafInlined)
    Profile->getContext().setAttribute(ContextWasInlined);
  return Profile;
}
// Gets or creates the trie node for Context and makes sure it carries a
// FunctionSamples (see getOrCreateFunctionSamples). Returns the node.
ContextTrieNode *
CSProfileGenerator::getOrCreateContextNode(const SampleContextFrames Context,
                                           bool WasLeafInlined) {
  ContextTrieNode *Node = ContextTracker.getOrCreateContextPath(Context, true);
  // Called for its side effect; the returned profile is not needed here.
  getOrCreateFunctionSamples(Node, WasLeafInlined);
  return Node;
}
// Drives context-sensitive profile generation: mark the profile as CS,
// collect profiled functions, decode pseudo probes if used, build the profile
// from the sample counters, optionally compute inlined-context sizes, then
// post-process.
void CSProfileGenerator::generateProfile() {
  FunctionSamples::ProfileIsCS = true;

  collectProfiledFunctions();

  if (Binary->usePseudoProbes())
    Binary->decodePseudoProbe();

  // Without sample counters there is nothing to generate from.
  if (SampleCounters && Binary->usePseudoProbes())
    generateProbeBasedProfile();
  else if (SampleCounters)
    generateLineNumBasedProfile();

  if (Binary->getTrackFuncContextSize())
    computeSizeForProfiledFunctions();

  postProcessProfiles();
}
// Asks the binary to compute the inlined-context size of every profiled
// function, then flushes the binary's symbolizer.
void CSProfileGenerator::computeSizeForProfiledFunctions() {
  for (auto *Func : Binary->getProfiledFunctions())
    Binary->computeInlinedContextSizeForFunc(Func);

  Binary->flushSymbolizer();
}
void CSProfileGenerator::updateFunctionSamples() {
for (auto *Node : ContextTracker) {
FunctionSamples *FSamples = Node->getFunctionSamples();
if (FSamples) {
if (UpdateTotalSamples)
FSamples->updateTotalSamples();
FSamples->updateCallsiteSamples();
}
}
}
void CSProfileGenerator::generateLineNumBasedProfile() {
for (const auto &CI : *SampleCounters) {
const auto *CtxKey = cast<StringBasedCtxKey>(CI.first.getPtr());
ContextTrieNode *ContextNode = &getRootContext();
if (!CtxKey->Context.empty()) {
ContextNode =
getOrCreateContextNode(CtxKey->Context, CtxKey->WasLeafInlined);
populateBodySamplesForFunction(*ContextNode->getFunctionSamples(),
CI.second.RangeCounter);
}
populateBoundarySamplesForFunction(ContextNode, CI.second.BranchCounter);
}
populateInferredFunctionSamples(getRootContext());
updateFunctionSamples();
}
// Splits RangeCounter into disjoint ranges and, for every instruction in each
// non-zero range that has an inline leaf frame location, updates the body
// sample at that location and adds Count to the profile's total samples.
void CSProfileGenerator::populateBodySamplesForFunction(
    FunctionSamples &FunctionProfile, const RangeSample &RangeCounter) {
  RangeSample Disjoint;
  findDisjointRanges(Disjoint, RangeCounter);

  for (const auto &Piece : Disjoint) {
    const uint64_t FirstAddr = Binary->offsetToVirtualAddr(Piece.first.first);
    const uint64_t LastAddr = Binary->offsetToVirtualAddr(Piece.first.second);
    const uint64_t Count = Piece.second;
    // Zero-count pieces are skipped entirely.
    if (Count == 0)
      continue;

    InstructionPointer IP(Binary, FirstAddr, true);
    // Skip the range when the instruction pointer starts past its end.
    if (IP.Address > LastAddr)
      continue;

    do {
      const uint64_t Offset = Binary->virtualAddrToOffset(IP.Address);
      auto LeafLoc = Binary->getInlineLeafFrameLoc(Offset);
      if (LeafLoc) {
        updateBodySamplesforFunctionProfile(FunctionProfile, *LeafLoc, Count);
        FunctionProfile.addTotalSamples(Count);
      }
    } while (IP.advance() && IP.Address <= LastAddr);
  }
}
// For each sampled branch whose target resolves to a callee name: when Node
// is not the root and the source offset has an inline leaf location, records
// a called-target sample on Node's profile and uses that location as the
// callsite. The callee's child context under Node then receives head samples.
void CSProfileGenerator::populateBoundarySamplesForFunction(
    ContextTrieNode *Node, const BranchSample &BranchCounters) {
  for (const auto &Branch : BranchCounters) {
    const uint64_t SourceOffset = Branch.first.first;
    const uint64_t TargetOffset = Branch.first.second;
    const uint64_t Count = Branch.second;
    assert(Count != 0 && "Unexpected zero weight branch");

    StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
    if (CalleeName.empty())
      continue;

    ContextTrieNode *CallerNode = Node;
    // Stays (0, 0) unless a caller location is found below.
    LineLocation CalleeCallSite(0, 0);
    if (CallerNode != &getRootContext()) {
      auto LeafLoc = Binary->getInlineLeafFrameLoc(SourceOffset);
      if (LeafLoc) {
        const auto &Loc = LeafLoc->Location;
        CallerNode->getFunctionSamples()->addCalledTargetSamples(
            Loc.LineOffset, getBaseDiscriminator(Loc.Discriminator), CalleeName,
            Count);
        CalleeCallSite = LeafLoc->Location;
      }
    }

    ContextTrieNode *CalleeNode =
        CallerNode->getOrCreateChildContext(CalleeCallSite, CalleeName);
    getOrCreateFunctionSamples(CalleeNode)->addHeadSamples(Count);
  }
}
// Post-order walk of the context trie. For a node that has a profile without
// head samples and whose parent is not the root, an estimated call count is
// added to the parent's profile at the node's callsite: as a called-target
// sample, a body sample and total samples.
void CSProfileGenerator::populateInferredFunctionSamples(
    ContextTrieNode &Node) {
  // Children first.
  for (auto &Child : Node.getAllChildContext())
    populateInferredFunctionSamples(Child.second);

  FunctionSamples *CalleeProfile = Node.getFunctionSamples();
  // Nothing to infer without a profile or when head samples already exist.
  if (!CalleeProfile || CalleeProfile->getHeadSamples())
    return;

  ContextTrieNode *CallerNode = Node.getParentContext();
  if (CallerNode == &getRootContext())
    return;

  const LineLocation CallSite = Node.getCallSiteLoc();
  FunctionSamples &CallerProfile = *getOrCreateFunctionSamples(CallerNode);

  uint64_t CallCount = CalleeProfile->getHeadSamplesEstimate();
  // With no estimate and no body samples at all, fall back to a count of 1.
  if (CallCount == 0 && CalleeProfile->getBodySamples().size() == 0)
    CallCount = 1;

  CallerProfile.addCalledTargetSamples(CallSite.LineOffset,
                                       CallSite.Discriminator,
                                       Node.getFuncName(), CallCount);
  CallerProfile.addBodySamples(CallSite.LineOffset, CallSite.Discriminator,
                               CallCount);
  CallerProfile.addTotalSamples(CallCount);
}
// Recursively moves the profiles stored on the trie rooted at Node into
// ProfileMap. Context holds the frames from the top of the walk to Node's
// parent and is restored to its incoming state before returning.
void CSProfileGenerator::convertToProfileMap(
    ContextTrieNode &Node, SampleContextFrameVector &Context) {
  if (FunctionSamples *Profile = Node.getFunctionSamples()) {
    // The node's own frame is appended with location (0, 0).
    Context.emplace_back(Node.getFuncName(), LineLocation(0, 0));
    // Keep the context in Contexts and refer to that stored copy.
    SampleContextFrames Stored = *Contexts.insert(Context).first;
    auto Inserted = ProfileMap.emplace(Stored, std::move(*Profile));
    Inserted.first->second.getContext().setContext(Stored);
    Context.pop_back();
  }

  for (auto &Child : Node.getAllChildContext()) {
    ContextTrieNode &ChildNode = Child.second;
    // As a caller frame, this node carries the child's callsite location.
    Context.emplace_back(Node.getFuncName(), ChildNode.getCallSiteLoc());
    convertToProfileMap(ChildNode, Context);
    Context.pop_back();
  }
}
void CSProfileGenerator::convertToProfileMap() {
assert(ProfileMap.empty() &&
"ProfileMap should be empty before converting from the trie");
assert(IsProfileValidOnTrie &&
"Do not convert the trie twice, it's already destroyed");
SampleContextFrameVector Context;
for (auto &It : getRootContext().getAllChildContext())
convertToProfileMap(It.second, Context);
IsProfileValidOnTrie = false;
}
void CSProfileGenerator::postProcessProfiles() {
computeSummaryAndThreshold();
if (EnableCSPreInliner) {
ContextTracker.populateFuncToCtxtMap();
CSPreInliner(ContextTracker, *Binary, Summary.get()).run();
if (!CSProfMergeColdContext.getNumOccurrences())
CSProfMergeColdContext = false;
}
convertToProfileMap();
if (TrimColdProfile || CSProfMergeColdContext) {
SampleContextTrimmer(ProfileMap)
.trimAndMergeColdContextProfiles(
HotCountThreshold, TrimColdProfile, CSProfMergeColdContext,
CSProfMaxColdContextDepth, EnableCSPreInliner);
}
sampleprof::SampleProfileMap ContextLessProfiles;
for (const auto &I : ProfileMap) {
ContextLessProfiles[I.second.getName()].merge(I.second);
}
calculateAndShowDensity(ContextLessProfiles);
if (GenCSNestedProfile) {
CSProfileConverter CSConverter(ProfileMap);
CSConverter.convertProfiles();
FunctionSamples::ProfileIsCS = false;
}
}
// Builds the profile summary for Profiles with the default cutoffs and
// derives HotCountThreshold and ColdCountThreshold from its detailed summary.
void ProfileGeneratorBase::computeSummaryAndThreshold(
    SampleProfileMap &Profiles) {
  SampleProfileSummaryBuilder Builder(ProfileSummaryBuilder::DefaultCutoffs);
  Summary = Builder.computeSummaryForProfiles(Profiles);

  const auto &Detailed = Summary->getDetailedSummary();
  HotCountThreshold = ProfileSummaryBuilder::getHotCountThreshold(Detailed);
  ColdCountThreshold = ProfileSummaryBuilder::getColdCountThreshold(Detailed);
}
void CSProfileGenerator::computeSummaryAndThreshold() {
SampleProfileMap ContextLessProfiles;
ContextTracker.createContextLessProfileMap(ContextLessProfiles);
FunctionSamples::ProfileIsCS = false;
assert(
(!UseContextLessSummary.getNumOccurrences() || UseContextLessSummary) &&
"Don't set --profile-summary-contextless to false for profile "
"generation");
ProfileGeneratorBase::computeSummaryAndThreshold(ContextLessProfiles);
FunctionSamples::ProfileIsCS = true;
}
void ProfileGeneratorBase::extractProbesFromRange(
const RangeSample &RangeCounter, ProbeCounterMap &ProbeCounter,
bool FindDisjointRanges) {
const RangeSample *PRanges = &RangeCounter;
RangeSample Ranges;
if (FindDisjointRanges) {
findDisjointRanges(Ranges, RangeCounter);
PRanges = &Ranges;
}
for (const auto &Range : *PRanges) {
uint64_t RangeBegin = Binary->offsetToVirtualAddr(Range.first.first);
uint64_t RangeEnd = Binary->offsetToVirtualAddr(Range.first.second);
uint64_t Count = Range.second;
InstructionPointer IP(Binary, RangeBegin, true);
if (IP.Address > RangeEnd)
continue;
do {
const AddressProbesMap &Address2ProbesMap =
Binary->getAddress2ProbesMap();
auto It = Address2ProbesMap.find(IP.Address);
if (It != Address2ProbesMap.end()) {
for (const auto &Probe : It->second) {
ProbeCounter[&Probe] += Count;
}
}
} while (IP.advance() && IP.Address <= RangeEnd);
}
}
// Appends to ContextStack the inline contexts of the call probes found for
// Addresses, in the order the addresses appear. Addresses are scanned from
// the last one backwards and scanning stops at the first address that has no
// call probe, so only the trailing run of addresses with probes contributes.
static void
extractPrefixContextStack(SampleContextFrameVector &ContextStack,
                          const SmallVectorImpl<uint64_t> &Addresses,
                          ProfiledBinary *Binary) {
  // Collected back to front.
  SmallVector<const MCDecodedPseudoProbe *, 16> CallProbes;
  for (auto Addr : reverse(Addresses)) {
    const MCDecodedPseudoProbe *CallProbe = Binary->getCallProbeForAddr(Addr);
    if (CallProbe == nullptr)
      break;
    CallProbes.push_back(CallProbe);
  }

  // Replay in the original address order.
  for (const auto *P : reverse(CallProbes))
    Binary->getInlineContextForProbe(P, ContextStack, true);
}
void CSProfileGenerator::generateProbeBasedProfile() {
FunctionSamples::ProfileIsProbeBased = true;
for (const auto &CI : *SampleCounters) {
const AddrBasedCtxKey *CtxKey =
dyn_cast<AddrBasedCtxKey>(CI.first.getPtr());
SampleContextFrameVector ContextStack;
extractPrefixContextStack(ContextStack, CtxKey->Context, Binary);
populateBodySamplesWithProbes(CI.second.RangeCounter, ContextStack);
populateBoundarySamplesWithProbes(CI.second.BranchCounter, ContextStack);
}
}
void CSProfileGenerator::populateBodySamplesWithProbes(
const RangeSample &RangeCounter, SampleContextFrames ContextStack) {
ProbeCounterMap ProbeCounter;
extractProbesFromRange(RangeCounter, ProbeCounter);
std::unordered_map<MCDecodedPseudoProbeInlineTree *,
std::unordered_set<FunctionSamples *>>
FrameSamples;
for (const auto &PI : ProbeCounter) {
const MCDecodedPseudoProbe *Probe = PI.first;
uint64_t Count = PI.second;
if (!Probe->isBlock() || Count == 0)
continue;
ContextTrieNode *ContextNode =
getContextNodeForLeafProbe(ContextStack, Probe);
FunctionSamples &FunctionProfile = *ContextNode->getFunctionSamples();
FrameSamples[Probe->getInlineTreeNode()].insert(&FunctionProfile);
FunctionProfile.addBodySamplesForProbe(Probe->getIndex(), Count);
FunctionProfile.addTotalSamples(Count);
if (Probe->isEntry()) {
FunctionProfile.addHeadSamples(Count);
const auto *InlinerDesc = Binary->getInlinerDescForProbe(Probe);
ContextTrieNode *CallerNode = ContextNode->getParentContext();
if (InlinerDesc != nullptr && CallerNode != &getRootContext()) {
uint64_t CallerIndex = ContextNode->getCallSiteLoc().LineOffset;
assert(CallerIndex &&
"Inferred caller's location index shouldn't be zero!");
FunctionSamples &CallerProfile =
*getOrCreateFunctionSamples(CallerNode);
CallerProfile.setFunctionHash(InlinerDesc->FuncHash);
CallerProfile.addBodySamples(CallerIndex, 0, Count);
CallerProfile.addTotalSamples(Count);
CallerProfile.addCalledTargetSamples(CallerIndex, 0,
ContextNode->getFuncName(), Count);
}
}
}
for (auto &I : FrameSamples) {
for (auto *FunctionProfile : I.second) {
for (auto *Probe : I.first->getProbes()) {
FunctionProfile->addBodySamplesForProbe(Probe->getIndex(), 0);
}
}
}
}
void CSProfileGenerator::populateBoundarySamplesWithProbes(
const BranchSample &BranchCounter, SampleContextFrames ContextStack) {
for (const auto &BI : BranchCounter) {
uint64_t SourceOffset = BI.first.first;
uint64_t TargetOffset = BI.first.second;
uint64_t Count = BI.second;
uint64_t SourceAddress = Binary->offsetToVirtualAddr(SourceOffset);
const MCDecodedPseudoProbe *CallProbe =
Binary->getCallProbeForAddr(SourceAddress);
if (CallProbe == nullptr)
continue;
FunctionSamples &FunctionProfile =
getFunctionProfileForLeafProbe(ContextStack, CallProbe);
FunctionProfile.addBodySamples(CallProbe->getIndex(), 0, Count);
FunctionProfile.addTotalSamples(Count);
StringRef CalleeName = getCalleeNameForOffset(TargetOffset);
if (CalleeName.size() == 0)
continue;
FunctionProfile.addCalledTargetSamples(CallProbe->getIndex(), 0, CalleeName,
Count);
}
}
// Returns the trie node for LeafProbe under ContextStack. The probe's inline
// context is appended to a copy of ContextStack; the leaf frame has its
// location reset to (0, 0) and is kept out of recursion compression and
// trimming. The node's profile gets the function hash of the probe's
// function descriptor.
ContextTrieNode *CSProfileGenerator::getContextNodeForLeafProbe(
    SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
  // Work on a private copy so the caller's stack can be reused.
  SampleContextFrameVector FullStack(ContextStack.begin(), ContextStack.end());
  Binary->getInlineContextForProbe(LeafProbe, FullStack, true);

  // Detach the leaf, normalise its location, and process only the prefix.
  SampleContextFrame Leaf = FullStack.back();
  Leaf.Location = LineLocation(0, 0);
  FullStack.pop_back();
  CSProfileGenerator::compressRecursionContext(FullStack);
  CSProfileGenerator::trimContext(FullStack);
  FullStack.push_back(Leaf);

  const auto *FuncDesc = Binary->getFuncDescForGUID(LeafProbe->getGuid());
  const bool WasLeafInlined = LeafProbe->getInlineTreeNode()->hasInlineSite();
  ContextTrieNode *Node = getOrCreateContextNode(FullStack, WasLeafInlined);
  Node->getFunctionSamples()->setFunctionHash(FuncDesc->FuncHash);
  return Node;
}
// Convenience wrapper: resolves the trie node for LeafProbe under
// ContextStack and returns the profile attached to it.
FunctionSamples &CSProfileGenerator::getFunctionProfileForLeafProbe(
    SampleContextFrames ContextStack, const MCDecodedPseudoProbe *LeafProbe) {
  ContextTrieNode *Node = getContextNodeForLeafProbe(ContextStack, LeafProbe);
  return *Node->getFunctionSamples();
}
} }