#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/SuffixTree.h"
using namespace llvm;
using namespace IRSimilarity;
namespace llvm {
cl::opt<bool>
DisableBranches("no-ir-sim-branch-matching", cl::init(false),
cl::ReallyHidden,
cl::desc("disable similarity matching, and outlining, "
"across branches for debugging purposes."));
cl::opt<bool>
DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
cl::ReallyHidden,
cl::desc("disable outlining indirect calls."));
cl::opt<bool>
MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
cl::desc("only allow matching call instructions if the "
"name and type signature match."));
cl::opt<bool>
DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
cl::desc("Don't match or outline intrinsics"));
}
IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
IRInstructionDataList &IDList)
: Inst(&I), Legal(Legality), IDL(&IDList) {
initializeInstruction();
}
void IRInstructionData::initializeInstruction() {
if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
CmpInst::Predicate Predicate = predicateForConsistency(C);
if (Predicate != C->getPredicate())
RevisedPredicate = Predicate;
}
for (Use &OI : Inst->operands()) {
if (isa<CmpInst>(Inst) && RevisedPredicate) {
OperVals.insert(OperVals.begin(), OI.get());
continue;
}
OperVals.push_back(OI.get());
}
if (PHINode *PN = dyn_cast<PHINode>(Inst))
for (BasicBlock *BB : PN->blocks())
OperVals.push_back(BB);
}
IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
: IDL(&IDList) {}
void IRInstructionData::setBranchSuccessors(
DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
assert(isa<BranchInst>(Inst) && "Instruction must be branch");
BranchInst *BI = cast<BranchInst>(Inst);
DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
BBNumIt = BasicBlockToInteger.find(BI->getParent());
assert(BBNumIt != BasicBlockToInteger.end() &&
"Could not find location for BasicBlock!");
int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
for (BasicBlock *Successor : BI->successors()) {
BBNumIt = BasicBlockToInteger.find(Successor);
assert(BBNumIt != BasicBlockToInteger.end() &&
"Could not find number for BasicBlock!");
int OtherBlockNumber = static_cast<int>(BBNumIt->second);
int Relative = OtherBlockNumber - CurrentBlockNumber;
RelativeBlockLocations.push_back(Relative);
}
}
void IRInstructionData::setCalleeName(bool MatchByName) {
CallInst *CI = dyn_cast<CallInst>(Inst);
assert(CI && "Instruction must be call");
CalleeName = "";
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
Intrinsic::ID IntrinsicID = II->getIntrinsicID();
FunctionType *FT = II->getFunctionType();
if (Intrinsic::isOverloaded(IntrinsicID))
CalleeName =
Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
else
CalleeName = Intrinsic::getName(IntrinsicID).str();
return;
}
if (!CI->isIndirectCall() && MatchByName)
CalleeName = CI->getCalledFunction()->getName().str();
}
void IRInstructionData::setPHIPredecessors(
DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
assert(isa<PHINode>(Inst) && "Instruction must be phi node");
PHINode *PN = cast<PHINode>(Inst);
DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
BBNumIt = BasicBlockToInteger.find(PN->getParent());
assert(BBNumIt != BasicBlockToInteger.end() &&
"Could not find location for BasicBlock!");
int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
BasicBlock *Incoming = PN->getIncomingBlock(Idx);
BBNumIt = BasicBlockToInteger.find(Incoming);
assert(BBNumIt != BasicBlockToInteger.end() &&
"Could not find number for BasicBlock!");
int OtherBlockNumber = static_cast<int>(BBNumIt->second);
int Relative = OtherBlockNumber - CurrentBlockNumber;
RelativeBlockLocations.push_back(Relative);
RelativeBlockLocations.push_back(Relative);
}
}
CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
switch (CI->getPredicate()) {
case CmpInst::FCMP_OGT:
case CmpInst::FCMP_UGT:
case CmpInst::FCMP_OGE:
case CmpInst::FCMP_UGE:
case CmpInst::ICMP_SGT:
case CmpInst::ICMP_UGT:
case CmpInst::ICMP_SGE:
case CmpInst::ICMP_UGE:
return CI->getSwappedPredicate();
default:
return CI->getPredicate();
}
}
CmpInst::Predicate IRInstructionData::getPredicate() const {
assert(isa<CmpInst>(Inst) &&
"Can only get a predicate from a compare instruction");
if (RevisedPredicate)
return RevisedPredicate.value();
return cast<CmpInst>(Inst)->getPredicate();
}
StringRef IRInstructionData::getCalleeName() const {
assert(isa<CallInst>(Inst) &&
"Can only get a name from a call instruction");
assert(CalleeName && "CalleeName has not been set");
return *CalleeName;
}
bool IRSimilarity::isClose(const IRInstructionData &A,
const IRInstructionData &B) {
if (!A.Legal || !B.Legal)
return false;
if (!A.Inst->isSameOperationAs(B.Inst)) {
if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
if (A.getPredicate() != B.getPredicate())
return false;
auto ZippedTypes = zip(A.OperVals, B.OperVals);
return all_of(
ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
return std::get<0>(R)->getType() == std::get<1>(R)->getType();
});
}
return false;
}
if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
if (GEP->isInBounds() != OtherGEP->isInBounds())
return false;
auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
return all_of(drop_begin(ZippedOperands),
[](std::tuple<llvm::Use &, llvm::Use &> R) {
return std::get<0>(R) == std::get<1>(R);
});
}
if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
if (A.getCalleeName().str() != B.getCalleeName().str())
return false;
}
if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
return false;
return true;
}
void IRInstructionMapper::convertToUnsignedVec(
BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
std::vector<unsigned> &IntegerMapping) {
BasicBlock::iterator It = BB.begin();
std::vector<unsigned> IntegerMappingForBB;
std::vector<IRInstructionData *> InstrListForBB;
for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
switch (InstClassifier.visit(*It)) {
case InstrType::Legal:
mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
break;
case InstrType::Illegal:
mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
break;
case InstrType::Invisible:
AddedIllegalLastTime = false;
break;
}
}
if (AddedIllegalLastTime)
mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
for (IRInstructionData *ID : InstrListForBB)
this->IDL->push_back(*ID);
llvm::append_range(InstrList, InstrListForBB);
llvm::append_range(IntegerMapping, IntegerMappingForBB);
}
unsigned IRInstructionMapper::mapToLegalUnsigned(
BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
std::vector<IRInstructionData *> &InstrListForBB) {
AddedIllegalLastTime = false;
if (CanCombineWithPrevInstr)
HaveLegalRange = true;
CanCombineWithPrevInstr = true;
IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
InstrListForBB.push_back(ID);
if (isa<BranchInst>(*It))
ID->setBranchSuccessors(BasicBlockToInteger);
if (isa<CallInst>(*It))
ID->setCalleeName(EnableMatchCallsByName);
if (isa<PHINode>(*It))
ID->setPHIPredecessors(BasicBlockToInteger);
bool WasInserted;
DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
ResultIt;
std::tie(ResultIt, WasInserted) =
InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
unsigned INumber = ResultIt->second;
if (WasInserted)
LegalInstrNumber++;
IntegerMappingForBB.push_back(INumber);
assert(LegalInstrNumber < IllegalInstrNumber &&
"Instruction mapping overflow!");
assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
"Tried to assign DenseMap tombstone or empty key to instruction.");
assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
"Tried to assign DenseMap tombstone or empty key to instruction.");
return INumber;
}
IRInstructionData *
IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
IRInstructionDataList &IDL) {
return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
}
IRInstructionData *
IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
}
IRInstructionDataList *
IRInstructionMapper::allocateIRInstructionDataList() {
return new (IDLAllocator->Allocate()) IRInstructionDataList();
}
unsigned IRInstructionMapper::mapToIllegalUnsigned(
BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
std::vector<IRInstructionData *> &InstrListForBB, bool End) {
CanCombineWithPrevInstr = false;
if (AddedIllegalLastTime)
return IllegalInstrNumber;
IRInstructionData *ID = nullptr;
if (!End)
ID = allocateIRInstructionData(*It, false, *IDL);
else
ID = allocateIRInstructionData(*IDL);
InstrListForBB.push_back(ID);
AddedIllegalLastTime = true;
unsigned INumber = IllegalInstrNumber;
IntegerMappingForBB.push_back(IllegalInstrNumber--);
assert(LegalInstrNumber < IllegalInstrNumber &&
"Instruction mapping overflow!");
assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
"IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
"IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
return INumber;
}
IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
IRInstructionData *FirstInstIt,
IRInstructionData *LastInstIt)
: StartIdx(StartIdx), Len(Len) {
assert(FirstInstIt != nullptr && "Instruction is nullptr!");
assert(LastInstIt != nullptr && "Instruction is nullptr!");
assert(StartIdx + Len > StartIdx &&
"Overflow for IRSimilarityCandidate range?");
assert(Len - 1 == static_cast<unsigned>(std::distance(
iterator(FirstInstIt), iterator(LastInstIt))) &&
"Length of the first and last IRInstructionData do not match the "
"given length");
unsigned LocalValNumber = 1;
IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
for (Value *Arg : ID->OperVals)
if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
ValueToNumber.try_emplace(Arg, LocalValNumber);
NumberToValue.try_emplace(LocalValNumber, Arg);
LocalValNumber++;
}
if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
NumberToValue.try_emplace(LocalValNumber, ID->Inst);
LocalValNumber++;
}
}
FirstInst = FirstInstIt;
LastInst = LastInstIt;
DenseSet<BasicBlock *> BBSet;
getBasicBlocks(BBSet);
for (BasicBlock *BB : BBSet) {
if (ValueToNumber.find(BB) != ValueToNumber.end())
continue;
ValueToNumber.try_emplace(BB, LocalValNumber);
NumberToValue.try_emplace(LocalValNumber, BB);
LocalValNumber++;
}
}
bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
if (A.getLength() != B.getLength())
return false;
auto InstrDataForBoth =
zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
return all_of(InstrDataForBoth,
[](std::tuple<IRInstructionData &, IRInstructionData &> R) {
IRInstructionData &A = std::get<0>(R);
IRInstructionData &B = std::get<1>(R);
if (!A.Legal || !B.Legal)
return false;
return isClose(A, B);
});
}
static bool checkNumberingAndReplaceCommutative(
const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
ArrayRef<Value *> &SourceOperands,
DenseSet<unsigned> &TargetValueNumbers){
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
unsigned ArgVal;
bool WasInserted;
for (Value *V : SourceOperands) {
ArgVal = SourceValueToNumberMapping.find(V)->second;
std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
std::make_pair(ArgVal, TargetValueNumbers));
DenseSet<unsigned> NewSet;
for (unsigned &Curr : ValueMappingIt->second)
if (TargetValueNumbers.contains(Curr))
NewSet.insert(Curr);
if (NewSet.empty())
return false;
if (NewSet.size() != ValueMappingIt->second.size())
ValueMappingIt->second.swap(NewSet);
if (ValueMappingIt->second.size() != 1)
continue;
unsigned ValToRemove = *ValueMappingIt->second.begin();
for (Value *InnerV : SourceOperands) {
if (V == InnerV)
continue;
unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
continue;
ValueMappingIt->second.erase(ValToRemove);
if (ValueMappingIt->second.empty())
return false;
}
}
return true;
}
bool checkNumberingAndReplace(
DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
unsigned SourceArgVal, unsigned TargetArgVal) {
bool WasInserted;
DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
if (WasInserted)
return true;
DenseSet<unsigned> &TargetSet = Val->second;
if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
TargetSet.clear();
TargetSet.insert(TargetArgVal);
return true;
}
return TargetSet.contains(TargetArgVal);
}
bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
OperandMapping A, OperandMapping B) {
ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
unsigned OperandLength = A.OperVals.size();
for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
return false;
if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
return false;
}
return true;
}
bool IRSimilarityCandidate::compareCommutativeOperandMapping(
OperandMapping A, OperandMapping B) {
DenseSet<unsigned> ValueNumbersA;
DenseSet<unsigned> ValueNumbersB;
ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
unsigned OperandLength = A.OperVals.size();
for (unsigned Idx = 0; Idx < OperandLength;
Idx++, VItA++, VItB++) {
ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
}
if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
A.ValueNumberMapping, A.OperVals,
ValueNumbersB))
return false;
if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
B.ValueNumberMapping, B.OperVals,
ValueNumbersA))
return false;
return true;
}
bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
RelativeLocMapping B) {
BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
DenseSet<BasicBlock *> BasicBlockA;
DenseSet<BasicBlock *> BasicBlockB;
A.IRSC.getBasicBlocks(BasicBlockA);
B.IRSC.getBasicBlocks(BasicBlockB);
bool AContained = BasicBlockA.contains(ABB);
bool BContained = BasicBlockB.contains(BBB);
if (AContained != BContained)
return false;
if (AContained)
return A.RelativeLocation == B.RelativeLocation;
return true;
}
bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
DenseMap<unsigned, DenseSet<unsigned>> MappingA;
DenseMap<unsigned, DenseSet<unsigned>> MappingB;
return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
}
typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
SmallVector<int, 4> &, ArrayRef<Value *> &,
ArrayRef<Value *> &>
ZippedRelativeLocationsT;
bool IRSimilarityCandidate::compareStructure(
const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
if (A.getLength() != B.getLength())
return false;
if (A.ValueToNumber.size() != B.ValueToNumber.size())
return false;
iterator ItA = A.begin();
iterator ItB = B.begin();
DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
unsigned SectionLength = A.getStartIdx() + A.getLength();
for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
ItA++, ItB++, Loc++) {
if (!isClose(*ItA, *ItB))
return false;
Instruction *IA = ItA->Inst;
Instruction *IB = ItB->Inst;
if (!ItA->Legal || !ItB->Legal)
return false;
ArrayRef<Value *> OperValsA = ItA->OperVals;
ArrayRef<Value *> OperValsB = ItB->OperVals;
unsigned InstValA = A.ValueToNumber.find(IA)->second;
unsigned InstValB = B.ValueToNumber.find(IB)->second;
bool WasInserted;
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
return false;
std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
return false;
if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
!isa<IntrinsicInst>(IA)) {
if (!compareCommutativeOperandMapping(
{A, OperValsA, ValueNumberMappingA},
{B, OperValsB, ValueNumberMappingB}))
return false;
continue;
}
if (!compareNonCommutativeOperandMapping(
{A, OperValsA, ValueNumberMappingA},
{B, OperValsB, ValueNumberMappingB}))
return false;
if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
!(isa<PHINode>(IA) && isa<PHINode>(IB)))
continue;
SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
OperValsA.size() != OperValsB.size())
return false;
ZippedRelativeLocationsT ZippedRelativeLocations =
zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
if (any_of(ZippedRelativeLocations,
[&A, &B](std::tuple<int, int, Value *, Value *> R) {
return !checkRelativeLocations(
{A, std::get<0>(R), std::get<2>(R)},
{B, std::get<1>(R), std::get<3>(R)});
}))
return false;
}
return true;
}
bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
const IRSimilarityCandidate &B) {
auto DoesOverlap = [](const IRSimilarityCandidate &X,
const IRSimilarityCandidate &Y) {
return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
};
return DoesOverlap(A, B) || DoesOverlap(B, A);
}
void IRSimilarityIdentifier::populateMapper(
Module &M, std::vector<IRInstructionData *> &InstrList,
std::vector<unsigned> &IntegerMapping) {
std::vector<IRInstructionData *> InstrListForModule;
std::vector<unsigned> IntegerMappingForModule;
Mapper.initializeForBBs(M);
for (Function &F : M) {
if (F.empty())
continue;
for (BasicBlock &BB : F) {
Mapper.convertToUnsignedVec(BB, InstrListForModule,
IntegerMappingForModule);
}
BasicBlock::iterator It = F.begin()->end();
Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
true);
if (InstrListForModule.size() > 0)
Mapper.IDL->push_back(*InstrListForModule.back());
}
llvm::append_range(InstrList, InstrListForModule);
llvm::append_range(IntegerMapping, IntegerMappingForModule);
}
void IRSimilarityIdentifier::populateMapper(
ArrayRef<std::unique_ptr<Module>> &Modules,
std::vector<IRInstructionData *> &InstrList,
std::vector<unsigned> &IntegerMapping) {
for (const std::unique_ptr<Module> &M : Modules)
populateMapper(*M, InstrList, IntegerMapping);
}
static void createCandidatesFromSuffixTree(
const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
unsigned StringLen = RS.Length;
if (StringLen < 2)
return;
for (const unsigned &StartIdx : RS.StartIndices) {
unsigned EndIdx = StartIdx + StringLen - 1;
bool ContainsIllegal = false;
for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
unsigned Key = IntegerMapping[CurrIdx];
if (Key > Mapper.IllegalInstrNumber) {
ContainsIllegal = true;
break;
}
}
if (ContainsIllegal)
continue;
std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
std::advance(StartIt, StartIdx);
std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
std::advance(EndIt, EndIdx);
CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
}
}
void IRSimilarityCandidate::createCanonicalRelationFrom(
IRSimilarityCandidate &SourceCand,
DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
assert(SourceCand.CanonNumToNumber.size() != 0 &&
"Base canonical relationship is empty!");
assert(SourceCand.NumberToCanonNum.size() != 0 &&
"Base canonical relationship is empty!");
assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
DenseSet<unsigned> UsedGVNs;
for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
unsigned SourceGVN = GVNMapping.first;
assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
unsigned ResultGVN;
if (GVNMapping.second.size() > 1) {
bool Found = false;
for (unsigned Val : GVNMapping.second) {
if (UsedGVNs.contains(Val))
continue;
DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
FromSourceMapping.find(Val);
if (!It->second.contains(SourceGVN))
continue;
Found = true;
ResultGVN = Val;
break;
}
assert(Found && "Could not find matching value for source GVN");
(void)Found;
} else
ResultGVN = *GVNMapping.second.begin();
UsedGVNs.insert(ResultGVN);
unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
}
DenseSet<BasicBlock *> BBSet;
getBasicBlocks(BBSet);
for (BasicBlock *BB : BBSet) {
unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
continue;
Value *FirstOutlineInst = BB == getStartBB()
? frontInstruction()
: &*BB->instructionsWithoutDebug().begin();
unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
Value *SourceV = *SourceCand.fromGVN(SourceGVN);
BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
}
}
void IRSimilarityCandidate::createCanonicalMappingFor(
IRSimilarityCandidate &CurrCand) {
assert(CurrCand.CanonNumToNumber.size() == 0 &&
"Canonical Relationship is non-empty");
assert(CurrCand.NumberToCanonNum.size() == 0 &&
"Canonical Relationship is non-empty");
unsigned CanonNum = 0;
for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
CanonNum++;
}
}
static void findCandidateStructures(
std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
InnerCandEndIt;
DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
bool SameStructure;
bool Inserted;
unsigned CurrentGroupNum = 0;
unsigned OuterGroupNum;
DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
for (CandIt = CandsForRepSubstring.begin(),
CandEndIt = CandsForRepSubstring.end();
CandIt != CandEndIt; CandIt++) {
CandToGroupIt = CandToGroup.find(&*CandIt);
if (CandToGroupIt == CandToGroup.end()) {
std::tie(CandToGroupIt, Inserted) =
CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
}
OuterGroupNum = CandToGroupIt->second;
CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
if (CurrentGroupPair == StructuralGroups.end()) {
IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
}
for (InnerCandIt = std::next(CandIt),
InnerCandEndIt = CandsForRepSubstring.end();
InnerCandIt != InnerCandEndIt; InnerCandIt++) {
CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
if (CandToGroupItInner != CandToGroup.end())
continue;
ValueNumberMappingA.clear();
ValueNumberMappingB.clear();
SameStructure = IRSimilarityCandidate::compareStructure(
*CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
if (!SameStructure)
continue;
InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
ValueNumberMappingB);
CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
CurrentGroupPair->second.push_back(*InnerCandIt);
}
}
}
void IRSimilarityIdentifier::findCandidates(
std::vector<IRInstructionData *> &InstrList,
std::vector<unsigned> &IntegerMapping) {
SuffixTree ST(IntegerMapping);
std::vector<IRSimilarityCandidate> CandsForRepSubstring;
std::vector<SimilarityGroup> NewCandidateGroups;
DenseMap<unsigned, SimilarityGroup> StructuralGroups;
for (SuffixTree::RepeatedSubstring &RS : ST) {
createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
CandsForRepSubstring);
if (CandsForRepSubstring.size() < 2)
continue;
findCandidateStructures(CandsForRepSubstring, StructuralGroups);
for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
if (Group.second.size() > 1)
SimilarityCandidates->push_back(Group.second);
CandsForRepSubstring.clear();
StructuralGroups.clear();
NewCandidateGroups.clear();
}
}
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
ArrayRef<std::unique_ptr<Module>> Modules) {
resetSimilarityCandidates();
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
populateMapper(Modules, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
return *SimilarityCandidates;
}
SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
resetSimilarityCandidates();
Mapper.InstClassifier.EnableBranches = this->EnableBranches;
Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
std::vector<IRInstructionData *> InstrList;
std::vector<unsigned> IntegerMapping;
populateMapper(M, InstrList, IntegerMapping);
findCandidates(InstrList, IntegerMapping);
return *SimilarityCandidates;
}
INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
"ir-similarity-identifier", false, true)
IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
: ModulePass(ID) {
initializeIRSimilarityIdentifierWrapperPassPass(
*PassRegistry::getPassRegistry());
}
bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
MatchCallsByName, !DisableIntrinsics,
false));
return false;
}
bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
IRSI.reset();
return false;
}
bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
IRSI->findSimilarity(M);
return false;
}
AnalysisKey IRSimilarityAnalysis::Key;
IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
ModuleAnalysisManager &) {
auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
MatchCallsByName, !DisableIntrinsics,
false);
IRSI.findSimilarity(M);
return IRSI;
}
PreservedAnalyses
IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
OS << CandVec.size() << " candidates of length "
<< CandVec.begin()->getLength() << ". Found in: \n";
for (IRSimilarityCandidate &Cand : CandVec) {
OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
<< ", Basic Block: ";
if (Cand.front()->Inst->getParent()->getName().str() == "")
OS << "(unnamed)";
else
OS << Cand.front()->Inst->getParent()->getName().str();
OS << "\n Start Instruction: ";
Cand.frontInstruction()->print(OS);
OS << "\n End Instruction: ";
Cand.backInstruction()->print(OS);
OS << "\n";
}
}
return PreservedAnalyses::all();
}
char IRSimilarityIdentifierWrapperPass::ID = 0;