Compiler projects using llvm
//===-- llvm-sim.cpp - Find  similar sections of programs -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This program finds similar sections of a Module, and exports them as a JSON
// file.
//
// To find similarities contained across multiple modules, please use llvm-link
// first to merge the modules.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/ToolOutputFile.h"

using namespace llvm;
using namespace IRSimilarity;

static cl::opt<std::string> OutputFilename("o", cl::desc("Output Filename"),
                                           cl::init("-"),
                                           cl::value_desc("filename"));

static cl::opt<std::string> InputSourceFile(cl::Positional,
                                            cl::desc("<Source file>"),
                                            cl::init("-"),
                                            cl::value_desc("filename"));

/// Retrieve the unique number \p I was mapped to in parseBitcodeFile.
///
/// \param I - The Instruction to find the instruction number for.
/// \param LLVMInstNum - The mapping of Instructions to their location in the
/// module represented by an unsigned integer.
/// \returns The instruction number for \p I if it exists.
Optional<unsigned>
getPositionInModule(const Instruction *I,
                    const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
  assert(I && "Instruction is nullptr!");
  DenseMap<Instruction *, unsigned>::const_iterator It = LLVMInstNum.find(I);
  if (It == LLVMInstNum.end())
    return None;
  return It->second;
}

/// Exports the given SimilarityGroups to a JSON file at \p FilePath.
///
/// \param FilePath - The path to the output location.
/// \param SimSections - The similarity groups to process.
/// \param LLVMInstNum - The mapping of Instructions to their location in the
/// module represented by an unsigned integer.
/// \returns A nonzero error code if there was a failure creating the file.
std::error_code
exportToFile(const StringRef FilePath,
             const SimilarityGroupList &SimSections,
             const DenseMap<Instruction *, unsigned> &LLVMInstNum) {
  std::error_code EC;
  std::unique_ptr<ToolOutputFile> Out(
      new ToolOutputFile(FilePath, EC, sys::fs::OF_None));
  if (EC)
    return EC;

  json::OStream J(Out->os(), 1);
  J.objectBegin();

  unsigned SimOption = 1;
  // Process each list of SimilarityGroups organized by the Module.
  for (const SimilarityGroup &G : SimSections) {
    std::string SimOptionStr = std::to_string(SimOption);
    J.attributeBegin(SimOptionStr);
    J.arrayBegin();
    // For each file there is a list of the range where the similarity
    // exists.
    for (const IRSimilarityCandidate &C : G) {
      Optional<unsigned> Start =
          getPositionInModule((*C.front()).Inst, LLVMInstNum);
      Optional<unsigned> End =
          getPositionInModule((*C.back()).Inst, LLVMInstNum);

      assert(Start &&
             "Could not find instruction number for first instruction");
      assert(End && "Could not find instruction number for last instruction");

      J.object([&] {
        J.attribute("start", Start.value());
        J.attribute("end", End.value());
      });
    }
    J.arrayEnd();
    J.attributeEnd();
    SimOption++;
  }
  J.objectEnd();

  Out->keep();

  return EC;
}

int main(int argc, const char *argv[]) {
  InitLLVM X(argc, argv);

  cl::ParseCommandLineOptions(argc, argv, "LLVM IR Similarity Visualizer\n");

  LLVMContext CurrContext;
  SMDiagnostic Err;
  std::unique_ptr<Module> ModuleToAnalyze =
      parseIRFile(InputSourceFile, Err, CurrContext);

  if (!ModuleToAnalyze) {
    Err.print(argv[0], errs());
    return 1;
  }

  // Mapping from an Instruction pointer to its occurrence in a sequential
  // list of all the Instructions in a Module.
  DenseMap<Instruction *, unsigned> LLVMInstNum;

  // We give each instruction a number, which gives us a start and end value
  // for the beginning and end of each IRSimilarityCandidate.
  unsigned InstructionNumber = 1;
  for (Function &F : *ModuleToAnalyze)
    for (BasicBlock &BB : F)
      for (Instruction &I : BB.instructionsWithoutDebug())
        LLVMInstNum[&I]= InstructionNumber++;

  // The similarity identifier we will use to find the similar sections.
  IRSimilarityIdentifier SimIdent;
  SimilarityGroupList SimilaritySections =
      SimIdent.findSimilarity(*ModuleToAnalyze);

  std::error_code E =
      exportToFile(OutputFilename, SimilaritySections, LLVMInstNum);
  if (E) {
    errs() << argv[0] << ": " << E.message() << '\n';
    return 2;
  }

  return 0;
}