Compiler projects using llvm
//===-- llvm-mc-disassemble-fuzzer.cpp - Fuzzer for the MC layer ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "llvm-c/Disassembler.h"
#include "llvm-c/Target.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

const unsigned AssemblyTextBufSize = 80;

static cl::opt<std::string>
    TripleName("triple", cl::desc("Target triple to assemble for, "
                                  "see -version for available targets"));

static cl::opt<std::string>
    MCPU("mcpu",
         cl::desc("Target a specific cpu type (-mcpu=help for details)"),
         cl::value_desc("cpu-name"), cl::init(""));

// This is useful for variable-length instruction sets.
static cl::opt<unsigned> InsnLimit(
    "insn-limit",
    cl::desc("Limit the number of instructions to process (0 for no limit)"),
    cl::value_desc("count"), cl::init(0));

static cl::list<std::string>
    MAttrs("mattr", cl::CommaSeparated,
           cl::desc("Target specific attributes (-mattr=help for details)"),
           cl::value_desc("a1,+a2,-a3,..."));
// The feature string derived from -mattr's values.
std::string FeaturesStr;

static cl::list<std::string>
    FuzzerArgs("fuzzer-args", cl::Positional,
               cl::desc("Options to pass to the fuzzer"),
               cl::PositionalEatsArgs);
static std::vector<char *> ModifiedArgv;

int DisassembleOneInput(const uint8_t *Data, size_t Size) {
  char AssemblyText[AssemblyTextBufSize];

  std::vector<uint8_t> DataCopy(Data, Data + Size);

  LLVMDisasmContextRef Ctx = LLVMCreateDisasmCPUFeatures(
      TripleName.c_str(), MCPU.c_str(), FeaturesStr.c_str(), nullptr, 0,
      nullptr, nullptr);
  assert(Ctx);
  uint8_t *p = DataCopy.data();
  unsigned Consumed;
  unsigned InstructionsProcessed = 0;
  do {
    Consumed = LLVMDisasmInstruction(Ctx, p, Size, 0, AssemblyText,
                                     AssemblyTextBufSize);
    Size -= Consumed;
    p += Consumed;

    InstructionsProcessed ++;
    if (InsnLimit != 0 && InstructionsProcessed < InsnLimit)
      break;
  } while (Consumed != 0);
  LLVMDisasmDispose(Ctx);
  return 0;
}

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  return DisassembleOneInput(Data, Size);
}

extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
                                                        char ***argv) {
  // The command line is unusual compared to other fuzzers due to the need to
  // specify the target. Options like -triple, -mcpu, and -mattr work like
  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
  // fuzzer itself.
  //
  // Examples:
  //
  // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
  // 4-bytes each and use the contents of ./corpus as the test corpus:
  //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
  //       -fuzzer-args -max_len=4 -runs=100000 ./corpus
  //
  // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
  // feature enabled using up to 64-byte inputs:
  //   llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
  //       -disassemble -fuzzer-args ./corpus
  //
  // If your aim is to find instructions that are not tested, then it is
  // advisable to constrain the maximum input size to a single instruction
  // using -max_len as in the first example. This results in a test corpus of
  // individual instructions that test unique paths. Without this constraint,
  // there will be considerable redundancy in the corpus.

  char **OriginalArgv = *argv;

  LLVMInitializeAllTargetInfos();
  LLVMInitializeAllTargetMCs();
  LLVMInitializeAllDisassemblers();

  cl::ParseCommandLineOptions(*argc, OriginalArgv);

  // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
  // the driver can parse its arguments.
  //
  // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
  // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
  // non-const buffer to avoid the need to clean up when the fuzzer terminates.
  ModifiedArgv.push_back(OriginalArgv[0]);
  for (const auto &FuzzerArg : FuzzerArgs) {
    for (int i = 1; i < *argc; ++i) {
      if (FuzzerArg == OriginalArgv[i])
        ModifiedArgv.push_back(OriginalArgv[i]);
    }
  }
  *argc = ModifiedArgv.size();
  *argv = ModifiedArgv.data();

  // Package up features to be passed to target/subtarget
  // We have to pass it via a global since the callback doesn't
  // permit any user data.
  if (MAttrs.size()) {
    SubtargetFeatures Features;
    for (unsigned i = 0; i != MAttrs.size(); ++i)
      Features.AddFeature(MAttrs[i]);
    FeaturesStr = Features.getString();
  }

  if (TripleName.empty())
    TripleName = sys::getDefaultTargetTriple();

  return 0;
}