Compiler projects using llvm
//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file defines type-erased base types and functions for building dataflow
//  analyses that run over Control-Flow Graphs (CFGs).
//
//===----------------------------------------------------------------------===//

#include <algorithm>
#include <memory>
#include <system_error>
#include <utility>
#include <vector>

#include "clang/AST/DeclCXX.h"
#include "clang/AST/OperationKinds.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Analysis/Analyses/PostOrderCFGView.h"
#include "clang/Analysis/CFG.h"
#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
#include "clang/Analysis/FlowSensitive/Transfer.h"
#include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"

namespace clang {
namespace dataflow {

class StmtToEnvMapImpl : public StmtToEnvMap {
public:
  StmtToEnvMapImpl(
      const ControlFlowContext &CFCtx,
      llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>>
          BlockToState)
      : CFCtx(CFCtx), BlockToState(BlockToState) {}

  const Environment *getEnvironment(const Stmt &S) const override {
    auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
    assert(BlockIt != CFCtx.getStmtToBlock().end());
    const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
    assert(State);
    return &State.value().Env;
  }

private:
  const ControlFlowContext &CFCtx;
  llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockToState;
};

/// Returns the index of `Block` in the successors of `Pred`.
static int blockIndexInPredecessor(const CFGBlock &Pred,
                                   const CFGBlock &Block) {
  auto BlockPos = llvm::find_if(
      Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) {
        return Succ && Succ->getBlockID() == Block.getBlockID();
      });
  return BlockPos - Pred.succ_begin();
}

/// Extends the flow condition of an environment based on a terminator
/// statement.
class TerminatorVisitor : public ConstStmtVisitor<TerminatorVisitor> {
public:
  TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
                    int BlockSuccIdx, TransferOptions TransferOpts)
      : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx),
        TransferOpts(TransferOpts) {}

  void VisitIfStmt(const IfStmt *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

  void VisitWhileStmt(const WhileStmt *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

  void VisitDoStmt(const DoStmt *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

  void VisitForStmt(const ForStmt *S) {
    auto *Cond = S->getCond();
    if (Cond != nullptr)
      extendFlowCondition(*Cond);
  }

  void VisitBinaryOperator(const BinaryOperator *S) {
    assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
    auto *LHS = S->getLHS();
    assert(LHS != nullptr);
    extendFlowCondition(*LHS);
  }

  void VisitConditionalOperator(const ConditionalOperator *S) {
    auto *Cond = S->getCond();
    assert(Cond != nullptr);
    extendFlowCondition(*Cond);
  }

private:
  void extendFlowCondition(const Expr &Cond) {
    // The terminator sub-expression might not be evaluated.
    if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
      transfer(StmtToEnv, Cond, Env, TransferOpts);

    // FIXME: The flow condition must be an r-value, so `SkipPast::None` should
    // suffice.
    auto *Val =
        cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference));
    // Value merging depends on flow conditions from different environments
    // being mutually exclusive -- that is, they cannot both be true in their
    // entirety (even if they may share some clauses). So, we need *some* value
    // for the condition expression, even if just an atom.
    if (Val == nullptr) {
      // FIXME: Consider introducing a helper for this get-or-create pattern.
      auto *Loc = Env.getStorageLocation(Cond, SkipPast::None);
      if (Loc == nullptr) {
        Loc = &Env.createStorageLocation(Cond);
        Env.setStorageLocation(Cond, *Loc);
      }
      Val = &Env.makeAtomicBoolValue();
      Env.setValue(*Loc, *Val);
    }

    // The condition must be inverted for the successor that encompasses the
    // "else" branch, if such exists.
    if (BlockSuccIdx == 1)
      Val = &Env.makeNot(*Val);

    Env.addToFlowCondition(*Val);
  }

  const StmtToEnvMap &StmtToEnv;
  Environment &Env;
  int BlockSuccIdx;
  TransferOptions TransferOpts;
};

/// Computes the input state for a given basic block by joining the output
/// states of its predecessors.
///
/// Requirements:
///
///   All predecessors of `Block` except those with loop back edges must have
///   already been transferred. States in `BlockStates` that are set to
///   `llvm::None` represent basic blocks that are not evaluated yet.
static TypeErasedDataflowAnalysisState computeBlockInputState(
    const ControlFlowContext &CFCtx,
    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
    const CFGBlock &Block, const Environment &InitEnv,
    TypeErasedDataflowAnalysis &Analysis) {
  llvm::DenseSet<const CFGBlock *> Preds;
  Preds.insert(Block.pred_begin(), Block.pred_end());
  if (Block.getTerminator().isTemporaryDtorsBranch()) {
    // This handles a special case where the code that produced the CFG includes
    // a conditional operator with a branch that constructs a temporary and
    // calls a destructor annotated as noreturn. The CFG models this as follows:
    //
    // B1 (contains the condition of the conditional operator) - succs: B2, B3
    // B2 (contains code that does not call a noreturn destructor) - succs: B4
    // B3 (contains code that calls a noreturn destructor) - succs: B4
    // B4 (has temporary destructor terminator) - succs: B5, B6
    // B5 (noreturn block that is associated with the noreturn destructor call)
    // B6 (contains code that follows the conditional operator statement)
    //
    // The first successor (B5 above) of a basic block with a temporary
    // destructor terminator (B4 above) is the block that evaluates the
    // destructor. If that block has a noreturn element then the predecessor
    // block that constructed the temporary object (B3 above) is effectively a
    // noreturn block and its state should not be used as input for the state
    // of the block that has a temporary destructor terminator (B4 above). This
    // holds regardless of which branch of the ternary operator calls the
    // noreturn destructor. However, it doesn't cases where a nested ternary
    // operator includes a branch that contains a noreturn destructor call.
    //
    // See `NoreturnDestructorTest` for concrete examples.
    if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
      auto StmtBlock = CFCtx.getStmtToBlock().find(Block.getTerminatorStmt());
      assert(StmtBlock != CFCtx.getStmtToBlock().end());
      Preds.erase(StmtBlock->getSecond());
    }
  }

  llvm::Optional<TypeErasedDataflowAnalysisState> MaybeState;
  bool ApplyBuiltinTransfer = Analysis.applyBuiltinTransfer();

  for (const CFGBlock *Pred : Preds) {
    // Skip if the `Block` is unreachable or control flow cannot get past it.
    if (!Pred || Pred->hasNoReturnElement())
      continue;

    // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
    // loop back edge to `Block`.
    const llvm::Optional<TypeErasedDataflowAnalysisState> &MaybePredState =
        BlockStates[Pred->getBlockID()];
    if (!MaybePredState)
      continue;

    TypeErasedDataflowAnalysisState PredState = MaybePredState.value();
    if (ApplyBuiltinTransfer) {
      if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
        const StmtToEnvMapImpl StmtToEnv(CFCtx, BlockStates);
        TerminatorVisitor(StmtToEnv, PredState.Env,
                          blockIndexInPredecessor(*Pred, Block),
                          Analysis.builtinTransferOptions())
            .Visit(PredTerminatorStmt);
      }
    }

    if (MaybeState) {
      Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
      MaybeState->Env.join(PredState.Env, Analysis);
    } else {
      MaybeState = std::move(PredState);
    }
  }
  if (!MaybeState) {
    // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
    // to enable building analyses like computation of dominators that
    // initialize the state of each basic block differently.
    MaybeState.emplace(Analysis.typeErasedInitialElement(), InitEnv);
  }
  return *MaybeState;
}

/// Transfers `State` by evaluating `CfgStmt` in the context of `Analysis`.
/// `HandleTransferredStmt` (if provided) will be applied to `CfgStmt`, after it
/// is evaluated.
static void transferCFGStmt(
    const ControlFlowContext &CFCtx,
    llvm::ArrayRef<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates,
    const CFGStmt &CfgStmt, TypeErasedDataflowAnalysis &Analysis,
    TypeErasedDataflowAnalysisState &State,
    std::function<void(const CFGStmt &,
                       const TypeErasedDataflowAnalysisState &)>
        HandleTransferredStmt) {
  const Stmt *S = CfgStmt.getStmt();
  assert(S != nullptr);

  if (Analysis.applyBuiltinTransfer())
    transfer(StmtToEnvMapImpl(CFCtx, BlockStates), *S, State.Env,
             Analysis.builtinTransferOptions());
  Analysis.transferTypeErased(S, State.Lattice, State.Env);

  if (HandleTransferredStmt != nullptr)
    HandleTransferredStmt(CfgStmt, State);
}

/// Transfers `State` by evaluating `CfgInit`.
static void transferCFGInitializer(const CFGInitializer &CfgInit,
                                   TypeErasedDataflowAnalysisState &State) {
  const auto &ThisLoc = *cast<AggregateStorageLocation>(
      State.Env.getThisPointeeStorageLocation());

  const CXXCtorInitializer *Initializer = CfgInit.getInitializer();
  assert(Initializer != nullptr);

  const FieldDecl *Member = Initializer->getMember();
  if (Member == nullptr)
    // Not a field initializer.
    return;

  auto *InitStmt = Initializer->getInit();
  assert(InitStmt != nullptr);

  auto *InitStmtLoc =
      State.Env.getStorageLocation(*InitStmt, SkipPast::Reference);
  if (InitStmtLoc == nullptr)
    return;

  auto *InitStmtVal = State.Env.getValue(*InitStmtLoc);
  if (InitStmtVal == nullptr)
    return;

  if (Member->getType()->isReferenceType()) {
    auto &MemberLoc = ThisLoc.getChild(*Member);
    State.Env.setValue(MemberLoc,
                       State.Env.takeOwnership(
                           std::make_unique<ReferenceValue>(*InitStmtLoc)));
  } else {
    auto &MemberLoc = ThisLoc.getChild(*Member);
    State.Env.setValue(MemberLoc, *InitStmtVal);
  }
}

TypeErasedDataflowAnalysisState transferBlock(
    const ControlFlowContext &CFCtx,
    std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> &BlockStates,
    const CFGBlock &Block, const Environment &InitEnv,
    TypeErasedDataflowAnalysis &Analysis,
    std::function<void(const CFGStmt &,
                       const TypeErasedDataflowAnalysisState &)>
        HandleTransferredStmt) {
  TypeErasedDataflowAnalysisState State =
      computeBlockInputState(CFCtx, BlockStates, Block, InitEnv, Analysis);
  for (const CFGElement &Element : Block) {
    switch (Element.getKind()) {
    case CFGElement::Statement:
      transferCFGStmt(CFCtx, BlockStates, *Element.getAs<CFGStmt>(), Analysis,
                      State, HandleTransferredStmt);
      break;
    case CFGElement::Initializer:
      if (Analysis.applyBuiltinTransfer())
        transferCFGInitializer(*Element.getAs<CFGInitializer>(), State);
      break;
    default:
      // FIXME: Evaluate other kinds of `CFGElement`.
      break;
    }
  }
  return State;
}

llvm::Expected<std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(
    const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
    const Environment &InitEnv,
    std::function<void(const Stmt *, const TypeErasedDataflowAnalysisState &)>
        PostVisitStmt) {
  PostOrderCFGView POV(&CFCtx.getCFG());
  ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);

  std::vector<llvm::Optional<TypeErasedDataflowAnalysisState>> BlockStates;
  BlockStates.resize(CFCtx.getCFG().size(), llvm::None);

  // The entry basic block doesn't contain statements so it can be skipped.
  const CFGBlock &Entry = CFCtx.getCFG().getEntry();
  BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
                                     InitEnv};
  Worklist.enqueueSuccessors(&Entry);

  // Bugs in lattices and transfer functions can prevent the analysis from
  // converging. To limit the damage (infinite loops) that these bugs can cause,
  // limit the number of iterations.
  // FIXME: Consider making the maximum number of iterations configurable.
  // FIXME: Consider restricting the number of backedges followed, rather than
  // iterations.
  // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
  // of iterations, number of functions that time out, etc.
  static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
  static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
  const uint32_t RelativeMaxIterations =
      MaxAverageVisitsPerBlock * BlockStates.size();
  const uint32_t MaxIterations =
      std::min(RelativeMaxIterations, AbsoluteMaxIterations);
  uint32_t Iterations = 0;
  while (const CFGBlock *Block = Worklist.dequeue()) {
    if (++Iterations > MaxIterations) {
      return llvm::createStringError(std::errc::timed_out,
                                     "maximum number of iterations reached");
    }

    const llvm::Optional<TypeErasedDataflowAnalysisState> &OldBlockState =
        BlockStates[Block->getBlockID()];
    TypeErasedDataflowAnalysisState NewBlockState =
        transferBlock(CFCtx, BlockStates, *Block, InitEnv, Analysis);

    if (OldBlockState &&
        Analysis.isEqualTypeErased(OldBlockState.value().Lattice,
                                   NewBlockState.Lattice) &&
        OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
      // The state of `Block` didn't change after transfer so there's no need to
      // revisit its successors.
      continue;
    }

    BlockStates[Block->getBlockID()] = std::move(NewBlockState);

    // Do not add unreachable successor blocks to `Worklist`.
    if (Block->hasNoReturnElement())
      continue;

    Worklist.enqueueSuccessors(Block);
  }
  // FIXME: Consider evaluating unreachable basic blocks (those that have a
  // state set to `llvm::None` at this point) to also analyze dead code.

  if (PostVisitStmt) {
    for (const CFGBlock *Block : CFCtx.getCFG()) {
      // Skip blocks that were not evaluated.
      if (!BlockStates[Block->getBlockID()])
        continue;
      transferBlock(
          CFCtx, BlockStates, *Block, InitEnv, Analysis,
          [&PostVisitStmt](const clang::CFGStmt &Stmt,
                           const TypeErasedDataflowAnalysisState &State) {
            PostVisitStmt(Stmt.getStmt(), State);
          });
    }
  }

  return BlockStates;
}

} // namespace dataflow
} // namespace clang