Compiler projects using llvm
//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass generates machine instructions for the CTR loops related pseudos:
// 1: MTCTRPseudo/DecreaseCTRPseudo
// 2: MTCTR8Pseudo/DecreaseCTR8Pseudo
//
// If a CTR loop can be generated:
// 1: MTCTRPseudo/MTCTR8Pseudo will be converted to "mtctr"
// 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "bdnz/bdz" and
//    its user branch instruction can be deleted.
//
// If a CTR loop can not be generated due to clobber of CTR:
// 1: MTCTRPseudo/MTCTR8Pseudo can be deleted.
// 2: DecreaseCTRPseudo/DecreaseCTR8Pseudo will be converted to "addi -1" and
//    a "cmplwi/cmpldi".
//
// This pass runs just before register allocation, because we don't want
// register allocator to allocate register for DecreaseCTRPseudo if a CTR can be
// generated or if a CTR loop can not be generated, we don't have any condition
// register for the new added "cmplwi/cmpldi".
//
//===----------------------------------------------------------------------===//

#include "PPC.h"
#include "PPCInstrInfo.h"
#include "PPCSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/PassRegistry.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>

using namespace llvm;

#define DEBUG_TYPE "ppc-ctrloops"

STATISTIC(NumCTRLoops, "Number of CTR loops generated");
STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");

namespace {
class PPCCTRLoops : public MachineFunctionPass {
public:
  static char ID;

  PPCCTRLoops() : MachineFunctionPass(ID) {
    initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  const PPCInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;

  bool processLoop(MachineLoop *ML);
  bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;
  void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
                         MachineInstr *Dec);
  void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);
};
} // namespace

char PPCCTRLoops::ID = 0;

INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",
                    false, false)

FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }

bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {
  bool Changed = false;

  auto &MLI = getAnalysis<MachineLoopInfo>();
  TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());
  MRI = &MF.getRegInfo();

  for (auto ML : MLI) {
    if (ML->isOutermost())
      Changed |= processLoop(ML);
  }

  return Changed;
}

bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {
  if (!CheckReads) {
    // If we are only checking for defs, that is we are going to find
    // definitions before MTCTRloop, for this case:
    // CTR defination inside the callee of a call instruction will not impact
    // the defination of MTCTRloop, so we can use definesRegister() for the
    // check, no need to check the regmask.
    return (MI->definesRegister(PPC::CTR) &&
            !MI->registerDefIsDead(PPC::CTR)) ||
           (MI->definesRegister(PPC::CTR8) &&
            !MI->registerDefIsDead(PPC::CTR8));
  }

  if ((MI->modifiesRegister(PPC::CTR) && !MI->registerDefIsDead(PPC::CTR)) ||
      (MI->modifiesRegister(PPC::CTR8) && !MI->registerDefIsDead(PPC::CTR8)))
    return true;

  if (MI->getDesc().isCall())
    return true;

  // We define the CTR in the loop preheader, so if there is any CTR reader in
  // the loop, we also can not use CTR loop form.
  if (MI->readsRegister(PPC::CTR) || MI->readsRegister(PPC::CTR8))
    return true;

  return false;
}

bool PPCCTRLoops::processLoop(MachineLoop *ML) {
  bool Changed = false;

  // Align with HardwareLoop pass, process inner loops first.
  for (auto I = ML->begin(), E = ML->end(); I != E; ++I)
    Changed |= processLoop(*I);

  // If any inner loop is changed, outter loop must be without hardware loop
  // intrinsics.
  if (Changed)
    return true;

  auto IsLoopStart = [](MachineInstr &MI) {
    return MI.getOpcode() == PPC::MTCTRPseudo ||
           MI.getOpcode() == PPC::MTCTR8Pseudo;
  };

  auto SearchForStart =
      [&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {
    for (auto &MI : *MBB) {
      if (IsLoopStart(MI))
        return &MI;
    }
    return nullptr;
  };

  MachineInstr *Start = nullptr;
  MachineInstr *Dec = nullptr;
  bool InvalidCTRLoop = false;

  MachineBasicBlock *Preheader = ML->getLoopPreheader();
  // If there is no preheader for this loop, there must be no MTCTRPseudo
  // either.
  if (!Preheader)
    return false;

  Start = SearchForStart(Preheader);
  // This is not a CTR loop candidate.
  if (!Start)
    return false;

  // If CTR is live to the preheader, we can not redefine the CTR register.
  if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))
    InvalidCTRLoop = true;

  // Make sure there is also no CTR clobber in the block preheader between the
  // begin and MTCTR.
  for (MachineBasicBlock::reverse_instr_iterator I =
           std::next(Start->getReverseIterator());
       I != Preheader->instr_rend(); ++I)
    // Only check the definitions of CTR. If there is non-dead definition for
    // the CTR, we conservatively don't generate a CTR loop.
    if (isCTRClobber(&*I, /* CheckReads */ false)) {
      InvalidCTRLoop = true;
      break;
    }

  // Make sure there is also no CTR clobber/user in the block preheader between
  // MTCTR and the end.
  for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());
       I != Preheader->instr_end(); ++I)
    if (isCTRClobber(&*I, /* CheckReads */ true)) {
      InvalidCTRLoop = true;
      break;
    }

  // Find the CTR loop components and decide whether or not to fall back to a
  // normal loop.
  for (auto *MBB : reverse(ML->getBlocks())) {
    for (auto &MI : *MBB) {
      if (MI.getOpcode() == PPC::DecreaseCTRPseudo ||
          MI.getOpcode() == PPC::DecreaseCTR8Pseudo)
        Dec = &MI;
      else if (!InvalidCTRLoop)
        // If any instruction clobber CTR, then we can not generate a CTR loop.
        InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);
    }
    if (Dec && InvalidCTRLoop)
      break;
  }

  assert(Dec && "CTR loop is not complete!");

  if (InvalidCTRLoop) {
    expandNormalLoops(ML, Start, Dec);
    ++NumNormalLoops;
  }
  else {
    expandCTRLoops(ML, Start, Dec);
    ++NumCTRLoops;
  }
  return true;
}

void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,
                                    MachineInstr *Dec) {
  bool Is64Bit =
      Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();

  MachineBasicBlock *Preheader = Start->getParent();
  MachineBasicBlock *Exiting = Dec->getParent();
  assert((Preheader && Exiting) &&
         "Preheader and exiting should exist for CTR loop!");

  assert(Dec->getOperand(1).getImm() == 1 &&
         "Loop decrement stride must be 1");

  unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;
  unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;

  Register PHIDef =
      MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                         : &PPC::GPRC_and_GPRC_NOR0RegClass);

  Start->getParent()->getParent()->getProperties().reset(
      MachineFunctionProperties::Property::NoPHIs);

  // Generate "PHI" in the header block.
  auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),
                        DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);
  PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);

  Register ADDIDef =
      MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass
                                         : &PPC::GPRC_and_GPRC_NOR0RegClass);
  // Generate "addi -1" in the exiting block.
  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)
      .addReg(PHIDef)
      .addImm(-1);

  // Add other inputs for the PHI node.
  if (ML->isLoopLatch(Exiting)) {
    // There must be only two predecessors for the loop header, one is the
    // Preheader and the other one is loop latch Exiting. In hardware loop
    // insertion pass, the block containing DecreaseCTRloop must dominate all
    // loop latches. So there must be only one latch.
    assert(ML->getHeader()->pred_size() == 2 &&
           "Loop header predecessor is not right!");
    PHIMIB.addReg(ADDIDef).addMBB(Exiting);
  } else {
    // If the block containing DecreaseCTRloop is not a loop latch, we can use
    // ADDIDef as the value for all other blocks for the PHI. In hardware loop
    // insertion pass, the block containing DecreaseCTRloop must dominate all
    // loop latches.
    for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {
      if (ML->contains(P)) {
        assert(ML->isLoopLatch(P) &&
               "Loop's header in-loop predecessor is not loop latch!");
        PHIMIB.addReg(ADDIDef).addMBB(P);
      } else
        assert(P == Preheader &&
               "CTR loop should not be generated for irreducible loop!");
    }
  }

  // Generate the compare in the exiting block.
  Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);
  auto CMPMIB =
      BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)
          .addReg(ADDIDef)
          .addImm(0);

  BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),
          Dec->getOperand(0).getReg())
      .addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);

  // Remove the pseudo instructions.
  Start->eraseFromParent();
  Dec->eraseFromParent();
}

void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,
                                 MachineInstr *Dec) {
  bool Is64Bit =
      Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();

  MachineBasicBlock *Preheader = Start->getParent();
  MachineBasicBlock *Exiting = Dec->getParent();
  assert((Preheader && Exiting) &&
         "Preheader and exiting should exist for CTR loop!");

  assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");

  unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;
  unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;
  auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());
  assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&
         "There should be only one user for loop decrement pseudo!");

  unsigned Opcode = 0;
  switch (BrInstr->getOpcode()) {
  case PPC::BC:
    Opcode = BDNZOpcode;
    (void) ML;
    assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&
           "Invalid ctr loop!");
    break;
  case PPC::BCn:
    Opcode = BDZOpcode;
    assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&
           "Invalid ctr loop!");
    break;
  default:
    llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");
  }

  unsigned MTCTROpcode = Is64Bit ? PPC::MTCTR8 : PPC::MTCTR;

  // Generate "mtctr" in the loop preheader.
  BuildMI(*Preheader, Start, Start->getDebugLoc(), TII->get(MTCTROpcode))
      .addReg(Start->getOperand(0).getReg());

  // Generate "bdnz/bdz" in the exiting block just before the terminator.
  BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))
      .addMBB(BrInstr->getOperand(1).getMBB());

  // Remove the pseudo instructions.
  Start->eraseFromParent();
  BrInstr->eraseFromParent();
  Dec->eraseFromParent();
}