// Compiler projects using llvm
//===-------------- RISCVSExtWRemoval.cpp - MI sext.w Removal -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// This pass removes unneeded sext.w instructions at the MI level.
//
//===---------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetInstrInfo.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-sextw-removal"

// Incremented once for every sext.w instruction erased by
// runOnMachineFunction.
STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
// Incremented once for every instruction that runOnMachineFunction rewrites
// to the opcode returned by getWOp.
STATISTIC(NumTransformedToWInstrs,
          "Number of instructions transformed to W-ops");

// Hidden command-line switch, off by default. When it is set,
// runOnMachineFunction returns immediately without touching the function.
static cl::opt<bool> DisableSExtWRemoval("riscv-disable-sextw-removal",
                                         cl::desc("Disable removal of sext.w"),
                                         cl::init(false), cl::Hidden);
namespace {

// Machine function pass that erases sext.w instructions whose source operand
// is already known to be sign-extended from bit 31. The work itself is done
// in runOnMachineFunction.
class RISCVSExtWRemoval : public MachineFunctionPass {
public:
  // Handed to the MachineFunctionPass constructor to identify this pass.
  static char ID;

  RISCVSExtWRemoval() : MachineFunctionPass(ID) {
    initializeRISCVSExtWRemovalPass(*PassRegistry::getPassRegistry());
  }

  // Human readable name of the pass.
  StringRef getPassName() const override {
    return "RISCV sext.w Removal";
  }

  // The pass only rewrites or erases instructions inside blocks, so the CFG
  // is reported as preserved on top of the base class requirements.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Returns true if any instruction of MF was changed or removed.
  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // end anonymous namespace

// Storage for the pass identifier that the constructor forwards to
// MachineFunctionPass.
char RISCVSExtWRemoval::ID = 0;
// Registers the pass under the name DEBUG_TYPE ("riscv-sextw-removal") with
// the description "RISCV sext.w Removal".
// NOTE(review): this macro is presumably what provides the
// initializeRISCVSExtWRemovalPass() function called from the constructor, and
// the two trailing `false` arguments are presumably its "CFG only" and
// "is analysis" flags -- confirm against LLVM's PassSupport.h.
INITIALIZE_PASS(RISCVSExtWRemoval, DEBUG_TYPE, "RISCV sext.w Removal", false,
                false)

// Factory entry point: returns a newly heap-allocated instance of the sext.w
// removal pass.
FunctionPass *llvm::createRISCVSExtWRemovalPass() {
  FunctionPass *NewPass = new RISCVSExtWRemoval();
  return NewPass;
}

// add uses of MI to the Worklist
static void addUses(const MachineInstr &MI,
                    SmallVectorImpl<const MachineInstr *> &Worklist,
                    MachineRegisterInfo &MRI) {
  for (auto &UserOp : MRI.reg_operands(MI.getOperand(0).getReg())) {
    const auto *User = UserOp.getParent();
    if (User == &MI) // ignore the def, current MI
      continue;
    Worklist.push_back(User);
  }
}

// returns true if all uses of OrigMI only depend on the lower word of its
// output, so we can transform OrigMI to the corresponding W-version.
// TODO: handle multiple interdependent transformations
static bool isAllUsesReadW(const MachineInstr &OrigMI,
                           MachineRegisterInfo &MRI) {

  SmallPtrSet<const MachineInstr *, 4> Visited;
  SmallVector<const MachineInstr *, 4> Worklist;

  Visited.insert(&OrigMI);
  addUses(OrigMI, Worklist, MRI);

  while (!Worklist.empty()) {
    const MachineInstr *MI = Worklist.pop_back_val();

    if (!Visited.insert(MI).second) {
      // If we've looped back to OrigMI through a PHI cycle, we can't transform
      // LD or LWU, because these operations use all 64 bits of input.
      if (MI == &OrigMI) {
        unsigned opcode = MI->getOpcode();
        if (opcode == RISCV::LD || opcode == RISCV::LWU)
          return false;
      }
      continue;
    }

    switch (MI->getOpcode()) {
    case RISCV::ADDIW:
    case RISCV::ADDW:
    case RISCV::DIVUW:
    case RISCV::DIVW:
    case RISCV::MULW:
    case RISCV::REMUW:
    case RISCV::REMW:
    case RISCV::SLLIW:
    case RISCV::SLLW:
    case RISCV::SRAIW:
    case RISCV::SRAW:
    case RISCV::SRLIW:
    case RISCV::SRLW:
    case RISCV::SUBW:
    case RISCV::ROLW:
    case RISCV::RORW:
    case RISCV::RORIW:
    case RISCV::CLZW:
    case RISCV::CTZW:
    case RISCV::CPOPW:
    case RISCV::SLLI_UW:
    case RISCV::FCVT_S_W:
    case RISCV::FCVT_S_WU:
    case RISCV::FCVT_D_W:
    case RISCV::FCVT_D_WU:
      continue;

    // these overwrite higher input bits, otherwise the lower word of output
    // depends only on the lower word of input. So check their uses read W.
    case RISCV::SLLI:
      if (MI->getOperand(2).getImm() >= 32)
        continue;
      addUses(*MI, Worklist, MRI);
      continue;
    case RISCV::ANDI:
      if (isUInt<11>(MI->getOperand(2).getImm()))
        continue;
      addUses(*MI, Worklist, MRI);
      continue;
    case RISCV::ORI:
      if (!isUInt<11>(MI->getOperand(2).getImm()))
        continue;
      addUses(*MI, Worklist, MRI);
      continue;

    case RISCV::BEXTI:
      if (MI->getOperand(2).getImm() >= 32)
        return false;
      continue;

    // For these, lower word of output in these operations, depends only on
    // the lower word of input. So, we check all uses only read lower word.
    case RISCV::COPY:
    case RISCV::PHI:

    case RISCV::ADD:
    case RISCV::ADDI:
    case RISCV::AND:
    case RISCV::MUL:
    case RISCV::OR:
    case RISCV::SLL:
    case RISCV::SUB:
    case RISCV::XOR:
    case RISCV::XORI:

    case RISCV::ADD_UW:
    case RISCV::ANDN:
    case RISCV::CLMUL:
    case RISCV::ORC_B:
    case RISCV::ORN:
    case RISCV::SEXT_B:
    case RISCV::SEXT_H:
    case RISCV::SH1ADD:
    case RISCV::SH1ADD_UW:
    case RISCV::SH2ADD:
    case RISCV::SH2ADD_UW:
    case RISCV::SH3ADD:
    case RISCV::SH3ADD_UW:
    case RISCV::XNOR:
    case RISCV::ZEXT_H_RV64:
      addUses(*MI, Worklist, MRI);
      continue;
    default:
      return false;
    }
  }
  return true;
}

// Returns true if MI always produces a value whose bits 63:32 all equal
// bit 31.
//
// It also returns true when MI does not have that property itself but can be
// converted to its W variant (e.g. ADD->ADDW) because all of its uses only
// read the lower word of its output. In that case MI is added to FixableDef
// so that the caller can convert it later.
// TODO: Allocate a bit in TSFlags for the W instructions?
// TODO: Add other W instructions.
static bool isSignExtendingOpW(MachineInstr &MI, MachineRegisterInfo &MRI,
                               SmallPtrSetImpl<MachineInstr *> &FixableDef) {
  // Registers MI as convertible if every user only relies on bits 31:0.
  auto MarkFixableIfOnlyWUsers = [&]() {
    if (!isAllUsesReadW(MI, MRI))
      return false;
    FixableDef.insert(&MI);
    return true;
  };

  switch (MI.getOpcode()) {
  default:
    return false;

  // LUI, the 32-bit load and the W instructions.
  case RISCV::LUI:
  case RISCV::LW:
  case RISCV::ADDIW:
  case RISCV::ADDW:
  case RISCV::SUBW:
  case RISCV::MULW:
  case RISCV::DIVW:
  case RISCV::DIVUW:
  case RISCV::REMW:
  case RISCV::REMUW:
  case RISCV::SLLIW:
  case RISCV::SLLW:
  case RISCV::SRAIW:
  case RISCV::SRAW:
  case RISCV::SRLIW:
  case RISCV::SRLW:
  case RISCV::ROLW:
  case RISCV::RORIW:
  case RISCV::RORW:
  case RISCV::CLZW:
  case RISCV::CPOPW:
  case RISCV::CTZW:
  case RISCV::FCVT_W_H:
  case RISCV::FCVT_WU_H:
  case RISCV::FCVT_W_S:
  case RISCV::FCVT_WU_S:
  case RISCV::FCVT_W_D:
  case RISCV::FCVT_WU_D:
  case RISCV::FMV_X_W:
  // Not W instructions, but each of them either sign extends from a smaller
  // size, always outputs a small integer, or puts zeros in bits 63:31.
  case RISCV::LB:
  case RISCV::LBU:
  case RISCV::LH:
  case RISCV::LHU:
  case RISCV::SLT:
  case RISCV::SLTI:
  case RISCV::SLTIU:
  case RISCV::SLTU:
  case RISCV::SEXT_B:
  case RISCV::SEXT_H:
  case RISCV::ZEXT_H_RV64:
  case RISCV::FMV_X_H:
  case RISCV::BEXT:
  case RISCV::BEXTI:
  case RISCV::CLZ:
  case RISCV::CPOP:
  case RISCV::CTZ:
    return true;

  // Shifting right far enough leaves a 32-bit sign-extended value. The
  // logical shift needs one more position than the arithmetic one.
  case RISCV::SRAI:
    return MI.getOperand(2).getImm() >= 32;
  case RISCV::SRLI:
    return MI.getOperand(2).getImm() > 32;

  // An ANDI with an 11 bit immediate will zero bits 63:11.
  case RISCV::ANDI:
    return isUInt<11>(MI.getOperand(2).getImm());
  // An ORI with an >11 bit immediate (negative 12-bit) will set bits 63:11.
  case RISCV::ORI:
    return !isUInt<11>(MI.getOperand(2).getImm());

  // Copying from X0 produces zero.
  case RISCV::COPY:
    return MI.getOperand(1).getReg() == RISCV::X0;

  case RISCV::ADDI: {
    // The LI pattern ADDI rd, X0, imm is sign extended as it stands.
    const MachineOperand &Src = MI.getOperand(1);
    if (Src.isReg() && Src.getReg() == RISCV::X0)
      return true;
    // Any other ADDI qualifies only if it can be turned into ADDIW.
    return MarkFixableIfOnlyWUsers();
  }

  // The remaining opcodes can be "fixed" by switching to the W version when
  // all users of the result only rely on bits 31:0.
  case RISCV::SLLI:
    // SLLIW reads the lowest 5 bits of the shift amount, while SLLI reads the
    // lowest 6 bits, so larger shift amounts cannot be converted.
    if (MI.getOperand(2).getImm() >= 32)
      return false;
    return MarkFixableIfOnlyWUsers();
  case RISCV::ADD:
  case RISCV::LD:
  case RISCV::LWU:
  case RISCV::MUL:
  case RISCV::SUB:
    return MarkFixableIfOnlyWUsers();
  }
}

// Returns true if the value defined by OrigMI is known to be sign-extended
// from bit 31.
//
// Starting at OrigMI, definitions are walked backwards. A definition is
// accepted if isSignExtendingOpW accepts it, or if it merely propagates sign
// extension and all of the relevant source definitions are accepted in turn.
// Instructions that isSignExtendingOpW wants converted to their W variant are
// collected in FixableDef; that set is only meaningful when true is returned.
static bool isSignExtendedW(MachineInstr &OrigMI, MachineRegisterInfo &MRI,
                            SmallPtrSetImpl<MachineInstr *> &FixableDef) {

  SmallPtrSet<const MachineInstr *, 4> Seen;
  SmallVector<MachineInstr *, 4> Pending;

  // Queues the instruction defining Reg. Fails if Reg is not a virtual
  // register or has no defining instruction.
  // TODO: Handle arguments and returns from calls?
  auto EnqueueDefOf = [&](Register Reg) {
    if (!Reg.isVirtual())
      return false;
    MachineInstr *Def = MRI.getVRegDef(Reg);
    if (!Def)
      return false;
    Pending.push_back(Def);
    return true;
  };

  Pending.push_back(&OrigMI);

  while (!Pending.empty()) {
    MachineInstr *Cur = Pending.pop_back_val();

    // An instruction that was already examined needs no second look.
    if (!Seen.insert(Cur).second)
      continue;

    // A sign extending operation ends the search along this path.
    if (isSignExtendingOpW(*Cur, MRI, FixableDef))
      continue;

    // Otherwise Cur has to be an instruction that propagates sign extension.
    const unsigned Opc = Cur->getOpcode();
    switch (Opc) {
    default:
      // Unknown opcode, give up.
      return false;

    // Single bit updates keep the property only when the bit index is below
    // 31; then it is enough that the 1st operand is sign extended.
    case RISCV::BCLRI:
    case RISCV::BINVI:
    case RISCV::BSETI:
      if (Cur->getOperand(2).getImm() >= 31 ||
          !EnqueueDefOf(Cur->getOperand(1).getReg()))
        return false;
      break;

    // For these, we just need to check if the 1st operand is sign extended.
    // COPY: the copied value is the result.
    // REM: |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
    //      DIV doesn't work because of the edge case
    //      0xf..f 8000 0000 / (long)-1.
    // ANDI/ORI/XORI: logical operations use a sign extended 12-bit immediate.
    case RISCV::COPY:
    case RISCV::REM:
    case RISCV::ANDI:
    case RISCV::ORI:
    case RISCV::XORI:
      if (!EnqueueDefOf(Cur->getOperand(1).getReg()))
        return false;
      break;

    // If all incoming values are sign-extended, the output of AND, OR, XOR,
    // MIN, MAX, or PHI is also sign-extended.
    case RISCV::REMU:
    case RISCV::AND:
    case RISCV::OR:
    case RISCV::XOR:
    case RISCV::ANDN:
    case RISCV::ORN:
    case RISCV::XNOR:
    case RISCV::MAX:
    case RISCV::MAXU:
    case RISCV::MIN:
    case RISCV::MINU:
    case RISCV::PHI: {
      // The input registers for PHI are operand 1, 3, ...
      // The input registers for others are operand 1 and 2.
      const bool IsPhi = Opc == RISCV::PHI;
      const unsigned End = IsPhi ? Cur->getNumOperands() : 3;
      const unsigned Step = IsPhi ? 2 : 1;

      for (unsigned Idx = 1; Idx != End; Idx += Step) {
        const MachineOperand &In = Cur->getOperand(Idx);
        if (!In.isReg() || !EnqueueDefOf(In.getReg()))
          return false;
      }
      break;
    }
    }
  }

  // If we get here, then every node we visited produces a sign extended value
  // or propagated sign extended values. So the result must be sign extended.
  return true;
}

// Maps an opcode that isSignExtendingOpW may record as fixable to the opcode
// of its W variant. Both LD and LWU are replaced by LW. Any other opcode is a
// programming error.
static unsigned getWOp(unsigned Opcode) {
  switch (Opcode) {
  case RISCV::ADD:
    return RISCV::ADDW;
  case RISCV::ADDI:
    return RISCV::ADDIW;
  case RISCV::MUL:
    return RISCV::MULW;
  case RISCV::SUB:
    return RISCV::SUBW;
  case RISCV::SLLI:
    return RISCV::SLLIW;
  case RISCV::LD:
  case RISCV::LWU:
    return RISCV::LW;
  }
  llvm_unreachable("Unexpected opcode for replacement with W variant");
}

// Removes every sext.w in MF whose source is already sign-extended.
//
// The pass does nothing for functions that should be skipped, when
// -riscv-disable-sextw-removal is given, or on non-64-bit subtargets.
// Otherwise it works in two phases:
//   1. collect all sext.w instructions with a virtual source register;
//   2. for each of them, ask isSignExtendedW whether the source is already
//      sign-extended; if so, convert the definitions it reported as fixable
//      to their W variants, forward the source register to all users of the
//      sext.w result and erase the sext.w.
// Returns true if at least one sext.w was removed.
bool RISCVSExtWRemoval::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()) || DisableSExtWRemoval)
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const RISCVSubtarget &ST = MF.getSubtarget<RISCVSubtarget>();

  if (!ST.is64Bit())
    return false;

  // Candidates are kept in program order. A pointer set must not be used
  // here: its iteration order depends on pointer values, and since removing
  // one sext.w changes the use lists seen when analysing the next one, the
  // processing order has to be reproducible. Every instruction is visited
  // exactly once below, so no de-duplication is needed.
  SmallVector<MachineInstr *, 4> SExtWRemovalCands;

  // Replacing instructions invalidates the MI iterator, so we collect the
  // candidates first and process them in a separate loop.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // We're looking for the sext.w pattern ADDIW rd, rs1, 0.
      if (!RISCV::isSEXT_W(MI))
        continue;

      // Input should be a virtual register.
      if (!MI.getOperand(1).getReg().isVirtual())
        continue;

      SExtWRemovalCands.push_back(&MI);
    }
  }

  bool MadeChange = false;
  for (MachineInstr *MI : SExtWRemovalCands) {
    SmallPtrSet<MachineInstr *, 4> FixableDef;
    Register SrcReg = MI->getOperand(1).getReg();

    // Without a defining instruction there is nothing to reason about.
    MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
    if (!SrcMI)
      continue;

    // If all definitions reaching MI sign-extend their output,
    // then sext.w is redundant
    if (!isSignExtendedW(*SrcMI, MRI, FixableDef))
      continue;

    // SrcReg is about to take over all uses of DstReg, so it has to satisfy
    // the register class of DstReg as well.
    Register DstReg = MI->getOperand(0).getReg();
    if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
      continue;

    // Replace Fixable instructions with their W versions. Each replacement is
    // built in front of the original with the same operands and memory
    // operands, then the original is erased.
    for (MachineInstr *Fixable : FixableDef) {
      MachineBasicBlock &MBB = *Fixable->getParent();
      const DebugLoc &DL = Fixable->getDebugLoc();
      unsigned Code = getWOp(Fixable->getOpcode());
      MachineInstrBuilder Replacement =
          BuildMI(MBB, Fixable, DL, ST.getInstrInfo()->get(Code));
      for (const MachineOperand &Op : Fixable->operands())
        Replacement.add(Op);
      for (MachineMemOperand *MMO : Fixable->memoperands())
        Replacement.addMemOperand(MMO);

      LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
      LLVM_DEBUG(dbgs() << "     with " << *Replacement);

      Fixable->eraseFromParent();
      ++NumTransformedToWInstrs;
    }

    LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
    MRI.replaceRegWith(DstReg, SrcReg);
    // SrcReg now has additional uses, so existing kill flags may be stale.
    MRI.clearKillFlags(SrcReg);
    MI->eraseFromParent();
    ++NumRemovedSExtW;
    MadeChange = true;
  }

  return MadeChange;
}