Compiler projects using llvm
// Mask immediates for MMA instructions (2, 4 and 8 bits).
// Each ImmLeaf matches an i32 immediate only when isUInt<N>(Imm) holds, i.e.
// when the value fits in an unsigned field of N bits.
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
def Msk8Imm : ImmLeaf<i32, [{ return isUInt<8>(Imm); }]>;

// Selection predicate: true when Subtarget->hasMMA() returns true.
def MMA : Predicate<"Subtarget->hasMMA()">;


// Multiclass definitions for MMA accumulator instructions.
// ----------------------------------------------------------------------------

// Emits two unmasked records from a single opcode/xo pair, both predicated on
// MMA:
//   NAME    - non-accumulating form. Encoded with the low xo bit set
//             (xo | 0x01); $AT is marked @earlyclobber.
//   NAME#PP - accumulating form. Encoded with xo exactly as passed; the extra
//             input $ATi is tied to $AT and excluded from the encoding.
// IOL supplies the source operands, asmbase the mnemonic and asmstr the
// operand string.
multiclass ACC_UM_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                       string asmstr> {
  let Predicates = [MMA] in {
    def NAME :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0x01), (outs acc:$AT), IOL,
        !strconcat(asmbase, " ", asmstr), IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PP :
      XX3Form_AT3_XAB6<
        opcode, xo, (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "pp ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits four records: the two unmasked ones produced by ACC_UM_XOEO plus two
// prefixed "pm" forms (predicated on MMA and PrefixInstrs) that append the
// mask operands $XMSK (u4imm), $YMSK (u4imm) and $PMSK (u8imm).
// The non-accumulating form is encoded with xo | 0x01; the accumulating "pp"
// form uses xo as passed and ties $ATi to $AT.
multiclass ACC_UM_M844_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                            string asmstr> {
  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME :
      MMIRR_XX3Form_XY4P8_XAB6<
        opcode, !or(xo, 0x01), (outs acc:$AT),
        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK)),
        !strconcat("pm", asmbase, " ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PM#NAME#PP :
      MMIRR_XX3Form_XY4P8_XAB6<
        opcode, xo, (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u8imm:$PMSK))),
        !strconcat("pm", asmbase, "pp ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits four records: the two unmasked ones produced by ACC_UM_XOEO plus two
// prefixed "pm" forms (predicated on MMA and PrefixInstrs) whose three mask
// operands $XMSK, $YMSK and $PMSK are all u4imm.
// The non-accumulating form is encoded with xo | 0x01; the accumulating "pp"
// form uses xo as passed and ties $ATi to $AT.
multiclass ACC_UM_M444_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                            string asmstr> {
  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME :
      MMIRR_XX3Form_XYP4_XAB6<
        opcode, !or(xo, 0x01), (outs acc:$AT),
        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK)),
        !strconcat("pm", asmbase, " ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PM#NAME#PP :
      MMIRR_XX3Form_XYP4_XAB6<
        opcode, xo, (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK))),
        !strconcat("pm", asmbase, "pp ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits four records: the two unmasked ones produced by ACC_UM_XOEO plus two
// prefixed "pm" forms (predicated on MMA and PrefixInstrs) that append the
// mask operands $XMSK (u4imm), $YMSK (u4imm) and $PMSK (u2imm).
// The non-accumulating form is encoded with xo | 0x01; the accumulating "pp"
// form uses xo as passed and ties $ATi to $AT.
multiclass ACC_UM_M244_XOEO<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                            string asmstr> {
  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, !or(xo, 0x01), (outs acc:$AT),
        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
        !strconcat("pm", asmbase, " ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PM#NAME#PP :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, xo, (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
        !strconcat("pm", asmbase, "pp ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits four records: unmasked NAME / NAME#PP (predicated on MMA) and the
// prefixed PM#NAME / PM#NAME#PP (predicated on MMA and PrefixInstrs), the
// latter with mask operands $XMSK (u4imm), $YMSK (u4imm) and $PMSK (u2imm).
// Unlike the *_XOEO multiclasses, the non-accumulating form is encoded with xo
// as passed and the accumulating "pp" form with xo | 0x20. For the xo value
// this file instantiates it with (75 = 0x4B) that makes the upper nibble 0x4
// for the non-accumulating form and 0x6 for the accumulating form.
multiclass ACC_UM_M244_XO46<bits<6> opcode, bits<8> xo, dag IOL, string asmbase,
                            string asmstr> {
  let Predicates = [MMA] in {
    def NAME :
      XX3Form_AT3_XAB6<
        opcode, xo, (outs acc:$AT), IOL,
        !strconcat(asmbase, " ", asmstr), IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PP :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0x20), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "pp ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, xo, (outs acc:$AT),
        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK)),
        !strconcat("pm", asmbase, " ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PM#NAME#PP :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, !or(xo, 0x20), (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
        !strconcat("pm", asmbase, "pp ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits ten records. Four come from ACC_UM_M244_XOEO (NAME, NAME#PP, PM#NAME,
// PM#NAME#PP); the remaining six are the operand-negating accumulating forms,
// unprefixed and prefixed:
//   PN -> xo | 0x80,  NP -> xo | 0x40,  NN -> xo | 0xC0.
// Every negating form takes $ATi tied to $AT and leaves it out of the
// encoding. The prefixed forms append $XMSK (u4imm), $YMSK (u4imm) and
// $PMSK (u2imm).
multiclass ACC_NEG_UM_M244_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                                  string asmbase, string asmstr> {
  defm NAME : ACC_UM_M244_XOEO<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA] in {
    def PN :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "pn ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def NP :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "np ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def NN :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "nn ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME#PN :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, !or(xo, 0x80), (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
        !strconcat("pm", asmbase, "pn ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#NP :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, !or(xo, 0x40), (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
        !strconcat("pm", asmbase, "np ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#NN :
      MMIRR_XX3Form_XY4P2_XAB6<
        opcode, !or(xo, 0xC0), (outs acc:$AT),
        !con((ins acc:$ATi),
             !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK, u2imm:$PMSK))),
        !strconcat("pm", asmbase, "nn ", asmstr, ", $XMSK, $YMSK, $PMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits five unmasked records: NAME and NAME#PP from ACC_UM_XOEO, plus the
// operand-negating accumulating forms
//   PN -> xo | 0x80,  NP -> xo | 0x40,  NN -> xo | 0xC0.
// Each negating form takes $ATi tied to $AT and leaves it out of the encoding.
multiclass ACC_NEG_UM_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                             string asmbase, string asmstr> {
  defm NAME : ACC_UM_XOEO<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA] in {
    def PN :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0x80), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "pn ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def NP :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0x40), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "np ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def NN :
      XX3Form_AT3_XAB6<
        opcode, !or(xo, 0xC0), (outs acc:$AT), !con((ins acc:$ATi), IOL),
        !strconcat(asmbase, "nn ", asmstr), IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits ten records: the five unmasked ones from ACC_NEG_UM_XOM84C plus five
// prefixed "pm" forms (predicated on MMA and PrefixInstrs) that append two
// mask operands, $XMSK (u4imm) and $YMSK (u4imm).
// xo modifiers for the prefixed forms:
//   PM#NAME -> xo | 0x01 (non-accumulating, @earlyclobber $AT)
//   PP -> xo,  PN -> xo | 0x80,  NP -> xo | 0x40,  NN -> xo | 0xC0
// All accumulating forms tie $ATi to $AT and leave it out of the encoding.
multiclass ACC_NEG_UM_M44_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                                 string asmbase, string asmstr> {
  defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME :
      MMIRR_XX3Form_XY4_XAB6<
        opcode, !or(xo, 0x01), (outs acc:$AT),
        !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK)),
        !strconcat("pm", asmbase, " ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PM#NAME#PP :
      MMIRR_XX3Form_XY4_XAB6<
        opcode, xo, (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
        !strconcat("pm", asmbase, "pp ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#PN :
      MMIRR_XX3Form_XY4_XAB6<
        opcode, !or(xo, 0x80), (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
        !strconcat("pm", asmbase, "pn ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#NP :
      MMIRR_XX3Form_XY4_XAB6<
        opcode, !or(xo, 0x40), (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
        !strconcat("pm", asmbase, "np ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#NN :
      MMIRR_XX3Form_XY4_XAB6<
        opcode, !or(xo, 0xC0), (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u4imm:$YMSK))),
        !strconcat("pm", asmbase, "nn ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// Emits ten records: the five unmasked ones from ACC_NEG_UM_XOM84C plus five
// prefixed "pm" forms (predicated on MMA and PrefixInstrs) that append two
// mask operands, $XMSK (u4imm) and $YMSK (u2imm).
// xo modifiers for the prefixed forms:
//   PM#NAME -> xo | 0x01 (non-accumulating, @earlyclobber $AT)
//   PP -> xo,  PN -> xo | 0x80,  NP -> xo | 0x40,  NN -> xo | 0xC0
// All accumulating forms tie $ATi to $AT and leave it out of the encoding.
multiclass ACC_NEG_UM_M42_XOM84C<bits<6> opcode, bits<8> xo, dag IOL,
                                 string asmbase, string asmstr> {
  defm NAME : ACC_NEG_UM_XOM84C<opcode, xo, IOL, asmbase, asmstr>;
  let Predicates = [MMA, PrefixInstrs] in {
    def PM#NAME :
      MMIRR_XX3Form_X4Y2_XAB6<
        opcode, !or(xo, 0x01), (outs acc:$AT),
        !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK)),
        !strconcat("pm", asmbase, " ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"@earlyclobber $AT">;
    def PM#NAME#PP :
      MMIRR_XX3Form_X4Y2_XAB6<
        opcode, xo, (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
        !strconcat("pm", asmbase, "pp ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#PN :
      MMIRR_XX3Form_X4Y2_XAB6<
        opcode, !or(xo, 0x80), (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
        !strconcat("pm", asmbase, "pn ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#NP :
      MMIRR_XX3Form_X4Y2_XAB6<
        opcode, !or(xo, 0x40), (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
        !strconcat("pm", asmbase, "np ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
    def PM#NAME#NN :
      MMIRR_XX3Form_X4Y2_XAB6<
        opcode, !or(xo, 0xC0), (outs acc:$AT),
        !con((ins acc:$ATi), !con(IOL, (ins u4imm:$XMSK, u2imm:$YMSK))),
        !strconcat("pm", asmbase, "nn ", asmstr, ", $XMSK, $YMSK"),
        IIC_VecFP, []>,
      RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  }
}

// End of class definitions.
//-----------------------------------------------------------------------------

// Unprefixed accumulator instructions and pseudos. Everything in this block
// is predicated on MMA.
let Predicates = [MMA] in {
  // Selected from int_ppc_mma_xxmfacc. The output $ASo is tied to the input
  // $AS and is not encoded separately.
  def XXMFACC :
    XForm_AT3<31, 0, 177, (outs acc:$ASo), (ins acc:$AS), "xxmfacc $AS",
              IIC_VecGeneral,
              [(set v512i1:$ASo, (int_ppc_mma_xxmfacc v512i1:$AS))]>,
              RegConstraint<"$ASo = $AS">, NoEncode<"$ASo">;
  // Selected from int_ppc_mma_xxmtacc. The input $ATi is tied to the output
  // $AT and is not encoded separately.
  def XXMTACC :
    XForm_AT3<31, 1, 177, (outs acc:$AT), (ins acc:$ATi), "xxmtacc $AT",
              IIC_VecGeneral,
              [(set v512i1:$AT, (int_ppc_mma_xxmtacc v512i1:$ATi))]>,
              RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  // Post-RA expanded pseudo on a vsrprc operand; output is tied to the input.
  def KILL_PAIR : PPCPostRAExpPseudo<(outs vsrprc:$XTp), (ins vsrprc:$XSp),
                                      "#KILL_PAIR", []>,
                                      RegConstraint<"$XTp = $XSp">;
  // Post-RA expanded pseudo that produces an acc result from a uacc operand.
  def BUILD_UACC : PPCPostRAExpPseudo<(outs acc:$AT), (ins uacc:$AS),
                                      "#BUILD_UACC $AT, $AS", []>;
  // We define XXSETACCZ as rematerializable to undo CSE of that intrinsic in
  // the backend. We avoid CSE here because it generates a copy of the acc
  // register and this copy is more expensive than calling the intrinsic again.
  let isAsCheapAsAMove = 1, isReMaterializable = 1 in {
    def XXSETACCZ :
      XForm_AT3<31, 3, 177, (outs acc:$AT), (ins), "xxsetaccz $AT", IIC_VecGeneral,
                [(set v512i1:$AT, (int_ppc_mma_xxsetaccz))]>;
  }
  // Declared directly rather than through one of the multiclasses: only the
  // accumulating form ($ATi tied to $AT) is defined for this mnemonic here.
  def XVI8GER4SPP :
    XX3Form_AT3_XAB6<59, 99, (outs acc:$AT), (ins acc:$ATi, vsrc:$XA, vsrc:$XB),
                     "xvi8ger4spp $AT, $XA, $XB", IIC_VecGeneral, []>,
    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
  // Emit-time pseudos, flagged mayStore, taking an acc/uacc register and a
  // memrix16 destination.
  let mayStore = 1 in {
    def SPILL_ACC: PPCEmitTimePseudo<(outs), (ins acc:$AT, memrix16:$dst),
                                     "#SPILL_ACC", []>;
    def SPILL_UACC: PPCEmitTimePseudo<(outs), (ins uacc:$AT, memrix16:$dst),
                                     "#SPILL_UACC", []>;
  }
  // Emit-time pseudos, flagged mayLoad with hasSideEffects cleared, producing
  // an acc/uacc register from a memrix16 source.
  let mayLoad = 1, hasSideEffects = 0 in {
    def RESTORE_ACC: PPCEmitTimePseudo<(outs acc:$AT), (ins memrix16:$src),
                                       "#RESTORE_ACC", []>;
    def RESTORE_UACC: PPCEmitTimePseudo<(outs uacc:$AT), (ins memrix16:$src),
                                       "#RESTORE_UACC", []>;
  }
}

// Prefixed, masked counterpart of xvi8ger4spp. Only the accumulating form is
// declared: $ATi is tied to $AT and excluded from the encoding. The three mask
// operands $XMSK, $YMSK and $PMSK are all u4imm.
let Predicates = [MMA, PrefixInstrs] in {
  def PMXVI8GER4SPP :
    MMIRR_XX3Form_XYP4_XAB6<
      59, 99, (outs acc:$AT),
      (ins acc:$ATi, vsrc:$XA, vsrc:$XB,
           u4imm:$XMSK, u4imm:$YMSK, u4imm:$PMSK),
      "pmxvi8ger4spp $AT, $XA, $XB, $XMSK, $YMSK, $PMSK",
      IIC_VecGeneral, []>,
    RegConstraint<"$ATi = $AT">, NoEncode<"$ATi">;
}

// MMA accumulating/non-accumulating instructions.
//------------------------------------------------------------------------------

// Each defm below instantiates one of the multiclasses above with opcode 59,
// a per-instruction xo value, the source operand list, the mnemonic and the
// assembly operand string. The comment before each defm lists the records it
// produces.

// XVBF16GER2, XVBF16GER2PP, XVBF16GER2PN, XVBF16GER2NP, XVBF16GER2NN
// PMXVBF16GER2, PMXVBF16GER2PP, PMXVBF16GER2PN, PMXVBF16GER2NP, PMXVBF16GER2NN
defm XVBF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 50, (ins vsrc:$XA, vsrc:$XB),
                                         "xvbf16ger2", "$AT, $XA, $XB">;

// XVI4GER8, XVI4GER8PP, PMXVI4GER8, PMXVI4GER8PP
defm XVI4GER8 : ACC_UM_M844_XOEO<59, 34, (ins vsrc:$XA, vsrc:$XB),
                                 "xvi4ger8", "$AT, $XA, $XB">;

// XVI8GER4, XVI8GER4PP, PMXVI8GER4, PMXVI8GER4PP
defm XVI8GER4 : ACC_UM_M444_XOEO<59, 2, (ins vsrc:$XA, vsrc:$XB),
                                 "xvi8ger4", "$AT, $XA, $XB">;

// XVI16GER2, XVI16GER2PP, PMXVI16GER2, PMXVI16GER2PP
defm XVI16GER2 : ACC_UM_M244_XO46<59, 75, (ins vsrc:$XA, vsrc:$XB),
                                  "xvi16ger2", "$AT, $XA, $XB">;

// XVI16GER2S, XVI16GER2SPP, PMXVI16GER2S, PMXVI16GER2SPP
defm XVI16GER2S : ACC_UM_M244_XOEO<59, 42, (ins vsrc:$XA, vsrc:$XB),
                                   "xvi16ger2s", "$AT, $XA, $XB">;

// XVF16GER2, XVF16GER2PP, XVF16GER2PN, XVF16GER2NP, XVF16GER2NN
// PMXVF16GER2, PMXVF16GER2PP, PMXVF16GER2PN, PMXVF16GER2NP, PMXVF16GER2NN
defm XVF16GER2 : ACC_NEG_UM_M244_XOM84C<59, 18, (ins vsrc:$XA, vsrc:$XB),
                                        "xvf16ger2", "$AT, $XA, $XB">;

// XVF32GER, XVF32GERPP, XVF32GERPN, XVF32GERNP, XVF32GERNN
// PMXVF32GER, PMXVF32GERPP, PMXVF32GERPN, PMXVF32GERNP, PMXVF32GERNN
defm XVF32GER : ACC_NEG_UM_M44_XOM84C<59, 26, (ins vsrc:$XA, vsrc:$XB),
                                      "xvf32ger", "$AT, $XA, $XB">;

// XVF64GER, XVF64GERPP, XVF64GERPN, XVF64GERNP, XVF64GERNN
// PMXVF64GER, PMXVF64GERPP, PMXVF64GERPN, PMXVF64GERNP, PMXVF64GERNN
// Note: $XA uses the vsrpevenrc register class here rather than vsrc.
defm XVF64GER : ACC_NEG_UM_M42_XOM84C<59, 58, (ins vsrpevenrc:$XA, vsrc:$XB),
                                      "xvf64ger", "$AT, $XA, $XB">;
//------------------------------------------------------------------------------

// MMA Intrinsics
// Selection patterns mapping each unprefixed int_ppc_mma_* intrinsic to the
// instruction record of the same name. v16i8 sources are passed to the
// instruction as RCCp.AToVSRC / RCCp.BToVSRC (RCCp is defined outside this
// view - presumably it moves $XA/$XB into the vsrc class; confirm there).
// Accumulating variants forward the incoming accumulator as $ATi.
let Predicates = [MMA] in {
  // xvi4ger8
  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8 v16i8:$XA, v16i8:$XB)),
            (XVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

  // xvi8ger4
  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4 v16i8:$XA, v16i8:$XB)),
            (XVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

  // xvi16ger2s
  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2s v16i8:$XA, v16i8:$XB)),
            (XVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

  // xvf16ger2 and its pp/pn/np/nn variants
  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2 v16i8:$XA, v16i8:$XB)),
            (XVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;

  // xvf32ger and its pp/pn/np/nn variants
  def : Pat<(v512i1 (int_ppc_mma_xvf32ger v16i8:$XA, v16i8:$XB)),
            (XVF32GER RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  // xvf64ger and its pp/pn/np/nn variants. $XA is a v256i1 value and is
  // passed to the instruction directly, without RCCp.AToVSRC.
  def : Pat<(v512i1 (int_ppc_mma_xvf64ger v256i1:$XA, v16i8:$XB)),
            (XVF64GER $XA, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
            (XVF64GERPP $ATi, $XA, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
            (XVF64GERPN $ATi, $XA, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
            (XVF64GERNP $ATi, $XA, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB)),
            (XVF64GERNN $ATi, $XA, RCCp.BToVSRC)>;

  // xvbf16ger2 and its pp/pn/np/nn variants
  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2 v16i8:$XA, v16i8:$XB)),
            (XVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  // xvi16ger2, xvi16ger2pp and the standalone xvi8ger4spp
  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2 v16i8:$XA, v16i8:$XB)),
            (XVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
  def : Pat<(v512i1 (int_ppc_mma_xvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB)),
            (XVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC)>;
}

// Prefixed (masked) MMA Intrinsics
let Predicates = [MMA, PrefixInstrs] in {
  // Each pattern below maps a prefixed int_ppc_mma_pm* intrinsic to the PM*
  // record of the same name. Mask operands are matched with the MskNImm leaf
  // whose width corresponds to the operand width declared on the instruction.

  // pmxvi4ger8: XMSK/YMSK are 4-bit, PMSK is 8-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                            Msk4Imm:$YMSK, Msk8Imm:$PMSK)),
            (PMXVI4GER8 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                        Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi4ger8pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                              Msk8Imm:$PMSK)),
            (PMXVI4GER8PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk8Imm:$PMSK)>;

  // pmxvi8ger4: all three masks are 4-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                            Msk4Imm:$YMSK, Msk4Imm:$PMSK)),
            (PMXVI8GER4 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                        Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                              Msk4Imm:$PMSK)),
            (PMXVI8GER4PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;

  // pmxvi16ger2s: XMSK/YMSK are 4-bit, PMSK is 2-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2s v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                              Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVI16GER2S RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                                Msk2Imm:$PMSK)),
            (PMXVI16GER2SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  // pmxvf16ger2 and its pp/pn/np/nn variants: XMSK/YMSK 4-bit, PMSK 2-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                             Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                         Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                               Msk2Imm:$PMSK)),
            (PMXVF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                               Msk2Imm:$PMSK)),
            (PMXVF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                               Msk2Imm:$PMSK)),
            (PMXVF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                               Msk2Imm:$PMSK)),
            (PMXVF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;

  // pmxvf32ger and its pp/pn/np/nn variants: two 4-bit masks, no PMSK.
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32ger v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                            Msk4Imm:$YMSK)),
            (PMXVF32GER RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                        Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gerpn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERPN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERNP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf32gernn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk4Imm:$YMSK)),
            (PMXVF32GERNN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK)>;

  // pmxvf64ger and its pp/pn/np/nn variants: XMSK is 4-bit, YMSK is 2-bit.
  // $XA is a v256i1 value and is passed through without RCCp.AToVSRC.
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64ger v256i1:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                            Msk2Imm:$YMSK)),
            (PMXVF64GER $XA, RCCp.BToVSRC, Msk4Imm:$XMSK, Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERPP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gerpn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERPN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernp v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERNP $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvf64gernn v512i1:$ATi, v256i1:$XA, v16i8:$XB,
                                              Msk4Imm:$XMSK, Msk2Imm:$YMSK)),
            (PMXVF64GERNN $ATi, $XA, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk2Imm:$YMSK)>;

  // pmxvbf16ger2 and its pp/pn/np/nn variants: XMSK/YMSK 4-bit, PMSK 2-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                              Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVBF16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                          Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                                Msk2Imm:$PMSK)),
            (PMXVBF16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2pn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                                Msk2Imm:$PMSK)),
            (PMXVBF16GER2PN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2np v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                                Msk2Imm:$PMSK)),
            (PMXVBF16GER2NP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  def : Pat<(v512i1 (int_ppc_mma_pmxvbf16ger2nn v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                                Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                                Msk2Imm:$PMSK)),
            (PMXVBF16GER2NN $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                            Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  // pmxvi16ger2 (non-accumulating): XMSK/YMSK 4-bit, PMSK 2-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2 v16i8:$XA, v16i8:$XB, Msk4Imm:$XMSK,
                                             Msk4Imm:$YMSK, Msk2Imm:$PMSK)),
            (PMXVI16GER2 RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                         Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
  // pmxvi8ger4spp: $PMSK is matched as a 4-bit mask. The PMXVI8GER4SPP record
  // declares the operand as u4imm:$PMSK, and the pmxvi8ger4 / pmxvi8ger4pp
  // patterns in this block also use Msk4Imm. Matching it with Msk2Imm would
  // leave PMSK values 4..15 with no selectable pattern.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi8ger4spp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                               Msk4Imm:$PMSK)),
            (PMXVI8GER4SPP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk4Imm:$PMSK)>;
  // pmxvi16ger2pp (accumulating, takes $ATi): XMSK/YMSK 4-bit, PMSK 2-bit.
  def : Pat<(v512i1 (int_ppc_mma_pmxvi16ger2pp v512i1:$ATi, v16i8:$XA, v16i8:$XB,
                                               Msk4Imm:$XMSK, Msk4Imm:$YMSK,
                                               Msk2Imm:$PMSK)),
            (PMXVI16GER2PP $ATi, RCCp.AToVSRC, RCCp.BToVSRC, Msk4Imm:$XMSK,
                           Msk4Imm:$YMSK, Msk2Imm:$PMSK)>;
}

// Helper dags that assemble four vector operands ($vs0..$vs3, bound by the
// pattern that uses these dags) into one 512-bit accumulator value.
def ConcatsMMA {
  // v256i1 pair built from an IMPLICIT_DEF: $vs0 is inserted at sub_vsx1,
  // then $vs1 at sub_vsx0.
  dag VecsToVecPair0 =
    (v256i1 (INSERT_SUBREG
      (INSERT_SUBREG (IMPLICIT_DEF), $vs0, sub_vsx1),
      $vs1, sub_vsx0));
  // v256i1 pair built the same way: $vs2 at sub_vsx1, then $vs3 at sub_vsx0.
  dag VecsToVecPair1 =
    (v256i1 (INSERT_SUBREG
      (INSERT_SUBREG (IMPLICIT_DEF), $vs2, sub_vsx1),
      $vs3, sub_vsx0));
  // Places KILL_PAIR of pair 0 at sub_pair0 and KILL_PAIR of pair 1 at
  // sub_pair1 of an undefined v512i1, then wraps the result in BUILD_UACC.
  dag VecsToVecQuad =
    (BUILD_UACC (INSERT_SUBREG
                  (INSERT_SUBREG (v512i1 (IMPLICIT_DEF)),
                                 (KILL_PAIR VecsToVecPair0), sub_pair0),
                  (KILL_PAIR VecsToVecPair1), sub_pair1));
}

// Helper dags that pull pieces out of a value bound as $v by the pattern that
// uses them.
def Extracts {
  // The two v256i1 halves of $v.
  dag Pair0 = (v256i1 (EXTRACT_SUBREG $v, sub_pair0));
  dag Pair1 = (v256i1 (EXTRACT_SUBREG $v, sub_pair1));
  // The four v4i32 vectors: Vec0/Vec1 are sub_vsx0/sub_vsx1 of Pair0,
  // Vec2/Vec3 are sub_vsx0/sub_vsx1 of Pair1.
  dag Vec0 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx0));
  dag Vec1 = (v4i32 (EXTRACT_SUBREG Pair0, sub_vsx1));
  dag Vec2 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx0));
  dag Vec3 = (v4i32 (EXTRACT_SUBREG Pair1, sub_vsx1));
}

// Patterns for building an accumulator from four vectors, moving out of an
// accumulator, and extracting a single vector from one. All require MMA.
let Predicates = [MMA] in {
  // Both the PPCAccBuild node and the assemble_acc intrinsic select to XXMTACC
  // applied to ConcatsMMA.VecsToVecQuad. The source operands are bound in the
  // order $vs1, $vs0, $vs3, $vs2.
  def : Pat<(v512i1 (PPCAccBuild v4i32:$vs1, v4i32:$vs0,
                                 v4i32:$vs3, v4i32:$vs2)),
            (XXMTACC ConcatsMMA.VecsToVecQuad)>;
  def : Pat<(v512i1 (int_ppc_mma_assemble_acc v16i8:$vs1, v16i8:$vs0,
                                              v16i8:$vs3, v16i8:$vs2)),
            (XXMTACC ConcatsMMA.VecsToVecQuad)>;

  // PPCxxmfacc selects directly to XXMFACC.
  def : Pat<(v512i1 (PPCxxmfacc v512i1:$AS)),
            (XXMFACC acc:$AS)>;

  // PPCAccExtractVsx with index N selects to the Extracts.VecN dag.
  def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 0)), Extracts.Vec0>;
  def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 1)), Extracts.Vec1>;
  def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 2)), Extracts.Vec2>;
  def : Pat<(v4i32 (PPCAccExtractVsx acc:$v, 3)), Extracts.Vec3>;
}