# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s # This previously would produce a bundle that could not be satisfied # due to using nearly the entire register budget and not considering # the alignment requirement of large SGPR tuples. --- name: soft_clause_bundle_out_of_registers tracksRegLiveness: true machineFunctionInfo: isEntryFunction: true waveLimiter: true scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99' stackPtrOffsetReg: '$sgpr32' occupancy: 6 body: | ; CHECK-LABEL: name: soft_clause_bundle_out_of_registers ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM4:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM5:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM6:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 bb.0: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6 %0:sgpr_64 = COPY $sgpr4_sgpr5 %1:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0, 0, 0 :: (dereferenceable invariant load (s64), addrspace 4) %2:vreg_64 = IMPLICIT_DEF bb.1: undef %3.sub0:sreg_64 = S_ADD_U32 %1.sub0, 0, implicit-def $scc %3.sub1:sreg_64 = S_ADDC_U32 %1.sub1, 0, implicit-def dead $scc, implicit killed $scc %4:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 0, 0 :: (load (s512), align 4, addrspace 4) %5:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4096, 0 :: (load (s512), align 4, addrspace 4) %6:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8192, 0 :: (load (s512), align 4, addrspace 4) %7:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 12288, 0 :: (load (s512), align 4, addrspace 4) %8:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 64, 0 :: (load (s512), align 4, addrspace 4) %9:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 4160, 0 :: (load (s512), align 4, addrspace 4) %10:sgpr_512 = S_LOAD_DWORDX16_IMM %3, 8256, 0 :: (load (s512), align 4, addrspace 4) dead $sgpr30_sgpr31 = SI_CALL undef $sgpr4_sgpr5, 0, csr_amdgpu, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31, implicit-def $sgpr32_sgpr33_sgpr34_sgpr35_sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, implicit-def $sgpr70, implicit-def $sgpr80, implicit-def $sgpr90, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit-def $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 dead %11:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub1, 0, 0, implicit $mode, implicit $exec dead %12:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub2, 0, 0, implicit $mode, implicit $exec dead %13:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub3, 0, 0, implicit $mode, implicit $exec dead %14:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub4, 0, 0, implicit $mode, implicit $exec dead %15:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub5, 0, 0, implicit $mode, implicit $exec dead %16:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub6, 0, 0, implicit $mode, implicit $exec dead %17:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub7, 0, 0, implicit $mode, implicit $exec dead %18:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub8, 0, 0, implicit $mode, implicit $exec dead %19:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub9, 0, 0, implicit $mode, implicit $exec dead %20:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub10, 0, 0, implicit $mode, implicit $exec dead %21:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub11, 0, 0, implicit $mode, implicit $exec dead %22:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub12, 0, 0, implicit $mode, implicit $exec dead %23:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub13, 0, 0, implicit $mode, implicit $exec dead %24:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub14, 0, 0, implicit $mode, implicit $exec dead %25:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %4.sub15, 0, 0, implicit $mode, implicit $exec dead %26:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub0, 0, 0, implicit $mode, implicit $exec dead %27:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub1, 0, 0, implicit $mode, implicit $exec dead %28:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub2, 0, 0, implicit $mode, implicit $exec dead %29:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub3, 0, 0, implicit $mode, implicit $exec dead %30:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub4, 0, 0, implicit $mode, implicit $exec dead %31:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub5, 0, 0, implicit $mode, implicit $exec dead %32:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub6, 0, 0, implicit $mode, implicit $exec dead %33:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub7, 0, 0, implicit $mode, implicit $exec dead %34:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub8, 0, 0, implicit $mode, implicit $exec dead %35:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub9, 0, 0, implicit $mode, implicit $exec dead %36:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub10, 0, 0, implicit $mode, implicit $exec dead %37:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub11, 0, 0, implicit $mode, implicit $exec dead %38:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub12, 0, 0, implicit $mode, implicit $exec dead %39:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub13, 0, 0, implicit $mode, implicit $exec dead %40:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub14, 0, 0, implicit $mode, implicit $exec dead %41:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %5.sub15, 0, 0, implicit $mode, implicit $exec dead %42:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub0, 0, 0, implicit $mode, implicit $exec dead %43:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub1, 0, 0, implicit $mode, implicit $exec dead %44:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub2, 0, 0, implicit $mode, implicit $exec dead %45:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub3, 0, 0, implicit $mode, implicit $exec dead %46:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub4, 0, 0, implicit $mode, implicit $exec dead %47:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub5, 0, 0, implicit $mode, implicit $exec dead %48:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub8, 0, 0, implicit $mode, implicit $exec dead %49:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub9, 0, 0, implicit $mode, implicit $exec dead %50:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub10, 0, 0, implicit $mode, implicit $exec dead %51:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub11, 0, 0, implicit $mode, implicit $exec dead %52:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub12, 0, 0, implicit $mode, implicit $exec dead %53:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub14, 0, 0, implicit $mode, implicit $exec dead %54:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %8.sub15, 0, 0, implicit $mode, implicit $exec dead %55:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %9.sub10, 0, 0, implicit $mode, implicit $exec dead %56:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %6.sub0, 0, 0, implicit $mode, implicit $exec dead %57:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %7.sub10, 0, 0, implicit $mode, implicit $exec dead %58:vgpr_32 = nofpexcept V_ADD_F32_e64 0, 0, 0, %10.sub0, 0, 0, implicit $mode, implicit $exec S_CMP_LG_U32 0, 0, implicit-def $scc S_CBRANCH_SCC1 %bb.1, implicit killed $scc S_BRANCH %bb.2 bb.2: S_ENDPGM 0 ... # Case with simple clause which exceeds the pressure limit, though # didn't hit the register allocator error. --- name: simple_huge_reg_tuple_clause tracksRegLiveness: true machineFunctionInfo: isEntryFunction: true waveLimiter: false memoryBound: false scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' stackPtrOffsetReg: '$sgpr32' occupancy: 10 body: | bb.0: liveins: $vgpr0, $sgpr4_sgpr5, $sgpr6 ; CHECK-LABEL: name: simple_huge_reg_tuple_clause ; CHECK: [[S_LOAD_DWORDX16_IMM:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM1:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM2:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: [[S_LOAD_DWORDX16_IMM3:%[0-9]+]]:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4) ; CHECK-NEXT: S_NOP 0, implicit [[S_LOAD_DWORDX16_IMM]] %0:sreg_64 = COPY $sgpr4_sgpr5 %1:sreg_64 = S_MOV_B64 0 %2:sreg_64 = S_MOV_B64 1 %3:sreg_64 = S_MOV_B64 2 %4:sreg_64 = S_MOV_B64 3 %5:sreg_64 = S_MOV_B64 4 %6:sreg_64 = S_MOV_B64 5 %7:sreg_64 = S_MOV_B64 6 %8:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 0, 0 :: (load (s512), align 4, addrspace 4) %9:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 64, 0 :: (load (s512), align 4, addrspace 4) %10:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4096, 0 :: (load (s512), align 4, addrspace 4) %11:sgpr_512 = S_LOAD_DWORDX16_IMM %0, 4160, 0 :: (load (s512), align 4, addrspace 4) S_NOP 0, implicit %8 S_NOP 0, implicit %9 S_NOP 0, implicit %10 S_NOP 0, implicit %11 S_NOP 0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5 S_NOP 0, implicit %6 S_NOP 0, implicit %7 S_ENDPGM 0 ...