# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,SI,SICI,SIVI
# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,CI,SICI
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,VI,SIVI
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=GCN,GFX9

# Instruction-selection checks for AMDGPU scalar (SMRD) loads.  Every llc
# invocation above selects the same generic MIR for a different -mcpu value
# and enables its own set of FileCheck prefixes; a directive is only active
# when its prefix appears in that invocation's -check-prefixes list.

--- |
  define amdgpu_kernel void @smrd_imm(i32 addrspace(4)* %const0) { ret void }
  define amdgpu_kernel void @smrd_wide() { ret void }
  define amdgpu_kernel void @constant_address_positive() { ret void }
  define amdgpu_kernel void @smrd_sgpr() { ret void }
  define amdgpu_kernel void @smrd_sgpr_imm() { ret void }
...
---

name: smrd_imm
legalized: true
regBankSelected: true

# Loads from a base pointer plus a series of constant byte offsets chosen to
# sit on either side of the boundaries named in the section comments below,
# followed by three 64-bit pointer loads.  The directives appear in the same
# order as the loads in the body.

# GCN-LABEL: name: smrd_imm{{$}}
# GCN: body:
# GCN: [[PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1

# Immediate offset:
# SICI: S_LOAD_DWORD_IMM [[PTR]], 1, 0
# VI:   S_LOAD_DWORD_IMM [[PTR]], 4, 0

# Max immediate offset for SI
# SICI: S_LOAD_DWORD_IMM [[PTR]], 255, 0
# VI:   S_LOAD_DWORD_IMM [[PTR]], 1020, 0

# Immediate overflow for SI
# SI: [[K1024:%[0-9]+]]:sreg_32 = S_MOV_B32 1024
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1024]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 256, 0
# VI: S_LOAD_DWORD_IMM [[PTR]], 1024, 0

# Max immediate offset for VI
# The materialized offset is also checked as the register operand of the
# load, mirroring the 1024 case above.
# SI: [[K1048572:%[0-9]+]]:sreg_32 = S_MOV_B32 1048572
# SI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048572]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262143
# VI: S_LOAD_DWORD_IMM [[PTR]], 1048572

# Immediate overflow for VI
# SIVI: [[K1048576:%[0-9]+]]:sreg_32 = S_MOV_B32 1048576
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K1048576]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 262144, 0

# Max immediate for CI
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 3
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 4294967295, 0

# Immediate overflow for CI
# GCN: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# GCN: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 4
# GCN: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# GCN-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# GCN-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# GCN-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# GCN-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# GCN-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# GCN: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# GCN: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# GCN: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0

# Max 32-bit byte offset
# SIVI: [[K4294967292:%[0-9]+]]:sreg_32 = S_MOV_B32 4294967292
# SIVI: S_LOAD_DWORD_SGPR [[PTR]], [[K4294967292]], 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741823, 0

# Overflow 32-bit byte offset
# SIVI: [[K_LO:%[0-9]+]]:sreg_32 = S_MOV_B32 0
# SIVI: [[K_HI:%[0-9]+]]:sreg_32 = S_MOV_B32 1
# SIVI: [[K:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[K_LO]], %subreg.sub0, [[K_HI]], %subreg.sub1
# SIVI-DAG: [[K_SUB0:%[0-9]+]]:sreg_32 = COPY [[K]].sub0
# SIVI-DAG: [[PTR_LO:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub0
# SIVI-DAG: [[ADD_PTR_LO:%[0-9]+]]:sreg_32 = S_ADD_U32 [[PTR_LO]], [[K_SUB0]]
# SIVI-DAG: [[K_SUB1:%[0-9]+]]:sreg_32 = COPY [[K]].sub1
# SIVI-DAG: [[PTR_HI:%[0-9]+]]:sreg_32 = COPY [[PTR]].sub1
# SIVI: [[ADD_PTR_HI:%[0-9]+]]:sreg_32 = S_ADDC_U32 [[PTR_HI]], [[K_SUB1]]
# SIVI: [[ADD_PTR:%[0-9]+]]:sreg_64_xexec = REG_SEQUENCE [[ADD_PTR_LO]], %subreg.sub0, [[ADD_PTR_HI]], %subreg.sub1
# SIVI: S_LOAD_DWORD_IMM [[ADD_PTR]], 0, 0
# CI: S_LOAD_DWORD_IMM_ci [[PTR]], 1073741824, 0

# Pointer loads
# GCN: [[AS0:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS0]]
# GCN: [[AS1:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS1]]
# GCN: [[AS4:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM %0
# GCN: $sgpr0_sgpr1 = COPY [[AS4]]

body: |
  bb.0:
    liveins: $sgpr0_sgpr1

    %0:sgpr(p4) = COPY $sgpr0_sgpr1

    ; Byte offset 4.
    %1:sgpr(s64) = G_CONSTANT i64 4
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %3

    ; Byte offset 1020.
    %4:sgpr(s64) = G_CONSTANT i64 1020
    %5:sgpr(p4) = G_PTR_ADD %0, %4
    %6:sgpr(s32) = G_LOAD %5 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %6

    ; Byte offset 1024.
    %7:sgpr(s64) = G_CONSTANT i64 1024
    %8:sgpr(p4) = G_PTR_ADD %0, %7
    %9:sgpr(s32) = G_LOAD %8 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %9

    ; Byte offset 1048572.
    %10:sgpr(s64) = G_CONSTANT i64 1048572
    %11:sgpr(p4) = G_PTR_ADD %0, %10
    %12:sgpr(s32) = G_LOAD %11 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %12

    ; Byte offset 1048576.
    %13:sgpr(s64) = G_CONSTANT i64 1048576
    %14:sgpr(p4) = G_PTR_ADD %0, %13
    %15:sgpr(s32) = G_LOAD %14 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %15

    ; Byte offset 17179869180 (needs more than 32 bits).
    %16:sgpr(s64) = G_CONSTANT i64 17179869180
    %17:sgpr(p4) = G_PTR_ADD %0, %16
    %18:sgpr(s32) = G_LOAD %17 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %18

    ; Byte offset 17179869184.
    %19:sgpr(s64) = G_CONSTANT i64 17179869184
    %20:sgpr(p4) = G_PTR_ADD %0, %19
    %21:sgpr(s32) = G_LOAD %20 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %21

    ; Byte offset 4294967292.
    %22:sgpr(s64) = G_CONSTANT i64 4294967292
    %23:sgpr(p4) = G_PTR_ADD %0, %22
    %24:sgpr(s32) = G_LOAD %23 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %24

    ; Byte offset 4294967296.
    %25:sgpr(s64) = G_CONSTANT i64 4294967296
    %26:sgpr(p4) = G_PTR_ADD %0, %25
    %27:sgpr(s32) = G_LOAD %26 :: (load (s32) from %ir.const0, addrspace 4)
    $sgpr0 = COPY %27

    ; Loads whose result is itself a pointer (p0, p1 and p4).
    %28:sgpr(p0) = G_LOAD %0 :: (load (p0) from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %28

    %29:sgpr(p1) = G_LOAD %0 :: (load (p1) from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %29

    %30:sgpr(p4) = G_LOAD %0 :: (load (p4) from %ir.const0, addrspace 4)
    $sgpr0_sgpr1 = COPY %30

...
---

name: smrd_wide
legalized: true
regBankSelected: true

# 8- and 16-dword loads through a constant-address-space pointer and a
# global-address-space pointer, both held in SGPRs.  The directives use the
# GCN prefix so that every configuration above actually evaluates them, and
# they follow the post-selection spelling used by the smrd_imm checks
# (register class on the pointer copy, zero offset operands on the load).

# GCN-LABEL: name: smrd_wide{{$}}
# GCN: [[CONSTANT_PTR:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
# GCN: [[GLOBAL_PTR:%[0-9]+]]:sreg_64 = COPY $sgpr2_sgpr3
# GCN: S_LOAD_DWORDX8_IMM [[CONSTANT_PTR]], 0, 0
# GCN: S_LOAD_DWORDX16_IMM [[CONSTANT_PTR]], 0, 0
# GCN: S_LOAD_DWORDX8_IMM [[GLOBAL_PTR]], 0, 0
# GCN: S_LOAD_DWORDX16_IMM [[GLOBAL_PTR]], 0, 0

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3

    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(p1) = COPY $sgpr2_sgpr3

    ; Constant address space, 8 dwords.
    %2:sgpr(<8 x s32>) = G_LOAD %0 :: (load (<8 x s32>), addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %2

    ; Constant address space, 16 dwords.
    %3:sgpr(<16 x s32>) = G_LOAD %0 :: (load (<16 x s32>), addrspace 4)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %3

    ; Global address space, 8 dwords.
    %4:sgpr(<8 x s32>) = G_LOAD %1 :: (load (<8 x s32>), addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7 = COPY %4

    ; Global address space, 16 dwords.
    %5:sgpr(<16 x s32>) = G_LOAD %1 :: (load (<16 x s32>), addrspace 1)
    $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15 = COPY %5
...
# Test a load of an offset from a constant base address
#
# The base pointer is the constant 44 and the byte offset added to it is 64.
# The directives below expect the base to be materialized with S_MOV_B64 and
# the offset to be folded into the load: the operand stays 64 under the VI
# prefix and becomes 16 under the SICI prefix (presumably a dword-scaled
# encoding, 64 / 4 -- confirm against the ISA documentation).  No load
# directive is written for the GFX9 prefix.
# GCN-LABEL: name: constant_address_positive{{$}}
# GCN: %0:sreg_64 = S_MOV_B64 44

# VI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 64, 0 :: (dereferenceable invariant load (s32), addrspace 4)
# SICI: %3:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 16, 0 :: (dereferenceable invariant load (s32), addrspace 4)

---

name: constant_address_positive
legalized: true
regBankSelected: true

body: |
  bb.0:
    ; NOTE(review): neither live-in register is read in this block; the
    ; base address comes from the G_CONSTANT below.
    liveins: $sgpr0_sgpr1, $vgpr2_vgpr3
    %0:sgpr(p4) = G_CONSTANT i64 44
    %1:sgpr(s64) = G_CONSTANT i64 64
    %2:sgpr(p4) = G_PTR_ADD %0, %1
    %3:sgpr(s32) = G_LOAD %2 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
    ; Use the loaded value so the load has a consumer.
    S_ENDPGM 0, implicit %3
...
---

# Test a load with a register offset.
#
# The 32-bit value copied from $sgpr2 is zero-extended to 64 bits and added
# to the base pointer.  The directive expects the load to take the original
# 32-bit register (%1) as its offset operand rather than the extended value,
# and it is evaluated for every configuration that enables the GCN prefix.
# GCN-LABEL: name: smrd_sgpr{{$}}
# GCN: S_LOAD_DWORD_SGPR %0, %1, 0

name: smrd_sgpr
legalized: true
regBankSelected: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2
    ; %0 is the base pointer, %1 the 32-bit register offset.
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(s32) = COPY $sgpr2
    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
    %4:sgpr(p4) = G_PTR_ADD %0, %2
    %5:sgpr(s32) = G_LOAD %4 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
    S_ENDPGM 0, implicit %5
...
---

# Test a load with a (register + immediate) offset.
#
# The address is base + zext(32-bit register) + 16.  Only the GFX9 prefix
# has directives for the selected load: it is expected to carry the base,
# the 32-bit register and the immediate 16 as separate operands.  For the
# other configurations only the label line is evaluated in this function.
# GCN-LABEL: name: smrd_sgpr_imm{{$}}
# GFX9-DAG: %[[BASE:.*]]:sreg_64 = COPY $sgpr0_sgpr1
# GFX9-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
# GFX9: S_LOAD_DWORD_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,

name: smrd_sgpr_imm
legalized: true
regBankSelected: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2
    %0:sgpr(p4) = COPY $sgpr0_sgpr1
    %1:sgpr(s32) = COPY $sgpr2
    ; Register part of the offset.
    %2:sgpr(s64) = G_ZEXT %1:sgpr(s32)
    %4:sgpr(p4) = G_PTR_ADD %0, %2
    ; Immediate part of the offset.
    %5:sgpr(s64) = G_CONSTANT i64 16
    %6:sgpr(p4) = G_PTR_ADD %4, %5
    %7:sgpr(s32) = G_LOAD %6 :: (dereferenceable invariant load (s32), align 4, addrspace 4)
    S_ENDPGM 0, implicit %7
...