; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s
; RUN: %if ptxas %{ llc < %s -mtriple=nvptx-unknown-unknown | %ptxas-verify %}
;
; Check that parameters of a __global__ (kernel) function do not get increased
; alignment, and that no additional vectorization is performed on loads/stores
; with those parameters.
;
; NOTE(review): the functions below carry no kernel calling convention or
; annotation in the visible IR, and the expected PTX labels are
; `.visible .func` rather than `.entry`. Confirm that this file really
; exercises kernel-parameter lowering as the description above says.
;
; Test IR is a minimized version of IR generated with the following command
; from the source code below:
; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu
;
; ----------------------------------------------------------------------------
; #include <stdint.h>
;
; struct St4x1 { uint32_t field[1]; };
; struct St4x2 { uint32_t field[2]; };
; struct St4x3 { uint32_t field[3]; };
; struct St4x4 { uint32_t field[4]; };
; struct St4x5 { uint32_t field[5]; };
; struct St4x6 { uint32_t field[6]; };
; struct St4x7 { uint32_t field[7]; };
; struct St4x8 { uint32_t field[8]; };
; struct St8x1 { uint64_t field[1]; };
; struct St8x2 { uint64_t field[2]; };
; struct St8x3 { uint64_t field[3]; };
; struct St8x4 { uint64_t field[4]; };
;
; #define DECLARE_FUNCTION(StName) \
; static __global__ __attribute__((noinline)) \
; void foo_##StName(struct StName in, struct StName* ret) { \
; const unsigned size = sizeof(ret->field) / sizeof(*ret->field); \
; for (unsigned i = 0; i != size; ++i) \
; ret->field[i] = in.field[i]; \
; } \
;
; DECLARE_FUNCTION(St4x1)
; DECLARE_FUNCTION(St4x2)
; DECLARE_FUNCTION(St4x3)
; DECLARE_FUNCTION(St4x4)
; DECLARE_FUNCTION(St4x5)
; DECLARE_FUNCTION(St4x6)
; DECLARE_FUNCTION(St4x7)
; DECLARE_FUNCTION(St4x8)
; DECLARE_FUNCTION(St8x1)
; DECLARE_FUNCTION(St8x2)
; DECLARE_FUNCTION(St8x3)
; DECLARE_FUNCTION(St8x4)
; ----------------------------------------------------------------------------

; Aggregates passed by value below: StAxN wraps an array of N elements of
; A bytes each (i32 for St4xN, i64 for St8xN).
%struct.St4x1 = type { [1 x i32] }
%struct.St4x2 = type { [2 x i32] }
%struct.St4x3 = type { [3 x i32] }
%struct.St4x4 = type { [4 x i32] }
%struct.St4x5 = type { [5 x i32] }
%struct.St4x6 = type { [6 x i32] }
%struct.St4x7 = type { [7 x i32] }
%struct.St4x8 = type { [8 x i32] }
%struct.St8x1 = type { [1 x i64] }
%struct.St8x2 = type { [2 x i64] }
%struct.St8x3 = type { [3 x i64] }
%struct.St8x4 = type { [4 x i64] }

; 4-byte-element cases. Each function copies every element of the byval
; argument %in to %ret with an unrolled load/store sequence. The expected PTX
; keeps the byval parameter at `.align 4`, sizes it as 4*N bytes, and reads it
; one 32-bit element at a time (no vector parameter loads). The %ret pointer
; is expected as a 32-bit `.param` (the test is run with the `nvptx` triple).

; One i32 element: a single scalar load/store pair.
define dso_local void @foo_St4x1(ptr nocapture noundef readonly byval(%struct.St4x1) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x1(
  ; CHECK: .param .align 4 .b8 foo_St4x1_param_0[4],
  ; CHECK: .param .b32 foo_St4x1_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x1_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  ret void
}

; Two i32 elements: the two adjacent loads are expected to stay scalar.
define dso_local void @foo_St4x2(ptr nocapture noundef readonly byval(%struct.St4x2) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x2(
  ; CHECK: .param .align 4 .b8 foo_St4x2_param_0[8],
  ; CHECK: .param .b32 foo_St4x2_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x2_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [2 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [2 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  ret void
}

; Three i32 elements (12-byte parameter).
define dso_local void @foo_St4x3(ptr nocapture noundef readonly byval(%struct.St4x3) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x3(
  ; CHECK: .param .align 4 .b8 foo_St4x3_param_0[12],
  ; CHECK: .param .b32 foo_St4x3_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x3_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8];
  ; CHECK: st.u32 [[[R1]]+8], [[R4]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  %arrayidx.2 = getelementptr inbounds [3 x i32], ptr %in, i64 0, i64 2
  %3 = load i32, ptr %arrayidx.2, align 4
  %arrayidx3.2 = getelementptr inbounds [3 x i32], ptr %ret, i64 0, i64 2
  store i32 %3, ptr %arrayidx3.2, align 4
  ret void
}

; Four i32 elements (16-byte parameter): still four separate 32-bit accesses.
define dso_local void @foo_St4x4(ptr nocapture noundef readonly byval(%struct.St4x4) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x4(
  ; CHECK: .param .align 4 .b8 foo_St4x4_param_0[16],
  ; CHECK: .param .b32 foo_St4x4_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x4_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8];
  ; CHECK: st.u32 [[[R1]]+8], [[R4]];
  ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12];
  ; CHECK: st.u32 [[[R1]]+12], [[R5]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  %arrayidx.2 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 2
  %3 = load i32, ptr %arrayidx.2, align 4
  %arrayidx3.2 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 2
  store i32 %3, ptr %arrayidx3.2, align 4
  %arrayidx.3 = getelementptr inbounds [4 x i32], ptr %in, i64 0, i64 3
  %4 = load i32, ptr %arrayidx.3, align 4
  %arrayidx3.3 = getelementptr inbounds [4 x i32], ptr %ret, i64 0, i64 3
  store i32 %4, ptr %arrayidx3.3, align 4
  ret void
}

; Five i32 elements (20-byte parameter).
define dso_local void @foo_St4x5(ptr nocapture noundef readonly byval(%struct.St4x5) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x5(
  ; CHECK: .param .align 4 .b8 foo_St4x5_param_0[20],
  ; CHECK: .param .b32 foo_St4x5_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x5_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8];
  ; CHECK: st.u32 [[[R1]]+8], [[R4]];
  ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12];
  ; CHECK: st.u32 [[[R1]]+12], [[R5]];
  ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16];
  ; CHECK: st.u32 [[[R1]]+16], [[R6]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  %arrayidx.2 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 2
  %3 = load i32, ptr %arrayidx.2, align 4
  %arrayidx3.2 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 2
  store i32 %3, ptr %arrayidx3.2, align 4
  %arrayidx.3 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 3
  %4 = load i32, ptr %arrayidx.3, align 4
  %arrayidx3.3 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 3
  store i32 %4, ptr %arrayidx3.3, align 4
  %arrayidx.4 = getelementptr inbounds [5 x i32], ptr %in, i64 0, i64 4
  %5 = load i32, ptr %arrayidx.4, align 4
  %arrayidx3.4 = getelementptr inbounds [5 x i32], ptr %ret, i64 0, i64 4
  store i32 %5, ptr %arrayidx3.4, align 4
  ret void
}

; Six i32 elements (24-byte parameter).
define dso_local void @foo_St4x6(ptr nocapture noundef readonly byval(%struct.St4x6) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x6(
  ; CHECK: .param .align 4 .b8 foo_St4x6_param_0[24],
  ; CHECK: .param .b32 foo_St4x6_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x6_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8];
  ; CHECK: st.u32 [[[R1]]+8], [[R4]];
  ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12];
  ; CHECK: st.u32 [[[R1]]+12], [[R5]];
  ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16];
  ; CHECK: st.u32 [[[R1]]+16], [[R6]];
  ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20];
  ; CHECK: st.u32 [[[R1]]+20], [[R7]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  %arrayidx.2 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 2
  %3 = load i32, ptr %arrayidx.2, align 4
  %arrayidx3.2 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 2
  store i32 %3, ptr %arrayidx3.2, align 4
  %arrayidx.3 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 3
  %4 = load i32, ptr %arrayidx.3, align 4
  %arrayidx3.3 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 3
  store i32 %4, ptr %arrayidx3.3, align 4
  %arrayidx.4 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 4
  %5 = load i32, ptr %arrayidx.4, align 4
  %arrayidx3.4 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 4
  store i32 %5, ptr %arrayidx3.4, align 4
  %arrayidx.5 = getelementptr inbounds [6 x i32], ptr %in, i64 0, i64 5
  %6 = load i32, ptr %arrayidx.5, align 4
  %arrayidx3.5 = getelementptr inbounds [6 x i32], ptr %ret, i64 0, i64 5
  store i32 %6, ptr %arrayidx3.5, align 4
  ret void
}

; Seven i32 elements (28-byte parameter).
define dso_local void @foo_St4x7(ptr nocapture noundef readonly byval(%struct.St4x7) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x7(
  ; CHECK: .param .align 4 .b8 foo_St4x7_param_0[28],
  ; CHECK: .param .b32 foo_St4x7_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x7_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8];
  ; CHECK: st.u32 [[[R1]]+8], [[R4]];
  ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12];
  ; CHECK: st.u32 [[[R1]]+12], [[R5]];
  ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16];
  ; CHECK: st.u32 [[[R1]]+16], [[R6]];
  ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20];
  ; CHECK: st.u32 [[[R1]]+20], [[R7]];
  ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24];
  ; CHECK: st.u32 [[[R1]]+24], [[R8]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  %arrayidx.2 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 2
  %3 = load i32, ptr %arrayidx.2, align 4
  %arrayidx3.2 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 2
  store i32 %3, ptr %arrayidx3.2, align 4
  %arrayidx.3 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 3
  %4 = load i32, ptr %arrayidx.3, align 4
  %arrayidx3.3 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 3
  store i32 %4, ptr %arrayidx3.3, align 4
  %arrayidx.4 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 4
  %5 = load i32, ptr %arrayidx.4, align 4
  %arrayidx3.4 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 4
  store i32 %5, ptr %arrayidx3.4, align 4
  %arrayidx.5 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 5
  %6 = load i32, ptr %arrayidx.5, align 4
  %arrayidx3.5 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 5
  store i32 %6, ptr %arrayidx3.5, align 4
  %arrayidx.6 = getelementptr inbounds [7 x i32], ptr %in, i64 0, i64 6
  %7 = load i32, ptr %arrayidx.6, align 4
  %arrayidx3.6 = getelementptr inbounds [7 x i32], ptr %ret, i64 0, i64 6
  store i32 %7, ptr %arrayidx3.6, align 4
  ret void
}

; Eight i32 elements (32-byte parameter).
define dso_local void @foo_St4x8(ptr nocapture noundef readonly byval(%struct.St4x8) align 4 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St4x8(
  ; CHECK: .param .align 4 .b8 foo_St4x8_param_0[32],
  ; CHECK: .param .b32 foo_St4x8_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x8_param_1];
  ; CHECK: ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0];
  ; CHECK: st.u32 [[[R1]]], [[R2]];
  ; CHECK: ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4];
  ; CHECK: st.u32 [[[R1]]+4], [[R3]];
  ; CHECK: ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8];
  ; CHECK: st.u32 [[[R1]]+8], [[R4]];
  ; CHECK: ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12];
  ; CHECK: st.u32 [[[R1]]+12], [[R5]];
  ; CHECK: ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16];
  ; CHECK: st.u32 [[[R1]]+16], [[R6]];
  ; CHECK: ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20];
  ; CHECK: st.u32 [[[R1]]+20], [[R7]];
  ; CHECK: ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24];
  ; CHECK: st.u32 [[[R1]]+24], [[R8]];
  ; CHECK: ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28];
  ; CHECK: st.u32 [[[R1]]+28], [[R9]];
  ; CHECK: ret;
  %1 = load i32, ptr %in, align 4
  store i32 %1, ptr %ret, align 4
  %arrayidx.1 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 1
  %2 = load i32, ptr %arrayidx.1, align 4
  %arrayidx3.1 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 1
  store i32 %2, ptr %arrayidx3.1, align 4
  %arrayidx.2 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 2
  %3 = load i32, ptr %arrayidx.2, align 4
  %arrayidx3.2 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 2
  store i32 %3, ptr %arrayidx3.2, align 4
  %arrayidx.3 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 3
  %4 = load i32, ptr %arrayidx.3, align 4
  %arrayidx3.3 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 3
  store i32 %4, ptr %arrayidx3.3, align 4
  %arrayidx.4 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 4
  %5 = load i32, ptr %arrayidx.4, align 4
  %arrayidx3.4 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 4
  store i32 %5, ptr %arrayidx3.4, align 4
  %arrayidx.5 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 5
  %6 = load i32, ptr %arrayidx.5, align 4
  %arrayidx3.5 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 5
  store i32 %6, ptr %arrayidx3.5, align 4
  %arrayidx.6 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 6
  %7 = load i32, ptr %arrayidx.6, align 4
  %arrayidx3.6 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 6
  store i32 %7, ptr %arrayidx3.6, align 4
  %arrayidx.7 = getelementptr inbounds [8 x i32], ptr %in, i64 0, i64 7
  %8 = load i32, ptr %arrayidx.7, align 4
  %arrayidx3.7 = getelementptr inbounds [8 x i32], ptr %ret, i64 0, i64 7
  store i32 %8, ptr %arrayidx3.7, align 4
  ret void
}

; 8-byte-element cases. Same copy pattern with i64 elements: the expected PTX
; keeps the byval parameter at `.align 8`, sizes it as 8*N bytes, and reads it
; one 64-bit element at a time.

; One i64 element: a single scalar 64-bit load/store pair.
define dso_local void @foo_St8x1(ptr nocapture noundef readonly byval(%struct.St8x1) align 8 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St8x1(
  ; CHECK: .param .align 8 .b8 foo_St8x1_param_0[8],
  ; CHECK: .param .b32 foo_St8x1_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x1_param_1];
  ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0];
  ; CHECK: st.u64 [[[R1]]], [[RD1]];
  ; CHECK: ret;
  %1 = load i64, ptr %in, align 8
  store i64 %1, ptr %ret, align 8
  ret void
}

; Two i64 elements (16-byte parameter): expected to stay two scalar accesses.
define dso_local void @foo_St8x2(ptr nocapture noundef readonly byval(%struct.St8x2) align 8 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St8x2(
  ; CHECK: .param .align 8 .b8 foo_St8x2_param_0[16],
  ; CHECK: .param .b32 foo_St8x2_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x2_param_1];
  ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0];
  ; CHECK: st.u64 [[[R1]]], [[RD1]];
  ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8];
  ; CHECK: st.u64 [[[R1]]+8], [[RD2]];
  ; CHECK: ret;
  %1 = load i64, ptr %in, align 8
  store i64 %1, ptr %ret, align 8
  %arrayidx.1 = getelementptr inbounds [2 x i64], ptr %in, i64 0, i64 1
  %2 = load i64, ptr %arrayidx.1, align 8
  %arrayidx3.1 = getelementptr inbounds [2 x i64], ptr %ret, i64 0, i64 1
  store i64 %2, ptr %arrayidx3.1, align 8
  ret void
}

; Three i64 elements (24-byte parameter).
define dso_local void @foo_St8x3(ptr nocapture noundef readonly byval(%struct.St8x3) align 8 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St8x3(
  ; CHECK: .param .align 8 .b8 foo_St8x3_param_0[24],
  ; CHECK: .param .b32 foo_St8x3_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x3_param_1];
  ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0];
  ; CHECK: st.u64 [[[R1]]], [[RD1]];
  ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8];
  ; CHECK: st.u64 [[[R1]]+8], [[RD2]];
  ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16];
  ; CHECK: st.u64 [[[R1]]+16], [[RD3]];
  ; CHECK: ret;
  %1 = load i64, ptr %in, align 8
  store i64 %1, ptr %ret, align 8
  %arrayidx.1 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 1
  %2 = load i64, ptr %arrayidx.1, align 8
  %arrayidx3.1 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 1
  store i64 %2, ptr %arrayidx3.1, align 8
  %arrayidx.2 = getelementptr inbounds [3 x i64], ptr %in, i64 0, i64 2
  %3 = load i64, ptr %arrayidx.2, align 8
  %arrayidx3.2 = getelementptr inbounds [3 x i64], ptr %ret, i64 0, i64 2
  store i64 %3, ptr %arrayidx3.2, align 8
  ret void
}

; Four i64 elements (32-byte parameter).
define dso_local void @foo_St8x4(ptr nocapture noundef readonly byval(%struct.St8x4) align 8 %in, ptr nocapture noundef writeonly %ret) {
  ; CHECK-LABEL: .visible .func foo_St8x4(
  ; CHECK: .param .align 8 .b8 foo_St8x4_param_0[32],
  ; CHECK: .param .b32 foo_St8x4_param_1
  ; CHECK: )
  ; CHECK: ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x4_param_1];
  ; CHECK: ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0];
  ; CHECK: st.u64 [[[R1]]], [[RD1]];
  ; CHECK: ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8];
  ; CHECK: st.u64 [[[R1]]+8], [[RD2]];
  ; CHECK: ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16];
  ; CHECK: st.u64 [[[R1]]+16], [[RD3]];
  ; CHECK: ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24];
  ; CHECK: st.u64 [[[R1]]+24], [[RD4]];
  ; CHECK: ret;
  %1 = load i64, ptr %in, align 8
  store i64 %1, ptr %ret, align 8
  %arrayidx.1 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 1
  %2 = load i64, ptr %arrayidx.1, align 8
  %arrayidx3.1 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 1
  store i64 %2, ptr %arrayidx3.1, align 8
  %arrayidx.2 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 2
  %3 = load i64, ptr %arrayidx.2, align 8
  %arrayidx3.2 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 2
  store i64 %3, ptr %arrayidx3.2, align 8
  %arrayidx.3 = getelementptr inbounds [4 x i64], ptr %in, i64 0, i64 3
  %4 = load i64, ptr %arrayidx.3, align 8
  %arrayidx3.3 = getelementptr inbounds [4 x i64], ptr %ret, i64 0, i64 3
  store i64 %4, ptr %arrayidx3.3, align 8
  ret void
}