; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-rewrite-out-arguments < %s | FileCheck %s


; A function with no return instruction: its only block ends in `unreachable`.
; The expected output leaves it untouched.
define void @no_ret_blocks() #0 {
  unreachable
}

; The pointer argument is never written. The expected output leaves the
; function untouched (no `.body` clone is generated).
define void @void_one_out_arg_i32_no_use(i32* %val) #0 {
  ret void
}

; A store through a `byval` pointer argument. The expected output keeps the
; store in place and generates no `.body` clone.
define void @skip_byval_arg(i32* byval(i32) %val) #0 {
  store i32 0, i32* %val
  ret void
}

; Uses attribute group #1 (nounwind noinline optnone). The expected output
; leaves the function untouched.
; NOTE(review): the argument is also `byval`, exactly as in @skip_byval_arg,
; so from this file alone it is not clear that `optnone` is what prevents the
; rewrite -- consider whether `byval` should be dropped here.
define void @skip_optnone(i32* byval(i32) %val) #1 {
  store i32 0, i32* %val
  ret void
}

; The only store to the argument is volatile. The expected output leaves the
; function untouched.
; NOTE(review): the argument is also `byval`, exactly as in @skip_byval_arg,
; so from this file alone it is not clear that the volatile store is what
; prevents the rewrite -- consider whether `byval` should be dropped here.
define void @skip_volatile(i32* byval(i32) %val) #0 {
  store volatile i32 0, i32* %val
  ret void
}

; The only store to the argument is a seq_cst atomic store. The expected
; output leaves the function untouched.
; NOTE(review): the argument is also `byval`, exactly as in @skip_byval_arg,
; so from this file alone it is not clear that the atomic store is what
; prevents the rewrite -- consider whether `byval` should be dropped here.
define void @skip_atomic(i32* byval(i32) %val) #0 {
  store atomic i32 0, i32* %val seq_cst, align 4
  ret void
}

; The pointer argument is the *value* being stored (to an undef address);
; nothing is stored through it. The expected output leaves the function
; untouched.
define void @skip_store_pointer_val(i32* %val) #0 {
  store i32* %val, i32** undef
  ret void
}

; The store goes through a GEP (element index 1) derived from the argument
; rather than directly through the argument. The expected output leaves the
; function untouched.
define void @skip_store_gep(i32* %val) #0 {
  %gep = getelementptr inbounds i32, i32* %val, i32 1
  store i32 0, i32* %gep
  ret void
}

; The first argument is `sret`; the second is a plain pointer that is also
; stored to. The expected output keeps both stores and generates no `.body`
; clone.
define void @skip_sret(i32* sret(i32) %sret, i32* %out) #0 {
  store i32 1, i32* %sret
  store i32 0, i32* %out
  ret void
}


; Basic positive case: a single constant store to the pointer argument right
; before the return. The expected output is a `.body` clone that returns the
; stored value inside a struct, plus a wrapper under the original name that
; calls the clone (passing undef for the pointer) and performs the store.
define void @void_one_out_arg_i32_1_use(i32* %val) #0 {
  store i32 0, i32* %val
  ret void
}


; Like @void_one_out_arg_i32_1_use, but the argument and the store are both
; `align 8`. The expected wrapper keeps `align 8` on its argument and on the
; store it emits.
define void @void_one_out_arg_i32_1_use_align(i32* align 8 %val) #0 {
  store i32 0, i32* %val, align 8
  ret void
}




; Two return blocks, each storing a different constant to the argument just
; before returning. In the expected `.body` clone each block returns its own
; value (zeroinitializer in %ret0, { i32 9 } in %ret1).
define void @void_one_out_arg_i32_2_use(i1 %arg0, i32* %val) #0 {
  br i1 %arg0, label %ret0, label %ret1

ret0:
  store i32 0, i32* %val
  ret void

ret1:
  store i32 9, i32* %val
  ret void
}

; Opaque external callee used by the tests below. It is declared without any
; attributes, so calls to it are presumably treated as possibly writing
; memory.
declare void @may.clobber()


; Two back-to-back stores to the same argument. In the expected `.body` clone
; the first store (of 0) is kept and only the last stored value (1) is
; returned.
define void @void_one_out_arg_i32_2_stores(i32* %val) #0 {
  store i32 0, i32* %val
  store i32 1, i32* %val
  ret void
}


; Two stores to the argument with a call to @may.clobber in between. In the
; expected `.body` clone the first store and the call are kept, and the value
; of the final store (1) is returned.
define void @void_one_out_arg_i32_2_stores_clobber(i32* %val) #0 {
  store i32 0, i32* %val
  call void @may.clobber()
  store i32 1, i32* %val
  ret void
}


; A call to @may.clobber sits between the store and the return. The expected
; output leaves the function untouched.
define void @void_one_out_arg_i32_call_may_clobber(i32* %val) #0 {
  store i32 0, i32* %val
  call void @may.clobber()
  ret void
}


; The call to @may.clobber comes *before* the store, so nothing separates the
; store from the return. The expected output rewrites the function; the call
; stays in the `.body` clone.
define void @void_one_out_arg_i32_pre_call_may_clobber(i32* %val) #0 {
  call void @may.clobber()
  store i32 0, i32* %val
  ret void
}

; The argument is loaded from again after the store (the loaded value is
; unused). The expected output leaves the function untouched.
define void @void_one_out_arg_i32_reload(i32* %val) #0 {
  store i32 0, i32* %val
  %load = load i32, i32* %val, align 4
  ret void
}

; The store is in the entry block while the return lives in a separate block
; reached by an unconditional branch. The expected output leaves the function
; untouched.
define void @void_one_out_arg_i32_store_in_different_block(i32* %out) #0 {
  %load = load i32, i32 addrspace(1)* undef
  store i32 0, i32* %out
  br label %ret

ret:
  ret void
}


; Only one of the two return blocks (%ret1) stores to the argument; %ret0
; returns without writing it. The expected output leaves the function
; untouched.
define void @unused_out_arg_one_branch(i1 %arg0, i32* %val) #0 {
  br i1 %arg0, label %ret0, label %ret1

ret0:
  ret void

ret1:
  store i32 9, i32* %val
  ret void
}


; Vector variant of the basic case: a constant <2 x i32> is stored to the
; argument. The expected output rewrites the function and the wrapper's store
; is `align 8`.
define void @void_one_out_arg_v2i32_1_use(<2 x i32>* %val) #0 {
  store <2 x i32> <i32 17, i32 9>, <2 x i32>* %val
  ret void
}

; Aggregate type stored as a whole by @void_one_out_arg_struct_1_use.
%struct = type { i32, i8, float }


; Normally this is split into element accesses which we don't handle.
; Here the whole %struct constant is written with a single store; the
; expected `.body` clone returns it nested inside the generated return struct.
define void @void_one_out_arg_struct_1_use(%struct* %out) #0 {
  store %struct { i32 9, i8 99, float 4.0 }, %struct* %out
  ret void
}


; The function already returns a value. The expected `.body` clone returns
; { i32 9, i32 24 }: the original return value in field 0 and the stored value
; in field 1; the wrapper stores field 1 and returns field 0.
define i32 @i32_one_out_arg_i32_1_use(i32* %val) #0 {
  store i32 24, i32* %val
  ret i32 9
}


; %arg0 is never used; only the `nocapture` float pointer %arg1 is stored to.
; The expected wrapper forwards %arg0 to the `.body` clone unchanged and
; passes undef for %arg1.
define void @unused_different_type(i32* %arg0, float* nocapture %arg1) #0 {
  store float 4.0, float* %arg1, align 4
  ret void
}


; Two `noalias` pointer arguments of the same type, each stored to once. The
; expected `.body` clone returns { i32 1, i32 2 } and the wrapper stores
; field 0 through the first argument and field 1 through the second.
define void @multiple_same_return_noalias(i32* noalias %out0, i32* noalias %out1) #0 {
  store i32 1, i32* %out0, align 4
  store i32 2, i32* %out1, align 4
  ret void
}


; Same stores as @multiple_same_return_noalias but without `noalias` on the
; arguments. The expected `.body` clone returns { i32 2, i32 1 }, i.e. the
; value stored through %out1 comes first.
; NOTE(review): the expected wrapper stores field 0 through its first
; argument and field 1 through its second, which appears to write 2 via %out0
; and 1 via %out1 -- the reverse of the stores below. Confirm this is intended.
define void @multiple_same_return_mayalias(i32* %out0, i32* %out1) #0 {
  store i32 1, i32* %out0, align 4
  store i32 2, i32* %out1, align 4
  ret void
}


; Like @multiple_same_return_mayalias but the two stores are issued in the
; opposite order (%out1 first, then %out0). The expected `.body` clone returns
; { i32 1, i32 2 }.
define void @multiple_same_return_mayalias_order(i32* %out0, i32* %out1) #0 {
  store i32 2, i32* %out1, align 4
  store i32 1, i32* %out0, align 4
  ret void
}

; Currently this fails to convert because the store won't be found if
; it isn't in the same block as the return.
; The store to %out is in %entry while the only return is in %endif, reached
; either directly or through %if. The expected output leaves the function
; untouched.
define i32 @store_in_entry_block(i1 %arg0, i32* %out) #0 {
entry:
  %val0 = load i32, i32 addrspace(1)* undef
  store i32 %val0, i32* %out
  br i1 %arg0, label %if, label %endif

if:
  %val1 = load i32, i32 addrspace(1)* undef
  br label %endif

endif:
  %phi = phi i32 [ 0, %entry ], [ %val1, %if ]
  ret i32 %phi
}


; i1 return value with no return attributes, combined with one i32 store to
; the argument. The expected `.body` clone returns { i1 true, i32 24 }.
define i1 @i1_one_out_arg_i32_1_use(i32* %val) #0 {
  store i32 24, i32* %val
  ret i1 true
}

; Make sure we don't leave around return attributes that are
; incompatible with struct return types.


; Variant of @i1_one_out_arg_i32_1_use whose return value carries `zeroext`.
define zeroext i1 @i1_zeroext_one_out_arg_i32_1_use(i32* %val) #0 {
  store i32 24, i32* %val
  ret i1 true
}


; Variant of @i1_one_out_arg_i32_1_use whose return value carries `signext`.
define signext i1 @i1_signext_one_out_arg_i32_1_use(i32* %val) #0 {
  store i32 24, i32* %val
  ret i1 true
}


; The return value is an addrspace(1) pointer carrying `noalias`. The expected
; `.body` clone returns { i32 addrspace(1)* null, i32 24 }.
define noalias i32 addrspace(1)* @p1i32_noalias_one_out_arg_i32_1_use(i32* %val) #0 {
  store i32 24, i32* %val
  ret i32 addrspace(1)* null
}

; The pointer argument is in addrspace(1) instead of the default address
; space used by the other tests. The expected output leaves the function
; untouched.
define void @void_one_out_non_private_arg_i32_1_use(i32 addrspace(1)* %val) #0 {
  store i32 0, i32 addrspace(1)* %val
  ret void
}

; The stored value is a function pointer loaded at run time (not a constant).
; The expected `.body` clone builds its return struct with insertvalue.
define void @func_ptr_type(void()** %out) #0 {
  %func = load void()*, void()** undef
  store void()* %func, void()** %out
  ret void
}

; The store goes through a bitcast of the argument to a different
; function-pointer type. The expected `.body` clone returns the i32()* value
; and the wrapper re-creates the bitcast of its argument before storing.
define void @bitcast_func_ptr_type(void()** %out) #0 {
  %func = load i32()*, i32()** undef
  %cast = bitcast void()** %out to i32()**
  store i32()* %func, i32()** %cast
  ret void
}


; A constant [4 x i32] array is stored to the argument. The expected output
; rewrites the function and returns the array inside the generated struct.
define void @out_arg_small_array([4 x i32]* %val) #0 {
  store [4 x i32] [i32 0, i32 1, i32 2, i32 3], [4 x i32]* %val
  ret void
}

; Same shape as @out_arg_small_array but with a [17 x i32] array. The expected
; output leaves this function untouched.
define void @out_arg_large_array([17 x i32]* %val) #0 {
  store [17 x i32] zeroinitializer, [17 x i32]* %val
  ret void
}

; The function already returns <16 x i32> and additionally stores one i32 to
; the argument. The expected output leaves the function untouched.
define <16 x i32> @num_regs_return_limit(i32* %out, i32 %val) #0 {
  %load = load volatile <16 x i32>, <16 x i32> addrspace(1)* undef
  store i32 %val, i32* %out
  ret <16 x i32> %load
}

; The function returns [15 x i32] and stores one i32 to the argument. The
; expected output rewrites it: the `.body` clone returns the array in field 0
; and the stored value in field 1.
define [15 x i32] @num_regs_reach_limit(i32* %out, i32 %val) #0 {
  %load = load volatile [15 x i32], [15 x i32] addrspace(1)* undef
  store i32 %val, i32* %out
  ret [15 x i32] %load
}


; Like @num_regs_reach_limit but with two pointer arguments that are each
; stored to. The expected `.body` clone returns three fields: the array,
; then %load1, then %val0.
; NOTE(review): the expected wrapper stores field 1 through its first pointer
; argument and field 2 through its second, which appears to write %load1 via
; %out0 and %val0 via %out1 -- the reverse of the stores below. Confirm this
; is intended.
define [15 x i32] @num_regs_reach_limit_leftover(i32* %out0, i32* %out1, i32 %val0) #0 {
  %load0 = load volatile [15 x i32], [15 x i32] addrspace(1)* undef
  %load1 = load volatile i32, i32 addrspace(1)* undef
  store i32 %val0, i32* %out0
  store i32 %load1, i32* %out1
  ret [15 x i32] %load0
}


; The function has a DISubprogram (!5) and its call, store and ret each carry
; their own !dbg location (!10, !11, !12). In the expected `.body` clone the
; call keeps a !dbg attachment and the new insertvalue and ret share a single
; location.
define void @preserve_debug_info(i32 %arg0, i32* %val) #0 !dbg !5 {
  call void @may.clobber(), !dbg !10
  store i32 %arg0, i32* %val, !dbg !11
  ret void, !dbg !12
}

; Same body as @preserve_debug_info without debug locations; instead the
; function carries a !kernel_arg_access_qual metadata attachment (!13).
define void @preserve_metadata(i32 %arg0, i32* %val) #0 !kernel_arg_access_qual !13 {
  call void @may.clobber()
  store i32 %arg0, i32* %val
  ret void
}

; Clang emits this pattern for 3-vectors for some reason.

; The <3 x i32>* argument is bitcast to <4 x i32>* and a full <4 x i32> value
; is stored through the cast.
define void @bitcast_pointer_v4i32_v3i32(<3 x i32>* %out) #0 {
  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
  %bitcast = bitcast <3 x i32>* %out to <4 x i32>*
  store <4 x i32> %load, <4 x i32>* %bitcast
  ret void
}

; Like @bitcast_pointer_v4i32_v3i32, but the argument's element type is float
; while the stored vector is <4 x i32>.
define void @bitcast_pointer_v4i32_v3f32(<3 x float>* %out) #0 {
  %load = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef
  %bitcast = bitcast <3 x float>* %out to <4 x i32>*
  store <4 x i32> %load, <4 x i32>* %bitcast
  ret void
}


; Try different element types and bit widths which could produce broken
; casts.


; float* argument bitcast to i32*; an i32 is stored through the cast.
define void @bitcast_pointer_i32_f32(float* %out) #0 {
  %load = load volatile i32, i32 addrspace(1)* undef
  %bitcast = bitcast float* %out to i32*
  store i32 %load, i32* %bitcast
  ret void
}

; half* argument bitcast to i32*; the stored i32 is wider than the argument's
; pointee type.
define void @bitcast_pointer_i32_f16(half* %out) #0 {
  %load = load volatile i32, i32 addrspace(1)* undef
  %bitcast = bitcast half* %out to i32*
  store i32 %load, i32* %bitcast
  ret void
}

; i32* argument bitcast to half*; the stored half is narrower than the
; argument's pointee type.
define void @bitcast_pointer_f16_i32(i32* %out) #0 {
  %load = load volatile half, half addrspace(1)* undef
  %bitcast = bitcast i32* %out to half*
  store half %load, half* %bitcast
  ret void
}

; Wrapper struct types used as pointee types by the bitcast_struct_* tests
; below. All but %struct.v3f32.f32 have a single member.
%struct.i128 = type { i128 }
%struct.v2f32 = type { <2 x float> }
%struct.v3f32 = type { <3 x float> }
%struct.v3f32.f32 = type { <3 x float>, float }
%struct.v4f32 = type { <4 x float> }


; %value is widened from <3 x float> to <4 x float> by a shufflevector whose
; fourth lane is undef, then stored (align 16) through the %struct.v3f32*
; argument bitcast to <4 x float>*.
define void @bitcast_struct_v3f32_v3f32(%struct.v3f32* %out, <3 x float> %value) #0 {
  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %cast = bitcast %struct.v3f32* %out to <4 x float>*
  store <4 x float> %extractVec, <4 x float>* %cast, align 16
  ret void
}


; Like @bitcast_struct_v3f32_v3f32, but the value is an integer vector
; (<3 x i32> widened to <4 x i32>) while the struct member is <3 x float>.
define void @bitcast_struct_v3f32_v3i32(%struct.v3f32* %out, <3 x i32> %value) #0 {
  %extractVec = shufflevector <3 x i32> %value, <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %cast = bitcast %struct.v3f32* %out to <4 x i32>*
  store <4 x i32> %extractVec, <4 x i32>* %cast, align 16
  ret void
}


; The stored <4 x float> has exactly the type of the single member of
; %struct.v4f32; only the struct wrapper is cast away.
define void @bitcast_struct_v4f32_v4f32(%struct.v4f32* %out, <4 x float> %value) #0 {
  %cast = bitcast %struct.v4f32* %out to <4 x float>*
  store <4 x float> %value, <4 x float>* %cast, align 16
  ret void
}

; A full <4 x i32> argument value (no shufflevector widening) is stored
; through a %struct.v3f32* argument bitcast to <4 x i32>*.
define void @bitcast_struct_v3f32_v4i32(%struct.v3f32* %out, <4 x i32> %value) #0 {
  %cast = bitcast %struct.v3f32* %out to <4 x i32>*
  store <4 x i32> %value, <4 x i32>* %cast, align 16
  ret void
}

; A <3 x float> value widened to <4 x float> (fourth lane undef) is stored
; through a %struct.v4f32* argument, whose member is a 4-element vector.
define void @bitcast_struct_v4f32_v3f32(%struct.v4f32* %out, <3 x float> %value) #0 {
  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %cast = bitcast %struct.v4f32* %out to <4 x float>*
  store <4 x float> %extractVec, <4 x float>* %cast, align 16
  ret void
}

; The stored value is a <2 x float> (align 8), i.e. fewer elements than the
; <3 x float> member of %struct.v3f32.
define void @bitcast_struct_v3f32_v2f32(%struct.v3f32* %out, <2 x float> %value) #0 {
  %cast = bitcast %struct.v3f32* %out to <2 x float>*
  store <2 x float> %value, <2 x float>* %cast, align 8
  ret void
}

; The pointee struct has two members (<3 x float>, float). A <3 x float>
; value widened to <4 x float> (fourth lane undef) is stored through the
; argument bitcast to <4 x float>*.
define void @bitcast_struct_v3f32_f32_v3f32(%struct.v3f32.f32* %out, <3 x float> %value) #0 {
  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %cast = bitcast %struct.v3f32.f32* %out to <4 x float>*
  store <4 x float> %extractVec, <4 x float>* %cast, align 16
  ret void
}

; Like @bitcast_struct_v3f32_f32_v3f32, but a full <4 x float> argument value
; is stored directly, without the shufflevector widening.
define void @bitcast_struct_v3f32_f32_v4f32(%struct.v3f32.f32* %out, <4 x float> %value) #0 {
  %cast = bitcast %struct.v3f32.f32* %out to <4 x float>*
  store <4 x float> %value, <4 x float>* %cast, align 16
  ret void
}

; The struct's single member is a scalar i128, while the stored value is a
; <4 x float> vector.
define void @bitcast_struct_i128_v4f32(%struct.i128* %out, <4 x float> %value) #0 {
  %cast = bitcast %struct.i128* %out to <4 x float>*
  store <4 x float> %value, <4 x float>* %cast, align 16
  ret void
}

; Array variant: a [4 x i32]* argument is bitcast to [4 x float]* and a
; [4 x float] value is stored through the cast (align 4).
define void @bitcast_array_v4i32_v4f32([4 x i32]* %out, [4 x float] %value) #0 {
  %cast = bitcast [4 x i32]* %out to [4 x float]*
  store [4 x float] %value, [4 x float]* %cast, align 4
  ret void
}


; Two return blocks, each with its own bitcast of %out to <4 x float>* and its
; own store: %ret0 stores the widened %value, %ret1 stores a value loaded from
; an undef addrspace(1) pointer.
define void @multi_return_bitcast_struct_v3f32_v3f32(i1 %cond, %struct.v3f32* %out, <3 x float> %value) #0 {
entry:
  br i1 %cond, label %ret0, label %ret1

ret0:
  %extractVec = shufflevector <3 x float> %value, <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  %cast0 = bitcast %struct.v3f32* %out to <4 x float>*
  store <4 x float> %extractVec, <4 x float>* %cast0, align 16
  ret void

ret1:
  %cast1 = bitcast %struct.v3f32* %out to <4 x float>*
  %load = load <4 x float>, <4 x float> addrspace(1)* undef
  store <4 x float> %load, <4 x float>* %cast1, align 16
  ret void
}

; Reverse direction of the bitcast_struct_* tests: the argument is a plain
; <3 x float>* that is bitcast to %struct.v3f32*, and a whole %struct.v3f32
; value is stored through the cast (align 4).
define void @bitcast_v3f32_struct_v3f32(<3 x float>* %out, %struct.v3f32 %value) #0 {
  %cast = bitcast <3 x float>* %out to %struct.v3f32*
  store %struct.v3f32 %value, %struct.v3f32* %cast, align 4
  ret void
}

; #0: used by every function definition in this file except @skip_optnone.
attributes #0 = { nounwind }
; #1: used only by @skip_optnone.
attributes #1 = { nounwind noinline optnone }
; #2: not referenced by any function written in this file.
; NOTE(review): presumably kept to mirror the attribute group the expected
; output gives to the generated wrapper functions -- confirm.
attributes #2 = { alwaysinline nounwind }

; Module-level debug-info metadata. It exists to support
; @preserve_debug_info: !5 is that function's DISubprogram and !10-!12 are
; the locations attached to its call, store and ret.
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "code-object-metadata-kernel-debug-props.cl", directory: "/some/random/directory")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 2}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = distinct !DISubprogram(name: "test", scope: !1, file: !1, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, retainedNodes: !2)
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8}
!8 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64)
!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!10 = !DILocation(line: 2, column: 3, scope: !5)
!11 = !DILocation(line: 2, column: 8, scope: !5)
!12 = !DILocation(line: 3, column: 3, scope: !5)
; Operand of the !kernel_arg_access_qual attachment on @preserve_metadata.
!13 = !{!"none"}

; CHECK-LABEL: define {{[^@]+}}@no_ret_blocks
; CHECK-SAME: () #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:    unreachable
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_no_use
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_byval_arg
; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_optnone
; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_volatile
; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store volatile i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_atomic
; CHECK-SAME: (i32* byval(i32) [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store atomic i32 0, i32* [[VAL]] seq_cst, align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_store_pointer_val
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32* [[VAL]], i32** undef, align 8
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_store_gep
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[VAL]], i32 1
; CHECK-NEXT:    store i32 0, i32* [[GEP]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@skip_sret
; CHECK-SAME: (i32* sret(i32) [[SRET:%.*]], i32* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 1, i32* [[SRET]], align 4
; CHECK-NEXT:    store i32 0, i32* [[OUT]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] zeroinitializer
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE:%.*]] @void_one_out_arg_i32_1_use.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align.body
; CHECK-SAME: (i32* align 8 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] zeroinitializer
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_1_use_align
; CHECK-SAME: (i32* align 8 [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN:%.*]] @void_one_out_arg_i32_1_use_align.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_1_USE_ALIGN]] [[TMP2]], 0
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 8
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use.body
; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]]
; CHECK:       ret0:
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] zeroinitializer
; CHECK:       ret1:
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_2_USE]] { i32 9 }
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_use
; CHECK-SAME: (i1 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_USE:%.*]] @void_one_out_arg_i32_2_use.body(i1 [[TMP0]], i32* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_USE]] [[TMP3]], 0
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] { i32 1 }
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES:%.*]] @void_one_out_arg_i32_2_stores.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES]] [[TMP2]], 0
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    call void @may.clobber()
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] { i32 1 }
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_2_stores_clobber
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER:%.*]] @void_one_out_arg_i32_2_stores_clobber.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_2_STORES_CLOBBER]] [[TMP2]], 0
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_call_may_clobber
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    call void @may.clobber()
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    call void @may.clobber()
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] zeroinitializer
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_pre_call_may_clobber
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER:%.*]] @void_one_out_arg_i32_pre_call_may_clobber.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_I32_PRE_CALL_MAY_CLOBBER]] [[TMP2]], 0
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_reload
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 0, i32* [[VAL]], align 4
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_i32_store_in_different_block
; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT:    store i32 0, i32* [[OUT]], align 4
; CHECK-NEXT:    br label [[RET:%.*]]
; CHECK:       ret:
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@unused_out_arg_one_branch
; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    br i1 [[ARG0]], label [[RET0:%.*]], label [[RET1:%.*]]
; CHECK:       ret0:
; CHECK-NEXT:    ret void
; CHECK:       ret1:
; CHECK-NEXT:    store i32 9, i32* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use.body
; CHECK-SAME: (<2 x i32>* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] { <2 x i32> <i32 17, i32 9> }
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_v2i32_1_use
; CHECK-SAME: (<2 x i32>* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_V2I32_1_USE:%.*]] @void_one_out_arg_v2i32_1_use.body(<2 x i32>* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_V2I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    store <2 x i32> [[TMP3]], <2 x i32>* [[TMP0]], align 8
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use.body
; CHECK-SAME: (%struct* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] { [[STRUCT:%.*]] { i32 9, i8 99, float 4.000000e+00 } }
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_arg_struct_1_use
; CHECK-SAME: (%struct* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[VOID_ONE_OUT_ARG_STRUCT_1_USE:%.*]] @void_one_out_arg_struct_1_use.body(%struct* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[VOID_ONE_OUT_ARG_STRUCT_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    store [[STRUCT:%.*]] [[TMP3]], %struct* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] { i32 9, i32 24 }
;
;
; CHECK-LABEL: define {{[^@]+}}@i32_one_out_arg_i32_1_use
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[I32_ONE_OUT_ARG_I32_1_USE:%.*]] @i32_one_out_arg_i32_1_use.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I32_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    ret i32 [[TMP4]]
;
;
; CHECK-LABEL: define {{[^@]+}}@unused_different_type.body
; CHECK-SAME: (i32* [[ARG0:%.*]], float* nocapture [[ARG1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[UNUSED_DIFFERENT_TYPE:%.*]] { float 4.000000e+00 }
;
;
; CHECK-LABEL: define {{[^@]+}}@unused_different_type
; CHECK-SAME: (i32* [[TMP0:%.*]], float* nocapture [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[UNUSED_DIFFERENT_TYPE:%.*]] @unused_different_type.body(i32* [[TMP0]], float* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[UNUSED_DIFFERENT_TYPE]] [[TMP3]], 0
; CHECK-NEXT:    store float [[TMP4]], float* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias.body
; CHECK-SAME: (i32* noalias [[OUT0:%.*]], i32* noalias [[OUT1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] { i32 1, i32 2 }
;
;
; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_noalias
; CHECK-SAME: (i32* noalias [[TMP0:%.*]], i32* noalias [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_NOALIAS:%.*]] @multiple_same_return_noalias.body(i32* undef, i32* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 0
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_NOALIAS]] [[TMP3]], 1
; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias.body
; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] { i32 2, i32 1 }
;
;
; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias
; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS:%.*]] @multiple_same_return_mayalias.body(i32* undef, i32* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 0
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS]] [[TMP3]], 1
; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order.body
; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] { i32 1, i32 2 }
;
;
; CHECK-LABEL: define {{[^@]+}}@multiple_same_return_mayalias_order
; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER:%.*]] @multiple_same_return_mayalias_order.body(i32* undef, i32* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 0
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTIPLE_SAME_RETURN_MAYALIAS_ORDER]] [[TMP3]], 1
; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@store_in_entry_block
; CHECK-SAME: (i1 [[ARG0:%.*]], i32* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[VAL0:%.*]] = load i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT:    store i32 [[VAL0]], i32* [[OUT]], align 4
; CHECK-NEXT:    br i1 [[ARG0]], label [[IF:%.*]], label [[ENDIF:%.*]]
; CHECK:       if:
; CHECK-NEXT:    [[VAL1:%.*]] = load i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT:    br label [[ENDIF]]
; CHECK:       endif:
; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VAL1]], [[IF]] ]
; CHECK-NEXT:    ret i32 [[PHI]]
;
;
; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 }
;
;
; CHECK-LABEL: define {{[^@]+}}@i1_one_out_arg_i32_1_use
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_one_out_arg_i32_1_use.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I1_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    ret i1 [[TMP4]]
;
;
; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 }
;
;
; CHECK-LABEL: define {{[^@]+}}@i1_zeroext_one_out_arg_i32_1_use
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_zeroext_one_out_arg_i32_1_use.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I1_ZEROEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    ret i1 [[TMP4]]
;
;
; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] { i1 true, i32 24 }
;
;
; CHECK-LABEL: define {{[^@]+}}@i1_signext_one_out_arg_i32_1_use
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE:%.*]] @i1_signext_one_out_arg_i32_1_use.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[I1_SIGNEXT_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    ret i1 [[TMP4]]
;
;
; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use.body
; CHECK-SAME: (i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] { i32 addrspace(1)* null, i32 24 }
;
;
; CHECK-LABEL: define {{[^@]+}}@p1i32_noalias_one_out_arg_i32_1_use
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE:%.*]] @p1i32_noalias_one_out_arg_i32_1_use.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 1
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[P1I32_NOALIAS_ONE_OUT_ARG_I32_1_USE]] [[TMP2]], 0
; CHECK-NEXT:    ret i32 addrspace(1)* [[TMP4]]
;
;
; CHECK-LABEL: define {{[^@]+}}@void_one_out_non_private_arg_i32_1_use
; CHECK-SAME: (i32 addrspace(1)* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store i32 0, i32 addrspace(1)* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@func_ptr_type.body
; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[FUNC:%.*]] = load void ()*, void ()** undef, align 8
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[FUNC_PTR_TYPE:%.*]] undef, void ()* [[FUNC]], 0
; CHECK-NEXT:    ret [[FUNC_PTR_TYPE]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@func_ptr_type
; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[FUNC_PTR_TYPE:%.*]] @func_ptr_type.body(void ()** undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[FUNC_PTR_TYPE]] [[TMP2]], 0
; CHECK-NEXT:    store void ()* [[TMP3]], void ()** [[TMP0]], align 8
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type.body
; CHECK-SAME: (void ()** [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[FUNC:%.*]] = load i32 ()*, i32 ()** undef, align 8
; CHECK-NEXT:    [[CAST:%.*]] = bitcast void ()** [[OUT]] to i32 ()**
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_FUNC_PTR_TYPE:%.*]] undef, i32 ()* [[FUNC]], 0
; CHECK-NEXT:    ret [[BITCAST_FUNC_PTR_TYPE]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_func_ptr_type
; CHECK-SAME: (void ()** [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_FUNC_PTR_TYPE:%.*]] @bitcast_func_ptr_type.body(void ()** undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_FUNC_PTR_TYPE]] [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast void ()** [[TMP0]] to i32 ()**
; CHECK-NEXT:    store i32 ()* [[TMP3]], i32 ()** [[TMP4]], align 8
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array.body
; CHECK-SAME: ([4 x i32]* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    ret [[OUT_ARG_SMALL_ARRAY:%.*]] { [4 x i32] [i32 0, i32 1, i32 2, i32 3] }
;
;
; CHECK-LABEL: define {{[^@]+}}@out_arg_small_array
; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[OUT_ARG_SMALL_ARRAY:%.*]] @out_arg_small_array.body([4 x i32]* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[OUT_ARG_SMALL_ARRAY]] [[TMP2]], 0
; CHECK-NEXT:    store [4 x i32] [[TMP3]], [4 x i32]* [[TMP0]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@out_arg_large_array
; CHECK-SAME: ([17 x i32]* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    store [17 x i32] zeroinitializer, [17 x i32]* [[VAL]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@num_regs_return_limit
; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <16 x i32>, <16 x i32> addrspace(1)* undef, align 64
; CHECK-NEXT:    store i32 [[VAL]], i32* [[OUT]], align 4
; CHECK-NEXT:    ret <16 x i32> [[LOAD]]
;
;
; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit.body
; CHECK-SAME: (i32* [[OUT:%.*]], i32 [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* undef, align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT:%.*]] undef, [15 x i32] [[LOAD]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT]] [[TMP1]], i32 [[VAL]], 1
; CHECK-NEXT:    ret [[NUM_REGS_REACH_LIMIT]] [[TMP2]]
;
;
; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit
; CHECK-SAME: (i32* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[NUM_REGS_REACH_LIMIT:%.*]] @num_regs_reach_limit.body(i32* undef, i32 [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 1
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT]] [[TMP3]], 0
; CHECK-NEXT:    ret [15 x i32] [[TMP5]]
;
;
; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover.body
; CHECK-SAME: (i32* [[OUT0:%.*]], i32* [[OUT1:%.*]], i32 [[VAL0:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD0:%.*]] = load volatile [15 x i32], [15 x i32] addrspace(1)* undef, align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] undef, [15 x i32] [[LOAD0]], 0
; CHECK-NEXT:    [[TMP2:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP1]], i32 [[LOAD1]], 1
; CHECK-NEXT:    [[TMP3:%.*]] = insertvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP2]], i32 [[VAL0]], 2
; CHECK-NEXT:    ret [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP3]]
;
;
; CHECK-LABEL: define {{[^@]+}}@num_regs_reach_limit_leftover
; CHECK-SAME: (i32* [[TMP0:%.*]], i32* [[TMP1:%.*]], i32 [[TMP2:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP4:%.*]] = call [[NUM_REGS_REACH_LIMIT_LEFTOVER:%.*]] @num_regs_reach_limit_leftover.body(i32* undef, i32* undef, i32 [[TMP2]])
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 1
; CHECK-NEXT:    store i32 [[TMP5]], i32* [[TMP0]], align 4
; CHECK-NEXT:    [[TMP6:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 2
; CHECK-NEXT:    store i32 [[TMP6]], i32* [[TMP1]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = extractvalue [[NUM_REGS_REACH_LIMIT_LEFTOVER]] [[TMP4]], 0
; CHECK-NEXT:    ret [15 x i32] [[TMP7]]
;
;
; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info.body
; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    call void @may.clobber(), !dbg [[DBG5:![0-9]+]]
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[PRESERVE_DEBUG_INFO:%.*]] undef, i32 [[ARG0]], 0, !dbg [[DBG11:![0-9]+]]
; CHECK-NEXT:    ret [[PRESERVE_DEBUG_INFO]] [[TMP1]], !dbg [[DBG11]]
;
;
; CHECK-LABEL: define {{[^@]+}}@preserve_debug_info
; CHECK-SAME: (i32 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] !dbg [[DBG6:![0-9]+]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[PRESERVE_DEBUG_INFO:%.*]] @preserve_debug_info.body(i32 [[TMP0]], i32* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[PRESERVE_DEBUG_INFO]] [[TMP3]], 0
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@preserve_metadata.body
; CHECK-SAME: (i32 [[ARG0:%.*]], i32* [[VAL:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    call void @may.clobber()
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[PRESERVE_METADATA:%.*]] undef, i32 [[ARG0]], 0
; CHECK-NEXT:    ret [[PRESERVE_METADATA]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@preserve_metadata
; CHECK-SAME: (i32 [[TMP0:%.*]], i32* [[TMP1:%.*]]) #[[ATTR2]] !kernel_arg_access_qual !12 {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[PRESERVE_METADATA:%.*]] @preserve_metadata.body(i32 [[TMP0]], i32* undef)
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[PRESERVE_METADATA]] [[TMP3]], 0
; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP1]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32.body
; CHECK-SAME: (<3 x i32>* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <3 x i32>* [[OUT]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3I32:%.*]] undef, <4 x i32> [[LOAD]], 0
; CHECK-NEXT:    ret [[BITCAST_POINTER_V4I32_V3I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3i32
; CHECK-SAME: (<3 x i32>* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3I32:%.*]] @bitcast_pointer_v4i32_v3i32.body(<3 x i32>* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3I32]] [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <3 x i32>* [[TMP0]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32.body
; CHECK-SAME: (<3 x float>* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile <4 x i32>, <4 x i32> addrspace(1)* undef, align 16
; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast <3 x float>* [[OUT]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_V4I32_V3F32:%.*]] undef, <4 x i32> [[LOAD]], 0
; CHECK-NEXT:    ret [[BITCAST_POINTER_V4I32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_v4i32_v3f32
; CHECK-SAME: (<3 x float>* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_V4I32_V3F32:%.*]] @bitcast_pointer_v4i32_v3f32.body(<3 x float>* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_V4I32_V3F32]] [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <3 x float>* [[TMP0]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32.body
; CHECK-SAME: (float* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast float* [[OUT]] to i32*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F32:%.*]] undef, i32 [[LOAD]], 0
; CHECK-NEXT:    ret [[BITCAST_POINTER_I32_F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f32
; CHECK-SAME: (float* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F32:%.*]] @bitcast_pointer_i32_f32.body(float* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F32]] [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP0]] to i32*
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16.body
; CHECK-SAME: (half* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast half* [[OUT]] to i32*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_I32_F16:%.*]] undef, i32 [[LOAD]], 0
; CHECK-NEXT:    ret [[BITCAST_POINTER_I32_F16]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_i32_f16
; CHECK-SAME: (half* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_I32_F16:%.*]] @bitcast_pointer_i32_f16.body(half* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_I32_F16]] [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast half* [[TMP0]] to i32*
; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP4]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32.body
; CHECK-SAME: (i32* [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[LOAD:%.*]] = load volatile half, half addrspace(1)* undef, align 2
; CHECK-NEXT:    [[BITCAST:%.*]] = bitcast i32* [[OUT]] to half*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_POINTER_F16_I32:%.*]] undef, half [[LOAD]], 0
; CHECK-NEXT:    ret [[BITCAST_POINTER_F16_I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_pointer_f16_i32
; CHECK-SAME: (i32* [[TMP0:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP2:%.*]] = call [[BITCAST_POINTER_F16_I32:%.*]] @bitcast_pointer_f16_i32.body(i32* undef)
; CHECK-NEXT:    [[TMP3:%.*]] = extractvalue [[BITCAST_POINTER_F16_I32]] [[TMP2]], 0
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP0]] to half*
; CHECK-NEXT:    store half [[TMP3]], half* [[TMP4]], align 2
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32.body
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3f32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3F32:%.*]] @bitcast_struct_v3f32_v3f32.body(%struct.v3f32* undef, <3 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32.body
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <3 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x i32> [[VALUE]], <3 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V3I32:%.*]] undef, <4 x i32> [[EXTRACTVEC]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v3i32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <3 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V3I32:%.*]] @bitcast_struct_v3f32_v3i32.body(%struct.v3f32* undef, <3 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V3I32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32.body
; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v4f32
; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V4F32:%.*]] @bitcast_struct_v4f32_v4f32.body(%struct.v4f32* undef, <4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V4F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32.body
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <4 x i32> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x i32>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V4I32:%.*]] undef, <4 x i32> [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v4i32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <4 x i32> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V4I32:%.*]] @bitcast_struct_v3f32_v4i32.body(%struct.v3f32* undef, <4 x i32> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V4I32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <4 x i32>*
; CHECK-NEXT:    store <4 x i32> [[TMP4]], <4 x i32>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32.body
; CHECK-SAME: (%struct.v4f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v4f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V4F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v4f32_v3f32
; CHECK-SAME: (%struct.v4f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V4F32_V3F32:%.*]] @bitcast_struct_v4f32_v3f32.body(%struct.v4f32* undef, <3 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V4F32_V3F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v4f32* [[TMP0]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32.body
; CHECK-SAME: (%struct.v3f32* [[OUT:%.*]], <2 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32* [[OUT]] to <2 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_V2F32:%.*]] undef, <2 x float> [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_v2f32
; CHECK-SAME: (%struct.v3f32* [[TMP0:%.*]], <2 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_V2F32:%.*]] @bitcast_struct_v3f32_v2f32.body(%struct.v3f32* undef, <2 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_V2F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32* [[TMP0]] to <2 x float>*
; CHECK-NEXT:    store <2 x float> [[TMP4]], <2 x float>* [[TMP5]], align 8
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32.body
; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v3f32
; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <3 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V3F32:%.*]] @bitcast_struct_v3f32_f32_v3f32.body(%struct.v3f32.f32* undef, <3 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V3F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32.body
; CHECK-SAME: (%struct.v3f32.f32* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.v3f32.f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_v3f32_f32_v4f32
; CHECK-SAME: (%struct.v3f32.f32* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_V3F32_F32_V4F32:%.*]] @bitcast_struct_v3f32_f32_v4f32.body(%struct.v3f32.f32* undef, <4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_V3F32_F32_V4F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.v3f32.f32* [[TMP0]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32.body
; CHECK-SAME: (%struct.i128* [[OUT:%.*]], <4 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast %struct.i128* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_STRUCT_I128_V4F32:%.*]] undef, <4 x float> [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_STRUCT_I128_V4F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_struct_i128_v4f32
; CHECK-SAME: (%struct.i128* [[TMP0:%.*]], <4 x float> [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_STRUCT_I128_V4F32:%.*]] @bitcast_struct_i128_v4f32.body(%struct.i128* undef, <4 x float> [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_STRUCT_I128_V4F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast %struct.i128* [[TMP0]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP4]], <4 x float>* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32.body
; CHECK-SAME: ([4 x i32]* [[OUT:%.*]], [4 x float] [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast [4 x i32]* [[OUT]] to [4 x float]*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_ARRAY_V4I32_V4F32:%.*]] undef, [4 x float] [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_array_v4i32_v4f32
; CHECK-SAME: ([4 x i32]* [[TMP0:%.*]], [4 x float] [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_ARRAY_V4I32_V4F32:%.*]] @bitcast_array_v4i32_v4f32.body([4 x i32]* undef, [4 x float] [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_ARRAY_V4I32_V4F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast [4 x i32]* [[TMP0]] to [4 x float]*
; CHECK-NEXT:    store [4 x float] [[TMP4]], [4 x float]* [[TMP5]], align 4
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32.body
; CHECK-SAME: (i1 [[COND:%.*]], %struct.v3f32* [[OUT:%.*]], <3 x float> [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 [[COND]], label [[RET0:%.*]], label [[RET1:%.*]]
; CHECK:       ret0:
; CHECK-NEXT:    [[EXTRACTVEC:%.*]] = shufflevector <3 x float> [[VALUE]], <3 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
; CHECK-NEXT:    [[CAST0:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[TMP0:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] undef, <4 x float> [[EXTRACTVEC]], 0
; CHECK-NEXT:    ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP0]]
; CHECK:       ret1:
; CHECK-NEXT:    [[CAST1:%.*]] = bitcast %struct.v3f32* [[OUT]] to <4 x float>*
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float> addrspace(1)* undef, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] undef, <4 x float> [[LOAD]], 0
; CHECK-NEXT:    ret [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@multi_return_bitcast_struct_v3f32_v3f32
; CHECK-SAME: (i1 [[TMP0:%.*]], %struct.v3f32* [[TMP1:%.*]], <3 x float> [[TMP2:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP4:%.*]] = call [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32:%.*]] @multi_return_bitcast_struct_v3f32_v3f32.body(i1 [[TMP0]], %struct.v3f32* undef, <3 x float> [[TMP2]])
; CHECK-NEXT:    [[TMP5:%.*]] = extractvalue [[MULTI_RETURN_BITCAST_STRUCT_V3F32_V3F32]] [[TMP4]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast %struct.v3f32* [[TMP1]] to <4 x float>*
; CHECK-NEXT:    store <4 x float> [[TMP5]], <4 x float>* [[TMP6]], align 16
; CHECK-NEXT:    ret void
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32.body
; CHECK-SAME: (<3 x float>* [[OUT:%.*]], [[STRUCT_V3F32:%.*]] [[VALUE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:    [[CAST:%.*]] = bitcast <3 x float>* [[OUT]] to %struct.v3f32*
; CHECK-NEXT:    [[TMP1:%.*]] = insertvalue [[BITCAST_V3F32_STRUCT_V3F32:%.*]] undef, [[STRUCT_V3F32]] [[VALUE]], 0
; CHECK-NEXT:    ret [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP1]]
;
;
; CHECK-LABEL: define {{[^@]+}}@bitcast_v3f32_struct_v3f32
; CHECK-SAME: (<3 x float>* [[TMP0:%.*]], [[STRUCT_V3F32:%.*]] [[TMP1:%.*]]) #[[ATTR2]] {
; CHECK-NEXT:    [[TMP3:%.*]] = call [[BITCAST_V3F32_STRUCT_V3F32:%.*]] @bitcast_v3f32_struct_v3f32.body(<3 x float>* undef, [[STRUCT_V3F32]] [[TMP1]])
; CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[BITCAST_V3F32_STRUCT_V3F32]] [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <3 x float>* [[TMP0]] to %struct.v3f32*
; CHECK-NEXT:    store [[STRUCT_V3F32]] [[TMP4]], %struct.v3f32* [[TMP5]], align 16
; CHECK-NEXT:    ret void
;