; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s

;-----------------------------------------------------------------------------
; Group 1: a single variable lane on top of an otherwise all-zero vector.
;
; Every function inserts its second argument into the last lane of a constant
; vector whose remaining lanes are zero. The expected code is one zeroing
; `movi` followed by exactly one lane move. For the floating-point variants
; the scalar arrives in a vector register, so the lane move is an
; element-to-element `mov` instead of a move from a general-purpose register.
;-----------------------------------------------------------------------------

define <8 x i8> @v8i8z(i8 %t, i8 %s) nounwind {
; CHECK-LABEL: v8i8z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: mov v0.b[7], w1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <8 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 7
  ret <8 x i8> %vec
}

define <16 x i8> @v16i8z(i8 %t, i8 %s) nounwind {
; CHECK-LABEL: v16i8z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov v0.b[15], w1
; CHECK-NEXT: ret
  %vec = insertelement <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef>, i8 %s, i32 15
  ret <16 x i8> %vec
}

define <4 x i16> @v4i16z(i16 %t, i16 %s) nounwind {
; CHECK-LABEL: v4i16z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: mov v0.h[3], w1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <4 x i16> <i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 3
  ret <4 x i16> %vec
}

define <8 x i16> @v8i16z(i16 %t, i16 %s) nounwind {
; CHECK-LABEL: v8i16z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov v0.h[7], w1
; CHECK-NEXT: ret
  %vec = insertelement <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>, i16 %s, i32 7
  ret <8 x i16> %vec
}

define <2 x i32> @v2i32z(i32 %t, i32 %s) nounwind {
; CHECK-LABEL: v2i32z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: mov v0.s[1], w1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <2 x i32> <i32 0, i32 undef>, i32 %s, i32 1
  ret <2 x i32> %vec
}

define <4 x i32> @v4i32z(i32 %t, i32 %s) nounwind {
; CHECK-LABEL: v4i32z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov v0.s[3], w1
; CHECK-NEXT: ret
  %vec = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 undef>, i32 %s, i32 3
  ret <4 x i32> %vec
}

define <2 x i64> @v2i64z(i64 %t, i64 %s) nounwind {
; CHECK-LABEL: v2i64z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: ret
  %vec = insertelement <2 x i64> <i64 0, i64 undef>, i64 %s, i32 1
  ret <2 x i64> %vec
}

define <2 x float> @v2f32z(float %t, float %s) nounwind {
; CHECK-LABEL: v2f32z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: mov v0.s[1], v1.s[0]
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <2 x float> <float 0.0, float undef>, float %s, i32 1
  ret <2 x float> %vec
}

define <4 x float> @v4f32z(float %t, float %s) nounwind {
; CHECK-LABEL: v4f32z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
; CHECK-NEXT: mov v0.s[3], v1.s[0]
; CHECK-NEXT: ret
  %vec = insertelement <4 x float> <float 0.0, float 0.0, float 0.0, float undef>, float %s, i32 3
  ret <4 x float> %vec
}

define <2 x double> @v2f64z(double %t, double %s) nounwind {
; CHECK-LABEL: v2f64z:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0000000000000000
; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: ret
  %vec = insertelement <2 x double> <double 0.0, double undef>, double %s, i32 1
  ret <2 x double> %vec
}

;-----------------------------------------------------------------------------
; Group 2: a single variable lane on top of an otherwise all-ones vector.
;
; Same shape as group 1, but the constant lanes are -1 (all bits set). The
; expected code is one all-ones `movi` followed by exactly one lane move.
;-----------------------------------------------------------------------------

define <8 x i8> @v8i8m(i8 %t, i8 %s) nounwind {
; CHECK-LABEL: v8i8m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0xffffffffffffffff
; CHECK-NEXT: mov v0.b[7], w1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 7
  ret <8 x i8> %vec
}

define <16 x i8> @v16i8m(i8 %t, i8 %s) nounwind {
; CHECK-LABEL: v16i8m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT: mov v0.b[15], w1
; CHECK-NEXT: ret
  %vec = insertelement <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 undef>, i8 %s, i32 15
  ret <16 x i8> %vec
}

define <4 x i16> @v4i16m(i16 %t, i16 %s) nounwind {
; CHECK-LABEL: v4i16m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0xffffffffffffffff
; CHECK-NEXT: mov v0.h[3], w1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <4 x i16> <i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 3
  ret <4 x i16> %vec
}

define <8 x i16> @v8i16m(i16 %t, i16 %s) nounwind {
; CHECK-LABEL: v8i16m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT: mov v0.h[7], w1
; CHECK-NEXT: ret
  %vec = insertelement <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 undef>, i16 %s, i32 7
  ret <8 x i16> %vec
}

define <2 x i32> @v2i32m(i32 %t, i32 %s) nounwind {
; CHECK-LABEL: v2i32m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi d0, #0xffffffffffffffff
; CHECK-NEXT: mov v0.s[1], w1
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
  %vec = insertelement <2 x i32> <i32 -1, i32 undef>, i32 %s, i32 1
  ret <2 x i32> %vec
}

define <4 x i32> @v4i32m(i32 %t, i32 %s) nounwind {
; CHECK-LABEL: v4i32m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT: mov v0.s[3], w1
; CHECK-NEXT: ret
  %vec = insertelement <4 x i32> <i32 -1, i32 -1, i32 -1, i32 undef>, i32 %s, i32 3
  ret <4 x i32> %vec
}

define <2 x i64> @v2i64m(i64 %t, i64 %s) nounwind {
; CHECK-LABEL: v2i64m:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: ret
  %vec = insertelement <2 x i64> <i64 -1, i64 undef>, i64 %s, i32 1
  ret <2 x i64> %vec
}

;-----------------------------------------------------------------------------
; Group 3: a single variable lane on top of a splat of some other constant,
; with the result stored through the pointer argument.
;
; Each splat value is chosen so that it is expected to be materialised by one
; vector-immediate instruction, followed by one lane move and one store:
;   i8   1                      -> movi #1
;   i8   -128                   -> movi #128
;   i16  21760  (0x5500)        -> movi #85, lsl #8
;   i16  -21761 (0xAAFF)        -> mvni #85, lsl #8
;   i32  983040 (0x000F0000)    -> movi #15, lsl #16
;   i32  16318463 (0x00F8FFFF)  -> movi #248, msl #16
;   i64  0xC000000000000000     -> fmov #-2.0 (same bit pattern as double -2.0)
;   float 2.0  (0x40000000)     -> movi #64, lsl #24
;   float -2.0 (0xC0000000)     -> movi #192, lsl #24
;   double 2.0                  -> fmov #2.0
;-----------------------------------------------------------------------------

define void @v8i8st(<8 x i8>* %p, i8 %s) nounwind {
; CHECK-LABEL: v8i8st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.8b, #1
; CHECK-NEXT: mov v0.b[7], w1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 undef>, i8 %s, i32 7
  store <8 x i8> %vec, <8 x i8>* %p, align 8
  ret void
}

define void @v16i8st(<16 x i8>* %p, i8 %s) nounwind {
; CHECK-LABEL: v16i8st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.16b, #128
; CHECK-NEXT: mov v0.b[15], w1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 undef>, i8 %s, i32 15
  store <16 x i8> %vec, <16 x i8>* %p, align 16
  ret void
}

define void @v4i16st(<4 x i16>* %p, i16 %s) nounwind {
; CHECK-LABEL: v4i16st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4h, #85, lsl #8
; CHECK-NEXT: mov v0.h[3], w1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <4 x i16> <i16 21760, i16 21760, i16 21760, i16 undef>, i16 %s, i32 3
  store <4 x i16> %vec, <4 x i16>* %p, align 8
  ret void
}

define void @v8i16st(<8 x i16>* %p, i16 %s) nounwind {
; CHECK-LABEL: v8i16st:
; CHECK: // %bb.0:
; CHECK-NEXT: mvni v0.8h, #85, lsl #8
; CHECK-NEXT: mov v0.h[7], w1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <8 x i16> <i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 -21761, i16 undef>, i16 %s, i32 7
  store <8 x i16> %vec, <8 x i16>* %p, align 16
  ret void
}

define void @v2i32st(<2 x i32>* %p, i32 %s) nounwind {
; CHECK-LABEL: v2i32st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.2s, #15, lsl #16
; CHECK-NEXT: mov v0.s[1], w1
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <2 x i32> <i32 983040, i32 undef>, i32 %s, i32 1
  store <2 x i32> %vec, <2 x i32>* %p, align 8
  ret void
}

define void @v4i32st(<4 x i32>* %p, i32 %s) nounwind {
; CHECK-LABEL: v4i32st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v0.4s, #248, msl #16
; CHECK-NEXT: mov v0.s[3], w1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <4 x i32> <i32 16318463, i32 16318463, i32 16318463, i32 undef>, i32 %s, i32 3
  store <4 x i32> %vec, <4 x i32>* %p, align 16
  ret void
}

define void @v2i64st(<2 x i64>* %p, i64 %s) nounwind {
; CHECK-LABEL: v2i64st:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v0.2d, #-2.00000000
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <2 x i64> <i64 13835058055282163712, i64 undef>, i64 %s, i32 1
  store <2 x i64> %vec, <2 x i64>* %p, align 16
  ret void
}

define void @v2f32st(<2 x float>* %p, float %s) nounwind {
; CHECK-LABEL: v2f32st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.2s, #64, lsl #24
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: mov v1.s[1], v0.s[0]
; CHECK-NEXT: str d1, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <2 x float> <float 2.0, float undef>, float %s, i32 1
  store <2 x float> %vec, <2 x float>* %p, align 8
  ret void
}

define void @v4f32st(<4 x float>* %p, float %s) nounwind {
; CHECK-LABEL: v4f32st:
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.4s, #192, lsl #24
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT: mov v1.s[3], v0.s[0]
; CHECK-NEXT: str q1, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <4 x float> <float -2.0, float -2.0, float -2.0, float undef>, float %s, i32 3
  store <4 x float> %vec, <4 x float>* %p, align 16
  ret void
}

define void @v2f64st(<2 x double>* %p, double %s) nounwind {
; CHECK-LABEL: v2f64st:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov v1.2d, #2.00000000
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov v1.d[1], v0.d[0]
; CHECK-NEXT: str q1, [x0]
; CHECK-NEXT: ret
  %vec = insertelement <2 x double> <double 2.0, double undef>, double %s, i32 1
  store <2 x double> %vec, <2 x double>* %p, align 16
  ret void
}

;-----------------------------------------------------------------------------
; Variable-index insert into an illegal vector type.
;
; <32 x i8> is not a legal type here; its preferred alignment is larger than
; the stack alignment, so the alignment used for the stack temporary is
; reduced to that of the legal type it is broken down into. The expected code
; spills both halves to a 32-byte stack slot, stores the constant byte at the
; index masked to 0..31, and reloads both halves.
;-----------------------------------------------------------------------------

define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_32xi8:
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT: and x9, x0, #0x1f
; CHECK-NEXT: mov x8, sp
; CHECK-NEXT: mov w10, #30
; CHECK-NEXT: stp q0, q1, [sp]
; CHECK-NEXT: strb w10, [x8, x9]
; CHECK-NEXT: ldp q0, q1, [sp], #32
; CHECK-NEXT: ret
  %res = insertelement <32 x i8> %a, i8 30, i32 %x
  ret <32 x i8> %res
}