; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s ; ; Test storing of replicated values using vector replicate type instructions. ;; Replicated registers define void @fun_2x1b(i8* %Src, i16* %Dst) { ; CHECK-LABEL: fun_2x1b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepb %v0, 0(%r2) ; CHECK-NEXT: vsteh %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %i = load i8, i8* %Src %ZE = zext i8 %i to i16 %Val = mul i16 %ZE, 257 store i16 %Val, i16* %Dst ret void } ; Test multiple stores of same value. define void @fun_4x1b(i8* %Src, i32* %Dst, i32* %Dst2) { ; CHECK-LABEL: fun_4x1b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepb %v0, 0(%r2) ; CHECK-NEXT: vstef %v0, 0(%r3), 0 ; CHECK-NEXT: vstef %v0, 0(%r4), 0 ; CHECK-NEXT: br %r14 %i = load i8, i8* %Src %ZE = zext i8 %i to i32 %Val = mul i32 %ZE, 16843009 store i32 %Val, i32* %Dst store i32 %Val, i32* %Dst2 ret void } define void @fun_8x1b(i8* %Src, i64* %Dst) { ; CHECK-LABEL: fun_8x1b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepb %v0, 0(%r2) ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %i = load i8, i8* %Src %ZE = zext i8 %i to i64 %Val = mul i64 %ZE, 72340172838076673 store i64 %Val, i64* %Dst ret void } ; A second truncated store of same value. define void @fun_8x1b_4x1b(i8* %Src, i64* %Dst, i32* %Dst2) { ; CHECK-LABEL: fun_8x1b_4x1b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepb %v0, 0(%r2) ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: vstef %v0, 0(%r4), 0 ; CHECK-NEXT: br %r14 %i = load i8, i8* %Src %ZE = zext i8 %i to i64 %Val = mul i64 %ZE, 72340172838076673 store i64 %Val, i64* %Dst %TrVal = trunc i64 %Val to i32 store i32 %TrVal, i32* %Dst2 ret void } define void @fun_2x2b(i16* %Src, i32* %Dst) { ; CHECK-LABEL: fun_2x2b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlreph %v0, 0(%r2) ; CHECK-NEXT: vstef %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %i = load i16, i16* %Src %ZE = zext i16 %i to i32 %Val = mul i32 %ZE, 65537 store i32 %Val, i32* %Dst ret void } define void @fun_4x2b(i16* %Src, i64* %Dst) { ; CHECK-LABEL: fun_4x2b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlreph %v0, 0(%r2) ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %i = load i16, i16* %Src %ZE = zext i16 %i to i64 %Val = mul i64 %ZE, 281479271743489 store i64 %Val, i64* %Dst ret void } define void @fun_2x4b(i32* %Src, i64* %Dst) { ; CHECK-LABEL: fun_2x4b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepf %v0, 0(%r2) ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %i = load i32, i32* %Src %ZE = zext i32 %i to i64 %Val = mul i64 %ZE, 4294967297 store i64 %Val, i64* %Dst ret void } ;; Replicated registers already in a vector. ; Test multiple stores of same value. define void @fun_2Eltsx8x1b(i8* %Src, <2 x i64>* %Dst, <2 x i64>* %Dst2) { ; CHECK-LABEL: fun_2Eltsx8x1b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepb %v0, 0(%r2) ; CHECK-NEXT: vst %v0, 0(%r3), 3 ; CHECK-NEXT: vst %v0, 0(%r4), 3 ; CHECK-NEXT: br %r14 %i = load i8, i8* %Src %ZE = zext i8 %i to i64 %Mul = mul i64 %ZE, 72340172838076673 %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0 %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer store <2 x i64> %Val, <2 x i64>* %Dst store <2 x i64> %Val, <2 x i64>* %Dst2 ret void } define void @fun_4Eltsx2x2b(i16* %Src, <4 x i32>* %Dst) { ; CHECK-LABEL: fun_4Eltsx2x2b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlreph %v0, 0(%r2) ; CHECK-NEXT: vst %v0, 0(%r3), 3 ; CHECK-NEXT: br %r14 %i = load i16, i16* %Src %ZE = zext i16 %i to i32 %Mul = mul i32 %ZE, 65537 %tmp = insertelement <4 x i32> undef, i32 %Mul, i32 0 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer store <4 x i32> %Val, <4 x i32>* %Dst ret void } define void @fun_6Eltsx2x2b(i16* %Src, <6 x i32>* %Dst) { ; CHECK-LABEL: fun_6Eltsx2x2b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlreph %v0, 0(%r2) ; CHECK-NEXT: vsteg %v0, 16(%r3), 0 ; CHECK-NEXT: vst %v0, 0(%r3), 4 ; CHECK-NEXT: br %r14 %i = load i16, i16* %Src %ZE = zext i16 %i to i32 %Mul = mul i32 %ZE, 65537 %tmp = insertelement <6 x i32> undef, i32 %Mul, i32 0 %Val = shufflevector <6 x i32> %tmp, <6 x i32> undef, <6 x i32> zeroinitializer store <6 x i32> %Val, <6 x i32>* %Dst ret void } define void @fun_2Eltsx2x4b(i32* %Src, <2 x i64>* %Dst) { ; CHECK-LABEL: fun_2Eltsx2x4b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepf %v0, 0(%r2) ; CHECK-NEXT: vst %v0, 0(%r3), 3 ; CHECK-NEXT: br %r14 %i = load i32, i32* %Src %ZE = zext i32 %i to i64 %Mul = mul i64 %ZE, 4294967297 %tmp = insertelement <2 x i64> undef, i64 %Mul, i32 0 %Val = shufflevector <2 x i64> %tmp, <2 x i64> undef, <2 x i32> zeroinitializer store <2 x i64> %Val, <2 x i64>* %Dst ret void } define void @fun_5Eltsx2x4b(i32* %Src, <5 x i64>* %Dst) { ; CHECK-LABEL: fun_5Eltsx2x4b: ; CHECK: # %bb.0: ; CHECK-NEXT: vlrepf %v0, 0(%r2) ; CHECK-NEXT: vsteg %v0, 32(%r3), 0 ; CHECK-NEXT: vst %v0, 16(%r3), 4 ; CHECK-NEXT: vst %v0, 0(%r3), 4 ; CHECK-NEXT: br %r14 %i = load i32, i32* %Src %ZE = zext i32 %i to i64 %Mul = mul i64 %ZE, 4294967297 %tmp = insertelement <5 x i64> undef, i64 %Mul, i32 0 %Val = shufflevector <5 x i64> %tmp, <5 x i64> undef, <5 x i32> zeroinitializer store <5 x i64> %Val, <5 x i64>* %Dst ret void } ; Test replicating an incoming argument. define void @fun_8x1b_arg(i8 %Arg, i64* %Dst) { ; CHECK-LABEL: fun_8x1b_arg: ; CHECK: # %bb.0: ; CHECK-NEXT: vlvgp %v0, %r2, %r2 ; CHECK-NEXT: vrepb %v0, %v0, 7 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %ZE = zext i8 %Arg to i64 %Val = mul i64 %ZE, 72340172838076673 store i64 %Val, i64* %Dst ret void } ; A replication of a non-local value (ISD::AssertZext case). define void @fun_nonlocalval() { ; CHECK-LABEL: fun_nonlocalval: ; CHECK: # %bb.0: ; CHECK-NEXT: lhi %r0, 0 ; CHECK-NEXT: ciblh %r0, 0, 0(%r14) ; CHECK-NEXT: .LBB13_1: # %bb2 ; CHECK-NEXT: llgf %r0, 0(%r1) ; CHECK-NEXT: vlvgp %v0, %r0, %r0 ; CHECK-NEXT: vrepf %v0, %v0, 1 ; CHECK-NEXT: vst %v0, 0(%r1), 3 ; CHECK-NEXT: br %r14 %i = load i32, i32* undef, align 4 br i1 undef, label %bb2, label %bb7 bb2: ; preds = %bb1 %i3 = zext i32 %i to i64 %i4 = mul nuw i64 %i3, 4294967297 %i5 = insertelement <2 x i64> poison, i64 %i4, i64 0 %i6 = shufflevector <2 x i64> %i5, <2 x i64> poison, <2 x i32> zeroinitializer store <2 x i64> %i6, <2 x i64>* undef, align 8 ret void bb7: ret void } ;; Replicated immediates ; Some cases where scalar instruction is better define void @fun_8x1i_zero(i64* %Dst) { ; CHECK-LABEL: fun_8x1i_zero: ; CHECK: # %bb.0: ; CHECK-NEXT: mvghi 0(%r2), 0 ; CHECK-NEXT: br %r14 store i64 0, i64* %Dst ret void } define void @fun_4x1i_minus1(i32* %Dst) { ; CHECK-LABEL: fun_4x1i_minus1: ; CHECK: # %bb.0: ; CHECK-NEXT: mvhi 0(%r2), -1 ; CHECK-NEXT: br %r14 store i32 -1, i32* %Dst ret void } define void @fun_4x1i_allones(i32* %Dst) { ; CHECK-LABEL: fun_4x1i_allones: ; CHECK: # %bb.0: ; CHECK-NEXT: mvhi 0(%r2), -1 ; CHECK-NEXT: br %r14 store i32 4294967295, i32* %Dst ret void } define void @fun_2i(i16* %Dst) { ; CHECK-LABEL: fun_2i: ; CHECK: # %bb.0: ; CHECK-NEXT: mvhhi 0(%r2), 1 ; CHECK-NEXT: br %r14 store i16 1, i16* %Dst ret void } define void @fun_2x2i(i32* %Dst) { ; CHECK-LABEL: fun_2x2i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepih %v0, 1 ; CHECK-NEXT: vstef %v0, 0(%r2), 0 ; CHECK-NEXT: br %r14 store i32 65537, i32* %Dst ret void } define void @fun_4x2i(i64* %Dst) { ; CHECK-LABEL: fun_4x2i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepih %v0, 1 ; CHECK-NEXT: vsteg %v0, 0(%r2), 0 ; CHECK-NEXT: br %r14 store i64 281479271743489, i64* %Dst ret void } define void @fun_2x4i(i64* %Dst) { ; CHECK-LABEL: fun_2x4i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepif %v0, 1 ; CHECK-NEXT: vsteg %v0, 0(%r2), 0 ; CHECK-NEXT: br %r14 store i64 4294967297, i64* %Dst ret void } ; Store replicated immediate twice using the same vector. define void @fun_4x1i(i32* %Dst, i32* %Dst2) { ; CHECK-LABEL: fun_4x1i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepib %v0, 3 ; CHECK-NEXT: vstef %v0, 0(%r2), 0 ; CHECK-NEXT: vstef %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 store i32 50529027, i32* %Dst store i32 50529027, i32* %Dst2 ret void } define void @fun_8x1i(i64* %Dst, i64* %Dst2) { ; CHECK-LABEL: fun_8x1i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepib %v0, 1 ; CHECK-NEXT: vsteg %v0, 0(%r2), 0 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 store i64 72340172838076673, i64* %Dst store i64 72340172838076673, i64* %Dst2 ret void } ; Similar, but with vectors. define void @fun_4Eltsx4x1i_2Eltsx4x1i(<4 x i32>* %Dst, <2 x i32>* %Dst2) { ; CHECK-LABEL: fun_4Eltsx4x1i_2Eltsx4x1i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepib %v0, 3 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer store <4 x i32> %Val, <4 x i32>* %Dst %tmp2 = insertelement <2 x i32> undef, i32 50529027, i32 0 %Val2 = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer store <2 x i32> %Val2, <2 x i32>* %Dst2 ret void } ; Same, but 64-bit store is scalar. define void @fun_4Eltsx4x1i_8x1i(<4 x i32>* %Dst, i64* %Dst2) { ; CHECK-LABEL: fun_4Eltsx4x1i_8x1i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepib %v0, 3 ; CHECK-NEXT: vst %v0, 0(%r2), 3 ; CHECK-NEXT: vsteg %v0, 0(%r3), 0 ; CHECK-NEXT: br %r14 %tmp = insertelement <4 x i32> undef, i32 50529027, i32 0 %Val = shufflevector <4 x i32> %tmp, <4 x i32> undef, <4 x i32> zeroinitializer store <4 x i32> %Val, <4 x i32>* %Dst store i64 217020518514230019, i64* %Dst2 ret void } define void @fun_3Eltsx2x4i(<3 x i64>* %Dst) { ; CHECK-LABEL: fun_3Eltsx2x4i: ; CHECK: # %bb.0: ; CHECK-NEXT: vrepif %v0, 1 ; CHECK-NEXT: vsteg %v0, 16(%r2), 0 ; CHECK-NEXT: vst %v0, 0(%r2), 4 ; CHECK-NEXT: br %r14 %tmp = insertelement <3 x i64> undef, i64 4294967297, i32 0 %Val = shufflevector <3 x i64> %tmp, <3 x i64> undef, <3 x i32> zeroinitializer store <3 x i64> %Val, <3 x i64>* %Dst ret void } ; i128 replicated '1': not using vrepib, but should compile. define void @fun_16x1i(i128* %Dst) { ; CHECK-LABEL: fun_16x1i: ; CHECK: # %bb.0: ; CHECK-NEXT: llihf %r0, 16843009 ; CHECK-NEXT: oilf %r0, 16843009 ; CHECK-NEXT: stg %r0, 8(%r2) ; CHECK-NEXT: stg %r0, 0(%r2) ; CHECK-NEXT: br %r14 store i128 1334440654591915542993625911497130241, i128* %Dst ret void }