; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX2,LMULMAX2-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX2,LMULMAX2-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64 ; RUN: llc -mtriple=riscv32 -mattr=+v,+zvl128b -riscv-v-vector-bits-min=-1 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,LMULMAX1,LMULMAX1-RV32 ; RUN: llc -mtriple=riscv64 -mattr=+v,+zvl128b -riscv-v-vector-bits-min=-1 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,LMULMAX1,LMULMAX1-RV64 define void @add_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: add_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = add <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @add_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: add_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; 
CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = add <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @add_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: add_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = add <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @add_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: add_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = add <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @sub_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: sub_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = sub <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @sub_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: sub_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = sub <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @sub_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: sub_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = sub <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @sub_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: sub_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = sub <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @mul_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: mul_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = mul <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @mul_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: mul_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = mul <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @mul_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: mul_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = mul <4 x i32> %a, %b 
store <4 x i32> %c, <4 x i32>* %x ret void } define void @mul_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: mul_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = mul <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @and_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: and_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = and <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @and_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: and_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = and <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @and_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: and_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vand.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = and <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @and_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: and_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: 
vand.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = and <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @or_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: or_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = or <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @or_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: or_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = or <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @or_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: or_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = or <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @or_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: or_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = or <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @xor_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: xor_v16i8: 
; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vxor.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = xor <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @xor_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: xor_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vxor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = xor <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @xor_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: xor_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vxor.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = xor <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @xor_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: xor_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vxor.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = xor <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @lshr_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: lshr_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x 
i8>* %y %c = lshr <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @lshr_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: lshr_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = lshr <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @lshr_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: lshr_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = lshr <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @lshr_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: lshr_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vsrl.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = lshr <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @ashr_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: ashr_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = ashr <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @ashr_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: ashr_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; 
CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = ashr <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @ashr_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: ashr_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = ashr <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @ashr_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: ashr_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vsra.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = ashr <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @shl_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: shl_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vsll.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = shl <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @shl_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: shl_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vsll.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = shl <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void 
@shl_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: shl_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vsll.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = shl <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @shl_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: shl_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vsll.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = shl <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @sdiv_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: sdiv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = sdiv <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @sdiv_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: sdiv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = sdiv <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @sdiv_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: sdiv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; 
CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = sdiv <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @sdiv_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: sdiv_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = sdiv <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @srem_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: srem_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vrem.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = srem <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @srem_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: srem_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vrem.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = srem <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @srem_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: srem_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vrem.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = srem <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @srem_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: srem_v2i64: ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vrem.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = srem <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @udiv_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: udiv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = udiv <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @udiv_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: udiv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = udiv <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @udiv_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: udiv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = udiv <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @udiv_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: udiv_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* 
; The four @mulhu_* tests that begin on the following lines (v16i8, v8i16,
; v4i32, v2i64) each load a vector, udiv it by a vector of distinct constants
; and store the quotient back through the same pointer. Their expected
; sequences use vmulhu.vv together with vector shifts (and, in some of them,
; vsub.vv/vadd.vv steps); none of them contains vdivu.vv.
%y %c = udiv <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @urem_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: urem_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vremu.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %c = urem <16 x i8> %a, %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @urem_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: urem_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vremu.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %c = urem <8 x i16> %a, %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @urem_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: urem_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vremu.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %c = urem <4 x i32> %a, %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @urem_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: urem_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vremu.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %c = urem <2 x i64> %a, %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @mulhu_v16i8(<16 x i8>* %x) { ; RV32-LABEL: mulhu_v16i8: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vle8.v v8, (a0) ; RV32-NEXT: li a1, 
513 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vmv.v.i v9, 4 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 ; RV32-NEXT: lui a1, 1 ; RV32-NEXT: addi a2, a1, 78 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV32-NEXT: vmv.s.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vmerge.vim v9, v9, 3, v0 ; RV32-NEXT: lui a2, 8 ; RV32-NEXT: addi a2, a2, 304 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV32-NEXT: vmv.s.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vmerge.vim v9, v9, 2, v0 ; RV32-NEXT: lui a2, 3 ; RV32-NEXT: addi a2, a2, -2044 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV32-NEXT: vmv.s.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: li a2, -128 ; RV32-NEXT: vmerge.vxm v11, v10, a2, v0 ; RV32-NEXT: addi a1, a1, 32 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: lui a1, %hi(.LCPI52_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI52_0) ; RV32-NEXT: vle8.v v12, (a1) ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; RV32-NEXT: vsrl.vv v10, v8, v10 ; RV32-NEXT: vmulhu.vv v10, v10, v12 ; RV32-NEXT: vsub.vv v8, v8, v10 ; RV32-NEXT: vmulhu.vv v8, v8, v11 ; RV32-NEXT: vadd.vv v8, v8, v10 ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhu_v16i8: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vle8.v v8, (a0) ; RV64-NEXT: li a1, 513 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vmv.v.i v9, 4 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 ; RV64-NEXT: lui a1, 1 ; RV64-NEXT: addiw a2, a1, 78 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vmerge.vim v9, v9, 3, v0 ; 
RV64-NEXT: lui a2, 8 ; RV64-NEXT: addiw a2, a2, 304 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vmerge.vim v9, v9, 2, v0 ; RV64-NEXT: lui a2, 3 ; RV64-NEXT: addiw a2, a2, -2044 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64-NEXT: vmv.s.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vmv.v.i v10, 0 ; RV64-NEXT: li a2, -128 ; RV64-NEXT: vmerge.vxm v11, v10, a2, v0 ; RV64-NEXT: addiw a1, a1, 32 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: lui a1, %hi(.LCPI52_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI52_0) ; RV64-NEXT: vle8.v v12, (a1) ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 ; RV64-NEXT: vsrl.vv v10, v8, v10 ; RV64-NEXT: vmulhu.vv v10, v10, v12 ; RV64-NEXT: vsub.vv v8, v8, v10 ; RV64-NEXT: vmulhu.vv v8, v8, v11 ; RV64-NEXT: vadd.vv v8, v8, v10 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = udiv <16 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25> store <16 x i8> %b, <16 x i8>* %x ret void } define void @mulhu_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: mulhu_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vmv.s.x v9, a1 ; CHECK-NEXT: li a1, 33 ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmv.v.i v10, 3 ; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vi v10, v9, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: lui a1, 1048568 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, mu ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmv.s.x v12, a1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, mu ; CHECK-NEXT: vslideup.vi v11, v9, 6 ; CHECK-NEXT: 
vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: lui a1, %hi(.LCPI53_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI53_0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vsrl.vv v11, v8, v11 ; CHECK-NEXT: vmulhu.vv v9, v11, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vmulhu.vv v8, v8, v12 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = udiv <8 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> store <8 x i16> %b, <8 x i16>* %x ret void } define void @mulhu_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: mulhu_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vmv.s.x v9, a1 ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, mu ; CHECK-NEXT: vslideup.vi v10, v9, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI54_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI54_0) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmulhu.vv v9, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: vmulhu.vv v8, v8, v10 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: vmv.s.x v9, a1 ; CHECK-NEXT: vmv.v.i v10, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; CHECK-NEXT: vslideup.vi v10, v9, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = udiv <4 x i32> %a, <i32 5, i32 6, i32 7, i32 9> store <4 x i32> %b, <4 x i32>* %x ret void } define void @mulhu_v2i64(<2 x i64>* %x) { ; RV32-LABEL: mulhu_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lui a1, %hi(.LCPI55_0) ; RV32-NEXT: addi a1, a1, %lo(.LCPI55_0) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vle32.v v9, (a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu 
; NOTE(review): @mulhs_v16i8 (defined on the next line) divides with `udiv`
; and its expected output uses vmulhu.vv and vsrl.vv, whereas @mulhs_v8i16,
; @mulhs_v4i32 and @mulhs_v2i64 divide with `sdiv` and expect vmulh.vv.
; If a signed i8 test was intended, the IR needs `sdiv` and the assertions
; must be regenerated with utils/update_llc_test_checks.py - confirm intent.
; RV32-NEXT: vmulhu.vv v8, v8, v9 ; RV32-NEXT: lui a1, %hi(.LCPI55_1) ; RV32-NEXT: addi a1, a1, %lo(.LCPI55_1) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vle32.v v9, (a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhu_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lui a1, %hi(.LCPI55_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI55_0) ; RV64-NEXT: vlse64.v v8, (a1), zero ; RV64-NEXT: lui a1, %hi(.LCPI55_1) ; RV64-NEXT: ld a1, %lo(.LCPI55_1)(a1) ; RV64-NEXT: vle64.v v9, (a0) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV64-NEXT: vmulhu.vv v8, v9, v8 ; RV64-NEXT: vid.v v9 ; RV64-NEXT: vadd.vi v9, v9, 1 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = udiv <2 x i64> %a, <i64 3, i64 5> store <2 x i64> %b, <2 x i64>* %x ret void } define void @mulhs_v16i8(<16 x i8>* %x) { ; RV32-LABEL: mulhs_v16i8: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vle8.v v8, (a0) ; RV32-NEXT: lui a1, 5 ; RV32-NEXT: addi a1, a1, -1452 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV32-NEXT: vmv.v.i v9, 7 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 ; RV32-NEXT: li a1, -123 ; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: li a1, 57 ; RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; RV32-NEXT: vmulhu.vv v8, v8, v10 ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhs_v16i8: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vle8.v v8, (a0) ; RV64-NEXT: lui a1, 5 ; RV64-NEXT: addiw a1, a1, -1452 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64-NEXT: vmv.v.i v9, 
7 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 ; RV64-NEXT: li a1, -123 ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: li a1, 57 ; RV64-NEXT: vmerge.vxm v10, v10, a1, v0 ; RV64-NEXT: vmulhu.vv v8, v8, v10 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = udiv <16 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9> store <16 x i8> %b, <16 x i8>* %x ret void } define void @mulhs_v8i16(<8 x i16>* %x) { ; RV32-LABEL: mulhs_v8i16: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a1, 105 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 5 ; RV32-NEXT: addi a1, a1, -1755 ; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: lui a1, 1048571 ; RV32-NEXT: addi a1, a1, 1755 ; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV32-NEXT: vmulh.vv v8, v8, v9 ; RV32-NEXT: vsra.vi v8, v8, 1 ; RV32-NEXT: vsrl.vi v9, v8, 15 ; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: vse16.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhs_v8i16: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: li a1, 105 ; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: lui a1, 5 ; RV64-NEXT: addiw a1, a1, -1755 ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: lui a1, 1048571 ; RV64-NEXT: addiw a1, a1, 1755 ; RV64-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV64-NEXT: vmulh.vv v8, v8, v9 ; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vsrl.vi v9, v8, 15 ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: vse16.v v8, (a0) ; RV64-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = sdiv <8 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7> store <8 x i16> %b, <8 x i16>* %x ret void } define void @mulhs_v4i32(<4 x i32>* %x) { ; RV32-LABEL: mulhs_v4i32: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: li a1, 5 ; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: lui a1, 419430 ; 
RV32-NEXT: addi a1, a1, 1639 ; RV32-NEXT: vmv.v.x v9, a1 ; RV32-NEXT: lui a1, 629146 ; RV32-NEXT: addi a1, a1, -1639 ; RV32-NEXT: vmerge.vxm v9, v9, a1, v0 ; RV32-NEXT: vmulh.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 31 ; RV32-NEXT: vsra.vi v8, v8, 1 ; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhs_v4i32: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: lui a1, %hi(.LCPI58_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI58_0) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: vlse64.v v9, (a1), zero ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV64-NEXT: vmulh.vv v8, v8, v9 ; RV64-NEXT: vsra.vi v8, v8, 1 ; RV64-NEXT: vsrl.vi v9, v8, 31 ; RV64-NEXT: vadd.vv v8, v8, v9 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = sdiv <4 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5> store <4 x i32> %b, <4 x i32>* %x ret void } define void @mulhs_v2i64(<2 x i64>* %x) { ; RV32-LABEL: mulhs_v2i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a2, a1, 1365 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vmv.v.x v9, a2 ; RV32-NEXT: addi a1, a1, 1366 ; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; RV32-NEXT: vmv.s.x v9, a1 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vmulh.vv v9, v8, v9 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vid.v v10 ; RV32-NEXT: vsrl.vi v10, v10, 1 ; RV32-NEXT: vrsub.vi v10, v10, 0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vmadd.vv v10, v8, v9 ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, mu ; RV32-NEXT: vslideup.vi v9, v8, 2 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV32-NEXT: vsra.vv v8, v10, v9 ; RV32-NEXT: li a1, 63 ; RV32-NEXT: 
vsrl.vx v9, v10, a1 ; RV32-NEXT: vadd.vv v8, v8, v9 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: mulhs_v2i64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; RV64-NEXT: lui a1, %hi(.LCPI59_0) ; RV64-NEXT: addi a1, a1, %lo(.LCPI59_0) ; RV64-NEXT: vlse64.v v8, (a1), zero ; RV64-NEXT: lui a1, %hi(.LCPI59_1) ; RV64-NEXT: ld a1, %lo(.LCPI59_1)(a1) ; RV64-NEXT: vle64.v v9, (a0) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; RV64-NEXT: vmv.s.x v8, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV64-NEXT: vmulh.vv v8, v9, v8 ; RV64-NEXT: vid.v v10 ; RV64-NEXT: vrsub.vi v11, v10, 0 ; RV64-NEXT: vmadd.vv v11, v9, v8 ; RV64-NEXT: li a1, 63 ; RV64-NEXT: vsrl.vx v8, v11, a1 ; RV64-NEXT: vsra.vv v9, v11, v10 ; RV64-NEXT: vadd.vv v8, v9, v8 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = sdiv <2 x i64> %a, <i64 3, i64 -3> store <2 x i64> %b, <2 x i64>* %x ret void } define void @smin_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: smin_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %cc = icmp slt <16 x i8> %a, %b %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @smin_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: smin_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %cc = icmp slt <8 x i16> %a, %b %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @smin_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; 
CHECK-LABEL: smin_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %cc = icmp slt <4 x i32> %a, %b %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @smin_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: smin_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vmin.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %cc = icmp slt <2 x i64> %a, %b %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @smin_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: smin_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmin.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %a, <16 x i8> %c) store <16 x i8> %d, <16 x i8>* %x ret void } declare <16 x i8> @llvm.smin.v16i8(<16 x i8>, <16 x i8>) define void @smin_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: smin_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmin.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %a, <8 x 
i16> %c) store <8 x i16> %d, <8 x i16>* %x ret void } declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>) define void @smin_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: smin_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmin.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %a, <4 x i32> %c) store <4 x i32> %d, <4 x i32>* %x ret void } declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) define void @smin_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: smin_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmin.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = call <16 x i8> @llvm.smin.v16i8(<16 x i8> %c, <16 x i8> %a) store <16 x i8> %d, <16 x i8>* %x ret void } define void @smin_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: smin_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmin.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = call <8 x i16> @llvm.smin.v8i16(<8 x i16> %c, <8 x i16> %a) store <8 x i16> %d, <8 x i16>* %x ret void } define void @smin_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: smin_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmin.vx v8, v8, a1 ; CHECK-NEXT: 
vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %c, <4 x i32> %a) store <4 x i32> %d, <4 x i32>* %x ret void } define void @smax_v16i8(<16 x i8>* %x, <16 x i8>* %y) { ; CHECK-LABEL: smax_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = load <16 x i8>, <16 x i8>* %y %cc = icmp sgt <16 x i8> %a, %b %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b store <16 x i8> %c, <16 x i8>* %x ret void } define void @smax_v8i16(<8 x i16>* %x, <8 x i16>* %y) { ; CHECK-LABEL: smax_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = load <8 x i16>, <8 x i16>* %y %cc = icmp sgt <8 x i16> %a, %b %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b store <8 x i16> %c, <8 x i16>* %x ret void } define void @smax_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: smax_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %cc = icmp sgt <4 x i32> %a, %b %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @smax_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: smax_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; 
CHECK-NEXT: vmax.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %cc = icmp sgt <2 x i64> %a, %b %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @smax_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: smax_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmax.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %a, <16 x i8> %c) store <16 x i8> %d, <16 x i8>* %x ret void } declare <16 x i8> @llvm.smax.v16i8(<16 x i8>, <16 x i8>) define void @smax_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: smax_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmax.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %a, <8 x i16> %c) store <8 x i16> %d, <8 x i16>* %x ret void } declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>) define void @smax_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: smax_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmax.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %a, <4 x i32> %c) store <4 x i32> %d, <4 x i32>* %x ret void } 
declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>)

; llvm.smax with the splat of scalar %y as the first operand and the loaded
; <16 x i8> vector as the second; the expected code is still vmax.vx.
define void @smax_xv_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: smax_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.smax.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

; llvm.smax with the splat of scalar %y as the first operand and the loaded
; <8 x i16> vector as the second; the expected code is still vmax.vx.
define void @smax_xv_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: smax_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.smax.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

; llvm.smax with the splat of scalar %y as the first operand and the loaded
; <4 x i32> vector as the second; the expected code is still vmax.vx.
define void @smax_xv_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: smax_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vmax.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

; Unsigned minimum of two <16 x i8> vectors written as icmp ult + select;
; the expected code is a single vminu.vv.
define void @umin_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
; CHECK-LABEL: umin_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = load <16 x i8>, <16 x i8>* %y
  %cc = icmp ult <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, <16 x i8>* %x
  ret void
}

; Unsigned minimum of two <8 x i16> vectors written as icmp ult + select;
; the expected code is a single vminu.vv.
define void @umin_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
; CHECK-LABEL: umin_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = load <8 x i16>, <8 x i16>* %y
  %cc = icmp ult <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, <8 x i16>* %x
  ret void
}

; Unsigned minimum of two <4 x i32> vectors written as icmp ult + select;
; the expected code is a single vminu.vv.
define void @umin_v4i32(<4 x i32>* %x, <4 x i32>* %y) {
; CHECK-LABEL: umin_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vle32.v v9, (a1)
; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = load <4 x i32>, <4 x i32>* %y
  %cc = icmp ult <4 x i32> %a, %b
  %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b
  store <4 x i32> %c, <4 x i32>* %x
  ret void
}

; Unsigned minimum of two <2 x i64> vectors written as icmp ult + select;
; the expected code is a single vminu.vv.
define void @umin_v2i64(<2 x i64>* %x, <2 x i64>* %y) {
; CHECK-LABEL: umin_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: vle64.v v9, (a1)
; CHECK-NEXT: vminu.vv v8, v8, v9
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <2 x i64>, <2 x i64>* %x
  %b = load <2 x i64>, <2 x i64>* %y
  %cc = icmp ult <2 x i64> %a, %b
  %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b
  store <2 x i64> %c, <2 x i64>* %x
  ret void
}

; llvm.umin of a <16 x i8> vector (first operand) and a splat of scalar %y;
; the expected code folds the splat into vminu.vx.
define void @umin_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: umin_vx_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vminu.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %a, <16 x i8> %c)
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}
declare <16 x i8> @llvm.umin.v16i8(<16 x i8>, <16 x i8>)

; llvm.umin of a <8 x i16> vector (first operand) and a splat of scalar %y;
; the expected code folds the splat into vminu.vx.
define void @umin_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: umin_vx_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vminu.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %a, <8 x i16> %c)
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)

; llvm.umin of a <4 x i32> vector (first operand) and a splat of scalar %y;
; the expected code folds the splat into vminu.vx.
define void @umin_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: umin_vx_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vminu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %a, <4 x i32> %c)
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}
declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)

; Same as umin_vx_v16i8 but with the splat as the first intrinsic operand;
; the expected code is still vminu.vx.
define void @umin_xv_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: umin_xv_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vminu.vx v8, v8, a1
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = call <16 x i8> @llvm.umin.v16i8(<16 x i8> %c, <16 x i8> %a)
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

; Same as umin_vx_v8i16 but with the splat as the first intrinsic operand;
; the expected code is still vminu.vx.
define void @umin_xv_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: umin_xv_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vminu.vx v8, v8, a1
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %c, <8 x i16> %a)
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

; Same as umin_vx_v4i32 but with the splat as the first intrinsic operand;
; the expected code is still vminu.vx.
define void @umin_xv_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: umin_xv_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vminu.vx v8, v8, a1
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %c, <4 x i32> %a)
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

; Unsigned maximum of two <16 x i8> vectors written as icmp ugt + select;
; the expected code is a single vmaxu.vv.
define void @umax_v16i8(<16 x i8>* %x, <16 x i8>* %y) {
; CHECK-LABEL: umax_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: vle8.v v9, (a1)
; CHECK-NEXT: vmaxu.vv v8, v8, v9
; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = load <16 x i8>, <16 x i8>* %y
  %cc = icmp ugt <16 x i8> %a, %b
  %c = select <16 x i1> %cc, <16 x i8> %a, <16 x i8> %b
  store <16 x i8> %c, <16 x i8>* %x
  ret void
}

; Unsigned maximum of two <8 x i16> vectors written as icmp ugt + select;
; the expected code is a single vmaxu.vv.
define void @umax_v8i16(<8 x i16>* %x, <8 x i16>* %y) {
; CHECK-LABEL: umax_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: vle16.v v9, (a1)
; CHECK-NEXT: vmaxu.vv v8, v8, v9
; CHECK-NEXT: vse16.v v8, (a0)
; CHECK-NEXT: ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = load <8 x i16>, <8 x i16>* %y
  %cc = icmp ugt <8 x i16> %a, %b
  %c = select <8 x i1> %cc, <8 x i16> %a, <8 x i16> %b
  store <8 x i16> %c, <8 x i16>* %x
  ret void
} define void @umax_v4i32(<4 x i32>* %x, <4 x i32>* %y) { ; CHECK-LABEL: umax_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = load <4 x i32>, <4 x i32>* %y %cc = icmp ugt <4 x i32> %a, %b %c = select <4 x i1> %cc, <4 x i32> %a, <4 x i32> %b store <4 x i32> %c, <4 x i32>* %x ret void } define void @umax_v2i64(<2 x i64>* %x, <2 x i64>* %y) { ; CHECK-LABEL: umax_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vle64.v v9, (a1) ; CHECK-NEXT: vmaxu.vv v8, v8, v9 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = load <2 x i64>, <2 x i64>* %y %cc = icmp ugt <2 x i64> %a, %b %c = select <2 x i1> %cc, <2 x i64> %a, <2 x i64> %b store <2 x i64> %c, <2 x i64>* %x ret void } define void @umax_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: umax_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmaxu.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %a, <16 x i8> %c) store <16 x i8> %d, <16 x i8>* %x ret void } declare <16 x i8> @llvm.umax.v16i8(<16 x i8>, <16 x i8>) define void @umax_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: umax_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmaxu.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> 
zeroinitializer %d = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %a, <8 x i16> %c) store <8 x i16> %d, <8 x i16>* %x ret void } declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>) define void @umax_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: umax_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmaxu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %a, <4 x i32> %c) store <4 x i32> %d, <4 x i32>* %x ret void } declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>) define void @umax_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: umax_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmaxu.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = call <16 x i8> @llvm.umax.v16i8(<16 x i8> %c, <16 x i8> %a) store <16 x i8> %d, <16 x i8>* %x ret void } define void @umax_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: umax_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmaxu.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = call <8 x i16> @llvm.umax.v8i16(<8 x i16> %c, <8 x i16> %a) store <8 x i16> %d, <8 x i16>* %x ret void } define void @umax_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: umax_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; 
CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmaxu.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %c, <4 x i32> %a) store <4 x i32> %d, <4 x i32>* %x ret void } define void @add_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: add_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: add_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: add_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = add <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @add_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; 
LMULMAX2-LABEL: add_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: add_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: add_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = add <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @add_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: add_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: add_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, 
a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: add_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = add <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @add_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: add_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: add_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: add_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, 
m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = add <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @sub_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: sub_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sub_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sub_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x 
i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = sub <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @sub_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: sub_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sub_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sub_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = sub <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @sub_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: sub_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; 
LMULMAX1-RV32-LABEL: sub_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sub_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = sub <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @sub_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: sub_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sub_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v 
v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sub_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsub.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsub.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = sub <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @mul_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: mul_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vmul.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mul_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mul_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: 
vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <32 x i8>, <32 x i8>* %x
  %b = load <32 x i8>, <32 x i8>* %y
  %c = mul <32 x i8> %a, %b
  store <32 x i8> %c, <32 x i8>* %x
  ret void
}

; Fixed-length <16 x i16> multiplication: loads both operands through %x and
; %y, multiplies them element-wise, and stores the product back through %x.
; With the LMULMAX2 prefixes the whole vector is expected as one e16/m2
; operation (AVL 16, a single vmul.vv). With the LMULMAX1 prefixes it is
; expected as two e16/m1 halves (AVL 8 each), the upper halves addressed at
; byte offset 16 from each pointer. RV32 and RV64 differ only in
; scratch-register use and load order, which is why the upper-half vmul.vv
; lists its sources as "v9, v10" on RV32 and "v10, v9" on RV64.
define void @mul_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX2-LABEL: mul_v16i16:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT: vle16.v v8, (a0)
; LMULMAX2-NEXT: vle16.v v10, (a1)
; LMULMAX2-NEXT: vmul.vv v8, v8, v10
; LMULMAX2-NEXT: vse16.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: mul_v16i16:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: mul_v16i16:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %c = mul <16 x i16> %a, %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

define void @mul_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: mul_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, 
e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vmul.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mul_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mul_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = mul <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @mul_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: mul_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vmul.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mul_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; 
LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mul_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vmul.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vmul.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = mul <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @and_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: and_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: and_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: and_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: 
addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = and <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @and_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: and_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vand.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: and_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: and_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = and <16 x i16> %a, 
%b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

; Fixed-length <8 x i32> bitwise AND: loads both operands through %x and %y,
; ANDs them element-wise, and stores the result back through %x.
; With the LMULMAX2 prefixes the whole vector is expected as one e32/m2
; operation (AVL 8, a single vand.vv). With the LMULMAX1 prefixes it is
; expected as two e32/m1 halves (AVL 4 each), the upper halves addressed at
; byte offset 16 from each pointer. RV32 and RV64 differ only in
; scratch-register use and load order, which is why the upper-half vand.vv
; lists its sources as "v9, v10" on RV32 and "v10, v9" on RV64.
define void @and_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: and_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vand.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: and_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: and_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %c = and <8 x i32> %a, %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @and_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: and_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vand.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: and_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli 
zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: and_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vand.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vand.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = and <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @or_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: or_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: or_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; 
LMULMAX1-RV64-LABEL: or_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = or <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @or_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: or_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: or_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: or_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; 
LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %c = or <16 x i16> %a, %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

; Fixed-length <8 x i32> bitwise OR: loads both operands through %x and %y,
; ORs them element-wise, and stores the result back through %x.
; With the LMULMAX2 prefixes the whole vector is expected as one e32/m2
; operation (AVL 8, a single vor.vv). With the LMULMAX1 prefixes it is
; expected as two e32/m1 halves (AVL 4 each), the upper halves addressed at
; byte offset 16 from each pointer. RV32 and RV64 differ only in
; scratch-register use and load order, which is why the upper-half vor.vv
; lists its sources as "v9, v10" on RV32 and "v10, v9" on RV64.
define void @or_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: or_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vor.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: or_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: or_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %c = or <8 x i32> %a, %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @or_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: or_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vor.vv v8, v8, v10
; 
LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: or_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: or_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = or <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @xor_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: xor_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vxor.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: xor_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 ; 
LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: xor_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = xor <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @xor_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: xor_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vxor.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: xor_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: xor_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; 
LMULMAX1-RV64-NEXT: vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %c = xor <16 x i16> %a, %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

; Fixed-length <8 x i32> bitwise XOR: loads both operands through %x and %y,
; XORs them element-wise, and stores the result back through %x.
; With the LMULMAX2 prefixes the whole vector is expected as one e32/m2
; operation (AVL 8, a single vxor.vv). With the LMULMAX1 prefixes it is
; expected as two e32/m1 halves (AVL 4 each), the upper halves addressed at
; byte offset 16 from each pointer. RV32 and RV64 differ only in
; scratch-register use and load order, which is why the upper-half vxor.vv
; lists its sources as "v9, v10" on RV32 and "v10, v9" on RV64.
define void @xor_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: xor_v8i32:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: vle32.v v10, (a1)
; LMULMAX2-NEXT: vxor.vv v8, v8, v10
; LMULMAX2-NEXT: vse32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: xor_v8i32:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: xor_v8i32:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT: vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %c = xor <8 x i32> %a, %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @xor_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: xor_v4i64:
; 
LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vxor.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: xor_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vxor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vxor.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: xor_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vxor.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vxor.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = xor <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @lshr_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: lshr_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: lshr_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; 
LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: lshr_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = lshr <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @lshr_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: lshr_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: lshr_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: lshr_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, 
mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = lshr <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @lshr_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: lshr_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: lshr_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: lshr_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 
x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %c = lshr <8 x i32> %a, %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

; Fixed-length <4 x i64> logical shift right: loads the value through %x and
; the per-element shift amounts through %y, computes x >> y, and stores the
; result back through %x.
; With the LMULMAX2 prefixes the whole vector is expected as one e64/m2
; operation (AVL 4, a single vsrl.vv). With the LMULMAX1 prefixes it is
; expected as two e64/m1 halves (AVL 2 each), the upper halves addressed at
; byte offset 16 from each pointer. RV32 and RV64 differ in scratch-register
; use and load order: RV32 holds x's upper half in v9 and y's in v10, RV64 the
; other way round, so the upper-half vsrl.vv reads "v9, v10" on RV32 and
; "v10, v9" on RV64 -- in both cases x is the value and y the shift amount.
define void @lshr_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: lshr_v4i64:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT: vle64.v v8, (a0)
; LMULMAX2-NEXT: vle64.v v10, (a1)
; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
; LMULMAX2-NEXT: vse64.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: lshr_v4i64:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT: addi a2, a0, 16
; LMULMAX1-RV32-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT: addi a3, a1, 16
; LMULMAX1-RV32-NEXT: vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v10
; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v11
; LMULMAX1-RV32-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT: ret
;
; LMULMAX1-RV64-LABEL: lshr_v4i64:
; LMULMAX1-RV64: # %bb.0:
; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT: addi a2, a1, 16
; LMULMAX1-RV64-NEXT: vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT: addi a2, a0, 16
; LMULMAX1-RV64-NEXT: vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT: vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT: vsrl.vv v9, v10, v9
; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v11
; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT: vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT: ret
  %a = load <4 x i64>, <4 x i64>* %x
  %b = load <4 x i64>, <4 x i64>* %y
  %c = lshr <4 x i64> %a, %b
  store <4 x i64> %c, <4 x i64>* %x
  ret void
}

define void @ashr_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: ashr_v32i8:
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: li a2, 32
; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vle8.v v10, (a1)
; LMULMAX2-NEXT: vsra.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
; LMULMAX2-NEXT: 
ret ; ; LMULMAX1-RV32-LABEL: ashr_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: ashr_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = ashr <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @ashr_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: ashr_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vsra.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: ashr_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: 
vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: ashr_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = ashr <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @ashr_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: ashr_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vsra.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: ashr_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: ashr_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; 
LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = ashr <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @ashr_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: ashr_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vsra.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: ashr_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsra.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: ashr_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsra.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsra.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = ashr <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @shl_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: shl_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 
32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vsll.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: shl_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: shl_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = shl <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @shl_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: shl_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vsll.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: shl_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: 
addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: shl_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = shl <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @shl_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: shl_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vsll.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: shl_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: shl_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; 
LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = shl <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @shl_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: shl_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vsll.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: shl_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vsll.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: shl_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vsll.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vsll.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = shl 
<4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @sdiv_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: sdiv_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sdiv_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sdiv_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = sdiv <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @sdiv_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: sdiv_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sdiv_v16i16: ; LMULMAX1-RV32: # 
%bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sdiv_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = sdiv <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @sdiv_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: sdiv_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sdiv_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: 
vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sdiv_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = sdiv <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @sdiv_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: sdiv_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: sdiv_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: sdiv_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdiv.vv v9, v10, v9 ; 
LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = sdiv <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @srem_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: srem_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vrem.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: srem_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: srem_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = srem <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @srem_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: srem_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, 
mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vrem.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: srem_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: srem_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = srem <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @srem_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: srem_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vrem.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: srem_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; 
LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: srem_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = srem <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @srem_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: srem_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vrem.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: srem_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vrem.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: srem_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, 
a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vrem.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vrem.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = srem <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @udiv_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: udiv_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: udiv_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: udiv_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = udiv <32 x 
i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @udiv_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: udiv_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: udiv_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: udiv_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = udiv <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @udiv_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: udiv_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: udiv_v8i32: ; LMULMAX1-RV32: # 
%bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: udiv_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = udiv <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @udiv_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; LMULMAX2-LABEL: udiv_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: udiv_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v 
v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: udiv_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = udiv <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @urem_v32i8(<32 x i8>* %x, <32 x i8>* %y) { ; LMULMAX2-LABEL: urem_v32i8: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: li a2, 32 ; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; LMULMAX2-NEXT: vle8.v v8, (a0) ; LMULMAX2-NEXT: vle8.v v10, (a1) ; LMULMAX2-NEXT: vremu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse8.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: urem_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle8.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: urem_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle8.v v11, (a1) ; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 ; 
LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = load <32 x i8>, <32 x i8>* %y %c = urem <32 x i8> %a, %b store <32 x i8> %c, <32 x i8>* %x ret void } define void @urem_v16i16(<16 x i16>* %x, <16 x i16>* %y) { ; LMULMAX2-LABEL: urem_v16i16: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-NEXT: vle16.v v8, (a0) ; LMULMAX2-NEXT: vle16.v v10, (a1) ; LMULMAX2-NEXT: vremu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse16.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: urem_v16i16: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle16.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: urem_v16i16: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle16.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle16.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle16.v v11, (a1) ; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse16.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = load <16 x i16>, <16 x i16>* %y %c = urem <16 x i16> %a, %b store <16 x i16> %c, <16 x i16>* %x ret void } define void @urem_v8i32(<8 x i32>* %x, <8 x i32>* %y) { ; LMULMAX2-LABEL: urem_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, 
ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vremu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: urem_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; LMULMAX1-RV32-NEXT: vle32.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: urem_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v11, (a1) ; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = load <8 x i32>, <8 x i32>* %y %c = urem <8 x i32> %a, %b store <8 x i32> %c, <8 x i32>* %x ret void } define void @urem_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; Element-wise urem of the <4 x i64> vectors at %x and %y, stored back to %x. The LMULMAX2 runs expect one m2 vremu.vv. The LMULMAX1 runs expect two m1 vremu.vv, one per 16-byte half. ; LMULMAX2-LABEL: urem_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vremu.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: urem_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a2, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV32-NEXT: addi a3, a1, 16 ; 
LMULMAX1-RV32-NEXT: vle64.v v10, (a3) ; LMULMAX1-RV32-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV32-NEXT: vremu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: urem_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a2, a1, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a2) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vle64.v v11, (a1) ; LMULMAX1-RV64-NEXT: vremu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vremu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y %c = urem <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @extract_v4i64(<4 x i64>* %x, <4 x i64>* %y) { ; The two loads sit in the entry block and the add/store in the "compute" block reached by an unconditional br. The expected output is a straight-line vle64.v/vadd.vv/vse64.v sequence with no branch instruction. ; LMULMAX2-LABEL: extract_v4i64: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-NEXT: vle64.v v8, (a0) ; LMULMAX2-NEXT: vle64.v v10, (a1) ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: vse64.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-LABEL: extract_v4i64: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-NEXT: vle64.v v8, (a0) ; LMULMAX1-NEXT: addi a2, a0, 16 ; LMULMAX1-NEXT: vle64.v v9, (a2) ; LMULMAX1-NEXT: vle64.v v10, (a1) ; LMULMAX1-NEXT: addi a1, a1, 16 ; LMULMAX1-NEXT: vle64.v v11, (a1) ; LMULMAX1-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-NEXT: vse64.v v8, (a0) ; LMULMAX1-NEXT: vse64.v v9, (a2) ; LMULMAX1-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = load <4 x i64>, <4 x i64>* %y br label %"compute" "compute": %c = add <4 x i64> %a, %b store <4 x i64> %c, <4 x i64>* %x ret void } define void @mulhu_v32i8(<32 x i8>* %x) { ; udiv of <32 x i8> by a non-uniform constant vector. The LMULMAX2 runs expect a vmulhu.vv-based expansion with vsub/vadd/vsrl fix-ups and vmerge-built constants. The LMULMAX1 runs expect vdivu.vv on each 16-byte half against a vector loaded from .LCPI153_0. ; LMULMAX2-RV32-LABEL: 
mulhu_v32i8: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV32-NEXT: lui a2, 66049 ; LMULMAX2-RV32-NEXT: addi a2, a2, 32 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI153_0) ; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI153_0) ; LMULMAX2-RV32-NEXT: vle8.v v10, (a2) ; LMULMAX2-RV32-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV32-NEXT: vmerge.vim v14, v12, 1, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v14, v8, v14 ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v14, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a2, 163907 ; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: li a2, -128 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a2, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a2, 8208 ; LMULMAX2-RV32-NEXT: addi a2, a2, 513 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV32-NEXT: lui a2, 66785 ; LMULMAX2-RV32-NEXT: addi a2, a2, 78 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 3, v0 ; LMULMAX2-RV32-NEXT: lui a2, 529160 ; LMULMAX2-RV32-NEXT: addi a2, a2, 304 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 2, v0 ; 
LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhu_v32i8: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV64-NEXT: lui a2, 66049 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI153_0) ; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI153_0) ; LMULMAX2-RV64-NEXT: vle8.v v10, (a2) ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV64-NEXT: vmerge.vim v14, v12, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v14, v8, v14 ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v14, v10 ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: lui a2, 163907 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: li a2, -128 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a2, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: lui a2, 8208 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV64-NEXT: lui a2, 66785 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 78 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 3, v0 ; LMULMAX2-RV64-NEXT: lui a2, 529160 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 304 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; 
LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 2, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-LABEL: mulhu_v32i8: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle8.v v8, (a1) ; LMULMAX1-NEXT: lui a2, %hi(.LCPI153_0) ; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI153_0) ; LMULMAX1-NEXT: vle8.v v9, (a2) ; LMULMAX1-NEXT: vle8.v v10, (a0) ; LMULMAX1-NEXT: vdivu.vv v8, v8, v9 ; LMULMAX1-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-NEXT: vse8.v v9, (a0) ; LMULMAX1-NEXT: vse8.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = udiv <32 x i8> %a, <i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25> store <32 x i8> %b, <32 x i8>* %x ret void } define void @mulhu_v16i16(<16 x i16>* %x) { ; udiv of <16 x i16> by a non-uniform constant vector. The LMULMAX2 runs expect a vmulhu.vv-based expansion. The LMULMAX1 runs expect vdivu.vv on each 16-byte half against a vector loaded from .LCPI154_0. ; LMULMAX2-RV32-LABEL: mulhu_v16i16: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) ; LMULMAX2-RV32-NEXT: lui a1, 2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 289 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 ; LMULMAX2-RV32-NEXT: vmv.s.x v8, a1 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 ; LMULMAX2-RV32-NEXT: li a1, 257 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI154_0) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI154_0) ; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV32-NEXT: lui a1, 1048568 ; LMULMAX2-RV32-NEXT: vmerge.vxm v18, v14, a1, v0 ; 
LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV32-NEXT: vmerge.vim v8, v14, 1, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v16 ; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v8 ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v18 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhu_v16i16: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) ; LMULMAX2-RV64-NEXT: lui a1, 2 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 ; LMULMAX2-RV64-NEXT: vmv.s.x v8, a1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 ; LMULMAX2-RV64-NEXT: li a1, 257 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI154_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI154_0) ; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV64-NEXT: lui a1, 1048568 ; LMULMAX2-RV64-NEXT: vmerge.vxm v18, v14, a1, v0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 ; LMULMAX2-RV64-NEXT: vmerge.vim v8, v14, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v16 ; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v8 ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v18 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-LABEL: mulhu_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle16.v v8, (a1) ; LMULMAX1-NEXT: lui a2, %hi(.LCPI154_0) ; LMULMAX1-NEXT: addi a2, a2, %lo(.LCPI154_0) ; LMULMAX1-NEXT: vle16.v v9, (a2) ; 
LMULMAX1-NEXT: vle16.v v10, (a0) ; LMULMAX1-NEXT: vdivu.vv v8, v8, v9 ; LMULMAX1-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-NEXT: vse16.v v9, (a0) ; LMULMAX1-NEXT: vse16.v v8, (a1) ; LMULMAX1-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = udiv <16 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 7, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15> store <16 x i16> %b, <16 x i16>* %x ret void } define void @mulhu_v8i32(<8 x i32>* %x) { ; udiv of <8 x i32> by the constants <5, 6, 7, 9> repeated twice. The LMULMAX2 and LMULMAX1-RV32 expectations use a vmulhu.vv-based expansion. The LMULMAX1-RV64 expectation uses vdivu.vv on each 16-byte half. ; LMULMAX2-LABEL: mulhu_v8i32: ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-NEXT: vle32.v v8, (a0) ; LMULMAX2-NEXT: li a1, 68 ; LMULMAX2-NEXT: vmv.s.x v0, a1 ; LMULMAX2-NEXT: lui a1, %hi(.LCPI155_0) ; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI155_0) ; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vmv.v.i v12, 0 ; LMULMAX2-NEXT: lui a1, 524288 ; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 ; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 136 ; LMULMAX2-NEXT: vmv.s.x v0, a1 ; LMULMAX2-NEXT: vmv.v.i v10, 2 ; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-NEXT: vse32.v v8, (a0) ; LMULMAX2-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mulhu_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, 524288 ; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 ; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 ; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, mu ; LMULMAX1-RV32-NEXT: vslideup.vi v11, v10, 2 ; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI155_0) ; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI155_0) ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV32-NEXT: vmulhu.vv v12, v9, v10 ; LMULMAX1-RV32-NEXT: vsub.vv 
v9, v9, v12 ; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 ; LMULMAX1-RV32-NEXT: li a2, 1 ; LMULMAX1-RV32-NEXT: vmv.s.x v12, a2 ; LMULMAX1-RV32-NEXT: vmv.v.i v13, 2 ; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, tu, mu ; LMULMAX1-RV32-NEXT: vslideup.vi v13, v12, 3 ; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v13 ; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v13 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhu_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI155_0) ; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI155_0) ; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) ; LMULMAX1-RV64-NEXT: vle32.v v10, (a0) ; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v9 ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = udiv <8 x i32> %a, <i32 5, i32 6, i32 7, i32 9, i32 5, i32 6, i32 7, i32 9> store <8 x i32> %b, <8 x i32>* %x ret void } define void @mulhu_v4i64(<4 x i64>* %x) { ; udiv of <4 x i64> by <3, 5, 7, 9>. The LMULMAX2 and LMULMAX1-RV64 expectations use vmulhu.vv-based expansions. The LMULMAX1-RV32 expectation uses vdivu.vv with a separately loaded constant vector (.LCPI156_0 and .LCPI156_1) for each 16-byte half. ; LMULMAX2-RV32-LABEL: mulhu_v4i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI156_0) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI156_0) ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle32.v v10, (a1) ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 ; 
LMULMAX2-RV32-NEXT: lui a1, 524288 ; LMULMAX2-RV32-NEXT: vmv.s.x v12, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu ; LMULMAX2-RV32-NEXT: vslideup.vi v14, v12, 5 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v14 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI156_1) ; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI156_1) ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle32.v v10, (a1) ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhu_v4i64: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV64-NEXT: li a1, -1 ; LMULMAX2-RV64-NEXT: slli a1, a1, 63 ; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, mu ; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI156_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI156_0) ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI156_1) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI156_1) ; LMULMAX2-RV64-NEXT: vle64.v v14, (a1) ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v14 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mulhu_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; 
LMULMAX1-RV32-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI156_0) ; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI156_0) ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI156_1) ; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI156_1) ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhu_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0 ; LMULMAX1-RV64-NEXT: li a2, -1 ; LMULMAX1-RV64-NEXT: slli a2, a2, 63 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_0) ; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI156_0) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_1) ; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI156_1)(a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 ; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-RV64-NEXT: vid.v v10 ; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI156_2) ; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI156_2) ; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero ; LMULMAX1-RV64-NEXT: lui a2, 
%hi(.LCPI156_3) ; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI156_3)(a2) ; LMULMAX1-RV64-NEXT: vadd.vi v12, v10, 2 ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v12 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = udiv <4 x i64> %a, <i64 3, i64 5, i64 7, i64 9> store <4 x i64> %b, <4 x i64>* %x ret void } define void @mulhs_v32i8(<32 x i8>* %x) { ; NOTE(review) - despite the mulhs_ name, the IR below uses udiv (by constants of 9 and -9) and the expectations are the unsigned vmulhu.vv/vsrl.vv (LMULMAX2 runs) and vdivu.vv (LMULMAX1 runs), whereas mulhs_v16i16, mulhs_v8i32 and mulhs_v4i64 use sdiv. Confirm whether sdiv was intended before regenerating the checks. ; LMULMAX2-RV32-LABEL: mulhs_v32i8: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV32-NEXT: li a2, -123 ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX2-RV32-NEXT: lui a2, 304453 ; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV32-NEXT: li a2, 57 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 ; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhs_v32i8: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV64-NEXT: li a2, -123 ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a2 ; LMULMAX2-RV64-NEXT: lui a2, 304453 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, mu ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX2-RV64-NEXT: li a2, 57 ; 
LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mulhs_v32i8: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle8.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, 5 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV32-NEXT: vmv.v.i v10, -9 ; LMULMAX1-RV32-NEXT: vmerge.vim v10, v10, 9, v0 ; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse8.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhs_v32i8: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle8.v v9, (a1) ; LMULMAX1-RV64-NEXT: lui a2, 5 ; LMULMAX1-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v0, a2 ; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmv.v.i v10, -9 ; LMULMAX1-RV64-NEXT: vmerge.vim v10, v10, 9, v0 ; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse8.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <32 x i8>, <32 x i8>* %x %b = udiv <32 x i8> %a, <i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 -9, i8 -9, i8 9, i8 -9, i8 
9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9, i8 9, i8 -9, i8 -9, i8 9, i8 -9> store <32 x i8> %b, <32 x i8>* %x ret void } define void @mulhs_v16i16(<16 x i16>* %x) { ; sdiv of <16 x i16> by constants of 7 and -7. The LMULMAX2 runs expect vmulh.vv followed by vsra.vi/vsrl.vi/vadd.vv. The LMULMAX1 runs expect vdiv.vv on each 16-byte half. ; LMULMAX2-RV32-LABEL: mulhs_v16i16: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle16.v v8, (a0) ; LMULMAX2-RV32-NEXT: lui a1, 7 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1687 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 5 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1755 ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: lui a1, 1048571 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1755 ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 15 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhs_v16i16: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle16.v v8, (a0) ; LMULMAX2-RV64-NEXT: lui a1, 7 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1687 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV64-NEXT: lui a1, 5 ; LMULMAX2-RV64-NEXT: addiw a1, a1, -1755 ; LMULMAX2-RV64-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV64-NEXT: lui a1, 1048571 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1755 ; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 15 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-LABEL: mulhs_v16i16: ; LMULMAX1: # %bb.0: ; LMULMAX1-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; LMULMAX1-NEXT: vle16.v v8, (a0) ; LMULMAX1-NEXT: addi a1, a0, 16 ; LMULMAX1-NEXT: vle16.v v9, (a1) ; LMULMAX1-NEXT: li a2, 105 ; LMULMAX1-NEXT: vmv.s.x v0, a2 ; LMULMAX1-NEXT: vmv.v.i v10, 7 ; LMULMAX1-NEXT: vmerge.vim v10, v10, -7, v0 ; 
LMULMAX1-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX1-NEXT: vse16.v v8, (a0) ; LMULMAX1-NEXT: vse16.v v9, (a1) ; LMULMAX1-NEXT: ret %a = load <16 x i16>, <16 x i16>* %x %b = sdiv <16 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7, i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7, i16 -7, i16 7> store <16 x i16> %b, <16 x i16>* %x ret void } define void @mulhs_v8i32(<8 x i32>* %x) { ; sdiv of <8 x i32> by alternating -5 and 5. The LMULMAX2 and LMULMAX1-RV32 expectations use a vmulh.vv-based expansion. The LMULMAX1-RV64 expectation uses vdiv.vv on each 16-byte half. ; LMULMAX2-RV32-LABEL: mulhs_v8i32: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX2-RV32-NEXT: li a1, 85 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 419430 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1639 ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: lui a1, 629146 ; LMULMAX2-RV32-NEXT: addi a1, a1, -1639 ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 31 ; LMULMAX2-RV32-NEXT: vsra.vi v8, v8, 1 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhs_v8i32: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI159_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI159_0) ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: vlse64.v v10, (a1), zero ; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV64-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsra.vi v8, v8, 1 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 31 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mulhs_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: 
vle32.v v9, (a1) ; LMULMAX1-RV32-NEXT: li a2, 5 ; LMULMAX1-RV32-NEXT: vmv.s.x v0, a2 ; LMULMAX1-RV32-NEXT: lui a2, 419430 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1639 ; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV32-NEXT: lui a2, 629146 ; LMULMAX1-RV32-NEXT: addi a2, a2, -1639 ; LMULMAX1-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 ; LMULMAX1-RV32-NEXT: vmulh.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 31 ; LMULMAX1-RV32-NEXT: vsra.vi v9, v9, 1 ; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v11 ; LMULMAX1-RV32-NEXT: vmulh.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 31 ; LMULMAX1-RV32-NEXT: vsra.vi v8, v8, 1 ; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhs_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 ; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) ; LMULMAX1-RV64-NEXT: li a2, 3 ; LMULMAX1-RV64-NEXT: slli a2, a2, 33 ; LMULMAX1-RV64-NEXT: addi a2, a2, -5 ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV64-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, <8 x i32>* %x %b = sdiv <8 x i32> %a, <i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5, i32 -5, i32 5> store <8 x i32> %b, <8 x i32>* %x ret void } define void @mulhs_v4i64(<4 x i64>* %x) { ; sdiv of <4 x i64> by alternating 3 and -3. The LMULMAX2 and LMULMAX1-RV64 expectations use vmulh.vv-based expansions. The LMULMAX1-RV32 expectation uses vdiv.vv on each 16-byte half. ; LMULMAX2-RV32-LABEL: mulhs_v4i64: ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV32-NEXT: li a1, 17 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a2, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, 
e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1366 ; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmulh.vv v10, v8, v10 ; LMULMAX2-RV32-NEXT: li a1, 51 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 0, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmadd.vv v12, v8, v10 ; LMULMAX2-RV32-NEXT: li a1, 63 ; LMULMAX2-RV32-NEXT: vsrl.vx v8, v12, a1 ; LMULMAX2-RV32-NEXT: li a1, 68 ; LMULMAX2-RV32-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV32-NEXT: vsra.vv v10, v12, v10 ; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; ; LMULMAX2-RV64-LABEL: mulhs_v4i64: ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; LMULMAX2-RV64-NEXT: li a1, 5 ; LMULMAX2-RV64-NEXT: vmv.s.x v0, a1 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI160_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI160_0) ; LMULMAX2-RV64-NEXT: vlse64.v v8, (a1), zero ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI160_1) ; LMULMAX2-RV64-NEXT: ld a1, %lo(.LCPI160_1)(a1) ; LMULMAX2-RV64-NEXT: vle64.v v10, (a0) ; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 ; LMULMAX2-RV64-NEXT: vmerge.vxm v8, v8, a1, v0 ; LMULMAX2-RV64-NEXT: vmulh.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vmacc.vv v8, v10, v12 ; LMULMAX2-RV64-NEXT: li a1, 63 ; LMULMAX2-RV64-NEXT: vsrl.vx v10, v8, a1 ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 1 ; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 ; LMULMAX2-RV64-NEXT: vsra.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 ; 
LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; ; LMULMAX1-RV32-LABEL: mulhs_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) ; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI160_0) ; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI160_0) ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhs_v4i64: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI160_0) ; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI160_0) ; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero ; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI160_1) ; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI160_1)(a1) ; LMULMAX1-RV64-NEXT: addi a2, a0, 16 ; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, mu ; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; LMULMAX1-RV64-NEXT: vmulh.vv v11, v10, v9 ; LMULMAX1-RV64-NEXT: vid.v v12 ; LMULMAX1-RV64-NEXT: vrsub.vi v13, v12, 0 ; LMULMAX1-RV64-NEXT: vmacc.vv v11, v13, v10 ; LMULMAX1-RV64-NEXT: li a1, 63 ; LMULMAX1-RV64-NEXT: vsrl.vx v10, v11, a1 ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 ; LMULMAX1-RV64-NEXT: vadd.vv v10, v11, v10 ; LMULMAX1-RV64-NEXT: vmulh.vv v9, v8, v9 ; LMULMAX1-RV64-NEXT: vmacc.vv v9, v8, v13 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v9, a1 ; LMULMAX1-RV64-NEXT: vsra.vv v9, v9, v12 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v9, v8 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV64-NEXT: vse64.v v10, (a2) ; 
LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, <4 x i64>* %x %b = sdiv <4 x i64> %a, <i64 3, i64 -3, i64 3, i64 -3> store <4 x i64> %b, <4 x i64>* %x ret void }

; smin_*: signed minimum spelled as icmp slt + select over two 256-bit
; vectors loaded from %x and %y; the result is stored back through %x.
; The LMUL-max-2 runs expect a single m2 vmin.vv. The LMUL-max-1 runs expect
; the vector split into two m1 halves (second half at pointer + 16 bytes),
; with RV32 and RV64 differing only in scratch-register use and operand order.
define void @smin_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: smin_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, <32 x i8>* %x
  %b = load <32 x i8>, <32 x i8>* %y
  %cc = icmp slt <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, <32 x i8>* %x
  ret void
}

define void @smin_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX2-LABEL: smin_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %cc = icmp slt <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

define void @smin_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: smin_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %cc = icmp slt <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @smin_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: smin_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmin.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smin_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmin.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smin_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmin.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmin.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, <4 x i64>* %x
  %b = load <4 x i64>, <4 x i64>* %y
  %cc = icmp slt <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, <4 x i64>* %x
  ret void
}

; smax_*: signed maximum spelled as icmp sgt + select; same load/store shape
; and same m2 versus split-m1 expectations as the smin tests, with vmax.vv.
define void @smax_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: smax_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, <32 x i8>* %x
  %b = load <32 x i8>, <32 x i8>* %y
  %cc = icmp sgt <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, <32 x i8>* %x
  ret void
}

define void @smax_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX2-LABEL: smax_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %cc = icmp sgt <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

define void @smax_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: smax_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %cc = icmp sgt <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @smax_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: smax_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmax.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: smax_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmax.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: smax_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmax.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmax.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, <4 x i64>* %x
  %b = load <4 x i64>, <4 x i64>* %y
  %cc = icmp sgt <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, <4 x i64>* %x
  ret void
}

; umin_*: unsigned minimum spelled as icmp ult + select; same load/store shape
; and same m2 versus split-m1 expectations as the smin tests, with vminu.vv.
define void @umin_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: umin_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, <32 x i8>* %x
  %b = load <32 x i8>, <32 x i8>* %y
  %cc = icmp ult <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, <32 x i8>* %x
  ret void
}

define void @umin_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX2-LABEL: umin_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %cc = icmp ult <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

define void @umin_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: umin_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %cc = icmp ult <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @umin_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: umin_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vminu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umin_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vminu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umin_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vminu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vminu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, <4 x i64>* %x
  %b = load <4 x i64>, <4 x i64>* %y
  %cc = icmp ult <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, <4 x i64>* %x
  ret void
}

; umax_*: unsigned maximum spelled as icmp ugt + select; same load/store shape
; and same m2 versus split-m1 expectations as the smin tests, with vmaxu.vv.
define void @umax_v32i8(<32 x i8>* %x, <32 x i8>* %y) {
; LMULMAX2-LABEL: umax_v32i8:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    li a2, 32
; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, mu
; LMULMAX2-NEXT:    vle8.v v8, (a0)
; LMULMAX2-NEXT:    vle8.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse8.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v32i8:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle8.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v32i8:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle8.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle8.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse8.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse8.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <32 x i8>, <32 x i8>* %x
  %b = load <32 x i8>, <32 x i8>* %y
  %cc = icmp ugt <32 x i8> %a, %b
  %c = select <32 x i1> %cc, <32 x i8> %a, <32 x i8> %b
  store <32 x i8> %c, <32 x i8>* %x
  ret void
}

define void @umax_v16i16(<16 x i16>* %x, <16 x i16>* %y) {
; LMULMAX2-LABEL: umax_v16i16:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 16, e16, m2, ta, mu
; LMULMAX2-NEXT:    vle16.v v8, (a0)
; LMULMAX2-NEXT:    vle16.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse16.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v16i16:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle16.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v16i16:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle16.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle16.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse16.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse16.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <16 x i16>, <16 x i16>* %x
  %b = load <16 x i16>, <16 x i16>* %y
  %cc = icmp ugt <16 x i16> %a, %b
  %c = select <16 x i1> %cc, <16 x i16> %a, <16 x i16> %b
  store <16 x i16> %c, <16 x i16>* %x
  ret void
}

define void @umax_v8i32(<8 x i32>* %x, <8 x i32>* %y) {
; LMULMAX2-LABEL: umax_v8i32:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, mu
; LMULMAX2-NEXT:    vle32.v v8, (a0)
; LMULMAX2-NEXT:    vle32.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse32.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v8i32:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle32.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v8i32:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle32.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle32.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse32.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <8 x i32>, <8 x i32>* %x
  %b = load <8 x i32>, <8 x i32>* %y
  %cc = icmp ugt <8 x i32> %a, %b
  %c = select <8 x i1> %cc, <8 x i32> %a, <8 x i32> %b
  store <8 x i32> %c, <8 x i32>* %x
  ret void
}

define void @umax_v4i64(<4 x i64>* %x, <4 x i64>* %y) {
; LMULMAX2-LABEL: umax_v4i64:
; LMULMAX2:       # %bb.0:
; LMULMAX2-NEXT:    vsetivli zero, 4, e64, m2, ta, mu
; LMULMAX2-NEXT:    vle64.v v8, (a0)
; LMULMAX2-NEXT:    vle64.v v10, (a1)
; LMULMAX2-NEXT:    vmaxu.vv v8, v8, v10
; LMULMAX2-NEXT:    vse64.v v8, (a0)
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-RV32-LABEL: umax_v4i64:
; LMULMAX1-RV32:       # %bb.0:
; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    addi a2, a0, 16
; LMULMAX1-RV32-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    addi a3, a1, 16
; LMULMAX1-RV32-NEXT:    vle64.v v10, (a3)
; LMULMAX1-RV32-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV32-NEXT:    vmaxu.vv v9, v9, v10
; LMULMAX1-RV32-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV32-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV32-NEXT:    ret
;
; LMULMAX1-RV64-LABEL: umax_v4i64:
; LMULMAX1-RV64:       # %bb.0:
; LMULMAX1-RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; LMULMAX1-RV64-NEXT:    vle64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    addi a2, a1, 16
; LMULMAX1-RV64-NEXT:    vle64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    addi a2, a0, 16
; LMULMAX1-RV64-NEXT:    vle64.v v10, (a2)
; LMULMAX1-RV64-NEXT:    vle64.v v11, (a1)
; LMULMAX1-RV64-NEXT:    vmaxu.vv v9, v10, v9
; LMULMAX1-RV64-NEXT:    vmaxu.vv v8, v8, v11
; LMULMAX1-RV64-NEXT:    vse64.v v8, (a0)
; LMULMAX1-RV64-NEXT:    vse64.v v9, (a2)
; LMULMAX1-RV64-NEXT:    ret
  %a = load <4 x i64>, <4 x i64>* %x
  %b = load <4 x i64>, <4 x i64>* %y
  %cc = icmp ugt <4 x i64> %a, %b
  %c = select <4 x i1> %cc, <4 x i64> %a, <4 x i64> %b
  store <4 x i64> %c, <4 x i64>* %x
  ret void
}
; add_vi_*: vector + splat(-1), splat on the right-hand side. The splat is
; built with insertelement + shufflevector; the checks expect it folded into
; the immediate form vadd.vi with -1.
define void @add_vi_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: add_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 -1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %a, %c
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @add_vi_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: add_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 -1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %a, %c
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @add_vi_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: add_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 -1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %a, %c
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

define void @add_vi_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: add_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, -1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, <2 x i64>* %x
  %b = insertelement <2 x i64> poison, i64 -1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = add <2 x i64> %a, %c
  store <2 x i64> %d, <2 x i64>* %x
  ret void
}

; add_iv_*: splat(1) + vector, splat on the left-hand side. The checks expect
; the same immediate form (vadd.vi with 1) as when the splat is on the right.
define void @add_iv_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: add_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %c, %a
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @add_iv_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: add_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %c, %a
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @add_iv_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: add_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %c, %a
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

define void @add_iv_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: add_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vadd.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, <2 x i64>* %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = add <2 x i64> %c, %a
  store <2 x i64> %d, <2 x i64>* %x
  ret void
}

; add_vx_*: vector + splat(%y), where %y is a scalar argument. The checks
; expect the splat folded into vadd.vx taking the scalar straight from a1.
define void @add_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: add_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %a, %c
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @add_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: add_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %a, %c
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @add_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: add_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %a, %c
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

; add_xv_*: splat(%y) + vector, splat on the left-hand side. The checks expect
; the same vadd.vx as the add_vx tests.
define void @add_xv_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: add_xv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = add <16 x i8> %c, %a
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @add_xv_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: add_xv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = add <8 x i16> %c, %a
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @add_xv_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: add_xv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vadd.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 %y, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = add <4 x i32> %c, %a
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

; sub_vi_*: vector - splat(-1). The recorded output materialises -1 in a1 with
; li and uses vsub.vx; no immediate-form instruction appears in these checks.
define void @sub_vi_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: sub_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 -1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %a, %c
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @sub_vi_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: sub_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 -1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %a, %c
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @sub_vi_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: sub_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 -1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sub <4 x i32> %a, %c
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

define void @sub_vi_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: sub_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    li a1, -1
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, <2 x i64>* %x
  %b = insertelement <2 x i64> poison, i64 -1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = sub <2 x i64> %a, %c
  store <2 x i64> %d, <2 x i64>* %x
  ret void
}

; sub_iv_*: splat(1) - vector. The checks expect the reverse-subtract
; immediate form, vrsub.vi with 1.
define void @sub_iv_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: sub_iv_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 1, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %c, %a
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @sub_iv_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: sub_iv_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 1, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %c, %a
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @sub_iv_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: sub_iv_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <4 x i32>, <4 x i32>* %x
  %b = insertelement <4 x i32> poison, i32 1, i32 0
  %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer
  %d = sub <4 x i32> %c, %a
  store <4 x i32> %d, <4 x i32>* %x
  ret void
}

define void @sub_iv_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: sub_iv_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vrsub.vi v8, v8, 1
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <2 x i64>, <2 x i64>* %x
  %b = insertelement <2 x i64> poison, i64 1, i32 0
  %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer
  %d = sub <2 x i64> %c, %a
  store <2 x i64> %d, <2 x i64>* %x
  ret void
}

; sub_vx_*: vector - splat(%y), where %y is a scalar argument. The checks
; expect vsub.vx taking the scalar straight from a1.
define void @sub_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: sub_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <16 x i8>, <16 x i8>* %x
  %b = insertelement <16 x i8> poison, i8 %y, i32 0
  %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer
  %d = sub <16 x i8> %a, %c
  store <16 x i8> %d, <16 x i8>* %x
  ret void
}

define void @sub_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: sub_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsub.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %a = load <8 x i16>, <8 x i16>* %x
  %b = insertelement <8 x i16> poison, i16 %y, i32 0
  %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer
  %d = sub <8 x i16> %a, %c
  store <8 x i16> %d, <8 x i16>* %x
  ret void
}

define void @sub_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: sub_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsub.vx v8, v8, a1 ;
CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = sub <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @sub_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: sub_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = sub <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @sub_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: sub_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = sub <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @sub_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: sub_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vrsub.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = sub <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @mul_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: mul_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; 
CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = mul <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @mul_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: mul_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = mul <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @mul_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: mul_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = mul <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @mul_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: mul_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = mul <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @mul_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: mul_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x 
i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = mul <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @mul_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: mul_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vmul.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = mul <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @and_vi_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: and_vi_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, -2 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 -2, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = and <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @and_vi_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: and_vi_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, -2 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 -2, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = and <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @and_vi_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: and_vi_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, -2 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> 
poison, i32 -2, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = and <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @and_vi_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: and_vi_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, -2 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, i64 -2, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = and <2 x i64> %a, %c store <2 x i64> %d, <2 x i64>* %x ret void } define void @and_iv_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: and_iv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 1, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = and <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @and_iv_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: and_iv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 1, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = and <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @and_iv_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: and_iv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 1, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> 
poison, <4 x i32> zeroinitializer %d = and <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @and_iv_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: and_iv_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = and <2 x i64> %c, %a store <2 x i64> %d, <2 x i64>* %x ret void } define void @and_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: and_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = and <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @and_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: and_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = and <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @and_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: and_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = 
and <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @and_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: and_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = and <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @and_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: and_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = and <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @and_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: and_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vand.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = and <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @or_vi_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: or_vi_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, -2 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 -2, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = or <16 x i8> %a, %c store <16 x i8> %d, <16 
x i8>* %x ret void } define void @or_vi_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: or_vi_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, -2 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 -2, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = or <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @or_vi_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: or_vi_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, -2 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 -2, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = or <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @or_vi_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: or_vi_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, -2 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, i64 -2, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = or <2 x i64> %a, %c store <2 x i64> %d, <2 x i64>* %x ret void } define void @or_iv_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: or_iv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, 1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 1, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = or <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @or_iv_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: 
or_iv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, 1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 1, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = or <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @or_iv_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: or_iv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, 1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 1, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = or <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @or_iv_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: or_iv_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vor.vi v8, v8, 1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = or <2 x i64> %c, %a store <2 x i64> %d, <2 x i64>* %x ret void } define void @or_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: or_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = or <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @or_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: or_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, 
m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = or <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @or_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: or_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = or <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @or_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: or_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = or <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @or_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: or_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = or <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @or_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: or_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; 
CHECK-NEXT: vor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = or <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @xor_vi_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: xor_vi_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 -1, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = xor <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @xor_vi_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: xor_vi_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 -1, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = xor <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @xor_vi_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: xor_vi_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 -1, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = xor <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @xor_vi_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: xor_vi_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vnot.v v8, v8 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a 
= load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, i64 -1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = xor <2 x i64> %a, %c store <2 x i64> %d, <2 x i64>* %x ret void } define void @xor_iv_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: xor_iv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vxor.vi v8, v8, 1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 1, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = xor <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @xor_iv_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: xor_iv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vxor.vi v8, v8, 1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 1, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = xor <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @xor_iv_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: xor_iv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vi v8, v8, 1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 1, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = xor <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @xor_iv_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: xor_iv_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vxor.vi v8, v8, 1 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, 
i64 1, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = xor <2 x i64> %c, %a store <2 x i64> %d, <2 x i64>* %x ret void } define void @xor_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: xor_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = xor <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @xor_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: xor_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = xor <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @xor_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: xor_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = xor <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @xor_xv_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: xor_xv_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector 
<16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = xor <16 x i8> %c, %a store <16 x i8> %d, <16 x i8>* %x ret void } define void @xor_xv_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: xor_xv_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = xor <8 x i16> %c, %a store <8 x i16> %d, <8 x i16>* %x ret void } define void @xor_xv_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: xor_xv_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vxor.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = xor <4 x i32> %c, %a store <4 x i32> %d, <4 x i32>* %x ret void } define void @lshr_vi_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: lshr_vi_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsrl.vi v8, v8, 7 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 7, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = lshr <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @lshr_vi_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: lshr_vi_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vsrl.vi v8, v8, 15 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 15, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> 
zeroinitializer %d = lshr <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @lshr_vi_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: lshr_vi_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 31, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = lshr <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @lshr_vi_v2i64(<2 x i64>* %x) { ; CHECK-LABEL: lshr_vi_v2i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsrl.vi v8, v8, 31 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x i64>, <2 x i64>* %x %b = insertelement <2 x i64> poison, i64 31, i32 0 %c = shufflevector <2 x i64> %b, <2 x i64> poison, <2 x i32> zeroinitializer %d = lshr <2 x i64> %a, %c store <2 x i64> %d, <2 x i64>* %x ret void } define void @lshr_vx_v16i8(<16 x i8>* %x, i8 %y) { ; CHECK-LABEL: lshr_vx_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 %y, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = lshr <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @lshr_vx_v8i16(<8 x i16>* %x, i16 %y) { ; CHECK-LABEL: lshr_vx_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 %y, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = lshr <8 x 
i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @lshr_vx_v4i32(<4 x i32>* %x, i32 %y) { ; CHECK-LABEL: lshr_vx_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsrl.vx v8, v8, a1 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 %y, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = lshr <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* %x ret void } define void @ashr_vi_v16i8(<16 x i8>* %x) { ; CHECK-LABEL: ashr_vi_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsra.vi v8, v8, 7 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, <16 x i8>* %x %b = insertelement <16 x i8> poison, i8 7, i32 0 %c = shufflevector <16 x i8> %b, <16 x i8> poison, <16 x i32> zeroinitializer %d = ashr <16 x i8> %a, %c store <16 x i8> %d, <16 x i8>* %x ret void } define void @ashr_vi_v8i16(<8 x i16>* %x) { ; CHECK-LABEL: ashr_vi_v8i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: vsra.vi v8, v8, 15 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, <8 x i16>* %x %b = insertelement <8 x i16> poison, i16 15, i32 0 %c = shufflevector <8 x i16> %b, <8 x i16> poison, <8 x i32> zeroinitializer %d = ashr <8 x i16> %a, %c store <8 x i16> %d, <8 x i16>* %x ret void } define void @ashr_vi_v4i32(<4 x i32>* %x) { ; CHECK-LABEL: ashr_vi_v4i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: vsra.vi v8, v8, 31 ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %a = load <4 x i32>, <4 x i32>* %x %b = insertelement <4 x i32> poison, i32 31, i32 0 %c = shufflevector <4 x i32> %b, <4 x i32> poison, <4 x i32> zeroinitializer %d = ashr <4 x i32> %a, %c store <4 x i32> %d, <4 x i32>* 
%x ret void }

; ashr of <2 x i64> by a splat of the constant 31; expected to select vsra.vi.
define void @ashr_vi_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: ashr_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsra.vi v8, v8, 31
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, <2 x i64>* %x
  %ins = insertelement <2 x i64> poison, i64 31, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  %res = ashr <2 x i64> %vec, %splat
  store <2 x i64> %res, <2 x i64>* %x
  ret void
}

; ashr of <16 x i8> by a splat of scalar %y; expected to select vsra.vx.
define void @ashr_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: ashr_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = ashr <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; ashr of <8 x i16> by a splat of scalar %y; expected to select vsra.vx.
define void @ashr_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: ashr_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = ashr <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; ashr of <4 x i32> by a splat of scalar %y; expected to select vsra.vx.
define void @ashr_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: ashr_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsra.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = ashr <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; shl of <16 x i8> by a splat of the constant 7; expected to select vsll.vi.
define void @shl_vi_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: shl_vi_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 7, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = shl <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; shl of <8 x i16> by a splat of the constant 15; expected to select vsll.vi.
define void @shl_vi_v8i16(<8 x i16>* %x) {
; CHECK-LABEL: shl_vi_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 15
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 15, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = shl <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; shl of <4 x i32> by a splat of the constant 31; expected to select vsll.vi.
define void @shl_vi_v4i32(<4 x i32>* %x) {
; CHECK-LABEL: shl_vi_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 31
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 31, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = shl <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; shl of <2 x i64> by a splat of the constant 31; expected to select vsll.vi.
define void @shl_vi_v2i64(<2 x i64>* %x) {
; CHECK-LABEL: shl_vi_v2i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; CHECK-NEXT:    vle64.v v8, (a0)
; CHECK-NEXT:    vsll.vi v8, v8, 31
; CHECK-NEXT:    vse64.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <2 x i64>, <2 x i64>* %x
  %ins = insertelement <2 x i64> poison, i64 31, i32 0
  %splat = shufflevector <2 x i64> %ins, <2 x i64> poison, <2 x i32> zeroinitializer
  %res = shl <2 x i64> %vec, %splat
  store <2 x i64> %res, <2 x i64>* %x
  ret void
}

; shl of <16 x i8> by a splat of scalar %y; expected to select vsll.vx.
define void @shl_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: shl_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = shl <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; shl of <8 x i16> by a splat of scalar %y; expected to select vsll.vx.
define void @shl_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: shl_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = shl <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; shl of <4 x i32> by a splat of scalar %y; expected to select vsll.vx.
define void @shl_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: shl_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vsll.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = shl <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; sdiv of <16 x i8> by a splat of scalar %y; expected to select vdiv.vx.
define void @sdiv_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: sdiv_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vdiv.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = sdiv <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; sdiv of <8 x i16> by a splat of scalar %y; expected to select vdiv.vx.
define void @sdiv_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: sdiv_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vdiv.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = sdiv <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; sdiv of <4 x i32> by a splat of scalar %y; expected to select vdiv.vx.
define void @sdiv_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: sdiv_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vdiv.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = sdiv <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; srem of <16 x i8> by a splat of scalar %y; expected to select vrem.vx.
define void @srem_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: srem_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vrem.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = srem <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; srem of <8 x i16> by a splat of scalar %y; expected to select vrem.vx.
define void @srem_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: srem_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vrem.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = srem <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; srem of <4 x i32> by a splat of scalar %y; expected to select vrem.vx.
define void @srem_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: srem_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vrem.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = srem <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; udiv of <16 x i8> by a splat of scalar %y; expected to select vdivu.vx.
define void @udiv_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: udiv_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vdivu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = udiv <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; udiv of <8 x i16> by a splat of scalar %y; expected to select vdivu.vx.
define void @udiv_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: udiv_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vdivu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = udiv <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; udiv of <4 x i32> by a splat of scalar %y; expected to select vdivu.vx.
define void @udiv_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: udiv_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vdivu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = udiv <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; urem of <16 x i8> by a splat of scalar %y; expected to select vremu.vx.
define void @urem_vx_v16i8(<16 x i8>* %x, i8 %y) {
; CHECK-LABEL: urem_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    vremu.vx v8, v8, a1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %ins = insertelement <16 x i8> poison, i8 %y, i32 0
  %splat = shufflevector <16 x i8> %ins, <16 x i8> poison, <16 x i32> zeroinitializer
  %res = urem <16 x i8> %vec, %splat
  store <16 x i8> %res, <16 x i8>* %x
  ret void
}

; urem of <8 x i16> by a splat of scalar %y; expected to select vremu.vx.
define void @urem_vx_v8i16(<8 x i16>* %x, i16 %y) {
; CHECK-LABEL: urem_vx_v8i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT:    vle16.v v8, (a0)
; CHECK-NEXT:    vremu.vx v8, v8, a1
; CHECK-NEXT:    vse16.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %ins = insertelement <8 x i16> poison, i16 %y, i32 0
  %splat = shufflevector <8 x i16> %ins, <8 x i16> poison, <8 x i32> zeroinitializer
  %res = urem <8 x i16> %vec, %splat
  store <8 x i16> %res, <8 x i16>* %x
  ret void
}

; urem of <4 x i32> by a splat of scalar %y; expected to select vremu.vx.
define void @urem_vx_v4i32(<4 x i32>* %x, i32 %y) {
; CHECK-LABEL: urem_vx_v4i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT:    vle32.v v8, (a0)
; CHECK-NEXT:    vremu.vx v8, v8, a1
; CHECK-NEXT:    vse32.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %ins = insertelement <4 x i32> poison, i32 %y, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> poison, <4 x i32> zeroinitializer
  %res = urem <4 x i32> %vec, %splat
  store <4 x i32> %res, <4 x i32>* %x
  ret void
}

; udiv of <16 x i8> by the constant splat 9; expected to become vmulhu.vx with
; multiplier 57 followed by a logical right shift of 1.
define void @mulhu_vx_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: mulhu_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, 57
; CHECK-NEXT:    vmulhu.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 1
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %quot = udiv <16 x i8> %vec, <i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9, i8 9>
  store <16 x i8> %quot, <16 x i8>* %x
  ret void
}

; udiv of <8 x i16> by the constant splat 7; expected to become vmulhu.vx plus a
; vsub/vsrl/vadd/vsrl fix-up sequence. The riscv32 and riscv64 expectations
; differ only in addi versus addiw when materializing the multiplier.
define void @mulhu_vx_v8i16(<8 x i16>* %x) {
; RV32-LABEL: mulhu_vx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT:    vle16.v v8, (a0)
; RV32-NEXT:    lui a1, 2
; RV32-NEXT:    addi a1, a1, 1171
; RV32-NEXT:    vmulhu.vx v9, v8, a1
; RV32-NEXT:    vsub.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v8, v8, 1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vse16.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhu_vx_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; RV64-NEXT:    vle16.v v8, (a0)
; RV64-NEXT:    lui a1, 2
; RV64-NEXT:    addiw a1, a1, 1171
; RV64-NEXT:    vmulhu.vx v9, v8, a1
; RV64-NEXT:    vsub.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v8, v8, 1
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vse16.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %quot = udiv <8 x i16> %vec, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  store <8 x i16> %quot, <8 x i16>* %x
  ret void
}

; udiv of <4 x i32> by the constant splat 5; expected to become vmulhu.vx
; followed by a logical right shift of 2.
define void @mulhu_vx_v4i32(<4 x i32>* %x) {
; RV32-LABEL: mulhu_vx_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lui a1, 838861
; RV32-NEXT:    addi a1, a1, -819
; RV32-NEXT:    vmulhu.vx v8, v8, a1
; RV32-NEXT:    vsrl.vi v8, v8, 2
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhu_vx_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    lui a1, 838861
; RV64-NEXT:    addiw a1, a1, -819
; RV64-NEXT:    vmulhu.vx v8, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 2
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %quot = udiv <4 x i32> %vec, <i32 5, i32 5, i32 5, i32 5>
  store <4 x i32> %quot, <4 x i32>* %x
  ret void
}

; udiv of <2 x i64> by the constant splat 3. On riscv32 the expected code stores
; the two 32-bit halves of the multiplier to the stack, splats them with a
; zero-stride vlse64.v and uses vmulhu.vv; on riscv64 it loads the multiplier
; from the .LCPI289_0 label and uses vmulhu.vx.
define void @mulhu_vx_v2i64(<2 x i64>* %x) {
; RV32-LABEL: mulhu_vx_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 699051
; RV32-NEXT:    addi a2, a1, -1366
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, a1, -1365
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v9, (a1), zero
; RV32-NEXT:    vmulhu.vv v8, v8, v9
; RV32-NEXT:    vsrl.vi v8, v8, 1
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhu_vx_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, %hi(.LCPI289_0)
; RV64-NEXT:    ld a1, %lo(.LCPI289_0)(a1)
; RV64-NEXT:    vmulhu.vx v8, v8, a1
; RV64-NEXT:    vsrl.vi v8, v8, 1
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <2 x i64>, <2 x i64>* %x
  %quot = udiv <2 x i64> %vec, <i64 3, i64 3>
  store <2 x i64> %quot, <2 x i64>* %x
  ret void
}

; Division of <16 x i8> by the constant splat -9.
; NOTE(review): despite the mulhs name, the body uses udiv and the expected
; output is vmulhu.vx / vsrl.vi -- confirm whether sdiv was intended here.
define void @mulhs_vx_v16i8(<16 x i8>* %x) {
; CHECK-LABEL: mulhs_vx_v16i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT:    vle8.v v8, (a0)
; CHECK-NEXT:    li a1, -123
; CHECK-NEXT:    vmulhu.vx v8, v8, a1
; CHECK-NEXT:    vsrl.vi v8, v8, 7
; CHECK-NEXT:    vse8.v v8, (a0)
; CHECK-NEXT:    ret
  %vec = load <16 x i8>, <16 x i8>* %x
  %quot = udiv <16 x i8> %vec, <i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9, i8 -9>
  store <16 x i8> %quot, <16 x i8>* %x
  ret void
}

; sdiv of <8 x i16> by the constant splat 7; expected to become vmulh.vx, an
; arithmetic shift of 1, and an add of the sign bit extracted by vsrl.vi 15.
define void @mulhs_vx_v8i16(<8 x i16>* %x) {
; RV32-LABEL: mulhs_vx_v8i16:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; RV32-NEXT:    vle16.v v8, (a0)
; RV32-NEXT:    lui a1, 5
; RV32-NEXT:    addi a1, a1, -1755
; RV32-NEXT:    vmulh.vx v8, v8, a1
; RV32-NEXT:    vsra.vi v8, v8, 1
; RV32-NEXT:    vsrl.vi v9, v8, 15
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse16.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_vx_v8i16:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, mu
; RV64-NEXT:    vle16.v v8, (a0)
; RV64-NEXT:    lui a1, 5
; RV64-NEXT:    addiw a1, a1, -1755
; RV64-NEXT:    vmulh.vx v8, v8, a1
; RV64-NEXT:    vsra.vi v8, v8, 1
; RV64-NEXT:    vsrl.vi v9, v8, 15
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse16.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <8 x i16>, <8 x i16>* %x
  %quot = sdiv <8 x i16> %vec, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  store <8 x i16> %quot, <8 x i16>* %x
  ret void
}

; sdiv of <4 x i32> by the constant splat -5; expected to become vmulh.vx plus a
; shift-and-add fix-up. The riscv32 and riscv64 expectations order the
; vsra.vi / vsrl.vi pair differently.
define void @mulhs_vx_v4i32(<4 x i32>* %x) {
; RV32-LABEL: mulhs_vx_v4i32:
; RV32:       # %bb.0:
; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; RV32-NEXT:    vle32.v v8, (a0)
; RV32-NEXT:    lui a1, 629146
; RV32-NEXT:    addi a1, a1, -1639
; RV32-NEXT:    vmulh.vx v8, v8, a1
; RV32-NEXT:    vsrl.vi v9, v8, 31
; RV32-NEXT:    vsra.vi v8, v8, 1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse32.v v8, (a0)
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_vx_v4i32:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, mu
; RV64-NEXT:    vle32.v v8, (a0)
; RV64-NEXT:    lui a1, 629146
; RV64-NEXT:    addiw a1, a1, -1639
; RV64-NEXT:    vmulh.vx v8, v8, a1
; RV64-NEXT:    vsra.vi v8, v8, 1
; RV64-NEXT:    vsrl.vi v9, v8, 31
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse32.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <4 x i32>, <4 x i32>* %x
  %quot = sdiv <4 x i32> %vec, <i32 -5, i32 -5, i32 -5, i32 -5>
  store <4 x i32> %quot, <4 x i32>* %x
  ret void
}

; sdiv of <2 x i64> by the constant splat 3. On riscv32 the expected code stores
; the two 32-bit halves of the multiplier to the stack, splats them with a
; zero-stride vlse64.v and uses vmulh.vv; on riscv64 it loads the multiplier
; from the .LCPI293_0 label and uses vmulh.vx. Both then add the sign bit
; obtained with a logical right shift by 63.
define void @mulhs_vx_v2i64(<2 x i64>* %x) {
; RV32-LABEL: mulhs_vx_v2i64:
; RV32:       # %bb.0:
; RV32-NEXT:    addi sp, sp, -16
; RV32-NEXT:    .cfi_def_cfa_offset 16
; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV32-NEXT:    vle64.v v8, (a0)
; RV32-NEXT:    lui a1, 349525
; RV32-NEXT:    addi a2, a1, 1365
; RV32-NEXT:    sw a2, 12(sp)
; RV32-NEXT:    addi a1, a1, 1366
; RV32-NEXT:    sw a1, 8(sp)
; RV32-NEXT:    addi a1, sp, 8
; RV32-NEXT:    vlse64.v v9, (a1), zero
; RV32-NEXT:    vmulh.vv v8, v8, v9
; RV32-NEXT:    li a1, 63
; RV32-NEXT:    vsrl.vx v9, v8, a1
; RV32-NEXT:    vadd.vv v8, v8, v9
; RV32-NEXT:    vse64.v v8, (a0)
; RV32-NEXT:    addi sp, sp, 16
; RV32-NEXT:    ret
;
; RV64-LABEL: mulhs_vx_v2i64:
; RV64:       # %bb.0:
; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, mu
; RV64-NEXT:    vle64.v v8, (a0)
; RV64-NEXT:    lui a1, %hi(.LCPI293_0)
; RV64-NEXT:    ld a1, %lo(.LCPI293_0)(a1)
; RV64-NEXT:    vmulh.vx v8, v8, a1
; RV64-NEXT:    li a1, 63
; RV64-NEXT:    vsrl.vx v9, v8, a1
; RV64-NEXT:    vadd.vv v8, v8, v9
; RV64-NEXT:    vse64.v v8, (a0)
; RV64-NEXT:    ret
  %vec = load <2 x i64>, <2 x i64>* %x
  %quot = sdiv <2 x i64> %vec, <i64 3, i64 3>
  store <2 x i64> %quot, <2 x i64>* %x
  ret void
}