; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+neon | FileCheck %s

; arm64 has its own copy of this because of the intrinsics

; Overview of what the expected assembly below pins down:
;   * mul on i8/i16/i32 element vectors  -> one vector "mul" instruction.
;   * mul on i64 element vectors         -> lanes moved to general-purpose
;                                           registers, scalar "mul", re-inserted.
;   * fmul / fdiv                        -> one vector "fmul" / "fdiv".
;   * sdiv / udiv                        -> per lane: extract (smov/umov, or
;                                           mov/fmov for 32/64-bit lanes),
;                                           scalar sdiv/udiv, insert.
;   * srem / urem                        -> per lane: scalar sdiv/udiv followed
;                                           by "msub" to form the remainder.
; The assertion comments are produced by the script named on the first line;
; regenerate them with that script instead of editing them by hand.

;------------------------------------------------------------------------------
; Integer multiply
;------------------------------------------------------------------------------

define <8 x i8> @mul8xi8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: mul8xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
  %tmp3 = mul <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @mul16xi8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: mul16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
  %tmp3 = mul <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
}

define <4 x i16> @mul4xi16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: mul4xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
  %tmp3 = mul <4 x i16> %A, %B;
  ret <4 x i16> %tmp3
}

define <8 x i16> @mul8xi16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: mul8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
  %tmp3 = mul <8 x i16> %A, %B;
  ret <8 x i16> %tmp3
}

define <2 x i32> @mul2xi32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: mul2xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = mul <2 x i32> %A, %B;
  ret <2 x i32> %tmp3
}

define <4 x i32> @mul4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: mul4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = mul <4 x i32> %A, %B;
  ret <4 x i32> %tmp3
}

; The two i64-element multiplies below are expected to be done with scalar
; "mul" on x-registers rather than with a vector instruction.
define <1 x i64> @mul1xi64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: mul1xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mul x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = mul <1 x i64> %A, %B;
  ret <1 x i64> %tmp3
}

define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: mul2xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    fmov x10, d0
; CHECK-NEXT:    mov x8, v1.d[1]
; CHECK-NEXT:    mov x11, v0.d[1]
; CHECK-NEXT:    mul x9, x10, x9
; CHECK-NEXT:    mul x8, x11, x8
; CHECK-NEXT:    fmov d0, x9
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %tmp3 = mul <2 x i64> %A, %B;
  ret <2 x i64> %tmp3
}

;------------------------------------------------------------------------------
; Floating-point multiply and divide
;------------------------------------------------------------------------------

define <2 x float> @mul2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: mul2xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = fmul <2 x float> %A, %B;
  ret <2 x float> %tmp3
}

define <4 x float> @mul4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: mul4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = fmul <4 x float> %A, %B;
  ret <4 x float> %tmp3
}

define <2 x double> @mul2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-LABEL: mul2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp3 = fmul <2 x double> %A, %B;
  ret <2 x double> %tmp3
}

define <2 x float> @div2xfloat(<2 x float> %A, <2 x float> %B) {
; CHECK-LABEL: div2xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
  %tmp3 = fdiv <2 x float> %A, %B;
  ret <2 x float> %tmp3
}

define <4 x float> @div4xfloat(<4 x float> %A, <4 x float> %B) {
; CHECK-LABEL: div4xfloat:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
  %tmp3 = fdiv <4 x float> %A, %B;
  ret <4 x float> %tmp3
}

define <2 x double> @div2xdouble(<2 x double> %A, <2 x double> %B) {
; CHECK-LABEL: div2xdouble:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fdiv v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %tmp3 = fdiv <2 x double> %A, %B;
  ret <2 x double> %tmp3
}

;------------------------------------------------------------------------------
; Signed integer divide
; Expected code divides one lane at a time with scalar "sdiv".
;------------------------------------------------------------------------------

define <1 x i8> @sdiv1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: sdiv1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[0]
; CHECK-NEXT:    smov w9, v0.b[0]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i8> %A, %B;
  ret <1 x i8> %tmp3
}

define <8 x i8> @sdiv8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: sdiv8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w10, v0.b[0]
; CHECK-NEXT:    smov w11, v0.b[2]
; CHECK-NEXT:    smov w12, v0.b[3]
; CHECK-NEXT:    smov w13, v0.b[4]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.b[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.b[5]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    smov w10, v0.b[6]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v0.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    smov w11, v0.b[7]
; CHECK-NEXT:    sdiv w8, w13, w9
; CHECK-NEXT:    smov w9, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[6], w9
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = sdiv <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @sdiv16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: sdiv16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w10, v0.b[0]
; CHECK-NEXT:    smov w11, v0.b[2]
; CHECK-NEXT:    smov w12, v0.b[3]
; CHECK-NEXT:    smov w13, v0.b[4]
; CHECK-NEXT:    smov w14, v0.b[5]
; CHECK-NEXT:    smov w15, v0.b[6]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.b[0]
; CHECK-NEXT:    smov w16, v0.b[7]
; CHECK-NEXT:    smov w17, v0.b[8]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.b[9]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    smov w10, v0.b[10]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    smov w11, v0.b[11]
; CHECK-NEXT:    sdiv w13, w14, w13
; CHECK-NEXT:    smov w14, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    smov w12, v0.b[12]
; CHECK-NEXT:    sdiv w14, w15, w14
; CHECK-NEXT:    smov w15, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w13
; CHECK-NEXT:    smov w13, v0.b[13]
; CHECK-NEXT:    sdiv w15, w16, w15
; CHECK-NEXT:    smov w16, v1.b[8]
; CHECK-NEXT:    mov v2.b[6], w14
; CHECK-NEXT:    sdiv w16, w17, w16
; CHECK-NEXT:    smov w17, v0.b[9]
; CHECK-NEXT:    mov v2.b[7], w15
; CHECK-NEXT:    sdiv w8, w17, w9
; CHECK-NEXT:    smov w9, v1.b[10]
; CHECK-NEXT:    mov v2.b[8], w16
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.b[11]
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.b[12]
; CHECK-NEXT:    mov v2.b[10], w9
; CHECK-NEXT:    smov w9, v1.b[14]
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.b[13]
; CHECK-NEXT:    mov v2.b[11], w10
; CHECK-NEXT:    smov w10, v1.b[15]
; CHECK-NEXT:    sdiv w8, w13, w12
; CHECK-NEXT:    smov w12, v0.b[14]
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    smov w11, v0.b[15]
; CHECK-NEXT:    sdiv w9, w12, w9
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[14], w9
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = sdiv <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
}

define <1 x i16> @sdiv1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: sdiv1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[0]
; CHECK-NEXT:    smov w9, v0.h[0]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i16> %A, %B;
  ret <1 x i16> %tmp3
}

define <4 x i16> @sdiv4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: sdiv4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w10, v0.h[0]
; CHECK-NEXT:    smov w11, v0.h[2]
; CHECK-NEXT:    smov w12, v0.h[3]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.h[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.h[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    sdiv w8, w12, w11
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = sdiv <4 x i16> %A, %B;
  ret <4 x i16> %tmp3
}

define <8 x i16> @sdiv8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: sdiv8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w10, v0.h[0]
; CHECK-NEXT:    smov w11, v0.h[2]
; CHECK-NEXT:    smov w12, v0.h[3]
; CHECK-NEXT:    smov w13, v0.h[4]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    smov w9, v1.h[0]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    smov w11, v1.h[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    smov w9, v1.h[5]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    sdiv w11, w12, w11
; CHECK-NEXT:    smov w12, v1.h[4]
; CHECK-NEXT:    mov v2.h[2], w10
; CHECK-NEXT:    smov w10, v0.h[6]
; CHECK-NEXT:    sdiv w12, w13, w12
; CHECK-NEXT:    smov w13, v0.h[5]
; CHECK-NEXT:    mov v2.h[3], w11
; CHECK-NEXT:    smov w11, v0.h[7]
; CHECK-NEXT:    sdiv w8, w13, w9
; CHECK-NEXT:    smov w9, v1.h[6]
; CHECK-NEXT:    mov v2.h[4], w12
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    smov w10, v1.h[7]
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[6], w9
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = sdiv <8 x i16> %A, %B;
  ret <8 x i16> %tmp3
}

define <1 x i32> @sdiv1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: sdiv1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i32> %A, %B;
  ret <1 x i32> %tmp3
}

define <2 x i32> @sdiv2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: sdiv2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = sdiv <2 x i32> %A, %B;
  ret <2 x i32> %tmp3
}

define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: sdiv4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w11, v0.s[2]
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    sdiv w8, w9, w8
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    sdiv w9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    sdiv w10, w11, w10
; CHECK-NEXT:    mov w11, v1.s[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    sdiv w8, w12, w11
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <4 x i32> %A, %B;
  ret <4 x i32> %tmp3
}

define <1 x i64> @sdiv1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: sdiv1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    sdiv x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = sdiv <1 x i64> %A, %B;
  ret <1 x i64> %tmp3
}

define <2 x i64> @sdiv2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: sdiv2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    sdiv x8, x9, x8
; CHECK-NEXT:    mov x9, v1.d[1]
; CHECK-NEXT:    sdiv x9, x10, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = sdiv <2 x i64> %A, %B;
  ret <2 x i64> %tmp3
}

;------------------------------------------------------------------------------
; Unsigned integer divide
; Same shape as the signed cases, with "umov" extracts and scalar "udiv".
;------------------------------------------------------------------------------

define <1 x i8> @udiv1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: udiv1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[0]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i8> %A, %B;
  ret <1 x i8> %tmp3
}

define <8 x i8> @udiv8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: udiv8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w10, v0.b[0]
; CHECK-NEXT:    umov w11, v0.b[2]
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    umov w13, v0.b[4]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.b[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.b[5]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    umov w10, v0.b[6]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v0.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    umov w11, v0.b[7]
; CHECK-NEXT:    udiv w8, w13, w9
; CHECK-NEXT:    umov w9, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[6], w9
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = udiv <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @udiv16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: udiv16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w10, v0.b[0]
; CHECK-NEXT:    umov w11, v0.b[2]
; CHECK-NEXT:    umov w12, v0.b[3]
; CHECK-NEXT:    umov w13, v0.b[4]
; CHECK-NEXT:    umov w14, v0.b[5]
; CHECK-NEXT:    umov w15, v0.b[6]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.b[0]
; CHECK-NEXT:    umov w16, v0.b[7]
; CHECK-NEXT:    umov w17, v0.b[8]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.b[9]
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[4]
; CHECK-NEXT:    mov v2.b[2], w10
; CHECK-NEXT:    umov w10, v0.b[10]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v1.b[5]
; CHECK-NEXT:    mov v2.b[3], w11
; CHECK-NEXT:    umov w11, v0.b[11]
; CHECK-NEXT:    udiv w13, w14, w13
; CHECK-NEXT:    umov w14, v1.b[6]
; CHECK-NEXT:    mov v2.b[4], w12
; CHECK-NEXT:    umov w12, v0.b[12]
; CHECK-NEXT:    udiv w14, w15, w14
; CHECK-NEXT:    umov w15, v1.b[7]
; CHECK-NEXT:    mov v2.b[5], w13
; CHECK-NEXT:    umov w13, v0.b[13]
; CHECK-NEXT:    udiv w15, w16, w15
; CHECK-NEXT:    umov w16, v1.b[8]
; CHECK-NEXT:    mov v2.b[6], w14
; CHECK-NEXT:    udiv w16, w17, w16
; CHECK-NEXT:    umov w17, v0.b[9]
; CHECK-NEXT:    mov v2.b[7], w15
; CHECK-NEXT:    udiv w8, w17, w9
; CHECK-NEXT:    umov w9, v1.b[10]
; CHECK-NEXT:    mov v2.b[8], w16
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.b[11]
; CHECK-NEXT:    mov v2.b[9], w8
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.b[12]
; CHECK-NEXT:    mov v2.b[10], w9
; CHECK-NEXT:    umov w9, v1.b[14]
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.b[13]
; CHECK-NEXT:    mov v2.b[11], w10
; CHECK-NEXT:    umov w10, v1.b[15]
; CHECK-NEXT:    udiv w8, w13, w12
; CHECK-NEXT:    umov w12, v0.b[14]
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    umov w11, v0.b[15]
; CHECK-NEXT:    udiv w9, w12, w9
; CHECK-NEXT:    mov v2.b[13], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[14], w9
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = udiv <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
}

define <1 x i16> @udiv1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: udiv1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.h[0]
; CHECK-NEXT:    umov w9, v0.h[0]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i16> %A, %B;
  ret <1 x i16> %tmp3
}

define <4 x i16> @udiv4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: udiv4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[0]
; CHECK-NEXT:    umov w11, v0.h[2]
; CHECK-NEXT:    umov w12, v0.h[3]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.h[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    udiv w8, w12, w11
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = udiv <4 x i16> %A, %B;
  ret <4 x i16> %tmp3
}

define <8 x i16> @udiv8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: udiv8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    umov w9, v0.h[1]
; CHECK-NEXT:    umov w10, v0.h[0]
; CHECK-NEXT:    umov w11, v0.h[2]
; CHECK-NEXT:    umov w12, v0.h[3]
; CHECK-NEXT:    umov w13, v0.h[4]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    umov w9, v1.h[0]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    umov w11, v1.h[3]
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    umov w9, v1.h[5]
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    udiv w11, w12, w11
; CHECK-NEXT:    umov w12, v1.h[4]
; CHECK-NEXT:    mov v2.h[2], w10
; CHECK-NEXT:    umov w10, v0.h[6]
; CHECK-NEXT:    udiv w12, w13, w12
; CHECK-NEXT:    umov w13, v0.h[5]
; CHECK-NEXT:    mov v2.h[3], w11
; CHECK-NEXT:    umov w11, v0.h[7]
; CHECK-NEXT:    udiv w8, w13, w9
; CHECK-NEXT:    umov w9, v1.h[6]
; CHECK-NEXT:    mov v2.h[4], w12
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    umov w10, v1.h[7]
; CHECK-NEXT:    mov v2.h[5], w8
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[6], w9
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = udiv <8 x i16> %A, %B;
  ret <8 x i16> %tmp3
}

define <1 x i32> @udiv1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: udiv1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i32> %A, %B;
  ret <1 x i32> %tmp3
}

define <2 x i32> @udiv2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: udiv2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w10, v0.s[1]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    mov w9, v1.s[1]
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = udiv <2 x i32> %A, %B;
  ret <2 x i32> %tmp3
}

define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: udiv4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    fmov w10, s0
; CHECK-NEXT:    mov w11, v0.s[2]
; CHECK-NEXT:    mov w12, v0.s[3]
; CHECK-NEXT:    udiv w8, w9, w8
; CHECK-NEXT:    fmov w9, s1
; CHECK-NEXT:    udiv w9, w10, w9
; CHECK-NEXT:    mov w10, v1.s[2]
; CHECK-NEXT:    udiv w10, w11, w10
; CHECK-NEXT:    mov w11, v1.s[3]
; CHECK-NEXT:    fmov s0, w9
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    udiv w8, w12, w11
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = udiv <4 x i32> %A, %B;
  ret <4 x i32> %tmp3
}

define <1 x i64> @udiv1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: udiv1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    udiv x8, x9, x8
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = udiv <1 x i64> %A, %B;
  ret <1 x i64> %tmp3
}

define <2 x i64> @udiv2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: udiv2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    udiv x8, x9, x8
; CHECK-NEXT:    mov x9, v1.d[1]
; CHECK-NEXT:    udiv x9, x10, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = udiv <2 x i64> %A, %B;
  ret <2 x i64> %tmp3
}

;------------------------------------------------------------------------------
; Signed integer remainder
; Expected code forms each lane's remainder as a scalar "sdiv" followed by
; "msub". The expected output for srem16x8 also stores x19-x26 to the stack on
; entry and reloads them before returning.
;------------------------------------------------------------------------------

define <1 x i8> @srem1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: srem1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.b[0]
; CHECK-NEXT:    smov w9, v0.b[0]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i8> %A, %B;
  ret <1 x i8> %tmp3
}

define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: srem8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w11, v1.b[0]
; CHECK-NEXT:    smov w12, v0.b[0]
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w14, v1.b[2]
; CHECK-NEXT:    smov w15, v0.b[2]
; CHECK-NEXT:    smov w17, v1.b[3]
; CHECK-NEXT:    smov w18, v0.b[3]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    smov w1, v1.b[4]
; CHECK-NEXT:    smov w2, v0.b[4]
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w12, v1.b[5]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    smov w13, v0.b[5]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.b[6]
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.b[6]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w15, v1.b[7]
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    smov w16, v0.b[7]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    msub w14, w0, w17, w18
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    mov v2.b[3], w14
; CHECK-NEXT:    msub w14, w3, w1, w2
; CHECK-NEXT:    sdiv w9, w13, w12
; CHECK-NEXT:    mov v2.b[4], w14
; CHECK-NEXT:    msub w9, w9, w12, w13
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[5], w9
; CHECK-NEXT:    msub w8, w8, w10, w11
; CHECK-NEXT:    sdiv w12, w16, w15
; CHECK-NEXT:    mov v2.b[6], w8
; CHECK-NEXT:    msub w8, w12, w15, w16
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = srem <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
; CHECK-LABEL: srem16x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 64
; CHECK-NEXT:    stp x24, x23, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT:    stp x22, x21, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT:    stp x20, x19, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT:    .cfi_offset w19, -8
; CHECK-NEXT:    .cfi_offset w20, -16
; CHECK-NEXT:    .cfi_offset w21, -24
; CHECK-NEXT:    .cfi_offset w22, -32
; CHECK-NEXT:    .cfi_offset w23, -40
; CHECK-NEXT:    .cfi_offset w24, -48
; CHECK-NEXT:    .cfi_offset w25, -56
; CHECK-NEXT:    .cfi_offset w26, -64
; CHECK-NEXT:    smov w11, v1.b[0]
; CHECK-NEXT:    smov w12, v0.b[0]
; CHECK-NEXT:    smov w8, v1.b[1]
; CHECK-NEXT:    smov w9, v0.b[1]
; CHECK-NEXT:    smov w14, v1.b[2]
; CHECK-NEXT:    smov w15, v0.b[2]
; CHECK-NEXT:    smov w17, v1.b[3]
; CHECK-NEXT:    smov w18, v0.b[3]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    smov w1, v1.b[4]
; CHECK-NEXT:    smov w2, v0.b[4]
; CHECK-NEXT:    smov w4, v1.b[5]
; CHECK-NEXT:    smov w5, v0.b[5]
; CHECK-NEXT:    smov w7, v1.b[6]
; CHECK-NEXT:    smov w19, v0.b[6]
; CHECK-NEXT:    smov w21, v1.b[7]
; CHECK-NEXT:    smov w22, v0.b[7]
; CHECK-NEXT:    smov w24, v1.b[8]
; CHECK-NEXT:    smov w25, v0.b[8]
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w12, v1.b[9]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    smov w13, v0.b[9]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.b[10]
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.b[10]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w15, v1.b[11]
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    smov w16, v0.b[11]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    msub w14, w0, w17, w18
; CHECK-NEXT:    smov w18, v1.b[12]
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    smov w0, v0.b[12]
; CHECK-NEXT:    mov v2.b[3], w14
; CHECK-NEXT:    msub w14, w3, w1, w2
; CHECK-NEXT:    smov w2, v1.b[13]
; CHECK-NEXT:    sdiv w6, w5, w4
; CHECK-NEXT:    smov w3, v0.b[13]
; CHECK-NEXT:    mov v2.b[4], w14
; CHECK-NEXT:    msub w17, w6, w4, w5
; CHECK-NEXT:    sdiv w20, w19, w7
; CHECK-NEXT:    mov v2.b[5], w17
; CHECK-NEXT:    msub w17, w20, w7, w19
; CHECK-NEXT:    sdiv w23, w22, w21
; CHECK-NEXT:    ldp x20, x19, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[6], w17
; CHECK-NEXT:    msub w1, w23, w21, w22
; CHECK-NEXT:    sdiv w26, w25, w24
; CHECK-NEXT:    ldp x22, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[7], w1
; CHECK-NEXT:    msub w1, w26, w24, w25
; CHECK-NEXT:    sdiv w9, w13, w12
; CHECK-NEXT:    ldp x24, x23, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT:    mov v2.b[8], w1
; CHECK-NEXT:    msub w9, w9, w12, w13
; CHECK-NEXT:    smov w13, v1.b[15]
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[9], w9
; CHECK-NEXT:    smov w9, v1.b[14]
; CHECK-NEXT:    msub w8, w8, w10, w11
; CHECK-NEXT:    smov w10, v0.b[14]
; CHECK-NEXT:    sdiv w14, w16, w15
; CHECK-NEXT:    mov v2.b[10], w8
; CHECK-NEXT:    msub w11, w14, w15, w16
; CHECK-NEXT:    smov w14, v0.b[15]
; CHECK-NEXT:    sdiv w17, w0, w18
; CHECK-NEXT:    mov v2.b[11], w11
; CHECK-NEXT:    msub w11, w17, w18, w0
; CHECK-NEXT:    sdiv w12, w3, w2
; CHECK-NEXT:    mov v2.b[12], w11
; CHECK-NEXT:    msub w12, w12, w2, w3
; CHECK-NEXT:    sdiv w8, w10, w9
; CHECK-NEXT:    mov v2.b[13], w12
; CHECK-NEXT:    msub w8, w8, w9, w10
; CHECK-NEXT:    sdiv w11, w14, w13
; CHECK-NEXT:    mov v2.b[14], w8
; CHECK-NEXT:    msub w8, w11, w13, w14
; CHECK-NEXT:    mov v2.b[15], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ldp x26, x25, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT:    ret
  %tmp3 = srem <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
}

define <1 x i16> @srem1x16(<1 x i16> %A, <1 x i16> %B) {
; CHECK-LABEL: srem1x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v1.h[0]
; CHECK-NEXT:    smov w9, v0.h[0]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i16> %A, %B;
  ret <1 x i16> %tmp3
}

define <4 x i16> @srem4x16(<4 x i16> %A, <4 x i16> %B) {
; CHECK-LABEL: srem4x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w11, v1.h[0]
; CHECK-NEXT:    smov w12, v0.h[0]
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w14, v1.h[2]
; CHECK-NEXT:    smov w15, v0.h[2]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w12, v1.h[3]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    smov w13, v0.h[3]
; CHECK-NEXT:    fmov s0, w11
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    mov v0.h[1], w8
; CHECK-NEXT:    msub w10, w16, w14, w15
; CHECK-NEXT:    sdiv w9, w13, w12
; CHECK-NEXT:    mov v0.h[2], w10
; CHECK-NEXT:    msub w8, w9, w12, w13
; CHECK-NEXT:    mov v0.h[3], w8
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = srem <4 x i16> %A, %B;
  ret <4 x i16> %tmp3
}

define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) {
; CHECK-LABEL: srem8x16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smov w11, v1.h[0]
; CHECK-NEXT:    smov w12, v0.h[0]
; CHECK-NEXT:    smov w8, v1.h[1]
; CHECK-NEXT:    smov w9, v0.h[1]
; CHECK-NEXT:    smov w14, v1.h[2]
; CHECK-NEXT:    smov w15, v0.h[2]
; CHECK-NEXT:    smov w17, v1.h[3]
; CHECK-NEXT:    smov w18, v0.h[3]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    smov w1, v1.h[4]
; CHECK-NEXT:    smov w2, v0.h[4]
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    smov w12, v1.h[5]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    smov w13, v0.h[5]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    smov w11, v0.h[6]
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    smov w10, v1.h[6]
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    mov v2.h[1], w8
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    smov w15, v1.h[7]
; CHECK-NEXT:    sdiv w0, w18, w17
; CHECK-NEXT:    smov w16, v0.h[7]
; CHECK-NEXT:    mov v2.h[2], w8
; CHECK-NEXT:    msub w14, w0, w17, w18
; CHECK-NEXT:    sdiv w3, w2, w1
; CHECK-NEXT:    mov v2.h[3], w14
; CHECK-NEXT:    msub w14, w3, w1, w2
; CHECK-NEXT:    sdiv w9, w13, w12
; CHECK-NEXT:    mov v2.h[4], w14
; CHECK-NEXT:    msub w9, w9, w12, w13
; CHECK-NEXT:    sdiv w8, w11, w10
; CHECK-NEXT:    mov v2.h[5], w9
; CHECK-NEXT:    msub w8, w8, w10, w11
; CHECK-NEXT:    sdiv w12, w16, w15
; CHECK-NEXT:    mov v2.h[6], w8
; CHECK-NEXT:    msub w8, w12, w15, w16
; CHECK-NEXT:    mov v2.h[7], w8
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    ret
  %tmp3 = srem <8 x i16> %A, %B;
  ret <8 x i16> %tmp3
}

define <1 x i32> @srem1x32(<1 x i32> %A, <1 x i32> %B) {
; CHECK-LABEL: srem1x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i32> %A, %B;
  ret <1 x i32> %tmp3
}

define <2 x i32> @srem2x32(<2 x i32> %A, <2 x i32> %B) {
; CHECK-LABEL: srem2x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    fmov w9, s0
; CHECK-NEXT:    mov w11, v1.s[1]
; CHECK-NEXT:    mov w12, v0.s[1]
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    msub w9, w13, w11, w12
; CHECK-NEXT:    mov v0.s[1], w9
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %tmp3 = srem <2 x i32> %A, %B;
  ret <2 x i32> %tmp3
}

define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: srem4x32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w11, s1
; CHECK-NEXT:    fmov w12, s0
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    mov w9, v0.s[1]
; CHECK-NEXT:    mov w14, v1.s[2]
; CHECK-NEXT:    mov w15, v0.s[2]
; CHECK-NEXT:    sdiv w13, w12, w11
; CHECK-NEXT:    mov w17, v1.s[3]
; CHECK-NEXT:    mov w18, v0.s[3]
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    sdiv w10, w9, w8
; CHECK-NEXT:    fmov s0, w11
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    sdiv w16, w15, w14
; CHECK-NEXT:    mov v0.s[1], w8
; CHECK-NEXT:    msub w10, w16, w14, w15
; CHECK-NEXT:    sdiv w9, w18, w17
; CHECK-NEXT:    mov v0.s[2], w10
; CHECK-NEXT:    msub w8, w9, w17, w18
; CHECK-NEXT:    mov v0.s[3], w8
; CHECK-NEXT:    ret
  %tmp3 = srem <4 x i32> %A, %B;
  ret <4 x i32> %tmp3
}

define <1 x i64> @srem1x64(<1 x i64> %A, <1 x i64> %B) {
; CHECK-LABEL: srem1x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    sdiv x10, x9, x8
; CHECK-NEXT:    msub x8, x10, x8, x9
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    ret
  %tmp3 = srem <1 x i64> %A, %B;
  ret <1 x i64> %tmp3
}

define <2 x i64> @srem2x64(<2 x i64> %A, <2 x i64> %B) {
; CHECK-LABEL: srem2x64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x11, v1.d[1]
; CHECK-NEXT:    mov x12, v0.d[1]
; CHECK-NEXT:    sdiv x10, x9, x8
; CHECK-NEXT:    msub x8, x10, x8, x9
; CHECK-NEXT:    sdiv x13, x12, x11
; CHECK-NEXT:    fmov d0, x8
; CHECK-NEXT:    msub x9, x13, x11, x12
; CHECK-NEXT:    mov v0.d[1], x9
; CHECK-NEXT:    ret
  %tmp3 = srem <2 x i64> %A, %B;
  ret <2 x i64> %tmp3
}

;------------------------------------------------------------------------------
; Unsigned integer remainder
; Same shape as the signed cases, with "umov" extracts and scalar "udiv"
; followed by "msub".
;------------------------------------------------------------------------------

define <1 x i8> @urem1x8(<1 x i8> %A, <1 x i8> %B) {
; CHECK-LABEL: urem1x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w8, v1.b[0]
; CHECK-NEXT:    umov w9, v0.b[0]
; CHECK-NEXT:    udiv w10, w9, w8
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    fmov s0, w8
; CHECK-NEXT:    ret
  %tmp3 = urem <1 x i8> %A, %B;
  ret <1 x i8> %tmp3
}

define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) {
; CHECK-LABEL: urem8x8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    umov w11, v1.b[0]
; CHECK-NEXT:    umov w12, v0.b[0]
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    umov w9, v0.b[1]
; CHECK-NEXT:    umov w14, v1.b[2]
; CHECK-NEXT:    umov w15, v0.b[2]
; CHECK-NEXT:    umov w17, v1.b[3]
; CHECK-NEXT:    umov w18, v0.b[3]
; CHECK-NEXT:    udiv w13, w12, w11
; CHECK-NEXT:    umov w1, v1.b[4]
; CHECK-NEXT:    umov w2, v0.b[4]
; CHECK-NEXT:    msub w11, w13, w11, w12
; CHECK-NEXT:    umov w12, v1.b[5]
; CHECK-NEXT:    udiv w10, w9, w8
; CHECK-NEXT:    umov w13, v0.b[5]
; CHECK-NEXT:    fmov s2, w11
; CHECK-NEXT:    umov w11, v0.b[6]
; CHECK-NEXT:    msub w8, w10, w8, w9
; CHECK-NEXT:    umov w10, v1.b[6]
; CHECK-NEXT:    udiv w16, w15, w14
; CHECK-NEXT:    mov v2.b[1], w8
; CHECK-NEXT:    msub w8, w16, w14, w15
; CHECK-NEXT:    umov w15, v1.b[7]
; CHECK-NEXT:    udiv w0, w18, w17
; CHECK-NEXT:    umov w16, v0.b[7]
; CHECK-NEXT:    mov v2.b[2], w8
; CHECK-NEXT:    msub w14, w0, w17, w18
; CHECK-NEXT:    udiv w3, w2, w1
; CHECK-NEXT:    mov v2.b[3], w14
; CHECK-NEXT:    msub w14, w3, w1, w2
; CHECK-NEXT:    udiv w9, w13, w12
; CHECK-NEXT:    mov v2.b[4], w14
; CHECK-NEXT:    msub w9, w9, w12, w13
; CHECK-NEXT:    udiv w8, w11, w10
; CHECK-NEXT:    mov v2.b[5], w9
; CHECK-NEXT:    msub w8, w8, w10, w11
; CHECK-NEXT:    udiv w12, w16, w15
; CHECK-NEXT:    mov v2.b[6], w8
; CHECK-NEXT:    msub w8, w12, w15, w16
; CHECK-NEXT:    mov v2.b[7], w8
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    ret
  %tmp3 = urem <8 x i8> %A, %B;
  ret <8 x i8> %tmp3
}

define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: urem16x8: ; CHECK: // %bb.0: ; CHECK-NEXT: stp x26, x25, [sp, #-64]! 
// 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w23, -40 ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 ; CHECK-NEXT: umov w11, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[0] ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w14, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] ; CHECK-NEXT: umov w4, v1.b[5] ; CHECK-NEXT: umov w5, v0.b[5] ; CHECK-NEXT: umov w7, v1.b[6] ; CHECK-NEXT: umov w19, v0.b[6] ; CHECK-NEXT: umov w21, v1.b[7] ; CHECK-NEXT: umov w22, v0.b[7] ; CHECK-NEXT: umov w24, v1.b[8] ; CHECK-NEXT: umov w25, v0.b[8] ; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: umov w12, v1.b[9] ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: umov w13, v0.b[9] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.b[10] ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.b[10] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 ; CHECK-NEXT: msub w8, w16, w14, w15 ; CHECK-NEXT: umov w15, v1.b[11] ; CHECK-NEXT: udiv w0, w18, w17 ; CHECK-NEXT: umov w16, v0.b[11] ; CHECK-NEXT: mov v2.b[2], w8 ; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: umov w18, v1.b[12] ; CHECK-NEXT: udiv w3, w2, w1 ; CHECK-NEXT: umov w0, v0.b[12] ; CHECK-NEXT: mov v2.b[3], w14 ; CHECK-NEXT: msub w14, w3, w1, w2 ; CHECK-NEXT: umov w2, v1.b[13] ; CHECK-NEXT: udiv w6, w5, w4 ; CHECK-NEXT: umov w3, v0.b[13] ; CHECK-NEXT: mov v2.b[4], w14 ; CHECK-NEXT: msub w17, w6, w4, w5 ; CHECK-NEXT: udiv w20, 
w19, w7 ; CHECK-NEXT: mov v2.b[5], w17 ; CHECK-NEXT: msub w17, w20, w7, w19 ; CHECK-NEXT: udiv w23, w22, w21 ; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov v2.b[6], w17 ; CHECK-NEXT: msub w1, w23, w21, w22 ; CHECK-NEXT: udiv w26, w25, w24 ; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov v2.b[7], w1 ; CHECK-NEXT: msub w1, w26, w24, w25 ; CHECK-NEXT: udiv w9, w13, w12 ; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov v2.b[8], w1 ; CHECK-NEXT: msub w9, w9, w12, w13 ; CHECK-NEXT: umov w13, v1.b[15] ; CHECK-NEXT: udiv w8, w11, w10 ; CHECK-NEXT: mov v2.b[9], w9 ; CHECK-NEXT: umov w9, v1.b[14] ; CHECK-NEXT: msub w8, w8, w10, w11 ; CHECK-NEXT: umov w10, v0.b[14] ; CHECK-NEXT: udiv w14, w16, w15 ; CHECK-NEXT: mov v2.b[10], w8 ; CHECK-NEXT: msub w11, w14, w15, w16 ; CHECK-NEXT: umov w14, v0.b[15] ; CHECK-NEXT: udiv w17, w0, w18 ; CHECK-NEXT: mov v2.b[11], w11 ; CHECK-NEXT: msub w11, w17, w18, w0 ; CHECK-NEXT: udiv w12, w3, w2 ; CHECK-NEXT: mov v2.b[12], w11 ; CHECK-NEXT: msub w12, w12, w2, w3 ; CHECK-NEXT: udiv w8, w10, w9 ; CHECK-NEXT: mov v2.b[13], w12 ; CHECK-NEXT: msub w8, w8, w9, w10 ; CHECK-NEXT: udiv w11, w14, w13 ; CHECK-NEXT: mov v2.b[14], w8 ; CHECK-NEXT: msub w8, w11, w13, w14 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 } define <1 x i16> @urem1x16(<1 x i16> %A, <1 x i16> %B) { ; CHECK-LABEL: urem1x16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v1.h[0] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ret %tmp3 = urem <1 x i16> %A, %B; ret <1 x i16> %tmp3 } define <4 x i16> @urem4x16(<4 x i16> %A, <4 x i16> %B) { ; CHECK-LABEL: urem4x16: 
; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w11, v1.h[0] ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: umov w12, v1.h[3] ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: umov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: msub w10, w16, w14, w15 ; CHECK-NEXT: udiv w9, w13, w12 ; CHECK-NEXT: mov v0.h[2], w10 ; CHECK-NEXT: msub w8, w9, w12, w13 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = urem <4 x i16> %A, %B; ret <4 x i16> %tmp3 } define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: urem8x16: ; CHECK: // %bb.0: ; CHECK-NEXT: umov w11, v1.h[0] ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] ; CHECK-NEXT: umov w17, v1.h[3] ; CHECK-NEXT: umov w18, v0.h[3] ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.h[4] ; CHECK-NEXT: umov w2, v0.h[4] ; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: umov w12, v1.h[5] ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: umov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.h[6] ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.h[6] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 ; CHECK-NEXT: msub w8, w16, w14, w15 ; CHECK-NEXT: umov w15, v1.h[7] ; CHECK-NEXT: udiv w0, w18, w17 ; CHECK-NEXT: umov w16, v0.h[7] ; CHECK-NEXT: mov v2.h[2], w8 ; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 ; CHECK-NEXT: mov v2.h[3], w14 ; CHECK-NEXT: msub w14, w3, w1, w2 ; CHECK-NEXT: udiv w9, w13, w12 ; 
CHECK-NEXT: mov v2.h[4], w14 ; CHECK-NEXT: msub w9, w9, w12, w13 ; CHECK-NEXT: udiv w8, w11, w10 ; CHECK-NEXT: mov v2.h[5], w9 ; CHECK-NEXT: msub w8, w8, w10, w11 ; CHECK-NEXT: udiv w12, w16, w15 ; CHECK-NEXT: mov v2.h[6], w8 ; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = urem <8 x i16> %A, %B; ret <8 x i16> %tmp3 } define <1 x i32> @urem1x32(<1 x i32> %A, <1 x i32> %B) { ; CHECK-LABEL: urem1x32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ret %tmp3 = urem <1 x i32> %A, %B; ret <1 x i32> %tmp3 } define <2 x i32> @urem2x32(<2 x i32> %A, <2 x i32> %B) { ; CHECK-LABEL: urem2x32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: mov w11, v1.s[1] ; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: msub w9, w13, w11, w12 ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = urem <2 x i32> %A, %B; ret <2 x i32> %tmp3 } define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: urem4x32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov w11, s1 ; CHECK-NEXT: fmov w12, s0 ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov w9, v0.s[1] ; CHECK-NEXT: mov w14, v1.s[2] ; CHECK-NEXT: mov w15, v0.s[2] ; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: mov w17, v1.s[3] ; CHECK-NEXT: mov w18, v0.s[3] ; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: udiv w10, w9, w8 ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov 
v0.s[1], w8 ; CHECK-NEXT: msub w10, w16, w14, w15 ; CHECK-NEXT: udiv w9, w18, w17 ; CHECK-NEXT: mov v0.s[2], w10 ; CHECK-NEXT: msub w8, w9, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = urem <4 x i32> %A, %B; ret <4 x i32> %tmp3 } define <1 x i64> @urem1x64(<1 x i64> %A, <1 x i64> %B) { ; CHECK-LABEL: urem1x64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fmov x8, d1 ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: udiv x10, x9, x8 ; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ret %tmp3 = urem <1 x i64> %A, %B; ret <1 x i64> %tmp3 } define <2 x i64> @urem2x64(<2 x i64> %A, <2 x i64> %B) { ; CHECK-LABEL: urem2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov x8, d1 ; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: mov x11, v1.d[1] ; CHECK-NEXT: mov x12, v0.d[1] ; CHECK-NEXT: udiv x10, x9, x8 ; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: udiv x13, x12, x11 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: msub x9, x13, x11, x12 ; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %tmp3 = urem <2 x i64> %A, %B; ret <2 x i64> %tmp3 } define <2 x float> @frem2f32(<2 x float> %A, <2 x float> %B) { ; CHECK-LABEL: frem2f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: mov s1, v1.s[1] ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: str d0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; 
CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %tmp3 = frem <2 x float> %A, %B; ret <2 x float> %tmp3 } define <4 x float> @frem4f32(<4 x float> %A, <4 x float> %B) { ; CHECK-LABEL: frem4f32: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: stp q0, q1, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] ; CHECK-NEXT: mov s1, v1.s[1] ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 killed $q1 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: mov s1, v1.s[2] ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldp q0, q1, [sp, #16] // 32-byte Folded Reload ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %tmp3 = frem <4 x float> %A, %B; ret <4 x float> %tmp3 } define <1 x double> @frem1d64(<1 x double> %A, <1 x double> %B) { ; CHECK-LABEL: frem1d64: ; CHECK: // 
%bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: bl fmod ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = frem <1 x double> %A, %B; ret <1 x double> %tmp3 } define <2 x double> @frem2d64(<2 x double> %A, <2 x double> %B) { ; CHECK-LABEL: frem2d64: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #64 ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill ; CHECK-NEXT: mov d0, v0.d[1] ; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: bl fmod ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: bl fmod ; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret %tmp3 = frem <2 x double> %A, %B; ret <2 x double> %tmp3 } declare <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8>, <8 x i8>) declare <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8>, <16 x i8>) define <8 x i8> @poly_mulv8i8(<8 x i8> %lhs, <8 x i8> %rhs) { ; CHECK-LABEL: poly_mulv8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: pmul v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %prod = call <8 x i8> @llvm.aarch64.neon.pmul.v8i8(<8 x i8> %lhs, <8 x i8> %rhs) ret <8 x i8> %prod } define <16 x i8> @poly_mulv16i8(<16 x i8> %lhs, <16 x i8> %rhs) { ; CHECK-LABEL: poly_mulv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: pmul v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %prod = call <16 x i8> @llvm.aarch64.neon.pmul.v16i8(<16 x i8> %lhs, <16 x i8> %rhs) ret <16 x i8> %prod } declare <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16>, <4 x 
i16>) declare <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_sqdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqdmulh_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sqdmulh v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %prod = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ret <4 x i16> %prod } define <8 x i16> @test_sqdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) { ; CHECK-LABEL: test_sqdmulh_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sqdmulh v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %prod = call <8 x i16> @llvm.aarch64.neon.sqdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs) ret <8 x i16> %prod } define <2 x i32> @test_sqdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) { ; CHECK-LABEL: test_sqdmulh_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sqdmulh v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %prod = call <2 x i32> @llvm.aarch64.neon.sqdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs) ret <2 x i32> %prod } define <4 x i32> @test_sqdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) { ; CHECK-LABEL: test_sqdmulh_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sqdmulh v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %prod = call <4 x i32> @llvm.aarch64.neon.sqdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs) ret <4 x i32> %prod } declare <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16>, <4 x i16>) declare <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16>, <8 x i16>) declare <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32>, <2 x i32>) declare <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32>, <4 x i32>) define <4 x i16> @test_sqrdmulh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) { ; CHECK-LABEL: test_sqrdmulh_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sqrdmulh v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs) ret <4 x i16> %prod } 
; Remaining sqrdmulh cases: each intrinsic call below is expected to select to
; exactly one sqrdmulh on the argument registers, followed by ret.
define <8 x i16> @test_sqrdmulh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
; CHECK-LABEL: test_sqrdmulh_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: sqrdmulh v0.8h, v0.8h, v1.8h
; CHECK-NEXT: ret
  %mulh = call <8 x i16> @llvm.aarch64.neon.sqrdmulh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
  ret <8 x i16> %mulh
}

define <2 x i32> @test_sqrdmulh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
; CHECK-LABEL: test_sqrdmulh_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sqrdmulh v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
  %mulh = call <2 x i32> @llvm.aarch64.neon.sqrdmulh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
  ret <2 x i32> %mulh
}

define <4 x i32> @test_sqrdmulh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
; CHECK-LABEL: test_sqrdmulh_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: sqrdmulh v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
  %mulh = call <4 x i32> @llvm.aarch64.neon.sqrdmulh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
  ret <4 x i32> %mulh
}

; fmulx intrinsics for the 2 x float, 4 x float and 2 x double vector types.
declare <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float>, <2 x float>)
declare <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float>, <4 x float>)
declare <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double>, <2 x double>)

; Each call below is expected to select to a single fmulx followed by ret.
define <2 x float> @fmulx_v2f32(<2 x float> %lhs, <2 x float> %rhs) {
; CHECK-LABEL: fmulx_v2f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmulx v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
; Using registers other than v0, v1 and v2 is possible, but would be odd.
  %mulx = call <2 x float> @llvm.aarch64.neon.fmulx.v2f32(<2 x float> %lhs, <2 x float> %rhs)
  ret <2 x float> %mulx
}

define <4 x float> @fmulx_v4f32(<4 x float> %lhs, <4 x float> %rhs) {
; CHECK-LABEL: fmulx_v4f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmulx v0.4s, v0.4s, v1.4s
; CHECK-NEXT: ret
; Using registers other than v0, v1 and v2 is possible, but would be odd.
  %mulx = call <4 x float> @llvm.aarch64.neon.fmulx.v4f32(<4 x float> %lhs, <4 x float> %rhs)
  ret <4 x float> %mulx
}

define <2 x double> @fmulx_v2f64(<2 x double> %lhs, <2 x double> %rhs) {
; CHECK-LABEL: fmulx_v2f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmulx v0.2d, v0.2d, v1.2d
; CHECK-NEXT: ret
; Using registers other than v0, v1 and v2 is possible, but would be odd.
  %mulx = call <2 x double> @llvm.aarch64.neon.fmulx.v2f64(<2 x double> %lhs, <2 x double> %rhs)
  ret <2 x double> %mulx
}