; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s

; Supported combines

; Splat of a sign-extended i8 scalar multiplied by a sign-extended <8 x i8>
; vector, producing <8 x i16>. The expected output duplicates the scalar at the
; narrow element width (dup .8b) and performs one widening signed multiply
; (smull), with no separate extend instructions.
define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-LABEL: dupsext_v8i8_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.8b, w0
; CHECK-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-NEXT:    ret
entry:
    %in = sext i8 %src to i16
    %ext.b = sext <8 x i8> %b to <8 x i16>
    ; Splat idiom: insert the scalar into lane 0, then shuffle with an all-zero
    ; mask so that every result lane reads lane 0.
    %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
    %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
    %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
    ret <8 x i16> %out
}

; Unsigned counterpart of the i8 -> i16 splat multiply: both the scalar and the
; <8 x i8> vector are zero-extended before the multiply. The expected output is
; a narrow dup (.8b) followed by one widening unsigned multiply (umull).
define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-LABEL: dupzext_v8i8_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.8b, w0
; CHECK-NEXT:    umull v0.8h, v1.8b, v0.8b
; CHECK-NEXT:    ret
entry:
    %in = zext i8 %src to i16
    %ext.b = zext <8 x i8> %b to <8 x i16>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
    %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
    %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
    ret <8 x i16> %out
}

; Splat of a sign-extended i16 scalar multiplied by a sign-extended <4 x i16>
; vector, producing <4 x i32>. The expected output is a narrow dup (.4h)
; followed by one widening signed multiply (smull) into .4s lanes.
define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-LABEL: dupsext_v4i16_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.4h, w0
; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    ret
entry:
    %in = sext i16 %src to i32
    %ext.b = sext <4 x i16> %b to <4 x i32>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
    %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
    %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
    ret <4 x i32> %out
}

; Unsigned counterpart of the i16 -> i32 splat multiply: scalar and vector are
; zero-extended before the multiply. The expected output is a narrow dup (.4h)
; followed by one widening unsigned multiply (umull) into .4s lanes.
define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-LABEL: dupzext_v4i16_v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.4h, w0
; CHECK-NEXT:    umull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    ret
entry:
    %in = zext i16 %src to i32
    %ext.b = zext <4 x i16> %b to <4 x i32>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
    %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
    %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
    ret <4 x i32> %out
}

; Splat of a sign-extended i32 scalar multiplied by a sign-extended <2 x i32>
; vector, producing <2 x i64>. The expected output is a narrow dup (.2s)
; followed by one widening signed multiply (smull) into .2d lanes.
define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-LABEL: dupsext_v2i32_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.2s, w0
; CHECK-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-NEXT:    ret
entry:
    %in = sext i32 %src to i64
    %ext.b = sext <2 x i32> %b to <2 x i64>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
    %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
    ret <2 x i64> %out
}

; Unsigned counterpart of the i32 -> i64 splat multiply: scalar and vector are
; zero-extended before the multiply. The expected output is a narrow dup (.2s)
; followed by one widening unsigned multiply (umull) into .2d lanes.
define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-LABEL: dupzext_v2i32_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.2s, w0
; CHECK-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-NEXT:    ret
entry:
    %in = zext i32 %src to i64
    %ext.b = zext <2 x i32> %b to <2 x i64>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
    %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
    ret <2 x i64> %out
}

; Unsupported combines

; Same splat-multiply pattern as the supported cases, but with <2 x i8>
; extended to <2 x i16>. The expected output contains no smull: the scalar is
; sign-extended with sxtb, the vector lanes are sign-extended in place with a
; shl/sshr #24 pair on .2s lanes, and a plain (non-widening) mul is used.
; NOTE(review): presumably this is because <2 x i8>/<2 x i16> are not native
; NEON vector shapes and get widened to .2s first - confirm against the
; AArch64 lowering code.
define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
; CHECK-LABEL: dupsext_v2i8_v2i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    sxtb w8, w0
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    dup v1.2s, w8
; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
entry:
    %in = sext i8 %src to i16
    %ext.b = sext <2 x i8> %b to <2 x i16>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
    %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
    %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
    ret <2 x i16> %out
}

; Splat-multiply pattern where the extension spans two width steps
; (i16 -> i64, <2 x i16> -> <2 x i64>). The expected output contains no umull:
; the vector is masked to 16 bits per lane and widened with ushll, each lane is
; moved to a general-purpose register and multiplied by the masked scalar with
; a scalar mul, and the two products are reassembled into the result vector.
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-LABEL: dupzext_v2i16_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    and x8, x0, #0xffff
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    mov x10, v0.d[1]
; CHECK-NEXT:    mul x9, x8, x9
; CHECK-NEXT:    mul x8, x8, x10
; CHECK-NEXT:    fmov d0, x9
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
entry:
    %in = zext i16 %src to i64
    %ext.b = zext <2 x i16> %b to <2 x i64>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
    %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
    %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
    ret <2 x i64> %out
}

; Other unsupported type combinations, listed by name only:
; dupsext_v4i8_v4i16
; dupsext_v2i8_v2i32
; dupsext_v4i8_v4i32
; dupsext_v2i8_v2i64
; dupsext_v2i16_v2i32
; dupsext_v2i16_v2i64
; dupzext_v2i8_v2i16
; dupzext_v4i8_v4i16
; dupzext_v2i8_v2i32
; dupzext_v4i8_v4i32
; dupzext_v2i8_v2i64
; dupzext_v2i16_v2i32
; dupzext_v2i16_v2i64 (has a test function above)

; Unsupported states

; The scalar is inserted at lane 1 (not lane 0) and the shuffle mask is a
; rotation rather than all-zero, so this is not the canonical splat idiom.
; Only source lane 1 is defined; the mask moves it to result lane 7 and every
; other result lane reads an undef element. The expected output still uses
; dup + smull, i.e. the backend treats the single defined lane as a splat.
define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
; CHECK-LABEL: nonsplat_shuffleinsert:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    dup v1.8b, w0
; CHECK-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-NEXT:    ret
entry:
    %in = sext i8 %src to i16
    %ext.b = sext <8 x i8> %b to <8 x i16>
    %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
    ; Rotate-by-two mask: result lanes 0-5 read source lanes 2-7 (undef),
    ; lane 6 reads source lane 0 (undef), lane 7 reads source lane 1 (%in).
    %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
    %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
    ret <8 x i16> %out
}

; Builds a <4 x i32> vector from four different sign-extended i16 scalars using
; a chain of insertelement instructions (no shuffle, no splat) and multiplies
; it by a sign-extended <4 x i16> vector. The expected output assembles the
; four scalars directly into .h lanes and then uses one widening smull.
define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
; CHECK-LABEL: nonsplat_shuffleinsert2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov s1, w0
; CHECK-NEXT:    mov v1.h[1], w1
; CHECK-NEXT:    mov v1.h[2], w2
; CHECK-NEXT:    mov v1.h[3], w3
; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-NEXT:    ret
entry:
    %s0 = sext i16 %b0 to i32
    %s1 = sext i16 %b1 to i32
    %s2 = sext i16 %b2 to i32
    %s3 = sext i16 %b3 to i32
    %ext.b = sext <4 x i16> %b to <4 x i32>
    ; Fill lanes 0..3 one at a time; every lane ends up defined.
    %v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
    %v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
    %v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
    %v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
    %out = mul nsw <4 x i32> %v3, %ext.b
    ret <4 x i32> %out
}

; Splat of a zero-extended i1 (the result of "0 < %a") multiplied by a
; 16-lane vector whose low 8 lanes are the negated <8 x i16> loaded from %q
; and whose high 8 lanes are undef. The product is compared against zero,
; sign-extended to <16 x i8> and stored through %p. The expected output uses a
; plain mul on .8h lanes (no widening multiply) and stores zeros for the upper
; eight bytes.
; NOTE(review): the "_orig" suffix suggests this is the original reproducer
; for the i1 case - confirm against the commit history.
define void @typei1_orig(i64 %a, i8* %p, <8 x i16>* %q) {
; CHECK-LABEL: typei1_orig:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmp x0, #0
; CHECK-NEXT:    ldr q0, [x2]
; CHECK-NEXT:    cset w8, gt
; CHECK-NEXT:    neg v0.8h, v0.8h
; CHECK-NEXT:    dup v1.8h, w8
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    movi v1.2d, #0000000000000000
; CHECK-NEXT:    cmtst v0.8h, v0.8h, v0.8h
; CHECK-NEXT:    xtn v0.8b, v0.8h
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    str q0, [x1]
; CHECK-NEXT:    ret
    ; xor of all-false with all-true: an all-true <16 x i1> mask.
    %tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
    %tmp6 = load <8 x i16>, <8 x i16>* %q, align 2
    %tmp7 = sub <8 x i16> zeroinitializer, %tmp6
    ; Widen 8 -> 16 lanes; mask indices 8-15 select from the undef operand.
    %tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    %tmp9 = icmp slt i64 0, %a
    %tmp10 = zext i1 %tmp9 to i16
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
    %tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
    %tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
    %tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
    %tmp15 = and <16 x i1> %tmp14, %tmp
    %tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
    %tmp17 = bitcast i8* %p to <16 x i8>*
    store <16 x i8> %tmp16, <16 x i8>* %tmp17, align 1
    ret void
}

; Splat-multiply pattern where the narrow element type is i1: the scalar i1
; and the <8 x i1> vector are both zero-extended to i16. The expected output
; contains no umull: both operands are masked to their low bit, the scalar is
; duplicated at .8h width, the vector is widened with ushll, and a plain mul
; is used.
define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
; CHECK-LABEL: typei1_v8i1_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi v1.8b, #1
; CHECK-NEXT:    and w8, w0, #0x1
; CHECK-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    dup v1.8h, w8
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
entry:
    %in = zext i1 %src to i16
    %ext.b = zext <8 x i1> %b to <8 x i16>
    ; Splat idiom: insert into lane 0, then broadcast with an all-zero mask.
    %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
    %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
    %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
    ret <8 x i16> %out
}

; Shuffle with no insertelement feeding it: the sign-extended vector is rotated
; by two lanes and multiplied by its own unrotated value. The expected output
; performs the rotation on the narrow .8b vector (ext #2) and then uses one
; widening smull.
define <8 x i16> @missing_insert(<8 x i8> %b) {
; CHECK-LABEL: missing_insert:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ext v1.8b, v0.8b, v0.8b, #2
; CHECK-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-NEXT:    ret
entry:
    %ext.b = sext <8 x i8> %b to <8 x i16>
    ; Despite its name this value is a lane rotation, not a splat.
    %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
    %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
    ret <8 x i16> %out
}

; Lane-reversing shuffle of a sign-extended <8 x i8> vector multiplied by
; another sign-extended <8 x i8> vector. The expected output reverses the
; narrow source (rev64 on .8b) and then uses one widening smull.
define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-LABEL: shufsext_v8i8_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v0.8b, v0.8b
; CHECK-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %in = sext <8 x i8> %src to <8 x i16>
  %ext.b = sext <8 x i8> %b to <8 x i16>
  ; Full reversal of the eight lanes.
  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Lane-swapping shuffle of a sign-extended <2 x i32> vector multiplied by
; another sign-extended <2 x i32> vector. The expected output swaps the narrow
; source lanes (rev64 on .2s) and then uses one widening smull into .2d.
define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-LABEL: shufsext_v2i32_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v0.2s, v0.2s
; CHECK-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %in = sext <2 x i32> %src to <2 x i64>
  %ext.b = sext <2 x i32> %b to <2 x i64>
  ; Swap the two lanes.
  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %out = mul nsw <2 x i64> %shuf, %ext.b
  ret <2 x i64> %out
}

; Unsigned counterpart of the <8 x i8> lane-reversal case: both inputs are
; zero-extended. The expected output reverses the narrow source (rev64 on .8b)
; and then uses one widening umull.
define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-LABEL: shufzext_v8i8_v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v0.8b, v0.8b
; CHECK-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %in = zext <8 x i8> %src to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  ; Full reversal of the eight lanes.
  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Unsigned counterpart of the <2 x i32> lane-swap case: both inputs are
; zero-extended, the first is lane-swapped, and the two are multiplied. The
; expected output swaps the narrow source lanes (rev64 on .2s) and then uses
; one widening umull into .2d.
; NOTE(review): this function previously used sext and expected smull, which
; made it a duplicate of the signed test. The expected output below was updated
; by hand to match; re-run utils/update_llc_test_checks.py to confirm it.
define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-LABEL: shufzext_v2i32_v2i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    rev64 v0.2s, v0.2s
; CHECK-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %in = zext <2 x i32> %src to <2 x i64>
  %ext.b = zext <2 x i32> %b to <2 x i64>
  ; Swap the two lanes.
  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %out = mul nsw <2 x i64> %shuf, %ext.b
  ret <2 x i64> %out
}

; Two-input shuffle: even lanes of two zero-extended <8 x i8> vectors are
; interleaved and the result is multiplied by a third zero-extended vector.
; Because both shuffle inputs use the same kind of extension, the expected
; output performs the interleave on the narrow vectors (trn1 on .8b) and then
; uses one widening umull.
define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    trn1 v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    umull v0.8h, v0.8b, v2.8b
; CHECK-NEXT:    ret
entry:
  %in1 = zext <8 x i8> %src1 to <8 x i16>
  %in2 = zext <8 x i8> %src2 to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  ; Indices 0-7 select from %in1 and 8-15 from %in2: lanes alternate
  ; in1[0], in2[0], in1[2], in2[2], ...
  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Two-input shuffle where the inputs use different extensions: %src1 is
; zero-extended and %src2 is sign-extended. The expected output contains no
; widening multiply: each input is extended first (ushll / sshll), the
; interleave is done on the wide .8h vectors (trn1), and a plain mul is used.
define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-NEXT:    trn1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushll v1.8h, v2.8b, #0
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %in1 = zext <8 x i8> %src1 to <8 x i16>
  %in2 = sext <8 x i8> %src2 to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  ; Indices 0-7 select from %in1 and 8-15 from %in2: lanes alternate
  ; in1[0], in2[0], in1[2], in2[2], ...
  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}