; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(ptr %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, ptr %__P, <32 x i1> %1)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(ptr %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, ptr %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %1, <32 x i16> %0)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i32 %__U to <32 x i1>
  %1 = tail call <32 x i16> @llvm.masked.expandload.v32i16(ptr %__P, <32 x i1> %0, <32 x i16> zeroinitializer)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, ptr readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(ptr %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshl.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshl.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $7, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $47, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47, i64 47>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.fshr.v8i64(<8 x i64>, <8 x i64>, <8 x i64>)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__B, <8 x i64> %__A, <8 x i64> <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $7, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $3, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.fshr.v32i16(<32 x i16>, <32 x i16>, <32 x i16>)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $15, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> <i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31, i16 31>)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %__A, <8 x i64> %__S, <8 x i64> %__B)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %1, <16 x i32> %0, <16 x i32> %2)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> %0
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %3, <32 x i16> zeroinitializer
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %1, <32 x i16> %0, <32 x i16> %2)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>)