; Compiler projects using llvm
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL

; test1: return an all-false <16 x i1> constant.
; Every run configuration expects the value to be produced by zeroing %xmm0
; with vxorps and returning immediately, i.e. the i1 vector result is handed
; back in an XMM register.
define <16 x i1> @test1() {
; ALL_X64-LABEL: test1:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test1:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test1:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; FASTISEL-NEXT:    retq
  ret <16 x i1> zeroinitializer
}

; test2: bitwise AND of two <16 x i1> arguments, returned as <16 x i1>.
; The default-isel runs expect a single vandps on %xmm0/%xmm1, so both
; arguments and the result live in XMM registers.
; The fast-isel run expects each argument to be converted to a mask register
; first (vpsllw $7 + vpmovb2m), ANDed with kandw, and converted back to %xmm0
; with vpmovm2b.
define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; ALL_X64-LABEL: test2:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test2:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test2:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    vpsllw $7, %xmm1, %xmm1
; FASTISEL-NEXT:    vpmovb2m %xmm1, %k0
; FASTISEL-NEXT:    vpsllw $7, %xmm0, %xmm0
; FASTISEL-NEXT:    vpmovb2m %xmm0, %k1
; FASTISEL-NEXT:    kandw %k0, %k1, %k0
; FASTISEL-NEXT:    vpmovm2b %k0, %xmm0
; FASTISEL-NEXT:    retq
  %c = and <16 x i1>%a, %b
  ret <16 x i1> %c
}

; test3: bitwise AND of two <8 x i1> arguments, returned as <8 x i1>.
; The default-isel runs expect a single vandps on %xmm0/%xmm1.
; The fast-isel run expects a round trip through mask registers using the
; word-sized forms: vpsllw $15 + vpmovw2m per argument, kandb, then vpmovm2w
; back into %xmm0.
define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; ALL_X64-LABEL: test3:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test3:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test3:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    vpsllw $15, %xmm1, %xmm1
; FASTISEL-NEXT:    vpmovw2m %xmm1, %k0
; FASTISEL-NEXT:    vpsllw $15, %xmm0, %xmm0
; FASTISEL-NEXT:    vpmovw2m %xmm0, %k1
; FASTISEL-NEXT:    kandb %k0, %k1, %k0
; FASTISEL-NEXT:    vpmovm2w %k0, %xmm0
; FASTISEL-NEXT:    retq
  %c = and <8 x i1>%a, %b
  ret <8 x i1> %c
}

; test4: bitwise AND of two <4 x i1> arguments, returned as <4 x i1>.
; The default-isel runs expect a single vandps on %xmm0/%xmm1.
; The fast-isel run expects a round trip through mask registers using the
; dword-sized forms: vpslld $31 + vpmovd2m per argument, kandw, then vpmovm2d
; back into %xmm0.
define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
; ALL_X64-LABEL: test4:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test4:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test4:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    vpslld $31, %xmm1, %xmm1
; FASTISEL-NEXT:    vpmovd2m %xmm1, %k0
; FASTISEL-NEXT:    vpslld $31, %xmm0, %xmm0
; FASTISEL-NEXT:    vpmovd2m %xmm0, %k1
; FASTISEL-NEXT:    kandw %k0, %k1, %k0
; FASTISEL-NEXT:    vpmovm2d %k0, %xmm0
; FASTISEL-NEXT:    retq
  %c = and <4 x i1>%a, %b
  ret <4 x i1> %c
}

; External callee taking and returning <8 x i1>; used by test5 and test7a to
; exercise passing an i1 vector across a call boundary.
declare <8 x i1> @func8xi1(<8 x i1> %a)

; test5: signed-greater-than compare of two <8 x i32> vectors, pass the
; <8 x i1> result to @func8xi1, then sign-extend the returned <8 x i1> to
; <8 x i32>.
; Expected argument setup before the call:
;   - KNL (x86-64 and i686): compare in YMM (vpcmpgtd), then narrow to words
;     with vpmovdw so the argument sits in %xmm0.
;   - SKX and fast-isel: compare into %k0, then expand to words in %xmm0 with
;     vpmovm2w, followed by vzeroupper before the call.
; Expected handling of the returned value:
;   - default isel: widen words to dwords (vpmovzxwd) and sign-extend bit 0
;     with vpslld $31 / vpsrad $31.
;   - fast-isel: rebuild a mask (vpsllw $15 + vpmovw2m) and expand it with
;     vpmovm2d into %ymm0.
define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
; KNL-LABEL: test5:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    callq _func8xi1
; KNL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL-NEXT:    vpsrad $31, %ymm0, %ymm0
; KNL-NEXT:    popq %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test5:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _func8xi1
; SKX-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT:    vpslld $31, %ymm0, %ymm0
; SKX-NEXT:    vpsrad $31, %ymm0, %ymm0
; SKX-NEXT:    popq %rax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test5:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT:    vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL_X32-NEXT:    calll _func8xi1
; KNL_X32-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL_X32-NEXT:    vpslld $31, %ymm0, %ymm0
; KNL_X32-NEXT:    vpsrad $31, %ymm0, %ymm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test5:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rax
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; FASTISEL-NEXT:    vpmovm2w %k0, %xmm0
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    callq _func8xi1
; FASTISEL-NEXT:    vpsllw $15, %xmm0, %xmm0
; FASTISEL-NEXT:    vpmovw2m %xmm0, %k0
; FASTISEL-NEXT:    vpmovm2d %k0, %ymm0
; FASTISEL-NEXT:    popq %rax
; FASTISEL-NEXT:    retq
  %cmpRes = icmp sgt <8 x i32>%a, %b
  %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
  %res = sext <8 x i1>%resi to <8 x i32>
  ret <8 x i32> %res
}

; External callee taking and returning <16 x i1>; used by test6.
declare <16 x i1> @func16xi1(<16 x i1> %a)

; test6: 16-element counterpart of test5. Signed-greater-than compare of two
; <16 x i32> vectors, pass the <16 x i1> result to @func16xi1, then sign-extend
; the returned <16 x i1> to <16 x i32>.
; Expected argument setup before the call:
;   - KNL (x86-64 and i686): compare into %k1, materialise all-ones lanes
;     under that mask (vpternlogd $255 with zeroing), then narrow to bytes with
;     vpmovdb so the argument sits in %xmm0.
;   - SKX and fast-isel: compare into %k0, expand to bytes in %xmm0 with
;     vpmovm2b, followed by vzeroupper before the call.
; Expected handling of the returned value:
;   - default isel: widen bytes to dwords (vpmovzxbd) and sign-extend bit 0
;     with vpslld $31 / vpsrad $31.
;   - fast-isel: rebuild a mask (vpsllw $7 + vpmovb2m) and expand it with
;     vpmovm2d into %zmm0.
define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test6:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT:    vpmovdb %zmm0, %xmm0
; KNL-NEXT:    callq _func16xi1
; KNL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-NEXT:    vpsrad $31, %zmm0, %zmm0
; KNL-NEXT:    popq %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test6:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT:    vpmovm2b %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _func16xi1
; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX-NEXT:    vpsrad $31, %zmm0, %zmm0
; SKX-NEXT:    popq %rax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test6:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1
; KNL_X32-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT:    vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT:    calll _func16xi1
; KNL_X32-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL_X32-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL_X32-NEXT:    vpsrad $31, %zmm0, %zmm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test6:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rax
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0
; FASTISEL-NEXT:    vpmovm2b %k0, %xmm0
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    callq _func16xi1
; FASTISEL-NEXT:    vpsllw $7, %xmm0, %xmm0
; FASTISEL-NEXT:    vpmovb2m %xmm0, %k0
; FASTISEL-NEXT:    vpmovm2d %k0, %zmm0
; FASTISEL-NEXT:    popq %rax
; FASTISEL-NEXT:    retq
  %cmpRes = icmp sgt <16 x i32>%a, %b
  %resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes)
  %res = sext <16 x i1>%resi to <16 x i32>
  ret <16 x i32> %res
}

; External callee taking and returning <4 x i1>; used by test7.
declare <4 x i1> @func4xi1(<4 x i1> %a)

; test7: 4-element counterpart of test5. Signed-greater-than compare of two
; <4 x i32> vectors, pass the <4 x i1> result to @func4xi1, then sign-extend
; the returned <4 x i1> to <4 x i32>.
; In the default-isel runs the XMM compare result (vpcmpgtd) is expected to be
; passed in %xmm0 as-is, and the return value is sign-extended from bit 0 with
; vpslld $31 / vpsrad $31.
; The fast-isel run expects the compare to target %k0 and be expanded with
; vpmovm2d before the call; afterwards the result goes back through a mask
; register (vpslld $31 + vpmovd2m + vpmovm2d).
define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) {
; ALL_X64-LABEL: test7:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    pushq %rax
; ALL_X64-NEXT:    .cfi_def_cfa_offset 16
; ALL_X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT:    callq _func4xi1
; ALL_X64-NEXT:    vpslld $31, %xmm0, %xmm0
; ALL_X64-NEXT:    vpsrad $31, %xmm0, %xmm0
; ALL_X64-NEXT:    popq %rax
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test7:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT:    calll _func4xi1
; KNL_X32-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL_X32-NEXT:    vpsrad $31, %xmm0, %xmm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test7:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rax
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0
; FASTISEL-NEXT:    vpmovm2d %k0, %xmm0
; FASTISEL-NEXT:    callq _func4xi1
; FASTISEL-NEXT:    vpslld $31, %xmm0, %xmm0
; FASTISEL-NEXT:    vpmovd2m %xmm0, %k0
; FASTISEL-NEXT:    vpmovm2d %k0, %xmm0
; FASTISEL-NEXT:    popq %rax
; FASTISEL-NEXT:    retq
  %cmpRes = icmp sgt <4 x i32>%a, %b
  %resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes)
  %res = sext <4 x i1>%resi to <4 x i32>
  ret <4 x i32> %res
}

; test7a: variant of test5 that returns an <8 x i1> instead of sign-extending.
; Signed-greater-than compare of two <8 x i32> vectors, pass the <8 x i1>
; result to @func8xi1, then AND the returned vector with the constant mask
; <1,0,1,0,1,0,1,0> and return it.
; The argument setup before the call matches test5. After the call every run
; expects one AND of %xmm0 against a constant-pool entry (the LCPI operand):
; vandps on KNL (x86-64 and i686), vpand on SKX and fast-isel.
define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL-LABEL: test7a:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rax
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT:    vpmovdw %zmm0, %ymm0
; KNL-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT:    callq _func8xi1
; KNL-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; KNL-NEXT:    popq %rax
; KNL-NEXT:    retq
;
; SKX-LABEL: test7a:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rax
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; SKX-NEXT:    vpmovm2w %k0, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    callq _func8xi1
; SKX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; SKX-NEXT:    popq %rax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test7a:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    subl $12, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT:    vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL_X32-NEXT:    calll _func8xi1
; KNL_X32-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; KNL_X32-NEXT:    addl $12, %esp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test7a:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rax
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0
; FASTISEL-NEXT:    vpmovm2w %k0, %xmm0
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    callq _func8xi1
; FASTISEL-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FASTISEL-NEXT:    popq %rax
; FASTISEL-NEXT:    retq
  %cmpRes = icmp sgt <8 x i32>%a, %b
  %resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
  %res = and <8 x i1>%resi,  <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
  ret <8 x i1> %res
}

; test8: select between two <16 x i8> vectors based on a scalar i1 argument.
; Every run expects only bit 0 of the condition to be tested (testb $1), read
; from %dil on x86-64 and from a stack slot on i686, followed by a branch that
; skips the vmovaps copying %a2 (%xmm1) into the result register %xmm0 when the
; condition is true.
define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
; ALL_X64-LABEL: test8:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    testb $1, %dil
; ALL_X64-NEXT:    jne LBB8_2
; ALL_X64-NEXT:  ## %bb.1:
; ALL_X64-NEXT:    vmovaps %xmm1, %xmm0
; ALL_X64-NEXT:  LBB8_2:
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test8:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    jne LBB8_2
; KNL_X32-NEXT:  ## %bb.1:
; KNL_X32-NEXT:    vmovaps %xmm1, %xmm0
; KNL_X32-NEXT:  LBB8_2:
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test8:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    testb $1, %dil
; FASTISEL-NEXT:    jne LBB8_2
; FASTISEL-NEXT:  ## %bb.1:
; FASTISEL-NEXT:    vmovaps %xmm1, %xmm0
; FASTISEL-NEXT:  LBB8_2:
; FASTISEL-NEXT:    retq
  %res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
  ret <16 x i8> %res
}

; test9: unordered-or-greater-than compare of two doubles, returned as i1.
; Every run expects a vucomisd followed by setb into %al. On x86-64 the
; operands are compared directly from %xmm0/%xmm1; on i686 one operand is
; first loaded from the stack with vmovsd and the other is used as a memory
; operand.
define i1 @test9(double %a, double %b) {
; ALL_X64-LABEL: test9:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    vucomisd %xmm0, %xmm1
; ALL_X64-NEXT:    setb %al
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test9:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL_X32-NEXT:    vucomisd {{[0-9]+}}(%esp), %xmm0
; KNL_X32-NEXT:    setb %al
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test9:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    vucomisd %xmm0, %xmm1
; FASTISEL-NEXT:    setb %al
; FASTISEL-NEXT:    retq
  %c = fcmp ugt double %a, %b
  ret i1 %c
}

; test10: select between two i32 arguments based on an i1 argument.
; The x86-64 runs expect %a to be copied to %eax, bit 0 of the condition to be
; tested in %dl, and a cmovel replacing the result with %b when it is clear.
; The i686 run expects the condition to be tested on the stack, the addresses
; of two stack slots to be formed with leal, one address to be chosen with
; cmovnel, and the result to be loaded through it.
; This function is also called by test12.
define i32 @test10(i32 %a, i32 %b, i1 %cond) {
; ALL_X64-LABEL: test10:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    movl %edi, %eax
; ALL_X64-NEXT:    testb $1, %dl
; ALL_X64-NEXT:    cmovel %esi, %eax
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test10:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    leal {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    leal {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    cmovnel %eax, %ecx
; KNL_X32-NEXT:    movl (%ecx), %eax
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test10:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    movl %edi, %eax
; FASTISEL-NEXT:    testb $1, %dl
; FASTISEL-NEXT:    cmovel %esi, %eax
; FASTISEL-NEXT:    retq
  %c = select i1 %cond, i32 %a, i32 %b
  ret i32 %c
}

; test11: signed-greater-than compare of two i32 arguments, returned as i1.
; Every run expects a cmpl followed by setg into %al; on i686 the operands
; come from the stack instead of %edi/%esi.
; This function is also called by test12.
define i1 @test11(i32 %a, i32 %b) {
; ALL_X64-LABEL: test11:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    cmpl %esi, %edi
; ALL_X64-NEXT:    setg %al
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test11:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    setg %al
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test11:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    cmpl %esi, %edi
; FASTISEL-NEXT:    setg %al
; FASTISEL-NEXT:    retq
  %c = icmp sgt i32 %a, %b
  ret i1 %c
}

; test12: scalar i1 passed through two calls.
; Calls @test11(%a1, %b1) to obtain an i1, forwards that i1 as the third
; argument of @test10(%a1, %a2, cond), and finally returns the @test10 result
; if the i1 is true or 0 otherwise.
; The expectations cover how the i1 travels between calls:
;   - the value returned by @test11 in %al is zero-extended with movzbl before
;     being passed on (in %edx on x86-64, via a stack slot on i686);
;   - the final select tests only bit 0 of the saved value (testb $1, %bl) and
;     uses cmovel against a zeroed %ecx.
; %a1 and %a2 are kept in registers that this function pushes on entry and
; pops on exit (%rbp/%r14 on x86-64, %edi/%esi on i686) so they survive the
; first call.
define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
; ALL_X64-LABEL: test12:
; ALL_X64:       ## %bb.0:
; ALL_X64-NEXT:    pushq %rbp
; ALL_X64-NEXT:    .cfi_def_cfa_offset 16
; ALL_X64-NEXT:    pushq %r14
; ALL_X64-NEXT:    .cfi_def_cfa_offset 24
; ALL_X64-NEXT:    pushq %rbx
; ALL_X64-NEXT:    .cfi_def_cfa_offset 32
; ALL_X64-NEXT:    .cfi_offset %rbx, -32
; ALL_X64-NEXT:    .cfi_offset %r14, -24
; ALL_X64-NEXT:    .cfi_offset %rbp, -16
; ALL_X64-NEXT:    movl %esi, %r14d
; ALL_X64-NEXT:    movl %edi, %ebp
; ALL_X64-NEXT:    movl %edx, %esi
; ALL_X64-NEXT:    callq _test11
; ALL_X64-NEXT:    movzbl %al, %ebx
; ALL_X64-NEXT:    movl %ebp, %edi
; ALL_X64-NEXT:    movl %r14d, %esi
; ALL_X64-NEXT:    movl %ebx, %edx
; ALL_X64-NEXT:    callq _test10
; ALL_X64-NEXT:    xorl %ecx, %ecx
; ALL_X64-NEXT:    testb $1, %bl
; ALL_X64-NEXT:    cmovel %ecx, %eax
; ALL_X64-NEXT:    popq %rbx
; ALL_X64-NEXT:    popq %r14
; ALL_X64-NEXT:    popq %rbp
; ALL_X64-NEXT:    retq
;
; KNL_X32-LABEL: test12:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebx
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    pushl %edi
; KNL_X32-NEXT:    .cfi_def_cfa_offset 12
; KNL_X32-NEXT:    pushl %esi
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    subl $16, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 32
; KNL_X32-NEXT:    .cfi_offset %esi, -16
; KNL_X32-NEXT:    .cfi_offset %edi, -12
; KNL_X32-NEXT:    .cfi_offset %ebx, -8
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl %edi, (%esp)
; KNL_X32-NEXT:    calll _test11
; KNL_X32-NEXT:    movl %eax, %ebx
; KNL_X32-NEXT:    movzbl %al, %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl %edi, (%esp)
; KNL_X32-NEXT:    calll _test10
; KNL_X32-NEXT:    xorl %ecx, %ecx
; KNL_X32-NEXT:    testb $1, %bl
; KNL_X32-NEXT:    cmovel %ecx, %eax
; KNL_X32-NEXT:    addl $16, %esp
; KNL_X32-NEXT:    popl %esi
; KNL_X32-NEXT:    popl %edi
; KNL_X32-NEXT:    popl %ebx
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test12:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rbp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    pushq %r14
; FASTISEL-NEXT:    .cfi_def_cfa_offset 24
; FASTISEL-NEXT:    pushq %rbx
; FASTISEL-NEXT:    .cfi_def_cfa_offset 32
; FASTISEL-NEXT:    .cfi_offset %rbx, -32
; FASTISEL-NEXT:    .cfi_offset %r14, -24
; FASTISEL-NEXT:    .cfi_offset %rbp, -16
; FASTISEL-NEXT:    movl %esi, %r14d
; FASTISEL-NEXT:    movl %edi, %ebp
; FASTISEL-NEXT:    movl %edx, %esi
; FASTISEL-NEXT:    callq _test11
; FASTISEL-NEXT:    movzbl %al, %ebx
; FASTISEL-NEXT:    movl %ebp, %edi
; FASTISEL-NEXT:    movl %r14d, %esi
; FASTISEL-NEXT:    movl %ebx, %edx
; FASTISEL-NEXT:    callq _test10
; FASTISEL-NEXT:    xorl %ecx, %ecx
; FASTISEL-NEXT:    testb $1, %bl
; FASTISEL-NEXT:    cmovel %ecx, %eax
; FASTISEL-NEXT:    popq %rbx
; FASTISEL-NEXT:    popq %r14
; FASTISEL-NEXT:    popq %rbp
; FASTISEL-NEXT:    retq
  %cond = call i1 @test11(i32 %a1, i32 %b1)
  %res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond)
  %res1 = select i1 %cond, i32 %res, i32 0
  ret i32 %res1
}

; test13: load a <1 x i1> from memory and return it.
; The KNL runs (x86-64 and i686) expect a plain byte load with movzbl into
; %eax. The SKX and fast-isel runs expect the byte to be loaded into %k0 with
; kmovb and then moved to %eax with kmovd. In every case the result ends up in
; a general-purpose register (%al per the kill annotation), not in a vector or
; mask register.
define <1 x i1> @test13(ptr %foo) {
; KNL-LABEL: test13:
; KNL:       ## %bb.0:
; KNL-NEXT:    movzbl (%rdi), %eax
; KNL-NEXT:    ## kill: def $al killed $al killed $eax
; KNL-NEXT:    retq
;
; SKX-LABEL: test13:
; SKX:       ## %bb.0:
; SKX-NEXT:    kmovb (%rdi), %k0
; SKX-NEXT:    kmovd %k0, %eax
; SKX-NEXT:    ## kill: def $al killed $al killed $eax
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test13:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    movzbl (%eax), %eax
; KNL_X32-NEXT:    ## kill: def $al killed $al killed $eax
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test13:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    kmovb (%rdi), %k0
; FASTISEL-NEXT:    kmovd %k0, %eax
; FASTISEL-NEXT:    ## kill: def $al killed $al killed $eax
; FASTISEL-NEXT:    retq
  %bar = load <1 x i1>, ptr %foo
  ret <1 x i1> %bar
}

; test14: load a <32 x i16> from %x, pass it to @test14_callee, and store the
; returned <32 x i16> back to %x.
; Every run expects the whole vector to be passed and returned in a single
; %zmm0 (one vmovaps load before the call, one vmovaps store after it), with
; the pointer kept across the call in a register that is pushed on entry and
; popped on exit (%rbx on x86-64, %esi on i686).
; The SKX and fast-isel runs additionally expect a vzeroupper before the
; return; the KNL runs do not.
define void @test14(ptr %x) {
; KNL-LABEL: test14:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbx
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    .cfi_offset %rbx, -16
; KNL-NEXT:    movq %rdi, %rbx
; KNL-NEXT:    vmovaps (%rdi), %zmm0
; KNL-NEXT:    callq _test14_callee
; KNL-NEXT:    vmovaps %zmm0, (%rbx)
; KNL-NEXT:    popq %rbx
; KNL-NEXT:    retq
;
; SKX-LABEL: test14:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbx
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    .cfi_offset %rbx, -16
; SKX-NEXT:    movq %rdi, %rbx
; SKX-NEXT:    vmovaps (%rdi), %zmm0
; SKX-NEXT:    callq _test14_callee
; SKX-NEXT:    vmovaps %zmm0, (%rbx)
; SKX-NEXT:    popq %rbx
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test14:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %esi
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    subl $8, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    .cfi_offset %esi, -8
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT:    vmovaps (%esi), %zmm0
; KNL_X32-NEXT:    calll _test14_callee
; KNL_X32-NEXT:    vmovaps %zmm0, (%esi)
; KNL_X32-NEXT:    addl $8, %esp
; KNL_X32-NEXT:    popl %esi
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test14:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rbx
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    .cfi_offset %rbx, -16
; FASTISEL-NEXT:    movq %rdi, %rbx
; FASTISEL-NEXT:    vmovaps (%rdi), %zmm0
; FASTISEL-NEXT:    callq _test14_callee
; FASTISEL-NEXT:    vmovaps %zmm0, (%rbx)
; FASTISEL-NEXT:    popq %rbx
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  %a = load <32 x i16>, ptr %x
  %b = call <32 x i16> @test14_callee(<32 x i16> %a)
  store <32 x i16> %b, ptr %x
  ret void
}
; External callee for test14, taking and returning <32 x i16>.
declare <32 x i16> @test14_callee(<32 x i16>)

; test15: byte-element counterpart of test14. Load a <64 x i8> from %x, pass
; it to @test15_callee, and store the returned <64 x i8> back to %x.
; The expectations have the same shape as for test14: the vector travels in a
; single %zmm0 in both directions, the pointer is kept across the call in a
; pushed/popped register (%rbx on x86-64, %esi on i686), and only the SKX and
; fast-isel runs expect a vzeroupper before the return.
define void @test15(ptr %x) {
; KNL-LABEL: test15:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbx
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    .cfi_offset %rbx, -16
; KNL-NEXT:    movq %rdi, %rbx
; KNL-NEXT:    vmovaps (%rdi), %zmm0
; KNL-NEXT:    callq _test15_callee
; KNL-NEXT:    vmovaps %zmm0, (%rbx)
; KNL-NEXT:    popq %rbx
; KNL-NEXT:    retq
;
; SKX-LABEL: test15:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbx
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    .cfi_offset %rbx, -16
; SKX-NEXT:    movq %rdi, %rbx
; SKX-NEXT:    vmovaps (%rdi), %zmm0
; SKX-NEXT:    callq _test15_callee
; SKX-NEXT:    vmovaps %zmm0, (%rbx)
; SKX-NEXT:    popq %rbx
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test15:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %esi
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    subl $8, %esp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 16
; KNL_X32-NEXT:    .cfi_offset %esi, -8
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT:    vmovaps (%esi), %zmm0
; KNL_X32-NEXT:    calll _test15_callee
; KNL_X32-NEXT:    vmovaps %zmm0, (%esi)
; KNL_X32-NEXT:    addl $8, %esp
; KNL_X32-NEXT:    popl %esi
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: test15:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rbx
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    .cfi_offset %rbx, -16
; FASTISEL-NEXT:    movq %rdi, %rbx
; FASTISEL-NEXT:    vmovaps (%rdi), %zmm0
; FASTISEL-NEXT:    callq _test15_callee
; FASTISEL-NEXT:    vmovaps %zmm0, (%rbx)
; FASTISEL-NEXT:    popq %rbx
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  %a = load <64 x i8>, ptr %x
  %b = call <64 x i8> @test15_callee(<64 x i8> %a)
  store <64 x i8> %b, ptr %x
  ret void
}
; External callee for test15, taking and returning <64 x i8>.
declare <64 x i8> @test15_callee(<64 x i8>)

define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-LABEL: test16:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    pushq %r15
; KNL-NEXT:    pushq %r14
; KNL-NEXT:    pushq %r13
; KNL-NEXT:    pushq %r12
; KNL-NEXT:    pushq %rbx
; KNL-NEXT:    xorl %r10d, %r10d
; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl $65535, %eax ## imm = 0xFFFF
; KNL-NEXT:    movl $0, %r11d
; KNL-NEXT:    cmovnel %eax, %r11d
; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
; KNL-NEXT:    cmovnel %eax, %r10d
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $14, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-5, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $13, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-9, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $12, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-17, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $11, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-33, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $10, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-65, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $9, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-129, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $8, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-257, %di ## imm = 0xFEFF
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $7, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-513, %di ## imm = 0xFDFF
; KNL-NEXT:    kmovw %edi, %k7
; KNL-NEXT:    kandw %k7, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $6, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-1025, %di ## imm = 0xFBFF
; KNL-NEXT:    kmovw %edi, %k4
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $5, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-2049, %di ## imm = 0xF7FF
; KNL-NEXT:    kmovw %edi, %k3
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $4, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-4097, %di ## imm = 0xEFFF
; KNL-NEXT:    kmovw %edi, %k2
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $3, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-8193, %di ## imm = 0xDFFF
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k5
; KNL-NEXT:    kshiftlw $15, %k5, %k5
; KNL-NEXT:    kshiftrw $2, %k5, %k5
; KNL-NEXT:    korw %k5, %k0, %k5
; KNL-NEXT:    movw $-16385, %di ## imm = 0xBFFF
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kandw %k0, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $14, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kshiftlw $1, %k5, %k5
; KNL-NEXT:    kshiftrw $1, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    andl $1, %esi
; KNL-NEXT:    kmovw %edx, %k5
; KNL-NEXT:    kshiftlw $15, %k5, %k5
; KNL-NEXT:    kshiftrw $14, %k5, %k5
; KNL-NEXT:    kmovw %esi, %k6
; KNL-NEXT:    korw %k5, %k6, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    kmovw %r8d, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    kmovw %r9d, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $8, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; KNL-NEXT:    kandw %k6, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $7, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kandw %k7, %k5, %k5
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $6, %k6, %k6
; KNL-NEXT:    korw %k6, %k5, %k5
; KNL-NEXT:    kandw %k4, %k5, %k4
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k5
; KNL-NEXT:    kshiftlw $15, %k5, %k5
; KNL-NEXT:    kshiftrw $5, %k5, %k5
; KNL-NEXT:    korw %k5, %k4, %k4
; KNL-NEXT:    kandw %k3, %k4, %k3
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k4
; KNL-NEXT:    kshiftlw $15, %k4, %k4
; KNL-NEXT:    kshiftrw $4, %k4, %k4
; KNL-NEXT:    korw %k4, %k3, %k3
; KNL-NEXT:    kandw %k2, %k3, %k2
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k3
; KNL-NEXT:    kshiftlw $15, %k3, %k3
; KNL-NEXT:    kshiftrw $3, %k3, %k3
; KNL-NEXT:    korw %k3, %k2, %k2
; KNL-NEXT:    kandw %k1, %k2, %k1
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    kshiftrw $2, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k0, %k1, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kshiftlw $14, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kshiftlw $1, %k0, %k0
; KNL-NEXT:    kshiftrw $1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    kmovw %r11d, %k1
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    kmovw %r10d, %k2
; KNL-NEXT:    kandw %k1, %k2, %k1
; KNL-NEXT:    kmovw %k1, %r8d
; KNL-NEXT:    kshiftrw $1, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r9d
; KNL-NEXT:    kshiftrw $2, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r10d
; KNL-NEXT:    kshiftrw $3, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r11d
; KNL-NEXT:    kshiftrw $4, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r12d
; KNL-NEXT:    kshiftrw $5, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r15d
; KNL-NEXT:    kshiftrw $6, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r14d
; KNL-NEXT:    kshiftrw $7, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r13d
; KNL-NEXT:    kshiftrw $8, %k0, %k1
; KNL-NEXT:    kmovw %k1, %ebx
; KNL-NEXT:    kshiftrw $9, %k0, %k1
; KNL-NEXT:    kmovw %k1, %esi
; KNL-NEXT:    kshiftrw $10, %k0, %k1
; KNL-NEXT:    kmovw %k1, %ebp
; KNL-NEXT:    kshiftrw $11, %k0, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    kshiftrw $12, %k0, %k1
; KNL-NEXT:    kmovw %k1, %edx
; KNL-NEXT:    kshiftrw $13, %k0, %k1
; KNL-NEXT:    kmovw %k1, %edi
; KNL-NEXT:    kshiftrw $14, %k0, %k1
; KNL-NEXT:    andl $1, %r8d
; KNL-NEXT:    movb %r8b, 2(%rax)
; KNL-NEXT:    kmovw %k0, %r8d
; KNL-NEXT:    andl $1, %r8d
; KNL-NEXT:    andl $1, %r9d
; KNL-NEXT:    leal (%r8,%r9,2), %r8d
; KNL-NEXT:    kmovw %k1, %r9d
; KNL-NEXT:    kshiftrw $15, %k0, %k0
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    leal (%r8,%r10,4), %r8d
; KNL-NEXT:    kmovw %k0, %r10d
; KNL-NEXT:    andl $1, %r11d
; KNL-NEXT:    leal (%r8,%r11,8), %r8d
; KNL-NEXT:    andl $1, %r12d
; KNL-NEXT:    shll $4, %r12d
; KNL-NEXT:    orl %r8d, %r12d
; KNL-NEXT:    andl $1, %r15d
; KNL-NEXT:    shll $5, %r15d
; KNL-NEXT:    orl %r12d, %r15d
; KNL-NEXT:    andl $1, %r14d
; KNL-NEXT:    shll $6, %r14d
; KNL-NEXT:    andl $1, %r13d
; KNL-NEXT:    shll $7, %r13d
; KNL-NEXT:    orl %r14d, %r13d
; KNL-NEXT:    andl $1, %ebx
; KNL-NEXT:    shll $8, %ebx
; KNL-NEXT:    orl %r13d, %ebx
; KNL-NEXT:    andl $1, %esi
; KNL-NEXT:    shll $9, %esi
; KNL-NEXT:    orl %ebx, %esi
; KNL-NEXT:    andl $1, %ebp
; KNL-NEXT:    shll $10, %ebp
; KNL-NEXT:    orl %esi, %ebp
; KNL-NEXT:    orl %r15d, %ebp
; KNL-NEXT:    andl $1, %ecx
; KNL-NEXT:    shll $11, %ecx
; KNL-NEXT:    andl $1, %edx
; KNL-NEXT:    shll $12, %edx
; KNL-NEXT:    orl %ecx, %edx
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    shll $13, %edi
; KNL-NEXT:    orl %edx, %edi
; KNL-NEXT:    andl $1, %r9d
; KNL-NEXT:    shll $14, %r9d
; KNL-NEXT:    orl %edi, %r9d
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    shll $15, %r10d
; KNL-NEXT:    orl %r9d, %r10d
; KNL-NEXT:    orl %ebp, %r10d
; KNL-NEXT:    movw %r10w, (%rax)
; KNL-NEXT:    popq %rbx
; KNL-NEXT:    popq %r12
; KNL-NEXT:    popq %r13
; KNL-NEXT:    popq %r14
; KNL-NEXT:    popq %r15
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: test16:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbp
; SKX-NEXT:    pushq %r15
; SKX-NEXT:    pushq %r14
; SKX-NEXT:    pushq %r13
; SKX-NEXT:    pushq %r12
; SKX-NEXT:    pushq %rbx
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    kshiftld $31, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftrd $30, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $31, %k1, %k1
; SKX-NEXT:    kord %k0, %k1, %k0
; SKX-NEXT:    movl $-5, %edi
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $29, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-9, %edi
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $28, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-17, %edi
; SKX-NEXT:    kmovd %edi, %k2
; SKX-NEXT:    kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandd %k2, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $27, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-33, %edi
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $26, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-65, %edi
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $25, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-129, %edi
; SKX-NEXT:    kmovd %edi, %k2
; SKX-NEXT:    kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandd %k2, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $24, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-257, %edi ## imm = 0xFEFF
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $23, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-513, %edi ## imm = 0xFDFF
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $22, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-1025, %edi ## imm = 0xFBFF
; SKX-NEXT:    kmovd %edi, %k2
; SKX-NEXT:    kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandd %k2, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $21, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-2049, %edi ## imm = 0xF7FF
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $20, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-4097, %edi ## imm = 0xEFFF
; SKX-NEXT:    kmovd %edi, %k6
; SKX-NEXT:    kandd %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $19, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-8193, %edi ## imm = 0xDFFF
; SKX-NEXT:    kmovd %edi, %k5
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandd %k5, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $18, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-16385, %edi ## imm = 0xBFFF
; SKX-NEXT:    kmovd %edi, %k4
; SKX-NEXT:    kandd %k4, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $17, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    movl $-32769, %edi ## imm = 0xFFFF7FFF
; SKX-NEXT:    kmovd %edi, %k3
; SKX-NEXT:    kandd %k3, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kshiftld $31, %k7, %k7
; SKX-NEXT:    kshiftrd $16, %k7, %k7
; SKX-NEXT:    kord %k7, %k0, %k7
; SKX-NEXT:    movl $-65537, %edi ## imm = 0xFFFEFFFF
; SKX-NEXT:    kmovd %edi, %k2
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kandd %k2, %k7, %k7
; SKX-NEXT:    kshiftld $31, %k0, %k0
; SKX-NEXT:    kshiftrd $15, %k0, %k0
; SKX-NEXT:    kord %k0, %k7, %k0
; SKX-NEXT:    kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; SKX-NEXT:    kmovd %edx, %k0
; SKX-NEXT:    kshiftld $31, %k0, %k0
; SKX-NEXT:    kshiftrd $30, %k0, %k0
; SKX-NEXT:    kmovd %esi, %k7
; SKX-NEXT:    kshiftld $31, %k7, %k7
; SKX-NEXT:    kshiftrd $31, %k7, %k7
; SKX-NEXT:    kord %k0, %k7, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovd %ecx, %k7
; SKX-NEXT:    kshiftld $31, %k7, %k7
; SKX-NEXT:    kshiftrd $29, %k7, %k7
; SKX-NEXT:    kord %k7, %k0, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovd %r8d, %k7
; SKX-NEXT:    kshiftld $31, %k7, %k7
; SKX-NEXT:    kshiftrd $28, %k7, %k7
; SKX-NEXT:    kord %k7, %k0, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovd %r9d, %k7
; SKX-NEXT:    kshiftld $31, %k7, %k7
; SKX-NEXT:    kshiftrd $27, %k7, %k7
; SKX-NEXT:    kord %k7, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k1
; SKX-NEXT:    kshiftld $31, %k7, %k7
; SKX-NEXT:    kshiftrd $26, %k7, %k7
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kord %k7, %k1, %k1
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload
; SKX-NEXT:    kandd %k7, %k1, %k1
; SKX-NEXT:    kshiftld $31, %k0, %k0
; SKX-NEXT:    kshiftrd $25, %k0, %k0
; SKX-NEXT:    kord %k0, %k1, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $24, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $23, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $22, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $21, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload
; SKX-NEXT:    kandd %k7, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $20, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kandd %k6, %k0, %k0
; SKX-NEXT:    kshiftld $31, %k7, %k1
; SKX-NEXT:    kshiftrd $19, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kandd %k5, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $18, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kandd %k4, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $17, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kandd %k3, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $16, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kandd %k2, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftld $31, %k1, %k1
; SKX-NEXT:    kshiftrd $15, %k1, %k1
; SKX-NEXT:    kord %k1, %k0, %k0
; SKX-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; SKX-NEXT:    kandd %k1, %k0, %k0
; SKX-NEXT:    kshiftrd $16, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r8d
; SKX-NEXT:    kshiftrd $1, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r9d
; SKX-NEXT:    kshiftrd $2, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r10d
; SKX-NEXT:    kshiftrd $3, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r11d
; SKX-NEXT:    kshiftrd $4, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r12d
; SKX-NEXT:    kshiftrd $5, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r15d
; SKX-NEXT:    kshiftrd $6, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r14d
; SKX-NEXT:    kshiftrd $7, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r13d
; SKX-NEXT:    kshiftrd $8, %k0, %k1
; SKX-NEXT:    kmovd %k1, %ebx
; SKX-NEXT:    kshiftrd $9, %k0, %k1
; SKX-NEXT:    kmovd %k1, %esi
; SKX-NEXT:    kshiftrd $10, %k0, %k1
; SKX-NEXT:    kmovd %k1, %ebp
; SKX-NEXT:    kshiftrd $11, %k0, %k1
; SKX-NEXT:    kmovd %k1, %ecx
; SKX-NEXT:    kshiftrd $12, %k0, %k1
; SKX-NEXT:    kmovd %k1, %edx
; SKX-NEXT:    kshiftrd $13, %k0, %k1
; SKX-NEXT:    kmovd %k1, %edi
; SKX-NEXT:    kshiftrd $14, %k0, %k1
; SKX-NEXT:    andl $1, %r8d
; SKX-NEXT:    movb %r8b, 2(%rax)
; SKX-NEXT:    kmovd %k0, %r8d
; SKX-NEXT:    andl $1, %r8d
; SKX-NEXT:    andl $1, %r9d
; SKX-NEXT:    leal (%r8,%r9,2), %r8d
; SKX-NEXT:    kmovd %k1, %r9d
; SKX-NEXT:    kshiftrd $15, %k0, %k0
; SKX-NEXT:    andl $1, %r10d
; SKX-NEXT:    leal (%r8,%r10,4), %r8d
; SKX-NEXT:    kmovd %k0, %r10d
; SKX-NEXT:    andl $1, %r11d
; SKX-NEXT:    leal (%r8,%r11,8), %r8d
; SKX-NEXT:    andl $1, %r12d
; SKX-NEXT:    shll $4, %r12d
; SKX-NEXT:    orl %r8d, %r12d
; SKX-NEXT:    andl $1, %r15d
; SKX-NEXT:    shll $5, %r15d
; SKX-NEXT:    orl %r12d, %r15d
; SKX-NEXT:    andl $1, %r14d
; SKX-NEXT:    shll $6, %r14d
; SKX-NEXT:    andl $1, %r13d
; SKX-NEXT:    shll $7, %r13d
; SKX-NEXT:    orl %r14d, %r13d
; SKX-NEXT:    andl $1, %ebx
; SKX-NEXT:    shll $8, %ebx
; SKX-NEXT:    orl %r13d, %ebx
; SKX-NEXT:    andl $1, %esi
; SKX-NEXT:    shll $9, %esi
; SKX-NEXT:    orl %ebx, %esi
; SKX-NEXT:    andl $1, %ebp
; SKX-NEXT:    shll $10, %ebp
; SKX-NEXT:    orl %esi, %ebp
; SKX-NEXT:    orl %r15d, %ebp
; SKX-NEXT:    andl $1, %ecx
; SKX-NEXT:    shll $11, %ecx
; SKX-NEXT:    andl $1, %edx
; SKX-NEXT:    shll $12, %edx
; SKX-NEXT:    orl %ecx, %edx
; SKX-NEXT:    andl $1, %edi
; SKX-NEXT:    shll $13, %edi
; SKX-NEXT:    orl %edx, %edi
; SKX-NEXT:    andl $1, %r9d
; SKX-NEXT:    shll $14, %r9d
; SKX-NEXT:    orl %edi, %r9d
; SKX-NEXT:    andl $1, %r10d
; SKX-NEXT:    shll $15, %r10d
; SKX-NEXT:    orl %r9d, %r10d
; SKX-NEXT:    orl %ebp, %r10d
; SKX-NEXT:    movw %r10w, (%rax)
; SKX-NEXT:    popq %rbx
; SKX-NEXT:    popq %r12
; SKX-NEXT:    popq %r13
; SKX-NEXT:    popq %r14
; SKX-NEXT:    popq %r15
; SKX-NEXT:    popq %rbp
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test16:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    pushl %ebx
; KNL_X32-NEXT:    pushl %edi
; KNL_X32-NEXT:    pushl %esi
; KNL_X32-NEXT:    subl $16, %esp
; KNL_X32-NEXT:    xorl %eax, %eax
; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl $65535, %edx ## imm = 0xFFFF
; KNL_X32-NEXT:    movl $0, %ecx
; KNL_X32-NEXT:    cmovnel %edx, %ecx
; KNL_X32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    cmovnel %edx, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    andl $1, %edx
; KNL_X32-NEXT:    kmovw %edx, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $14, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-5, %dx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $13, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-9, %dx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $12, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-17, %dx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $11, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-33, %dx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $10, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-65, %dx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $9, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-129, %dx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $8, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-257, %dx ## imm = 0xFEFF
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kmovw %k1, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $7, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-513, %dx ## imm = 0xFDFF
; KNL_X32-NEXT:    kmovw %edx, %k7
; KNL_X32-NEXT:    kandw %k7, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $6, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-1025, %dx ## imm = 0xFBFF
; KNL_X32-NEXT:    kmovw %edx, %k4
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $5, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-2049, %dx ## imm = 0xF7FF
; KNL_X32-NEXT:    kmovw %edx, %k3
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $4, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-4097, %dx ## imm = 0xEFFF
; KNL_X32-NEXT:    kmovw %edx, %k2
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $3, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-8193, %dx ## imm = 0xDFFF
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k5
; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
; KNL_X32-NEXT:    kshiftrw $2, %k5, %k5
; KNL_X32-NEXT:    korw %k5, %k0, %k5
; KNL_X32-NEXT:    movw $-16385, %dx ## imm = 0xBFFF
; KNL_X32-NEXT:    kmovw %edx, %k0
; KNL_X32-NEXT:    kandw %k0, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $14, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kshiftlw $1, %k5, %k5
; KNL_X32-NEXT:    kshiftrw $1, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw %k5, (%esp) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    andl $1, %edx
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ebx
; KNL_X32-NEXT:    kmovw %ebx, %k5
; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
; KNL_X32-NEXT:    kshiftrw $14, %k5, %k5
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    korw %k5, %k6, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $8, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k6 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k6, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $7, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kandw %k7, %k5, %k5
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $6, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k5, %k5
; KNL_X32-NEXT:    kandw %k4, %k5, %k4
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k5
; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
; KNL_X32-NEXT:    kshiftrw $5, %k5, %k5
; KNL_X32-NEXT:    korw %k5, %k4, %k4
; KNL_X32-NEXT:    kandw %k3, %k4, %k3
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k4
; KNL_X32-NEXT:    kshiftlw $15, %k4, %k4
; KNL_X32-NEXT:    kshiftrw $4, %k4, %k4
; KNL_X32-NEXT:    korw %k4, %k3, %k3
; KNL_X32-NEXT:    kandw %k2, %k3, %k2
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k3
; KNL_X32-NEXT:    kshiftlw $15, %k3, %k3
; KNL_X32-NEXT:    kshiftrw $3, %k3, %k3
; KNL_X32-NEXT:    korw %k3, %k2, %k2
; KNL_X32-NEXT:    kandw %k1, %k2, %k1
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k2
; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
; KNL_X32-NEXT:    kshiftrw $2, %k2, %k2
; KNL_X32-NEXT:    korw %k2, %k1, %k1
; KNL_X32-NEXT:    kandw %k0, %k1, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $14, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    kshiftlw $1, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %edx
; KNL_X32-NEXT:    kmovw %edx, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw %ecx, %k1
; KNL_X32-NEXT:    kmovw (%esp), %k2 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kandw %k1, %k2, %k1
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %k1, %ebx
; KNL_X32-NEXT:    kshiftrw $1, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %ebp
; KNL_X32-NEXT:    kshiftrw $2, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %esi
; KNL_X32-NEXT:    kshiftrw $3, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %edi
; KNL_X32-NEXT:    kshiftrw $4, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %edx
; KNL_X32-NEXT:    kshiftrw $5, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %ecx
; KNL_X32-NEXT:    kshiftrw $6, %k0, %k1
; KNL_X32-NEXT:    andl $1, %ebx
; KNL_X32-NEXT:    movb %bl, 2(%eax)
; KNL_X32-NEXT:    kmovw %k0, %ebx
; KNL_X32-NEXT:    andl $1, %ebx
; KNL_X32-NEXT:    andl $1, %ebp
; KNL_X32-NEXT:    leal (%ebx,%ebp,2), %ebx
; KNL_X32-NEXT:    kmovw %k1, %ebp
; KNL_X32-NEXT:    kshiftrw $7, %k0, %k1
; KNL_X32-NEXT:    andl $1, %esi
; KNL_X32-NEXT:    leal (%ebx,%esi,4), %ebx
; KNL_X32-NEXT:    kmovw %k1, %esi
; KNL_X32-NEXT:    kshiftrw $8, %k0, %k1
; KNL_X32-NEXT:    andl $1, %edi
; KNL_X32-NEXT:    leal (%ebx,%edi,8), %ebx
; KNL_X32-NEXT:    kmovw %k1, %edi
; KNL_X32-NEXT:    kshiftrw $9, %k0, %k1
; KNL_X32-NEXT:    andl $1, %edx
; KNL_X32-NEXT:    shll $4, %edx
; KNL_X32-NEXT:    orl %ebx, %edx
; KNL_X32-NEXT:    kmovw %k1, %ebx
; KNL_X32-NEXT:    kshiftrw $10, %k0, %k1
; KNL_X32-NEXT:    andl $1, %ecx
; KNL_X32-NEXT:    shll $5, %ecx
; KNL_X32-NEXT:    orl %edx, %ecx
; KNL_X32-NEXT:    kmovw %k1, %edx
; KNL_X32-NEXT:    kshiftrw $11, %k0, %k1
; KNL_X32-NEXT:    andl $1, %ebp
; KNL_X32-NEXT:    shll $6, %ebp
; KNL_X32-NEXT:    andl $1, %esi
; KNL_X32-NEXT:    shll $7, %esi
; KNL_X32-NEXT:    orl %ebp, %esi
; KNL_X32-NEXT:    kmovw %k1, %ebp
; KNL_X32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_X32-NEXT:    andl $1, %edi
; KNL_X32-NEXT:    shll $8, %edi
; KNL_X32-NEXT:    orl %esi, %edi
; KNL_X32-NEXT:    kmovw %k1, %esi
; KNL_X32-NEXT:    kshiftrw $13, %k0, %k1
; KNL_X32-NEXT:    andl $1, %ebx
; KNL_X32-NEXT:    shll $9, %ebx
; KNL_X32-NEXT:    orl %edi, %ebx
; KNL_X32-NEXT:    kmovw %k1, %edi
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_X32-NEXT:    andl $1, %edx
; KNL_X32-NEXT:    shll $10, %edx
; KNL_X32-NEXT:    orl %ebx, %edx
; KNL_X32-NEXT:    kmovw %k1, %ebx
; KNL_X32-NEXT:    kshiftrw $15, %k0, %k0
; KNL_X32-NEXT:    orl %ecx, %edx
; KNL_X32-NEXT:    kmovw %k0, %ecx
; KNL_X32-NEXT:    andl $1, %ebp
; KNL_X32-NEXT:    shll $11, %ebp
; KNL_X32-NEXT:    andl $1, %esi
; KNL_X32-NEXT:    shll $12, %esi
; KNL_X32-NEXT:    orl %ebp, %esi
; KNL_X32-NEXT:    andl $1, %edi
; KNL_X32-NEXT:    shll $13, %edi
; KNL_X32-NEXT:    orl %esi, %edi
; KNL_X32-NEXT:    andl $1, %ebx
; KNL_X32-NEXT:    shll $14, %ebx
; KNL_X32-NEXT:    orl %edi, %ebx
; KNL_X32-NEXT:    andl $1, %ecx
; KNL_X32-NEXT:    shll $15, %ecx
; KNL_X32-NEXT:    orl %ebx, %ecx
; KNL_X32-NEXT:    orl %edx, %ecx
; KNL_X32-NEXT:    movw %cx, (%eax)
; KNL_X32-NEXT:    addl $16, %esp
; KNL_X32-NEXT:    popl %esi
; KNL_X32-NEXT:    popl %edi
; KNL_X32-NEXT:    popl %ebx
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl $4
;
; FASTISEL-LABEL: test16:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rbp
; FASTISEL-NEXT:    pushq %r15
; FASTISEL-NEXT:    pushq %r14
; FASTISEL-NEXT:    pushq %r13
; FASTISEL-NEXT:    pushq %r12
; FASTISEL-NEXT:    pushq %rbx
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    movq %rdi, %rax
; FASTISEL-NEXT:    kshiftld $31, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftrd $30, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $31, %k1, %k1
; FASTISEL-NEXT:    kord %k0, %k1, %k0
; FASTISEL-NEXT:    movl $-5, %edi
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $29, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-9, %edi
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $28, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-17, %edi
; FASTISEL-NEXT:    kmovd %edi, %k2
; FASTISEL-NEXT:    kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kandd %k2, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $27, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-33, %edi
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $26, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-65, %edi
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $25, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-129, %edi
; FASTISEL-NEXT:    kmovd %edi, %k2
; FASTISEL-NEXT:    kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kandd %k2, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $24, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-257, %edi ## imm = 0xFEFF
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $23, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-513, %edi ## imm = 0xFDFF
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $22, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-1025, %edi ## imm = 0xFBFF
; FASTISEL-NEXT:    kmovd %edi, %k2
; FASTISEL-NEXT:    kmovd %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kandd %k2, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $21, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-2049, %edi ## imm = 0xF7FF
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovd %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $20, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-4097, %edi ## imm = 0xEFFF
; FASTISEL-NEXT:    kmovd %edi, %k6
; FASTISEL-NEXT:    kandd %k6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $19, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-8193, %edi ## imm = 0xDFFF
; FASTISEL-NEXT:    kmovd %edi, %k5
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kandd %k5, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $18, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-16385, %edi ## imm = 0xBFFF
; FASTISEL-NEXT:    kmovd %edi, %k4
; FASTISEL-NEXT:    kandd %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $17, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    movl $-32769, %edi ## imm = 0xFFFF7FFF
; FASTISEL-NEXT:    kmovd %edi, %k3
; FASTISEL-NEXT:    kandd %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kshiftld $31, %k7, %k7
; FASTISEL-NEXT:    kshiftrd $16, %k7, %k7
; FASTISEL-NEXT:    kord %k7, %k0, %k7
; FASTISEL-NEXT:    movl $-65537, %edi ## imm = 0xFFFEFFFF
; FASTISEL-NEXT:    kmovd %edi, %k2
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    kandd %k2, %k7, %k7
; FASTISEL-NEXT:    kshiftld $31, %k0, %k0
; FASTISEL-NEXT:    kshiftrd $15, %k0, %k0
; FASTISEL-NEXT:    kord %k0, %k7, %k0
; FASTISEL-NEXT:    kmovd %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; FASTISEL-NEXT:    kmovd %edx, %k0
; FASTISEL-NEXT:    kshiftld $31, %k0, %k0
; FASTISEL-NEXT:    kshiftrd $30, %k0, %k0
; FASTISEL-NEXT:    kmovd %esi, %k7
; FASTISEL-NEXT:    kshiftld $31, %k7, %k7
; FASTISEL-NEXT:    kshiftrd $31, %k7, %k7
; FASTISEL-NEXT:    kord %k0, %k7, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd %ecx, %k7
; FASTISEL-NEXT:    kshiftld $31, %k7, %k7
; FASTISEL-NEXT:    kshiftrd $29, %k7, %k7
; FASTISEL-NEXT:    kord %k7, %k0, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd %r8d, %k7
; FASTISEL-NEXT:    kshiftld $31, %k7, %k7
; FASTISEL-NEXT:    kshiftrd $28, %k7, %k7
; FASTISEL-NEXT:    kord %k7, %k0, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd %r9d, %k7
; FASTISEL-NEXT:    kshiftld $31, %k7, %k7
; FASTISEL-NEXT:    kshiftrd $27, %k7, %k7
; FASTISEL-NEXT:    kord %k7, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k1
; FASTISEL-NEXT:    kshiftld $31, %k7, %k7
; FASTISEL-NEXT:    kshiftrd $26, %k7, %k7
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    kord %k7, %k1, %k1
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k7, %k1, %k1
; FASTISEL-NEXT:    kshiftld $31, %k0, %k0
; FASTISEL-NEXT:    kshiftrd $25, %k0, %k0
; FASTISEL-NEXT:    kord %k0, %k1, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $24, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $23, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $22, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $21, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k7, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $20, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kandd %k6, %k0, %k0
; FASTISEL-NEXT:    kshiftld $31, %k7, %k1
; FASTISEL-NEXT:    kshiftrd $19, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kandd %k5, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $18, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kandd %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $17, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kandd %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $16, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kandd %k2, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftld $31, %k1, %k1
; FASTISEL-NEXT:    kshiftrd $15, %k1, %k1
; FASTISEL-NEXT:    kord %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 4-byte Reload
; FASTISEL-NEXT:    kandd %k1, %k0, %k0
; FASTISEL-NEXT:    kshiftrd $16, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r8d
; FASTISEL-NEXT:    kshiftrd $1, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r9d
; FASTISEL-NEXT:    kshiftrd $2, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r10d
; FASTISEL-NEXT:    kshiftrd $3, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r11d
; FASTISEL-NEXT:    kshiftrd $4, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r12d
; FASTISEL-NEXT:    kshiftrd $5, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r15d
; FASTISEL-NEXT:    kshiftrd $6, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r14d
; FASTISEL-NEXT:    kshiftrd $7, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r13d
; FASTISEL-NEXT:    kshiftrd $8, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %ebx
; FASTISEL-NEXT:    kshiftrd $9, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %esi
; FASTISEL-NEXT:    kshiftrd $10, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %ebp
; FASTISEL-NEXT:    kshiftrd $11, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %ecx
; FASTISEL-NEXT:    kshiftrd $12, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %edx
; FASTISEL-NEXT:    kshiftrd $13, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %edi
; FASTISEL-NEXT:    kshiftrd $14, %k0, %k1
; FASTISEL-NEXT:    andl $1, %r8d
; FASTISEL-NEXT:    movb %r8b, 2(%rax)
; FASTISEL-NEXT:    kmovd %k0, %r8d
; FASTISEL-NEXT:    andl $1, %r8d
; FASTISEL-NEXT:    andl $1, %r9d
; FASTISEL-NEXT:    leal (%r8,%r9,2), %r8d
; FASTISEL-NEXT:    kmovd %k1, %r9d
; FASTISEL-NEXT:    kshiftrd $15, %k0, %k0
; FASTISEL-NEXT:    andl $1, %r10d
; FASTISEL-NEXT:    leal (%r8,%r10,4), %r8d
; FASTISEL-NEXT:    kmovd %k0, %r10d
; FASTISEL-NEXT:    andl $1, %r11d
; FASTISEL-NEXT:    leal (%r8,%r11,8), %r8d
; FASTISEL-NEXT:    andl $1, %r12d
; FASTISEL-NEXT:    shll $4, %r12d
; FASTISEL-NEXT:    orl %r8d, %r12d
; FASTISEL-NEXT:    andl $1, %r15d
; FASTISEL-NEXT:    shll $5, %r15d
; FASTISEL-NEXT:    orl %r12d, %r15d
; FASTISEL-NEXT:    andl $1, %r14d
; FASTISEL-NEXT:    shll $6, %r14d
; FASTISEL-NEXT:    andl $1, %r13d
; FASTISEL-NEXT:    shll $7, %r13d
; FASTISEL-NEXT:    orl %r14d, %r13d
; FASTISEL-NEXT:    andl $1, %ebx
; FASTISEL-NEXT:    shll $8, %ebx
; FASTISEL-NEXT:    orl %r13d, %ebx
; FASTISEL-NEXT:    andl $1, %esi
; FASTISEL-NEXT:    shll $9, %esi
; FASTISEL-NEXT:    orl %ebx, %esi
; FASTISEL-NEXT:    andl $1, %ebp
; FASTISEL-NEXT:    shll $10, %ebp
; FASTISEL-NEXT:    orl %esi, %ebp
; FASTISEL-NEXT:    orl %r15d, %ebp
; FASTISEL-NEXT:    andl $1, %ecx
; FASTISEL-NEXT:    shll $11, %ecx
; FASTISEL-NEXT:    andl $1, %edx
; FASTISEL-NEXT:    shll $12, %edx
; FASTISEL-NEXT:    orl %ecx, %edx
; FASTISEL-NEXT:    andl $1, %edi
; FASTISEL-NEXT:    shll $13, %edi
; FASTISEL-NEXT:    orl %edx, %edi
; FASTISEL-NEXT:    andl $1, %r9d
; FASTISEL-NEXT:    shll $14, %r9d
; FASTISEL-NEXT:    orl %edi, %r9d
; FASTISEL-NEXT:    andl $1, %r10d
; FASTISEL-NEXT:    shll $15, %r10d
; FASTISEL-NEXT:    orl %r9d, %r10d
; FASTISEL-NEXT:    orl %ebp, %r10d
; FASTISEL-NEXT:    movw %r10w, (%rax)
; FASTISEL-NEXT:    popq %rbx
; FASTISEL-NEXT:    popq %r12
; FASTISEL-NEXT:    popq %r13
; FASTISEL-NEXT:    popq %r14
; FASTISEL-NEXT:    popq %r15
; FASTISEL-NEXT:    popq %rbp
; FASTISEL-NEXT:    retq
  %c = and <17 x i1> %a, %b
  ret <17 x i1> %c
}

define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x i1>%e, <7 x i1>%f, <7 x i1> %g, <7 x i1> %h, <7 x i1> %i) nounwind {
; KNL-LABEL: test17:
; KNL:       ## %bb.0:
; KNL-NEXT:    movq %rdi, %rax
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kshiftlw $15, %k1, %k1
; KNL-NEXT:    kshiftrw $14, %k1, %k1
; KNL-NEXT:    korw %k1, %k0, %k0
; KNL-NEXT:    movw $-5, %di
; KNL-NEXT:    kmovw %edi, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    kshiftrw $13, %k2, %k2
; KNL-NEXT:    korw %k2, %k0, %k0
; KNL-NEXT:    movw $-9, %di
; KNL-NEXT:    kmovw %edi, %k2
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k3
; KNL-NEXT:    kshiftlw $15, %k3, %k3
; KNL-NEXT:    kshiftrw $12, %k3, %k3
; KNL-NEXT:    korw %k3, %k0, %k0
; KNL-NEXT:    movw $-17, %di
; KNL-NEXT:    kmovw %edi, %k3
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k4
; KNL-NEXT:    kshiftlw $15, %k4, %k4
; KNL-NEXT:    kshiftrw $11, %k4, %k4
; KNL-NEXT:    korw %k4, %k0, %k0
; KNL-NEXT:    movw $-33, %di
; KNL-NEXT:    kmovw %edi, %k4
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k5
; KNL-NEXT:    kshiftlw $15, %k5, %k5
; KNL-NEXT:    kshiftrw $10, %k5, %k5
; KNL-NEXT:    korw %k5, %k0, %k0
; KNL-NEXT:    movw $-65, %di
; KNL-NEXT:    kmovw %edi, %k5
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    andl $1, %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $14, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %r10d, %k6
; KNL-NEXT:    korw %k0, %k6, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %r10d, %k6
; KNL-NEXT:    korw %k0, %k6, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %r10d, %k6
; KNL-NEXT:    korw %k0, %k6, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %r10d, %k6
; KNL-NEXT:    korw %k0, %k6, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; KNL-NEXT:    andl $1, %r10d
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %r10d, %k6
; KNL-NEXT:    korw %k0, %k6, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $12, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $11, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $10, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; KNL-NEXT:    kmovw %edi, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $9, %k6, %k6
; KNL-NEXT:    korw %k6, %k0, %k0
; KNL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT:    andl $1, %esi
; KNL-NEXT:    kmovw %edx, %k0
; KNL-NEXT:    kshiftlw $15, %k0, %k0
; KNL-NEXT:    kshiftrw $14, %k0, %k0
; KNL-NEXT:    kmovw %esi, %k7
; KNL-NEXT:    korw %k0, %k7, %k0
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw %ecx, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $13, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k2, %k0, %k0
; KNL-NEXT:    kmovw %r8d, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $12, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k3, %k0, %k0
; KNL-NEXT:    kmovw %r9d, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $11, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k4, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $10, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    kandw %k5, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $9, %k7, %k7
; KNL-NEXT:    korw %k7, %k0, %k0
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    andl $1, %ecx
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; KNL-NEXT:    kmovw %edx, %k7
; KNL-NEXT:    kshiftlw $15, %k7, %k7
; KNL-NEXT:    kshiftrw $14, %k7, %k7
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    korw %k7, %k6, %k6
; KNL-NEXT:    kandw %k1, %k6, %k1
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k6
; KNL-NEXT:    kshiftlw $15, %k6, %k6
; KNL-NEXT:    kshiftrw $13, %k6, %k6
; KNL-NEXT:    korw %k6, %k1, %k1
; KNL-NEXT:    kandw %k2, %k1, %k1
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    kshiftrw $12, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k3, %k1, %k1
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    kshiftrw $11, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k4, %k1, %k1
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    kshiftrw $10, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k5, %k1, %k1
; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; KNL-NEXT:    kmovw %ecx, %k2
; KNL-NEXT:    kshiftlw $15, %k2, %k2
; KNL-NEXT:    kshiftrw $9, %k2, %k2
; KNL-NEXT:    korw %k2, %k1, %k1
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT:    kandw %k1, %k0, %k0
; KNL-NEXT:    kshiftrw $6, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r8d
; KNL-NEXT:    kshiftrw $5, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r9d
; KNL-NEXT:    kshiftrw $4, %k0, %k1
; KNL-NEXT:    kmovw %k1, %r10d
; KNL-NEXT:    kshiftrw $3, %k0, %k1
; KNL-NEXT:    kmovw %k1, %edi
; KNL-NEXT:    kshiftrw $2, %k0, %k1
; KNL-NEXT:    kmovw %k1, %ecx
; KNL-NEXT:    kshiftrw $1, %k0, %k1
; KNL-NEXT:    kmovw %k1, %edx
; KNL-NEXT:    kmovw %k0, %esi
; KNL-NEXT:    andb $1, %sil
; KNL-NEXT:    andb $1, %dl
; KNL-NEXT:    addb %dl, %dl
; KNL-NEXT:    orb %sil, %dl
; KNL-NEXT:    andb $1, %cl
; KNL-NEXT:    shlb $2, %cl
; KNL-NEXT:    orb %dl, %cl
; KNL-NEXT:    andb $1, %dil
; KNL-NEXT:    shlb $3, %dil
; KNL-NEXT:    orb %cl, %dil
; KNL-NEXT:    andb $1, %r10b
; KNL-NEXT:    shlb $4, %r10b
; KNL-NEXT:    orb %dil, %r10b
; KNL-NEXT:    andb $1, %r9b
; KNL-NEXT:    shlb $5, %r9b
; KNL-NEXT:    orb %r10b, %r9b
; KNL-NEXT:    shlb $6, %r8b
; KNL-NEXT:    orb %r9b, %r8b
; KNL-NEXT:    andb $127, %r8b
; KNL-NEXT:    movb %r8b, (%rax)
; KNL-NEXT:    retq
;
; SKX-LABEL: test17:
; SKX:       ## %bb.0:
; SKX-NEXT:    movq %rdi, %rax
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    movb $-5, %dil
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    movb $-9, %dil
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $4, %k2, %k2
; SKX-NEXT:    korb %k2, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT:    movb $-17, %dil
; SKX-NEXT:    kmovd %edi, %k3
; SKX-NEXT:    kandb %k3, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $3, %k2, %k2
; SKX-NEXT:    korb %k2, %k0, %k0
; SKX-NEXT:    movb $-33, %dil
; SKX-NEXT:    kmovd %edi, %k1
; SKX-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT:    kshiftlb $7, %k4, %k4
; SKX-NEXT:    kshiftrb $2, %k4, %k4
; SKX-NEXT:    korb %k4, %k0, %k0
; SKX-NEXT:    movb $-65, %dil
; SKX-NEXT:    kmovd %edi, %k6
; SKX-NEXT:    kandb %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $1, %k5, %k5
; SKX-NEXT:    korb %k5, %k0, %k0
; SKX-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kshiftlb $7, %k7, %k7
; SKX-NEXT:    kshiftrb $7, %k7, %k7
; SKX-NEXT:    korb %k0, %k7, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; SKX-NEXT:    kandb %k2, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kshiftlb $7, %k7, %k7
; SKX-NEXT:    kshiftrb $5, %k7, %k7
; SKX-NEXT:    korb %k7, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; SKX-NEXT:    kandb %k5, %k0, %k1
; SKX-NEXT:    kshiftlb $7, %k7, %k7
; SKX-NEXT:    kshiftrb $4, %k7, %k7
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    korb %k7, %k1, %k1
; SKX-NEXT:    kandb %k3, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $3, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; SKX-NEXT:    kandb %k4, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $2, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $1, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kandb %k2, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovq %k5, %k7
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $4, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandb %k3, %k0, %k0
; SKX-NEXT:    kmovw %k3, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $3, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k4, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k5, %k1
; SKX-NEXT:    kshiftrb $2, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw %k6, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kandb %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $1, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $6, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $7, %k5, %k5
; SKX-NEXT:    korb %k1, %k5, %k1
; SKX-NEXT:    kandb %k2, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $5, %k5, %k5
; SKX-NEXT:    korb %k5, %k1, %k1
; SKX-NEXT:    kandb %k7, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $4, %k5, %k5
; SKX-NEXT:    korb %k5, %k1, %k1
; SKX-NEXT:    kandb %k3, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $3, %k5, %k5
; SKX-NEXT:    korb %k5, %k1, %k1
; SKX-NEXT:    kandb %k4, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $2, %k5, %k5
; SKX-NEXT:    korb %k5, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT:    kandb %k6, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k5, %k5
; SKX-NEXT:    kshiftrb $1, %k5, %k5
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    korb %k5, %k1, %k5
; SKX-NEXT:    kshiftlb $7, %k7, %k1
; SKX-NEXT:    kshiftrb $6, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kshiftlb $7, %k7, %k7
; SKX-NEXT:    kshiftrb $7, %k7, %k7
; SKX-NEXT:    korb %k1, %k7, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    kandb %k2, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k7, %k7
; SKX-NEXT:    kshiftrb $5, %k7, %k7
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    korb %k7, %k1, %k1
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; SKX-NEXT:    kandb %k3, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $4, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; SKX-NEXT:    kandb %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $3, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k4, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $2, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; SKX-NEXT:    kandb %k2, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $1, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kshiftlb $7, %k1, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k3, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k7, %k1
; SKX-NEXT:    kshiftrb $4, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $3, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k4, %k0, %k0
; SKX-NEXT:    kmovq %k4, %k7
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $2, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k2, %k0, %k0
; SKX-NEXT:    kmovq %k2, %k3
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $1, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kmovq %k5, %k4
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k5 ## 2-byte Reload
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $4, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandb %k6, %k0, %k2
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $3, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    korb %k1, %k2, %k1
; SKX-NEXT:    kmovq %k7, %k2
; SKX-NEXT:    kandb %k7, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $2, %k0, %k0
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kandb %k3, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $1, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT:    kshiftlb $7, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    korb %k0, %k1, %k0
; SKX-NEXT:    kandb %k4, %k0, %k0
; SKX-NEXT:    kmovq %k4, %k7
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovq %k5, %k3
; SKX-NEXT:    kandb %k5, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $4, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kandb %k6, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $3, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; SKX-NEXT:    kandb %k2, %k0, %k0
; SKX-NEXT:    kmovq %k2, %k5
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $2, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; SKX-NEXT:    kandb %k4, %k0, %k0
; SKX-NEXT:    kshiftlb $7, %k2, %k1
; SKX-NEXT:    kshiftrb $1, %k1, %k1
; SKX-NEXT:    korb %k1, %k0, %k0
; SKX-NEXT:    kmovd %edx, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $6, %k1, %k1
; SKX-NEXT:    kmovd %esi, %k2
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $7, %k2, %k2
; SKX-NEXT:    korb %k1, %k2, %k1
; SKX-NEXT:    kandb %k7, %k1, %k1
; SKX-NEXT:    kmovd %ecx, %k2
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $5, %k2, %k2
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    kandb %k3, %k1, %k1
; SKX-NEXT:    kmovd %r8d, %k2
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $4, %k2, %k2
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    kandb %k6, %k1, %k1
; SKX-NEXT:    kmovd %r9d, %k2
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $3, %k2, %k2
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    kandb %k5, %k1, %k1
; SKX-NEXT:    kshiftlb $7, %k3, %k2
; SKX-NEXT:    kshiftrb $2, %k2, %k2
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    kandb %k4, %k1, %k1
; SKX-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT:    kshiftlb $7, %k2, %k2
; SKX-NEXT:    kshiftrb $1, %k2, %k2
; SKX-NEXT:    korb %k2, %k1, %k1
; SKX-NEXT:    kandb %k0, %k1, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; SKX-NEXT:    kandb %k1, %k0, %k0
; SKX-NEXT:    kshiftrb $6, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r8d
; SKX-NEXT:    kshiftrb $5, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r9d
; SKX-NEXT:    kshiftrb $4, %k0, %k1
; SKX-NEXT:    kmovd %k1, %r10d
; SKX-NEXT:    kshiftrb $3, %k0, %k1
; SKX-NEXT:    kmovd %k1, %edi
; SKX-NEXT:    kshiftrb $2, %k0, %k1
; SKX-NEXT:    kmovd %k1, %ecx
; SKX-NEXT:    kshiftrb $1, %k0, %k1
; SKX-NEXT:    kmovd %k1, %edx
; SKX-NEXT:    kmovd %k0, %esi
; SKX-NEXT:    andb $1, %sil
; SKX-NEXT:    andb $1, %dl
; SKX-NEXT:    addb %dl, %dl
; SKX-NEXT:    orb %sil, %dl
; SKX-NEXT:    andb $1, %cl
; SKX-NEXT:    shlb $2, %cl
; SKX-NEXT:    orb %dl, %cl
; SKX-NEXT:    andb $1, %dil
; SKX-NEXT:    shlb $3, %dil
; SKX-NEXT:    orb %cl, %dil
; SKX-NEXT:    andb $1, %r10b
; SKX-NEXT:    shlb $4, %r10b
; SKX-NEXT:    orb %dil, %r10b
; SKX-NEXT:    andb $1, %r9b
; SKX-NEXT:    shlb $5, %r9b
; SKX-NEXT:    orb %r10b, %r9b
; SKX-NEXT:    shlb $6, %r8b
; SKX-NEXT:    orb %r9b, %r8b
; SKX-NEXT:    andb $127, %r8b
; SKX-NEXT:    movb %r8b, (%rax)
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: test17:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebx
; KNL_X32-NEXT:    subl $16, %esp
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    kmovw %eax, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k1
; KNL_X32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_X32-NEXT:    kshiftrw $14, %k1, %k1
; KNL_X32-NEXT:    korw %k1, %k0, %k0
; KNL_X32-NEXT:    movw $-5, %ax
; KNL_X32-NEXT:    kmovw %eax, %k1
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
; KNL_X32-NEXT:    kshiftrw $13, %k2, %k2
; KNL_X32-NEXT:    korw %k2, %k0, %k0
; KNL_X32-NEXT:    movw $-9, %ax
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k3
; KNL_X32-NEXT:    kshiftlw $15, %k3, %k3
; KNL_X32-NEXT:    kshiftrw $12, %k3, %k3
; KNL_X32-NEXT:    korw %k3, %k0, %k0
; KNL_X32-NEXT:    movw $-17, %ax
; KNL_X32-NEXT:    kmovw %eax, %k3
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k4
; KNL_X32-NEXT:    kshiftlw $15, %k4, %k4
; KNL_X32-NEXT:    kshiftrw $11, %k4, %k4
; KNL_X32-NEXT:    korw %k4, %k0, %k0
; KNL_X32-NEXT:    movw $-33, %ax
; KNL_X32-NEXT:    kmovw %eax, %k4
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k5
; KNL_X32-NEXT:    kshiftlw $15, %k5, %k5
; KNL_X32-NEXT:    kshiftrw $10, %k5, %k5
; KNL_X32-NEXT:    korw %k5, %k0, %k0
; KNL_X32-NEXT:    movw $-65, %ax
; KNL_X32-NEXT:    kmovw %eax, %k5
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    kmovw %eax, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $14, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k0
; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    korw %k0, %k6, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k0
; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    korw %k0, %k6, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k0
; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    korw %k0, %k6, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k0
; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    korw %k0, %k6, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k0
; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    korw %k0, %k6, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $12, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $11, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $10, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $9, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k0, %k0
; KNL_X32-NEXT:    kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k0
; KNL_X32-NEXT:    kshiftlw $15, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $14, %k0, %k0
; KNL_X32-NEXT:    kmovw %eax, %k7
; KNL_X32-NEXT:    korw %k0, %k7, %k0
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k7
; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
; KNL_X32-NEXT:    kshiftrw $13, %k7, %k7
; KNL_X32-NEXT:    korw %k7, %k0, %k0
; KNL_X32-NEXT:    kandw %k2, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k7
; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
; KNL_X32-NEXT:    kshiftrw $12, %k7, %k7
; KNL_X32-NEXT:    korw %k7, %k0, %k0
; KNL_X32-NEXT:    kandw %k3, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k7
; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
; KNL_X32-NEXT:    kshiftrw $11, %k7, %k7
; KNL_X32-NEXT:    korw %k7, %k0, %k0
; KNL_X32-NEXT:    kandw %k4, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k7
; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
; KNL_X32-NEXT:    kshiftrw $10, %k7, %k7
; KNL_X32-NEXT:    korw %k7, %k0, %k0
; KNL_X32-NEXT:    kandw %k5, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k7
; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
; KNL_X32-NEXT:    kshiftrw $9, %k7, %k7
; KNL_X32-NEXT:    korw %k7, %k0, %k0
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andl $1, %eax
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT:    kmovw %ecx, %k7
; KNL_X32-NEXT:    kshiftlw $15, %k7, %k7
; KNL_X32-NEXT:    kshiftrw $14, %k7, %k7
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    korw %k7, %k6, %k6
; KNL_X32-NEXT:    kandw %k1, %k6, %k1
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k6
; KNL_X32-NEXT:    kshiftlw $15, %k6, %k6
; KNL_X32-NEXT:    kshiftrw $13, %k6, %k6
; KNL_X32-NEXT:    korw %k6, %k1, %k1
; KNL_X32-NEXT:    kandw %k2, %k1, %k1
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
; KNL_X32-NEXT:    kshiftrw $12, %k2, %k2
; KNL_X32-NEXT:    korw %k2, %k1, %k1
; KNL_X32-NEXT:    kandw %k3, %k1, %k1
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
; KNL_X32-NEXT:    kshiftrw $11, %k2, %k2
; KNL_X32-NEXT:    korw %k2, %k1, %k1
; KNL_X32-NEXT:    kandw %k4, %k1, %k1
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
; KNL_X32-NEXT:    kshiftrw $10, %k2, %k2
; KNL_X32-NEXT:    korw %k2, %k1, %k1
; KNL_X32-NEXT:    kandw %k5, %k1, %k1
; KNL_X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    kmovw %eax, %k2
; KNL_X32-NEXT:    kshiftlw $15, %k2, %k2
; KNL_X32-NEXT:    kshiftrw $9, %k2, %k2
; KNL_X32-NEXT:    korw %k2, %k1, %k1
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT:    kandw %k1, %k0, %k0
; KNL_X32-NEXT:    kshiftrw $6, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %ecx
; KNL_X32-NEXT:    kshiftrw $5, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %eax
; KNL_X32-NEXT:    kshiftrw $1, %k0, %k1
; KNL_X32-NEXT:    kmovw %k1, %edx
; KNL_X32-NEXT:    kshiftrw $2, %k0, %k1
; KNL_X32-NEXT:    kmovw %k0, %ebx
; KNL_X32-NEXT:    andb $1, %bl
; KNL_X32-NEXT:    andb $1, %dl
; KNL_X32-NEXT:    addb %dl, %dl
; KNL_X32-NEXT:    orb %bl, %dl
; KNL_X32-NEXT:    kmovw %k1, %ebx
; KNL_X32-NEXT:    kshiftrw $3, %k0, %k1
; KNL_X32-NEXT:    andb $1, %bl
; KNL_X32-NEXT:    shlb $2, %bl
; KNL_X32-NEXT:    orb %dl, %bl
; KNL_X32-NEXT:    kmovw %k1, %edx
; KNL_X32-NEXT:    kshiftrw $4, %k0, %k0
; KNL_X32-NEXT:    andb $1, %dl
; KNL_X32-NEXT:    shlb $3, %dl
; KNL_X32-NEXT:    orb %bl, %dl
; KNL_X32-NEXT:    kmovw %k0, %ebx
; KNL_X32-NEXT:    andb $1, %bl
; KNL_X32-NEXT:    shlb $4, %bl
; KNL_X32-NEXT:    orb %dl, %bl
; KNL_X32-NEXT:    andb $1, %al
; KNL_X32-NEXT:    shlb $5, %al
; KNL_X32-NEXT:    orb %bl, %al
; KNL_X32-NEXT:    shlb $6, %cl
; KNL_X32-NEXT:    orb %al, %cl
; KNL_X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT:    andb $127, %cl
; KNL_X32-NEXT:    movb %cl, (%eax)
; KNL_X32-NEXT:    addl $16, %esp
; KNL_X32-NEXT:    popl %ebx
; KNL_X32-NEXT:    retl $4
;
; FASTISEL-LABEL: test17:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    movq %rdi, %rax
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $6, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $7, %k1, %k1
; FASTISEL-NEXT:    korb %k0, %k1, %k0
; FASTISEL-NEXT:    movb $-5, %dil
; FASTISEL-NEXT:    kmovd %edi, %k3
; FASTISEL-NEXT:    kandb %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $5, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    movb $-9, %dil
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $4, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    movb $-17, %dil
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $3, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k0, %k0
; FASTISEL-NEXT:    movb $-33, %dil
; FASTISEL-NEXT:    kmovd %edi, %k5
; FASTISEL-NEXT:    kandb %k5, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k4
; FASTISEL-NEXT:    kshiftlb $7, %k4, %k4
; FASTISEL-NEXT:    kshiftrb $2, %k4, %k4
; FASTISEL-NEXT:    korb %k4, %k0, %k0
; FASTISEL-NEXT:    movb $-65, %dil
; FASTISEL-NEXT:    kmovd %edi, %k1
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovq %k1, %k4
; FASTISEL-NEXT:    kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k6
; FASTISEL-NEXT:    kshiftlb $7, %k6, %k6
; FASTISEL-NEXT:    kshiftrb $1, %k6, %k6
; FASTISEL-NEXT:    korb %k6, %k0, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k6
; FASTISEL-NEXT:    kshiftlb $7, %k6, %k6
; FASTISEL-NEXT:    kshiftrb $6, %k6, %k6
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k7
; FASTISEL-NEXT:    kshiftrb $7, %k7, %k7
; FASTISEL-NEXT:    korb %k6, %k7, %k6
; FASTISEL-NEXT:    kandb %k3, %k6, %k6
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k7
; FASTISEL-NEXT:    kshiftrb $5, %k7, %k7
; FASTISEL-NEXT:    korb %k7, %k6, %k6
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k2, %k6, %k6
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k7
; FASTISEL-NEXT:    kshiftrb $4, %k7, %k7
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    korb %k7, %k6, %k6
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k7, %k6, %k6
; FASTISEL-NEXT:    kshiftlb $7, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $3, %k0, %k0
; FASTISEL-NEXT:    korb %k0, %k6, %k0
; FASTISEL-NEXT:    kandb %k5, %k0, %k0
; FASTISEL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k6
; FASTISEL-NEXT:    kshiftlb $7, %k6, %k6
; FASTISEL-NEXT:    kshiftrb $2, %k6, %k6
; FASTISEL-NEXT:    korb %k6, %k0, %k0
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k6
; FASTISEL-NEXT:    kshiftlb $7, %k6, %k6
; FASTISEL-NEXT:    kshiftrb $1, %k6, %k6
; FASTISEL-NEXT:    korb %k6, %k0, %k0
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    kshiftlb $7, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $7, %k1, %k1
; FASTISEL-NEXT:    korb %k0, %k1, %k0
; FASTISEL-NEXT:    kmovq %k3, %k7
; FASTISEL-NEXT:    kandb %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $5, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k2, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $4, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $3, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k6
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k5, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k6, %k1
; FASTISEL-NEXT:    kshiftrb $2, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k3 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $1, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $6, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $7, %k1, %k1
; FASTISEL-NEXT:    korb %k0, %k1, %k0
; FASTISEL-NEXT:    kandb %k7, %k0, %k0
; FASTISEL-NEXT:    kmovq %k7, %k5
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $5, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k2, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $4, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $3, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $2, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kandb %k3, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $1, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    korb %k1, %k0, %k2
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k1
; FASTISEL-NEXT:    kshiftrb $6, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k7
; FASTISEL-NEXT:    kshiftrb $7, %k7, %k7
; FASTISEL-NEXT:    korb %k1, %k7, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    kandb %k5, %k1, %k1
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k7
; FASTISEL-NEXT:    kshiftrb $5, %k7, %k7
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    korb %k7, %k1, %k1
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k7, %k1, %k1
; FASTISEL-NEXT:    kshiftlb $7, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $4, %k0, %k0
; FASTISEL-NEXT:    korb %k0, %k1, %k0
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $3, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $2, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $1, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k2, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k2, %k0, %k0
; FASTISEL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k0
; FASTISEL-NEXT:    kshiftrb $6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $7, %k1, %k1
; FASTISEL-NEXT:    korb %k0, %k1, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kmovq %k5, %k3
; FASTISEL-NEXT:    kmovw %k5, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kandb %k5, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $5, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k7, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k1
; FASTISEL-NEXT:    kshiftrb $4, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovq %k4, %k5
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $3, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $2, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k4 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $1, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $6, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $7, %k2, %k2
; FASTISEL-NEXT:    korb %k1, %k2, %k1
; FASTISEL-NEXT:    kandb %k3, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $5, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k7, %k1, %k1
; FASTISEL-NEXT:    kmovq %k7, %k3
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $4, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kandb %k5, %k1, %k1
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $3, %k2, %k2
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k7
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k6, %k1, %k1
; FASTISEL-NEXT:    kmovq %k6, %k5
; FASTISEL-NEXT:    kshiftlb $7, %k7, %k2
; FASTISEL-NEXT:    kshiftrb $2, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k4, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $1, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k0, %k1, %k0
; FASTISEL-NEXT:    kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k0
; FASTISEL-NEXT:    kshiftlb $7, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $7, %k1, %k1
; FASTISEL-NEXT:    korb %k0, %k1, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k7, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $5, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k3, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $4, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k6 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k6, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $3, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k1
; FASTISEL-NEXT:    kandb %k5, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $2, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kandb %k4, %k0, %k0
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k1
; FASTISEL-NEXT:    kshiftrb $1, %k1, %k1
; FASTISEL-NEXT:    korb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovd %edx, %k1
; FASTISEL-NEXT:    kshiftlb $7, %k1, %k1
; FASTISEL-NEXT:    kshiftrb $6, %k1, %k1
; FASTISEL-NEXT:    kmovd %esi, %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $7, %k2, %k2
; FASTISEL-NEXT:    korb %k1, %k2, %k1
; FASTISEL-NEXT:    kandb %k7, %k1, %k1
; FASTISEL-NEXT:    kmovd %ecx, %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $5, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k3, %k1, %k1
; FASTISEL-NEXT:    kmovd %r8d, %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $4, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k6, %k1, %k1
; FASTISEL-NEXT:    kmovd %r9d, %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $3, %k2, %k2
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k3
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k5, %k1, %k1
; FASTISEL-NEXT:    kshiftlb $7, %k3, %k2
; FASTISEL-NEXT:    kshiftrb $2, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k4, %k1, %k1
; FASTISEL-NEXT:    kmovb {{[0-9]+}}(%rsp), %k2
; FASTISEL-NEXT:    kshiftlb $7, %k2, %k2
; FASTISEL-NEXT:    kshiftrb $1, %k2, %k2
; FASTISEL-NEXT:    korb %k2, %k1, %k1
; FASTISEL-NEXT:    kandb %k0, %k1, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; FASTISEL-NEXT:    kandb %k1, %k0, %k0
; FASTISEL-NEXT:    kshiftrb $6, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r8d
; FASTISEL-NEXT:    kshiftrb $5, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r9d
; FASTISEL-NEXT:    kshiftrb $4, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %r10d
; FASTISEL-NEXT:    kshiftrb $3, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %edi
; FASTISEL-NEXT:    kshiftrb $2, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %ecx
; FASTISEL-NEXT:    kshiftrb $1, %k0, %k1
; FASTISEL-NEXT:    kmovd %k1, %edx
; FASTISEL-NEXT:    kmovd %k0, %esi
; FASTISEL-NEXT:    andb $1, %sil
; FASTISEL-NEXT:    andb $1, %dl
; FASTISEL-NEXT:    addb %dl, %dl
; FASTISEL-NEXT:    orb %sil, %dl
; FASTISEL-NEXT:    andb $1, %cl
; FASTISEL-NEXT:    shlb $2, %cl
; FASTISEL-NEXT:    orb %dl, %cl
; FASTISEL-NEXT:    andb $1, %dil
; FASTISEL-NEXT:    shlb $3, %dil
; FASTISEL-NEXT:    orb %cl, %dil
; FASTISEL-NEXT:    andb $1, %r10b
; FASTISEL-NEXT:    shlb $4, %r10b
; FASTISEL-NEXT:    orb %dil, %r10b
; FASTISEL-NEXT:    andb $1, %r9b
; FASTISEL-NEXT:    shlb $5, %r9b
; FASTISEL-NEXT:    orb %r10b, %r9b
; FASTISEL-NEXT:    shlb $6, %r8b
; FASTISEL-NEXT:    orb %r9b, %r8b
; FASTISEL-NEXT:    andb $127, %r8b
; FASTISEL-NEXT:    movb %r8b, (%rax)
; FASTISEL-NEXT:    retq
  %j = and <7 x i1> %a, %b
  %k = and <7 x i1> %j, %c
  %l = and <7 x i1> %k, %d
  %m = and <7 x i1> %l, %e
  %n = and <7 x i1> %m, %f
  %o = and <7 x i1> %n, %g
  %p = and <7 x i1> %o, %h
  %q = and <7 x i1> %p, %i
  ret <7 x i1> %q
}

; Forwards a <2 x i1> argument that follows a <128 x i32> argument to a callee
; with the same signature. The x86-64 checks expect the mask to be loaded from
; the incoming stack into %xmm8 and stored at (%rsp) before the call; the
; -fast-isel run additionally round-trips it through mask register %k0
; (vpsllq $63 / vpmovq2m / vpmovm2q). The i686 checks also copy zmm-sized
; pieces from 8..200(%ebp) and read the xmm-sized mask slot from 264(%ebp).
; Presumably the <128 x i32> parameter is there to use up the vector argument
; registers so that %y is passed in memory -- TODO confirm.
declare void @v2i1_mem_callee(<128 x i32> %x, <2 x i1> %y)
define void @v2i1_mem(<128 x i32> %x, <2 x i1> %y) {
; KNL-LABEL: v2i1_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    subq $24, %rsp
; KNL-NEXT:    .cfi_def_cfa_offset 32
; KNL-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; KNL-NEXT:    vmovaps %xmm8, (%rsp)
; KNL-NEXT:    callq _v2i1_mem_callee
; KNL-NEXT:    addq $24, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: v2i1_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    subq $24, %rsp
; SKX-NEXT:    .cfi_def_cfa_offset 32
; SKX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; SKX-NEXT:    vmovaps %xmm8, (%rsp)
; SKX-NEXT:    callq _v2i1_mem_callee
; SKX-NEXT:    addq $24, %rsp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: v2i1_mem:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    .cfi_offset %ebp, -8
; KNL_X32-NEXT:    movl %esp, %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_X32-NEXT:    andl $-64, %esp
; KNL_X32-NEXT:    subl $384, %esp ## imm = 0x180
; KNL_X32-NEXT:    vmovaps 72(%ebp), %zmm5
; KNL_X32-NEXT:    vmovaps 136(%ebp), %zmm6
; KNL_X32-NEXT:    vmovaps 200(%ebp), %zmm7
; KNL_X32-NEXT:    vmovaps 264(%ebp), %xmm4
; KNL_X32-NEXT:    vmovaps %xmm4, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm7, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm6, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm5, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps 8(%ebp), %zmm4
; KNL_X32-NEXT:    vmovaps %zmm4, (%esp)
; KNL_X32-NEXT:    calll _v2i1_mem_callee
; KNL_X32-NEXT:    movl %ebp, %esp
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: v2i1_mem:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    subq $24, %rsp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 32
; FASTISEL-NEXT:    vpsllq $63, {{[0-9]+}}(%rsp), %xmm8
; FASTISEL-NEXT:    vpmovq2m %xmm8, %k0
; FASTISEL-NEXT:    vpmovm2q %k0, %xmm8
; FASTISEL-NEXT:    vmovdqa %xmm8, (%rsp)
; FASTISEL-NEXT:    callq _v2i1_mem_callee
; FASTISEL-NEXT:    addq $24, %rsp
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  call void @v2i1_mem_callee(<128 x i32> %x, <2 x i1> %y)
  ret void
}

; Forwards a <4 x i1> argument that follows a <128 x i32> argument to a callee
; with the same signature. The x86-64 checks expect the mask to be loaded from
; the incoming stack into %xmm8 and stored at (%rsp) before the call; the
; -fast-isel run additionally round-trips it through mask register %k0
; (vpslld $31 / vpmovd2m / vpmovm2d). The i686 checks also copy zmm-sized
; pieces from 8..200(%ebp) and read the xmm-sized mask slot from 264(%ebp).
; Presumably the <128 x i32> parameter is there to use up the vector argument
; registers so that %y is passed in memory -- TODO confirm.
declare void @v4i1_mem_callee(<128 x i32> %x, <4 x i1> %y)
define void @v4i1_mem(<128 x i32> %x, <4 x i1> %y) {
; KNL-LABEL: v4i1_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    subq $24, %rsp
; KNL-NEXT:    .cfi_def_cfa_offset 32
; KNL-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; KNL-NEXT:    vmovaps %xmm8, (%rsp)
; KNL-NEXT:    callq _v4i1_mem_callee
; KNL-NEXT:    addq $24, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: v4i1_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    subq $24, %rsp
; SKX-NEXT:    .cfi_def_cfa_offset 32
; SKX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; SKX-NEXT:    vmovaps %xmm8, (%rsp)
; SKX-NEXT:    callq _v4i1_mem_callee
; SKX-NEXT:    addq $24, %rsp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: v4i1_mem:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    .cfi_offset %ebp, -8
; KNL_X32-NEXT:    movl %esp, %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_X32-NEXT:    andl $-64, %esp
; KNL_X32-NEXT:    subl $384, %esp ## imm = 0x180
; KNL_X32-NEXT:    vmovaps 72(%ebp), %zmm5
; KNL_X32-NEXT:    vmovaps 136(%ebp), %zmm6
; KNL_X32-NEXT:    vmovaps 200(%ebp), %zmm7
; KNL_X32-NEXT:    vmovaps 264(%ebp), %xmm4
; KNL_X32-NEXT:    vmovaps %xmm4, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm7, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm6, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm5, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps 8(%ebp), %zmm4
; KNL_X32-NEXT:    vmovaps %zmm4, (%esp)
; KNL_X32-NEXT:    calll _v4i1_mem_callee
; KNL_X32-NEXT:    movl %ebp, %esp
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: v4i1_mem:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    subq $24, %rsp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 32
; FASTISEL-NEXT:    vpslld $31, {{[0-9]+}}(%rsp), %xmm8
; FASTISEL-NEXT:    vpmovd2m %xmm8, %k0
; FASTISEL-NEXT:    vpmovm2d %k0, %xmm8
; FASTISEL-NEXT:    vmovdqa %xmm8, (%rsp)
; FASTISEL-NEXT:    callq _v4i1_mem_callee
; FASTISEL-NEXT:    addq $24, %rsp
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  call void @v4i1_mem_callee(<128 x i32> %x, <4 x i1> %y)
  ret void
}

; Forwards an <8 x i1> argument that follows a <128 x i32> argument to a callee
; with the same signature. The x86-64 checks expect the mask to be loaded from
; the incoming stack into %xmm8 and stored at (%rsp) before the call; the
; -fast-isel run additionally round-trips it through mask register %k0
; (vpsllw $15 / vpmovw2m / vpmovm2w). The i686 checks also copy zmm-sized
; pieces from 8..200(%ebp) and read the xmm-sized mask slot from 264(%ebp).
; Presumably the <128 x i32> parameter is there to use up the vector argument
; registers so that %y is passed in memory -- TODO confirm.
declare void @v8i1_mem_callee(<128 x i32> %x, <8 x i1> %y)
define void @v8i1_mem(<128 x i32> %x, <8 x i1> %y) {
; KNL-LABEL: v8i1_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    subq $24, %rsp
; KNL-NEXT:    .cfi_def_cfa_offset 32
; KNL-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; KNL-NEXT:    vmovaps %xmm8, (%rsp)
; KNL-NEXT:    callq _v8i1_mem_callee
; KNL-NEXT:    addq $24, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: v8i1_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    subq $24, %rsp
; SKX-NEXT:    .cfi_def_cfa_offset 32
; SKX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; SKX-NEXT:    vmovaps %xmm8, (%rsp)
; SKX-NEXT:    callq _v8i1_mem_callee
; SKX-NEXT:    addq $24, %rsp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: v8i1_mem:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    .cfi_offset %ebp, -8
; KNL_X32-NEXT:    movl %esp, %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_X32-NEXT:    andl $-64, %esp
; KNL_X32-NEXT:    subl $384, %esp ## imm = 0x180
; KNL_X32-NEXT:    vmovaps 72(%ebp), %zmm5
; KNL_X32-NEXT:    vmovaps 136(%ebp), %zmm6
; KNL_X32-NEXT:    vmovaps 200(%ebp), %zmm7
; KNL_X32-NEXT:    vmovaps 264(%ebp), %xmm4
; KNL_X32-NEXT:    vmovaps %xmm4, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm7, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm6, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm5, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps 8(%ebp), %zmm4
; KNL_X32-NEXT:    vmovaps %zmm4, (%esp)
; KNL_X32-NEXT:    calll _v8i1_mem_callee
; KNL_X32-NEXT:    movl %ebp, %esp
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: v8i1_mem:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    subq $24, %rsp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 32
; FASTISEL-NEXT:    vpsllw $15, {{[0-9]+}}(%rsp), %xmm8
; FASTISEL-NEXT:    vpmovw2m %xmm8, %k0
; FASTISEL-NEXT:    vpmovm2w %k0, %xmm8
; FASTISEL-NEXT:    vmovdqa %xmm8, (%rsp)
; FASTISEL-NEXT:    callq _v8i1_mem_callee
; FASTISEL-NEXT:    addq $24, %rsp
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  call void @v8i1_mem_callee(<128 x i32> %x, <8 x i1> %y)
  ret void
}

; Forwards a <16 x i1> argument that follows a <128 x i32> argument to a callee
; with the same signature. The x86-64 checks expect the mask to be loaded from
; the incoming stack into %xmm8 and stored at (%rsp) before the call; the
; -fast-isel run additionally round-trips it through mask register %k0
; (vpsllw $7 / vpmovb2m / vpmovm2b). The i686 checks also copy zmm-sized
; pieces from 8..200(%ebp) and read the xmm-sized mask slot from 264(%ebp).
; Presumably the <128 x i32> parameter is there to use up the vector argument
; registers so that %y is passed in memory -- TODO confirm.
declare void @v16i1_mem_callee(<128 x i32> %x, <16 x i1> %y)
define void @v16i1_mem(<128 x i32> %x, <16 x i1> %y) {
; KNL-LABEL: v16i1_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    subq $24, %rsp
; KNL-NEXT:    .cfi_def_cfa_offset 32
; KNL-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; KNL-NEXT:    vmovaps %xmm8, (%rsp)
; KNL-NEXT:    callq _v16i1_mem_callee
; KNL-NEXT:    addq $24, %rsp
; KNL-NEXT:    retq
;
; SKX-LABEL: v16i1_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    subq $24, %rsp
; SKX-NEXT:    .cfi_def_cfa_offset 32
; SKX-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm8
; SKX-NEXT:    vmovaps %xmm8, (%rsp)
; SKX-NEXT:    callq _v16i1_mem_callee
; SKX-NEXT:    addq $24, %rsp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: v16i1_mem:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    .cfi_offset %ebp, -8
; KNL_X32-NEXT:    movl %esp, %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_X32-NEXT:    andl $-64, %esp
; KNL_X32-NEXT:    subl $384, %esp ## imm = 0x180
; KNL_X32-NEXT:    vmovaps 72(%ebp), %zmm5
; KNL_X32-NEXT:    vmovaps 136(%ebp), %zmm6
; KNL_X32-NEXT:    vmovaps 200(%ebp), %zmm7
; KNL_X32-NEXT:    vmovaps 264(%ebp), %xmm4
; KNL_X32-NEXT:    vmovaps %xmm4, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm7, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm6, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm5, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps 8(%ebp), %zmm4
; KNL_X32-NEXT:    vmovaps %zmm4, (%esp)
; KNL_X32-NEXT:    calll _v16i1_mem_callee
; KNL_X32-NEXT:    movl %ebp, %esp
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: v16i1_mem:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    subq $24, %rsp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 32
; FASTISEL-NEXT:    vpsllw $7, {{[0-9]+}}(%rsp), %xmm8
; FASTISEL-NEXT:    vpmovb2m %xmm8, %k0
; FASTISEL-NEXT:    vpmovm2b %k0, %xmm8
; FASTISEL-NEXT:    vmovdqa %xmm8, (%rsp)
; FASTISEL-NEXT:    callq _v16i1_mem_callee
; FASTISEL-NEXT:    addq $24, %rsp
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  call void @v16i1_mem_callee(<128 x i32> %x, <16 x i1> %y)
  ret void
}

; Forwards a <32 x i1> argument that follows a <128 x i32> argument to a callee
; with the same signature. The x86-64 checks expect a frame pointer with the
; stack realigned to 32 bytes (andq $-32), the mask loaded from 16(%rbp) into
; %ymm8 and stored at (%rsp) before the call; the -fast-isel run additionally
; round-trips it through mask register %k0 (vpsllw $7 / vpmovb2m / vpmovm2b).
; The i686 checks also copy zmm-sized pieces from 8..200(%ebp) and read the
; ymm-sized mask slot from 264(%ebp).
; Presumably the <128 x i32> parameter is there to use up the vector argument
; registers so that %y is passed in memory -- TODO confirm.
declare void @v32i1_mem_callee(<128 x i32> %x, <32 x i1> %y)
define void @v32i1_mem(<128 x i32> %x, <32 x i1> %y) {
; KNL-LABEL: v32i1_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    pushq %rbp
; KNL-NEXT:    .cfi_def_cfa_offset 16
; KNL-NEXT:    .cfi_offset %rbp, -16
; KNL-NEXT:    movq %rsp, %rbp
; KNL-NEXT:    .cfi_def_cfa_register %rbp
; KNL-NEXT:    andq $-32, %rsp
; KNL-NEXT:    subq $64, %rsp
; KNL-NEXT:    vmovaps 16(%rbp), %ymm8
; KNL-NEXT:    vmovaps %ymm8, (%rsp)
; KNL-NEXT:    callq _v32i1_mem_callee
; KNL-NEXT:    movq %rbp, %rsp
; KNL-NEXT:    popq %rbp
; KNL-NEXT:    retq
;
; SKX-LABEL: v32i1_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbp
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    .cfi_offset %rbp, -16
; SKX-NEXT:    movq %rsp, %rbp
; SKX-NEXT:    .cfi_def_cfa_register %rbp
; SKX-NEXT:    andq $-32, %rsp
; SKX-NEXT:    subq $64, %rsp
; SKX-NEXT:    vmovaps 16(%rbp), %ymm8
; SKX-NEXT:    vmovaps %ymm8, (%rsp)
; SKX-NEXT:    callq _v32i1_mem_callee
; SKX-NEXT:    movq %rbp, %rsp
; SKX-NEXT:    popq %rbp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: v32i1_mem:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    .cfi_offset %ebp, -8
; KNL_X32-NEXT:    movl %esp, %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_X32-NEXT:    andl $-64, %esp
; KNL_X32-NEXT:    subl $384, %esp ## imm = 0x180
; KNL_X32-NEXT:    vmovaps 72(%ebp), %zmm5
; KNL_X32-NEXT:    vmovaps 136(%ebp), %zmm6
; KNL_X32-NEXT:    vmovaps 200(%ebp), %zmm7
; KNL_X32-NEXT:    vmovaps 264(%ebp), %ymm4
; KNL_X32-NEXT:    vmovaps %ymm4, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm7, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm6, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm5, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps 8(%ebp), %zmm4
; KNL_X32-NEXT:    vmovaps %zmm4, (%esp)
; KNL_X32-NEXT:    calll _v32i1_mem_callee
; KNL_X32-NEXT:    movl %ebp, %esp
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: v32i1_mem:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rbp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    .cfi_offset %rbp, -16
; FASTISEL-NEXT:    movq %rsp, %rbp
; FASTISEL-NEXT:    .cfi_def_cfa_register %rbp
; FASTISEL-NEXT:    andq $-32, %rsp
; FASTISEL-NEXT:    subq $64, %rsp
; FASTISEL-NEXT:    vpsllw $7, 16(%rbp), %ymm8
; FASTISEL-NEXT:    vpmovb2m %ymm8, %k0
; FASTISEL-NEXT:    vpmovm2b %k0, %ymm8
; FASTISEL-NEXT:    vmovdqa %ymm8, (%rsp)
; FASTISEL-NEXT:    callq _v32i1_mem_callee
; FASTISEL-NEXT:    movq %rbp, %rsp
; FASTISEL-NEXT:    popq %rbp
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  call void @v32i1_mem_callee(<128 x i32> %x, <32 x i1> %y)
  ret void
}

; Forwards a <64 x i1> argument that follows a <128 x i32> argument to a callee
; with the same signature. Expected code differs per run line:
;  - x86-64 with -mcpu=knl: the mask is copied from the incoming stack to the
;    outgoing argument area with a run of 32-bit movl load/store pairs through
;    %eax, the last store going to (%rsp).
;  - x86-64 with -mcpu=skx: a frame pointer is set up, the stack is realigned to
;    64 bytes (andq $-64), and the mask is copied as one zmm-sized value from
;    16(%rbp) to (%rsp).
;  - i686: 32-bit pieces are copied from 516(%ebp) down to 264(%ebp), and four
;    zmm-sized pieces from 8..200(%ebp) are copied as well.
;  - -fast-isel: like the skx run, but the value is round-tripped through mask
;    register %k0 (vpsllw $7 / vpmovb2m / vpmovm2b) before the store.
; Presumably the <128 x i32> parameter is there to use up the vector argument
; registers so that %y is passed in memory -- TODO confirm.
declare void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y)
define void @v64i1_mem(<128 x i32> %x, <64 x i1> %y) {
; KNL-LABEL: v64i1_mem:
; KNL:       ## %bb.0:
; KNL-NEXT:    subq $472, %rsp ## imm = 0x1D8
; KNL-NEXT:    .cfi_def_cfa_offset 480
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, {{[0-9]+}}(%rsp)
; KNL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; KNL-NEXT:    movl %eax, (%rsp)
; KNL-NEXT:    callq _v64i1_mem_callee
; KNL-NEXT:    addq $472, %rsp ## imm = 0x1D8
; KNL-NEXT:    retq
;
; SKX-LABEL: v64i1_mem:
; SKX:       ## %bb.0:
; SKX-NEXT:    pushq %rbp
; SKX-NEXT:    .cfi_def_cfa_offset 16
; SKX-NEXT:    .cfi_offset %rbp, -16
; SKX-NEXT:    movq %rsp, %rbp
; SKX-NEXT:    .cfi_def_cfa_register %rbp
; SKX-NEXT:    andq $-64, %rsp
; SKX-NEXT:    subq $128, %rsp
; SKX-NEXT:    vmovaps 16(%rbp), %zmm8
; SKX-NEXT:    vmovaps %zmm8, (%rsp)
; SKX-NEXT:    callq _v64i1_mem_callee
; SKX-NEXT:    movq %rbp, %rsp
; SKX-NEXT:    popq %rbp
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; KNL_X32-LABEL: v64i1_mem:
; KNL_X32:       ## %bb.0:
; KNL_X32-NEXT:    pushl %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_offset 8
; KNL_X32-NEXT:    .cfi_offset %ebp, -8
; KNL_X32-NEXT:    movl %esp, %ebp
; KNL_X32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_X32-NEXT:    andl $-64, %esp
; KNL_X32-NEXT:    subl $576, %esp ## imm = 0x240
; KNL_X32-NEXT:    vmovaps 8(%ebp), %zmm4
; KNL_X32-NEXT:    vmovaps 72(%ebp), %zmm5
; KNL_X32-NEXT:    vmovaps 136(%ebp), %zmm6
; KNL_X32-NEXT:    vmovaps 200(%ebp), %zmm7
; KNL_X32-NEXT:    movl 516(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 512(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 508(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 504(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 500(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 496(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 492(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 488(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 484(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 480(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 476(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 472(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 468(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 464(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 460(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 456(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 452(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 448(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 444(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 440(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 436(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 432(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 428(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 424(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 420(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 416(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 412(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 408(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 404(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 400(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 396(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 392(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 388(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 384(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 380(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 376(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 372(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 368(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 364(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 360(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 356(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 352(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 348(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 344(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 340(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 336(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 332(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 328(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 324(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 320(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 316(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 312(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 308(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 304(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 300(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 296(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 292(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 288(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 284(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 280(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 276(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 272(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 268(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    movl 264(%ebp), %eax
; KNL_X32-NEXT:    movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm7, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm6, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm5, {{[0-9]+}}(%esp)
; KNL_X32-NEXT:    vmovaps %zmm4, (%esp)
; KNL_X32-NEXT:    calll _v64i1_mem_callee
; KNL_X32-NEXT:    movl %ebp, %esp
; KNL_X32-NEXT:    popl %ebp
; KNL_X32-NEXT:    retl
;
; FASTISEL-LABEL: v64i1_mem:
; FASTISEL:       ## %bb.0:
; FASTISEL-NEXT:    pushq %rbp
; FASTISEL-NEXT:    .cfi_def_cfa_offset 16
; FASTISEL-NEXT:    .cfi_offset %rbp, -16
; FASTISEL-NEXT:    movq %rsp, %rbp
; FASTISEL-NEXT:    .cfi_def_cfa_register %rbp
; FASTISEL-NEXT:    andq $-64, %rsp
; FASTISEL-NEXT:    subq $128, %rsp
; FASTISEL-NEXT:    vpsllw $7, 16(%rbp), %zmm8
; FASTISEL-NEXT:    vpmovb2m %zmm8, %k0
; FASTISEL-NEXT:    vpmovm2b %k0, %zmm8
; FASTISEL-NEXT:    vmovdqa64 %zmm8, (%rsp)
; FASTISEL-NEXT:    callq _v64i1_mem_callee
; FASTISEL-NEXT:    movq %rbp, %rsp
; FASTISEL-NEXT:    popq %rbp
; FASTISEL-NEXT:    vzeroupper
; FASTISEL-NEXT:    retq
  call void @v64i1_mem_callee(<128 x i32> %x, <64 x i1> %y)
  ret void
}