; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASELINE
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; https://bugs.llvm.org/show_bug.cgi?id=37104

; All the advanced stuff (negative tests, commutativity) is handled in the
; scalar version of the test only.

; Every @out_* function below has the same IR body, instantiated for a
; different vector type:
;   %r = (%x & %mask) | (%y & ~%mask)
; where ~%mask is written as an xor of %mask with an all-ones constant vector.
; The assertion comments inside each function pin the assembly expected from
; llc for each of the four -mattr configurations listed at the top of the file.
; They are autogenerated: regenerate them with the script named in the NOTE
; rather than editing them by hand.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notb %al
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    orb %dil, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %mx = and <1 x i8> %x, %mask
  %notmask = xor <1 x i8> %mask, <i8 -1>
  %my = and <1 x i8> %y, %notmask
  %r = or <1 x i8> %mx, %my
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movl %r8d, %eax
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    andl %r8d, %edi
; CHECK-BASELINE-NEXT:    notb %al
; CHECK-BASELINE-NEXT:    notb %r9b
; CHECK-BASELINE-NEXT:    andb %cl, %r9b
; CHECK-BASELINE-NEXT:    andb %dl, %al
; CHECK-BASELINE-NEXT:    orb %dil, %al
; CHECK-BASELINE-NEXT:    orb %sil, %r9b
; CHECK-BASELINE-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-BASELINE-NEXT:    movl %r9d, %edx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movl %r8d, %eax
; CHECK-SSE1-NEXT:    andl %r9d, %esi
; CHECK-SSE1-NEXT:    andl %r8d, %edi
; CHECK-SSE1-NEXT:    notb %al
; CHECK-SSE1-NEXT:    notb %r9b
; CHECK-SSE1-NEXT:    andb %cl, %r9b
; CHECK-SSE1-NEXT:    andb %dl, %al
; CHECK-SSE1-NEXT:    orb %dil, %al
; CHECK-SSE1-NEXT:    orb %sil, %r9b
; CHECK-SSE1-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-SSE1-NEXT:    movl %r9d, %edx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i8> %x, %mask
  %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
  %my = and <2 x i8> %y, %notmask
  %r = or <2 x i8> %mx, %my
  ret <2 x i8> %r
}

define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notl %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    orl %edi, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %mx = and <1 x i16> %x, %mask
  %notmask = xor <1 x i16> %mask, <i16 -1>
  %my = and <1 x i16> %y, %notmask
  %r = or <1 x i16> %mx, %my
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
; CHECK-BASELINE-NEXT:    xorb %r11b, %dl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-BASELINE-NEXT:    xorb %r11b, %dl
; CHECK-BASELINE-NEXT:    xorb %r10b, %cl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT:    xorb %r10b, %cl
; CHECK-BASELINE-NEXT:    xorb %dil, %r8b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT:    xorb %dil, %r8b
; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rax)
; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
; CHECK-BASELINE-NEXT:    movb %dl, 1(%rax)
; CHECK-BASELINE-NEXT:    movb %sil, (%rax)
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT:    xorb %r9b, %sil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT:    xorb %r9b, %sil
; CHECK-SSE1-NEXT:    xorb %r11b, %dl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT:    xorb %r11b, %dl
; CHECK-SSE1-NEXT:    xorb %r10b, %cl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    xorb %r10b, %cl
; CHECK-SSE1-NEXT:    xorb %dil, %r8b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT:    xorb %dil, %r8b
; CHECK-SSE1-NEXT:    movb %r8b, 3(%rax)
; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
; CHECK-SSE1-NEXT:    movb %dl, 1(%rax)
; CHECK-SSE1-NEXT:    movb %sil, (%rax)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

; Same as @out_v4i8, except that element 2 of the all-ones xor constant is
; undef instead of -1.
define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i8_undef:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT:    xorb %r9b, %sil
; CHECK-BASELINE-NEXT:    xorb %r10b, %dl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-BASELINE-NEXT:    xorb %r10b, %dl
; CHECK-BASELINE-NEXT:    xorb %dil, %r8b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT:    xorb %dil, %r8b
; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rax)
; CHECK-BASELINE-NEXT:    movb %dl, 1(%rax)
; CHECK-BASELINE-NEXT:    movb %sil, (%rax)
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i8_undef:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    xorb %r9b, %sil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT:    xorb %r9b, %sil
; CHECK-SSE1-NEXT:    xorb %r10b, %dl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT:    xorb %r10b, %dl
; CHECK-SSE1-NEXT:    xorb %dil, %r8b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT:    xorb %dil, %r8b
; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
; CHECK-SSE1-NEXT:    movb %r8b, 3(%rax)
; CHECK-SSE1-NEXT:    movb %dl, 1(%rax)
; CHECK-SSE1-NEXT:    movb %sil, (%rax)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i8_undef:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i8_undef:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <4 x i8> %x, %mask
  %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1>
  %my = and <4 x i8> %y, %notmask
  %r = or <4 x i8> %mx, %my
  ret <4 x i8> %r
}

define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i16:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movl %r8d, %eax
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    andl %r8d, %edi
; CHECK-BASELINE-NEXT:    notl %eax
; CHECK-BASELINE-NEXT:    notl %r9d
; CHECK-BASELINE-NEXT:    andl %ecx, %r9d
; CHECK-BASELINE-NEXT:    orl %esi, %r9d
; CHECK-BASELINE-NEXT:    andl %edx, %eax
; CHECK-BASELINE-NEXT:    orl %edi, %eax
; CHECK-BASELINE-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-BASELINE-NEXT:    movl %r9d, %edx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i16:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movl %r8d, %eax
; CHECK-SSE1-NEXT:    andl %r9d, %esi
; CHECK-SSE1-NEXT:    andl %r8d, %edi
; CHECK-SSE1-NEXT:    notl %eax
; CHECK-SSE1-NEXT:    notl %r9d
; CHECK-SSE1-NEXT:    andl %ecx, %r9d
; CHECK-SSE1-NEXT:    orl %esi, %r9d
; CHECK-SSE1-NEXT:    andl %edx, %eax
; CHECK-SSE1-NEXT:    orl %edi, %eax
; CHECK-SSE1-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-SSE1-NEXT:    movl %r9d, %edx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i16:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i16:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i16> %x, %mask
  %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1>
  %my = and <2 x i16> %y, %notmask
  %r = or <2 x i16> %mx, %my
  ret <2 x i16> %r
}

define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: out_v1i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    andl %edx, %eax
; CHECK-NEXT:    xorl %esi, %eax
; CHECK-NEXT:    retq
  %mx = and <1 x i32> %x, %mask
  %notmask = xor <1 x i32> %mask, <i32 -1>
  %my = and <1 x i32> %y, %notmask
  %r = or <1 x i32> %mx, %my
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v8i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbp
; CHECK-BASELINE-NEXT:    pushq %r15
; CHECK-BASELINE-NEXT:    pushq %r14
; CHECK-BASELINE-NEXT:    pushq %r12
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    xorb %bl, %sil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT:    xorb %bl, %sil
; CHECK-BASELINE-NEXT:    xorb %r12b, %dl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-BASELINE-NEXT:    xorb %r12b, %dl
; CHECK-BASELINE-NEXT:    xorb %r15b, %cl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT:    xorb %r15b, %cl
; CHECK-BASELINE-NEXT:    xorb %r14b, %r8b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT:    xorb %r14b, %r8b
; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT:    xorb %bpl, %r9b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT:    xorb %r11b, %bpl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT:    xorb %r11b, %bpl
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT:    xorb %r10b, %r11b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT:    xorb %r10b, %r11b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    xorb %dil, %bl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT:    xorb %dil, %bl
; CHECK-BASELINE-NEXT:    movb %bl, 7(%rax)
; CHECK-BASELINE-NEXT:    movb %r11b, 6(%rax)
; CHECK-BASELINE-NEXT:    movb %bpl, 5(%rax)
; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rax)
; CHECK-BASELINE-NEXT:    movb %r8b, 3(%rax)
; CHECK-BASELINE-NEXT:    movb %cl, 2(%rax)
; CHECK-BASELINE-NEXT:    movb %dl, 1(%rax)
; CHECK-BASELINE-NEXT:    movb %sil, (%rax)
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    popq %r12
; CHECK-BASELINE-NEXT:    popq %r14
; CHECK-BASELINE-NEXT:    popq %r15
; CHECK-BASELINE-NEXT:    popq %rbp
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v8i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    pushq %rbp
; CHECK-SSE1-NEXT:    pushq %r15
; CHECK-SSE1-NEXT:    pushq %r14
; CHECK-SSE1-NEXT:    pushq %r12
; CHECK-SSE1-NEXT:    pushq %rbx
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    xorb %bl, %sil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT:    xorb %bl, %sil
; CHECK-SSE1-NEXT:    xorb %r12b, %dl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT:    xorb %r12b, %dl
; CHECK-SSE1-NEXT:    xorb %r15b, %cl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    xorb %r15b, %cl
; CHECK-SSE1-NEXT:    xorb %r14b, %r8b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT:    xorb %r14b, %r8b
; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT:    xorb %bpl, %r9b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT:    xorb %r11b, %bpl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT:    xorb %r11b, %bpl
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT:    xorb %r10b, %r11b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT:    xorb %r10b, %r11b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    xorb %dil, %bl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT:    xorb %dil, %bl
; CHECK-SSE1-NEXT:    movb %bl, 7(%rax)
; CHECK-SSE1-NEXT:    movb %r11b, 6(%rax)
; CHECK-SSE1-NEXT:    movb %bpl, 5(%rax)
; CHECK-SSE1-NEXT:    movb %r9b, 4(%rax)
; CHECK-SSE1-NEXT:    movb %r8b, 3(%rax)
; CHECK-SSE1-NEXT:    movb %cl, 2(%rax)
; CHECK-SSE1-NEXT:    movb %dl, 1(%rax)
; CHECK-SSE1-NEXT:    movb %sil, (%rax)
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    popq %r12
; CHECK-SSE1-NEXT:    popq %r14
; CHECK-SSE1-NEXT:    popq %r15
; CHECK-SSE1-NEXT:    popq %rbp
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v8i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v8i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <8 x i8> %x, %mask
  %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <8 x i8> %y, %notmask
  %r = or <8 x i8> %mx, %my
  ret <8 x i8> %r
}

define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i16:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
; CHECK-BASELINE-NEXT:    xorl %edi, %edx
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT:    xorl %edi, %edx
; CHECK-BASELINE-NEXT:    xorl %r11d, %ecx
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT:    xorl %r11d, %ecx
; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
; CHECK-BASELINE-NEXT:    movw %r8w, 6(%rax)
; CHECK-BASELINE-NEXT:    movw %cx, 4(%rax)
; CHECK-BASELINE-NEXT:    movw %dx, 2(%rax)
; CHECK-BASELINE-NEXT:    movw %si, (%rax)
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i16:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT:    xorl %r9d, %esi
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT:    xorl %r9d, %esi
; CHECK-SSE1-NEXT:    xorl %edi, %edx
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT:    xorl %edi, %edx
; CHECK-SSE1-NEXT:    xorl %r11d, %ecx
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT:    xorl %r11d, %ecx
; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
; CHECK-SSE1-NEXT:    movw %r8w, 6(%rax)
; CHECK-SSE1-NEXT:    movw %cx, 4(%rax)
; CHECK-SSE1-NEXT:    movw %dx, 2(%rax)
; CHECK-SSE1-NEXT:    movw %si, (%rax)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i16:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i16:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

; Same as @out_v4i16, except that element 2 of the all-ones xor constant is
; undef instead of -1.
define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v4i16_undef:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movzwl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT:    xorl %r9d, %esi
; CHECK-BASELINE-NEXT:    xorl %edi, %edx
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT:    xorl %edi, %edx
; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
; CHECK-BASELINE-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT:    xorl %r10d, %r8d
; CHECK-BASELINE-NEXT:    movw %cx, 4(%rax)
; CHECK-BASELINE-NEXT:    movw %r8w, 6(%rax)
; CHECK-BASELINE-NEXT:    movw %dx, 2(%rax)
; CHECK-BASELINE-NEXT:    movw %si, (%rax)
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v4i16_undef:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movzwl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT:    xorl %r9d, %esi
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT:    xorl %r9d, %esi
; CHECK-SSE1-NEXT:    xorl %edi, %edx
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT:    xorl %edi, %edx
; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
; CHECK-SSE1-NEXT:    andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT:    xorl %r10d, %r8d
; CHECK-SSE1-NEXT:    movw %cx, 4(%rax)
; CHECK-SSE1-NEXT:    movw %r8w, 6(%rax)
; CHECK-SSE1-NEXT:    movw %dx, 2(%rax)
; CHECK-SSE1-NEXT:    movw %si, (%rax)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v4i16_undef:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v4i16_undef:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <4 x i16> %x, %mask
  %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 -1>
  %my = and <4 x i16> %y, %notmask
  %r = or <4 x i16> %mx, %my
  ret <4 x i16> %r
}

define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i32:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movl %edi, %eax
; CHECK-BASELINE-NEXT:    xorl %edx, %eax
; CHECK-BASELINE-NEXT:    andl %r8d, %eax
; CHECK-BASELINE-NEXT:    xorl %edx, %eax
; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    xorl %ecx, %esi
; CHECK-BASELINE-NEXT:    movl %esi, %edx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i32:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movl %edi, %eax
; CHECK-SSE1-NEXT:    xorl %edx, %eax
; CHECK-SSE1-NEXT:    andl %r8d, %eax
; CHECK-SSE1-NEXT:    xorl %edx, %eax
; CHECK-SSE1-NEXT:    xorl %ecx, %esi
; CHECK-SSE1-NEXT:    andl %r9d, %esi
; CHECK-SSE1-NEXT:    xorl %ecx, %esi
; CHECK-SSE1-NEXT:    movl %esi, %edx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i32:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i32:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i32> %x, %mask
  %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1>
  %my = and <2 x i32> %y, %notmask
  %r = or <2 x i32> %mx, %my
  ret <2 x i32> %r
}

define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: out_v1i64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    xorq %rsi, %rax
; CHECK-NEXT:    andq %rdx, %rax
; CHECK-NEXT:    xorq %rsi, %rax
; CHECK-NEXT:    retq
  %mx = and <1 x i64> %x, %mask
  %notmask = xor <1 x i64> %mask, <i64 -1>
  %my = and <1 x i64> %y, %notmask
  %r = or <1 x i64> %mx, %my
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v16i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    pushq %rbp
; CHECK-BASELINE-NEXT:    pushq %r15
; CHECK-BASELINE-NEXT:    pushq %r14
; CHECK-BASELINE-NEXT:    pushq %r13
; CHECK-BASELINE-NEXT:    pushq %r12
; CHECK-BASELINE-NEXT:    pushq %rbx
; CHECK-BASELINE-NEXT:    movl %edx, %r11d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    xorb %bl, %sil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT:    xorb %bl, %sil
; CHECK-BASELINE-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    xorb %dl, %r11b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT:    xorb %dl, %r11b
; CHECK-BASELINE-NEXT:    xorb %al, %cl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT:    xorb %al, %cl
; CHECK-BASELINE-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    xorb %r10b, %r8b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT:    xorb %r10b, %r8b
; CHECK-BASELINE-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT:    xorb %r12b, %r9b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT:    xorb %r12b, %r9b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT:    xorb %bpl, %r12b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
; CHECK-BASELINE-NEXT:    xorb %bpl, %r12b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT:    xorb %r14b, %bpl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT:    xorb %r14b, %bpl
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-BASELINE-NEXT:    xorb %r15b, %sil
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT:    xorb %r15b, %sil
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-BASELINE-NEXT:    xorb %r13b, %dl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-BASELINE-NEXT:    xorb %r13b, %dl
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    xorb %al, %cl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT:    xorb %al, %cl
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    xorb %al, %r13b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT:    xorb %al, %r13b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    xorb %al, %r15b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT:    xorb %al, %r15b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    xorb %al, %r14b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
; CHECK-BASELINE-NEXT:    xorb %al, %r14b
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    xorb %al, %bl
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT:    xorb %al, %bl
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-BASELINE-NEXT:    xorb %r8b, %al
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %al
; CHECK-BASELINE-NEXT:    xorb %r8b, %al
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-BASELINE-NEXT:    xorb %r8b, %r10b
; CHECK-BASELINE-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT:    xorb %r8b, %r10b
; CHECK-BASELINE-NEXT:    movb %r10b, 15(%rdi)
; CHECK-BASELINE-NEXT:    movb %al, 14(%rdi)
; CHECK-BASELINE-NEXT:    movb %bl, 13(%rdi)
; CHECK-BASELINE-NEXT:    movb %r14b, 12(%rdi)
; CHECK-BASELINE-NEXT:    movb %r15b, 11(%rdi)
; CHECK-BASELINE-NEXT:    movb %r13b, 10(%rdi)
; CHECK-BASELINE-NEXT:    movb %cl, 9(%rdi)
; CHECK-BASELINE-NEXT:    movb %dl, 8(%rdi)
; CHECK-BASELINE-NEXT:    movb %sil, 7(%rdi)
; CHECK-BASELINE-NEXT:    movb %bpl, 6(%rdi)
; CHECK-BASELINE-NEXT:    movb %r12b, 5(%rdi)
; CHECK-BASELINE-NEXT:    movb %r9b, 4(%rdi)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT:    movb %al, 3(%rdi)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT:    movb %al, 2(%rdi)
; CHECK-BASELINE-NEXT:    movb %r11b, 1(%rdi)
; CHECK-BASELINE-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-BASELINE-NEXT:    movb %al, (%rdi)
; CHECK-BASELINE-NEXT:    movq %rdi, %rax
; CHECK-BASELINE-NEXT:    popq %rbx
; CHECK-BASELINE-NEXT:    popq %r12
; CHECK-BASELINE-NEXT:    popq %r13
; CHECK-BASELINE-NEXT:    popq %r14
; CHECK-BASELINE-NEXT:    popq %r15
; CHECK-BASELINE-NEXT:    popq %rbp
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v16i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    pushq %rbp
; CHECK-SSE1-NEXT:    pushq %r15
; CHECK-SSE1-NEXT:    pushq %r14
; CHECK-SSE1-NEXT:    pushq %r13
; CHECK-SSE1-NEXT:    pushq %r12
; CHECK-SSE1-NEXT:    pushq %rbx
; CHECK-SSE1-NEXT:    movl %edx, %r11d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    xorb %bl, %sil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT:    xorb %bl, %sil
; CHECK-SSE1-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    xorb %dl, %r11b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT:    xorb %dl, %r11b
; CHECK-SSE1-NEXT:    xorb %al, %cl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    xorb %al, %cl
; CHECK-SSE1-NEXT:    movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    xorb %r10b, %r8b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT:    xorb %r10b, %r8b
; CHECK-SSE1-NEXT:    movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT:    xorb %r12b, %r9b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT:    xorb %r12b, %r9b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT:    xorb %bpl, %r12b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r12b
; CHECK-SSE1-NEXT:    xorb %bpl, %r12b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT:    xorb %r14b, %bpl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT:    xorb %r14b, %bpl
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-SSE1-NEXT:    xorb %r15b, %sil
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT:    xorb %r15b, %sil
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %edx
; CHECK-SSE1-NEXT:    xorb %r13b, %dl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT:    xorb %r13b, %dl
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    xorb %al, %cl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT:    xorb %al, %cl
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    xorb %al, %r13b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT:    xorb %al, %r13b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    xorb %al, %r15b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT:    xorb %al, %r15b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    xorb %al, %r14b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r14b
; CHECK-SSE1-NEXT:    xorb %al, %r14b
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    xorb %al, %bl
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT:    xorb %al, %bl
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-SSE1-NEXT:    xorb %r8b, %al
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %al
; CHECK-SSE1-NEXT:    xorb %r8b, %al
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT:    movzbl {{[0-9]+}}(%rsp), %r8d
; CHECK-SSE1-NEXT:    xorb %r8b, %r10b
; CHECK-SSE1-NEXT:    andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT:    xorb %r8b, %r10b
; CHECK-SSE1-NEXT:    movb %r10b, 15(%rdi)
; CHECK-SSE1-NEXT:    movb %al, 14(%rdi)
; CHECK-SSE1-NEXT:    movb %bl, 13(%rdi)
; CHECK-SSE1-NEXT:    movb %r14b, 12(%rdi)
; CHECK-SSE1-NEXT:    movb %r15b, 11(%rdi)
; CHECK-SSE1-NEXT:    movb %r13b, 10(%rdi)
; CHECK-SSE1-NEXT:    movb %cl, 9(%rdi)
; CHECK-SSE1-NEXT:    movb %dl, 8(%rdi)
; CHECK-SSE1-NEXT:    movb %sil, 7(%rdi)
; CHECK-SSE1-NEXT:    movb %bpl, 6(%rdi)
; CHECK-SSE1-NEXT:    movb %r12b, 5(%rdi)
; CHECK-SSE1-NEXT:    movb %r9b, 4(%rdi)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT:    movb %al, 3(%rdi)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT:    movb %al, 2(%rdi)
; CHECK-SSE1-NEXT:    movb %r11b, 1(%rdi)
; CHECK-SSE1-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-SSE1-NEXT:    movb %al, (%rdi)
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    popq %rbx
; CHECK-SSE1-NEXT:    popq %r12
; CHECK-SSE1-NEXT:    popq %r13
; CHECK-SSE1-NEXT:    popq %r14
; CHECK-SSE1-NEXT:    popq %r15
; CHECK-SSE1-NEXT:    popq %rbp
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v16i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v16i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <16 x i8> %x, %mask
  %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %my = and <16 x i8> %y, %notmask
  %r = or <16 x i8> %mx, %my
  ret <16 x i8> %r
}

define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v8i16: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r15d ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx ; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp ; CHECK-BASELINE-NEXT: xorl %ebp, %esi ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si ; CHECK-BASELINE-NEXT: xorl %ebp, %esi ; CHECK-BASELINE-NEXT: xorl %ebx, %edx ; CHECK-BASELINE-NEXT: andw
{{[0-9]+}}(%rsp), %dx ; CHECK-BASELINE-NEXT: xorl %ebx, %edx ; CHECK-BASELINE-NEXT: xorl %edi, %ecx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx ; CHECK-BASELINE-NEXT: xorl %edi, %ecx ; CHECK-BASELINE-NEXT: xorl %r12d, %r8d ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w ; CHECK-BASELINE-NEXT: xorl %r12d, %r8d ; CHECK-BASELINE-NEXT: xorl %r15d, %r9d ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w ; CHECK-BASELINE-NEXT: xorl %r15d, %r9d ; CHECK-BASELINE-NEXT: movl %r14d, %edi ; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %di ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di ; CHECK-BASELINE-NEXT: xorl %r14d, %edi ; CHECK-BASELINE-NEXT: movl %r11d, %ebx ; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %bx ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx ; CHECK-BASELINE-NEXT: xorl %r11d, %ebx ; CHECK-BASELINE-NEXT: movl %r10d, %ebp ; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %bp ; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bp ; CHECK-BASELINE-NEXT: xorl %r10d, %ebp ; CHECK-BASELINE-NEXT: movw %bp, 14(%rax) ; CHECK-BASELINE-NEXT: movw %bx, 12(%rax) ; CHECK-BASELINE-NEXT: movw %di, 10(%rax) ; CHECK-BASELINE-NEXT: movw %r9w, 8(%rax) ; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) ; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) ; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) ; CHECK-BASELINE-NEXT: movw %si, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v8i16: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r15d ; CHECK-SSE1-NEXT: movzwl 
{{[0-9]+}}(%rsp), %r12d ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx ; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp ; CHECK-SSE1-NEXT: xorl %ebp, %esi ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si ; CHECK-SSE1-NEXT: xorl %ebp, %esi ; CHECK-SSE1-NEXT: xorl %ebx, %edx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx ; CHECK-SSE1-NEXT: xorl %ebx, %edx ; CHECK-SSE1-NEXT: xorl %edi, %ecx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx ; CHECK-SSE1-NEXT: xorl %edi, %ecx ; CHECK-SSE1-NEXT: xorl %r12d, %r8d ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w ; CHECK-SSE1-NEXT: xorl %r12d, %r8d ; CHECK-SSE1-NEXT: xorl %r15d, %r9d ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w ; CHECK-SSE1-NEXT: xorl %r15d, %r9d ; CHECK-SSE1-NEXT: movl %r14d, %edi ; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %di ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di ; CHECK-SSE1-NEXT: xorl %r14d, %edi ; CHECK-SSE1-NEXT: movl %r11d, %ebx ; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %bx ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx ; CHECK-SSE1-NEXT: xorl %r11d, %ebx ; CHECK-SSE1-NEXT: movl %r10d, %ebp ; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %bp ; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bp ; CHECK-SSE1-NEXT: xorl %r10d, %ebp ; CHECK-SSE1-NEXT: movw %bp, 14(%rax) ; CHECK-SSE1-NEXT: movw %bx, 12(%rax) ; CHECK-SSE1-NEXT: movw %di, 10(%rax) ; CHECK-SSE1-NEXT: movw %r9w, 8(%rax) ; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) ; CHECK-SSE1-NEXT: movw %cx, 4(%rax) ; CHECK-SSE1-NEXT: movw %dx, 2(%rax) ; CHECK-SSE1-NEXT: movw %si, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v8i16: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v8i16: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vpcmov %xmm2, 
%xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %mx = and <8 x i16> %x, %mask %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> %my = and <8 x i16> %y, %notmask %r = or <8 x i16> %mx, %my ret <8 x i16> %r } define <4 x i32> @out_v4i32(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i32: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d ; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d ; CHECK-BASELINE-NEXT: movl (%rdx), %edi ; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d ; CHECK-BASELINE-NEXT: movl (%rsi), %r11d ; CHECK-BASELINE-NEXT: xorl %edi, %r11d ; CHECK-BASELINE-NEXT: andl (%rcx), %r11d ; CHECK-BASELINE-NEXT: xorl %edi, %r11d ; CHECK-BASELINE-NEXT: movl 4(%rsi), %edi ; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: andl 4(%rcx), %edi ; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: movl 8(%rsi), %edx ; CHECK-BASELINE-NEXT: xorl %r9d, %edx ; CHECK-BASELINE-NEXT: andl 8(%rcx), %edx ; CHECK-BASELINE-NEXT: xorl %r9d, %edx ; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: movl %esi, 12(%rax) ; CHECK-BASELINE-NEXT: movl %edx, 8(%rax) ; CHECK-BASELINE-NEXT: movl %edi, 4(%rax) ; CHECK-BASELINE-NEXT: movl %r11d, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i32: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v4i32: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v4i32: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 ; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, ptr%px, align 16 %y = load <4 x i32>, ptr%py, align 16 %mask = load <4 x i32>, ptr%pmask, align 16 %mx = and <4 x i32> %x, %mask %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> %my = and <4 x i32> %y, %notmask %r = or <4 x i32> %mx, %my ret <4 x i32> %r } define <4 x i32> @out_v4i32_undef(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i32_undef: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r9d ; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d ; CHECK-BASELINE-NEXT: movl (%rdx), %edi ; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d ; CHECK-BASELINE-NEXT: andl 8(%rcx), %r9d ; CHECK-BASELINE-NEXT: movl (%rsi), %edx ; CHECK-BASELINE-NEXT: xorl %edi, %edx ; CHECK-BASELINE-NEXT: andl (%rcx), %edx ; CHECK-BASELINE-NEXT: xorl %edi, %edx ; CHECK-BASELINE-NEXT: movl 4(%rsi), %edi ; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: andl 4(%rcx), %edi ; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: movl %r9d, 8(%rax) ; CHECK-BASELINE-NEXT: movl %esi, 12(%rax) ; CHECK-BASELINE-NEXT: movl %edi, 4(%rax) ; CHECK-BASELINE-NEXT: movl %edx, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i32_undef: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 ; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 ; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 ; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) ; CHECK-SSE1-NEXT: retq ; ; 
CHECK-SSE2-LABEL: out_v4i32_undef: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm1 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v4i32_undef: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 ; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i32>, ptr%px, align 16 %y = load <4 x i32>, ptr%py, align 16 %mask = load <4 x i32>, ptr%pmask, align 16 %mx = and <4 x i32> %x, %mask %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> %my = and <4 x i32> %y, %notmask %r = or <4 x i32> %mx, %my ret <4 x i32> %r } define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { ; CHECK-BASELINE-LABEL: out_v2i64: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: xorq %rdx, %rax ; CHECK-BASELINE-NEXT: andq %r8, %rax ; CHECK-BASELINE-NEXT: xorq %rdx, %rax ; CHECK-BASELINE-NEXT: xorq %rcx, %rsi ; CHECK-BASELINE-NEXT: andq %r9, %rsi ; CHECK-BASELINE-NEXT: xorq %rcx, %rsi ; CHECK-BASELINE-NEXT: movq %rsi, %rdx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v2i64: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: xorq %rdx, %rax ; CHECK-SSE1-NEXT: andq %r8, %rax ; CHECK-SSE1-NEXT: xorq %rdx, %rax ; CHECK-SSE1-NEXT: xorq %rcx, %rsi ; CHECK-SSE1-NEXT: andq %r9, %rsi ; CHECK-SSE1-NEXT: xorq %rcx, %rsi ; CHECK-SSE1-NEXT: movq %rsi, %rdx ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v2i64: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v2i64: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %mx = and <2 x i64> %x, %mask %notmask = xor 
<2 x i64> %mask, <i64 -1, i64 -1> %my = and <2 x i64> %y, %notmask %r = or <2 x i64> %mx, %my ret <2 x i64> %r } ; ============================================================================ ; ; 256-bit vector width ; ============================================================================ ; define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v32i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rcx, %r10 ; CHECK-BASELINE-NEXT: movq %rdx, %r8 ; CHECK-BASELINE-NEXT: movq %rsi, %r9 ; CHECK-BASELINE-NEXT: movq %rdi, %r11 ; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %ebp ; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r14d ; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r15d ; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r12d ; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %esi ; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %r13d ; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edx ; CHECK-BASELINE-NEXT: movzbl 2(%r8), %edi ; CHECK-BASELINE-NEXT: movzbl (%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 1(%r8), %ecx ; CHECK-BASELINE-NEXT: movzbl (%r9), %ebx ; CHECK-BASELINE-NEXT: 
xorb %al, %bl ; CHECK-BASELINE-NEXT: andb (%r10), %bl ; CHECK-BASELINE-NEXT: xorb %al, %bl ; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 1(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 1(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 2(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: andb 2(%r10), %al ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 3(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %dl, %al ; CHECK-BASELINE-NEXT: andb 3(%r10), %al ; CHECK-BASELINE-NEXT: xorb %dl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 4(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %r13b, %al ; CHECK-BASELINE-NEXT: andb 4(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r13b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 5(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: andb 5(%r10), %al ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 6(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: andb 6(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 7(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: andb 7(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 8(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: andb 8(%r10), %al ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: movb %al, 
{{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 9(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: andb 9(%r10), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 10(%r9), %eax ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 10(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 11(%r9), %eax ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 11(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 12(%r9), %eax ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 12(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 13(%r9), %eax ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 13(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 14(%r9), %eax ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 14(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 15(%r9), %eax ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; 
CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: andb 15(%r10), %al ; CHECK-BASELINE-NEXT: xorb %cl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 16(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 16(%r9), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 16(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 17(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 17(%r9), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 17(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 18(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 18(%r9), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 18(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 19(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 19(%r9), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 19(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 20(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 20(%r9), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 20(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 21(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 21(%r9), %r13d ; CHECK-BASELINE-NEXT: xorb %al, %r13b ; CHECK-BASELINE-NEXT: andb 21(%r10), %r13b ; CHECK-BASELINE-NEXT: xorb %al, %r13b ; CHECK-BASELINE-NEXT: movzbl 22(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 22(%r9), %r12d ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: andb 22(%r10), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; 
CHECK-BASELINE-NEXT: movzbl 23(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 23(%r9), %r15d ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: andb 23(%r10), %r15b ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: movzbl 24(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 24(%r9), %r14d ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: andb 24(%r10), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: movzbl 25(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 25(%r9), %ebp ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: andb 25(%r10), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: movzbl 26(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 26(%r9), %edi ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: andb 26(%r10), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: movzbl 27(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 27(%r9), %esi ; CHECK-BASELINE-NEXT: xorb %al, %sil ; CHECK-BASELINE-NEXT: andb 27(%r10), %sil ; CHECK-BASELINE-NEXT: xorb %al, %sil ; CHECK-BASELINE-NEXT: movzbl 28(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 28(%r9), %edx ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 28(%r10), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movzbl 29(%r8), %eax ; CHECK-BASELINE-NEXT: movzbl 29(%r9), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 29(%r10), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movzbl 30(%r8), %ebx ; CHECK-BASELINE-NEXT: movzbl 30(%r9), %eax ; CHECK-BASELINE-NEXT: xorb %bl, %al ; CHECK-BASELINE-NEXT: andb 30(%r10), %al ; CHECK-BASELINE-NEXT: xorb %bl, %al ; CHECK-BASELINE-NEXT: movzbl 31(%r8), %r8d ; CHECK-BASELINE-NEXT: movzbl 31(%r9), %ebx ; CHECK-BASELINE-NEXT: xorb %r8b, %bl ; CHECK-BASELINE-NEXT: andb 31(%r10), %bl ; CHECK-BASELINE-NEXT: xorb %r8b, %bl ; CHECK-BASELINE-NEXT: movb %bl, 31(%r11) ; CHECK-BASELINE-NEXT: movb %al, 30(%r11) ; CHECK-BASELINE-NEXT: movb %cl, 29(%r11) ; 
CHECK-BASELINE-NEXT: movb %dl, 28(%r11) ; CHECK-BASELINE-NEXT: movb %sil, 27(%r11) ; CHECK-BASELINE-NEXT: movb %dil, 26(%r11) ; CHECK-BASELINE-NEXT: movb %bpl, 25(%r11) ; CHECK-BASELINE-NEXT: movb %r14b, 24(%r11) ; CHECK-BASELINE-NEXT: movb %r15b, 23(%r11) ; CHECK-BASELINE-NEXT: movb %r12b, 22(%r11) ; CHECK-BASELINE-NEXT: movb %r13b, 21(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 20(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 19(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 18(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 17(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 16(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 15(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 14(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 13(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 12(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 11(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 10(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 9(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 8(%r11) ; 
CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 7(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 6(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 5(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 4(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 3(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 2(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 1(%r11) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, (%r11) ; CHECK-BASELINE-NEXT: movq %r11, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v32i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rcx, %r10 ; CHECK-SSE1-NEXT: movq %rdx, %r8 ; CHECK-SSE1-NEXT: movq %rsi, %r9 ; CHECK-SSE1-NEXT: movq %rdi, %r11 ; CHECK-SSE1-NEXT: movzbl 15(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 
12(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 9(%rdx), %ebp ; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r14d ; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r15d ; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r12d ; CHECK-SSE1-NEXT: movzbl 5(%rdx), %esi ; CHECK-SSE1-NEXT: movzbl 4(%rdx), %r13d ; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edx ; CHECK-SSE1-NEXT: movzbl 2(%r8), %edi ; CHECK-SSE1-NEXT: movzbl (%r8), %eax ; CHECK-SSE1-NEXT: movzbl 1(%r8), %ecx ; CHECK-SSE1-NEXT: movzbl (%r9), %ebx ; CHECK-SSE1-NEXT: xorb %al, %bl ; CHECK-SSE1-NEXT: andb (%r10), %bl ; CHECK-SSE1-NEXT: xorb %al, %bl ; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 1(%r9), %eax ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 1(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 2(%r9), %eax ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: andb 2(%r10), %al ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 3(%r9), %eax ; CHECK-SSE1-NEXT: xorb %dl, %al ; CHECK-SSE1-NEXT: andb 3(%r10), %al ; CHECK-SSE1-NEXT: xorb %dl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 4(%r9), %eax ; CHECK-SSE1-NEXT: xorb %r13b, %al ; CHECK-SSE1-NEXT: andb 4(%r10), %al ; CHECK-SSE1-NEXT: xorb %r13b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 5(%r9), %eax ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: andb 5(%r10), %al ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: 
movzbl 6(%r9), %eax ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: andb 6(%r10), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 7(%r9), %eax ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: andb 7(%r10), %al ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 8(%r9), %eax ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: andb 8(%r10), %al ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 9(%r9), %eax ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: andb 9(%r10), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 10(%r9), %eax ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 10(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 11(%r9), %eax ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 11(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 12(%r9), %eax ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 12(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 13(%r9), %eax ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 13(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 
1-byte Spill ; CHECK-SSE1-NEXT: movzbl 14(%r9), %eax ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 14(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 15(%r9), %eax ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: andb 15(%r10), %al ; CHECK-SSE1-NEXT: xorb %cl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 16(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 16(%r9), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 16(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 17(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 17(%r9), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 17(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 18(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 18(%r9), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 18(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 19(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 19(%r9), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 19(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 20(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 20(%r9), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 20(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 21(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 21(%r9), %r13d ; CHECK-SSE1-NEXT: xorb %al, %r13b ; CHECK-SSE1-NEXT: 
andb 21(%r10), %r13b ; CHECK-SSE1-NEXT: xorb %al, %r13b ; CHECK-SSE1-NEXT: movzbl 22(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 22(%r9), %r12d ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: andb 22(%r10), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: movzbl 23(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 23(%r9), %r15d ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: andb 23(%r10), %r15b ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: movzbl 24(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 24(%r9), %r14d ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: andb 24(%r10), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: movzbl 25(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 25(%r9), %ebp ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: andb 25(%r10), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: movzbl 26(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 26(%r9), %edi ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: andb 26(%r10), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: movzbl 27(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 27(%r9), %esi ; CHECK-SSE1-NEXT: xorb %al, %sil ; CHECK-SSE1-NEXT: andb 27(%r10), %sil ; CHECK-SSE1-NEXT: xorb %al, %sil ; CHECK-SSE1-NEXT: movzbl 28(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 28(%r9), %edx ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 28(%r10), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movzbl 29(%r8), %eax ; CHECK-SSE1-NEXT: movzbl 29(%r9), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 29(%r10), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movzbl 30(%r8), %ebx ; CHECK-SSE1-NEXT: movzbl 30(%r9), %eax ; CHECK-SSE1-NEXT: xorb %bl, %al ; CHECK-SSE1-NEXT: andb 30(%r10), %al ; CHECK-SSE1-NEXT: xorb %bl, %al ; CHECK-SSE1-NEXT: movzbl 31(%r8), %r8d ; CHECK-SSE1-NEXT: movzbl 31(%r9), %ebx ; CHECK-SSE1-NEXT: xorb %r8b, %bl ; CHECK-SSE1-NEXT: andb 31(%r10), %bl ; CHECK-SSE1-NEXT: xorb %r8b, %bl ; CHECK-SSE1-NEXT: movb %bl, 31(%r11) ; CHECK-SSE1-NEXT: movb %al, 
30(%r11) ; CHECK-SSE1-NEXT: movb %cl, 29(%r11) ; CHECK-SSE1-NEXT: movb %dl, 28(%r11) ; CHECK-SSE1-NEXT: movb %sil, 27(%r11) ; CHECK-SSE1-NEXT: movb %dil, 26(%r11) ; CHECK-SSE1-NEXT: movb %bpl, 25(%r11) ; CHECK-SSE1-NEXT: movb %r14b, 24(%r11) ; CHECK-SSE1-NEXT: movb %r15b, 23(%r11) ; CHECK-SSE1-NEXT: movb %r12b, 22(%r11) ; CHECK-SSE1-NEXT: movb %r13b, 21(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 20(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 19(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 18(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 17(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 16(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 15(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 14(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 13(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 12(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 11(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 10(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 9(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 8(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb 
%al, 7(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 6(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 5(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 4(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 3(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 2(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 1(%r11) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, (%r11) ; CHECK-SSE1-NEXT: movq %r11, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v32i8: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v32i8: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <32 x i8>, ptr%px, align 32 %y = load <32 x i8>, ptr%py, align 32 %mask = load <32 x i8>, ptr%pmask, align 32 %mx = and <32 x i8> %x, %mask %notmask = xor <32 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 
-1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> %my = and <32 x i8> %y, %notmask %r = or <32 x i8> %mx, %my ret <32 x i8> %r } ; 'out' masked merge on <16 x i16> values loaded from %px, %py and %pmask: r = (x & mask) | (y & ~mask). ; Expected lowering per the assertions: without SSE2 each 16-bit element is handled with scalar xor/and/xor, SSE2 uses and/andn/or on two xmm halves, XOP uses one ymm vpcmov. define <16 x i16> @out_v16i16(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v16i16: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r15d ; CHECK-BASELINE-NEXT: movzwl 16(%rdx), %r14d ; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r11d ; CHECK-BASELINE-NEXT: movzwl 12(%rdx), %r13d ; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r9d ; CHECK-BASELINE-NEXT: movzwl 8(%rdx), %r8d ; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %ebx ; CHECK-BASELINE-NEXT: movzwl 4(%rdx), %r12d ; CHECK-BASELINE-NEXT: movzwl (%rdx), %ebp ; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %r10d ; CHECK-BASELINE-NEXT: movzwl (%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %bp, %ax ; CHECK-BASELINE-NEXT: andw (%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %ebp ; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r10w, %ax ; CHECK-BASELINE-NEXT: andw 2(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r10d ; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r12w, %ax ; CHECK-BASELINE-NEXT: andw 4(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r12d ; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %bx, %ax ; CHECK-BASELINE-NEXT: andw 6(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %ebx ; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r8w, %ax ; CHECK-BASELINE-NEXT: andw 8(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r8d ;
CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r9w, %ax ; CHECK-BASELINE-NEXT: andw 10(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r9d ; CHECK-BASELINE-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r13w, %ax ; CHECK-BASELINE-NEXT: andw 12(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r13d ; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r11w, %ax ; CHECK-BASELINE-NEXT: andw 14(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r11d ; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r14w, %ax ; CHECK-BASELINE-NEXT: andw 16(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r14d ; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r15w, %ax ; CHECK-BASELINE-NEXT: andw 18(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r15d ; CHECK-BASELINE-NEXT: movzwl 20(%rdx), %ebx ; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %bx, %ax ; CHECK-BASELINE-NEXT: andw 20(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %ebx ; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %r9d ; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r9w, %ax ; CHECK-BASELINE-NEXT: andw 22(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r9d ; CHECK-BASELINE-NEXT: movzwl 24(%rdx), %r8d ; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %r8w, %ax ; CHECK-BASELINE-NEXT: andw 24(%rcx), %ax ; CHECK-BASELINE-NEXT: xorl %eax, %r8d ; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %eax ; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r11d ; CHECK-BASELINE-NEXT: xorw %ax, %r11w ; CHECK-BASELINE-NEXT: andw 26(%rcx), %r11w ; CHECK-BASELINE-NEXT: xorl %r11d, %eax ; CHECK-BASELINE-NEXT: movzwl 28(%rdx), %r11d ; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %ebp ; 
CHECK-BASELINE-NEXT: xorw %r11w, %bp ; CHECK-BASELINE-NEXT: andw 28(%rcx), %bp ; CHECK-BASELINE-NEXT: xorl %ebp, %r11d ; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edx ; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi ; CHECK-BASELINE-NEXT: xorw %dx, %si ; CHECK-BASELINE-NEXT: andw 30(%rcx), %si ; CHECK-BASELINE-NEXT: xorl %esi, %edx ; CHECK-BASELINE-NEXT: movw %dx, 30(%rdi) ; CHECK-BASELINE-NEXT: movw %r11w, 28(%rdi) ; CHECK-BASELINE-NEXT: movw %ax, 26(%rdi) ; CHECK-BASELINE-NEXT: movw %r8w, 24(%rdi) ; CHECK-BASELINE-NEXT: movw %r9w, 22(%rdi) ; CHECK-BASELINE-NEXT: movw %bx, 20(%rdi) ; CHECK-BASELINE-NEXT: movw %r15w, 18(%rdi) ; CHECK-BASELINE-NEXT: movw %r14w, 16(%rdi) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, 14(%rdi) ; CHECK-BASELINE-NEXT: movw %r13w, 12(%rdi) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, 10(%rdi) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, 8(%rdi) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi) ; CHECK-BASELINE-NEXT: movw %r12w, 4(%rdi) ; CHECK-BASELINE-NEXT: movw %r10w, 2(%rdi) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, (%rdi) ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v16i16: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r15d ; CHECK-SSE1-NEXT: movzwl 16(%rdx), %r14d ; CHECK-SSE1-NEXT: movzwl 
14(%rdx), %r11d ; CHECK-SSE1-NEXT: movzwl 12(%rdx), %r13d ; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r9d ; CHECK-SSE1-NEXT: movzwl 8(%rdx), %r8d ; CHECK-SSE1-NEXT: movzwl 6(%rdx), %ebx ; CHECK-SSE1-NEXT: movzwl 4(%rdx), %r12d ; CHECK-SSE1-NEXT: movzwl (%rdx), %ebp ; CHECK-SSE1-NEXT: movzwl 2(%rdx), %r10d ; CHECK-SSE1-NEXT: movzwl (%rsi), %eax ; CHECK-SSE1-NEXT: xorw %bp, %ax ; CHECK-SSE1-NEXT: andw (%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %ebp ; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 2(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r10w, %ax ; CHECK-SSE1-NEXT: andw 2(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r10d ; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r12w, %ax ; CHECK-SSE1-NEXT: andw 4(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r12d ; CHECK-SSE1-NEXT: movzwl 6(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %bx, %ax ; CHECK-SSE1-NEXT: andw 6(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %ebx ; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 8(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r8w, %ax ; CHECK-SSE1-NEXT: andw 8(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r8d ; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 10(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r9w, %ax ; CHECK-SSE1-NEXT: andw 10(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r9d ; CHECK-SSE1-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 12(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r13w, %ax ; CHECK-SSE1-NEXT: andw 12(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r13d ; CHECK-SSE1-NEXT: movzwl 14(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r11w, %ax ; CHECK-SSE1-NEXT: andw 14(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r11d ; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 16(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r14w, %ax ; CHECK-SSE1-NEXT: andw 16(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r14d ; CHECK-SSE1-NEXT: 
movzwl 18(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r15w, %ax ; CHECK-SSE1-NEXT: andw 18(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r15d ; CHECK-SSE1-NEXT: movzwl 20(%rdx), %ebx ; CHECK-SSE1-NEXT: movzwl 20(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %bx, %ax ; CHECK-SSE1-NEXT: andw 20(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %ebx ; CHECK-SSE1-NEXT: movzwl 22(%rdx), %r9d ; CHECK-SSE1-NEXT: movzwl 22(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r9w, %ax ; CHECK-SSE1-NEXT: andw 22(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r9d ; CHECK-SSE1-NEXT: movzwl 24(%rdx), %r8d ; CHECK-SSE1-NEXT: movzwl 24(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %r8w, %ax ; CHECK-SSE1-NEXT: andw 24(%rcx), %ax ; CHECK-SSE1-NEXT: xorl %eax, %r8d ; CHECK-SSE1-NEXT: movzwl 26(%rdx), %eax ; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r11d ; CHECK-SSE1-NEXT: xorw %ax, %r11w ; CHECK-SSE1-NEXT: andw 26(%rcx), %r11w ; CHECK-SSE1-NEXT: xorl %r11d, %eax ; CHECK-SSE1-NEXT: movzwl 28(%rdx), %r11d ; CHECK-SSE1-NEXT: movzwl 28(%rsi), %ebp ; CHECK-SSE1-NEXT: xorw %r11w, %bp ; CHECK-SSE1-NEXT: andw 28(%rcx), %bp ; CHECK-SSE1-NEXT: xorl %ebp, %r11d ; CHECK-SSE1-NEXT: movzwl 30(%rdx), %edx ; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi ; CHECK-SSE1-NEXT: xorw %dx, %si ; CHECK-SSE1-NEXT: andw 30(%rcx), %si ; CHECK-SSE1-NEXT: xorl %esi, %edx ; CHECK-SSE1-NEXT: movw %dx, 30(%rdi) ; CHECK-SSE1-NEXT: movw %r11w, 28(%rdi) ; CHECK-SSE1-NEXT: movw %ax, 26(%rdi) ; CHECK-SSE1-NEXT: movw %r8w, 24(%rdi) ; CHECK-SSE1-NEXT: movw %r9w, 22(%rdi) ; CHECK-SSE1-NEXT: movw %bx, 20(%rdi) ; CHECK-SSE1-NEXT: movw %r15w, 18(%rdi) ; CHECK-SSE1-NEXT: movw %r14w, 16(%rdi) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, 14(%rdi) ; CHECK-SSE1-NEXT: movw %r13w, 12(%rdi) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, 10(%rdi) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, 8(%rdi) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), 
%eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, 6(%rdi) ; CHECK-SSE1-NEXT: movw %r12w, 4(%rdi) ; CHECK-SSE1-NEXT: movw %r10w, 2(%rdi) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, (%rdi) ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v16i16: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v16i16: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <16 x i16>, ptr%px, align 32 %y = load <16 x i16>, ptr%py, align 32 %mask = load <16 x i16>, ptr%pmask, align 32 %mx = and <16 x i16> %x, %mask %notmask = xor <16 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> %my = and <16 x i16> %y, %notmask %r = or <16 x i16> %mx, %my ret <16 x i16> %r } ; 'out' masked merge on <8 x i32> values loaded from %px, %py and %pmask: r = (x & mask) | (y & ~mask). ; Expected lowering per the assertions: without SSE2 each 32-bit element is handled with scalar xor/and/xor, SSE2 uses and/andn/or on two xmm halves, XOP uses one ymm vpcmov. define <8 x i32> @out_v8i32(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v8i32: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movl 28(%rdx), %r8d ; CHECK-BASELINE-NEXT: movl 24(%rdx), %r9d ;
CHECK-BASELINE-NEXT: movl 20(%rdx), %r10d ; CHECK-BASELINE-NEXT: movl 16(%rdx), %r14d ; CHECK-BASELINE-NEXT: movl 12(%rdx), %ebx ; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebp ; CHECK-BASELINE-NEXT: movl (%rdx), %edi ; CHECK-BASELINE-NEXT: movl 4(%rdx), %edx ; CHECK-BASELINE-NEXT: movl (%rsi), %r11d ; CHECK-BASELINE-NEXT: xorl %edi, %r11d ; CHECK-BASELINE-NEXT: andl (%rcx), %r11d ; CHECK-BASELINE-NEXT: xorl %edi, %r11d ; CHECK-BASELINE-NEXT: movl 4(%rsi), %r15d ; CHECK-BASELINE-NEXT: xorl %edx, %r15d ; CHECK-BASELINE-NEXT: andl 4(%rcx), %r15d ; CHECK-BASELINE-NEXT: xorl %edx, %r15d ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r12d ; CHECK-BASELINE-NEXT: xorl %ebp, %r12d ; CHECK-BASELINE-NEXT: andl 8(%rcx), %r12d ; CHECK-BASELINE-NEXT: xorl %ebp, %r12d ; CHECK-BASELINE-NEXT: movl 12(%rsi), %ebp ; CHECK-BASELINE-NEXT: xorl %ebx, %ebp ; CHECK-BASELINE-NEXT: andl 12(%rcx), %ebp ; CHECK-BASELINE-NEXT: xorl %ebx, %ebp ; CHECK-BASELINE-NEXT: movl 16(%rsi), %ebx ; CHECK-BASELINE-NEXT: xorl %r14d, %ebx ; CHECK-BASELINE-NEXT: andl 16(%rcx), %ebx ; CHECK-BASELINE-NEXT: xorl %r14d, %ebx ; CHECK-BASELINE-NEXT: movl 20(%rsi), %edi ; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: andl 20(%rcx), %edi ; CHECK-BASELINE-NEXT: xorl %r10d, %edi ; CHECK-BASELINE-NEXT: movl 24(%rsi), %edx ; CHECK-BASELINE-NEXT: xorl %r9d, %edx ; CHECK-BASELINE-NEXT: andl 24(%rcx), %edx ; CHECK-BASELINE-NEXT: xorl %r9d, %edx ; CHECK-BASELINE-NEXT: movl 28(%rsi), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: andl 28(%rcx), %esi ; CHECK-BASELINE-NEXT: xorl %r8d, %esi ; CHECK-BASELINE-NEXT: movl %esi, 28(%rax) ; CHECK-BASELINE-NEXT: movl %edx, 24(%rax) ; CHECK-BASELINE-NEXT: movl %edi, 20(%rax) ; CHECK-BASELINE-NEXT: movl %ebx, 16(%rax) ; CHECK-BASELINE-NEXT: movl %ebp, 12(%rax) ; CHECK-BASELINE-NEXT: movl %r12d, 8(%rax) ; CHECK-BASELINE-NEXT: movl %r15d, 4(%rax) ; CHECK-BASELINE-NEXT: movl %r11d, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; 
CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v8i32: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movl 28(%rdx), %r8d ; CHECK-SSE1-NEXT: movl 24(%rdx), %r9d ; CHECK-SSE1-NEXT: movl 20(%rdx), %r10d ; CHECK-SSE1-NEXT: movl 16(%rdx), %r14d ; CHECK-SSE1-NEXT: movl 12(%rdx), %ebx ; CHECK-SSE1-NEXT: movl 8(%rdx), %ebp ; CHECK-SSE1-NEXT: movl (%rdx), %edi ; CHECK-SSE1-NEXT: movl 4(%rdx), %edx ; CHECK-SSE1-NEXT: movl (%rsi), %r11d ; CHECK-SSE1-NEXT: xorl %edi, %r11d ; CHECK-SSE1-NEXT: andl (%rcx), %r11d ; CHECK-SSE1-NEXT: xorl %edi, %r11d ; CHECK-SSE1-NEXT: movl 4(%rsi), %r15d ; CHECK-SSE1-NEXT: xorl %edx, %r15d ; CHECK-SSE1-NEXT: andl 4(%rcx), %r15d ; CHECK-SSE1-NEXT: xorl %edx, %r15d ; CHECK-SSE1-NEXT: movl 8(%rsi), %r12d ; CHECK-SSE1-NEXT: xorl %ebp, %r12d ; CHECK-SSE1-NEXT: andl 8(%rcx), %r12d ; CHECK-SSE1-NEXT: xorl %ebp, %r12d ; CHECK-SSE1-NEXT: movl 12(%rsi), %ebp ; CHECK-SSE1-NEXT: xorl %ebx, %ebp ; CHECK-SSE1-NEXT: andl 12(%rcx), %ebp ; CHECK-SSE1-NEXT: xorl %ebx, %ebp ; CHECK-SSE1-NEXT: movl 16(%rsi), %ebx ; CHECK-SSE1-NEXT: xorl %r14d, %ebx ; CHECK-SSE1-NEXT: andl 16(%rcx), %ebx ; CHECK-SSE1-NEXT: xorl %r14d, %ebx ; CHECK-SSE1-NEXT: movl 20(%rsi), %edi ; CHECK-SSE1-NEXT: xorl %r10d, %edi ; CHECK-SSE1-NEXT: andl 20(%rcx), %edi ; CHECK-SSE1-NEXT: xorl %r10d, %edi ; CHECK-SSE1-NEXT: movl 24(%rsi), %edx ; CHECK-SSE1-NEXT: xorl %r9d, %edx ; CHECK-SSE1-NEXT: andl 24(%rcx), %edx ; CHECK-SSE1-NEXT: xorl %r9d, %edx ; CHECK-SSE1-NEXT: movl 28(%rsi), %esi ; CHECK-SSE1-NEXT: xorl %r8d, %esi ; CHECK-SSE1-NEXT: andl 28(%rcx), %esi ; CHECK-SSE1-NEXT: xorl %r8d, %esi ; CHECK-SSE1-NEXT: movl %esi, 28(%rax) ; CHECK-SSE1-NEXT: movl %edx, 24(%rax) ; CHECK-SSE1-NEXT: movl %edi, 20(%rax) ; 
CHECK-SSE1-NEXT: movl %ebx, 16(%rax) ; CHECK-SSE1-NEXT: movl %ebp, 12(%rax) ; CHECK-SSE1-NEXT: movl %r12d, 8(%rax) ; CHECK-SSE1-NEXT: movl %r15d, 4(%rax) ; CHECK-SSE1-NEXT: movl %r11d, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v8i32: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v8i32: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <8 x i32>, ptr%px, align 32 %y = load <8 x i32>, ptr%py, align 32 %mask = load <8 x i32>, ptr%pmask, align 32 %mx = and <8 x i32> %x, %mask %notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> %my = and <8 x i32> %y, %notmask %r = or <8 x i32> %mx, %my ret <8 x i32> %r } ; 'out' masked merge on <4 x i64> values loaded from %px, %py and %pmask: r = (x & mask) | (y & ~mask). ; Expected lowering per the assertions: without SSE2 each 64-bit element is handled with scalar xor/and/xor, SSE2 uses and/andn/or on two xmm halves, XOP uses one ymm vpcmov. define <4 x i64> @out_v4i64(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: out_v4i64: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movq 24(%rdx), %r8 ; CHECK-BASELINE-NEXT: movq 16(%rdx), %r9 ; CHECK-BASELINE-NEXT: movq (%rdx), %rdi ; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10 ; CHECK-BASELINE-NEXT: movq (%rsi), %r11 ; CHECK-BASELINE-NEXT: xorq %rdi, %r11 ; CHECK-BASELINE-NEXT: andq (%rcx), %r11 ; CHECK-BASELINE-NEXT: xorq %rdi, %r11 ; CHECK-BASELINE-NEXT: movq 8(%rsi), %rdi ; CHECK-BASELINE-NEXT: xorq %r10, %rdi ; CHECK-BASELINE-NEXT: andq 8(%rcx), %rdi ;
CHECK-BASELINE-NEXT: xorq %r10, %rdi ; CHECK-BASELINE-NEXT: movq 16(%rsi), %rdx ; CHECK-BASELINE-NEXT: xorq %r9, %rdx ; CHECK-BASELINE-NEXT: andq 16(%rcx), %rdx ; CHECK-BASELINE-NEXT: xorq %r9, %rdx ; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi ; CHECK-BASELINE-NEXT: xorq %r8, %rsi ; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi ; CHECK-BASELINE-NEXT: xorq %r8, %rsi ; CHECK-BASELINE-NEXT: movq %rsi, 24(%rax) ; CHECK-BASELINE-NEXT: movq %rdx, 16(%rax) ; CHECK-BASELINE-NEXT: movq %rdi, 8(%rax) ; CHECK-BASELINE-NEXT: movq %r11, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: out_v4i64: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movq 24(%rdx), %r8 ; CHECK-SSE1-NEXT: movq 16(%rdx), %r9 ; CHECK-SSE1-NEXT: movq (%rdx), %rdi ; CHECK-SSE1-NEXT: movq 8(%rdx), %r10 ; CHECK-SSE1-NEXT: movq (%rsi), %r11 ; CHECK-SSE1-NEXT: xorq %rdi, %r11 ; CHECK-SSE1-NEXT: andq (%rcx), %r11 ; CHECK-SSE1-NEXT: xorq %rdi, %r11 ; CHECK-SSE1-NEXT: movq 8(%rsi), %rdi ; CHECK-SSE1-NEXT: xorq %r10, %rdi ; CHECK-SSE1-NEXT: andq 8(%rcx), %rdi ; CHECK-SSE1-NEXT: xorq %r10, %rdi ; CHECK-SSE1-NEXT: movq 16(%rsi), %rdx ; CHECK-SSE1-NEXT: xorq %r9, %rdx ; CHECK-SSE1-NEXT: andq 16(%rcx), %rdx ; CHECK-SSE1-NEXT: xorq %r9, %rdx ; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi ; CHECK-SSE1-NEXT: xorq %r8, %rsi ; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi ; CHECK-SSE1-NEXT: xorq %r8, %rsi ; CHECK-SSE1-NEXT: movq %rsi, 24(%rax) ; CHECK-SSE1-NEXT: movq %rdx, 16(%rax) ; CHECK-SSE1-NEXT: movq %rdi, 8(%rax) ; CHECK-SSE1-NEXT: movq %r11, (%rax) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: out_v4i64: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 ; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 ; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 ; CHECK-SSE2-NEXT: orps 
%xmm3, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: out_v4i64: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i64>, ptr%px, align 32 %y = load <4 x i64>, ptr%py, align 32 %mask = load <4 x i64>, ptr%pmask, align 32 %mx = and <4 x i64> %x, %mask %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1> %my = and <4 x i64> %y, %notmask %r = or <4 x i64> %mx, %my ret <4 x i64> %r } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; Should be the same as the previous one. ; The in_* functions below compute ((x ^ y) & mask) ^ y, which is bitwise-equivalent to the (x & mask) | (y & ~mask) form used by the out_* functions above. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; ============================================================================ ; ; 8-bit vector width ; ============================================================================ ; ; 'in' masked merge on <1 x i8>: r = ((x ^ y) & mask) ^ y. All run configurations share one expected scalar xor/and/xor sequence. define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { ; CHECK-LABEL: in_v1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: andl %edx, %eax ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %n0 = xor <1 x i8> %x, %y %n1 = and <1 x i8> %n0, %mask %r = xor <1 x i8> %n1, %y ret <1 x i8> %r } ; ============================================================================ ; ; 16-bit vector width ; ============================================================================ ; ; 'in' masked merge on <2 x i8>: r = ((x ^ y) & mask) ^ y. Without SSE2 the two elements are merged in scalar registers with xor/and/xor; SSE2 expects and/andn/or and XOP a single vpcmov. define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v2i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movl %edi, %eax ; CHECK-BASELINE-NEXT: xorl %edx, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: andl %r8d, %eax ; CHECK-BASELINE-NEXT: xorl %edx, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi ; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax ;
CHECK-BASELINE-NEXT: movl %esi, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v2i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movl %edi, %eax ; CHECK-SSE1-NEXT: xorl %edx, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi ; CHECK-SSE1-NEXT: andl %r9d, %esi ; CHECK-SSE1-NEXT: andl %r8d, %eax ; CHECK-SSE1-NEXT: xorl %edx, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi ; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax ; CHECK-SSE1-NEXT: movl %esi, %edx ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v2i8: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v2i8: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %n0 = xor <2 x i8> %x, %y %n1 = and <2 x i8> %n0, %mask %r = xor <2 x i8> %n1, %y ret <2 x i8> %r } ; 'in' masked merge on <1 x i16>: r = ((x ^ y) & mask) ^ y. All run configurations share one expected scalar xor/and/xor sequence. define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { ; CHECK-LABEL: in_v1i16: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: andl %edx, %eax ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq %n0 = xor <1 x i16> %x, %y %n1 = and <1 x i16> %n0, %mask %r = xor <1 x i16> %n1, %y ret <1 x i16> %r } ; ============================================================================ ; ; 32-bit vector width ; ============================================================================ ; ; 'in' masked merge on <4 x i8>: r = ((x ^ y) & mask) ^ y. Without SSE2 each byte is merged with scalar xor/and/xor and stored individually; SSE2 expects and/andn/or and XOP a single vpcmov. define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v4i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d ; CHECK-BASELINE-NEXT: xorl %r9d, %esi ; CHECK-BASELINE-NEXT: xorb %r11b, %dl ; CHECK-BASELINE-NEXT: xorb %r10b, %cl ; CHECK-BASELINE-NEXT: xorb %dil, %r8b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: xorb %r9b, %sil ; CHECK-BASELINE-NEXT: xorb %r11b, %dl ; CHECK-BASELINE-NEXT: xorb %r10b, %cl ; CHECK-BASELINE-NEXT: xorb %dil, %r8b ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) ; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) ; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) ; CHECK-BASELINE-NEXT: movb %sil, (%rax) ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v4i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d ; CHECK-SSE1-NEXT: xorl %r9d, %esi ; CHECK-SSE1-NEXT: xorb %r11b, %dl ; CHECK-SSE1-NEXT: xorb %r10b, %cl ; CHECK-SSE1-NEXT: xorb %dil, %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: xorb %r9b, %sil ; CHECK-SSE1-NEXT: xorb %r11b, %dl ; CHECK-SSE1-NEXT: xorb %r10b, %cl ; CHECK-SSE1-NEXT: xorb %dil, %r8b ; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) ; CHECK-SSE1-NEXT: movb %cl, 2(%rax) ; CHECK-SSE1-NEXT: movb %dl, 1(%rax) ; CHECK-SSE1-NEXT: movb %sil, (%rax) ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v4i8: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v4i8: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %n0 = xor <4 x i8> %x, %y %n1 = and <4 x i8> %n0, %mask %r = xor <4 x i8> %n1, %y ret <4 x i8> %r } ; 'in' masked merge on <2 x i16>: r = ((x ^ y) & mask) ^ y. Without SSE2 the two elements are merged in scalar registers with xor/and/xor; SSE2 expects and/andn/or and XOP a single vpcmov. define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v2i16: ; CHECK-BASELINE: # %bb.0: ;
CHECK-BASELINE-NEXT: movl %edi, %eax ; CHECK-BASELINE-NEXT: xorl %edx, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi ; CHECK-BASELINE-NEXT: andl %r9d, %esi ; CHECK-BASELINE-NEXT: andl %r8d, %eax ; CHECK-BASELINE-NEXT: xorl %edx, %eax ; CHECK-BASELINE-NEXT: xorl %ecx, %esi ; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-BASELINE-NEXT: movl %esi, %edx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v2i16: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: movl %edi, %eax ; CHECK-SSE1-NEXT: xorl %edx, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi ; CHECK-SSE1-NEXT: andl %r9d, %esi ; CHECK-SSE1-NEXT: andl %r8d, %eax ; CHECK-SSE1-NEXT: xorl %edx, %eax ; CHECK-SSE1-NEXT: xorl %ecx, %esi ; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-SSE1-NEXT: movl %esi, %edx ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v2i16: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v2i16: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-XOP-NEXT: retq %n0 = xor <2 x i16> %x, %y %n1 = and <2 x i16> %n0, %mask %r = xor <2 x i16> %n1, %y ret <2 x i16> %r } ; 'in' masked merge on <1 x i32>: r = ((x ^ y) & mask) ^ y. All run configurations share one expected scalar xor/and/xor sequence. define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { ; CHECK-LABEL: in_v1i32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: andl %edx, %eax ; CHECK-NEXT: xorl %esi, %eax ; CHECK-NEXT: retq %n0 = xor <1 x i32> %x, %y %n1 = and <1 x i32> %n0, %mask %r = xor <1 x i32> %n1, %y ret <1 x i32> %r } ; ============================================================================ ; ; 64-bit vector width ; ============================================================================ ; ; 'in' masked merge on <8 x i8>: r = ((x ^ y) & mask) ^ y. Without SSE2 each byte is merged with scalar xor/and/xor and stored individually; SSE2 expects and/andn/or and XOP a single vpcmov. define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { ; CHECK-BASELINE-LABEL: in_v8i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq
%r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d ; CHECK-BASELINE-NEXT: xorb %r11b, %sil ; CHECK-BASELINE-NEXT: xorb %r12b, %dl ; CHECK-BASELINE-NEXT: xorb %r15b, %cl ; CHECK-BASELINE-NEXT: xorb %r14b, %r8b ; CHECK-BASELINE-NEXT: xorb %bpl, %r9b ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb %r11b, %sil ; CHECK-BASELINE-NEXT: xorb %r12b, %dl ; CHECK-BASELINE-NEXT: xorb %r15b, %cl ; CHECK-BASELINE-NEXT: xorb %r14b, %r8b ; CHECK-BASELINE-NEXT: xorb %bpl, %r9b ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r13b ; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: movb %al, 7(%rdi) ; CHECK-BASELINE-NEXT: movb %bl, 6(%rdi) ; CHECK-BASELINE-NEXT: movb %r13b, 5(%rdi) ; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) ; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi) ; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi) ; CHECK-BASELINE-NEXT: movb %dl, 1(%rdi) ; 
CHECK-BASELINE-NEXT: movb %sil, (%rdi) ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v8i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d ; CHECK-SSE1-NEXT: xorb %r11b, %sil ; CHECK-SSE1-NEXT: xorb %r12b, %dl ; CHECK-SSE1-NEXT: xorb %r15b, %cl ; CHECK-SSE1-NEXT: xorb %r14b, %r8b ; CHECK-SSE1-NEXT: xorb %bpl, %r9b ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb %r11b, %sil ; CHECK-SSE1-NEXT: xorb %r12b, %dl ; CHECK-SSE1-NEXT: xorb %r15b, %cl ; CHECK-SSE1-NEXT: xorb %r14b, %r8b ; CHECK-SSE1-NEXT: xorb %bpl, %r9b ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r13b ; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %bl ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: movb %al, 
7(%rdi)
; CHECK-SSE1-NEXT: movb %bl, 6(%rdi)
; CHECK-SSE1-NEXT: movb %r13b, 5(%rdi)
; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi)
; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi)
; CHECK-SSE1-NEXT: movb %cl, 2(%rdi)
; CHECK-SSE1-NEXT: movb %dl, 1(%rdi)
; CHECK-SSE1-NEXT: movb %sil, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r12
; CHECK-SSE1-NEXT: popq %r13
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %r15
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v8i8:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v8i8:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <8 x i8> %x, %y
  %n1 = and <8 x i8> %n0, %mask
  %r = xor <8 x i8> %n1, %y
  ret <8 x i8> %r
}

; Masked merge in the "in" (xor-and-xor) form on <4 x i16>:
;   r = ((x ^ y) & mask) ^ y
; i.e. per bit, x where mask is set and y where it is clear.
; Expected code: the two runs without SSE2 are scalarized into 16-bit
; xor/and/xor sequences; the SSE2 run expects andps/andnps/orps and the XOP
; run a single vpcmov.
define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v4i16:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
; CHECK-BASELINE-NEXT: xorl %edi, %edx
; CHECK-BASELINE-NEXT: xorl %r11d, %ecx
; CHECK-BASELINE-NEXT: xorl %r10d, %r8d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
; CHECK-BASELINE-NEXT: xorl %edi, %edx
; CHECK-BASELINE-NEXT: xorl %r11d, %ecx
; CHECK-BASELINE-NEXT: xorl %r10d, %r8d
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax)
; CHECK-BASELINE-NEXT: movw %cx, 4(%rax)
; CHECK-BASELINE-NEXT: movw %dx, 2(%rax)
; CHECK-BASELINE-NEXT: movw %si, (%rax)
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v4i16:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorl %r9d, %esi
; CHECK-SSE1-NEXT: xorl %edi, %edx
; CHECK-SSE1-NEXT: xorl %r11d, %ecx
; CHECK-SSE1-NEXT: xorl %r10d, %r8d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT: xorl %r9d, %esi
; CHECK-SSE1-NEXT: xorl %edi, %edx
; CHECK-SSE1-NEXT: xorl %r11d, %ecx
; CHECK-SSE1-NEXT: xorl %r10d, %r8d
; CHECK-SSE1-NEXT: movw %r8w, 6(%rax)
; CHECK-SSE1-NEXT: movw %cx, 4(%rax)
; CHECK-SSE1-NEXT: movw %dx, 2(%rax)
; CHECK-SSE1-NEXT: movw %si, (%rax)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v4i16:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v4i16:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <4 x i16> %x, %y
  %n1 = and <4 x i16> %n0, %mask
  %r = xor <4 x i16> %n1, %y
  ret <4 x i16> %r
}

; Masked merge in the "in" (xor-and-xor) form on <2 x i32>:
;   r = ((x ^ y) & mask) ^ y
; Expected code: the two runs without SSE2 keep both lanes in general-purpose
; registers and use 32-bit xor/and/xor; the SSE2 run expects
; andps/andnps/orps and the XOP run a single vpcmov.
define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v2i32:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movl %edi, %eax
; CHECK-BASELINE-NEXT: xorl %edx, %eax
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: andl %r9d, %esi
; CHECK-BASELINE-NEXT: andl %r8d, %eax
; CHECK-BASELINE-NEXT: xorl %edx, %eax
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: movl %esi, %edx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v2i32:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movl %edi, %eax
; CHECK-SSE1-NEXT: xorl %edx, %eax
; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: andl %r9d, %esi
; CHECK-SSE1-NEXT: andl %r8d, %eax
; CHECK-SSE1-NEXT: xorl %edx, %eax
; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: movl %esi, %edx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v2i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v2i32:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <2 x i32> %x, %y
  %n1 = and <2 x i32> %n0, %mask
  %r = xor <2 x i32> %n1, %y
  ret <2 x i32> %r
}

; Masked merge in the "in" (xor-and-xor) form on <1 x i64>:
;   r = ((x ^ y) & mask) ^ y
; All four runs share one expectation here: a plain 64-bit scalar
; xor/and/xor sequence.
define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: in_v1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: andq %rdx, %rax
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: retq
  %n0 = xor <1 x i64> %x, %y
  %n1 = and <1 x i64> %n0, %mask
  %r = xor <1 x i64> %n1, %y
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

; Masked merge in the "in" (xor-and-xor) form on <16 x i8>:
;   r = ((x ^ y) & mask) ^ y
; Expected code: the two runs without SSE2 are fully scalarized, one byte-wide
; xor/and/xor triple per element, with the result stored through the pointer
; received in %rdi; the SSE2 run expects andps/andnps/orps and the XOP run a
; single vpcmov.
define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v16i8:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %r15
; CHECK-BASELINE-NEXT: pushq %r14
; CHECK-BASELINE-NEXT: pushq %r13
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movq %rdi, %rdx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorb %dil, %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: xorb %dil, %r9b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorb %r10b, %dil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil
; CHECK-BASELINE-NEXT: xorb %r10b, %dil
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: xorb %r11b, %r10b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT: xorb %r11b, %r10b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %bl, %r11b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT: xorb %bl, %r11b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT: xorb %r13b, %bl
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: xorb %r12b, %r13b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: xorb %r12b, %r13b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: xorb %r15b, %r12b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-BASELINE-NEXT: xorb %r15b, %r12b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT: xorb %r14b, %r15b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: xorb %r14b, %r15b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: xorb %bpl, %r14b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-BASELINE-NEXT: xorb %bpl, %r14b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorb %al, %bpl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT: xorb %al, %bpl
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT: xorb %sil, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %sil, %cl
; CHECK-BASELINE-NEXT: movb %cl, 15(%rdx)
; CHECK-BASELINE-NEXT: movb %al, 14(%rdx)
; CHECK-BASELINE-NEXT: movb %bpl, 13(%rdx)
; CHECK-BASELINE-NEXT: movb %r14b, 12(%rdx)
; CHECK-BASELINE-NEXT: movb %r15b, 11(%rdx)
; CHECK-BASELINE-NEXT: movb %r12b, 10(%rdx)
; CHECK-BASELINE-NEXT: movb %r13b, 9(%rdx)
; CHECK-BASELINE-NEXT: movb %bl, 8(%rdx)
; CHECK-BASELINE-NEXT: movb %r11b, 7(%rdx)
; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdx)
; CHECK-BASELINE-NEXT: movb %dil, 5(%rdx)
; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, 2(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, 1(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, (%rdx)
; CHECK-BASELINE-NEXT: movq %rdx, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %r12
; CHECK-BASELINE-NEXT: popq %r13
; CHECK-BASELINE-NEXT: popq %r14
; CHECK-BASELINE-NEXT: popq %r15
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v16i8:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %r15
; CHECK-SSE1-NEXT: pushq %r14
; CHECK-SSE1-NEXT: pushq %r13
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movq %rdi, %rdx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorb %dil, %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: xorb %dil, %r9b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorb %r10b, %dil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil
; CHECK-SSE1-NEXT: xorb %r10b, %dil
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: xorb %r11b, %r10b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT: xorb %r11b, %r10b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %bl, %r11b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT: xorb %bl, %r11b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb %r13b, %bl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT: xorb %r13b, %bl
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: xorb %r12b, %r13b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: xorb %r12b, %r13b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: xorb %r15b, %r12b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-SSE1-NEXT: xorb %r15b, %r12b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT: xorb %r14b, %r15b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: xorb %r14b, %r15b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: xorb %bpl, %r14b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-SSE1-NEXT: xorb %bpl, %r14b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorb %al, %bpl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT: xorb %al, %bpl
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT: xorb %sil, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %sil, %cl
; CHECK-SSE1-NEXT: movb %cl, 15(%rdx)
; CHECK-SSE1-NEXT: movb %al, 14(%rdx)
; CHECK-SSE1-NEXT: movb %bpl, 13(%rdx)
; CHECK-SSE1-NEXT: movb %r14b, 12(%rdx)
; CHECK-SSE1-NEXT: movb %r15b, 11(%rdx)
; CHECK-SSE1-NEXT: movb %r12b, 10(%rdx)
; CHECK-SSE1-NEXT: movb %r13b, 9(%rdx)
; CHECK-SSE1-NEXT: movb %bl, 8(%rdx)
; CHECK-SSE1-NEXT: movb %r11b, 7(%rdx)
; CHECK-SSE1-NEXT: movb %r10b, 6(%rdx)
; CHECK-SSE1-NEXT: movb %dil, 5(%rdx)
; CHECK-SSE1-NEXT: movb %r9b, 4(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: movb %r8b, 3(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, 2(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, 1(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, (%rdx)
; CHECK-SSE1-NEXT: movq %rdx, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r12
; CHECK-SSE1-NEXT: popq %r13
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %r15
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v16i8:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v16i8:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <16 x i8> %x, %y
  %n1 = and <16 x i8> %n0, %mask
  %r = xor <16 x i8> %n1, %y
  ret <16 x i8> %r
}

; Masked merge in the "in" (xor-and-xor) form on <8 x i16>:
;   r = ((x ^ y) & mask) ^ y
; Expected code: the two runs without SSE2 are scalarized, one 16-bit
; xor/and/xor triple per element, with the result stored through the pointer
; received in %rdi; the SSE2 run expects andps/andnps/orps and the XOP run a
; single vpcmov.
define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v8i16:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %esi
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT: xorl %ebx, %esi
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %edx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT: xorl %ebx, %edx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %ecx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT: xorl %ebx, %ecx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %r8d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT: xorl %ebx, %r8d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %r9d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w
; CHECK-BASELINE-NEXT: xorl %ebx, %r9d
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorw %di, %bp
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bp
; CHECK-BASELINE-NEXT: xorl %edi, %ebp
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorw %r11w, %di
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %di
; CHECK-BASELINE-NEXT: xorl %r11d, %edi
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorw %r10w, %bx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx
; CHECK-BASELINE-NEXT: xorl %r10d, %ebx
; CHECK-BASELINE-NEXT: movw %bx, 14(%rax)
; CHECK-BASELINE-NEXT: movw %di, 12(%rax)
; CHECK-BASELINE-NEXT: movw %bp, 10(%rax)
; CHECK-BASELINE-NEXT: movw %r9w, 8(%rax)
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax)
; CHECK-BASELINE-NEXT: movw %cx, 4(%rax)
; CHECK-BASELINE-NEXT: movw %dx, 2(%rax)
; CHECK-BASELINE-NEXT: movw %si, (%rax)
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v8i16:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %esi
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT: xorl %ebx, %esi
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %edx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT: xorl %ebx, %edx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %ecx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT: xorl %ebx, %ecx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %r8d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT: xorl %ebx, %r8d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %r9d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w
; CHECK-SSE1-NEXT: xorl %ebx, %r9d
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorw %di, %bp
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bp
; CHECK-SSE1-NEXT: xorl %edi, %ebp
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorw %r11w, %di
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %di
; CHECK-SSE1-NEXT: xorl %r11d, %edi
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorw %r10w, %bx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx
; CHECK-SSE1-NEXT: xorl %r10d, %ebx
; CHECK-SSE1-NEXT: movw %bx, 14(%rax)
; CHECK-SSE1-NEXT: movw %di, 12(%rax)
; CHECK-SSE1-NEXT: movw %bp, 10(%rax)
; CHECK-SSE1-NEXT: movw %r9w, 8(%rax)
; CHECK-SSE1-NEXT: movw %r8w, 6(%rax)
; CHECK-SSE1-NEXT: movw %cx, 4(%rax)
; CHECK-SSE1-NEXT: movw %dx, 2(%rax)
; CHECK-SSE1-NEXT: movw %si, (%rax)
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v8i16:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v8i16:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <8 x i16> %x, %y
  %n1 = and <8 x i16> %n0, %mask
  %r = xor <8 x i16> %n1, %y
  ret <8 x i16> %r
}

; Masked merge in the "in" (xor-and-xor) form on <4 x i32>:
;   r = ((x ^ y) & mask) ^ y
; Unlike the other 128-bit tests here, x, y and mask are loaded (align 16)
; through the three ptr arguments instead of being passed by value.
; Expected code: the run without any SSE is scalarized into 32-bit
; xor/and/xor; the +sse,-sse2 run and the SSE2 run both expect
; andps/andnps/orps with memory operands; the XOP run expects vpcmov with y
; folded as a memory operand.
define <4 x i32> @in_v4i32(ptr%px, ptr%py, ptr%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v4i32:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d
; CHECK-BASELINE-NEXT: movl 8(%rdx), %r9d
; CHECK-BASELINE-NEXT: movl (%rdx), %r11d
; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d
; CHECK-BASELINE-NEXT: movl (%rsi), %edx
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
; CHECK-BASELINE-NEXT: movl 4(%rsi), %edi
; CHECK-BASELINE-NEXT: xorl %r10d, %edi
; CHECK-BASELINE-NEXT: movl 8(%rsi), %ebx
; CHECK-BASELINE-NEXT: xorl %r9d, %ebx
; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi
; CHECK-BASELINE-NEXT: xorl %r8d, %esi
; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi
; CHECK-BASELINE-NEXT: andl 8(%rcx), %ebx
; CHECK-BASELINE-NEXT: andl 4(%rcx), %edi
; CHECK-BASELINE-NEXT: andl (%rcx), %edx
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
; CHECK-BASELINE-NEXT: xorl %r10d, %edi
; CHECK-BASELINE-NEXT: xorl %r9d, %ebx
; CHECK-BASELINE-NEXT: xorl %r8d, %esi
; CHECK-BASELINE-NEXT: movl %esi, 12(%rax)
; CHECK-BASELINE-NEXT: movl %ebx, 8(%rax)
; CHECK-BASELINE-NEXT: movl %edi, 4(%rax)
; CHECK-BASELINE-NEXT: movl %edx, (%rax)
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v4i32:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v4i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v4i32:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> %x, %y
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; Masked merge in the "in" (xor-and-xor) form on <2 x i64>:
;   r = ((x ^ y) & mask) ^ y
; Expected code: the two runs without SSE2 keep both lanes in general-purpose
; registers and use 64-bit xor/and/xor; the SSE2 run expects
; andps/andnps/orps and the XOP run a single vpcmov.
define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v2i64:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: xorq %rdx, %rax
; CHECK-BASELINE-NEXT: xorq %rcx, %rsi
; CHECK-BASELINE-NEXT: andq %r9, %rsi
; CHECK-BASELINE-NEXT: andq %r8, %rax
; CHECK-BASELINE-NEXT: xorq %rdx, %rax
; CHECK-BASELINE-NEXT: xorq %rcx, %rsi
; CHECK-BASELINE-NEXT: movq %rsi, %rdx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v2i64:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: xorq %rdx, %rax
; CHECK-SSE1-NEXT: xorq %rcx, %rsi
; CHECK-SSE1-NEXT: andq %r9, %rsi
; CHECK-SSE1-NEXT: andq %r8, %rax
; CHECK-SSE1-NEXT: xorq %rdx, %rax
; CHECK-SSE1-NEXT: xorq %rcx, %rsi
; CHECK-SSE1-NEXT: movq %rsi, %rdx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v2i64:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v2i64:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <2 x i64> %x, %y
  %n1 = and <2 x i64> %n0, %mask
  %r = xor <2 x i64> %n1, %y
  ret <2 x i64> %r
}

; ============================================================================ ;
; 256-bit vector width
;
============================================================================ ; define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: in_v32i8: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rdx, %r13 ; CHECK-BASELINE-NEXT: movq %rsi, %rbx ; CHECK-BASELINE-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %r12d ; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %r9d ; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r10d ; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r11d ; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r8d ; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %ebp ; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %esi ; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edi ; CHECK-BASELINE-NEXT: movzbl 2(%rdx), %r14d ; CHECK-BASELINE-NEXT: movzbl (%rdx), %eax ; CHECK-BASELINE-NEXT: movzbl 1(%rdx), %r15d ; CHECK-BASELINE-NEXT: movzbl (%rbx), %edx ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb (%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 1(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: andb 1(%rcx), %al ; 
CHECK-BASELINE-NEXT: xorb %r15b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 2(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: andb 2(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r14b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 3(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: andb 3(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %dil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 4(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: andb 4(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %sil, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 5(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: andb 5(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %bpl, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 6(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: andb 6(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r8b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 7(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: andb 7(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r11b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 8(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: andb 8(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r10b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 9(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: andb 9(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r9b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; 
CHECK-BASELINE-NEXT: movzbl 10(%rbx), %edx ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 10(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 11(%rbx), %edx ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 11(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 12(%rbx), %edx ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 12(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 13(%rbx), %edx ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 13(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 14(%rbx), %edx ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 14(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 15(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: andb 15(%rcx), %al ; CHECK-BASELINE-NEXT: xorb %r12b, %al ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 16(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 16(%rbx), %edx ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 16(%rcx), %dl ; 
CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 17(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 17(%rbx), %edx ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 17(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 18(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 18(%rbx), %edx ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 18(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 19(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 19(%rbx), %r12d ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: andb 19(%rcx), %r12b ; CHECK-BASELINE-NEXT: xorb %al, %r12b ; CHECK-BASELINE-NEXT: movzbl 20(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 20(%rbx), %r15d ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: andb 20(%rcx), %r15b ; CHECK-BASELINE-NEXT: movq %rcx, %rsi ; CHECK-BASELINE-NEXT: xorb %al, %r15b ; CHECK-BASELINE-NEXT: movzbl 21(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 21(%rbx), %r14d ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: andb 21(%rcx), %r14b ; CHECK-BASELINE-NEXT: xorb %al, %r14b ; CHECK-BASELINE-NEXT: movzbl 22(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 22(%rbx), %ebp ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: andb 22(%rcx), %bpl ; CHECK-BASELINE-NEXT: xorb %al, %bpl ; CHECK-BASELINE-NEXT: movzbl 23(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 23(%rbx), %r11d ; CHECK-BASELINE-NEXT: xorb %al, %r11b ; CHECK-BASELINE-NEXT: andb 23(%rcx), %r11b ; CHECK-BASELINE-NEXT: xorb %al, %r11b ; CHECK-BASELINE-NEXT: movzbl 24(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 24(%rbx), %r10d ; CHECK-BASELINE-NEXT: xorb %al, %r10b ; CHECK-BASELINE-NEXT: andb 24(%rcx), %r10b ; CHECK-BASELINE-NEXT: xorb %al, %r10b ; 
CHECK-BASELINE-NEXT: movzbl 25(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 25(%rbx), %r9d ; CHECK-BASELINE-NEXT: xorb %al, %r9b ; CHECK-BASELINE-NEXT: andb 25(%rcx), %r9b ; CHECK-BASELINE-NEXT: xorb %al, %r9b ; CHECK-BASELINE-NEXT: movzbl 26(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 26(%rbx), %r8d ; CHECK-BASELINE-NEXT: xorb %al, %r8b ; CHECK-BASELINE-NEXT: andb 26(%rcx), %r8b ; CHECK-BASELINE-NEXT: xorb %al, %r8b ; CHECK-BASELINE-NEXT: movzbl 27(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 27(%rbx), %edi ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: andb 27(%rcx), %dil ; CHECK-BASELINE-NEXT: xorb %al, %dil ; CHECK-BASELINE-NEXT: movzbl 28(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 28(%rbx), %edx ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: andb 28(%rcx), %dl ; CHECK-BASELINE-NEXT: xorb %al, %dl ; CHECK-BASELINE-NEXT: movzbl 29(%r13), %eax ; CHECK-BASELINE-NEXT: movzbl 29(%rbx), %ecx ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: andb 29(%rsi), %cl ; CHECK-BASELINE-NEXT: xorb %al, %cl ; CHECK-BASELINE-NEXT: movzbl 30(%r13), %eax ; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-BASELINE-NEXT: movzbl 30(%rbx), %eax ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: andb 30(%rsi), %al ; CHECK-BASELINE-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movzbl 31(%r13), %r13d ; CHECK-BASELINE-NEXT: movzbl 31(%rbx), %ebx ; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: andb 31(%rsi), %bl ; CHECK-BASELINE-NEXT: xorb %r13b, %bl ; CHECK-BASELINE-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-BASELINE-NEXT: movb %bl, 31(%r13) ; CHECK-BASELINE-NEXT: movb %al, 30(%r13) ; CHECK-BASELINE-NEXT: movb %cl, 29(%r13) ; CHECK-BASELINE-NEXT: movb %dl, 28(%r13) ; CHECK-BASELINE-NEXT: movb %dil, 27(%r13) ; CHECK-BASELINE-NEXT: movb %r8b, 26(%r13) ; CHECK-BASELINE-NEXT: movb %r9b, 25(%r13) ; 
CHECK-BASELINE-NEXT: movb %r10b, 24(%r13) ; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13) ; CHECK-BASELINE-NEXT: movb %bpl, 22(%r13) ; CHECK-BASELINE-NEXT: movb %r14b, 21(%r13) ; CHECK-BASELINE-NEXT: movb %r15b, 20(%r13) ; CHECK-BASELINE-NEXT: movb %r12b, 19(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 18(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 17(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 16(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 15(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 14(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 13(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 12(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 11(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 10(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 9(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 8(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 7(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 6(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; 
CHECK-BASELINE-NEXT: movb %al, 5(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 4(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 3(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 2(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, 1(%r13) ; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-BASELINE-NEXT: movb %al, (%r13) ; CHECK-BASELINE-NEXT: movq %r13, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v32i8: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rdx, %r13 ; CHECK-SSE1-NEXT: movq %rsi, %rbx ; CHECK-SSE1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-SSE1-NEXT: movzbl 15(%rdx), %r12d ; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 9(%rdx), %r9d ; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r10d ; CHECK-SSE1-NEXT: 
movzbl 7(%rdx), %r11d ; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r8d ; CHECK-SSE1-NEXT: movzbl 5(%rdx), %ebp ; CHECK-SSE1-NEXT: movzbl 4(%rdx), %esi ; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edi ; CHECK-SSE1-NEXT: movzbl 2(%rdx), %r14d ; CHECK-SSE1-NEXT: movzbl (%rdx), %eax ; CHECK-SSE1-NEXT: movzbl 1(%rdx), %r15d ; CHECK-SSE1-NEXT: movzbl (%rbx), %edx ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb (%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 1(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: andb 1(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r15b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 2(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: andb 2(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r14b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 3(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: andb 3(%rcx), %al ; CHECK-SSE1-NEXT: xorb %dil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 4(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: andb 4(%rcx), %al ; CHECK-SSE1-NEXT: xorb %sil, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 5(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: andb 5(%rcx), %al ; CHECK-SSE1-NEXT: xorb %bpl, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 6(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: andb 6(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r8b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 7(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: andb 7(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r11b, %al ; CHECK-SSE1-NEXT: movb %al, 
{{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 8(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: andb 8(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r10b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 9(%rbx), %eax ; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: andb 9(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r9b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 10(%rbx), %edx ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 10(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 11(%rbx), %edx ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 11(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 12(%rbx), %edx ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 12(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 13(%rbx), %edx ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 13(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 14(%rbx), %edx ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 14(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 15(%rbx), %eax ; CHECK-SSE1-NEXT: 
xorb %r12b, %al ; CHECK-SSE1-NEXT: andb 15(%rcx), %al ; CHECK-SSE1-NEXT: xorb %r12b, %al ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 16(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 16(%rbx), %edx ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 16(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 17(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 17(%rbx), %edx ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 17(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 18(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 18(%rbx), %edx ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 18(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 19(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 19(%rbx), %r12d ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: andb 19(%rcx), %r12b ; CHECK-SSE1-NEXT: xorb %al, %r12b ; CHECK-SSE1-NEXT: movzbl 20(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 20(%rbx), %r15d ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: andb 20(%rcx), %r15b ; CHECK-SSE1-NEXT: movq %rcx, %rsi ; CHECK-SSE1-NEXT: xorb %al, %r15b ; CHECK-SSE1-NEXT: movzbl 21(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 21(%rbx), %r14d ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: andb 21(%rcx), %r14b ; CHECK-SSE1-NEXT: xorb %al, %r14b ; CHECK-SSE1-NEXT: movzbl 22(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 22(%rbx), %ebp ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: andb 22(%rcx), %bpl ; CHECK-SSE1-NEXT: xorb %al, %bpl ; CHECK-SSE1-NEXT: movzbl 23(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 23(%rbx), %r11d ; CHECK-SSE1-NEXT: xorb %al, %r11b ; CHECK-SSE1-NEXT: andb 23(%rcx), %r11b ; CHECK-SSE1-NEXT: xorb %al, %r11b ; CHECK-SSE1-NEXT: movzbl 24(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 24(%rbx), 
%r10d ; CHECK-SSE1-NEXT: xorb %al, %r10b ; CHECK-SSE1-NEXT: andb 24(%rcx), %r10b ; CHECK-SSE1-NEXT: xorb %al, %r10b ; CHECK-SSE1-NEXT: movzbl 25(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 25(%rbx), %r9d ; CHECK-SSE1-NEXT: xorb %al, %r9b ; CHECK-SSE1-NEXT: andb 25(%rcx), %r9b ; CHECK-SSE1-NEXT: xorb %al, %r9b ; CHECK-SSE1-NEXT: movzbl 26(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 26(%rbx), %r8d ; CHECK-SSE1-NEXT: xorb %al, %r8b ; CHECK-SSE1-NEXT: andb 26(%rcx), %r8b ; CHECK-SSE1-NEXT: xorb %al, %r8b ; CHECK-SSE1-NEXT: movzbl 27(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 27(%rbx), %edi ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: andb 27(%rcx), %dil ; CHECK-SSE1-NEXT: xorb %al, %dil ; CHECK-SSE1-NEXT: movzbl 28(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 28(%rbx), %edx ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: andb 28(%rcx), %dl ; CHECK-SSE1-NEXT: xorb %al, %dl ; CHECK-SSE1-NEXT: movzbl 29(%r13), %eax ; CHECK-SSE1-NEXT: movzbl 29(%rbx), %ecx ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: andb 29(%rsi), %cl ; CHECK-SSE1-NEXT: xorb %al, %cl ; CHECK-SSE1-NEXT: movzbl 30(%r13), %eax ; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-SSE1-NEXT: movzbl 30(%rbx), %eax ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-SSE1-NEXT: andb 30(%rsi), %al ; CHECK-SSE1-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movzbl 31(%r13), %r13d ; CHECK-SSE1-NEXT: movzbl 31(%rbx), %ebx ; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: andb 31(%rsi), %bl ; CHECK-SSE1-NEXT: xorb %r13b, %bl ; CHECK-SSE1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Reload ; CHECK-SSE1-NEXT: movb %bl, 31(%r13) ; CHECK-SSE1-NEXT: movb %al, 30(%r13) ; CHECK-SSE1-NEXT: movb %cl, 29(%r13) ; CHECK-SSE1-NEXT: movb %dl, 28(%r13) ; CHECK-SSE1-NEXT: movb %dil, 27(%r13) ; CHECK-SSE1-NEXT: movb %r8b, 26(%r13) ; CHECK-SSE1-NEXT: movb %r9b, 25(%r13) ; CHECK-SSE1-NEXT: movb %r10b, 24(%r13) ; CHECK-SSE1-NEXT: movb %r11b, 
23(%r13) ; CHECK-SSE1-NEXT: movb %bpl, 22(%r13) ; CHECK-SSE1-NEXT: movb %r14b, 21(%r13) ; CHECK-SSE1-NEXT: movb %r15b, 20(%r13) ; CHECK-SSE1-NEXT: movb %r12b, 19(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 18(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 17(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 16(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 15(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 14(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 13(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 12(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 11(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 10(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 9(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 8(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 7(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 6(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 5(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 4(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte 
Folded Reload ; CHECK-SSE1-NEXT: movb %al, 3(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 2(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, 1(%r13) ; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload ; CHECK-SSE1-NEXT: movb %al, (%r13) ; CHECK-SSE1-NEXT: movq %r13, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v32i8: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v32i8: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <32 x i8>, ptr%px, align 32 %y = load <32 x i8>, ptr%py, align 32 %mask = load <32 x i8>, ptr%pmask, align 32 %n0 = xor <32 x i8> %x, %y %n1 = and <32 x i8> %n0, %mask %r = xor <32 x i8> %n1, %y ret <32 x i8> %r } define <16 x i16> @in_v16i16(ptr%px, ptr%py, ptr%pmask) nounwind { ; Masked merge, 'in' form: loads x, y and mask as <16 x i16> (align 32) from px, py and pmask and returns ((x ^ y) & mask) ^ y, i.e. each result bit comes from x where the mask bit is set and from y where it is clear. ; The assertions below are autogenerated (see the note at the top of the file), so regenerate them rather than editing by hand: the BASELINE and SSE1 prefixes expect per-element scalar xor/and/xor with the result stored through the pointer passed in rdi, the SSE2 prefix expects andnps/andps/orps on two 128-bit halves, and the XOP prefix expects a single 256-bit vpcmov. ; CHECK-BASELINE-LABEL: in_v16i16: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rcx, %r9 ; CHECK-BASELINE-NEXT: movq %rdi, %r10 ; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edi ; 
CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 28(%rdx), %edi ; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %edi ; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 24(%rdx), %eax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %eax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 20(%rdx), %r11d ; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r14d ; CHECK-BASELINE-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 16(%rdx), %r15d ; CHECK-BASELINE-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %r12d ; CHECK-BASELINE-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 12(%rdx), %r13d ; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r8d ; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx ; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %ebp ; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl (%rdx), %ecx ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 4(%rdx), %edi ; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %eax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl (%rsi), %edx ; CHECK-BASELINE-NEXT: xorw %cx, %dx ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 
; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %ecx ; CHECK-BASELINE-NEXT: xorw %ax, %cx ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax ; CHECK-BASELINE-NEXT: xorw %di, %ax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %edx ; CHECK-BASELINE-NEXT: xorw %bp, %dx ; CHECK-BASELINE-NEXT: movl %edx, %eax ; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %ecx ; CHECK-BASELINE-NEXT: xorw %bx, %cx ; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %edx ; CHECK-BASELINE-NEXT: xorw %r8w, %dx ; CHECK-BASELINE-NEXT: movl %edx, %r8d ; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %edx ; CHECK-BASELINE-NEXT: xorw %r13w, %dx ; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %r13d ; CHECK-BASELINE-NEXT: xorw %r12w, %r13w ; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %r12d ; CHECK-BASELINE-NEXT: xorw %r15w, %r12w ; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %r15d ; CHECK-BASELINE-NEXT: xorw %r14w, %r15w ; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %r14d ; CHECK-BASELINE-NEXT: xorw %r11w, %r14w ; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %ebp ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload ; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %ebx ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload ; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r11d ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload ; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %edi ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload ; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi ; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload ; CHECK-BASELINE-NEXT: andw 30(%r9), %si ; CHECK-BASELINE-NEXT: andw 28(%r9), %di ; CHECK-BASELINE-NEXT: andw 26(%r9), %r11w ; CHECK-BASELINE-NEXT: andw 24(%r9), %bx ; CHECK-BASELINE-NEXT: andw 22(%r9), %bp ; CHECK-BASELINE-NEXT: andw 20(%r9), %r14w ; CHECK-BASELINE-NEXT: andw 18(%r9), %r15w ; 
CHECK-BASELINE-NEXT: andw 16(%r9), %r12w ; CHECK-BASELINE-NEXT: andw 14(%r9), %r13w ; CHECK-BASELINE-NEXT: andw 12(%r9), %dx ; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: andw 10(%r9), %r8w ; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl %ecx, %edx ; CHECK-BASELINE-NEXT: andw 8(%r9), %dx ; CHECK-BASELINE-NEXT: andw 6(%r9), %ax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload ; CHECK-BASELINE-NEXT: andw 4(%r9), %r8w ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: andw 2(%r9), %ax ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-BASELINE-NEXT: andw (%r9), %cx ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movl %edx, %ecx ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded 
Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: movw %si, 30(%r10) ; CHECK-BASELINE-NEXT: movw %di, 28(%r10) ; CHECK-BASELINE-NEXT: movw %r11w, 26(%r10) ; CHECK-BASELINE-NEXT: movw %bx, 24(%r10) ; CHECK-BASELINE-NEXT: movw %bp, 22(%r10) ; CHECK-BASELINE-NEXT: movw %r14w, 20(%r10) ; CHECK-BASELINE-NEXT: movw %r15w, 18(%r10) ; CHECK-BASELINE-NEXT: movw %r12w, 16(%r10) ; CHECK-BASELINE-NEXT: movw %r13w, 14(%r10) ; CHECK-BASELINE-NEXT: movw %ax, 12(%r10) ; CHECK-BASELINE-NEXT: movw %dx, 10(%r10) ; CHECK-BASELINE-NEXT: movw %cx, 8(%r10) ; CHECK-BASELINE-NEXT: movw %r9w, 6(%r10) ; CHECK-BASELINE-NEXT: movw %r8w, 4(%r10) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, 2(%r10) ; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-BASELINE-NEXT: movw %ax, (%r10) ; CHECK-BASELINE-NEXT: movq %r10, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v16i16: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rcx, %r9 ; CHECK-SSE1-NEXT: movq %rdi, %r10 ; 
CHECK-SSE1-NEXT: movzwl 30(%rdx), %edi ; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 28(%rdx), %edi ; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 26(%rdx), %edi ; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 24(%rdx), %eax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 22(%rdx), %eax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 20(%rdx), %r11d ; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r14d ; CHECK-SSE1-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 16(%rdx), %r15d ; CHECK-SSE1-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 14(%rdx), %r12d ; CHECK-SSE1-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 12(%rdx), %r13d ; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r8d ; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx ; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 6(%rdx), %ebp ; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl (%rdx), %ecx ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 4(%rdx), %edi ; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 2(%rdx), %eax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl (%rsi), %edx ; CHECK-SSE1-NEXT: xorw %cx, %dx ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 2(%rsi), %ecx ; CHECK-SSE1-NEXT: xorw %ax, %cx ; CHECK-SSE1-NEXT: 
movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax ; CHECK-SSE1-NEXT: xorw %di, %ax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movzwl 6(%rsi), %edx ; CHECK-SSE1-NEXT: xorw %bp, %dx ; CHECK-SSE1-NEXT: movl %edx, %eax ; CHECK-SSE1-NEXT: movzwl 8(%rsi), %ecx ; CHECK-SSE1-NEXT: xorw %bx, %cx ; CHECK-SSE1-NEXT: movzwl 10(%rsi), %edx ; CHECK-SSE1-NEXT: xorw %r8w, %dx ; CHECK-SSE1-NEXT: movl %edx, %r8d ; CHECK-SSE1-NEXT: movzwl 12(%rsi), %edx ; CHECK-SSE1-NEXT: xorw %r13w, %dx ; CHECK-SSE1-NEXT: movzwl 14(%rsi), %r13d ; CHECK-SSE1-NEXT: xorw %r12w, %r13w ; CHECK-SSE1-NEXT: movzwl 16(%rsi), %r12d ; CHECK-SSE1-NEXT: xorw %r15w, %r12w ; CHECK-SSE1-NEXT: movzwl 18(%rsi), %r15d ; CHECK-SSE1-NEXT: xorw %r14w, %r15w ; CHECK-SSE1-NEXT: movzwl 20(%rsi), %r14d ; CHECK-SSE1-NEXT: xorw %r11w, %r14w ; CHECK-SSE1-NEXT: movzwl 22(%rsi), %ebp ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload ; CHECK-SSE1-NEXT: movzwl 24(%rsi), %ebx ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload ; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r11d ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload ; CHECK-SSE1-NEXT: movzwl 28(%rsi), %edi ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload ; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi ; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload ; CHECK-SSE1-NEXT: andw 30(%r9), %si ; CHECK-SSE1-NEXT: andw 28(%r9), %di ; CHECK-SSE1-NEXT: andw 26(%r9), %r11w ; CHECK-SSE1-NEXT: andw 24(%r9), %bx ; CHECK-SSE1-NEXT: andw 22(%r9), %bp ; CHECK-SSE1-NEXT: andw 20(%r9), %r14w ; CHECK-SSE1-NEXT: andw 18(%r9), %r15w ; CHECK-SSE1-NEXT: andw 16(%r9), %r12w ; CHECK-SSE1-NEXT: andw 14(%r9), %r13w ; CHECK-SSE1-NEXT: andw 12(%r9), %dx ; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: andw 10(%r9), %r8w ; CHECK-SSE1-NEXT: movl %r8d, 
{{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl %ecx, %edx ; CHECK-SSE1-NEXT: andw 8(%r9), %dx ; CHECK-SSE1-NEXT: andw 6(%r9), %ax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload ; CHECK-SSE1-NEXT: andw 4(%r9), %r8w ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: andw 2(%r9), %ax ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-SSE1-NEXT: andw (%r9), %cx ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movl %edx, %ecx ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d 
# 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload ; CHECK-SSE1-NEXT: movw %si, 30(%r10) ; CHECK-SSE1-NEXT: movw %di, 28(%r10) ; CHECK-SSE1-NEXT: movw %r11w, 26(%r10) ; CHECK-SSE1-NEXT: movw %bx, 24(%r10) ; CHECK-SSE1-NEXT: movw %bp, 22(%r10) ; CHECK-SSE1-NEXT: movw %r14w, 20(%r10) ; CHECK-SSE1-NEXT: movw %r15w, 18(%r10) ; CHECK-SSE1-NEXT: movw %r12w, 16(%r10) ; CHECK-SSE1-NEXT: movw %r13w, 14(%r10) ; CHECK-SSE1-NEXT: movw %ax, 12(%r10) ; CHECK-SSE1-NEXT: movw %dx, 10(%r10) ; CHECK-SSE1-NEXT: movw %cx, 8(%r10) ; CHECK-SSE1-NEXT: movw %r9w, 6(%r10) ; CHECK-SSE1-NEXT: movw %r8w, 4(%r10) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, 2(%r10) ; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload ; CHECK-SSE1-NEXT: movw %ax, (%r10) ; CHECK-SSE1-NEXT: movq %r10, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v16i16: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v16i16: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <16 x i16>, ptr%px, align 32 %y = load <16 x i16>, ptr%py, align 32 %mask = load <16 x i16>, ptr%pmask, align 32 %n0 = xor <16 x i16> 
%x, %y %n1 = and <16 x i16> %n0, %mask %r = xor <16 x i16> %n1, %y ret <16 x i16> %r } define <8 x i32> @in_v8i32(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: in_v8i32: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbp ; CHECK-BASELINE-NEXT: pushq %r15 ; CHECK-BASELINE-NEXT: pushq %r14 ; CHECK-BASELINE-NEXT: pushq %r13 ; CHECK-BASELINE-NEXT: pushq %r12 ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movl 28(%rdx), %r15d ; CHECK-BASELINE-NEXT: movl 24(%rdx), %r14d ; CHECK-BASELINE-NEXT: movl 20(%rdx), %r10d ; CHECK-BASELINE-NEXT: movl 16(%rdx), %eax ; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 12(%rdx), %ebp ; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebx ; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-BASELINE-NEXT: movl (%rdx), %r12d ; CHECK-BASELINE-NEXT: movl 4(%rdx), %r13d ; CHECK-BASELINE-NEXT: movl (%rsi), %r11d ; CHECK-BASELINE-NEXT: xorl %r12d, %r11d ; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d ; CHECK-BASELINE-NEXT: xorl %r13d, %r9d ; CHECK-BASELINE-NEXT: movl 8(%rsi), %r8d ; CHECK-BASELINE-NEXT: xorl %ebx, %r8d ; CHECK-BASELINE-NEXT: movl 12(%rsi), %ebx ; CHECK-BASELINE-NEXT: xorl %ebp, %ebx ; CHECK-BASELINE-NEXT: movl 16(%rsi), %ebp ; CHECK-BASELINE-NEXT: xorl %eax, %ebp ; CHECK-BASELINE-NEXT: movl 20(%rsi), %edx ; CHECK-BASELINE-NEXT: xorl %r10d, %edx ; CHECK-BASELINE-NEXT: movl 24(%rsi), %eax ; CHECK-BASELINE-NEXT: xorl %r14d, %eax ; CHECK-BASELINE-NEXT: movl 28(%rsi), %esi ; CHECK-BASELINE-NEXT: xorl %r15d, %esi ; CHECK-BASELINE-NEXT: andl 28(%rcx), %esi ; CHECK-BASELINE-NEXT: andl 24(%rcx), %eax ; CHECK-BASELINE-NEXT: andl 20(%rcx), %edx ; CHECK-BASELINE-NEXT: andl 16(%rcx), %ebp ; CHECK-BASELINE-NEXT: andl 12(%rcx), %ebx ; CHECK-BASELINE-NEXT: andl 8(%rcx), %r8d ; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d ; CHECK-BASELINE-NEXT: andl (%rcx), %r11d ; 
CHECK-BASELINE-NEXT: xorl %r12d, %r11d ; CHECK-BASELINE-NEXT: xorl %r13d, %r9d ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload ; CHECK-BASELINE-NEXT: xorl %r10d, %edx ; CHECK-BASELINE-NEXT: xorl %r14d, %eax ; CHECK-BASELINE-NEXT: xorl %r15d, %esi ; CHECK-BASELINE-NEXT: movl %esi, 28(%rdi) ; CHECK-BASELINE-NEXT: movl %eax, 24(%rdi) ; CHECK-BASELINE-NEXT: movl %edx, 20(%rdi) ; CHECK-BASELINE-NEXT: movl %ebp, 16(%rdi) ; CHECK-BASELINE-NEXT: movl %ebx, 12(%rdi) ; CHECK-BASELINE-NEXT: movl %r8d, 8(%rdi) ; CHECK-BASELINE-NEXT: movl %r9d, 4(%rdi) ; CHECK-BASELINE-NEXT: movl %r11d, (%rdi) ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: popq %r12 ; CHECK-BASELINE-NEXT: popq %r13 ; CHECK-BASELINE-NEXT: popq %r14 ; CHECK-BASELINE-NEXT: popq %r15 ; CHECK-BASELINE-NEXT: popq %rbp ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v8i32: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbp ; CHECK-SSE1-NEXT: pushq %r15 ; CHECK-SSE1-NEXT: pushq %r14 ; CHECK-SSE1-NEXT: pushq %r13 ; CHECK-SSE1-NEXT: pushq %r12 ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movl 28(%rdx), %r15d ; CHECK-SSE1-NEXT: movl 24(%rdx), %r14d ; CHECK-SSE1-NEXT: movl 20(%rdx), %r10d ; CHECK-SSE1-NEXT: movl 16(%rdx), %eax ; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 12(%rdx), %ebp ; CHECK-SSE1-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl 8(%rdx), %ebx ; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; CHECK-SSE1-NEXT: movl (%rdx), %r12d ; CHECK-SSE1-NEXT: movl 4(%rdx), %r13d ; CHECK-SSE1-NEXT: movl (%rsi), %r11d ; CHECK-SSE1-NEXT: xorl %r12d, %r11d ; CHECK-SSE1-NEXT: movl 4(%rsi), %r9d ; CHECK-SSE1-NEXT: xorl %r13d, %r9d ; CHECK-SSE1-NEXT: movl 8(%rsi), %r8d ; 
CHECK-SSE1-NEXT: xorl %ebx, %r8d ; CHECK-SSE1-NEXT: movl 12(%rsi), %ebx ; CHECK-SSE1-NEXT: xorl %ebp, %ebx ; CHECK-SSE1-NEXT: movl 16(%rsi), %ebp ; CHECK-SSE1-NEXT: xorl %eax, %ebp ; CHECK-SSE1-NEXT: movl 20(%rsi), %edx ; CHECK-SSE1-NEXT: xorl %r10d, %edx ; CHECK-SSE1-NEXT: movl 24(%rsi), %eax ; CHECK-SSE1-NEXT: xorl %r14d, %eax ; CHECK-SSE1-NEXT: movl 28(%rsi), %esi ; CHECK-SSE1-NEXT: xorl %r15d, %esi ; CHECK-SSE1-NEXT: andl 28(%rcx), %esi ; CHECK-SSE1-NEXT: andl 24(%rcx), %eax ; CHECK-SSE1-NEXT: andl 20(%rcx), %edx ; CHECK-SSE1-NEXT: andl 16(%rcx), %ebp ; CHECK-SSE1-NEXT: andl 12(%rcx), %ebx ; CHECK-SSE1-NEXT: andl 8(%rcx), %r8d ; CHECK-SSE1-NEXT: andl 4(%rcx), %r9d ; CHECK-SSE1-NEXT: andl (%rcx), %r11d ; CHECK-SSE1-NEXT: xorl %r12d, %r11d ; CHECK-SSE1-NEXT: xorl %r13d, %r9d ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload ; CHECK-SSE1-NEXT: xorl %r10d, %edx ; CHECK-SSE1-NEXT: xorl %r14d, %eax ; CHECK-SSE1-NEXT: xorl %r15d, %esi ; CHECK-SSE1-NEXT: movl %esi, 28(%rdi) ; CHECK-SSE1-NEXT: movl %eax, 24(%rdi) ; CHECK-SSE1-NEXT: movl %edx, 20(%rdi) ; CHECK-SSE1-NEXT: movl %ebp, 16(%rdi) ; CHECK-SSE1-NEXT: movl %ebx, 12(%rdi) ; CHECK-SSE1-NEXT: movl %r8d, 8(%rdi) ; CHECK-SSE1-NEXT: movl %r9d, 4(%rdi) ; CHECK-SSE1-NEXT: movl %r11d, (%rdi) ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: popq %r12 ; CHECK-SSE1-NEXT: popq %r13 ; CHECK-SSE1-NEXT: popq %r14 ; CHECK-SSE1-NEXT: popq %r15 ; CHECK-SSE1-NEXT: popq %rbp ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v8i32: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: movaps %xmm1, 
%xmm2 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v8i32: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <8 x i32>, ptr%px, align 32 %y = load <8 x i32>, ptr%py, align 32 %mask = load <8 x i32>, ptr%pmask, align 32 %n0 = xor <8 x i32> %x, %y %n1 = and <8 x i32> %n0, %mask %r = xor <8 x i32> %n1, %y ret <8 x i32> %r } define <4 x i64> @in_v4i64(ptr%px, ptr%py, ptr%pmask) nounwind { ; CHECK-BASELINE-LABEL: in_v4i64: ; CHECK-BASELINE: # %bb.0: ; CHECK-BASELINE-NEXT: pushq %rbx ; CHECK-BASELINE-NEXT: movq %rdi, %rax ; CHECK-BASELINE-NEXT: movq 24(%rdx), %r8 ; CHECK-BASELINE-NEXT: movq 16(%rdx), %r9 ; CHECK-BASELINE-NEXT: movq (%rdx), %r11 ; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10 ; CHECK-BASELINE-NEXT: movq (%rsi), %rdx ; CHECK-BASELINE-NEXT: xorq %r11, %rdx ; CHECK-BASELINE-NEXT: movq 8(%rsi), %rdi ; CHECK-BASELINE-NEXT: xorq %r10, %rdi ; CHECK-BASELINE-NEXT: movq 16(%rsi), %rbx ; CHECK-BASELINE-NEXT: xorq %r9, %rbx ; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi ; CHECK-BASELINE-NEXT: xorq %r8, %rsi ; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi ; CHECK-BASELINE-NEXT: andq 16(%rcx), %rbx ; CHECK-BASELINE-NEXT: andq 8(%rcx), %rdi ; CHECK-BASELINE-NEXT: andq (%rcx), %rdx ; CHECK-BASELINE-NEXT: xorq %r11, %rdx ; CHECK-BASELINE-NEXT: xorq %r10, %rdi ; CHECK-BASELINE-NEXT: xorq %r9, %rbx ; CHECK-BASELINE-NEXT: xorq %r8, %rsi ; CHECK-BASELINE-NEXT: movq %rsi, 24(%rax) ; CHECK-BASELINE-NEXT: movq %rbx, 16(%rax) ; CHECK-BASELINE-NEXT: movq %rdi, 8(%rax) ; CHECK-BASELINE-NEXT: movq %rdx, (%rax) ; CHECK-BASELINE-NEXT: popq %rbx ; CHECK-BASELINE-NEXT: retq ; ; CHECK-SSE1-LABEL: in_v4i64: ; CHECK-SSE1: # %bb.0: ; CHECK-SSE1-NEXT: pushq %rbx ; CHECK-SSE1-NEXT: movq %rdi, %rax ; CHECK-SSE1-NEXT: movq 24(%rdx), %r8 ; 
CHECK-SSE1-NEXT: movq 16(%rdx), %r9 ; CHECK-SSE1-NEXT: movq (%rdx), %r11 ; CHECK-SSE1-NEXT: movq 8(%rdx), %r10 ; CHECK-SSE1-NEXT: movq (%rsi), %rdx ; CHECK-SSE1-NEXT: xorq %r11, %rdx ; CHECK-SSE1-NEXT: movq 8(%rsi), %rdi ; CHECK-SSE1-NEXT: xorq %r10, %rdi ; CHECK-SSE1-NEXT: movq 16(%rsi), %rbx ; CHECK-SSE1-NEXT: xorq %r9, %rbx ; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi ; CHECK-SSE1-NEXT: xorq %r8, %rsi ; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi ; CHECK-SSE1-NEXT: andq 16(%rcx), %rbx ; CHECK-SSE1-NEXT: andq 8(%rcx), %rdi ; CHECK-SSE1-NEXT: andq (%rcx), %rdx ; CHECK-SSE1-NEXT: xorq %r11, %rdx ; CHECK-SSE1-NEXT: xorq %r10, %rdi ; CHECK-SSE1-NEXT: xorq %r9, %rbx ; CHECK-SSE1-NEXT: xorq %r8, %rsi ; CHECK-SSE1-NEXT: movq %rsi, 24(%rax) ; CHECK-SSE1-NEXT: movq %rbx, 16(%rax) ; CHECK-SSE1-NEXT: movq %rdi, 8(%rax) ; CHECK-SSE1-NEXT: movq %rdx, (%rax) ; CHECK-SSE1-NEXT: popq %rbx ; CHECK-SSE1-NEXT: retq ; ; CHECK-SSE2-LABEL: in_v4i64: ; CHECK-SSE2: # %bb.0: ; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 ; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 ; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 ; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 ; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 ; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 ; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 ; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 ; CHECK-SSE2-NEXT: retq ; ; CHECK-XOP-LABEL: in_v4i64: ; CHECK-XOP: # %bb.0: ; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 ; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 ; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 ; CHECK-XOP-NEXT: retq %x = load <4 x i64>, ptr%px, align 32 %y = load <4 x i64>, ptr%py, align 32 %mask = load <4 x i64>, ptr%pmask, align 32 %n0 = xor <4 x i64> %x, %y %n1 = and <4 x i64> %n0, %mask %r = xor <4 x i64> %n1, %y ret <4 x i64> %r }