; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512dq -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW

; https://llvm.org/bugs/show_bug.cgi?id=27100
;
; Each function in this first group fills N bytes at %x with the constant
; byte 42 by calling __memset_chk, and the assertions pin the code expected
; for every RUN configuration above.
; NOTE(review): the last __memset_chk argument is -1, presumably the
; "object size unknown" sentinel that lets the checked call be treated as a
; plain memset; confirm against the libcall simplification documentation.

; 16 bytes. The SSE-prefixed runs expect two 8-byte integer stores of
; 0x2A2A2A2A2A2A2A2A; every other run expects a single 16-byte vector store.
define void @memset_16_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_16_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_16_nonzero_bytes:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 16, i64 -1)
  ret void
}

; 32 bytes. Four 8-byte stores, two 16-byte stores, or one 32-byte store
; (followed by vzeroupper) depending on the run configuration.
define void @memset_32_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_32_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX-LABEL: memset_32_nonzero_bytes:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 32, i64 -1)
  ret void
}

; 64 bytes. From here on the AVX runs diverge, so they are checked through
; their more specific prefixes. The AVX512F run materializes the pattern with
; a 32-bit broadcast of 707406378 (0x2A2A2A2A); the AVX512BW run loads a
; 64 x i8 constant instead.
define void @memset_64_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_64_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_64_nonzero_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonzero_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_64_nonzero_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_64_nonzero_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 64, i64 -1)
  ret void
}

; 128 bytes. Same store patterns as the 64-byte case with twice as many
; stores per configuration.
define void @memset_128_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_128_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movabsq $3038287259199220266, %rax # imm = 0x2A2A2A2A2A2A2A2A
; SSE-NEXT: movq %rax, 120(%rdi)
; SSE-NEXT: movq %rax, 112(%rdi)
; SSE-NEXT: movq %rax, 104(%rdi)
; SSE-NEXT: movq %rax, 96(%rdi)
; SSE-NEXT: movq %rax, 88(%rdi)
; SSE-NEXT: movq %rax, 80(%rdi)
; SSE-NEXT: movq %rax, 72(%rdi)
; SSE-NEXT: movq %rax, 64(%rdi)
; SSE-NEXT: movq %rax, 56(%rdi)
; SSE-NEXT: movq %rax, 48(%rdi)
; SSE-NEXT: movq %rax, 40(%rdi)
; SSE-NEXT: movq %rax, 32(%rdi)
; SSE-NEXT: movq %rax, 24(%rdi)
; SSE-NEXT: movq %rax, 16(%rdi)
; SSE-NEXT: movq %rax, 8(%rdi)
; SSE-NEXT: movq %rax, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_128_nonzero_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonzero_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_128_nonzero_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_128_nonzero_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 128, i64 -1)
  ret void
}

; 256 bytes. The SSE-prefixed runs are expected to stop expanding inline and
; tail-call memset with length 256 and value 42; all other runs still expect
; inline vector stores.
define void @memset_256_nonzero_bytes(ptr %x) {
; SSE-LABEL: memset_256_nonzero_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: movl $42, %esi
; SSE-NEXT: jmp memset@PLT # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonzero_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movaps {{.*#+}} xmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; SSE2FAST-NEXT: movups %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movups %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_256_nonzero_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonzero_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX2-NEXT: vmovups %ymm0, 224(%rdi)
; AVX2-NEXT: vmovups %ymm0, 192(%rdi)
; AVX2-NEXT: vmovups %ymm0, 160(%rdi)
; AVX2-NEXT: vmovups %ymm0, 128(%rdi)
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
; AVX2-NEXT: vmovups %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_256_nonzero_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
; AVX512F-NEXT: vmovups %zmm0, 192(%rdi)
; AVX512F-NEXT: vmovups %zmm0, 128(%rdi)
; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_256_nonzero_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
; AVX512BW-NEXT: vmovups %zmm0, 192(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, 128(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  %call = tail call ptr @__memset_chk(ptr %x, i32 42, i64 256, i64 -1)
  ret void
}

declare ptr @__memset_chk(ptr, i32, i64, i64)

; Repeat with a non-constant value for the stores.

; 16-byte memset of the runtime byte %c. The expected code first replicates
; the byte (an integer multiply by 0x0101010101010101, a byte shuffle, or a
; byte broadcast, depending on the run configuration) and then stores it.
define void @memset_16_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_16_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_16_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_16_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_16_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX2-NEXT: vmovdqu %xmm0, (%rdi)
; AVX2-NEXT: retq
;
; AVX512-LABEL: memset_16_nonconst_bytes:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 16, i1 false)
  ret void
}

; 32-byte memset of the runtime byte %c. The AVX1 run is expected to keep
; using two 16-byte stores, while the AVX2 and AVX512 runs broadcast into a
; 32-byte register and store once.
define void @memset_32_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_32_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_32_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_32_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovdqu %xmm0, 16(%rdi)
; AVX1-NEXT: vmovdqu %xmm0, (%rdi)
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_32_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: memset_32_nonconst_bytes:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 32, i1 false)
  ret void
}

; 64-byte memset of the runtime byte %c. The two 512-bit runs differ here,
; so they are checked through separate prefixes. The AVX512F run multiplies
; the byte by 0x01010101 and broadcasts the resulting dword, whereas the
; AVX512BW run broadcasts the byte directly from %esi.
define void @memset_64_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_64_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_64_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_64_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_64_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_64_nonconst_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movzbl %sil, %eax
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_64_nonconst_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 64, i1 false)
  ret void
}

; 128-byte memset of the runtime byte %c. Same splat sequences as the 64-byte
; case with twice as many stores per configuration.
define void @memset_128_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_128_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: # kill: def $esi killed $esi def $rsi
; SSE-NEXT: movzbl %sil, %eax
; SSE-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE-NEXT: imulq %rax, %rcx
; SSE-NEXT: movq %rcx, 120(%rdi)
; SSE-NEXT: movq %rcx, 112(%rdi)
; SSE-NEXT: movq %rcx, 104(%rdi)
; SSE-NEXT: movq %rcx, 96(%rdi)
; SSE-NEXT: movq %rcx, 88(%rdi)
; SSE-NEXT: movq %rcx, 80(%rdi)
; SSE-NEXT: movq %rcx, 72(%rdi)
; SSE-NEXT: movq %rcx, 64(%rdi)
; SSE-NEXT: movq %rcx, 56(%rdi)
; SSE-NEXT: movq %rcx, 48(%rdi)
; SSE-NEXT: movq %rcx, 40(%rdi)
; SSE-NEXT: movq %rcx, 32(%rdi)
; SSE-NEXT: movq %rcx, 24(%rdi)
; SSE-NEXT: movq %rcx, 16(%rdi)
; SSE-NEXT: movq %rcx, 8(%rdi)
; SSE-NEXT: movq %rcx, (%rdi)
; SSE-NEXT: retq
;
; SSE2FAST-LABEL: memset_128_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_128_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_128_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_128_nonconst_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movzbl %sil, %eax
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_128_nonconst_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 128, i1 false)
  ret void
}

; 256-byte memset of the runtime byte %c. The SSE-prefixed runs are expected
; to tail-call memset (only the length needs to be set up in %edx) instead of
; storing inline; all other runs still expect inline vector stores.
define void @memset_256_nonconst_bytes(ptr %x, i8 %c) {
; SSE-LABEL: memset_256_nonconst_bytes:
; SSE: # %bb.0:
; SSE-NEXT: movl $256, %edx # imm = 0x100
; SSE-NEXT: jmp memset@PLT # TAILCALL
;
; SSE2FAST-LABEL: memset_256_nonconst_bytes:
; SSE2FAST: # %bb.0:
; SSE2FAST-NEXT: movd %esi, %xmm0
; SSE2FAST-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2FAST-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2FAST-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2FAST-NEXT: movdqu %xmm0, 240(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 224(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 208(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 192(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 176(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 160(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 144(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 128(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 112(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 96(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 80(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 64(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 48(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 32(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, 16(%rdi)
; SSE2FAST-NEXT: movdqu %xmm0, (%rdi)
; SSE2FAST-NEXT: retq
;
; AVX1-LABEL: memset_256_nonconst_bytes:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovd %esi, %xmm0
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
; AVX1-NEXT: vmovups %ymm0, (%rdi)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: memset_256_nonconst_bytes:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovd %esi, %xmm0
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX2-NEXT: vmovdqu %ymm0, 224(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 192(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 160(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 128(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 96(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 64(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, 32(%rdi)
; AVX2-NEXT: vmovdqu %ymm0, (%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512F-LABEL: memset_256_nonconst_bytes:
; AVX512F: # %bb.0:
; AVX512F-NEXT: movzbl %sil, %eax
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
; AVX512F-NEXT: vmovdqu64 %zmm0, 192(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, 128(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: memset_256_nonconst_bytes:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
; AVX512BW-NEXT: vmovdqu64 %zmm0, 192(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, 128(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
  tail call void @llvm.memset.p0.i64(ptr %x, i8 %c, i64 256, i1 false)
  ret void
}

; NOTE(review): attribute group #1 is referenced here but no definition of it
; is visible in this portion of the file; confirm whether it is intentional.
declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) #1