; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-CUR %s ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=0 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-CUR %s ; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O0,CHECK-O0-EX %s ; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mcpu=skylake -x86-experimental-unordered-atomic-isel=1 | FileCheck --check-prefixes=CHECK,CHECK-O3,CHECK-O3-EX %s define i8 @load_i8(i8* %ptr) { ; CHECK-O0-LABEL: load_i8: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_i8: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzbl (%rdi), %eax ; CHECK-O3-NEXT: retq %v = load atomic i8, i8* %ptr unordered, align 1 ret i8 %v } define void @store_i8(i8* %ptr, i8 %v) { ; CHECK-O0-LABEL: store_i8: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb %sil, %al ; CHECK-O0-NEXT: movb %al, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i8: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movb %sil, (%rdi) ; CHECK-O3-NEXT: retq store atomic i8 %v, i8* %ptr unordered, align 1 ret void } define i16 @load_i16(i16* %ptr) { ; CHECK-O0-LABEL: load_i16: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movw (%rdi), %ax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_i16: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzwl (%rdi), %eax ; CHECK-O3-NEXT: retq %v = load atomic i16, i16* %ptr unordered, align 2 ret i16 %v } define void @store_i16(i16* %ptr, i16 %v) { ; CHECK-O0-LABEL: store_i16: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movw %si, %ax ; CHECK-O0-NEXT: movw %ax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i16: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movw %si, (%rdi) ; CHECK-O3-NEXT: retq store atomic i16 %v, i16* %ptr unordered, align 2 ret void } define i32 @load_i32(i32* %ptr) { ; CHECK-LABEL: load_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl (%rdi), %eax ; CHECK-NEXT: retq %v = load atomic i32, i32* %ptr unordered, align 4 ret i32 %v } define void @store_i32(i32* %ptr, i32 %v) { ; CHECK-LABEL: store_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: retq store atomic i32 %v, i32* %ptr unordered, align 4 ret void } define i64 @load_i64(i64* %ptr) { ; CHECK-LABEL: load_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 ret i64 %v } define void @store_i64(i64* %ptr, i64 %v) { ; CHECK-LABEL: store_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: retq store atomic i64 %v, i64* %ptr unordered, align 8 ret void } ;; The tests in the rest of this file are intended to show transforms which we ;; either *can't* do for legality, or don't currently implement. The later ;; are noted carefully where relevant. ;; Start w/some clearly illegal ones. ; Must use a full width op, not a byte op define void @narrow_writeback_or(i64* %ptr) { ; CHECK-O0-LABEL: narrow_writeback_or: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: orq $7, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: narrow_writeback_or: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq $7, (%rdi) ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = or i64 %v, 7 store atomic i64 %v.new, i64* %ptr unordered, align 8 ret void } ; Must use a full width op, not a byte op define void @narrow_writeback_and(i64* %ptr) { ; CHECK-O0-LABEL: narrow_writeback_and: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $-256, %eax ; CHECK-O0-NEXT: # kill: def $rax killed $eax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: narrow_writeback_and: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movl $4294967040, %eax # imm = 0xFFFFFF00 ; CHECK-O3-NEXT: andq %rax, (%rdi) ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = and i64 %v, 4294967040 ;; 0xFFFF_FF00 store atomic i64 %v.new, i64* %ptr unordered, align 8 ret void } ; Must use a full width op, not a byte op define void @narrow_writeback_xor(i64* %ptr) { ; CHECK-O0-LABEL: narrow_writeback_xor: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorq $7, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: narrow_writeback_xor: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq $7, (%rdi) ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %ptr unordered, align 8 %v.new = xor i64 %v, 7 store atomic i64 %v.new, i64* %ptr unordered, align 8 ret void } ;; Next batch of tests are exercising cases where store widening would ;; improve codegeneration. Note that widening is only legal if the ;; resulting type would be atomic. Each tests has a well aligned, and ;; unaligned variant to ensure we get correct codegen here. ;; Note: It's not a legality issue, but there's a gotcha here to be aware ;; of. Once we widen a pair of atomic stores, we loose the information ;; that the original atomicity requirement was half the width. Given that, ;; we can't then split the load again. This challenges our usual iterative ;; approach to incremental improvement. ; Legal if wider type is also atomic (TODO) define void @widen_store(i32* %p0, i32 %v1, i32 %v2) { ; CHECK-LABEL: widen_store: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: movl %edx, 4(%rdi) ; CHECK-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 8 store atomic i32 %v2, i32* %p1 unordered, align 4 ret void } ; This one is *NOT* legal to widen. With weaker alignment, ; the wider type might cross a cache line and violate the ; atomicity requirement. define void @widen_store_unaligned(i32* %p0, i32 %v1, i32 %v2) { ; CHECK-LABEL: widen_store_unaligned: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: movl %edx, 4(%rdi) ; CHECK-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 4 store atomic i32 %v2, i32* %p1 unordered, align 4 ret void } ; Legal if wider type is also atomic (TODO) define void @widen_broadcast(i32* %p0, i32 %v) { ; CHECK-LABEL: widen_broadcast: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: movl %esi, 4(%rdi) ; CHECK-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v, i32* %p0 unordered, align 8 store atomic i32 %v, i32* %p1 unordered, align 4 ret void } ; Not legal to widen due to alignment restriction define void @widen_broadcast_unaligned(i32* %p0, i32 %v) { ; CHECK-LABEL: widen_broadcast_unaligned: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, (%rdi) ; CHECK-NEXT: movl %esi, 4(%rdi) ; CHECK-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v, i32* %p0 unordered, align 4 store atomic i32 %v, i32* %p1 unordered, align 4 ret void } define i128 @load_i128(i128* %ptr) { ; CHECK-O0-LABEL: load_i128: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: pushq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: xorl %eax, %eax ; CHECK-O0-NEXT: movl %eax, %ebx ; CHECK-O0-NEXT: movq %rbx, %rax ; CHECK-O0-NEXT: movq %rbx, %rdx ; CHECK-O0-NEXT: movq %rbx, %rcx ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O0-NEXT: popq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_i128: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: pushq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O3-NEXT: .cfi_offset %rbx, -16 ; CHECK-O3-NEXT: xorl %eax, %eax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: xorl %ecx, %ecx ; CHECK-O3-NEXT: xorl %ebx, %ebx ; CHECK-O3-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq %v = load atomic i128, i128* %ptr unordered, align 16 ret i128 %v } define void @store_i128(i128* %ptr, i128 %v) { ; CHECK-O0-LABEL: store_i128: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: pushq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq 8(%rdi), %rdx ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: jmp .LBB16_1 ; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start ; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; CHECK-O0-NEXT: lock cmpxchg16b (%rsi) ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: jne .LBB16_1 ; CHECK-O0-NEXT: jmp .LBB16_2 ; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end ; CHECK-O0-NEXT: popq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i128: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: pushq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O3-NEXT: .cfi_offset %rbx, -16 ; CHECK-O3-NEXT: movq %rdx, %rcx ; CHECK-O3-NEXT: movq %rsi, %rbx ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq 8(%rdi), %rdx ; CHECK-O3-NEXT: .p2align 4, 0x90 ; CHECK-O3-NEXT: .LBB16_1: # %atomicrmw.start ; CHECK-O3-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-O3-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O3-NEXT: jne .LBB16_1 ; CHECK-O3-NEXT: # %bb.2: # %atomicrmw.end ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq store atomic i128 %v, i128* %ptr unordered, align 16 ret void } define i256 @load_i256(i256* %ptr) { ; CHECK-O0-LABEL: load_i256: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: subq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-O0-NEXT: movq %rdi, %rax ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movl $32, %edi ; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: callq __atomic_load@PLT ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8 ; CHECK-O0-NEXT: movq %r8, 24(%rdi) ; CHECK-O0-NEXT: movq %rsi, 16(%rdi) ; CHECK-O0-NEXT: movq %rdx, 8(%rdi) ; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_i256: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: pushq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O3-NEXT: subq $32, %rsp ; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 ; CHECK-O3-NEXT: .cfi_offset %rbx, -16 ; CHECK-O3-NEXT: movq %rdi, %rbx ; CHECK-O3-NEXT: movq %rsp, %rdx ; CHECK-O3-NEXT: movl $32, %edi ; CHECK-O3-NEXT: xorl %ecx, %ecx ; CHECK-O3-NEXT: callq __atomic_load@PLT ; CHECK-O3-NEXT: vmovups (%rsp), %ymm0 ; CHECK-O3-NEXT: vmovups %ymm0, (%rbx) ; CHECK-O3-NEXT: movq %rbx, %rax ; CHECK-O3-NEXT: addq $32, %rsp ; CHECK-O3-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O3-NEXT: popq %rbx ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: vzeroupper ; CHECK-O3-NEXT: retq %v = load atomic i256, i256* %ptr unordered, align 16 ret i256 %v } define void @store_i256(i256* %ptr, i256 %v) { ; CHECK-O0-LABEL: store_i256: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: subq $40, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 48 ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: movq %rsi, (%rsp) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdi, %rsi ; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload ; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl $32, %edi ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: callq __atomic_store@PLT ; CHECK-O0-NEXT: addq $40, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i256: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: subq $40, %rsp ; CHECK-O3-NEXT: .cfi_def_cfa_offset 48 ; CHECK-O3-NEXT: movq %rdi, %rax ; CHECK-O3-NEXT: movq %r8, {{[0-9]+}}(%rsp) ; CHECK-O3-NEXT: movq %rcx, {{[0-9]+}}(%rsp) ; CHECK-O3-NEXT: movq %rdx, {{[0-9]+}}(%rsp) ; CHECK-O3-NEXT: movq %rsi, {{[0-9]+}}(%rsp) ; CHECK-O3-NEXT: leaq {{[0-9]+}}(%rsp), %rdx ; CHECK-O3-NEXT: movl $32, %edi ; CHECK-O3-NEXT: movq %rax, %rsi ; CHECK-O3-NEXT: xorl %ecx, %ecx ; CHECK-O3-NEXT: callq __atomic_store@PLT ; CHECK-O3-NEXT: addq $40, %rsp ; CHECK-O3-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O3-NEXT: retq store atomic i256 %v, i256* %ptr unordered, align 16 ret void } ; Legal if wider type is also atomic (TODO) define void @vec_store(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: vec_store: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx ; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax ; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) ; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: vec_store: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax ; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx ; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) ; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: vec_store: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) ; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi) ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: vec_store: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) ; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) ; CHECK-O3-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %v2 = extractelement <2 x i32> %vec, i32 1 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 8 store atomic i32 %v2, i32* %p1 unordered, align 4 ret void } ; Not legal to widen due to alignment restriction define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: vec_store_unaligned: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx ; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax ; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) ; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: vec_store_unaligned: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax ; CHECK-O3-CUR-NEXT: vpextrd $1, %xmm0, %ecx ; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) ; CHECK-O3-CUR-NEXT: movl %ecx, 4(%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: vec_store_unaligned: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) ; CHECK-O0-EX-NEXT: vpextrd $1, %xmm0, 4(%rdi) ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: vec_store_unaligned: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) ; CHECK-O3-EX-NEXT: vextractps $1, %xmm0, 4(%rdi) ; CHECK-O3-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %v2 = extractelement <2 x i32> %vec, i32 1 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 4 store atomic i32 %v2, i32* %p1 unordered, align 4 ret void } ; Legal if wider type is also atomic (TODO) ; Also, can avoid register move from xmm to eax (TODO) define void @widen_broadcast2(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: widen_broadcast2: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax ; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) ; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: widen_broadcast2: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax ; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) ; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: widen_broadcast2: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) ; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi) ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: widen_broadcast2: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) ; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi) ; CHECK-O3-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 8 store atomic i32 %v1, i32* %p1 unordered, align 4 ret void } ; Not legal to widen due to alignment restriction define void @widen_broadcast2_unaligned(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: widen_broadcast2_unaligned: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax ; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) ; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: widen_broadcast2_unaligned: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: vmovd %xmm0, %eax ; CHECK-O3-CUR-NEXT: movl %eax, (%rdi) ; CHECK-O3-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: widen_broadcast2_unaligned: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vmovd %xmm0, (%rdi) ; CHECK-O0-EX-NEXT: vmovd %xmm0, 4(%rdi) ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: widen_broadcast2_unaligned: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vmovss %xmm0, (%rdi) ; CHECK-O3-EX-NEXT: vmovss %xmm0, 4(%rdi) ; CHECK-O3-EX-NEXT: retq %v1 = extractelement <2 x i32> %vec, i32 0 %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 %v1, i32* %p0 unordered, align 4 store atomic i32 %v1, i32* %p1 unordered, align 4 ret void } ; Legal if wider type is also atomic (TODO) define void @widen_zero_init(i32* %p0, i32 %v1, i32 %v2) { ; CHECK-LABEL: widen_zero_init: ; CHECK: # %bb.0: ; CHECK-NEXT: movl $0, (%rdi) ; CHECK-NEXT: movl $0, 4(%rdi) ; CHECK-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 0, i32* %p0 unordered, align 8 store atomic i32 0, i32* %p1 unordered, align 4 ret void } ; Not legal to widen due to alignment restriction define void @widen_zero_init_unaligned(i32* %p0, i32 %v1, i32 %v2) { ; CHECK-LABEL: widen_zero_init_unaligned: ; CHECK: # %bb.0: ; CHECK-NEXT: movl $0, (%rdi) ; CHECK-NEXT: movl $0, 4(%rdi) ; CHECK-NEXT: retq %p1 = getelementptr i32, i32* %p0, i64 1 store atomic i32 0, i32* %p0 unordered, align 4 store atomic i32 0, i32* %p1 unordered, align 4 ret void } ;; The next batch of tests are stressing load folding. Folding is legal ;; on x86, so these are simply checking optimization quality. ; Legal, as expected define i64 @load_fold_add1(i64* %p) { ; CHECK-LABEL: load_fold_add1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: addq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, 15 ret i64 %ret } define i64 @load_fold_add2(i64* %p, i64 %v2) { ; CHECK-LABEL: load_fold_add2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: addq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, %v2 ret i64 %ret } define i64 @load_fold_add3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_add3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: addq (%rsi), %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_add3: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rsi), %rax ; CHECK-O3-CUR-NEXT: addq (%rdi), %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_add3: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq (%rdi), %rax ; CHECK-O3-EX-NEXT: addq (%rsi), %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = add i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_sub1(i64* %p) { ; CHECK-O0-LABEL: load_fold_sub1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq $15, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_sub1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: addq $-15, %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sub i64 %v, 15 ret i64 %ret } define i64 @load_fold_sub2(i64* %p, i64 %v2) { ; CHECK-LABEL: load_fold_sub2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: subq %rsi, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sub i64 %v, %v2 ret i64 %ret } define i64 @load_fold_sub3(i64* %p1, i64* %p2) { ; CHECK-LABEL: load_fold_sub3: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: subq (%rsi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = sub i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_mul1(i64* %p) { ; CHECK-O0-LABEL: load_fold_mul1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: imulq $15, (%rdi), %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_mul1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, 15 ret i64 %ret } define i64 @load_fold_mul2(i64* %p, i64 %v2) { ; CHECK-LABEL: load_fold_mul2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: imulq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, %v2 ret i64 %ret } define i64 @load_fold_mul3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_mul3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: imulq (%rsi), %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_mul3: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rsi), %rax ; CHECK-O3-CUR-NEXT: imulq (%rdi), %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_mul3: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq (%rdi), %rax ; CHECK-O3-EX-NEXT: imulq (%rsi), %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = mul i64 %v, %v2 ret i64 %ret } ; Legal to fold (TODO) define i64 @load_fold_sdiv1(i64* %p) { ; CHECK-O0-LABEL: load_fold_sdiv1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movl $15, %ecx ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq %rcx ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_sdiv1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rcx ; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: imulq %rdx ; CHECK-O3-NEXT: addq %rdx, %rcx ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: shrq $63, %rax ; CHECK-O3-NEXT: sarq $3, %rcx ; CHECK-O3-NEXT: addq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sdiv i64 %v, 15 ret i64 %ret } ; Legal to fold (TODO) define i64 @load_fold_sdiv2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_sdiv2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq %rsi ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_sdiv2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB35_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: cqto ; CHECK-O3-NEXT: idivq %rsi ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB35_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = sdiv i64 %v, %v2 ret i64 %ret } define i64 @load_fold_sdiv3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_sdiv3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq (%rsi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_sdiv3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq (%rsi), %rcx ; CHECK-O3-NEXT: movq %rax, %rdx ; CHECK-O3-NEXT: orq %rcx, %rdx ; CHECK-O3-NEXT: shrq $32, %rdx ; CHECK-O3-NEXT: je .LBB36_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: cqto ; CHECK-O3-NEXT: idivq %rcx ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB36_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = sdiv i64 %v, %v2 ret i64 %ret } ; Legal to fold (TODO) define i64 @load_fold_udiv1(i64* %p) { ; CHECK-O0-LABEL: load_fold_udiv1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movl $15, %ecx ; CHECK-O0-NEXT: xorl %edx, %edx ; CHECK-O0-NEXT: # kill: def $rdx killed $edx ; CHECK-O0-NEXT: divq %rcx ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_udiv1: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx ; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 ; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax ; CHECK-O3-CUR-NEXT: shrq $3, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_udiv1: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax ; CHECK-O3-EX-NEXT: shrq $3, %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = udiv i64 %v, 15 ret i64 %ret } define i64 @load_fold_udiv2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_udiv2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx ; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_udiv2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB38_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divq %rsi ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB38_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = udiv i64 %v, %v2 ret i64 %ret } define i64 @load_fold_udiv3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_udiv3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx ; CHECK-O0-NEXT: divq (%rsi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_udiv3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq (%rsi), %rcx ; CHECK-O3-NEXT: movq %rax, %rdx ; CHECK-O3-NEXT: orq %rcx, %rdx ; CHECK-O3-NEXT: shrq $32, %rdx ; CHECK-O3-NEXT: je .LBB39_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divq %rcx ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB39_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = udiv i64 %v, %v2 ret i64 %ret } ; Legal to fold (TODO) define i64 @load_fold_srem1(i64* %p) { ; CHECK-O0-LABEL: load_fold_srem1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movl $15, %ecx ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq %rcx ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_srem1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rcx ; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: imulq %rdx ; CHECK-O3-NEXT: addq %rcx, %rdx ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: shrq $63, %rax ; CHECK-O3-NEXT: sarq $3, %rdx ; CHECK-O3-NEXT: addq %rax, %rdx ; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = srem i64 %v, 15 ret i64 %ret } ; Legal, as expected define i64 @load_fold_srem2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_srem2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq %rsi ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_srem2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB41_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: cqto ; CHECK-O3-NEXT: idivq %rsi ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB41_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = srem i64 %v, %v2 ret i64 %ret } define i64 @load_fold_srem3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_srem3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq (%rsi) ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_srem3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq (%rsi), %rcx ; CHECK-O3-NEXT: movq %rax, %rdx ; CHECK-O3-NEXT: orq %rcx, %rdx ; CHECK-O3-NEXT: shrq $32, %rdx ; CHECK-O3-NEXT: je .LBB42_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: cqto ; CHECK-O3-NEXT: idivq %rcx ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB42_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = srem i64 %v, %v2 ret i64 %ret } ; Legal to fold (TODO) define i64 @load_fold_urem1(i64* %p) { ; CHECK-O0-LABEL: load_fold_urem1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movl $15, %ecx ; CHECK-O0-NEXT: xorl %edx, %edx ; CHECK-O0-NEXT: # kill: def $rdx killed $edx ; CHECK-O0-NEXT: divq %rcx ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_urem1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O3-NEXT: movq %rax, %rdx ; CHECK-O3-NEXT: mulxq %rcx, %rcx, %rcx ; CHECK-O3-NEXT: shrq $3, %rcx ; CHECK-O3-NEXT: leaq (%rcx,%rcx,4), %rcx ; CHECK-O3-NEXT: leaq (%rcx,%rcx,2), %rcx ; CHECK-O3-NEXT: subq %rcx, %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, 15 ret i64 %ret } ; Legal, as expected define i64 @load_fold_urem2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_urem2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx ; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_urem2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB44_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divq %rsi ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB44_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = urem i64 %v, %v2 ret i64 %ret } define i64 @load_fold_urem3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_urem3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx ; CHECK-O0-NEXT: divq (%rsi) ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_urem3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq (%rsi), %rcx ; CHECK-O3-NEXT: movq %rax, %rdx ; CHECK-O3-NEXT: orq %rcx, %rdx ; CHECK-O3-NEXT: shrq $32, %rdx ; CHECK-O3-NEXT: je .LBB45_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divq %rcx ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB45_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %ecx ; CHECK-O3-NEXT: movl %edx, %eax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = urem i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_shl1(i64* %p) { ; CHECK-LABEL: load_fold_shl1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: shlq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = shl i64 %v, 15 ret i64 %ret } define i64 @load_fold_shl2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_shl2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq %rsi, %rcx ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shlq %cl, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_shl2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: shlxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = shl i64 %v, %v2 ret i64 %ret } define i64 @load_fold_shl3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_shl3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shlq %cl, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_shl3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: shlxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = shl i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_lshr1(i64* %p) { ; CHECK-LABEL: load_fold_lshr1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: shrq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = lshr i64 %v, 15 ret i64 %ret } define i64 @load_fold_lshr2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_lshr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq %rsi, %rcx ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shrq %cl, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_lshr2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: shrxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = lshr i64 %v, %v2 ret i64 %ret } define i64 @load_fold_lshr3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_lshr3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shrq %cl, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_lshr3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: shrxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = lshr i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_ashr1(i64* %p) { ; CHECK-LABEL: load_fold_ashr1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: sarq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = ashr i64 %v, 15 ret i64 %ret } define i64 @load_fold_ashr2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_ashr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq %rsi, %rcx ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: sarq %cl, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_ashr2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: sarxq %rsi, (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = ashr i64 %v, %v2 ret i64 %ret } define i64 @load_fold_ashr3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_ashr3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: sarq %cl, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_ashr3: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rsi), %rax ; CHECK-O3-NEXT: sarxq %rax, (%rdi), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = ashr i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_and1(i64* %p) { ; CHECK-O0-LABEL: load_fold_and1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: andq $15, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_and1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: andl $15, %eax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, 15 ret i64 %ret } define i64 @load_fold_and2(i64* %p, i64 %v2) { ; CHECK-LABEL: load_fold_and2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: andq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, %v2 ret i64 %ret } define i64 @load_fold_and3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_and3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: andq (%rsi), %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_and3: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rsi), %rax ; CHECK-O3-CUR-NEXT: andq (%rdi), %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_and3: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq (%rdi), %rax ; CHECK-O3-EX-NEXT: andq (%rsi), %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = and i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_or1(i64* %p) { ; CHECK-LABEL: load_fold_or1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: orq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, 15 ret i64 %ret } define i64 @load_fold_or2(i64* %p, i64 %v2) { ; CHECK-LABEL: load_fold_or2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: orq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret } define i64 @load_fold_or3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_or3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: orq (%rsi), %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_or3: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rsi), %rax ; CHECK-O3-CUR-NEXT: orq (%rdi), %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_or3: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq (%rdi), %rax ; CHECK-O3-EX-NEXT: orq (%rsi), %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret } ; Legal, as expected define i64 @load_fold_xor1(i64* %p) { ; CHECK-LABEL: load_fold_xor1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: xorq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, 15 ret i64 %ret } define i64 @load_fold_xor2(i64* %p, i64 %v2) { ; CHECK-LABEL: load_fold_xor2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: xorq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, %v2 ret i64 %ret } define i64 @load_fold_xor3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_xor3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorq (%rsi), %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_xor3: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rsi), %rax ; CHECK-O3-CUR-NEXT: xorq (%rdi), %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_xor3: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq (%rdi), %rax ; CHECK-O3-EX-NEXT: xorq (%rsi), %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = xor i64 %v, %v2 ret i64 %ret } define i1 @load_fold_icmp1(i64* %p) { ; CHECK-O0-LABEL: load_fold_icmp1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq $15, %rax ; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_icmp1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: cmpq $15, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = icmp eq i64 %v, 15 ret i1 %ret } define i1 @load_fold_icmp2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_icmp2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq %rsi, %rax ; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_icmp2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: cmpq %rsi, (%rdi) ; CHECK-O3-NEXT: sete %al ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = icmp eq i64 %v, %v2 ret i1 %ret } define i1 @load_fold_icmp3(i64* %p1, i64* %p2) { ; CHECK-O0-LABEL: load_fold_icmp3: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax ; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_icmp3: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rsi), %rax ; CHECK-O3-CUR-NEXT: cmpq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: sete %al ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_fold_icmp3: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq (%rdi), %rax ; CHECK-O3-EX-NEXT: cmpq (%rsi), %rax ; CHECK-O3-EX-NEXT: sete %al ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p1 unordered, align 8 %v2 = load atomic i64, i64* %p2 unordered, align 8 %ret = icmp eq i64 %v, %v2 ret i1 %ret } ;; The next batch of tests check for read-modify-write patterns ;; Legally, it's okay to use a memory operand here as long as the operand ;; is well aligned (i.e. doesn't cross a cache line boundary). We are ;; required not to narrow the store though! ; Legal, as expected define void @rmw_fold_add1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_add1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: addq $15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_add1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq $15, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = add i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_add2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_add2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: addq %rsi, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_add2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq %rsi, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = add i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_sub1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_sub1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: addq $-15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_sub1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: addq $-15, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sub i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_sub2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_sub2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq %rsi, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_sub2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: subq %rsi, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sub i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_mul1(i64* %p, i64 %v) { ; CHECK-LABEL: rmw_fold_mul1: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: leaq (%rax,%rax,4), %rax ; CHECK-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-NEXT: movq %rax, (%rdi) ; CHECK-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = mul i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_mul2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_mul2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: imulq %rsi, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_mul2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: imulq (%rdi), %rsi ; CHECK-O3-NEXT: movq %rsi, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = mul i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_sdiv1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_sdiv1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rcx ; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: movq %rcx, %rax ; CHECK-O0-NEXT: imulq %rdx ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: addq %rcx, %rax ; CHECK-O0-NEXT: movq %rax, %rcx ; CHECK-O0-NEXT: shrq $63, %rcx ; CHECK-O0-NEXT: sarq $3, %rax ; CHECK-O0-NEXT: addq %rcx, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_sdiv1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rcx ; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: imulq %rdx ; CHECK-O3-NEXT: addq %rcx, %rdx ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: shrq $63, %rax ; CHECK-O3-NEXT: sarq $3, %rdx ; CHECK-O3-NEXT: addq %rax, %rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sdiv i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_sdiv2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_sdiv2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq %rsi ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_sdiv2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB74_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: cqto ; CHECK-O3-NEXT: idivq %rsi ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB74_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = sdiv i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_udiv1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_udiv1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rdx ; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 ; CHECK-O0-NEXT: mulxq %rax, %rax, %rax ; CHECK-O0-NEXT: shrq $3, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_udiv1: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rdx ; CHECK-O3-CUR-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 ; CHECK-O3-CUR-NEXT: mulxq %rax, %rax, %rax ; CHECK-O3-CUR-NEXT: shrq $3, %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_udiv1: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O3-EX-NEXT: mulxq (%rdi), %rax, %rax ; CHECK-O3-EX-NEXT: shrq $3, %rax ; CHECK-O3-EX-NEXT: movq %rax, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_udiv2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_udiv2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx ; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_udiv2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB76_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divq %rsi ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB76_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $eax killed $eax def $rax ; CHECK-O3-NEXT: movq %rax, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = udiv i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_srem1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_srem1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: imulq %rcx ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: movq %rdx, %rcx ; CHECK-O0-NEXT: addq %rax, %rcx ; CHECK-O0-NEXT: movq %rcx, %rdx ; CHECK-O0-NEXT: shrq $63, %rdx ; CHECK-O0-NEXT: sarq $3, %rcx ; CHECK-O0-NEXT: addq %rdx, %rcx ; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx ; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_srem1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rcx ; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 ; CHECK-O3-NEXT: movq %rcx, %rax ; CHECK-O3-NEXT: imulq %rdx ; CHECK-O3-NEXT: addq %rcx, %rdx ; CHECK-O3-NEXT: movq %rdx, %rax ; CHECK-O3-NEXT: shrq $63, %rax ; CHECK-O3-NEXT: sarq $3, %rdx ; CHECK-O3-NEXT: addq %rax, %rdx ; CHECK-O3-NEXT: leaq (%rdx,%rdx,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: subq %rax, %rcx ; CHECK-O3-NEXT: movq %rcx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = srem i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_srem2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_srem2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: cqto ; CHECK-O0-NEXT: idivq %rsi ; CHECK-O0-NEXT: movq %rdx, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_srem2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB78_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: cqto ; CHECK-O3-NEXT: idivq %rsi ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB78_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = srem i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_urem1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_urem1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: movq %rax, %rdx ; CHECK-O0-NEXT: mulxq %rcx, %rcx, %rcx ; CHECK-O0-NEXT: shrq $3, %rcx ; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx ; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_urem1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rdx ; CHECK-O3-NEXT: movabsq $-8608480567731124087, %rax # imm = 0x8888888888888889 ; CHECK-O3-NEXT: mulxq %rax, %rax, %rax ; CHECK-O3-NEXT: shrq $3, %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,4), %rax ; CHECK-O3-NEXT: leaq (%rax,%rax,2), %rax ; CHECK-O3-NEXT: subq %rax, %rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_urem2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_urem2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx ; CHECK-O0-NEXT: divq %rsi ; CHECK-O0-NEXT: movq %rdx, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_urem2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: orq %rsi, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: je .LBB80_1 ; CHECK-O3-NEXT: # %bb.2: ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divq %rsi ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq ; CHECK-O3-NEXT: .LBB80_1: ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: xorl %edx, %edx ; CHECK-O3-NEXT: divl %esi ; CHECK-O3-NEXT: # kill: def $edx killed $edx def $rdx ; CHECK-O3-NEXT: movq %rdx, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = urem i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_shl1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_shl1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: shlq $15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_shl1: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rax ; CHECK-O3-CUR-NEXT: shlq $15, %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_shl1: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: shlq $15, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = shl i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_shl2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_shl2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx ; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_shl2: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: shlxq %rsi, (%rdi), %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_shl2: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rcx ; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-O3-EX-NEXT: shlq %cl, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = shl i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_lshr1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_lshr1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: shrq $15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_lshr1: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rax ; CHECK-O3-CUR-NEXT: shrq $15, %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_lshr1: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: shrq $15, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = lshr i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_lshr2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_lshr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx ; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_lshr2: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: shrxq %rsi, (%rdi), %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_lshr2: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rcx ; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-O3-EX-NEXT: shrq %cl, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = lshr i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_ashr1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_ashr1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: sarq $15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_ashr1: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rax ; CHECK-O3-CUR-NEXT: sarq $15, %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_ashr1: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: sarq $15, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = ashr i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal to fold (TODO) define void @rmw_fold_ashr2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_ashr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx ; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: rmw_fold_ashr2: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: sarxq %rsi, (%rdi), %rax ; CHECK-O3-CUR-NEXT: movq %rax, (%rdi) ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: rmw_fold_ashr2: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rcx ; CHECK-O3-EX-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-O3-EX-NEXT: sarq %cl, (%rdi) ; CHECK-O3-EX-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = ashr i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_and1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_and1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $15, %eax ; CHECK-O0-NEXT: # kill: def $rax killed $eax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_and1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: andq $15, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = and i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_and2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_and2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: andq %rsi, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_and2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: andq %rsi, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = and i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_or1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_or1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: orq $15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_or1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq $15, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = or i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_or2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_or2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: orq %rsi, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_or2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: orq %rsi, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = or i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_xor1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_xor1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorq $15, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_xor1: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq $15, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = xor i64 %prev, 15 store atomic i64 %val, i64* %p unordered, align 8 ret void } ; Legal, as expected define void @rmw_fold_xor2(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_xor2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorq %rsi, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_xor2: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: xorq %rsi, (%rdi) ; CHECK-O3-NEXT: retq %prev = load atomic i64, i64* %p unordered, align 8 %val = xor i64 %prev, %v store atomic i64 %val, i64* %p unordered, align 8 ret void } ;; The next batch test truncations, in combination w/operations which could ;; be folded against the memory operation. ; Legal to reduce the load width (TODO) define i32 @fold_trunc(i64* %p) { ; CHECK-LABEL: fold_trunc: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = trunc i64 %v to i32 ret i32 %ret } ; Legal to reduce the load width and fold the load (TODO) define i32 @fold_trunc_add(i64* %p, i32 %v2) { ; CHECK-O0-LABEL: fold_trunc_add: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: addl %esi, %eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: fold_trunc_add: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: addl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = add i32 %trunc, %v2 ret i32 %ret } ; Legal to reduce the load width and fold the load (TODO) define i32 @fold_trunc_and(i64* %p, i32 %v2) { ; CHECK-O0-LABEL: fold_trunc_and: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl %esi, %eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: fold_trunc_and: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: andl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = and i32 %trunc, %v2 ret i32 %ret } ; Legal to reduce the load width and fold the load (TODO) define i32 @fold_trunc_or(i64* %p, i32 %v2) { ; CHECK-O0-LABEL: fold_trunc_or: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: orl %esi, %eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: fold_trunc_or: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: orl %esi, %eax ; CHECK-O3-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %trunc = trunc i64 %v to i32 %ret = or i32 %trunc, %v2 ret i32 %ret } ; It's tempting to split the wide load into two smaller byte loads ; to reduce memory traffic, but this would be illegal for a atomic load define i32 @split_load(i64* %p) { ; CHECK-O0-LABEL: split_load: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rcx ; CHECK-O0-NEXT: movb %cl, %al ; CHECK-O0-NEXT: shrq $32, %rcx ; CHECK-O0-NEXT: # kill: def $cl killed $cl killed $rcx ; CHECK-O0-NEXT: orb %cl, %al ; CHECK-O0-NEXT: movzbl %al, %eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: split_load: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq (%rdi), %rax ; CHECK-O3-NEXT: movq %rax, %rcx ; CHECK-O3-NEXT: shrq $32, %rcx ; CHECK-O3-NEXT: orl %eax, %ecx ; CHECK-O3-NEXT: movzbl %cl, %eax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %b1 = trunc i64 %v to i8 %v.shift = lshr i64 %v, 32 %b2 = trunc i64 %v.shift to i8 %or = or i8 %b1, %b2 %ret = zext i8 %or to i32 ret i32 %ret } ;; A collection of simple memory forwarding tests. Nothing particular ;; interesting semantic wise, just demonstrating obvious missed transforms. @Zero = constant i64 0 ; TODO: should return constant define i64 @constant_folding(i64* %p) { ; CHECK-LABEL: constant_folding: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 ret i64 %v } ; Legal to forward and fold (TODO) define i64 @load_forwarding(i64* %p) { ; CHECK-LABEL: load_forwarding: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: orq (%rdi), %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %v2 = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret } ; Legal to forward (TODO) define i64 @store_forward(i64* %p, i64 %v) { ; CHECK-LABEL: store_forward: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: retq store atomic i64 %v, i64* %p unordered, align 8 %ret = load atomic i64, i64* %p unordered, align 8 ret i64 %ret } ; Legal to kill (TODO) define void @dead_writeback(i64* %p) { ; CHECK-LABEL: dead_writeback: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: movq %rax, (%rdi) ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 store atomic i64 %v, i64* %p unordered, align 8 ret void } ; Legal to kill (TODO) define void @dead_store(i64* %p, i64 %v) { ; CHECK-LABEL: dead_store: ; CHECK: # %bb.0: ; CHECK-NEXT: movq $0, (%rdi) ; CHECK-NEXT: movq %rsi, (%rdi) ; CHECK-NEXT: retq store atomic i64 0, i64* %p unordered, align 8 store atomic i64 %v, i64* %p unordered, align 8 ret void } ;; The next batch of tests ensure that we don't try to fold a load into a ;; use where the code motion implied for the load is prevented by a fence. ;; Note: We're checking that the load doesn't get moved below the fence as ;; part of folding, but is technically legal to lift the add above the fence. ;; If that were to happen, please rewrite the test to ensure load movement ;; isn't violated. define i64 @nofold_fence(i64* %p) { ; CHECK-LABEL: nofold_fence: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: mfence ; CHECK-NEXT: addq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 fence seq_cst %ret = add i64 %v, 15 ret i64 %ret } define i64 @nofold_fence_acquire(i64* %p) { ; CHECK-LABEL: nofold_fence_acquire: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: #MEMBARRIER ; CHECK-NEXT: addq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 fence acquire %ret = add i64 %v, 15 ret i64 %ret } define i64 @nofold_stfence(i64* %p) { ; CHECK-LABEL: nofold_stfence: ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: #MEMBARRIER ; CHECK-NEXT: addq $15, %rax ; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 fence syncscope("singlethread") seq_cst %ret = add i64 %v, 15 ret i64 %ret } ;; Next, test how well we can fold invariant loads. @Constant = external dso_local constant i64 define i64 @fold_constant(i64 %arg) { ; CHECK-O0-LABEL: fold_constant: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq %rdi, %rax ; CHECK-O0-NEXT: addq Constant, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: fold_constant: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movq %rdi, %rax ; CHECK-O3-NEXT: addq Constant(%rip), %rax ; CHECK-O3-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 %ret = add i64 %v, %arg ret i64 %ret } define i64 @fold_constant_clobber(i64* %p, i64 %arg) { ; CHECK-O0-LABEL: fold_constant_clobber: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq Constant(%rip), %rax ; CHECK-O0-NEXT: movq $5, (%rdi) ; CHECK-O0-NEXT: addq %rsi, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: fold_constant_clobber: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax ; CHECK-O3-CUR-NEXT: movq $5, (%rdi) ; CHECK-O3-CUR-NEXT: addq %rsi, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_constant_clobber: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax ; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax ; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 store i64 5, i64* %p %ret = add i64 %v, %arg ret i64 %ret } define i64 @fold_constant_fence(i64 %arg) { ; CHECK-O0-LABEL: fold_constant_fence: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq Constant(%rip), %rax ; CHECK-O0-NEXT: mfence ; CHECK-O0-NEXT: addq %rdi, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: fold_constant_fence: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq Constant(%rip), %rax ; CHECK-O3-CUR-NEXT: mfence ; CHECK-O3-CUR-NEXT: addq %rdi, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_constant_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rdi, %rax ; CHECK-O3-EX-NEXT: addq Constant(%rip), %rax ; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* @Constant unordered, align 8 fence seq_cst %ret = add i64 %v, %arg ret i64 %ret } define i64 @fold_invariant_clobber(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O0-LABEL: fold_invariant_clobber: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq $5, (%rdi) ; CHECK-O0-NEXT: addq %rsi, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: fold_invariant_clobber: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rax ; CHECK-O3-CUR-NEXT: movq $5, (%rdi) ; CHECK-O3-CUR-NEXT: addq %rsi, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_invariant_clobber: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax ; CHECK-O3-EX-NEXT: addq (%rdi), %rax ; CHECK-O3-EX-NEXT: movq $5, (%rdi) ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} store i64 5, i64* %p %ret = add i64 %v, %arg ret i64 %ret } define i64 @fold_invariant_fence(i64* dereferenceable(8) %p, i64 %arg) { ; CHECK-O0-LABEL: fold_invariant_fence: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: mfence ; CHECK-O0-NEXT: addq %rsi, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: fold_invariant_fence: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movq (%rdi), %rax ; CHECK-O3-CUR-NEXT: mfence ; CHECK-O3-CUR-NEXT: addq %rsi, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_invariant_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: movq %rsi, %rax ; CHECK-O3-EX-NEXT: addq (%rdi), %rax ; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8, !invariant.load !{} fence seq_cst %ret = add i64 %v, %arg ret i64 %ret } ; Exercise a few cases involving any extend idioms define i16 @load_i8_anyext_i16(i8* %ptr) { ; CHECK-O0-CUR-LABEL: load_i8_anyext_i16: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movb (%rdi), %al ; CHECK-O0-CUR-NEXT: movzbl %al, %eax ; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i8_anyext_i16: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax ; CHECK-O3-CUR-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: load_i8_anyext_i16: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0 ; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax ; CHECK-O0-EX-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_i8_anyext_i16: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0 ; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax ; CHECK-O3-EX-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O3-EX-NEXT: retq %v = load atomic i8, i8* %ptr unordered, align 2 %vec = insertelement <2 x i8> undef, i8 %v, i32 0 %res = bitcast <2 x i8> %vec to i16 ret i16 %res } define i32 @load_i8_anyext_i32(i8* %ptr) { ; CHECK-O0-CUR-LABEL: load_i8_anyext_i32: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movb (%rdi), %al ; CHECK-O0-CUR-NEXT: movzbl %al, %eax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i8_anyext_i32: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movzbl (%rdi), %eax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: load_i8_anyext_i32: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vpbroadcastb (%rdi), %xmm0 ; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_i8_anyext_i32: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vpbroadcastb (%rdi), %xmm0 ; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax ; CHECK-O3-EX-NEXT: retq %v = load atomic i8, i8* %ptr unordered, align 4 %vec = insertelement <4 x i8> undef, i8 %v, i32 0 %res = bitcast <4 x i8> %vec to i32 ret i32 %res } define i32 @load_i16_anyext_i32(i16* %ptr) { ; CHECK-O0-CUR-LABEL: load_i16_anyext_i32: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movw (%rdi), %cx ; CHECK-O0-CUR-NEXT: # implicit-def: $eax ; CHECK-O0-CUR-NEXT: movw %cx, %ax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i16_anyext_i32: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: load_i16_anyext_i32: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0 ; CHECK-O0-EX-NEXT: vmovd %xmm0, %eax ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_i16_anyext_i32: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0 ; CHECK-O3-EX-NEXT: vmovd %xmm0, %eax ; CHECK-O3-EX-NEXT: retq %v = load atomic i16, i16* %ptr unordered, align 4 %vec = insertelement <2 x i16> undef, i16 %v, i64 0 %res = bitcast <2 x i16> %vec to i32 ret i32 %res } define i64 @load_i16_anyext_i64(i16* %ptr) { ; CHECK-O0-CUR-LABEL: load_i16_anyext_i64: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movw (%rdi), %cx ; CHECK-O0-CUR-NEXT: # implicit-def: $eax ; CHECK-O0-CUR-NEXT: movw %cx, %ax ; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0 ; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i16_anyext_i64: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movzwl (%rdi), %eax ; CHECK-O3-CUR-NEXT: vmovd %eax, %xmm0 ; CHECK-O3-CUR-NEXT: vmovq %xmm0, %rax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O0-EX-LABEL: load_i16_anyext_i64: ; CHECK-O0-EX: # %bb.0: ; CHECK-O0-EX-NEXT: vpbroadcastw (%rdi), %xmm0 ; CHECK-O0-EX-NEXT: vmovq %xmm0, %rax ; CHECK-O0-EX-NEXT: retq ; ; CHECK-O3-EX-LABEL: load_i16_anyext_i64: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: vpbroadcastw (%rdi), %xmm0 ; CHECK-O3-EX-NEXT: vmovq %xmm0, %rax ; CHECK-O3-EX-NEXT: retq %v = load atomic i16, i16* %ptr unordered, align 8 %vec = insertelement <4 x i16> undef, i16 %v, i64 0 %res = bitcast <4 x i16> %vec to i64 ret i64 %res } ; TODO: Would be legal to combine for legal atomic wider types define i16 @load_combine(i8* %p) { ; CHECK-O0-LABEL: load_combine: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: movb 1(%rdi), %cl ; CHECK-O0-NEXT: movzbl %al, %eax ; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-NEXT: movzbl %cl, %ecx ; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx ; CHECK-O0-NEXT: shlw $8, %cx ; CHECK-O0-NEXT: orw %cx, %ax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_combine: ; CHECK-O3: # %bb.0: ; CHECK-O3-NEXT: movzbl (%rdi), %ecx ; CHECK-O3-NEXT: movzbl 1(%rdi), %eax ; CHECK-O3-NEXT: shll $8, %eax ; CHECK-O3-NEXT: orl %ecx, %eax ; CHECK-O3-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O3-NEXT: retq %v1 = load atomic i8, i8* %p unordered, align 2 %p2 = getelementptr i8, i8* %p, i64 1 %v2 = load atomic i8, i8* %p2 unordered, align 1 %v1.ext = zext i8 %v1 to i16 %v2.ext = zext i8 %v2 to i16 %v2.sht = shl i16 %v2.ext, 8 %res = or i16 %v1.ext, %v2.sht ret i16 %res } define i1 @fold_cmp_over_fence(i32* %p, i32 %v1) { ; CHECK-O0-LABEL: fold_cmp_over_fence: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movl (%rdi), %eax ; CHECK-O0-NEXT: mfence ; CHECK-O0-NEXT: cmpl %eax, %esi ; CHECK-O0-NEXT: jne .LBB116_2 ; CHECK-O0-NEXT: # %bb.1: # %taken ; CHECK-O0-NEXT: movb $1, %al ; CHECK-O0-NEXT: retq ; CHECK-O0-NEXT: .LBB116_2: # %untaken ; CHECK-O0-NEXT: xorl %eax, %eax ; CHECK-O0-NEXT: # kill: def $al killed $al killed $eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: fold_cmp_over_fence: ; CHECK-O3-CUR: # %bb.0: ; CHECK-O3-CUR-NEXT: movl (%rdi), %eax ; CHECK-O3-CUR-NEXT: mfence ; CHECK-O3-CUR-NEXT: cmpl %eax, %esi ; CHECK-O3-CUR-NEXT: jne .LBB116_2 ; CHECK-O3-CUR-NEXT: # %bb.1: # %taken ; CHECK-O3-CUR-NEXT: movb $1, %al ; CHECK-O3-CUR-NEXT: retq ; CHECK-O3-CUR-NEXT: .LBB116_2: # %untaken ; CHECK-O3-CUR-NEXT: xorl %eax, %eax ; CHECK-O3-CUR-NEXT: retq ; ; CHECK-O3-EX-LABEL: fold_cmp_over_fence: ; CHECK-O3-EX: # %bb.0: ; CHECK-O3-EX-NEXT: cmpl (%rdi), %esi ; CHECK-O3-EX-NEXT: mfence ; CHECK-O3-EX-NEXT: jne .LBB116_2 ; CHECK-O3-EX-NEXT: # %bb.1: # %taken ; CHECK-O3-EX-NEXT: movb $1, %al ; CHECK-O3-EX-NEXT: retq ; CHECK-O3-EX-NEXT: .LBB116_2: # %untaken ; CHECK-O3-EX-NEXT: xorl %eax, %eax ; CHECK-O3-EX-NEXT: retq %v2 = load atomic i32, i32* %p unordered, align 4 fence seq_cst %cmp = icmp eq i32 %v1, %v2 br i1 %cmp, label %taken, label %untaken taken: ret i1 true untaken: ret i1 false }