; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=pentium4 -O0 | FileCheck %s

target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-f64:32:64-f80:32-n8:16:32-S128"
target triple = "i386-unknown-linux-unknown"

; Codegen test for the element-wise remainder (frem) of two <4 x half>
; vectors, compiled unoptimized (-O0) for a 32-bit pentium4 target.
;
; The function stores both arguments to stack slots, reloads them, and
; returns their frem. The expected assembly below handles the four lanes one
; at a time; for each lane it contains two calls to __extendhfsf2, one call to
; fmodf and one call to __truncsfhf2, and finally repacks the four results
; into xmm0 with punpcklwd/unpcklps.
;
; The assertion lines are generated output: regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.
define <4 x half> @doTheTestMod(<4 x half> %0, <4 x half> %1) nounwind {
; CHECK-LABEL: doTheTestMod:
; CHECK: # %bb.0: # %Entry
; CHECK-NEXT: subl $140, %esp
; CHECK-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT: movaps %xmm0, %xmm6
; CHECK-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movaps %xmm0, %xmm3
; CHECK-NEXT: psrlq $48, %xmm3
; CHECK-NEXT: movaps %xmm0, %xmm2
; CHECK-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,1,1]
; CHECK-NEXT: psrld $16, %xmm0
; CHECK-NEXT: movaps %xmm6, %xmm7
; CHECK-NEXT: movaps %xmm6, %xmm4
; CHECK-NEXT: psrlq $48, %xmm4
; CHECK-NEXT: movaps %xmm6, %xmm5
; CHECK-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1,1,1]
; CHECK-NEXT: psrld $16, %xmm6
; CHECK-NEXT: pextrw $0, %xmm7, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm6, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm5, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm4, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm3, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm2, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: pextrw $0, %xmm1, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: movss %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm0
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: # implicit-def: $xmm1
; CHECK-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm1
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movaps %xmm0, %xmm1
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: movss %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-NEXT: pextrw $0, %xmm0, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: movw %cx, (%eax)
; CHECK-NEXT: calll __extendhfsf2
; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fxch %st(1)
; CHECK-NEXT: fstps 4(%eax)
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll fmodf
; CHECK-NEXT: movl %esp, %eax
; CHECK-NEXT: fstps (%eax)
; CHECK-NEXT: calll __truncsfhf2
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm1 # 4-byte Reload
; CHECK-NEXT: # xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm2 # 4-byte Reload
; CHECK-NEXT: # xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT: movaps %xmm0, %xmm3
; CHECK-NEXT: movss {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; CHECK-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; CHECK-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: addl $140, %esp
; CHECK-NEXT: retl
Entry:
  ; Reserve one 8-byte-aligned stack slot per vector argument.
  %x = alloca <4 x half>, align 8
  %y = alloca <4 x half>, align 8
  ; Write the incoming arguments to their slots ...
  store <4 x half> %0, ptr %x, align 8
  store <4 x half> %1, ptr %y, align 8
  ; ... and read them straight back.
  %2 = load <4 x half>, ptr %x, align 8
  %3 = load <4 x half>, ptr %y, align 8
  ; The operation under test: per-lane remainder of %2 by %3.
  %4 = frem <4 x half> %2, %3
  ret <4 x half> %4
}