; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s
; Increment in loop bb.i28.i adjusted to 2, to prevent loop reversal from
; kicking in.
;
; Codegen test: @mp_sqrt is compiled for 32-bit x86 with SSE2 and the emitted
; assembly is matched against the assertion block at the top of the function.
; The function is only compiled by this test, never run; note that both stores
; in the body go through a null pointer.
; The branch conditions (%foo, %exitcond40.i) are deliberately reused by
; several blocks; they look like leftovers of test-case reduction (assumption,
; not documented in this file).

declare fastcc void @rdft(i32, i32, ptr, ptr, ptr)

define fastcc void @mp_sqrt(i32 %n, i32 %radix, ptr %in, ptr %out, ptr %tmp1, ptr %tmp2, i32 %nfft, ptr %tmp1fft, ptr %tmp2fft, ptr %ip, ptr %w) nounwind {
; CHECK-LABEL: mp_sqrt:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl %esi
; CHECK-NEXT:    movl %edx, %esi
; CHECK-NEXT:    movb $1, %cl
; CHECK-NEXT:    movl $1, %ebx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB0_1: # %bb.i5
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    addl %ebx, %ebx
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    testb $1, %al
; CHECK-NEXT:    jne .LBB0_1
; CHECK-NEXT:  # %bb.2: # %mp_unexp_mp2d.exit.i
; CHECK-NEXT:    je .LBB0_10
; CHECK-NEXT:  # %bb.3: # %cond_next.i
; CHECK-NEXT:    testb $1, %al
; CHECK-NEXT:    jne .LBB0_10
; CHECK-NEXT:  # %bb.4: # %cond_next36.i
; CHECK-NEXT:    movl $0, 0
; CHECK-NEXT:    movzbl %al, %ebp
; CHECK-NEXT:    andl $1, %ebp
; CHECK-NEXT:    xorpd %xmm0, %xmm0
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB0_5: # %bb.i28.i
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    cvttsd2si %xmm1, %edx
; CHECK-NEXT:    cmpl %esi, %edx
; CHECK-NEXT:    cmovgel %eax, %edx
; CHECK-NEXT:    addl $2, %ecx
; CHECK-NEXT:    xorps %xmm2, %xmm2
; CHECK-NEXT:    cvtsi2sd %edx, %xmm2
; CHECK-NEXT:    xorpd %xmm1, %xmm1
; CHECK-NEXT:    subsd %xmm2, %xmm1
; CHECK-NEXT:    mulsd %xmm0, %xmm1
; CHECK-NEXT:    addl $-2, %ebp
; CHECK-NEXT:    jne .LBB0_5
; CHECK-NEXT:  # %bb.6: # %mp_unexp_d2mp.exit29.i
; CHECK-NEXT:    movl $0, 0
; CHECK-NEXT:    je .LBB0_7
; CHECK-NEXT:  # %bb.8: # %mp_sqrt_init.exit
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %edi, %edx
; CHECK-NEXT:    calll mp_mul_csqu@PLT
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl $-1, %edx
; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK-NEXT:    pushl $0
; CHECK-NEXT:    calll rdft@PLT
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT:    pushl %eax
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl $0
; CHECK-NEXT:    calll mp_mul_d2i@PLT
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    testl %ebp, %ebp
; CHECK-NEXT:    jne .LBB0_10
; CHECK-NEXT:  # %bb.9: # %cond_false.i
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK-NEXT:    pushl $0
; CHECK-NEXT:    calll mp_round@PLT
; CHECK-NEXT:    addl $8, %esp
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    calll mp_add@PLT
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK-NEXT:    calll mp_sub@PLT
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    pushl %edi
; CHECK-NEXT:    pushl $0
; CHECK-NEXT:    calll mp_round@PLT
; CHECK-NEXT:    addl $8, %esp
; CHECK-NEXT:    xorl %ecx, %ecx
; CHECK-NEXT:    movl %esi, %edx
; CHECK-NEXT:    pushl %ebp
; CHECK-NEXT:    pushl {{[0-9]+}}(%esp)
; CHECK-NEXT:    pushl %ebx
; CHECK-NEXT:    calll mp_mul_d2i@PLT
; CHECK-NEXT:    addl $12, %esp
; CHECK-NEXT:  .LBB0_10: # %cond_true.i
; CHECK-NEXT:    popl %esi
; CHECK-NEXT:    popl %edi
; CHECK-NEXT:    popl %ebx
; CHECK-NEXT:    popl %ebp
; CHECK-NEXT:    retl
; CHECK-NEXT:    .p2align 4, 0x90
; CHECK-NEXT:  .LBB0_7: # %bb.i.i
; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    jmp .LBB0_7
entry:
  br label %bb.i5

; First loop: doubles %nfft_init.0.i on every trip. %foo is true when entered
; from %entry and false when entered from the back edge, so the back edge is
; taken exactly once.
bb.i5:                                            ; preds = %bb.i5, %entry
  %nfft_init.0.i = phi i32 [ 1, %entry ], [ %tmp7.i3, %bb.i5 ] ; <i32> [#uses=1]
  %foo = phi i1 [1, %entry], [0, %bb.i5]
  %tmp7.i3 = shl i32 %nfft_init.0.i, 1            ; <i32> [#uses=2]
  br i1 %foo, label %bb.i5, label %mp_unexp_mp2d.exit.i

; Branches on the same %foo that controlled the loop exit above.
mp_unexp_mp2d.exit.i:                             ; preds = %bb.i5
  br i1 %foo, label %cond_next.i, label %cond_true.i

cond_true.i:                                      ; preds = %mp_unexp_mp2d.exit.i
  ret void

cond_next.i:                                      ; preds = %mp_unexp_mp2d.exit.i
  %tmp22.i = sdiv i32 0, 2                        ; <i32> [#uses=2]
  br i1 %foo, label %cond_true29.i, label %cond_next36.i

cond_true29.i:                                    ; preds = %cond_next.i
  ret void

cond_next36.i:                                    ; preds = %cond_next.i
  store i32 %tmp22.i, ptr null, align 4
  ; Trip limit for the loop below: 1 or 0 depending on %foo.
  %tmp8.i14.i = select i1 %foo, i32 1, i32 0      ; <i32> [#uses=1]
  br label %bb.i28.i

; Second loop: the induction variable advances by 2 per trip (see the note at
; the top of the file) and the loop exits when it equals %tmp8.i14.i. The
; double value carried around the loop is converted to i32, replaced by 0
; unless it is less than %radix, and converted back to double.
bb.i28.i:                                         ; preds = %bb.i28.i, %cond_next36.i
  %j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
  %din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
  %tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
  %tmp4.i19.i = icmp slt i32 %tmp1.i18.i, %radix  ; <i1> [#uses=1]
  %x.0.i21.i = select i1 %tmp4.i19.i, i32 %tmp1.i18.i, i32 0 ; <i32> [#uses=1]
  ; Unused value; same expression as %indvar.next39.i below.
  %tmp41.sum.i = add i32 %j.0.reg2mem.0.i16.i, 2  ; <i32> [#uses=0]
  %tmp1213.i23.i = sitofp i32 %x.0.i21.i to double ; <double> [#uses=1]
  %tmp15.i24.i = fsub double 0.000000e+00, %tmp1213.i23.i ; <double> [#uses=1]
  %tmp16.i25.i = fmul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1]
  %indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=2]
  ; The exit condition is reused as a branch condition in two later blocks.
  %exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i ; <i1> [#uses=3]
  br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i

mp_unexp_d2mp.exit29.i:                           ; preds = %bb.i28.i
  %tmp46.i = sub i32 0, %tmp22.i                  ; <i32> [#uses=1]
  store i32 %tmp46.i, ptr null, align 4
  br i1 %exitcond40.i, label %bb.i.i, label %mp_sqrt_init.exit

; Unconditional self-loop with no exit.
bb.i.i:                                           ; preds = %bb.i.i, %mp_unexp_d2mp.exit29.i
  br label %bb.i.i

mp_sqrt_init.exit:                                ; preds = %mp_unexp_d2mp.exit29.i
  tail call fastcc void @mp_mul_csqu( i32 0, ptr %tmp1fft )
  tail call fastcc void @rdft( i32 0, i32 -1, ptr null, ptr %ip, ptr %w )
  tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 0, ptr %tmp1fft, ptr %tmp2 )
  br i1 %exitcond40.i, label %cond_false.i, label %cond_true36.i

cond_true36.i:                                    ; preds = %mp_sqrt_init.exit
  ret void

cond_false.i:                                     ; preds = %mp_sqrt_init.exit
  tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, ptr %out )
  tail call fastcc void @mp_add( i32 0, i32 %radix, ptr %tmp1, ptr %tmp2, ptr %tmp1 )
  tail call fastcc void @mp_sub( i32 0, i32 %radix, ptr %in, ptr %tmp2, ptr %tmp2 )
  tail call fastcc void @mp_round( i32 0, i32 %radix, i32 0, ptr %tmp1 )
  ; Only use of the doubled counter from the first loop outside that loop.
  tail call fastcc void @mp_mul_d2i( i32 0, i32 %radix, i32 %tmp7.i3, ptr %tmp2fft, ptr %tmp2 )
  ret void
}

declare fastcc void @mp_add(i32, i32, ptr, ptr, ptr)

declare fastcc void @mp_sub(i32, i32, ptr, ptr, ptr)

declare fastcc void @mp_round(i32, i32, i32, ptr)

declare fastcc void @mp_mul_csqu(i32, ptr)

declare fastcc void @mp_mul_d2i(i32, i32, i32, ptr, ptr)