# RUN: llc -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve.fp -run-pass=arm-low-overhead-loops %s -verify-machineinstrs -o - | FileCheck %s

# This test was originally hitting problems with empty blocks. That went away
# but the underlying problem (empty blocks causing iterator issues) still remains.
# The test adds an extra empty block to one of the loops to test this.
#
# The added block is bb.13 in the machine function below: it has no
# instructions and sits between bb.9 (which holds a t2DoLoopStart) and the
# loop body bb.10. The single FileCheck directive only requires that a LETP
# appears in the pass output.

# CHECK: LETP

--- |
  %struct.DCT_InstanceTypeDef = type { float*, i32, i32 }

  ; @test contains three loops that are set up with
  ; @llvm.start.loop.iterations and stepped with @llvm.loop.decrement.reg:
  ;   do.body   - masked load + predicated add, lanes selected by vctp32
  ;   do.body24 - unmasked loads feeding four llvm.fma accumulators
  ;   do.body59 - two masked loads + predicated fma, lanes selected by vctp32
  ; Function Attrs: nofree nounwind
  define hidden arm_aapcs_vfpcc void @test(%struct.DCT_InstanceTypeDef* nocapture readonly %S, float* %pIn, float* nocapture %pOut) {
  entry:
    %NumInputs = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 2
    %0 = load i32, i32* %NumInputs, align 4
    %NumFilters = getelementptr inbounds %struct.DCT_InstanceTypeDef, %struct.DCT_InstanceTypeDef* %S, i32 0, i32 1
    %1 = load i32, i32* %NumFilters, align 4
    %pDCTCoefs34 = bitcast %struct.DCT_InstanceTypeDef* %S to float**
    %2 = load float*, float** %pDCTCoefs34, align 4
    %3 = add i32 %0, 3
    %4 = icmp slt i32 %0, 4
    %smin36 = select i1 %4, i32 %0, i32 4
    %5 = sub i32 %3, %smin36
    %6 = lshr i32 %5, 2
    %7 = add nuw nsw i32 %6, 1
    %start1 = call i32 @llvm.start.loop.iterations.i32(i32 %7)
    br label %do.body

  do.body:                                          ; preds = %do.body, %entry
    %count.0 = phi i32 [ %0, %entry ], [ %12, %do.body ]
    %pInT.0 = phi float* [ %pIn, %entry ], [ %add.ptr, %do.body ]
    %sumVec.0 = phi <4 x float> [ zeroinitializer, %entry ], [ %11, %do.body ]
    %8 = phi i32 [ %start1, %entry ], [ %13, %do.body ]
    %pInT.033 = bitcast float* %pInT.0 to <4 x float>*
    ; %9 is the lane mask derived from the remaining element count %count.0.
    %9 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.0)
    %10 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.033, i32 4, <4 x i1> %9, <4 x float> zeroinitializer)
    %11 = tail call fast <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float> %sumVec.0, <4 x float> %10, <4 x i1> %9, <4 x float> undef)
    %add.ptr = getelementptr inbounds float, float* %pInT.0, i32 4
    %12 = add i32 %count.0, -4
    %13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
    %14 = icmp ne i32 %13, 0
    br i1 %14, label %do.body, label %do.end

  do.end:                                           ; preds = %do.body
    ; Sum the four lanes of %11, scale by the first coefficient and store to pOut[0].
    %15 = extractelement <4 x float> %11, i32 0
    %16 = extractelement <4 x float> %11, i32 1
    %add = fadd fast float %15, %16
    %17 = extractelement <4 x float> %11, i32 2
    %add1 = fadd fast float %add, %17
    %18 = extractelement <4 x float> %11, i32 3
    %add2 = fadd fast float %add1, %18
    %19 = load float, float* %2, align 4
    %mul = fmul fast float %19, %add2
    store float %mul, float* %pOut, align 4
    %sub4 = add i32 %1, -4
    %cmp5201 = icmp ugt i32 %sub4, 1
    br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader

  for.body.lr.ph:                                   ; preds = %do.end
    %scevgep = getelementptr float, float* %pIn, i32 4
    %20 = add i32 %0, 4
    %scevgep5 = getelementptr float, float* %2, i32 %20
    %21 = shl i32 %0, 4
    %22 = shl i32 %0, 1
    %23 = add i32 %22, 4
    %scevgep12 = getelementptr float, float* %2, i32 %23
    %24 = mul i32 %0, 3
    %25 = add i32 %24, 4
    %scevgep19 = getelementptr float, float* %2, i32 %25
    %26 = shl i32 %0, 2
    %27 = add i32 %26, 4
    %scevgep26 = getelementptr float, float* %2, i32 %27
    %28 = add i32 %0, -1
    %29 = add i32 %0, -4
    %30 = icmp slt i32 %29, 4
    %smin35 = select i1 %30, i32 %29, i32 4
    %31 = sub i32 %28, %smin35
    %32 = lshr i32 %31, 2
    %33 = add nuw nsw i32 %32, 1
    br label %for.body

  for.cond54.preheader:                             ; preds = %do.end33, %do.end
    %k.0.lcssa = phi i32 [ 1, %do.end ], [ %add53, %do.end33 ]
    %cmp55199 = icmp ult i32 %k.0.lcssa, %1
    br i1 %cmp55199, label %for.body56.preheader, label %for.end72

  for.body56.preheader:                             ; preds = %for.cond54.preheader
    %34 = add i32 %0, 3
    %35 = icmp slt i32 %0, 4
    %smin = select i1 %35, i32 %0, i32 4
    %36 = sub i32 %34, %smin
    %37 = lshr i32 %36, 2
    %38 = add nuw nsw i32 %37, 1
    br label %for.body56

  for.body:                                         ; preds = %do.end33, %for.body.lr.ph
    ; Outer loop that advances %k.0202 by 4 per trip (see %add53 in do.end33).
    %lsr.iv27 = phi float* [ %88, %do.end33 ], [ %scevgep26, %for.body.lr.ph ]
    %lsr.iv20 = phi float* [ %87, %do.end33 ], [ %scevgep19, %for.body.lr.ph ]
    %lsr.iv13 = phi float* [ %86, %do.end33 ], [ %scevgep12, %for.body.lr.ph ]
    %lsr.iv6 = phi float* [ %85, %do.end33 ], [ %scevgep5, %for.body.lr.ph ]
    %k.0202 = phi i32 [ 1, %for.body.lr.ph ], [ %add53, %do.end33 ]
    %39 = bitcast float* %pIn to <4 x float>*
    %mul7 = mul i32 %k.0202, %0
    %arrayidx8 = getelementptr inbounds float, float* %2, i32 %mul7
    %add9 = add nuw nsw i32 %k.0202, 1
    %mul10 = mul i32 %add9, %0
    %arrayidx11 = getelementptr inbounds float, float* %2, i32 %mul10
    %add12 = add nuw nsw i32 %k.0202, 2
    %mul13 = mul i32 %add12, %0
    %arrayidx14 = getelementptr inbounds float, float* %2, i32 %mul13
    %add15 = add i32 %k.0202, 3
    %mul16 = mul i32 %add15, %0
    %arrayidx17 = getelementptr inbounds float, float* %2, i32 %mul16
    %40 = load <4 x float>, <4 x float>* %39, align 4
    %41 = bitcast float* %arrayidx8 to <4 x float>*
    %42 = load <4 x float>, <4 x float>* %41, align 4
    %43 = fmul fast <4 x float> %42, %40
    %44 = bitcast float* %arrayidx11 to <4 x float>*
    %45 = load <4 x float>, <4 x float>* %44, align 4
    %46 = fmul fast <4 x float> %45, %40
    %47 = bitcast float* %arrayidx14 to <4 x float>*
    %48 = load <4 x float>, <4 x float>* %47, align 4
    %49 = fmul fast <4 x float> %48, %40
    %50 = bitcast float* %arrayidx17 to <4 x float>*
    %51 = load <4 x float>, <4 x float>* %50, align 4
    %52 = fmul fast <4 x float> %51, %40
    %start2 = call i32 @llvm.start.loop.iterations.i32(i32 %33)
    br label %do.body24

  do.body24:                                        ; preds = %do.body24, %for.body
    ; Inner loop without vctp32: one shared input vector (%54) is multiplied
    ; into four separate accumulators via llvm.fma.
    %lsr.iv30 = phi float* [ %scevgep31, %do.body24 ], [ %lsr.iv27, %for.body ]
    %lsr.iv23 = phi float* [ %scevgep24, %do.body24 ], [ %lsr.iv20, %for.body ]
    %lsr.iv16 = phi float* [ %scevgep17, %do.body24 ], [ %lsr.iv13, %for.body ]
    %lsr.iv9 = phi float* [ %scevgep10, %do.body24 ], [ %lsr.iv6, %for.body ]
    %lsr.iv = phi float* [ %scevgep3, %do.body24 ], [ %scevgep, %for.body ]
    %sumVec0.0 = phi <4 x float> [ %43, %for.body ], [ %56, %do.body24 ]
    %sumVec1.0 = phi <4 x float> [ %46, %for.body ], [ %58, %do.body24 ]
    %sumVec2.0 = phi <4 x float> [ %49, %for.body ], [ %60, %do.body24 ]
    %sumVec3.0 = phi <4 x float> [ %52, %for.body ], [ %62, %do.body24 ]
    %53 = phi i32 [ %start2, %for.body ], [ %63, %do.body24 ]
    %lsr.iv4 = bitcast float* %lsr.iv to <4 x float>*
    %lsr.iv911 = bitcast float* %lsr.iv9 to <4 x float>*
    %lsr.iv1618 = bitcast float* %lsr.iv16 to <4 x float>*
    %lsr.iv2325 = bitcast float* %lsr.iv23 to <4 x float>*
    %lsr.iv3032 = bitcast float* %lsr.iv30 to <4 x float>*
    %54 = load <4 x float>, <4 x float>* %lsr.iv4, align 4
    %55 = load <4 x float>, <4 x float>* %lsr.iv911, align 4
    %56 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %55, <4 x float> %sumVec0.0)
    %57 = load <4 x float>, <4 x float>* %lsr.iv1618, align 4
    %58 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %57, <4 x float> %sumVec1.0)
    %59 = load <4 x float>, <4 x float>* %lsr.iv2325, align 4
    %60 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %59, <4 x float> %sumVec2.0)
    %61 = load <4 x float>, <4 x float>* %lsr.iv3032, align 4
    %62 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %54, <4 x float> %61, <4 x float> %sumVec3.0)
    %scevgep3 = getelementptr float, float* %lsr.iv, i32 4
    %scevgep10 = getelementptr float, float* %lsr.iv9, i32 4
    %scevgep17 = getelementptr float, float* %lsr.iv16, i32 4
    %scevgep24 = getelementptr float, float* %lsr.iv23, i32 4
    %scevgep31 = getelementptr float, float* %lsr.iv30, i32 4
    %63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1)
    %64 = icmp ne i32 %63, 0
    br i1 %64, label %do.body24, label %do.end33

  do.end33:                                         ; preds = %do.body24
    ; Reduce each of the four accumulators to a scalar and store them to
    ; pOut[k], pOut[k+1], pOut[k+2] and pOut[k+3].
    %65 = bitcast float* %lsr.iv27 to i1*
    %66 = bitcast float* %lsr.iv20 to i1*
    %67 = bitcast float* %lsr.iv13 to i1*
    %68 = bitcast float* %lsr.iv6 to i1*
    %69 = extractelement <4 x float> %56, i32 0
    %70 = extractelement <4 x float> %56, i32 1
    %add34 = fadd fast float %69, %70
    %71 = extractelement <4 x float> %56, i32 2
    %add35 = fadd fast float %add34, %71
    %72 = extractelement <4 x float> %56, i32 3
    %add36 = fadd fast float %add35, %72
    %arrayidx37 = getelementptr inbounds float, float* %pOut, i32 %k.0202
    store float %add36, float* %arrayidx37, align 4
    %73 = extractelement <4 x float> %58, i32 0
    %74 = extractelement <4 x float> %58, i32 1
    %add38 = fadd fast float %73, %74
    %75 = extractelement <4 x float> %58, i32 2
    %add39 = fadd fast float %add38, %75
    %76 = extractelement <4 x float> %58, i32 3
    %add40 = fadd fast float %add39, %76
    %arrayidx42 = getelementptr inbounds float, float* %pOut, i32 %add9
    store float %add40, float* %arrayidx42, align 4
    %77 = extractelement <4 x float> %60, i32 0
    %78 = extractelement <4 x float> %60, i32 1
    %add43 = fadd fast float %77, %78
    %79 = extractelement <4 x float> %60, i32 2
    %add44 = fadd fast float %add43, %79
    %80 = extractelement <4 x float> %60, i32 3
    %add45 = fadd fast float %add44, %80
    %arrayidx47 = getelementptr inbounds float, float* %pOut, i32 %add12
    store float %add45, float* %arrayidx47, align 4
    %81 = extractelement <4 x float> %62, i32 0
    %82 = extractelement <4 x float> %62, i32 1
    %add48 = fadd fast float %81, %82
    %83 = extractelement <4 x float> %62, i32 2
    %add49 = fadd fast float %add48, %83
    %84 = extractelement <4 x float> %62, i32 3
    %add50 = fadd fast float %add49, %84
    %arrayidx52 = getelementptr inbounds float, float* %pOut, i32 %add15
    store float %add50, float* %arrayidx52, align 4
    %add53 = add i32 %k.0202, 4
    %scevgep8 = getelementptr i1, i1* %68, i32 %21
    %85 = bitcast i1* %scevgep8 to float*
    %scevgep15 = getelementptr i1, i1* %67, i32 %21
    %86 = bitcast i1* %scevgep15 to float*
    %scevgep22 = getelementptr i1, i1* %66, i32 %21
    %87 = bitcast i1* %scevgep22 to float*
    %scevgep29 = getelementptr i1, i1* %65, i32 %21
    %88 = bitcast i1* %scevgep29 to float*
    %cmp5 = icmp ult i32 %add53, %sub4
    br i1 %cmp5, label %for.body, label %for.cond54.preheader

  for.body56:                                       ; preds = %for.body56.preheader, %do.end66
    ; Outer loop that advances %k.1200 by 1 per trip (see %inc in do.end66).
    %k.1200 = phi i32 [ %inc, %do.end66 ], [ %k.0.lcssa, %for.body56.preheader ]
    %mul57 = mul i32 %k.1200, %0
    %arrayidx58 = getelementptr inbounds float, float* %2, i32 %mul57
    %start3 = call i32 @llvm.start.loop.iterations.i32(i32 %38)
    br label %do.body59

  do.body59:                                        ; preds = %do.body59, %for.body56
    %count.2 = phi i32 [ %0, %for.body56 ], [ %94, %do.body59 ]
    %pInT.2 = phi float* [ %pIn, %for.body56 ], [ %add.ptr61, %do.body59 ]
    %pCos0.1 = phi float* [ %arrayidx58, %for.body56 ], [ %add.ptr62, %do.body59 ]
    %sumVec.1 = phi <4 x float> [ zeroinitializer, %for.body56 ], [ %93, %do.body59 ]
    %89 = phi i32 [ %start3, %for.body56 ], [ %95, %do.body59 ]
    %pInT.21 = bitcast float* %pInT.2 to <4 x float>*
    %pCos0.12 = bitcast float* %pCos0.1 to <4 x float>*
    ; %90 is the lane mask derived from the remaining element count %count.2.
    %90 = tail call <4 x i1> @llvm.arm.mve.vctp32(i32 %count.2)
    %91 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pInT.21, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
    %92 = tail call fast <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %pCos0.12, i32 4, <4 x i1> %90, <4 x float> zeroinitializer)
    %93 = tail call fast <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float> %91, <4 x float> %92, <4 x float> %sumVec.1, <4 x i1> %90)
    %add.ptr61 = getelementptr inbounds float, float* %pInT.2, i32 4
    %add.ptr62 = getelementptr inbounds float, float* %pCos0.1, i32 4
    %94 = add i32 %count.2, -4
    %95 = call i32 @llvm.loop.decrement.reg.i32(i32 %89, i32 1)
    %96 = icmp ne i32 %95, 0
    br i1 %96, label %do.body59, label %do.end66

  do.end66:                                         ; preds = %do.body59
    %97 = extractelement <4 x float> %93, i32 0
    %98 = extractelement <4 x float> %93, i32 1
    %add67 = fadd fast float %97, %98
    %99 = extractelement <4 x float> %93, i32 2
    %add68 = fadd fast float %add67, %99
    %100 = extractelement <4 x float> %93, i32 3
    %add69 = fadd fast float %add68, %100
    %arrayidx70 = getelementptr inbounds float, float* %pOut, i32 %k.1200
    store float %add69, float* %arrayidx70, align 4
    %inc = add nuw i32 %k.1200, 1
    %exitcond.not = icmp eq i32 %inc, %1
    br i1 %exitcond.not, label %for.end72, label %for.body56

  for.end72:                                        ; preds = %do.end66, %for.cond54.preheader
    ret void
  }

  declare <4 x i1> @llvm.arm.mve.vctp32(i32) #1
  declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32 immarg, <4 x i1>, <4 x float>) #2
  declare <4 x float> @llvm.arm.mve.add.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x i1>, <4 x float>) #1
  declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #3
  declare <4 x float> @llvm.arm.mve.fma.predicated.v4f32.v4i1(<4 x float>, <4 x float>, <4 x float>, <4 x i1>) #1
  declare i32 @llvm.start.loop.iterations.i32(i32) #4
  declare i32 @llvm.loop.decrement.reg.i32(i32, i32) #4

...
---
# Machine function for @test. Register liveness is tracked, so the liveins
# lists on every block below are part of the test input.
name:            test
alignment:       4
exposesReturnsTwice: false
legalized:       false
regBankSelected: false
selected:        false
failedISel:      false
tracksRegLiveness: true
hasWinCFI:       false
registers:       []
liveins:
  - { reg: '$r0', virtual-reg: '' }
  - { reg: '$r1', virtual-reg: '' }
  - { reg: '$r2', virtual-reg: '' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       112
  offsetAdjustment: 0
  maxAlignment:    8
  adjustsStack:    false
  hasCalls:        false
  stackProtector:  ''
  maxCallFrameSize: 0
  cvBytesOfCalleeSavedRegisters: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
  localFrameSize:  0
  savePoint:       ''
  restorePoint:    ''
fixedStack:      []
stack:
  # ids 0-8: 4-byte spill slots that are not tied to a callee-saved register.
  - { id: 0, name: '', type: spill-slot, offset: -76, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 1, name: '', type: spill-slot, offset: -80, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 2, name: '', type: spill-slot, offset: -84, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 3, name: '', type: spill-slot, offset: -88, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 4, name: '', type: spill-slot, offset: -92, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 5, name: '', type: spill-slot, offset: -96, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 6, name: '', type: spill-slot, offset: -100, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 7, name: '', type: spill-slot, offset: -104, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 8, name: '', type: spill-slot, offset: -108, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  # ids 9-21: callee-saved register slots ($lr, $r11..$r4, then $d11..$d8).
  - { id: 9, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 10, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r11', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 11, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r10', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 12, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r9', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 13, name: '', type: spill-slot, offset: -20, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r8', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 14, name: '', type: spill-slot, offset: -24, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 15, name: '', type: spill-slot, offset: -28, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r6', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 16, name: '', type: spill-slot, offset: -32, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 17, name: '', type: spill-slot, offset: -36, size: 4, alignment: 4,
      stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 18, name: '', type: spill-slot, offset: -48, size: 8, alignment: 8,
      stack-id: default, callee-saved-register: '$d11', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 19, name: '', type: spill-slot, offset: -56, size: 8, alignment: 8,
      stack-id: default, callee-saved-register: '$d10', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 20, name: '', type: spill-slot, offset: -64, size: 8, alignment: 8,
      stack-id: default, callee-saved-register: '$d9', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
  - { id: 21, name: '', type: spill-slot, offset: -72, size: 8, alignment: 8,
      stack-id: default, callee-saved-register: '$d8', callee-saved-restored: true,
      debug-info-variable: '', debug-info-expression: '', debug-info-location: '' }
callSites:       []
constants:       []
machineFunctionInfo: {}
body:             |
  ; Prologue (pushes, CFI, stack adjustment to a CFA offset of 112), then the
  ; set-up for the first loop: $lr holds the count given to t2DoLoopStart.
  bb.0.entry:
    successors: %bb.1(0x80000000)
    liveins: $r0, $r1, $r2, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $lr, $d8, $d9, $d10, $d11

    $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11, killed $lr
    frame-setup CFI_INSTRUCTION def_cfa_offset 36
    frame-setup CFI_INSTRUCTION offset $lr, -4
    frame-setup CFI_INSTRUCTION offset $r11, -8
    frame-setup CFI_INSTRUCTION offset $r10, -12
    frame-setup CFI_INSTRUCTION offset $r9, -16
    frame-setup CFI_INSTRUCTION offset $r8, -20
    frame-setup CFI_INSTRUCTION offset $r7, -24
    frame-setup CFI_INSTRUCTION offset $r6, -28
    frame-setup CFI_INSTRUCTION offset $r5, -32
    frame-setup CFI_INSTRUCTION offset $r4, -36
    $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg
    frame-setup CFI_INSTRUCTION def_cfa_offset 40
    $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d8, killed $d9, killed $d10, killed $d11
    frame-setup CFI_INSTRUCTION def_cfa_offset 72
    frame-setup CFI_INSTRUCTION offset $d11, -48
    frame-setup CFI_INSTRUCTION offset $d10, -56
    frame-setup CFI_INSTRUCTION offset $d9, -64
    frame-setup CFI_INSTRUCTION offset $d8, -72
    $sp = frame-setup tSUBspi $sp, 10, 14 /* CC::al */, $noreg
    frame-setup CFI_INSTRUCTION def_cfa_offset 112
    renamable $r4 = tLDRi renamable $r0, 2, 14 /* CC::al */, $noreg :: (load (s32) from %ir.NumInputs)
    $r5 = tMOVr killed $r1, 14 /* CC::al */, $noreg
    renamable $r11 = t2LDRi12 renamable $r0, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.pDCTCoefs34)
    renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
    $r1 = tMOVr $r4, 14 /* CC::al */, $noreg
    tCMPi8 renamable $r4, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
    t2IT 10, 8, implicit-def $itstate
    renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate
    renamable $r1, dead $cpsr = tSUBrr renamable $r4, killed renamable $r1, 14 /* CC::al */, $noreg
    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 3, 14 /* CC::al */, $noreg
    renamable $lr = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
    renamable $r3 = tLDRi killed renamable $r0, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.NumFilters)
    $r0 = tMOVr $r4, 14 /* CC::al */, $noreg
    renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
    $r1 = tMOVr $r5, 14 /* CC::al */, $noreg
    $lr = t2DoLoopStart renamable $lr

  ; First loop body: MVE_VCTP32 on $r0 defines $vpr, and the load and add after
  ; MVE_VPST are predicated on it. $r0 is reduced by 4 each iteration.
  bb.1.do.body (align 4):
    successors: %bb.1(0x7c000000), %bb.2(0x04000000)
    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r11

    renamable $vpr = MVE_VCTP32 renamable $r0, 0, $noreg, $noreg
    MVE_VPST 4, implicit $vpr
    renamable $r1, renamable $q1 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.pInT.033, align 4)
    renamable $q0 = MVE_VADDf32 killed renamable $q0, killed renamable $q1, 1, killed renamable $vpr, $noreg, undef renamable $q0
    renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg
    renamable $lr = t2LoopDec killed renamable $lr, 1
    t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
    tB %bb.2, 14 /* CC::al */, $noreg

  bb.2.do.end:
    successors: %bb.3(0x40000000), %bb.7(0x40000000)
    liveins: $q0, $r2, $r3, $r4, $r5, $r11

    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
    renamable $r0, dead $cpsr = tSUBi3 renamable $r3, 4, 14 /* CC::al */, $noreg
    tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store (s32) into %stack.8)
    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
    tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1)
    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
    renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.2)
    tCMPi8 killed renamable $r0, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
    renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
    VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pOut)
    t2Bcc %bb.7, 3 /* CC::lo */, killed $cpsr

  ; Pre-header of the second loop nest: computes the row pointers and the
  ; inner trip count, spilling several of them to the stack.
  bb.3.for.body.lr.ph:
    successors: %bb.4(0x80000000)
    liveins: $r0, $r2, $r4, $r5, $r11

    renamable $r6 = t2ADDri renamable $r5, 16, 14 /* CC::al */, $noreg, $noreg
    renamable $r1, dead $cpsr = tSUBi3 renamable $r4, 4, 14 /* CC::al */, $noreg
    tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %stack.5)
    renamable $r6, dead $cpsr = tLSLri renamable $r4, 4, 14 /* CC::al */, $noreg
    tCMPi8 renamable $r1, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
    tSTRspi killed renamable $r6, $sp, 3, 14 /* CC::al */, $noreg :: (store (s32) into %stack.6)
    t2IT 10, 8, implicit-def $itstate
    renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate
    renamable $r7 = t2ADDrs renamable $r4, renamable $r4, 10, 14 /* CC::al */, $noreg, $noreg
    renamable $r1, dead $cpsr = tMVN killed renamable $r1, 14 /* CC::al */, $noreg
    renamable $r1 = tADDhirr killed renamable $r1, renamable $r4, 14 /* CC::al */, $noreg
    renamable $r12 = t2ADDrs renamable $r11, renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r3 = t2ADDrs renamable $r11, renamable $r4, 26, 14 /* CC::al */, $noreg, $noreg
    renamable $lr = t2ADDrs renamable $r11, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r7 = t2ADDrs renamable $r11, renamable $r4, 34, 14 /* CC::al */, $noreg, $noreg
    renamable $r1 = nuw nsw t2ADDrs renamable $r0, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg
    renamable $r6 = t2ADDri killed renamable $r12, 16, 14 /* CC::al */, $noreg, $noreg
    renamable $r12 = t2ADDri killed renamable $r3, 16, 14 /* CC::al */, $noreg, $noreg
    tSTRspi killed renamable $r1, $sp, 2, 14 /* CC::al */, $noreg :: (store (s32) into %stack.7)
    renamable $r1 = t2ADDri killed renamable $lr, 16, 14 /* CC::al */, $noreg, $noreg
    renamable $r10 = t2ADDri killed renamable $r7, 16, 14 /* CC::al */, $noreg, $noreg
    tSTRspi renamable $r4, $sp, 7, 14 /* CC::al */, $noreg :: (store (s32) into %stack.2)
    t2STRDi8 $r11, $r5, $sp, 20, 14 /* CC::al */, $noreg :: (store (s32) into %stack.4), (store (s32) into %stack.3)

  ; Outer-loop body of the second nest. The inner count is reloaded from
  ; %stack.7 into $lr and handed to t2DoLoopStart; note that two more
  ; instructions follow the t2DoLoopStart before the block ends.
  bb.4.for.body (align 4):
    successors: %bb.5(0x80000000)
    liveins: $r0, $r1, $r2, $r4, $r5, $r6, $r10, $r11, $r12

    renamable $r3 = t2MUL renamable $r0, renamable $r4, 14 /* CC::al */, $noreg
    renamable $r7, dead $cpsr = nuw nsw tADDi3 renamable $r0, 1, 14 /* CC::al */, $noreg
    renamable $r8 = nuw nsw t2ADDri renamable $r0, 2, 14 /* CC::al */, $noreg, $noreg
    tSTRspi renamable $r7, $sp, 9, 14 /* CC::al */, $noreg :: (store (s32) into %stack.0)
    renamable $r9 = t2ADDri renamable $r0, 3, 14 /* CC::al */, $noreg, $noreg
    renamable $r7, dead $cpsr = tMUL renamable $r4, killed renamable $r7, 14 /* CC::al */, $noreg
    renamable $q0 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg, $noreg :: (load (s128) from %ir.39, align 4)
    renamable $r3 = t2ADDrs renamable $r11, killed renamable $r3, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r5 = t2MUL renamable $r8, renamable $r4, 14 /* CC::al */, $noreg
    renamable $r4 = t2MUL renamable $r9, killed renamable $r4, 14 /* CC::al */, $noreg
    renamable $r7 = t2ADDrs renamable $r11, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r5 = t2ADDrs renamable $r11, killed renamable $r5, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r4 = t2ADDrs killed renamable $r11, killed renamable $r4, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $q1 = MVE_VLDRWU32 killed renamable $r3, 0, 0, $noreg, $noreg :: (load (s128) from %ir.41, align 4)
    renamable $q3 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, $noreg, undef renamable $q3
    renamable $q1 = MVE_VLDRWU32 killed renamable $r7, 0, 0, $noreg, $noreg :: (load (s128) from %ir.44, align 4)
    renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, $noreg, undef renamable $q2
    renamable $q1 = MVE_VLDRWU32 killed renamable $r5, 0, 0, $noreg, $noreg :: (load (s128) from %ir.47, align 4)
    renamable $q1 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q1, renamable $q0, 0, $noreg, $noreg, undef renamable $q1
    renamable $q4 = MVE_VLDRWU32 killed renamable $r4, 0, 0, $noreg, $noreg :: (load (s128) from %ir.50, align 4)
    renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VMULf32 killed renamable $q4, killed renamable $q0, 0, $noreg, $noreg, undef renamable $q0
    renamable $lr = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.7)
    $r3 = tMOVr $r10, 14 /* CC::al */, $noreg
    $r5 = tMOVr $r1, 14 /* CC::al */, $noreg
    $r4 = tMOVr $r12, 14 /* CC::al */, $noreg
    $lr = t2DoLoopStart renamable $lr
    $r7 = tMOVr $r6, 14 /* CC::al */, $noreg
    renamable $r11 = t2LDRi12 $sp, 16, 14 /* CC::al */, $noreg :: (load (s32) from %stack.5)

  ; Second loop body: no MVE_VCTP32 or MVE_VPST here; all loads and VFMAs are
  ; unpredicated.
  bb.5.do.body24 (align 4):
    successors: %bb.5(0x7c000000), %bb.6(0x04000000)
    liveins: $lr, $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11, $r12

    renamable $r11, renamable $q4 = MVE_VLDRWU32_post killed renamable $r11, 16, 0, $noreg, $noreg :: (load (s128) from %ir.lsr.iv4, align 4)
    renamable $r7, renamable $q5 = MVE_VLDRWU32_post killed renamable $r7, 16, 0, $noreg, $noreg :: (load (s128) from %ir.lsr.iv911, align 4)
    renamable $q3 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q3, renamable $q4, killed renamable $q5, 0, $noreg, $noreg
    renamable $r4, renamable $q5 = MVE_VLDRWU32_post killed renamable $r4, 16, 0, $noreg, $noreg :: (load (s128) from %ir.lsr.iv1618, align 4)
    renamable $q2 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q2, renamable $q4, killed renamable $q5, 0, $noreg, $noreg
    renamable $r5, renamable $q5 = MVE_VLDRWU32_post killed renamable $r5, 16, 0, $noreg, $noreg :: (load (s128) from %ir.lsr.iv2325, align 4)
    renamable $q1 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q1, renamable $q4, killed renamable $q5, 0, $noreg, $noreg
    renamable $r3, renamable $q5 = MVE_VLDRWU32_post killed renamable $r3, 16, 0, $noreg, $noreg :: (load (s128) from %ir.lsr.iv3032, align 4)
    renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VFMAf32 killed renamable $q0, killed renamable $q4, killed renamable $q5, 0, $noreg, $noreg
    renamable $lr = t2LoopDec killed renamable $lr, 1
    t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr
    tB %bb.6, 14 /* CC::al */, $noreg

  ; Latch of the second nest: reduces $q3/$q2/$q1/$q0 to scalars, stores four
  ; results, reloads the spilled values and advances the row pointers.
  bb.6.do.end33:
    successors: %bb.4(0x7c000000), %bb.7(0x04000000)
    liveins: $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r6, $r8, $r9, $r10, $r12

    renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg
    renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg
    renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg
    renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg
    renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3
    renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2
    renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg
    renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
    renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0)
    renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg
    renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg
    renamable $r3 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r7 = t2ADDrs renamable $r2, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1
    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
    VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx37)
    VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx42)
    renamable $r3 = t2ADDrs renamable $r2, killed renamable $r8, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $r7 = t2ADDrs renamable $r2, killed renamable $r9, 18, 14 /* CC::al */, $noreg, $noreg
    VSTRS killed renamable $s4, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx47)
    VSTRS killed renamable $s0, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx52)
    $r11, $r5 = t2LDRDi8 $sp, 20, 14 /* CC::al */, $noreg :: (load (s32) from %stack.4), (load (s32) from %stack.3)
    renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14 /* CC::al */, $noreg
    renamable $r7 = tLDRspi $sp, 3, 14 /* CC::al */, $noreg :: (load (s32) from %stack.6)
    renamable $r3 = tLDRspi $sp, 8, 14 /* CC::al */, $noreg :: (load (s32) from %stack.1)
    renamable $r4 = tLDRspi $sp, 7, 14 /* CC::al */, $noreg :: (load (s32) from %stack.2)
    renamable $r6 = tADDhirr killed renamable $r6, renamable $r7, 14 /* CC::al */, $noreg
    renamable $r12 = tADDhirr killed renamable $r12, renamable $r7, 14 /* CC::al */, $noreg
    renamable $r1 = tADDhirr killed renamable $r1, renamable $r7, 14 /* CC::al */, $noreg
    tCMPr renamable $r0, killed renamable $r3, 14 /* CC::al */, $noreg, implicit-def $cpsr
    renamable $r10 = tADDhirr killed renamable $r10, killed renamable $r7, 14 /* CC::al */, $noreg
    t2Bcc %bb.4, 3 /* CC::lo */, killed $cpsr

  bb.7.for.cond54.preheader:
    successors: %bb.8(0x40000000), %bb.12(0x40000000)
    liveins: $r0, $r2, $r4, $r5, $r11

    renamable $r12 = t2LDRi12 $sp, 4, 14 /* CC::al */, $noreg :: (load (s32) from %stack.8)
    tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
    tBcc %bb.12, 2 /* CC::hs */, killed $cpsr

  ; Computes the trip count for the third loop into $r3.
  bb.8.for.body56.preheader:
    successors: %bb.9(0x80000000)
    liveins: $r0, $r2, $r4, $r5, $r11, $r12

    $r1 = tMOVr $r4, 14 /* CC::al */, $noreg
    tCMPi8 renamable $r4, 4, 14 /* CC::al */, $noreg, implicit-def $cpsr
    t2IT 10, 8, implicit-def $itstate
    renamable $r1 = tMOVi8 $noreg, 4, 10 /* CC::ge */, killed $cpsr, implicit killed renamable $r1, implicit killed $itstate
    renamable $r3, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg
    renamable $r1, dead $cpsr = tSUBrr renamable $r4, killed renamable $r1, 14 /* CC::al */, $noreg
    renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 3, 14 /* CC::al */, $noreg
    renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r1, 19, 14 /* CC::al */, $noreg, $noreg

  ; Outer-loop body of the third nest. It ends with the t2DoLoopStart and its
  ; only successor is the empty block bb.13, not the loop body itself.
  bb.9.for.body56 (align 4):
    successors: %bb.13(0x80000000)
    liveins: $r0, $r2, $r3, $r4, $r5, $r11, $r12

    renamable $r1 = t2MUL renamable $r0, renamable $r4, 14 /* CC::al */, $noreg
    renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
    renamable $r1 = t2ADDrs renamable $r11, killed renamable $r1, 18, 14 /* CC::al */, $noreg, $noreg
    $r6 = tMOVr $r4, 14 /* CC::al */, $noreg
    $r7 = tMOVr $r5, 14 /* CC::al */, $noreg
    $lr = tMOVr $r3, 14 /* CC::al */, $noreg
    $lr = t2DoLoopStart renamable $r3

  ; Intentionally empty block: it contains no instructions and has bb.10 as
  ; its only successor. This is the block the test was written to exercise.
  bb.13:
    successors: %bb.10(0x80000000)
    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12

  ; Third loop body: MVE_VCTP32 on $r6 defines $vpr; the two loads and the
  ; VFMA after MVE_VPST are predicated on it. $r6 is reduced by 4 each
  ; iteration.
  bb.10.do.body59 (align 4):
    successors: %bb.10(0x7c000000), %bb.11(0x04000000)
    liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12

    renamable $vpr = MVE_VCTP32 renamable $r6, 0, $noreg, $noreg
    MVE_VPST 2, implicit $vpr
    renamable $r7, renamable $q1 = MVE_VLDRWU32_post killed renamable $r7, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.pInT.21, align 4)
    renamable $r1, renamable $q2 = MVE_VLDRWU32_post killed renamable $r1, 16, 1, renamable $vpr, $noreg :: (load (s128) from %ir.pCos0.12, align 4)
    renamable $q0 = MVE_VFMAf32 killed renamable $q0, killed renamable $q1, killed renamable $q2, 1, killed renamable $vpr, $noreg
    renamable $r6, dead $cpsr = tSUBi8 killed renamable $r6, 4, 14 /* CC::al */, $noreg
    renamable $lr = t2LoopDec killed renamable $lr, 1
    t2LoopEnd renamable $lr, %bb.10, implicit-def dead $cpsr
    tB %bb.11, 14 /* CC::al */, $noreg

  bb.11.do.end66:
    successors: %bb.12(0x04000000), %bb.9(0x7c000000)
    liveins: $q0, $r0, $r2, $r3, $r4, $r5, $r11, $r12

    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
    renamable $r1 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
    renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
    renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
    renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
    tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
    VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx70)
    tBcc %bb.9, 1 /* CC::ne */, killed $cpsr

  ; Epilogue: reverses the stack adjustments made in bb.0 and returns by
  ; loading $pc in t2LDMIA_RET.
  bb.12.for.end72:
    $sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg
    $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11
    $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg
    $sp = frame-destroy t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11, def $pc

...