# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -bottleneck-analysis < %s | FileCheck %s
# Loop kernel analysed by llvm-mca. Each iteration loads one 128-bit vector,
# splats each of its four lanes, feeds them through a serial chain of FMAs,
# stores the result and advances the index in %rax by 16.
.LBB0_4:
  # Input load. Its address uses %rax, which the addq near the bottom of the
  # loop updates, so this is where the loop-carried dependency re-enters.
  vmovups (%rsi,%rax,2), %xmm0

  # Splat lane 3 and start the accumulator with a multiply by a stack operand.
  vpermilps $255, %xmm0, %xmm7
  vmulps -24(%rsp), %xmm7, %xmm8

  # Splat lanes 2, 1 and 0 of the loaded vector.
  vpermilps $170, %xmm0, %xmm6
  vpermilps $85, %xmm0, %xmm5
  vbroadcastss %xmm0, %xmm0

  # Serial FMA chain: every FMA consumes the result of the previous one
  # (%xmm8 -> %xmm5 -> %xmm0 -> %xmm4 -> %xmm1 -> %xmm2 -> %xmm3).
  vfmadd231ps %xmm9, %xmm6, %xmm8
  vfmadd213ps %xmm8, %xmm10, %xmm5
  vfmadd213ps %xmm5, %xmm11, %xmm0
  vfmadd213ps %xmm0, %xmm12, %xmm4
  vfmadd213ps %xmm4, %xmm13, %xmm1
  # %xmm4 has been consumed; refill it with the lane-3 splat for the next pass.
  vmovaps %xmm7, %xmm4
  vfmadd213ps %xmm1, %xmm14, %xmm2
  # %xmm1 has been consumed; refill it with the lane-2 splat for the next pass.
  vmovaps %xmm6, %xmm1
  vfmadd213ps %xmm2, %xmm15, %xmm3

  # Store the result, and keep splats of its lanes 2 and 3 for the next pass
  # (lane 3 in %xmm2, lane 2 in %xmm3 via %xmm0).
  vpermilps $170, %xmm3, %xmm0
  vmovups %xmm3, (%rdx,%rax)
  vpermilps $255, %xmm3, %xmm2

  # Advance the index, count down %ecx and loop while it is non-zero.
  addq $16, %rax
  decl %ecx
  vmovaps %xmm0, %xmm3
  jne .LBB0_4
# CHECK: Iterations: 100
# CHECK-NEXT: Instructions: 2200
# CHECK-NEXT: Total Cycles: 1039
# CHECK-NEXT: Total uOps: 2400
# CHECK: Dispatch Width: 6
# CHECK-NEXT: uOps Per Cycle: 2.31
# CHECK-NEXT: IPC: 2.12
# CHECK-NEXT: Block RThroughput: 6.0
# CHECK: Cycles with backend pressure increase [ 92.69% ]
# CHECK-NEXT: Throughput Bottlenecks:
# CHECK-NEXT: Resource Pressure [ 46.78% ]
# CHECK-NEXT: - SKLPort0 [ 14.24% ]
# CHECK-NEXT: - SKLPort1 [ 14.24% ]
# CHECK-NEXT: - SKLPort5 [ 46.49% ]
# CHECK-NEXT: - SKLPort6 [ 8.66% ]
# CHECK-NEXT: Data Dependencies: [ 64.97% ]
# CHECK-NEXT: - Register Dependencies [ 64.97% ]
# CHECK-NEXT: - Memory Dependencies [ 0.00% ]
# CHECK: Critical sequence based on the simulation:
# CHECK: Instruction Dependency Information
# CHECK-NEXT: +----< 18. addq $16, %rax
# CHECK-NEXT: |
# CHECK-NEXT: | < loop carried >
# CHECK-NEXT: |
# CHECK-NEXT: +----> 0. vmovups (%rsi,%rax,2), %xmm0 ## REGISTER dependency: %rax
# CHECK-NEXT: | 1. vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: | 2. vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: +----> 3. vpermilps $170, %xmm0, %xmm6 ## REGISTER dependency: %xmm0
# CHECK-NEXT: | 4. vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: | 5. vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: +----> 6. vfmadd231ps %xmm9, %xmm6, %xmm8 ## REGISTER dependency: %xmm6
# CHECK-NEXT: +----> 7. vfmadd213ps %xmm8, %xmm10, %xmm5 ## REGISTER dependency: %xmm8
# CHECK-NEXT: +----> 8. vfmadd213ps %xmm5, %xmm11, %xmm0 ## REGISTER dependency: %xmm5
# CHECK-NEXT: +----> 9. vfmadd213ps %xmm0, %xmm12, %xmm4 ## REGISTER dependency: %xmm0
# CHECK-NEXT: +----> 10. vfmadd213ps %xmm4, %xmm13, %xmm1 ## REGISTER dependency: %xmm4
# CHECK-NEXT: | 11. vmovaps %xmm7, %xmm4
# CHECK-NEXT: +----> 12. vfmadd213ps %xmm1, %xmm14, %xmm2 ## REGISTER dependency: %xmm1
# CHECK-NEXT: | 13. vmovaps %xmm6, %xmm1
# CHECK-NEXT: +----> 14. vfmadd213ps %xmm2, %xmm15, %xmm3 ## REGISTER dependency: %xmm2
# CHECK-NEXT: +----> 15. vpermilps $170, %xmm3, %xmm0 ## REGISTER dependency: %xmm3
# CHECK-NEXT: | 16. vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: | 17. vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: | 18. addq $16, %rax
# CHECK-NEXT: | 19. decl %ecx
# CHECK-NEXT: +----> 20. vmovaps %xmm0, %xmm3 ## REGISTER dependency: %xmm0
# CHECK-NEXT: 21. jne .LBB0_4
# CHECK: Instruction Info:
# CHECK-NEXT: [1]: #uOps
# CHECK-NEXT: [2]: Latency
# CHECK-NEXT: [3]: RThroughput
# CHECK-NEXT: [4]: MayLoad
# CHECK-NEXT: [5]: MayStore
# CHECK-NEXT: [6]: HasSideEffects (U)
# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
# CHECK-NEXT: 1 6 0.50 * vmovups (%rsi,%rax,2), %xmm0
# CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: 2 10 0.50 * vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm0, %xmm6
# CHECK-NEXT: 1 1 1.00 vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: 1 1 1.00 vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: 1 4 0.50 vfmadd231ps %xmm9, %xmm6, %xmm8
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm8, %xmm10, %xmm5
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm5, %xmm11, %xmm0
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm0, %xmm12, %xmm4
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm4, %xmm13, %xmm1
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm7, %xmm4
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm1, %xmm14, %xmm2
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm6, %xmm1
# CHECK-NEXT: 1 4 0.50 vfmadd213ps %xmm2, %xmm15, %xmm3
# CHECK-NEXT: 1 1 1.00 vpermilps $170, %xmm3, %xmm0
# CHECK-NEXT: 2 1 1.00 * vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: 1 1 1.00 vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: 1 1 0.25 addq $16, %rax
# CHECK-NEXT: 1 1 0.25 decl %ecx
# CHECK-NEXT: 1 1 0.33 vmovaps %xmm0, %xmm3
# CHECK-NEXT: 1 1 0.50 jne .LBB0_4
# CHECK: Resources:
# CHECK-NEXT: [0] - SKLDivider
# CHECK-NEXT: [1] - SKLFPDivider
# CHECK-NEXT: [2] - SKLPort0
# CHECK-NEXT: [3] - SKLPort1
# CHECK-NEXT: [4] - SKLPort2
# CHECK-NEXT: [5] - SKLPort3
# CHECK-NEXT: [6] - SKLPort4
# CHECK-NEXT: [7] - SKLPort5
# CHECK-NEXT: [8] - SKLPort6
# CHECK-NEXT: [9] - SKLPort7
# CHECK: Resource pressure per iteration:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9]
# CHECK-NEXT: - - 5.52 5.53 1.01 1.03 1.00 6.02 2.93 0.96
# CHECK: Resource pressure by instruction:
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions:
# CHECK-NEXT: - - - - 0.04 0.96 - - - - vmovups (%rsi,%rax,2), %xmm0
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm0, %xmm7
# CHECK-NEXT: - - 0.03 0.97 0.96 0.04 - - - - vmulps -24(%rsp), %xmm7, %xmm8
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm0, %xmm6
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $85, %xmm0, %xmm5
# CHECK-NEXT: - - - - - - - 1.00 - - vbroadcastss %xmm0, %xmm0
# CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd231ps %xmm9, %xmm6, %xmm8
# CHECK-NEXT: - - 0.50 0.50 - - - - - - vfmadd213ps %xmm8, %xmm10, %xmm5
# CHECK-NEXT: - - 0.92 0.08 - - - - - - vfmadd213ps %xmm5, %xmm11, %xmm0
# CHECK-NEXT: - - 0.95 0.05 - - - - - - vfmadd213ps %xmm0, %xmm12, %xmm4
# CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm4, %xmm13, %xmm1
# CHECK-NEXT: - - 0.52 0.48 - - - - - - vmovaps %xmm7, %xmm4
# CHECK-NEXT: - - 0.49 0.51 - - - - - - vfmadd213ps %xmm1, %xmm14, %xmm2
# CHECK-NEXT: - - 0.04 0.95 - - - 0.01 - - vmovaps %xmm6, %xmm1
# CHECK-NEXT: - - 0.51 0.49 - - - - - - vfmadd213ps %xmm2, %xmm15, %xmm3
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $170, %xmm3, %xmm0
# CHECK-NEXT: - - - - 0.01 0.03 1.00 - - 0.96 vmovups %xmm3, (%rdx,%rax)
# CHECK-NEXT: - - - - - - - 1.00 - - vpermilps $255, %xmm3, %xmm2
# CHECK-NEXT: - - - - - - - - 1.00 - addq $16, %rax
# CHECK-NEXT: - - 0.04 0.01 - - - 0.01 0.94 - decl %ecx
# CHECK-NEXT: - - 0.05 0.95 - - - - - - vmovaps %xmm0, %xmm3
# CHECK-NEXT: - - 0.01 - - - - - 0.99 - jne .LBB0_4