# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck -check-prefixes=GFX9 %s

# The input already has "S_WAITCNT 49279" between the DS read and the FLAT
# load that consumes its result. The checks expect that instruction to come
# through unchanged. The only waits in the expected output that are absent
# from the input are the leading "S_WAITCNT 0" and the "S_WAITCNT 112" in
# front of the FLAT store.
# NOTE(review): the test name implies 49279 waits on lgkmcnt only - confirm
# against the gfx9 s_waitcnt immediate encoding.
---
name: test_waitcnt_preexisting_lgkmcnt_unmodified
body: |
  bb.0:
    liveins: $vgpr0

    ; GFX9-LABEL: name: test_waitcnt_preexisting_lgkmcnt_unmodified
    ; GFX9: liveins: $vgpr0
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 49279
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
    S_WAITCNT 49279
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# Same shape as the test above, but the producer is a GLOBAL load and the
# preexisting wait is "S_WAITCNT 3952". The checks again expect the
# preexisting wait to be emitted unchanged.
# NOTE(review): the test name implies 3952 waits on vmcnt only - confirm
# against the gfx9 s_waitcnt immediate encoding.
---
name: test_waitcnt_preexisting_vmcnt_unmodified
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX9-LABEL: name: test_waitcnt_preexisting_vmcnt_unmodified
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 3952
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    S_WAITCNT 3952
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# Respect preexisting waitcnt and add required wait.
# The input places "S_WAITCNT 3952" after a DS read whose result feeds the
# following FLAT load. The checks expect "S_WAITCNT 112" at that position,
# i.e. the preexisting wait is rewritten in place rather than a second
# S_WAITCNT being emitted next to it.
# NOTE(review): presumably 3952 is a vmcnt-only wait and 112 waits on both
# vmcnt and lgkmcnt, as the test name suggests - confirm against the gfx9
# s_waitcnt immediate encoding.
---
name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
body: |
  bb.0:
    liveins: $vgpr0

    ; GFX9-LABEL: name: test_waitcnt_preexisting_vmcnt_needs_lgkmcnt
    ; GFX9: liveins: $vgpr0
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = DS_READ2_B32 $vgpr0, 0, 1, 0, implicit $m0, implicit $exec
    S_WAITCNT 3952
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# Mirror image of the test above: the producer is a GLOBAL load and the
# preexisting wait is "S_WAITCNT 49279". The checks expect it to become
# "S_WAITCNT 112" at the same position.
# NOTE(review): presumably 49279 is an lgkmcnt-only wait - confirm against
# the gfx9 s_waitcnt immediate encoding.
---
name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX9-LABEL: name: test_waitcnt_preexisting_lgkmcnt_needs_vmcnt
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    S_WAITCNT 49279
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# Apply wait for all counters from preexisting waitcnt regardless of the wait
# required by the next instruction.
# A GLOBAL load and a DS read are issued back to back, followed by a
# preexisting "S_WAITCNT 0". The next instruction (V_OR_B32) reads only the DS
# result ($vgpr6); the GLOBAL result ($vgpr4_vgpr5) is first read by the later
# FLAT load. The checks expect the "S_WAITCNT 0" to stay as written and no
# further wait to appear in front of the FLAT load. The only wait added after
# it is "S_WAITCNT 112" before the FLAT store.
---
name: test_waitcnt_preexisting_apply_all_counters
body: |
  bb.0:
    liveins: $vgpr0_vgpr1, $vgpr2

    ; GFX9-LABEL: name: test_waitcnt_preexisting_apply_all_counters
    ; GFX9: liveins: $vgpr0_vgpr1, $vgpr2
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GFX9-NEXT: $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr4_vgpr5 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    $vgpr6_vgpr7 = DS_READ2_B32 $vgpr2, 0, 1, 0, implicit $m0, implicit $exec
    S_WAITCNT 0
    $vgpr6 = V_OR_B32_e32 1, killed $vgpr6, implicit $exec
    $vgpr0 = FLAT_LOAD_DWORD $vgpr4_vgpr5, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...

# Six consecutive, identical "S_WAITCNT 0" instructions sit between the FLAT
# load and the FLAT store in the input. The checks expect exactly one
# "S_WAITCNT 0" at that position in the output.
---
name: test_waitcnt_preexisting_combine_waitcnt
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX9-LABEL: name: test_waitcnt_preexisting_combine_waitcnt
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 0
    S_WAITCNT 0
    S_WAITCNT 0
    S_WAITCNT 0
    S_WAITCNT 0
    S_WAITCNT 0
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...
# Two adjacent preexisting waits with different immediates ("S_WAITCNT 49279"
# then "S_WAITCNT 3952") sit between the FLAT load and the FLAT store. The
# checks expect them to be replaced by a single "S_WAITCNT 112".
# NOTE(review): presumably 49279 and 3952 wait on different counters and 112
# is their combination, as the test name suggests - confirm against the gfx9
# s_waitcnt immediate encoding.
---
name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX9-LABEL: name: test_waitcnt_preexisting_combine_waitcnt_diff_counters
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 49279
    S_WAITCNT 3952
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
...

# Apply preexisting waitcnt when no wait is immediately needed.
# FIXME: Move waitcnt as late as possible.
# The preexisting "S_WAITCNT 0" directly follows the FLAT load, and three
# S_NOPs separate it from the FLAT store, the first reader of $vgpr0. The
# checks pin the current behaviour: the wait stays where it was written and
# no wait is emitted directly before the store.
---
name: test_waitcnt_preexisting_early_wait
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX9-LABEL: name: test_waitcnt_preexisting_early_wait
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: S_NOP 0
    ; GFX9-NEXT: S_NOP 0
    ; GFX9-NEXT: S_NOP 0
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 0
    S_NOP 0
    S_NOP 0
    S_NOP 0
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...

# The preexisting "S_WAITCNT 3952" is followed only by "KILL $vgpr0", which
# names the register written by the FLAT load. The checks expect the wait to
# be emitted unchanged, with no additional wait in front of the KILL.
---
name: test_waitcnt_preexisting_ignore_kill
body: |
  bb.0:
    liveins: $vgpr0_vgpr1

    ; GFX9-LABEL: name: test_waitcnt_preexisting_ignore_kill
    ; GFX9: liveins: $vgpr0_vgpr1
    ; GFX9-NEXT: {{ $}}
    ; GFX9-NEXT: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 3952
    ; GFX9-NEXT: KILL $vgpr0
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    S_WAITCNT 3952
    KILL $vgpr0
...
# Combine preexisting waitcnt with wait added to the start of a non-entry function.
# The input body is just "S_WAITCNT 0" followed by "S_ENDPGM 0", and the
# checks expect exactly the same two instructions: the output contains one
# "S_WAITCNT 0", not two.
---
name: test_waitcnt_preexisting_func_start
body: |
  bb.0:
    ; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
    ; GFX9: S_WAITCNT 0
    ; GFX9-NEXT: S_ENDPGM 0
    S_WAITCNT 0
    S_ENDPGM 0
...

# Verify that extra waitcnt are not added after buffer invalidate instructions.
# BUFFER_INVL2 and BUFFER_WBINVL1_VOL follow the preexisting "S_WAITCNT 3952".
# The checks expect no S_WAITCNT between either of them and the FLAT load
# that comes next. The only waits added to the input are the leading
# "S_WAITCNT 0" and the "S_WAITCNT 112" before the FLAT store.
# This block declares no liveins even though $vgpr0_vgpr1 is read by the
# first instruction.
---
name: test_waitcnt_preexisting_buffer_inv
body: |
  bb.0:
    ; GFX9-LABEL: name: test_waitcnt_preexisting_buffer_inv
    ; GFX9: S_WAITCNT 0
    ; GFX9-NEXT: $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    ; GFX9-NEXT: S_WAITCNT 3952
    ; GFX9-NEXT: BUFFER_INVL2 implicit $exec
    ; GFX9-NEXT: BUFFER_WBINVL1_VOL implicit $exec
    ; GFX9-NEXT: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_WAITCNT 112
    ; GFX9-NEXT: FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    ; GFX9-NEXT: S_ENDPGM 0
    $vgpr0_vgpr1 = GLOBAL_LOAD_DWORDX2 $vgpr0_vgpr1, 0, 0, implicit $exec
    S_WAITCNT 3952
    BUFFER_INVL2 implicit $exec
    BUFFER_WBINVL1_VOL implicit $exec
    $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
    FLAT_STORE_DWORD $vgpr0_vgpr1, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM 0
...