; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; NOTE: The checks for opt are NOT added by the update script. Those ; checks are looking for the absence of specific metadata, which ; cannot be expressed reliably by the generated checks. ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s -check-prefix=ISA ; RUN: opt --amdgpu-annotate-uniform -S %s | FileCheck %s -check-prefix=UNIFORM ; RUN: opt --amdgpu-annotate-uniform --si-annotate-control-flow -S %s | FileCheck %s -check-prefix=CONTROLFLOW ; This module creates a divergent branch in block Flow2. The branch is ; marked as divergent by the divergence analysis but the condition is ; not. This test ensures that the divergence of the branch is tested, ; not its condition, so that branch is correctly emitted as divergent. target triple = "amdgcn-mesa-mesa3d" define amdgpu_ps void @main(i32 %0, float %1) { ; ISA-LABEL: main: ; ISA: ; %bb.0: ; %start ; ISA-NEXT: v_readfirstlane_b32 s0, v0 ; ISA-NEXT: s_mov_b32 m0, s0 ; ISA-NEXT: s_mov_b32 s8, 0 ; ISA-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x ; ISA-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 ; ISA-NEXT: s_mov_b64 s[0:1], 0 ; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5 ; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3 ; ISA-NEXT: s_branch .LBB0_3 ; ISA-NEXT: .LBB0_1: ; %Flow1 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; ISA-NEXT: s_or_b64 exec, exec, s[6:7] ; ISA-NEXT: s_add_i32 s8, s8, 1 ; ISA-NEXT: s_mov_b64 s[6:7], 0 ; ISA-NEXT: .LBB0_2: ; %Flow ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; ISA-NEXT: s_and_b64 s[10:11], exec, s[4:5] ; ISA-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1] ; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec ; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec ; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] ; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1] ; ISA-NEXT: s_cbranch_execz .LBB0_6 ; ISA-NEXT: .LBB0_3: ; %loop ; ISA-NEXT: ; =>This Inner Loop Header: Depth=1 ; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec ; ISA-NEXT: s_cmp_lt_u32 s8, 32 ; ISA-NEXT: s_mov_b64 s[6:7], -1 ; ISA-NEXT: s_cbranch_scc0 .LBB0_2 ; ISA-NEXT: ; %bb.4: ; %endif1 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; ISA-NEXT: s_mov_b64 s[4:5], -1 ; ISA-NEXT: s_and_saveexec_b64 s[6:7], vcc ; ISA-NEXT: s_cbranch_execz .LBB0_1 ; ISA-NEXT: ; %bb.5: ; %endif2 ; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1 ; ISA-NEXT: s_xor_b64 s[4:5], exec, -1 ; ISA-NEXT: s_branch .LBB0_1 ; ISA-NEXT: .LBB0_6: ; %Flow2 ; ISA-NEXT: s_or_b64 exec, exec, s[0:1] ; ISA-NEXT: v_mov_b32_e32 v1, 0 ; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3] ; ISA-NEXT: ; %bb.7: ; %if1 ; ISA-NEXT: v_sqrt_f32_e32 v1, v0 ; ISA-NEXT: ; %bb.8: ; %endloop ; ISA-NEXT: s_or_b64 exec, exec, s[0:1] ; ISA-NEXT: exp mrt0 v1, v1, v1, v1 done vm ; ISA-NEXT: s_endpgm start: %v0 = call float @llvm.amdgcn.interp.p1(float %1, i32 0, i32 0, i32 %0) br label %loop loop: ; preds = %Flow, %start %v1 = phi i32 [ 0, %start ], [ %6, %Flow ] %v2 = icmp ugt i32 %v1, 31 %2 = xor i1 %v2, true br i1 %2, label %endif1, label %Flow Flow1: ; preds = %endif2, %endif1 %3 = phi i32 [ %v5, %endif2 ], [ undef, %endif1 ] %4 = phi i1 [ false, %endif2 ], [ true, %endif1 ] br label %Flow ; UNIFORM-LABEL: Flow2: ; UNIFORM-NEXT: br i1 %8, label %if1, label %endloop ; UNIFORM-NOT: !amdgpu.uniform ; UNIFORM: if1: ; CONTROLFLOW-LABEL: Flow2: ; CONTROLFLOW-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %{{.*}}) ; CONTROLFLOW-NEXT: [[IF:%.*]] = call { i1, i64 } @llvm.amdgcn.if.i64(i1 %{{.*}}) ; CONTROLFLOW-NEXT: [[COND:%.*]] = extractvalue { i1, i64 } [[IF]], 0 ; CONTROLFLOW-NEXT: %{{.*}} = extractvalue { i1, i64 } [[IF]], 1 ; CONTROLFLOW-NEXT: br i1 [[COND]], label %if1, label %endloop Flow2: ; preds = %Flow br i1 %8, label %if1, label %endloop if1: ; preds = %Flow2 %v3 = call float @llvm.sqrt.f32(float %v0) br label %endloop endif1: ; preds = %loop %v4 = fcmp ogt float %v0, 0.000000e+00 %5 = xor i1 %v4, true br i1 %5, label %endif2, label %Flow1 Flow: ; preds = %Flow1, %loop %6 = phi i32 [ %3, %Flow1 ], [ undef, %loop ] %7 = phi i1 [ %4, %Flow1 ], [ true, %loop ] %8 = phi i1 [ false, %Flow1 ], [ true, %loop ] br i1 %7, label %Flow2, label %loop endif2: ; preds = %endif1 %v5 = add i32 %v1, 1 br label %Flow1 endloop: ; preds = %if1, %Flow2 %v6 = phi float [ 0.000000e+00, %Flow2 ], [ %v3, %if1 ] call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %v6, float %v6, float %v6, float %v6, i1 true, i1 true) ret void } ; Function Attrs: nounwind readnone speculatable willreturn declare float @llvm.sqrt.f32(float) #0 ; Function Attrs: nounwind readnone speculatable declare float @llvm.amdgcn.interp.p1(float, i32 immarg, i32 immarg, i32) #1 ; Function Attrs: inaccessiblememonly nounwind writeonly declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #2 attributes #0 = { nounwind readnone speculatable willreturn } attributes #1 = { nounwind readnone speculatable } attributes #2 = { inaccessiblememonly nounwind writeonly }