# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s

# Test that we fold the correct element from G_UNMERGE_VALUES into fma.
#
# Every function below loads a <2 x s32> vector, splits it with
# G_UNMERGE_VALUES into %el0 and %el1, and uses only %el1 (the second result)
# in the floating-point expression. The checks verify that the combined G_FMA
# refers to %el1.

# Input:    G_FADD (G_FMUL %0, %1), %el1
# Expected: G_FMA %0, %1, %el1
---
name: test_f32_add_mul
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %6, %el1
    $vgpr0 = COPY %7(s32)
...
# Same as test_f32_add_mul, but %el1 is the left-hand operand of the G_FADD.
# Input:    G_FADD %el1, (G_FMUL %0, %1)
# Expected: G_FMA %0, %1, %el1
---
name: test_f32_add_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %el1, %6
    $vgpr0 = COPY %7(s32)
...

# The multiply is done in s16 and extended to s32 before the add; both the
# G_FMUL and the G_FADD carry the full set of fast-math flags.
# Input:    G_FADD (G_FPEXT (G_FMUL %1, %3)), %el1
# Expected: G_FMA (G_FPEXT %1), (G_FPEXT %3), %el1
---
name: test_f16_f32_add_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...

# Same as test_f16_f32_add_ext_mul, but %el1 is the left-hand operand of the
# G_FADD.
# Input:    G_FADD %el1, (G_FPEXT (G_FMUL %1, %3))
# Expected: G_FMA (G_FPEXT %1), (G_FPEXT %3), %el1
---
name: test_f16_f32_add_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...
# An existing G_FMA whose addend is a G_FMUL is added to %el1; all three
# floating-point instructions carry the full set of fast-math flags.
# Input:    G_FADD (G_FMA %0, %1, (G_FMUL %2, %3)), %el1
# Expected: G_FMA %0, %1, (G_FMA %2, %3, %el1)
---
name: test_f32_add_fma_mul
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...
# Same as test_f32_add_fma_mul, but %el1 is the left-hand operand of the
# G_FADD.
# Input:    G_FADD %el1, (G_FMA %0, %1, (G_FMUL %2, %3))
# Expected: G_FMA %0, %1, (G_FMA %2, %3, %el1)
---
name: test_f32_add_fma_mul_rhs
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...
# An s32 G_FMA whose addend is an extended s16 multiply is added to %el1.
# Input:    G_FADD (G_FMA %0, %1, (G_FPEXT (G_FMUL %7, %9))), %el1
# Expected: G_FMA %0, %1, (G_FMA (G_FPEXT %7), (G_FPEXT %9), %el1)
---
name: test_f16_f32_add_fma_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %0, %1, %11
    %13:_(s32) = G_FADD %12, %el1
    $vgpr0 = COPY %13(s32)
...
# Two s16 multiplies are summed in s16, the sum is extended to s32, and the
# result is added to %el1.
# Input:    G_FADD (G_FPEXT (G_FADD (G_FMUL %1, %3), (G_FMUL %9, %11))), %el1
# Expected: G_FMA (G_FPEXT %1), (G_FPEXT %3),
#                 (G_FMA (G_FPEXT %9), (G_FPEXT %11), %el1)
---
name: test_f16_f32_add_ext_fma_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $vgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %1, %3
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %15, %el1
    $vgpr0 = COPY %16(s32)
...
# %el1 is the left-hand operand of the G_FADD; the right-hand operand is an
# s32 G_FMA whose addend is an extended s16 multiply.
# Input:    G_FADD %el1, (G_FMA %4, %5, (G_FPEXT (G_FMUL %7, %9)))
# Expected: G_FMA %4, %5, (G_FMA (G_FPEXT %7), (G_FPEXT %9), %el1)
---
name: test_f16_f32_add_fma_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s32) = COPY $vgpr3
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %4, %5, %11
    %13:_(s32) = G_FADD %el1, %12
    $vgpr0 = COPY %13(s32)
...
# %el1 is the left-hand operand of the G_FADD; the right-hand operand is the
# s32 extension of an s16 sum of two s16 multiplies.
# Input:    G_FADD %el1, (G_FPEXT (G_FADD (G_FMUL %5, %7), (G_FMUL %9, %11)))
# Expected: G_FMA (G_FPEXT %5), (G_FPEXT %7),
#                 (G_FMA (G_FPEXT %9), (G_FPEXT %11), %el1)
---
name: test_f16_f32_add_ext_fma_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s16) = G_TRUNC %4(s32)
    %6:_(s32) = COPY $vgpr3
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %5, %7
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %el1, %15
    $vgpr0 = COPY %16(s32)
...
# Subtraction with the multiply on the left-hand side: the subtrahend %el1 is
# negated and becomes the addend of the G_FMA.
# Input:    G_FSUB (G_FMUL %0, %1), %el1
# Expected: G_FMA %0, %1, (G_FNEG %el1)
#
# The pointer is read from $vgpr2_vgpr3 so that it matches the declared
# liveins and does not alias the $vgpr0/$vgpr1 float operands.
---
name: test_f32_sub_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %6, %el1
    $vgpr0 = COPY %7(s32)
...
# Subtraction with the multiply on the right-hand side: one multiplicand is
# negated and %el1 becomes the addend of the G_FMA.
# Input:    G_FSUB %el1, (G_FMUL %0, %1)
# Expected: G_FMA (G_FNEG %0), %1, %el1
---
name: test_f32_sub_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
body: |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %el1, %6
    $vgpr0 = COPY %7(s32)
...