llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir

414 lines
17 KiB
YAML

# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s
# Test that we fold correct element from G_UNMERGE_VALUES into fma
---
name: test_f32_add_mul
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
; GFX10-LABEL: name: test_f32_add_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%6:_(s32) = G_FMUL %0, %1
%7:_(s32) = G_FADD %6, %el1
$vgpr0 = COPY %7(s32)
...
---
name: test_f32_add_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
; GFX10-LABEL: name: test_f32_add_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%6:_(s32) = G_FMUL %0, %1
%7:_(s32) = G_FADD %el1, %6
$vgpr0 = COPY %7(s32)
...
---
name: test_f16_f32_add_ext_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
; GFX10-LABEL: name: test_f16_f32_add_ext_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s16) = G_TRUNC %0(s32)
%2:_(s32) = COPY $sgpr1
%3:_(s16) = G_TRUNC %2(s32)
%ptr:_(p1) = COPY $vgpr0_vgpr1
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
%9:_(s32) = G_FPEXT %8(s16)
%10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
$vgpr0 = COPY %10(s32)
...
---
name: test_f16_f32_add_ext_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $sgpr0
%1:_(s16) = G_TRUNC %0(s32)
%2:_(s32) = COPY $sgpr1
%3:_(s16) = G_TRUNC %2(s32)
%ptr:_(p1) = COPY $vgpr0_vgpr1
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
%9:_(s32) = G_FPEXT %8(s16)
%10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
$vgpr0 = COPY %10(s32)
...
---
name: test_f32_add_fma_mul
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
; GFX10-LABEL: name: test_f32_add_fma_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%ptr:_(p1) = COPY $vgpr4_vgpr5
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
%9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
%10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
$vgpr0 = COPY %10(s32)
...
---
name: test_f32_add_fma_mul_rhs
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
; GFX10-LABEL: name: test_f32_add_fma_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = COPY $vgpr3
%ptr:_(p1) = COPY $vgpr4_vgpr5
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
%9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
%10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
$vgpr0 = COPY %10(s32)
...
---
name: test_f16_f32_add_fma_ext_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%6:_(s32) = COPY $vgpr4
%7:_(s16) = G_TRUNC %6(s32)
%8:_(s32) = COPY $vgpr5
%9:_(s16) = G_TRUNC %8(s32)
%10:_(s16) = G_FMUL %7, %9
%11:_(s32) = G_FPEXT %10(s16)
%12:_(s32) = G_FMA %0, %1, %11
%13:_(s32) = G_FADD %12, %el1
$vgpr0 = COPY %13(s32)
...
---
name: test_f16_f32_add_ext_fma_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s16) = G_TRUNC %0(s32)
%2:_(s32) = COPY $vgpr1
%3:_(s16) = G_TRUNC %2(s32)
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%8:_(s32) = COPY $vgpr4
%9:_(s16) = G_TRUNC %8(s32)
%10:_(s32) = COPY $vgpr5
%11:_(s16) = G_TRUNC %10(s32)
%12:_(s16) = G_FMUL %9, %11
%13:_(s16) = G_FMUL %1, %3
%14:_(s16) = G_FADD %13, %12
%15:_(s32) = G_FPEXT %14(s16)
%16:_(s32) = G_FADD %15, %el1
$vgpr0 = COPY %16(s32)
...
---
name: test_f16_f32_add_fma_ext_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs
; GFX10: %ptr:_(p1) = COPY $vgpr0_vgpr1
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
%ptr:_(p1) = COPY $vgpr0_vgpr1
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%4:_(s32) = COPY $vgpr2
%5:_(s32) = COPY $vgpr3
%6:_(s32) = COPY $vgpr4
%7:_(s16) = G_TRUNC %6(s32)
%8:_(s32) = COPY $vgpr5
%9:_(s16) = G_TRUNC %8(s32)
%10:_(s16) = G_FMUL %7, %9
%11:_(s32) = G_FPEXT %10(s16)
%12:_(s32) = G_FMA %4, %5, %11
%13:_(s32) = G_FADD %el1, %12
$vgpr0 = COPY %13(s32)
...
---
name: test_f16_f32_add_ext_fma_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs
; GFX10: %ptr:_(p1) = COPY $vgpr0_vgpr1
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
%ptr:_(p1) = COPY $vgpr0_vgpr1
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%4:_(s32) = COPY $vgpr2
%5:_(s16) = G_TRUNC %4(s32)
%6:_(s32) = COPY $vgpr3
%7:_(s16) = G_TRUNC %6(s32)
%8:_(s32) = COPY $vgpr4
%9:_(s16) = G_TRUNC %8(s32)
%10:_(s32) = COPY $vgpr5
%11:_(s16) = G_TRUNC %10(s32)
%12:_(s16) = G_FMUL %9, %11
%13:_(s16) = G_FMUL %5, %7
%14:_(s16) = G_FADD %13, %12
%15:_(s32) = G_FPEXT %14(s16)
%16:_(s32) = G_FADD %el1, %15
$vgpr0 = COPY %16(s32)
...
---
name: test_f32_sub_mul
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
; GFX10-LABEL: name: test_f32_sub_mul
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]]
; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%ptr:_(p1) = COPY $vgpr0_vgpr1
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%6:_(s32) = G_FMUL %0, %1
%7:_(s32) = G_FSUB %6, %el1
$vgpr0 = COPY %7(s32)
...
---
name: test_f32_sub_mul_rhs
machineFunctionInfo:
mode:
fp32-input-denormals: false
body: |
bb.1:
liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
; GFX10-LABEL: name: test_f32_sub_mul_rhs
; GFX10: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1
; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
%6:_(s32) = G_FMUL %0, %1
%7:_(s32) = G_FSUB %el1, %6
$vgpr0 = COPY %7(s32)
...