AMDGPU: Legalize/regbankselect fma

llvm-svn: 349467
This commit is contained in:
Matt Arsenault 2018-12-18 09:39:56 +00:00
parent af6fbbf18b
commit c0ea221068
4 changed files with 185 additions and 1 deletions

View File

@ -91,7 +91,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
setAction({G_CONSTANT, S1}, Legal);
getActionDefinitionsBuilder(
{ G_FADD, G_FMUL, G_FNEG, G_FABS})
{ G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA})
.legalFor({S32, S64});
// Use actual fsub instruction

View File

@ -357,6 +357,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case AMDGPU::G_FPTOSI:
case AMDGPU::G_FPTOUI:
case AMDGPU::G_FMUL:
case AMDGPU::G_FMA:
return getDefaultMappingVOP(MI);
case AMDGPU::G_IMPLICIT_DEF: {
unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();

View File

@ -0,0 +1,35 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s
---
name: test_fma_f32
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: test_fma
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
; CHECK: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = G_FMA %0, %1, %2
$vgpr0 = COPY %3
...
---
name: test_fma_f64
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5
; CHECK-LABEL: name: test_fma
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3
; CHECK: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[COPY]], [[COPY1]]
%0:_(s64) = COPY $vgpr0_vgpr1
%1:_(s64) = COPY $vgpr2_vgpr3
%2:_(s64) = COPY $vgpr4_vgpr5
%3:_(s64) = G_FMA %0, %1, %2
$vgpr0_vgpr1 = COPY %3
...

View File

@ -0,0 +1,148 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
---
name: fma_sss
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $sgpr2
; CHECK-LABEL: name: fma_sss
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY4]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $sgpr2
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_vss
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $sgpr1
; CHECK-LABEL: name: fma_vss
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY4]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr0
%2:_(s32) = COPY $sgpr1
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_svs
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $sgpr1
; CHECK-LABEL: name: fma_svs
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY3]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $sgpr1
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_ssv
legalized: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0
; CHECK-LABEL: name: fma_ssv
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr0
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_vvs
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $sgpr0
; CHECK-LABEL: name: fma_vvs
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32)
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY3]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $sgpr0
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_vsv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0, $vgpr1
; CHECK-LABEL: name: fma_vsv
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32)
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY3]], [[COPY2]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $sgpr1
%2:_(s32) = COPY $vgpr1
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_svv
legalized: true
body: |
bb.0:
liveins: $sgpr0, $vgpr0, $vgpr1
; CHECK-LABEL: name: fma_svv
; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
%0:_(s32) = COPY $sgpr0
%1:_(s32) = COPY $vgpr0
%2:_(s32) = COPY $vgpr1
%3:_(s32) = G_FMA %0, %1, %2
...
---
name: fma_vvv
legalized: true
body: |
bb.0:
liveins: $vgpr0, $vgpr1, $vgpr2
; CHECK-LABEL: name: fma_vvv
; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0
; CHECK: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1
; CHECK: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2
; CHECK: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
%3:_(s32) = G_FMA %0, %1, %2
...