AMDGPU/GlobalISel: Try to select VOP3 form of add

There are several things broken, but at least emit the right thing for
gfx9.

The import of the pattern with the unused carry out seems to not
work. Needs a special class for clamp, because OperandWithDefaultOps
doesn't really work.

llvm-svn: 364804
This commit is contained in:
Matt Arsenault 2019-07-01 16:27:32 +00:00
parent e3e38cce4a
commit d810ff2588
2 changed files with 46 additions and 13 deletions

View File

@ -70,6 +70,17 @@ class GISelVop2Pat <
(inst src0_vt:$src0, src1_vt:$src1)
>;
// FIXME: clamp operand should be OperandWithDefaultOps to 0, but it's badly broken.
class GISelVop2ClampingPat <
SDPatternOperator node,
Instruction inst,
ValueType dst_vt,
ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <
(dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
(inst src0_vt:$src0, src1_vt:$src1, 0)
>;
class GISelVop2CommutePat <
SDPatternOperator node,
Instruction inst,
@ -129,7 +140,16 @@ def : GISelSop2Pat <or, S_OR_B32, i32>;
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
def : GISelSop2Pat <add, S_ADD_I32, i32>;
let SubtargetPredicate = NotHasAddNoCarryInsts in {
// FIXME: This should use the VOP3 form
//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>;
def : GISelVop2Pat <add, V_ADD_I32_e32, i32>;
}
let SubtargetPredicate = HasAddNoCarryInsts in {
def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>;
}
def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
let AddedComplexity = 100 in {

View File

@ -1,24 +1,37 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
---
name: add_i32
name: add_s32
legalized: true
regBankSelected: true
body: |
bb.0:
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
; GCN-LABEL: name: add_i32
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
; GCN: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
; GCN: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GFX6-LABEL: name: add_s32
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
; GFX9-LABEL: name: add_s32
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec
; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
%0:sgpr(s32) = COPY $sgpr0
%1:sgpr(s32) = COPY $sgpr1
%2:vgpr(s32) = COPY $vgpr0