forked from OSchip/llvm-project
AMDGPU/GlobalISel: Try to select VOP3 form of add
Several things are still broken, but this at least emits the right instruction for gfx9. Importing the pattern with the unused carry out does not seem to work. A special pattern class is needed for clamp, because OperandWithDefaultOps doesn't really work. llvm-svn: 364804
This commit is contained in:
parent
e3e38cce4a
commit
d810ff2588
|
@ -70,6 +70,17 @@ class GISelVop2Pat <
|
|||
(inst src0_vt:$src0, src1_vt:$src1)
|
||||
>;
|
||||
|
||||
// Pattern class for a two-source operation whose selected instruction takes an
// extra trailing operand, which this class always supplies as the literal 0.
// Template arguments: `node` is the operator to match, `inst` the instruction
// to emit, `dst_vt` the result type; `src0_vt` defaults to `dst_vt` and
// `src1_vt` defaults to `src0_vt`.
// FIXME: The clamp operand should be an OperandWithDefaultOps defaulting to 0,
// but that is badly broken, so the 0 is spelled out explicitly here.
|
||||
class GISelVop2ClampingPat <
|
||||
SDPatternOperator node,
|
||||
Instruction inst,
|
||||
ValueType dst_vt,
|
||||
ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <
|
||||
|
||||
// Source pattern: `node` applied to src0 (wrapped in sd_vsrc0) and a VGPR_32 src1.
(dst_vt (node (src0_vt (sd_vsrc0 src0_vt:$src0)), (src1_vt VGPR_32:$src1))),
|
||||
// Result pattern: `inst` with both sources followed by the explicit 0 operand.
(inst src0_vt:$src0, src1_vt:$src1, 0)
|
||||
>;
|
||||
|
||||
class GISelVop2CommutePat <
|
||||
SDPatternOperator node,
|
||||
Instruction inst,
|
||||
|
@ -129,7 +140,16 @@ def : GISelSop2Pat <or, S_OR_B32, i32>;
|
|||
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
|
||||
|
||||
def : GISelSop2Pat <add, S_ADD_I32, i32>;
|
||||
|
||||
// Patterns guarded by NotHasAddNoCarryInsts: i32 `add` selects V_ADD_I32_e32.
// The accompanying MIR test expects this form for -mcpu=tahiti and -mcpu=fiji.
let SubtargetPredicate = NotHasAddNoCarryInsts in {
|
||||
// FIXME: This should use the VOP3 form (the commented-out pattern below);
// importing the pattern with the unused carry out reportedly does not work yet.
|
||||
//def : GISelVop2ClampingPat <add, V_ADD_I32_e64, i32>;
|
||||
def : GISelVop2Pat <add, V_ADD_I32_e32, i32>;
|
||||
}
|
||||
|
||||
// Patterns guarded by HasAddNoCarryInsts: i32 `add` selects V_ADD_U32_e64 via
// GISelVop2ClampingPat. The accompanying MIR test expects this form, with a
// trailing 0 operand, for -mcpu=gfx900.
let SubtargetPredicate = HasAddNoCarryInsts in {
|
||||
def : GISelVop2ClampingPat <add, V_ADD_U32_e64, i32>;
|
||||
}
|
||||
|
||||
def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
|
||||
let AddedComplexity = 100 in {
|
||||
|
|
|
@ -1,24 +1,37 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
|
||||
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX6 %s
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=instruction-select -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s
|
||||
|
||||
---
|
||||
name: add_i32
|
||||
name: add_s32
|
||||
legalized: true
|
||||
regBankSelected: true
|
||||
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
|
||||
; GCN-LABEL: name: add_i32
|
||||
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
|
||||
; GCN: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||
; GCN: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
|
||||
; GCN: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
|
||||
; GFX6-LABEL: name: add_s32
|
||||
; GFX6: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GFX6: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
|
||||
; GFX6: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||
; GFX6: [[V_ADD_I32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[COPY2]], implicit-def $vcc, implicit $exec
|
||||
; GFX6: [[V_ADD_I32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[S_ADD_I32_]], [[V_ADD_I32_e32_]], implicit-def $vcc, implicit $exec
|
||||
; GFX6: [[V_ADD_I32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_I32_e32 [[V_ADD_I32_e32_1]], [[COPY2]], implicit-def $vcc, implicit $exec
|
||||
; GFX6: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_I32_e32_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GFX9-LABEL: name: add_s32
|
||||
; GFX9: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||
; GFX9: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY3:%[0-9]+]]:vreg_64 = COPY $vgpr3_vgpr4
|
||||
; GFX9: [[S_ADD_I32_:%[0-9]+]]:sreg_32_xm0 = S_ADD_I32 [[COPY]], [[COPY1]], implicit-def $scc
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[COPY2]], 0, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[S_ADD_I32_]], [[V_ADD_U32_e64_]], 0, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_ADD_U32_e64_1]], [[COPY2]], 0, implicit $exec
|
||||
; GFX9: FLAT_STORE_DWORD [[COPY3]], [[V_ADD_U32_e64_2]], 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
|
||||
%0:sgpr(s32) = COPY $sgpr0
|
||||
%1:sgpr(s32) = COPY $sgpr1
|
||||
%2:vgpr(s32) = COPY $vgpr0
|
||||
|
|
Loading…
Reference in New Issue