forked from OSchip/llvm-project
AMDGPU/GlobalISel: legalize and select 32-bit G_ASHR
Reviewers: arsenm, nhaehnle Subscribers: kzhuravl, wdng, yaxunl, rovka, kristof.beyls, dstuttard, tpr, llvm-commits, t-tye Differential Revision: https://reviews.llvm.org/D48196 llvm-svn: 335318
This commit is contained in:
parent
fe70b29cf7
commit
26fac0f8e1
|
@ -18,6 +18,11 @@ def gi_vsrc0 :
|
||||||
GIComplexOperandMatcher<s32, "selectVSRC0">,
|
GIComplexOperandMatcher<s32, "selectVSRC0">,
|
||||||
GIComplexPatternEquiv<sd_vsrc0>;
|
GIComplexPatternEquiv<sd_vsrc0>;
|
||||||
|
|
||||||
|
def sd_vcsrc : ComplexPattern<i32, 1, "">;
|
||||||
|
def gi_vcsrc :
|
||||||
|
GIComplexOperandMatcher<s32, "selectVCSRC">,
|
||||||
|
GIComplexPatternEquiv<sd_vcsrc>;
|
||||||
|
|
||||||
def gi_vop3mods0 :
|
def gi_vop3mods0 :
|
||||||
GIComplexOperandMatcher<s32, "selectVOP3Mods0">,
|
GIComplexOperandMatcher<s32, "selectVOP3Mods0">,
|
||||||
GIComplexPatternEquiv<VOP3Mods0>;
|
GIComplexPatternEquiv<VOP3Mods0>;
|
||||||
|
@ -60,6 +65,26 @@ class GISelVop2CommutePat <
|
||||||
(inst src0_vt:$src0, src1_vt:$src1)
|
(inst src0_vt:$src0, src1_vt:$src1)
|
||||||
>;
|
>;
|
||||||
|
|
||||||
|
class GISelVop3Pat2 <
|
||||||
|
SDPatternOperator node,
|
||||||
|
Instruction inst,
|
||||||
|
ValueType dst_vt,
|
||||||
|
ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <
|
||||||
|
|
||||||
|
(dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))),
|
||||||
|
(inst src0_vt:$src0, src1_vt:$src1)
|
||||||
|
>;
|
||||||
|
|
||||||
|
class GISelVop3Pat2CommutePat <
|
||||||
|
SDPatternOperator node,
|
||||||
|
Instruction inst,
|
||||||
|
ValueType dst_vt,
|
||||||
|
ValueType src0_vt = dst_vt, ValueType src1_vt = src0_vt> : GCNPat <
|
||||||
|
|
||||||
|
(dst_vt (node (src0_vt (sd_vcsrc src0_vt:$src0)), (src1_vt (sd_vcsrc src1_vt:$src1)))),
|
||||||
|
(inst src0_vt:$src1, src1_vt:$src0)
|
||||||
|
>;
|
||||||
|
|
||||||
multiclass GISelVop2IntrPat <
|
multiclass GISelVop2IntrPat <
|
||||||
SDPatternOperator node, Instruction inst,
|
SDPatternOperator node, Instruction inst,
|
||||||
ValueType dst_vt, ValueType src_vt = dst_vt> {
|
ValueType dst_vt, ValueType src_vt = dst_vt> {
|
||||||
|
@ -76,6 +101,15 @@ multiclass GISelVop2IntrPat <
|
||||||
def : GISelSop2Pat <or, S_OR_B32, i32>;
|
def : GISelSop2Pat <or, S_OR_B32, i32>;
|
||||||
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
|
def : GISelVop2Pat <or, V_OR_B32_e32, i32>;
|
||||||
|
|
||||||
|
def : GISelSop2Pat <sra, S_ASHR_I32, i32>;
|
||||||
|
let AddedComplexity = 100 in {
|
||||||
|
let SubtargetPredicate = isSICI in {
|
||||||
|
def : GISelVop2Pat <sra, V_ASHR_I32_e32, i32>;
|
||||||
|
}
|
||||||
|
def : GISelVop2CommutePat <sra, V_ASHRREV_I32_e32, i32>;
|
||||||
|
}
|
||||||
|
def : GISelVop3Pat2CommutePat <sra, V_ASHRREV_I32_e64, i32>;
|
||||||
|
|
||||||
// FIXME: Select directly to _e32 so we don't need to deal with modifiers.
|
// FIXME: Select directly to _e32 so we don't need to deal with modifiers.
|
||||||
// FIXME: We can't re-use SelectionDAG patterns here because they match
|
// FIXME: We can't re-use SelectionDAG patterns here because they match
|
||||||
// against a custom SDNode and we would need to create a generic machine
|
// against a custom SDNode and we would need to create a generic machine
|
||||||
|
|
|
@ -537,6 +537,7 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
|
||||||
switch (I.getOpcode()) {
|
switch (I.getOpcode()) {
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
|
case TargetOpcode::G_ASHR:
|
||||||
case TargetOpcode::G_SITOFP:
|
case TargetOpcode::G_SITOFP:
|
||||||
case TargetOpcode::G_FMUL:
|
case TargetOpcode::G_FMUL:
|
||||||
case TargetOpcode::G_FADD:
|
case TargetOpcode::G_FADD:
|
||||||
|
@ -564,6 +565,14 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
InstructionSelector::ComplexRendererFns
|
||||||
|
AMDGPUInstructionSelector::selectVCSRC(MachineOperand &Root) const {
|
||||||
|
return {{
|
||||||
|
[=](MachineInstrBuilder &MIB) { MIB.add(Root); }
|
||||||
|
}};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
///
|
///
|
||||||
/// This will select either an SGPR or VGPR operand and will save us from
|
/// This will select either an SGPR or VGPR operand and will save us from
|
||||||
/// having to write an extra tablegen pattern.
|
/// having to write an extra tablegen pattern.
|
||||||
|
|
|
@ -72,6 +72,9 @@ private:
|
||||||
bool selectG_LOAD(MachineInstr &I) const;
|
bool selectG_LOAD(MachineInstr &I) const;
|
||||||
bool selectG_STORE(MachineInstr &I) const;
|
bool selectG_STORE(MachineInstr &I) const;
|
||||||
|
|
||||||
|
InstructionSelector::ComplexRendererFns
|
||||||
|
selectVCSRC(MachineOperand &Root) const;
|
||||||
|
|
||||||
InstructionSelector::ComplexRendererFns
|
InstructionSelector::ComplexRendererFns
|
||||||
selectVSRC0(MachineOperand &Root) const;
|
selectVSRC0(MachineOperand &Root) const;
|
||||||
|
|
||||||
|
|
|
@ -55,6 +55,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const SISubtarget &ST,
|
||||||
};
|
};
|
||||||
|
|
||||||
setAction({G_ADD, S32}, Legal);
|
setAction({G_ADD, S32}, Legal);
|
||||||
|
setAction({G_ASHR, S32}, Legal);
|
||||||
setAction({G_SUB, S32}, Legal);
|
setAction({G_SUB, S32}, Legal);
|
||||||
setAction({G_MUL, S32}, Legal);
|
setAction({G_MUL, S32}, Legal);
|
||||||
setAction({G_AND, S32}, Legal);
|
setAction({G_AND, S32}, Legal);
|
||||||
|
|
|
@ -0,0 +1,86 @@
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,SI
|
||||||
|
# RUN: llc -march=amdgcn -mcpu=fiji -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN,VI
|
||||||
|
|
||||||
|
--- |
|
||||||
|
define void @ashr(i32 addrspace(1)* %global0) {ret void}
|
||||||
|
...
|
||||||
|
---
|
||||||
|
|
||||||
|
name: ashr
|
||||||
|
legalized: true
|
||||||
|
regBankSelected: true
|
||||||
|
|
||||||
|
# GCN-LABEL: name: ashr
|
||||||
|
body: |
|
||||||
|
bb.0:
|
||||||
|
liveins: $sgpr0, $sgpr1, $vgpr0, $vgpr3_vgpr4
|
||||||
|
; GCN: [[SGPR0:%[0-9]+]]:sreg_32 = COPY $sgpr0
|
||||||
|
; GCN: [[SGPR1:%[0-9]+]]:sreg_32 = COPY $sgpr1
|
||||||
|
; GCN: [[VGPR0:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||||
|
%0:sgpr(s32) = COPY $sgpr0
|
||||||
|
%1:sgpr(s32) = COPY $sgpr1
|
||||||
|
%2:vgpr(s32) = COPY $vgpr0
|
||||||
|
%3:vgpr(s64) = COPY $vgpr3_vgpr4
|
||||||
|
|
||||||
|
; GCN: [[C1:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 1
|
||||||
|
; GCN: [[C4096:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||||
|
%4:sgpr(s32) = G_CONSTANT i32 1
|
||||||
|
%5:sgpr(s32) = G_CONSTANT i32 4096
|
||||||
|
|
||||||
|
; ashr ss
|
||||||
|
; GCN: [[SS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SGPR0]], [[SGPR1]]
|
||||||
|
%6:sgpr(s32) = G_ASHR %0, %1
|
||||||
|
|
||||||
|
; ashr si
|
||||||
|
; GCN: [[SI:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[SS]], [[C1]]
|
||||||
|
%7:sgpr(s32) = G_ASHR %6, %4
|
||||||
|
|
||||||
|
; ashr is
|
||||||
|
; GCN: [[IS:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[C1]], [[SI]]
|
||||||
|
%8:sgpr(s32) = G_ASHR %4, %7
|
||||||
|
|
||||||
|
; ashr sc
|
||||||
|
; GCN: [[SC:%[0-9]+]]:sreg_32 = S_ASHR_I32 [[IS]], [[C4096]]
|
||||||
|
%9:sgpr(s32) = G_ASHR %8, %5
|
||||||
|
|
||||||
|
; ashr cs
|
||||||
|
; GCN: [[CS:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[C4096]], [[SC]]
|
||||||
|
%10:sgpr(s32) = G_ASHR %5, %9
|
||||||
|
|
||||||
|
; ashr vs
|
||||||
|
; GCN: [[VS:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[CS]], [[VGPR0]]
|
||||||
|
%11:vgpr(s32) = G_ASHR %2, %10
|
||||||
|
|
||||||
|
; ashr sv
|
||||||
|
; SI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[CS]], [[VS]]
|
||||||
|
; VI: [[SV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VS]], [[CS]]
|
||||||
|
%12:vgpr(s32) = G_ASHR %10, %11
|
||||||
|
|
||||||
|
; ashr vv
|
||||||
|
; SI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[SV]], [[VGPR0]]
|
||||||
|
; VI: [[VV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[VGPR0]], [[SV]]
|
||||||
|
%13:vgpr(s32) = G_ASHR %12, %2
|
||||||
|
|
||||||
|
; ashr iv
|
||||||
|
; SI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C1]], [[VV]]
|
||||||
|
; VI: [[IV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VV]], [[C1]]
|
||||||
|
%14:vgpr(s32) = G_ASHR %4, %13
|
||||||
|
|
||||||
|
; ashr vi
|
||||||
|
; GCN: [[VI:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C1]], [[IV]]
|
||||||
|
%15:vgpr(s32) = G_ASHR %14, %4
|
||||||
|
|
||||||
|
; ashr cv
|
||||||
|
; SI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHR_I32_e32 [[C4096]], [[VI]]
|
||||||
|
; VI: [[CV:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 [[VI]], [[C4096]]
|
||||||
|
%16:vgpr(s32) = G_ASHR %5, %15
|
||||||
|
|
||||||
|
; ashr vc
|
||||||
|
; GCN: [[VC:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e32 [[C4096]], [[CV]]
|
||||||
|
%17:vgpr(s32) = G_ASHR %16, %5
|
||||||
|
|
||||||
|
|
||||||
|
G_STORE %17, %3 :: (store 4 into %ir.global0)
|
||||||
|
|
||||||
|
...
|
||||||
|
---
|
|
@ -0,0 +1,22 @@
|
||||||
|
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||||
|
# RUN: llc -mtriple=amdgcn-mesa-mesa3d -O0 -run-pass=legalizer -global-isel %s -o - | FileCheck %s
|
||||||
|
|
||||||
|
---
|
||||||
|
name: test_ashr
|
||||||
|
registers:
|
||||||
|
- { id: 0, class: _ }
|
||||||
|
- { id: 1, class: _ }
|
||||||
|
- { id: 2, class: _ }
|
||||||
|
body: |
|
||||||
|
bb.0.entry:
|
||||||
|
liveins: $vgpr0, $vgpr1
|
||||||
|
|
||||||
|
; CHECK-LABEL: name: test_ashr
|
||||||
|
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
|
||||||
|
; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
|
||||||
|
; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[COPY1]]
|
||||||
|
%0(s32) = COPY $vgpr0
|
||||||
|
%1(s32) = COPY $vgpr1
|
||||||
|
%2(s32) = G_ASHR %0, %1
|
||||||
|
$vgpr0 = COPY %2
|
||||||
|
...
|
Loading…
Reference in New Issue