forked from OSchip/llvm-project
AMDGPU: Move permlane discard vdst_in optimization
This case can be handled as a regular selection pattern, so move it out of the weird post-isel folding code which doesn't have an exactly equivalent place in GlobalISel. I think it doesn't make much sense to do this optimization here though, and it would be more useful in instcombine. There's not really any new information that will be gained during lowering since these inputs were known from the beginning.
This commit is contained in:
parent
fa4112fffc
commit
91e758b732
|
@ -10373,24 +10373,6 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
|
|||
Ops.push_back(ImpDef.getValue(1));
|
||||
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||
}
|
||||
case AMDGPU::V_PERMLANE16_B32:
|
||||
case AMDGPU::V_PERMLANEX16_B32: {
|
||||
ConstantSDNode *FI = cast<ConstantSDNode>(Node->getOperand(0));
|
||||
ConstantSDNode *BC = cast<ConstantSDNode>(Node->getOperand(2));
|
||||
if (!FI->getZExtValue() && !BC->getZExtValue())
|
||||
break;
|
||||
SDValue VDstIn = Node->getOperand(6);
|
||||
if (VDstIn.isMachineOpcode()
|
||||
&& VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF)
|
||||
break;
|
||||
MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF,
|
||||
SDLoc(Node), MVT::i32);
|
||||
SmallVector<SDValue, 8> Ops = { SDValue(FI, 0), Node->getOperand(1),
|
||||
SDValue(BC, 0), Node->getOperand(3),
|
||||
Node->getOperand(4), Node->getOperand(5),
|
||||
SDValue(ImpDef, 0), Node->getOperand(7) };
|
||||
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -639,6 +639,34 @@ def VOP3_PERMLANE_Profile : VOP3_Profile<VOPProfile <[i32, i32, i32, i32]>, VOP3
|
|||
let HasOMod = 0;
|
||||
}
|
||||
|
||||
// Selection pattern that maps the operation `permlane` onto the machine
// instruction `inst`. The $fi and $bc immediates are each wrapped in
// as_i1imm, a literal 0 operand is placed between $src1 and $src2, and
// $vdst_in is forwarded unchanged as the final operand.
// NOTE(review): the literal 0 presumably fills a modifier slot of the
// instruction's operand list -- confirm against the instruction profile.
class PermlanePat<SDPatternOperator permlane,
|
||||
Instruction inst> : GCNPat<
|
||||
(permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2,
|
||||
timm:$fi, timm:$bc),
|
||||
(inst (as_i1imm $fi), $src0, (as_i1imm $bc),
|
||||
$src1, 0, $src2, $vdst_in)
|
||||
>;
|
||||
|
||||
// Permlane intrinsic that has either fetch invalid or bound control
|
||||
// fields enabled.
// The fragment forwards all six operands to `permlane` unchanged and only
// adds a predicate: it matches when operand 5 or operand 6 of the matched
// node is a nonzero constant.
// NOTE(review): indices 5 and 6 presumably address $fi and $bc because
// operand 0 of the node is the intrinsic ID -- confirm against how the
// intrinsic node is laid out.
|
||||
class BoundControlOrFetchInvalidPermlane<SDPatternOperator permlane> :
|
||||
PatFrag<(ops node:$vdst_in, node:$src0, node:$src1, node:$src2,
|
||||
node:$fi, node:$bc),
|
||||
(permlane node:$vdst_in, node:$src0, node:
|
||||
$src1, node:$src2, node:$fi, node:$bc)> {
|
||||
let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 ||
|
||||
N->getConstantOperandVal(6) != 0; }];
|
||||
}
|
||||
|
||||
// Drop the input value if it won't be read.
// The source pattern accepts any value in the $vdst_in position (srcvalue)
// and the result passes IMPLICIT_DEF to `inst` in its place; every other
// operand is mapped the same way as in the plain permlane pattern.
// This class does not itself check that the input is unread: the caller
// must supply a `permlane` operator that is already restricted to such
// cases (the visible uses pass BoundControlOrFetchInvalidPermlane<...>).
|
||||
class PermlaneDiscardVDstIn<SDPatternOperator permlane,
|
||||
Instruction inst> : GCNPat<
|
||||
(permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
|
||||
(inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2,
|
||||
(IMPLICIT_DEF))
|
||||
>;
|
||||
|
||||
|
||||
let SubtargetPredicate = isGFX10Plus in {
|
||||
def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
|
||||
def : ThreeOp_i32_Pats<xor, xor, V_XOR3_B32>;
|
||||
|
@ -648,14 +676,16 @@ let SubtargetPredicate = isGFX10Plus in {
|
|||
def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>;
|
||||
} // End $vdst = $vdst_in, DisableEncoding $vdst_in
|
||||
|
||||
def : GCNPat<
|
||||
(int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
|
||||
(V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
|
||||
>;
|
||||
def : GCNPat<
|
||||
(int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc),
|
||||
(V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in)
|
||||
>;
|
||||
def : PermlanePat<int_amdgcn_permlane16, V_PERMLANE16_B32>;
|
||||
def : PermlanePat<int_amdgcn_permlanex16, V_PERMLANEX16_B32>;
|
||||
|
||||
def : PermlaneDiscardVDstIn<
|
||||
BoundControlOrFetchInvalidPermlane<int_amdgcn_permlane16>,
|
||||
V_PERMLANE16_B32>;
|
||||
def : PermlaneDiscardVDstIn<
|
||||
BoundControlOrFetchInvalidPermlane<int_amdgcn_permlanex16>,
|
||||
V_PERMLANEX16_B32>;
|
||||
|
||||
} // End SubtargetPredicate = isGFX10Plus
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
Loading…
Reference in New Issue