From 91e758b7329b4ff134684e661af93a85c436a460 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 15 Jan 2020 22:25:58 -0500 Subject: [PATCH] AMDGPU: Move permlane discard vdst_in optimization This case can be handled as a regular selection pattern, so move it out of the weird post-isel folding code, which has no exact equivalent in GlobalISel. I don't think it makes much sense to do this optimization here, though; it would be more useful in instcombine. No new information is gained during lowering, since these inputs were known from the beginning. --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 18 --------- llvm/lib/Target/AMDGPU/VOP3Instructions.td | 46 ++++++++++++++++++---- 2 files changed, 38 insertions(+), 26 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 1487920aac21..e32b68eba86c 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -10373,24 +10373,6 @@ SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, Ops.push_back(ImpDef.getValue(1)); return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); } - case AMDGPU::V_PERMLANE16_B32: - case AMDGPU::V_PERMLANEX16_B32: { - ConstantSDNode *FI = cast(Node->getOperand(0)); - ConstantSDNode *BC = cast(Node->getOperand(2)); - if (!FI->getZExtValue() && !BC->getZExtValue()) - break; - SDValue VDstIn = Node->getOperand(6); - if (VDstIn.isMachineOpcode() - && VDstIn.getMachineOpcode() == AMDGPU::IMPLICIT_DEF) - break; - MachineSDNode *ImpDef = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, - SDLoc(Node), MVT::i32); - SmallVector Ops = { SDValue(FI, 0), Node->getOperand(1), - SDValue(BC, 0), Node->getOperand(3), - Node->getOperand(4), Node->getOperand(5), - SDValue(ImpDef, 0), Node->getOperand(7) }; - return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops); - } default: break; } diff --git 
a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td index 1fa6aaf9f1be..2469b0077bcf 100644 --- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td @@ -639,6 +639,34 @@ def VOP3_PERMLANE_Profile : VOP3_Profile, VOP3 let HasOMod = 0; } +class PermlanePat : GCNPat< + (permlane i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, + timm:$fi, timm:$bc), + (inst (as_i1imm $fi), $src0, (as_i1imm $bc), + $src1, 0, $src2, $vdst_in) +>; + +// Permlane intrinsic that has either fetch invalid or bound control +// fields enabled. +class BoundControlOrFetchInvalidPermlane : + PatFrag<(ops node:$vdst_in, node:$src0, node:$src1, node:$src2, + node:$fi, node:$bc), + (permlane node:$vdst_in, node:$src0, node: + $src1, node:$src2, node:$fi, node:$bc)> { + let PredicateCode = [{ return N->getConstantOperandVal(5) != 0 || + N->getConstantOperandVal(6) != 0; }]; +} + +// Drop the input value if it won't be read. +class PermlaneDiscardVDstIn : GCNPat< + (permlane srcvalue, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), + (inst (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, + (IMPLICIT_DEF)) +>; + + let SubtargetPredicate = isGFX10Plus in { def V_XOR3_B32 : VOP3Inst <"v_xor3_b32", VOP3_Profile>; def : ThreeOp_i32_Pats; @@ -648,14 +676,16 @@ let SubtargetPredicate = isGFX10Plus in { def V_PERMLANEX16_B32 : VOP3Inst <"v_permlanex16_b32", VOP3_PERMLANE_Profile>; } // End $vdst = $vdst_in, DisableEncoding $vdst_in - def : GCNPat< - (int_amdgcn_permlane16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), - (V_PERMLANE16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) - >; - def : GCNPat< - (int_amdgcn_permlanex16 i32:$vdst_in, i32:$src0, i32:$src1, i32:$src2, timm:$fi, timm:$bc), - (V_PERMLANEX16_B32 (as_i1imm $fi), $src0, (as_i1imm $bc), $src1, 0, $src2, $vdst_in) - >; + def : PermlanePat; + def : PermlanePat; + + def : PermlaneDiscardVDstIn< + 
BoundControlOrFetchInvalidPermlane, + V_PERMLANE16_B32>; + def : PermlaneDiscardVDstIn< + BoundControlOrFetchInvalidPermlane, + V_PERMLANEX16_B32>; + } // End SubtargetPredicate = isGFX10Plus //===----------------------------------------------------------------------===//