//===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// Defines an instruction selector for the AMDGPU target.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUArgumentUsageInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUInstrInfo.h"
#include "AMDGPUPerfHintAnalysis.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>
#include <new>
#include <vector>

using namespace llvm;

namespace llvm {

class R600InstrInfo;

} // end namespace llvm

//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
//===----------------------------------------------------------------------===//

namespace {

/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;
  AMDGPUAS AMDGPUASI;
  bool EnableLateStructurizeCFG;

public:
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<AMDGPUPerfHintAnalysis>();
    AU.addRequired<DivergenceAnalysis>();
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  SDNode *glueCopyToM0(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
  bool isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                       unsigned OffsetBits) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
                   SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset, SDValue &GLC,
                         SDValue &SLC, SDValue &TFE) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                         SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
                         SDValue &SLC) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
                         SDValue &Offset, SDValue &GLC, SDValue &SLC,
                         SDValue &TFE) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset, SDValue &SLC) const;
  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;
  bool SelectMUBUFConstant(SDValue Constant,
                           SDValue &SOffset,
                           SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicOffset(SDValue Offset, SDValue &SOffset,
                                  SDValue &ImmOffset) const;
  bool SelectMUBUFIntrinsicVOffset(SDValue Offset, SDValue &SOffset,
                                   SDValue &ImmOffset, SDValue &VOffset) const;

  bool SelectFlatAtomic(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;
  bool SelectFlatAtomicSigned(SDValue Addr, SDValue &VAddr,
                              SDValue &Offset, SDValue &SLC) const;

  template <bool IsSigned>
  bool SelectFlatOffset(SDValue Addr, SDValue &VAddr,
                        SDValue &Offset, SDValue &SLC) const;

  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, SDValue &SrcMods,
                                 SDValue &Clamp,
                                 SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSel0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3OpSelMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                            SDValue &Clamp) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectHi16Elt(SDValue In, SDValue &Src) const;

  void SelectADD_SUB_I64(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};

class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  const R600Subtarget *Subtarget;
  AMDGPUAS AMDGPUASI;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);

public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {
    AMDGPUASI = AMDGPU::getAMDGPUAS(*TM);
  }

  void Select(SDNode *N) override;

  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "isel",
                      "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "isel",
                    "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)

// This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM,
                                        CodeGenOpt::Level OptLevel) {
  return new AMDGPUDAGToDAGISel(TM, OptLevel);
}

// This pass converts a legalized DAG into an R600-specific
// DAG, ready for instruction scheduling.
FunctionPass *llvm::createR600ISelDag(TargetMachine *TM,
                                      CodeGenOpt::Level OptLevel) {
  return new R600DAGToDAGISel(TM, OptLevel);
}

bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
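
// Returns true if N is known not to be a NaN: either no-NaNs FP math is
// enabled globally, the node carries the no-NaNs fast-math flag, or the DAG
// can prove the value is never a NaN.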
bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const {
  if (TM.Options.NoNaNsFPMath)
    return true;

  // TODO: Move into isKnownNeverNaN
  if (N->getFlags().isDefined())
    return N->getFlags().hasNoNaNs();

  return CurDAG->isKnownNeverNaN(N);
}
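
// Returns true if N is an integer or floating-point constant whose bit
// pattern can be encoded as an inline immediate on this subtarget.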
bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
    return TII->isInlineConstant(C->getAPIntValue());

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
    return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());

  return false;
}

/// Determine the register class for \p OpNo.
/// \returns The register class of the virtual register that will be used for
/// the given operand number \p OpNo, or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}
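
// For LDS loads and stores on subtargets that require M0 to be initialized,
// glue a write of -1 (the maximum value) into M0 in front of the memory node
// so the M0 setup stays attached to the selected instruction.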
SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
  if (cast<MemSDNode>(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS ||
      !Subtarget->ldsRequiresM0Init())
    return N;

  const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

  // Write max value to m0 before each load operation
  SDValue M0 = Lowering.copyToM0(*CurDAG, CurDAG->getEntryNode(), SDLoc(N),
                                 CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32));

  SDValue Glue = M0.getValue(1);

  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    Ops.push_back(N->getOperand(i));
  }
  Ops.push_back(Glue);
  return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops);
}
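
// Map a vector element count to the SGPR register class ID wide enough to
// hold the whole vector (callers have already checked for 32-bit elements).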
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
  switch (NumVectorElts) {
  case 1:
    return AMDGPU::SReg_32_XM0RegClassID;
  case 2:
    return AMDGPU::SReg_64RegClassID;
  case 4:
    return AMDGPU::SReg_128RegClassID;
  case 8:
    return AMDGPU::SReg_256RegClassID;
  case 16:
    return AMDGPU::SReg_512RegClassID;
  }

  llvm_unreachable("invalid vector size");
}
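
// Extract the zero-extended bits of an integer or FP constant node into Out;
// returns false if N is not a constant.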
static bool getConstantValue(SDValue N, uint32_t &Out) {
  if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) {
    Out = C->getAPIntValue().getZExtValue();
    return true;
  }

  if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) {
    Out = C->getValueAPF().bitcastToAPInt().getZExtValue();
    return true;
  }

  return false;
}
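
// Lower a BUILD_VECTOR / SCALAR_TO_VECTOR into a REG_SEQUENCE over the given
// register class, filling any missing trailing elements with IMPLICIT_DEF.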
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 16 && "Vectors with more than 16 elements not "
                                "supported yet");
  // 16 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = AMDGPURegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
        CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
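
// Top-level entry point: handle the AMDGPU-specific and manually selected
// opcodes here, and fall back to the generated matcher (SelectCode) for
// everything else.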
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  if (isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX))
    N = glueCopyToM0(N);

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lowering it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        uint32_t LHSVal, RHSVal;
        if (getConstantValue(N->getOperand(0), LHSVal) &&
            getConstantValue(N->getOperand(1), RHSVal)) {
          uint32_t K = LHSVal | (RHSVal << 16);
          CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT,
                               CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32));
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID = selectSGPRVectorRegClassID(NumVectorElts);
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    SDNode *Lo = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm & 0xFFFFFFFF, DL,
                                                    MVT::i32));
    SDNode *Hi = CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                                CurDAG->getConstant(Imm >> 32, DL, MVT::i32));
    const SDValue Ops[] = {
      CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
      SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32),
      SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)
    };

    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }
  case ISD::LOAD:
  case ISD::STORE:
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE: {
    N = glueCopyToM0(N);
    break;
  }

  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  }

  SelectCode(N);
}
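
// A branch is treated as uniform if the structurizer or the uniformity
// analysis marked its terminator with the corresponding metadata.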
bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
  const BasicBlock *BB = FuncInfo->MBB->getBasicBlock();
  const Instruction *Term = BB->getTerminator();
  return Term->getMetadata("amdgpu.uniform") ||
         Term->getMetadata("structurizecfg.uniform");
}

StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}

//===----------------------------------------------------------------------===//
// Complex Patterns
//===----------------------------------------------------------------------===//

bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}

bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}
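
// Expand a 64-bit integer add/sub into a pair of 32-bit scalar operations
// (S_ADD_U32/S_ADDC_U32 or S_SUB_U32/S_SUBB_U32) over the sub0/sub1 halves,
// then recombine the two halves with a REG_SEQUENCE.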
// FIXME: Should only handle addcarry/subcarry
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
  unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;

  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo, 0),
    Sub0,
    SDValue(AddHi, 0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}

void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) {
  // The names of the opcodes are misleading: v_add_i32/v_sub_i32 have an
  // unsigned carry out despite the _i32 name. These were renamed in VI to _U32.
  // FIXME: We should probably rename the opcodes here.
  unsigned Opc = N->getOpcode() == ISD::UADDO ?
    AMDGPU::V_ADD_I32_e64 : AMDGPU::V_SUB_I32_e64;

  CurDAG->SelectNodeTo(N, Opc, N->getVTList(),
                       { N->getOperand(0), N->getOperand(1) });
}
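
// Build the full VOP3 operand list (source modifiers, clamp, omod, chain and
// glue) for the chained FMA/FMUL nodes and select them directly to
// V_FMA_F32 / V_MUL_F32_e64.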
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  //  src0_modifiers, src0,  src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
}

void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  //    src0_modifiers, src0,  src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;

  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}

// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) {
  SDLoc SL(N);
  bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32;
  unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32 : AMDGPU::V_MAD_U64_U32;

  SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1);
  SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
                    Clamp };
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
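
// Check whether an unsigned offset of the given bit width can be folded into
// a DS instruction with this base. On Southern Islands the base must also be
// known non-negative unless unsafe DS offset folding is enabled.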
bool AMDGPUDAGToDAGISel::isDSOffsetLegal(const SDValue &Base, unsigned Offset,
                                         unsigned OffsetBits) const {
  if ((OffsetBits == 16 && !isUInt<16>(Offset)) ||
      (OffsetBits == 8 && !isUInt<8>(Offset)))
    return false;

  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS ||
      Subtarget->unsafeDSOffsetFoldingEnabled())
    return true;

  // On Southern Islands, instructions with a negative base value and an offset
  // don't seem to work.
  return CurDAG->SignBitIsZero(Base);
}
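
// Split a DS address into a base register and a 16-bit immediate offset,
// rewriting (sub C, x) and pure constant addresses into forms the offset
// field can absorb.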
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue(), 16)) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isUInt<16>(ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset, 16)) {
          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isUInt<16>(CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}

// TODO: If offset is too big, put low 16-bit into offset.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned DWordOffset0 = C1->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    // (add n0, c0)
    if (isDSOffsetLegal(N0, DWordOffset1, 8)) {
      Base = N0;
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned DWordOffset0 = C->getZExtValue() / 4;
      unsigned DWordOffset1 = DWordOffset0 + 1;

      if (isUInt<8>(DWordOffset0)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, DWordOffset1, 8)) {
          unsigned SubOp = Subtarget->hasAddNoCarry() ?
            AMDGPU::V_SUB_U32_e64 : AMDGPU::V_SUB_I32_e32;

          MachineSDNode *MachineSub
            = CurDAG->getMachineNode(SubOp, DL, MVT::i32,
                                     Zero, Addr.getOperand(1));

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned DWordOffset0 = CAddr->getZExtValue() / 4;
    unsigned DWordOffset1 = DWordOffset0 + 1;
    assert(4 * DWordOffset0 == CAddr->getZExtValue());

    if (isUInt<8>(DWordOffset0) && isUInt<8>(DWordOffset1)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero
        = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                 DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(DWordOffset0, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(DWordOffset1, DL, MVT::i8);
      return true;
    }
  }

  // default case

  // FIXME: This is broken on SI where we still need to check if the base
  // pointer is positive here.
  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
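
// Decompose an address into the MUBUF operands (pointer, vaddr, soffset,
// immediate offset and the addressing-mode flags), preferring the immediate
// offset field when the constant part is legal.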
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
                                     SDValue &VAddr, SDValue &SOffset,
                                     SDValue &Offset, SDValue &Offen,
                                     SDValue &Idxen, SDValue &Addr64,
                                     SDValue &GLC, SDValue &SLC,
                                     SDValue &TFE) const {
  // Subtarget prefers to use flat instructions.
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  if (!GLC.getNode())
    GLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  if (!SLC.getNode())
    SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
  TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);

  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    if (N0.getOpcode() == ISD::ADD) {
      // (add (add N2, N3), C1) -> addr64
      SDValue N2 = N0.getOperand(0);
      SDValue N3 = N0.getOperand(1);
      Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
      Ptr = N2;
      VAddr = N3;
    } else {
      // (add N0, C1) -> offset
      VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Ptr = N0;
    }

    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }

    if (isUInt<32>(C1->getZExtValue())) {
      // Illegal offset, store it in soffset.
      Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
      SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
                        0);
      return true;
    }
  }

  if (Addr.getOpcode() == ISD::ADD) {
    // (add N0, N1) -> addr64
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
    Ptr = N0;
    VAddr = N1;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // default case -> offset
  VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
  Ptr = Addr;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);

  return true;
}
|
|
|
|
|
2014-08-12 06:18:17 +08:00
|
|
|
bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset, SDValue &GLC,
                                           SDValue &SLC, SDValue &TFE) const {
  SDValue Ptr, Offen, Idxen, Addr64;

  // The addr64 bit was removed for Volcanic Islands.
  if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
    return false;

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
  if (C->getSExtValue()) {
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
                                           SDValue &VAddr, SDValue &SOffset,
                                           SDValue &Offset,
                                           SDValue &SLC) const {
  SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
  SDValue GLC, TFE;

  return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}

static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
  auto PSV = PtrInfo.V.dyn_cast<const PseudoSourceValue *>();
  return PSV && PSV->isStack();
}

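// Return the value to use as the MUBUF vaddr together with the SGPR to use as
// soffset for a private (scratch) access: the target frame index paired with
// the frame pointer SGPR when the address is a known frame index, otherwise
// the original address paired with the entry's scratch wave offset register.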
std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  if (auto FI = dyn_cast<FrameIndexSDNode>(N)) {
    SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(),
                                              FI->getValueType(0));

    // If we can resolve this to a frame index access, this is relative to the
    // frame pointer SGPR.
    return std::make_pair(TFI, CurDAG->getRegister(Info->getFrameOffsetReg(),
                                                   MVT::i32));
  }

  // If we don't know this private access is a local stack object, it needs to
  // be relative to the entry point's scratch wave offset register.
  return std::make_pair(N, CurDAG->getRegister(Info->getScratchWaveOffsetReg(),
                                               MVT::i32));
}

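// Select a scratch access that uses a VGPR address (offen). The resource is
// the scratch rsrc descriptor; a purely constant address is split into a
// v_mov_b32 of the bits above the low 12 (Imm & ~4095) used as vaddr plus the
// low 12 bits as the immediate offset.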
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned Imm = CAddr->getZExtValue();

    SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
    MachineSDNode *MovHighBits = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                        DL, MVT::i32, HighBits);
    VAddr = SDValue(MovHighBits, 0);

    // In a call sequence, stores to the argument stack area are relative to the
    // stack pointer.
    const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
    unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
      Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

    SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);
    ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr);
  if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
    return false;

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  const MachinePointerInfo &PtrInfo = cast<MemSDNode>(Parent)->getPointerInfo();
  unsigned SOffsetReg = isStackPtrRelative(PtrInfo) ?
    Info->getStackPtrOffsetReg() : Info->getScratchWaveOffsetReg();

  // FIXME: Get from MachinePointerInfo? We should only be using the frame
  // offset if we know this is in a call sequence.
  SOffset = CurDAG->getRegister(SOffsetReg, MVT::i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &SOffset, SDValue &Offset,
                                           SDValue &GLC, SDValue &SLC,
                                           SDValue &TFE) const {
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
  const SIInstrInfo *TII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
                   GLC, SLC, TFE))
    return false;

  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
    SDLoc DL(Addr);

    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());

    SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0);
    return true;
  }
  return false;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset
                                           ) const {
  SDValue GLC, SLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
                                           SDValue &Soffset, SDValue &Offset,
                                           SDValue &SLC) const {
  SDValue GLC, TFE;

  return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE);
}

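// Split a constant buffer offset into the immediate offset field and, when it
// does not fit, an SOffset component. For example, a constant offset of 4100
// would split into ImmOffset 4092 (the aligned maximum) and SOffset 8, with an
// s_mov_b32 used only when the overflow exceeds the inline-constant range.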
bool AMDGPUDAGToDAGISel::SelectMUBUFConstant(SDValue Constant,
                                             SDValue &SOffset,
                                             SDValue &ImmOffset) const {
  SDLoc DL(Constant);
  const uint32_t Align = 4;
  const uint32_t MaxImm = alignDown(4095, Align);
  uint32_t Imm = cast<ConstantSDNode>(Constant)->getZExtValue();
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Align) & ~4095;
      uint32_t Low = (Imm + Align) & 4095;
      Imm = Low;
      Overflow = High - Align;
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = CurDAG->getTargetConstant(Imm, DL, MVT::i16);

  if (Overflow <= 64)
    SOffset = CurDAG->getTargetConstant(Overflow, DL, MVT::i32);
  else
    SOffset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32,
                      CurDAG->getTargetConstant(Overflow, DL, MVT::i32)),
                      0);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicOffset(SDValue Offset,
                                                    SDValue &SOffset,
                                                    SDValue &ImmOffset) const {
  SDLoc DL(Offset);

  if (!isa<ConstantSDNode>(Offset))
    return false;

  return SelectMUBUFConstant(Offset, SOffset, ImmOffset);
}

bool AMDGPUDAGToDAGISel::SelectMUBUFIntrinsicVOffset(SDValue Offset,
                                                     SDValue &SOffset,
                                                     SDValue &ImmOffset,
                                                     SDValue &VOffset) const {
  SDLoc DL(Offset);

  // Don't generate an unnecessary voffset for constant offsets.
  if (isa<ConstantSDNode>(Offset)) {
    SDValue Tmp1, Tmp2;

    // When necessary, use a voffset in <= CI anyway to work around a hardware
    // bug.
    if (Subtarget->getGeneration() > AMDGPUSubtarget::SEA_ISLANDS ||
        SelectMUBUFConstant(Offset, Tmp1, Tmp2))
      return false;
  }

  if (CurDAG->isBaseWithConstantOffset(Offset)) {
    SDValue N0 = Offset.getOperand(0);
    SDValue N1 = Offset.getOperand(1);
    if (cast<ConstantSDNode>(N1)->getSExtValue() >= 0 &&
        SelectMUBUFConstant(N1, SOffset, ImmOffset)) {
      VOffset = N0;
      return true;
    }
  }

  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  VOffset = Offset;

  return true;
}

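// Fold a constant address offset into a FLAT access when the subtarget
// supports flat instruction offsets: a signed 13-bit immediate when IsSigned,
// otherwise an unsigned 12-bit immediate.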
template <bool IsSigned>
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  int64_t OffsetVal = 0;

  if (Subtarget->hasFlatInstOffsets() &&
      CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

    if ((IsSigned && isInt<13>(COffsetVal)) ||
        (!IsSigned && isUInt<12>(COffsetVal))) {
      Addr = N0;
      OffsetVal = COffsetVal;
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  SLC = CurDAG->getTargetConstant(0, SDLoc(), MVT::i1);

  return true;
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomic(SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset,
                                          SDValue &SLC) const {
  return SelectFlatOffset<false>(Addr, VAddr, Offset, SLC);
}

bool AMDGPUDAGToDAGISel::SelectFlatAtomicSigned(SDValue Addr,
                                                SDValue &VAddr,
                                                SDValue &Offset,
                                                SDValue &SLC) const {
  return SelectFlatOffset<true>(Addr, VAddr, Offset, SLC);
}

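// Try to encode a constant byte offset for an SMRD load. If the encoded form
// fits the immediate field, Imm is set to true; otherwise the value is
// materialized in an SGPR, with 32-bit immediates only usable directly on
// Sea Islands.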
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {

  // FIXME: Handle non-constant offsets.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C)
    return false;

  SDLoc SL(ByteOffsetNode);
  GCNSubtarget::Generation Gen = Subtarget->getGeneration();
  int64_t ByteOffset = C->getSExtValue();
  int64_t EncodedOffset = AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset);

  if (AMDGPU::isLegalSMRDImmOffset(*Subtarget, ByteOffset)) {
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  if (!isUInt<32>(EncodedOffset) || !isUInt<32>(ByteOffset))
    return false;

  if (Gen == AMDGPUSubtarget::SEA_ISLANDS && isUInt<32>(EncodedOffset)) {
    // 32-bit Immediates are supported on Sea Islands.
    Offset = CurDAG->getTargetConstant(EncodedOffset, SL, MVT::i32);
  } else {
    SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
    Offset = SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32,
                                            C32Bit), 0);
  }
  Imm = false;
  return true;
}

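// Widen a 32-bit SMRD base address to the 64-bit SBase operand by building a
// REG_SEQUENCE with the address in sub0 and the function's known high address
// bits in sub1.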
SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const {
  if (Addr.getValueType() != MVT::i32)
    return Addr;

  // Zero-extend a 32-bit address.
  SDLoc SL(Addr);

  const MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  unsigned AddrHiVal = Info->get32BitAddressHighBits();
  SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32);

  const SDValue Ops[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32),
    Addr,
    CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32),
    SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi),
            0),
    CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32),
  };

  return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64,
                                        Ops), 0);
}

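// Select the SBase / Offset operands of an SMRD load. A (base + constant)
// address folds the constant through SelectSMRDOffset; otherwise the whole
// address becomes SBase with a zero immediate offset.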
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    if (SelectSMRDOffset(N1, Offset, Imm)) {
      SBase = Expand32BitAddress(N0);
      return true;
    }
  }
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase,
                                       SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase,
                                         SDValue &Offset) const {

  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRD(Addr, SBase, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase,
                                        SDValue &Offset) const {
  bool Imm;
  return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm &&
         !isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr,
                                             SDValue &Offset) const {
  bool Imm;
  return SelectSMRDOffset(Addr, Offset, Imm) && Imm;
}

bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr,
                                               SDValue &Offset) const {
  if (Subtarget->getGeneration() != AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  bool Imm;
  if (!SelectSMRDOffset(Addr, Offset, Imm))
    return false;

  return !Imm && isa<ConstantSDNode>(Offset);
}

bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    Base = N0;
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
    return true;
  }

  if (isa<ConstantSDNode>(Index))
    return false;

  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}

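// The second S_BFE operand packs the bit offset into bits [5:0] and the field
// width into bits [22:16]; e.g. offset 16 with width 8 packs to
// 16 | (8 << 16) = 0x80010.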
SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL,
                                     SDValue Val, uint32_t Offset,
                                     uint32_t Width) {
  // Transformation function: pack the offset and width of a BFE into
  // the format expected by the S_BFE_I32 / S_BFE_U32. In the second
  // source, bits [5:0] contain the offset and bits [22:16] the width.
  uint32_t PackedVal = Offset | (Width << 16);
  SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32);

  return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst);
}

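// Fold a left shift followed by a right shift into a single bitfield extract;
// e.g. (srl (shl x, 8), 24) selects to S_BFE_U32 of x with offset
// 24 - 8 = 16 and width 32 - 24 = 8.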
void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) {
  // "((a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c)"
  // "((a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c)"
  // Predicate: 0 < b <= c < 32

  const SDValue &Shl = N->getOperand(0);
  ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1));
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));

  if (B && C) {
    uint32_t BVal = B->getZExtValue();
    uint32_t CVal = C->getZExtValue();

    if (0 < BVal && BVal <= CVal && CVal < 32) {
      bool Signed = N->getOpcode() == ISD::SRA;
      unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32;

      ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal,
                              32 - CVal));
      return;
    }
  }
  SelectCode(N);
}

void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}

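// Return true if this BRCOND's condition can be selected as a scalar branch on
// SCC: the compare must be a single-use SETCC on i32, or on i64 with an eq/ne
// condition when the subtarget has 64-bit scalar compares.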
bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
  assert(N->getOpcode() == ISD::BRCOND);
  if (!N->hasOneUse())
    return false;

  SDValue Cond = N->getOperand(1);
  if (Cond.getOpcode() == ISD::CopyToReg)
    Cond = Cond.getOperand(2);

  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
    return false;

  MVT VT = Cond.getOperand(0).getSimpleValueType();
  if (VT == MVT::i32)
    return true;

  if (VT == MVT::i64) {
    auto ST = static_cast<const GCNSubtarget *>(Subtarget);

    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
  }

  return false;
}

void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  unsigned CondReg = UseSCCBr ? AMDGPU::SCC : AMDGPU::VCC;
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ. We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0. Thus we need to mask out bits for
    // disabled lanes.
    //
    // (For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND.)
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when it is unnecessary. But it would be better to add a
    // separate pass after SIFixSGPRCopies to do the unnecessary S_AND removal,
    // so it catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
                       CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
                       Cond),
                   0);
  }

  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}

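// Select FMAD/FMA into the mixed-precision v_mad_mix_f32 / v_fma_mix_f32
// instructions when at least one f32 operand is really a converted f16 value;
// otherwise fall back to the default patterns.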
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
  // using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Subtarget->hasFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}

// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUASI.FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset, SLC;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {
        CmpVal, VAddr, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset, SLC;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset, SLC)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue Ops[] = {
        CmpVal, SRsrc, SOffset, Offset, SLC, Mem->getChain()
      };

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  MachineSDNode::mmo_iterator MMOs = MF->allocateMemRefsArray(1);
  *MMOs = Mem->getMemOperand();
  CmpSwap->setMemRefs(MMOs, MMOs + 1);

  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}

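// Strip fneg/fabs from a VOP3 source and record them in the NEG/ABS source
// modifier bits instead, so they can be folded into the instruction encoding.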
bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src,
                                            unsigned &Mods) const {
  Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods |= SISrcMods::NEG;
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::FABS) {
    Mods |= SISrcMods::ABS;
    Src = Src.getOperand(0);
  }

  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
                                        SDValue &SrcMods) const {
  unsigned Mods;
  if (SelectVOP3ModsImpl(In, Src, Mods)) {
    SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}

bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const {
  if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG)
    return false;

  Src = In;
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src,
                                         SDValue &SrcMods, SDValue &Clamp,
                                         SDValue &Omod) const {
  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
                                                   SDValue &SrcMods,
                                                   SDValue &Clamp,
                                                   SDValue &Omod) const {
  Clamp = Omod = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src,
                                         SDValue &Clamp, SDValue &Omod) const {
  Src = In;

  SDLoc DL(In);
  Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Omod = CurDAG->getTargetConstant(0, DL, MVT::i1);

  return true;
}

static SDValue stripBitcast(SDValue Val) {
  return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
}

// Figure out if this is really an extract of the high 16-bits of a dword.
static bool isExtractHiElt(SDValue In, SDValue &Out) {
  In = stripBitcast(In);
  if (In.getOpcode() != ISD::TRUNCATE)
    return false;

  SDValue Srl = In.getOperand(0);
  if (Srl.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
      if (ShiftAmt->getZExtValue() == 16) {
        Out = stripBitcast(Srl.getOperand(0));
        return true;
      }
    }
  }

  return false;
}

// Look through operations that obscure just looking at the low 16-bits of the
// same register.
static SDValue stripExtractLoElt(SDValue In) {
  if (In.getOpcode() == ISD::TRUNCATE) {
    SDValue Src = In.getOperand(0);
    if (Src.getValueType().getSizeInBits() == 32)
      return stripBitcast(Src);
  }

  return In;
}

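// Select source modifiers for packed (VOP3P) operands: fneg of the whole
// vector toggles both NEG bits, per-half fnegs and high-half extracts are
// folded into the NEG/NEG_HI and OP_SEL bits, and a splatted scalar is read
// from the low half of the register to avoid building a packed pair.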
bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  unsigned Mods = 0;
  Src = In;

  if (Src.getOpcode() == ISD::FNEG) {
    Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
    Src = Src.getOperand(0);
  }

  if (Src.getOpcode() == ISD::BUILD_VECTOR) {
    unsigned VecMods = Mods;

    SDValue Lo = stripBitcast(Src.getOperand(0));
    SDValue Hi = stripBitcast(Src.getOperand(1));

    if (Lo.getOpcode() == ISD::FNEG) {
      Lo = stripBitcast(Lo.getOperand(0));
      Mods ^= SISrcMods::NEG;
    }

    if (Hi.getOpcode() == ISD::FNEG) {
      Hi = stripBitcast(Hi.getOperand(0));
      Mods ^= SISrcMods::NEG_HI;
    }

    if (isExtractHiElt(Lo, Lo))
      Mods |= SISrcMods::OP_SEL_0;

    if (isExtractHiElt(Hi, Hi))
      Mods |= SISrcMods::OP_SEL_1;

    Lo = stripExtractLoElt(Lo);
    Hi = stripExtractLoElt(Hi);

    if (Lo == Hi && !isInlineImmediate(Lo.getNode())) {
      // Really a scalar input. Just select from the low half of the register to
      // avoid packing.

      Src = Lo;
      SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
      return true;
    }

    Mods = VecMods;
  }

  // Packed instructions do not have abs modifiers.
  Mods |= SISrcMods::OP_SEL_1;

  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp and op_sel
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3PMods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSel0(SDValue In, SDValue &Src,
                                          SDValue &SrcMods,
                                          SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSel(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}

bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods0(SDValue In, SDValue &Src,
                                              SDValue &SrcMods,
                                              SDValue &Clamp) const {
  SDLoc SL(In);

  // FIXME: Handle clamp
  Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32);

  return SelectVOP3OpSelMods(In, Src, SrcMods);
}

// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from fp16.
    // If the source's op_sel is set, it picks the high half of the source
    // register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}

bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}

// TODO: Can we identify things like v_mad_mixhi_f16?
bool AMDGPUDAGToDAGISel::SelectHi16Elt(SDValue In, SDValue &Src) const {
  if (In.isUndef()) {
    Src = In;
    return true;
  }

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(C->getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    SDValue K = CurDAG->getTargetConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
    MachineSDNode *MovK = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                 SL, MVT::i32, K);
    Src = SDValue(MovK, 0);
    return true;
  }

  return isExtractHiElt(In, Src);
}

void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}

bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<R600Subtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}

bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const {
  if (!N->readMem())
    return false;
  if (CbId == -1)
    return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS ||
           N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT;

  return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId;
}

bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr,
                                                       SDValue& IntPtr) {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) {
    IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr),
                                       true);
    return true;
  }
  return false;
}

bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr,
                                                       SDValue& BaseReg, SDValue &Offset) {
  if (!isa<ConstantSDNode>(Addr)) {
    BaseReg = Addr;
    Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true);
    return true;
  }
  return false;
}

void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128-bit register copy when going through the
    // TwoAddressInstructions pass. We want to avoid 128-bit copies as much as
    // possible because they can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    case 4:
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = R600::R600_Reg128VerticalRegClassID;
      else
        RegClassID = R600::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  SelectCode(N);
}

bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}

bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  R600::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}