2012-12-12 05:25:42 +08:00
|
|
|
//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++ -*-===//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Interface definition of the TargetLowering class that is common
|
2012-12-12 05:25:42 +08:00
|
|
|
/// to all AMD GPUs.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2016-03-11 16:00:27 +08:00
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
|
|
|
|
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2017-03-27 22:04:01 +08:00
|
|
|
#include "AMDGPU.h"
|
2017-04-12 06:29:24 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetLowering.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
namespace llvm {
|
|
|
|
|
2013-06-28 23:47:08 +08:00
|
|
|
class AMDGPUMachineFunction;
|
2018-07-12 04:59:01 +08:00
|
|
|
class AMDGPUSubtarget;
|
2017-08-04 07:00:29 +08:00
|
|
|
struct ArgDescriptor;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
class AMDGPUTargetLowering : public TargetLowering {
|
2016-11-02 01:49:33 +08:00
|
|
|
private:
|
2018-07-12 04:59:01 +08:00
|
|
|
const AMDGPUSubtarget *Subtarget;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
|
2016-11-02 01:49:33 +08:00
|
|
|
/// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
|
|
|
|
/// legalized from a smaller type VT. Need to match pre-legalized type because
|
|
|
|
/// the generic legalization inserts the add/sub between the select and
|
|
|
|
/// compare.
|
2017-10-13 03:37:14 +08:00
|
|
|
SDValue getFFBX_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL, unsigned Opc) const;
|
2016-11-02 01:49:33 +08:00
|
|
|
|
2017-05-23 23:59:58 +08:00
|
|
|
public:
|
2017-11-07 01:04:37 +08:00
|
|
|
static unsigned numBitsUnsigned(SDValue Op, SelectionDAG &DAG);
|
|
|
|
static unsigned numBitsSigned(SDValue Op, SelectionDAG &DAG);
|
2019-09-10 01:13:44 +08:00
|
|
|
static bool hasDefinedInitializer(const GlobalValue *GV);
|
2017-05-23 23:59:58 +08:00
|
|
|
|
2014-02-25 05:01:28 +08:00
|
|
|
protected:
|
2013-08-15 07:25:00 +08:00
|
|
|
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Split a vector store into multiple scalar stores.
|
2014-04-18 15:40:20 +08:00
|
|
|
/// \returns The resulting chain.
|
2014-06-16 04:08:02 +08:00
|
|
|
|
2014-09-11 05:44:27 +08:00
|
|
|
SDValue LowerFREM(SDValue Op, SelectionDAG &DAG) const;
|
2014-06-19 01:05:30 +08:00
|
|
|
SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const;
|
2014-06-19 01:05:26 +08:00
|
|
|
SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const;
|
2014-06-19 06:03:45 +08:00
|
|
|
SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const;
|
2015-01-22 02:18:25 +08:00
|
|
|
|
2019-12-25 00:07:45 +08:00
|
|
|
SDValue LowerFROUND_LegalFTRUNC(SDValue Op, SelectionDAG &DAG) const;
|
2015-01-22 02:18:25 +08:00
|
|
|
SDValue LowerFROUND64(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
|
2014-06-19 01:05:30 +08:00
|
|
|
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
|
2018-08-17 01:07:52 +08:00
|
|
|
SDValue LowerFLOG(SDValue Op, SelectionDAG &DAG,
|
2017-11-27 21:26:38 +08:00
|
|
|
double Log2BaseInverted) const;
|
2018-08-17 01:07:52 +08:00
|
|
|
SDValue lowerFEXP(SDValue Op, SelectionDAG &DAG) const;
|
2014-06-19 01:05:30 +08:00
|
|
|
|
2017-10-13 03:37:14 +08:00
|
|
|
SDValue LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const;
|
2016-01-12 00:50:29 +08:00
|
|
|
|
2016-01-12 06:01:48 +08:00
|
|
|
SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
|
2014-10-04 07:54:41 +08:00
|
|
|
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
|
2013-10-31 01:22:05 +08:00
|
|
|
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
2014-10-04 07:54:41 +08:00
|
|
|
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2014-10-04 07:54:56 +08:00
|
|
|
SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
|
2016-11-02 00:31:48 +08:00
|
|
|
SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
|
2014-10-04 07:54:56 +08:00
|
|
|
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2014-06-16 04:23:38 +08:00
|
|
|
SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2016-01-19 06:01:13 +08:00
|
|
|
protected:
|
2016-07-02 06:55:55 +08:00
|
|
|
bool shouldCombineMemoryType(EVT VT) const;
|
2016-07-02 06:47:50 +08:00
|
|
|
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2014-07-15 10:06:31 +08:00
|
|
|
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2017-07-15 13:52:59 +08:00
|
|
|
SDValue performAssertSZExtCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2019-08-27 08:18:09 +08:00
|
|
|
SDValue performIntrinsicWOChainCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2016-09-14 23:19:03 +08:00
|
|
|
|
|
|
|
SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
|
|
|
|
unsigned Opc, SDValue LHS,
|
|
|
|
uint32_t ValLo, uint32_t ValHi) const;
|
2015-07-15 02:20:33 +08:00
|
|
|
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2016-01-19 06:01:13 +08:00
|
|
|
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2016-01-19 05:43:36 +08:00
|
|
|
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2018-05-10 02:37:39 +08:00
|
|
|
SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2014-07-01 01:55:48 +08:00
|
|
|
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2016-08-27 09:32:27 +08:00
|
|
|
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
|
|
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
|
|
|
SDValue performMulLoHi24Combine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2017-10-13 03:37:14 +08:00
|
|
|
SDValue performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue RHS, DAGCombinerInfo &DCI) const;
|
2016-01-12 01:02:00 +08:00
|
|
|
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2018-08-16 05:03:55 +08:00
|
|
|
|
|
|
|
bool isConstantCostlierToNegate(SDValue N) const;
|
2017-01-12 08:09:34 +08:00
|
|
|
SDValue performFNegCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2017-02-02 10:27:04 +08:00
|
|
|
SDValue performFAbsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2018-06-27 23:33:33 +08:00
|
|
|
SDValue performRcpCombine(SDNode *N, DAGCombinerInfo &DCI) const;
|
2014-07-01 01:55:48 +08:00
|
|
|
|
2014-06-11 11:29:54 +08:00
|
|
|
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2014-07-21 22:01:14 +08:00
|
|
|
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
|
|
|
|
SelectionDAG &DAG) const;
|
2014-07-25 01:10:35 +08:00
|
|
|
|
2016-01-19 06:01:13 +08:00
|
|
|
/// Return 64-bit value Op as two 32-bit integers.
|
|
|
|
std::pair<SDValue, SDValue> split64BitValue(SDValue Op,
|
|
|
|
SelectionDAG &DAG) const;
|
2016-01-19 06:09:04 +08:00
|
|
|
SDValue getLoHalf64(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
SDValue getHiHalf64(SDValue Op, SelectionDAG &DAG) const;
|
2016-01-19 06:01:13 +08:00
|
|
|
|
2019-03-21 20:01:21 +08:00
|
|
|
/// Split a vector type into two parts. The first part is a power of two
|
|
|
|
/// vector. The second part is whatever is left over, and is a scalar if it
|
|
|
|
/// would otherwise be a 1-vector.
|
|
|
|
std::pair<EVT, EVT> getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const;
|
|
|
|
|
|
|
|
/// Split a vector value into two parts of types LoVT and HiVT. HiVT could be
|
|
|
|
/// scalar.
|
|
|
|
std::pair<SDValue, SDValue> splitVector(const SDValue &N, const SDLoc &DL,
|
|
|
|
const EVT &LoVT, const EVT &HighVT,
|
|
|
|
SelectionDAG &DAG) const;
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Split a vector load into 2 loads of half the vector.
|
2014-07-25 01:10:35 +08:00
|
|
|
SDValue SplitVectorLoad(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2019-03-21 20:01:21 +08:00
|
|
|
/// Widen a vector load from vec3 to vec4.
|
|
|
|
SDValue WidenVectorLoad(SDValue Op, SelectionDAG &DAG) const;
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Split a vector store into 2 stores of half the vector.
|
2013-10-23 08:44:32 +08:00
|
|
|
SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
|
2014-07-25 01:10:35 +08:00
|
|
|
|
2013-08-26 23:05:44 +08:00
|
|
|
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
|
2014-06-23 05:43:01 +08:00
|
|
|
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
2015-01-23 07:42:43 +08:00
|
|
|
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
|
2014-08-13 01:31:20 +08:00
|
|
|
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
|
2014-11-15 09:07:53 +08:00
|
|
|
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
|
|
|
|
SmallVectorImpl<SDValue> &Results) const;
|
2018-07-20 17:05:08 +08:00
|
|
|
|
|
|
|
void analyzeFormalArgumentsCompute(
|
|
|
|
CCState &State,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins) const;
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
public:
|
2018-07-12 04:59:01 +08:00
|
|
|
AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2017-01-19 14:35:27 +08:00
|
|
|
bool mayIgnoreSignedZero(SDValue Op) const {
|
2017-01-25 14:27:02 +08:00
|
|
|
if (getTargetMachine().Options.NoSignedZerosFPMath)
|
2017-01-19 14:35:27 +08:00
|
|
|
return true;
|
|
|
|
|
2017-05-01 23:17:51 +08:00
|
|
|
const auto Flags = Op.getNode()->getFlags();
|
|
|
|
if (Flags.isDefined())
|
|
|
|
return Flags.hasNoSignedZeros();
|
2017-01-19 14:35:27 +08:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-05-16 19:47:30 +08:00
|
|
|
static inline SDValue stripBitcast(SDValue Val) {
|
|
|
|
return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val;
|
|
|
|
}
|
|
|
|
|
2017-05-12 01:26:25 +08:00
|
|
|
static bool allUsesHaveSourceMods(const SDNode *N,
|
|
|
|
unsigned CostThreshold = 4);
|
2014-04-29 15:57:24 +08:00
|
|
|
bool isFAbsFree(EVT VT) const override;
|
|
|
|
bool isFNegFree(EVT VT) const override;
|
|
|
|
bool isTruncateFree(EVT Src, EVT Dest) const override;
|
|
|
|
bool isTruncateFree(Type *Src, Type *Dest) const override;
|
|
|
|
|
|
|
|
bool isZExtFree(Type *Src, Type *Dest) const override;
|
|
|
|
bool isZExtFree(EVT Src, EVT Dest) const override;
|
2014-06-26 21:45:47 +08:00
|
|
|
bool isZExtFree(SDValue Val, EVT VT2) const override;
|
2014-04-29 15:57:24 +08:00
|
|
|
|
|
|
|
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
MVT getVectorIdxTy(const DataLayout &) const override;
|
2014-06-24 02:00:55 +08:00
|
|
|
bool isSelectSupported(SelectSupportKind) const override;
|
2014-06-16 04:23:38 +08:00
|
|
|
|
2019-03-19 02:40:07 +08:00
|
|
|
bool isFPImmLegal(const APFloat &Imm, EVT VT,
|
|
|
|
bool ForCodeSize) const override;
|
2014-06-16 04:23:38 +08:00
|
|
|
bool ShouldShrinkFPConstant(EVT VT) const override;
|
2014-12-12 08:00:24 +08:00
|
|
|
bool shouldReduceLoadWidth(SDNode *Load,
|
|
|
|
ISD::LoadExtType ExtType,
|
|
|
|
EVT ExtVT) const override;
|
2014-06-16 04:23:38 +08:00
|
|
|
|
2019-07-10 03:55:28 +08:00
|
|
|
bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
|
|
|
|
const MachineMemOperand &MMO) const final;
|
2015-05-24 08:51:27 +08:00
|
|
|
|
|
|
|
bool storeOfVectorConstantIsCheap(EVT MemVT,
|
|
|
|
unsigned NumElem,
|
|
|
|
unsigned AS) const override;
|
2015-10-13 07:59:50 +08:00
|
|
|
bool aggressivelyPreferBuildVectorSources(EVT VecVT) const override;
|
2015-01-14 03:46:48 +08:00
|
|
|
bool isCheapToSpeculateCttz() const override;
|
|
|
|
bool isCheapToSpeculateCtlz() const override;
|
|
|
|
|
2018-03-06 00:25:10 +08:00
|
|
|
bool isSDNodeAlwaysUniform(const SDNode *N) const override;
|
2017-04-12 06:29:24 +08:00
|
|
|
static CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg);
|
2017-05-18 05:56:25 +08:00
|
|
|
static CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool IsVarArg);
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
2014-04-29 15:57:24 +08:00
|
|
|
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
2016-06-12 23:39:02 +08:00
|
|
|
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
|
|
|
|
SelectionDAG &DAG) const override;
|
2017-08-04 07:32:41 +08:00
|
|
|
|
2017-08-12 04:42:08 +08:00
|
|
|
SDValue addTokenForArgument(SDValue Chain,
|
|
|
|
SelectionDAG &DAG,
|
|
|
|
MachineFrameInfo &MFI,
|
|
|
|
int ClobberedFI) const;
|
|
|
|
|
2017-08-04 07:32:41 +08:00
|
|
|
SDValue lowerUnhandledCall(CallLoweringInfo &CLI,
|
|
|
|
SmallVectorImpl<SDValue> &InVals,
|
|
|
|
StringRef Reason) const;
|
2014-04-29 15:57:24 +08:00
|
|
|
SDValue LowerCall(CallLoweringInfo &CLI,
|
|
|
|
SmallVectorImpl<SDValue> &InVals) const override;
|
|
|
|
|
2015-08-27 02:37:13 +08:00
|
|
|
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op,
|
|
|
|
SelectionDAG &DAG) const;
|
|
|
|
|
2014-04-29 15:57:24 +08:00
|
|
|
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
|
2014-06-16 04:23:38 +08:00
|
|
|
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
|
2014-04-29 15:57:24 +08:00
|
|
|
void ReplaceNodeResults(SDNode * N,
|
|
|
|
SmallVectorImpl<SDValue> &Results,
|
|
|
|
SelectionDAG &DAG) const override;
|
2014-03-28 01:23:24 +08:00
|
|
|
|
2017-02-01 08:42:40 +08:00
|
|
|
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS,
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue RHS, SDValue True, SDValue False,
|
|
|
|
SDValue CC, DAGCombinerInfo &DCI) const;
|
2014-11-15 02:30:06 +08:00
|
|
|
|
2014-04-29 15:57:24 +08:00
|
|
|
const char* getTargetNodeName(unsigned Opcode) const override;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2019-01-30 00:37:27 +08:00
|
|
|
// FIXME: Turn off MergeConsecutiveStores() before Instruction Selection for
|
|
|
|
// AMDGPU. Commit r319036,
|
|
|
|
// (https://github.com/llvm/llvm-project/commit/db77e57ea86d941a4262ef60261692f4cb6893e6)
|
|
|
|
// turned on MergeConsecutiveStores() before Instruction Selection for all
|
|
|
|
// targets. Enough AMDGPU compiles go into an infinite loop (
|
|
|
|
// MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
|
|
|
|
// MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
|
|
|
|
// now.
|
2019-06-04 23:15:59 +08:00
|
|
|
bool mergeStoresAfterLegalization(EVT) const override { return false; }
|
2017-12-20 03:26:23 +08:00
|
|
|
|
2016-08-04 20:47:28 +08:00
|
|
|
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
|
|
|
|
return true;
|
|
|
|
}
|
2016-11-11 07:31:06 +08:00
|
|
|
SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
|
|
|
int &RefinementSteps, bool &UseOneConstNR,
|
|
|
|
bool Reciprocal) const override;
|
2016-10-21 00:55:45 +08:00
|
|
|
SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
|
|
|
|
int &RefinementSteps) const override;
|
2015-01-14 04:53:18 +08:00
|
|
|
|
2014-04-29 15:57:24 +08:00
|
|
|
virtual SDNode *PostISelFolding(MachineSDNode *N,
|
2016-03-11 16:00:27 +08:00
|
|
|
SelectionDAG &DAG) const = 0;
|
2013-02-27 01:52:16 +08:00
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Determine which of the bits specified in \p Mask are known to be
|
2012-12-12 05:25:42 +08:00
|
|
|
/// either zero or one and return them in the \p KnownZero and \p KnownOne
|
|
|
|
/// bitsets.
|
2014-05-15 05:14:37 +08:00
|
|
|
void computeKnownBitsForTargetNode(const SDValue Op,
|
2017-04-28 13:31:46 +08:00
|
|
|
KnownBits &Known,
|
2017-03-31 19:24:16 +08:00
|
|
|
const APInt &DemandedElts,
|
2014-05-15 05:14:37 +08:00
|
|
|
const SelectionDAG &DAG,
|
|
|
|
unsigned Depth = 0) const override;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2017-03-31 21:54:09 +08:00
|
|
|
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts,
|
|
|
|
const SelectionDAG &DAG,
|
2014-09-03 19:41:21 +08:00
|
|
|
unsigned Depth = 0) const override;
|
2014-07-21 23:45:01 +08:00
|
|
|
|
2018-08-04 02:27:52 +08:00
|
|
|
bool isKnownNeverNaNForTargetNode(SDValue Op,
|
|
|
|
const SelectionDAG &DAG,
|
|
|
|
bool SNaN = false,
|
|
|
|
unsigned Depth = 0) const override;
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Helper function that adds Reg to the LiveIn list of the DAG's
|
2014-07-21 23:45:01 +08:00
|
|
|
/// MachineFunction.
|
|
|
|
///
|
2017-06-20 05:52:45 +08:00
|
|
|
/// \returns a RegisterSDNode representing Reg if \p RawReg is true, otherwise
|
|
|
|
/// a copy from the register.
|
|
|
|
SDValue CreateLiveInRegister(SelectionDAG &DAG,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
unsigned Reg, EVT VT,
|
|
|
|
const SDLoc &SL,
|
|
|
|
bool RawReg = false) const;
|
|
|
|
SDValue CreateLiveInRegister(SelectionDAG &DAG,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
unsigned Reg, EVT VT) const {
|
|
|
|
return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()));
|
|
|
|
}
|
|
|
|
|
|
|
|
// Returns the raw live in register rather than a copy from it.
|
|
|
|
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
unsigned Reg, EVT VT) const {
|
|
|
|
return CreateLiveInRegister(DAG, RC, Reg, VT, SDLoc(DAG.getEntryNode()), true);
|
|
|
|
}
|
2015-07-10 05:20:37 +08:00
|
|
|
|
2017-08-04 07:00:29 +08:00
|
|
|
/// Similar to CreateLiveInRegister, except value maybe loaded from a stack
|
|
|
|
/// slot rather than passed in a register.
|
|
|
|
SDValue loadStackInputValue(SelectionDAG &DAG,
|
|
|
|
EVT VT,
|
|
|
|
const SDLoc &SL,
|
|
|
|
int64_t Offset) const;
|
|
|
|
|
|
|
|
SDValue storeStackInputValue(SelectionDAG &DAG,
|
|
|
|
const SDLoc &SL,
|
|
|
|
SDValue Chain,
|
|
|
|
SDValue ArgVal,
|
|
|
|
int64_t Offset) const;
|
|
|
|
|
|
|
|
SDValue loadInputValue(SelectionDAG &DAG,
|
|
|
|
const TargetRegisterClass *RC,
|
|
|
|
EVT VT, const SDLoc &SL,
|
|
|
|
const ArgDescriptor &Arg) const;
|
|
|
|
|
2015-07-10 05:20:37 +08:00
|
|
|
enum ImplicitParameter {
|
2016-06-22 04:46:20 +08:00
|
|
|
FIRST_IMPLICIT,
|
|
|
|
GRID_DIM = FIRST_IMPLICIT,
|
|
|
|
GRID_OFFSET,
|
2015-07-10 05:20:37 +08:00
|
|
|
};
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Helper function that returns the byte offset of the given
|
2015-07-10 05:20:37 +08:00
|
|
|
/// type of implicit parameter.
|
2018-06-28 18:18:55 +08:00
|
|
|
uint32_t getImplicitParameterOffset(const MachineFunction &MF,
|
2015-07-10 05:20:37 +08:00
|
|
|
const ImplicitParameter Param) const;
|
2017-03-27 22:04:01 +08:00
|
|
|
|
2017-04-25 02:26:27 +08:00
|
|
|
MVT getFenceOperandTy(const DataLayout &DL) const override {
|
|
|
|
return MVT::i32;
|
|
|
|
}
|
2018-10-02 11:50:56 +08:00
|
|
|
|
|
|
|
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override;
|
2012-12-12 05:25:42 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
namespace AMDGPUISD {
|
|
|
|
|
2015-05-08 05:33:59 +08:00
|
|
|
enum NodeType : unsigned {
|
2012-12-12 05:25:42 +08:00
|
|
|
// AMDIL ISD Opcodes
|
|
|
|
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
|
|
|
UMUL, // 32bit unsigned multiplication
|
|
|
|
BRANCH_COND,
|
|
|
|
// End AMDIL ISD Opcodes
|
2017-03-18 04:41:45 +08:00
|
|
|
|
2017-03-22 06:18:10 +08:00
|
|
|
// Function call.
|
|
|
|
CALL,
|
2017-08-12 04:42:08 +08:00
|
|
|
TC_RETURN,
|
2017-04-25 01:49:13 +08:00
|
|
|
TRAP,
|
2017-03-22 06:18:10 +08:00
|
|
|
|
2017-03-18 04:41:45 +08:00
|
|
|
// Masked control flow nodes.
|
|
|
|
IF,
|
|
|
|
ELSE,
|
|
|
|
LOOP,
|
|
|
|
|
2017-03-22 06:18:10 +08:00
|
|
|
// A uniform kernel return that terminates the wavefront.
|
2016-06-23 04:15:28 +08:00
|
|
|
ENDPGM,
|
2017-03-22 06:18:10 +08:00
|
|
|
|
|
|
|
// Return to a shader part's epilog code.
|
|
|
|
RETURN_TO_EPILOG,
|
|
|
|
|
|
|
|
// Return with values from a non-entry function.
|
|
|
|
RET_FLAG,
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
DWORDADDR,
|
|
|
|
FRACT,
|
2017-02-22 07:35:48 +08:00
|
|
|
|
|
|
|
/// CLAMP value between 0.0 and 1.0. NaN clamped to 0, following clamp output
|
|
|
|
/// modifier behavior with dx10_enable.
|
2014-06-13 05:15:44 +08:00
|
|
|
CLAMP,
|
2017-02-22 07:35:48 +08:00
|
|
|
|
2016-08-27 09:32:27 +08:00
|
|
|
// This is SETCC with the full mask result which is used for a compare with a
|
2016-07-29 00:42:13 +08:00
|
|
|
// result bit per item in the wavefront.
|
2016-08-27 09:32:27 +08:00
|
|
|
SETCC,
|
2016-12-07 10:42:15 +08:00
|
|
|
SETREG,
|
Re-commit: [AMDGPU] Use S_DENORM_MODE for gfx10
Summary: During fdiv32 lowering use S_DENORM_MODE to select denorm mode in gfx10.
Reviewers: arsenm, rampitec
Reviewed By: arsenm, rampitec
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65620
llvm-svn: 367969
2019-08-06 10:16:11 +08:00
|
|
|
|
|
|
|
DENORM_MODE,
|
|
|
|
|
2016-12-07 10:42:15 +08:00
|
|
|
// FP ops with input and output chain.
|
|
|
|
FMA_W_CHAIN,
|
|
|
|
FMUL_W_CHAIN,
|
2014-06-19 09:19:19 +08:00
|
|
|
|
|
|
|
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
|
|
|
|
// Denormals handled on some parts.
|
2013-07-09 23:03:11 +08:00
|
|
|
COS_HW,
|
|
|
|
SIN_HW,
|
2014-11-14 07:03:09 +08:00
|
|
|
FMAX_LEGACY,
|
|
|
|
FMIN_LEGACY,
|
2018-10-23 00:27:27 +08:00
|
|
|
|
2014-11-15 04:08:52 +08:00
|
|
|
FMAX3,
|
|
|
|
SMAX3,
|
|
|
|
UMAX3,
|
|
|
|
FMIN3,
|
|
|
|
SMIN3,
|
|
|
|
UMIN3,
|
2016-01-29 04:53:42 +08:00
|
|
|
FMED3,
|
|
|
|
SMED3,
|
|
|
|
UMED3,
|
2018-07-17 02:19:59 +08:00
|
|
|
FDOT2,
|
2012-12-12 05:25:42 +08:00
|
|
|
URECIP,
|
2014-06-19 09:19:19 +08:00
|
|
|
DIV_SCALE,
|
|
|
|
DIV_FMAS,
|
|
|
|
DIV_FIXUP,
|
2017-02-25 07:00:29 +08:00
|
|
|
// For emitting ISD::FMAD when f32 denormals are enabled because mac/mad is
|
|
|
|
// treated as an illegal operation.
|
|
|
|
FMAD_FTZ,
|
2014-06-19 09:19:19 +08:00
|
|
|
TRIG_PREOP, // 1 ULP max error for f64
|
|
|
|
|
|
|
|
// RCP, RSQ - For f32, 1 ULP max error, no denormal handling.
|
|
|
|
// For f64, max error 2^29 ULP, handles denormals.
|
|
|
|
RCP,
|
|
|
|
RSQ,
|
2016-07-27 00:45:45 +08:00
|
|
|
RCP_LEGACY,
|
2014-06-25 06:13:39 +08:00
|
|
|
RSQ_LEGACY,
|
2018-06-27 23:33:33 +08:00
|
|
|
RCP_IFLAG,
|
2016-07-27 00:45:45 +08:00
|
|
|
FMUL_LEGACY,
|
2016-02-13 09:03:00 +08:00
|
|
|
RSQ_CLAMP,
|
2014-08-16 01:30:25 +08:00
|
|
|
LDEXP,
|
2015-01-07 07:00:37 +08:00
|
|
|
FP_CLASS,
|
2013-05-18 00:50:32 +08:00
|
|
|
DOT4,
|
2015-05-01 01:15:56 +08:00
|
|
|
CARRY,
|
|
|
|
BORROW,
|
2014-03-18 02:58:11 +08:00
|
|
|
BFE_U32, // Extract range of bits with zero extension to 32-bits.
|
|
|
|
BFE_I32, // Extract range of bits with sign extension to 32-bits.
|
2014-04-01 02:21:13 +08:00
|
|
|
BFI, // (src0 & src1) | (~src0 & src2)
|
|
|
|
BFM, // Insert a range of bits into a 32-bit word.
|
2016-01-12 01:02:00 +08:00
|
|
|
FFBH_U32, // ctlz with -1 if input is zero.
|
2016-07-19 02:35:05 +08:00
|
|
|
FFBH_I32,
|
2017-10-13 03:37:14 +08:00
|
|
|
FFBL_B32, // cttz with -1 if input is zero.
|
2014-04-08 03:45:41 +08:00
|
|
|
MUL_U24,
|
|
|
|
MUL_I24,
|
2016-08-27 09:32:27 +08:00
|
|
|
MULHI_U24,
|
|
|
|
MULHI_I24,
|
2014-05-23 02:00:15 +08:00
|
|
|
MAD_U24,
|
|
|
|
MAD_I24,
|
2017-11-07 01:04:37 +08:00
|
|
|
MAD_U64_U32,
|
|
|
|
MAD_I64_I32,
|
2016-08-27 09:32:27 +08:00
|
|
|
MUL_LOHI_I24,
|
|
|
|
MUL_LOHI_U24,
|
2018-06-13 07:50:37 +08:00
|
|
|
PERM,
|
2013-05-18 00:50:20 +08:00
|
|
|
TEXTURE_FETCH,
|
2016-12-06 04:23:10 +08:00
|
|
|
R600_EXPORT,
|
2013-01-23 10:09:03 +08:00
|
|
|
CONST_ADDRESS,
|
2013-02-07 01:32:29 +08:00
|
|
|
REGISTER_LOAD,
|
|
|
|
REGISTER_STORE,
|
2013-08-15 07:24:45 +08:00
|
|
|
SAMPLE,
|
|
|
|
SAMPLEB,
|
|
|
|
SAMPLED,
|
|
|
|
SAMPLEL,
|
2014-06-12 01:50:44 +08:00
|
|
|
|
|
|
|
// These cvt_f32_ubyte* nodes need to remain consecutive and in order.
|
|
|
|
CVT_F32_UBYTE0,
|
|
|
|
CVT_F32_UBYTE1,
|
|
|
|
CVT_F32_UBYTE2,
|
|
|
|
CVT_F32_UBYTE3,
|
2017-02-22 08:27:34 +08:00
|
|
|
|
|
|
|
// Convert two float 32 numbers into a single register holding two packed f16
|
|
|
|
// with round to zero.
|
|
|
|
CVT_PKRTZ_F16_F32,
|
2018-02-01 04:18:04 +08:00
|
|
|
CVT_PKNORM_I16_F32,
|
|
|
|
CVT_PKNORM_U16_F32,
|
|
|
|
CVT_PK_I16_I32,
|
|
|
|
CVT_PK_U16_U32,
|
2017-02-22 08:27:34 +08:00
|
|
|
|
2017-03-16 03:04:26 +08:00
|
|
|
// Same as the standard node, except the high bits of the resulting integer
|
|
|
|
// are known 0.
|
|
|
|
FP_TO_FP16,
|
|
|
|
|
2017-04-01 03:53:03 +08:00
|
|
|
// Wrapper around fp16 results that are known to zero the high bits.
|
|
|
|
FP16_ZEXT,
|
|
|
|
|
2014-06-18 00:53:14 +08:00
|
|
|
/// This node is for VLIW targets and it is used to represent a vector
|
|
|
|
/// that is stored in consecutive registers with the same channel.
|
|
|
|
/// For example:
|
|
|
|
/// |X |Y|Z|W|
|
|
|
|
/// T0|v.x| | | |
|
|
|
|
/// T1|v.y| | | |
|
|
|
|
/// T2|v.z| | | |
|
|
|
|
/// T3|v.w| | | |
|
|
|
|
BUILD_VERTICAL_VECTOR,
|
2014-07-21 22:01:14 +08:00
|
|
|
/// Pointer to the start of the shader's constant data.
|
|
|
|
CONST_DATA_PTR,
|
[AMDGPU] Add intrinsics for 16 bit interpolation
Summary:
Added the intrinsics llvm.amdgcn.interp.p1.f16() and
llvm.amdgcn.interp.p2.f16() and related LIT test.
The p1 intrinsic generates code appropriate for both 16 and 32
bank LDS.
Reviewers: #amdgpu, dstuttard, arsenm, tpr
Reviewed By: #amdgpu, arsenm
Subscribers: jvesely, mgorny, arsenm, kzhuravl, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D46754
llvm-svn: 352357
2019-01-28 21:48:59 +08:00
|
|
|
INTERP_P1LL_F16,
|
|
|
|
INTERP_P1LV_F16,
|
|
|
|
INTERP_P2_F16,
|
2016-06-15 04:29:59 +08:00
|
|
|
PC_ADD_REL_OFFSET,
|
AMDGPU: Write LDS objects out as global symbols in code generation
Summary:
The symbols use the processor-specific SHN_AMDGPU_LDS section index
introduced with a previous change. The linker is then expected to resolve
relocations, which are also emitted.
Initially disabled for HSA and PAL environments until they have caught up
in terms of linker and runtime loader.
Some notes:
- The llvm.amdgcn.groupstaticsize intrinsics can no longer be lowered
to a constant at compile times, which means some tests can no longer
be applied.
The current "solution" is a terrible hack, but the intrinsic isn't
used by Mesa, so we can keep it for now.
- We no longer know the full LDS size per kernel at compile time, which
means that we can no longer generate a relevant error message at
compile time. It would be possible to add a check for the size of
individual variables, but ultimately the linker will have to perform
the final check.
Change-Id: If66dbf33fccfbf3609aefefa2558ac0850d42275
Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin
Subscribers: qcolombet, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61494
llvm-svn: 364297
2019-06-25 19:52:30 +08:00
|
|
|
LDS,
|
2016-07-20 00:27:56 +08:00
|
|
|
KILL,
|
2017-01-21 05:24:26 +08:00
|
|
|
DUMMY_CHAIN,
|
2013-08-15 07:24:45 +08:00
|
|
|
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
|
2019-03-09 04:58:11 +08:00
|
|
|
LOAD_D16_HI,
|
|
|
|
LOAD_D16_LO,
|
|
|
|
LOAD_D16_HI_I8,
|
|
|
|
LOAD_D16_HI_U8,
|
|
|
|
LOAD_D16_LO_I8,
|
|
|
|
LOAD_D16_LO_U8,
|
|
|
|
|
2013-08-16 09:12:06 +08:00
|
|
|
STORE_MSKOR,
|
2013-08-15 07:24:45 +08:00
|
|
|
LOAD_CONSTANT,
|
2013-09-12 10:55:14 +08:00
|
|
|
TBUFFER_STORE_FORMAT,
|
2018-01-13 05:12:19 +08:00
|
|
|
TBUFFER_STORE_FORMAT_D16,
|
2017-06-23 00:29:22 +08:00
|
|
|
TBUFFER_LOAD_FORMAT,
|
2018-01-13 05:12:19 +08:00
|
|
|
TBUFFER_LOAD_FORMAT_D16,
|
2019-01-16 23:43:53 +08:00
|
|
|
DS_ORDERED_COUNT,
|
AMDGPU: Implement {BUFFER,FLAT}_ATOMIC_CMPSWAP{,_X2}
Summary:
Implement BUFFER_ATOMIC_CMPSWAP{,_X2} instructions on all GCN targets, and FLAT_ATOMIC_CMPSWAP{,_X2} on CI+.
32-bit instruction variants tested manually on Kabini and Bonaire. Tests and parts of code provided by Jan Veselý.
Patch by: Vedran Miletić
Reviewers: arsenm, tstellarAMD, nhaehnle
Subscribers: jvesely, scchan, kanarayan, arsenm
Differential Revision: http://reviews.llvm.org/D17280
llvm-svn: 265170
2016-04-02 02:27:37 +08:00
|
|
|
ATOMIC_CMP_SWAP,
|
2016-04-12 22:05:04 +08:00
|
|
|
ATOMIC_INC,
|
|
|
|
ATOMIC_DEC,
|
2018-01-17 22:05:05 +08:00
|
|
|
ATOMIC_LOAD_FMIN,
|
|
|
|
ATOMIC_LOAD_FMAX,
|
2016-12-21 01:19:44 +08:00
|
|
|
BUFFER_LOAD,
|
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary:
Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
2019-03-20 00:07:00 +08:00
|
|
|
BUFFER_LOAD_UBYTE,
|
|
|
|
BUFFER_LOAD_USHORT,
|
|
|
|
BUFFER_LOAD_BYTE,
|
|
|
|
BUFFER_LOAD_SHORT,
|
2016-12-21 01:19:44 +08:00
|
|
|
BUFFER_LOAD_FORMAT,
|
2018-01-13 05:12:19 +08:00
|
|
|
BUFFER_LOAD_FORMAT_D16,
|
[AMDGPU] Add support for multi-dword s.buffer.load intrinsic
Summary:
Patch by Marek Olsak and David Stuttard, both of AMD.
This adds a new amdgcn intrinsic supporting s.buffer.load, in particular
multiple dword variants. These are convenient to use from some front-end
implementations.
Also modified the existing llvm.SI.load.const intrinsic to common up the
underlying implementation.
This modification also requires that we can lower to non-uniform loads correctly
by splitting larger dword variants into sizes supported by the non-uniform
versions of the load.
V2: Addressed minor review comments.
V3: i1 glc is now i32 cachepolicy for consistency with buffer and
tbuffer intrinsics, plus fixed formatting issue.
V4: Added glc test.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D51098
Change-Id: I83a6e00681158bb243591a94a51c7baa445f169b
llvm-svn: 340684
2018-08-25 22:53:17 +08:00
|
|
|
SBUFFER_LOAD,
|
2017-11-09 09:52:48 +08:00
|
|
|
BUFFER_STORE,
|
[AMDGPU] Add buffer/load 8/16 bit overloaded intrinsics
Summary:
Add buffer store/load 8/16 overloaded intrinsics for buffer, raw_buffer and struct_buffer
Change-Id: I166a29f071b2ff4e4683fb0392564b1f223ac61d
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D59265
llvm-svn: 356465
2019-03-20 00:07:00 +08:00
|
|
|
BUFFER_STORE_BYTE,
|
|
|
|
BUFFER_STORE_SHORT,
|
2017-11-09 09:52:48 +08:00
|
|
|
BUFFER_STORE_FORMAT,
|
2018-01-13 05:12:19 +08:00
|
|
|
BUFFER_STORE_FORMAT_D16,
|
2017-11-09 09:52:48 +08:00
|
|
|
BUFFER_ATOMIC_SWAP,
|
|
|
|
BUFFER_ATOMIC_ADD,
|
|
|
|
BUFFER_ATOMIC_SUB,
|
|
|
|
BUFFER_ATOMIC_SMIN,
|
|
|
|
BUFFER_ATOMIC_UMIN,
|
|
|
|
BUFFER_ATOMIC_SMAX,
|
|
|
|
BUFFER_ATOMIC_UMAX,
|
|
|
|
BUFFER_ATOMIC_AND,
|
|
|
|
BUFFER_ATOMIC_OR,
|
|
|
|
BUFFER_ATOMIC_XOR,
|
AMDGPU: add missing llvm.amdgcn.{raw,struct}.buffer.atomic.{inc,dec}
Summary:
Wrapping increment/decrement. These aren't exposed by many APIs...
Change-Id: I1df25c7889de5a5ba76468ad8e8a2597efa9af6c
Reviewers: arsenm, tpr, dstuttard
Subscribers: kzhuravl, jvesely, wdng, yaxunl, t-tye, jfb, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D65283
llvm-svn: 367821
2019-08-05 17:36:06 +08:00
|
|
|
BUFFER_ATOMIC_INC,
|
|
|
|
BUFFER_ATOMIC_DEC,
|
2017-11-09 09:52:48 +08:00
|
|
|
BUFFER_ATOMIC_CMPSWAP,
|
2019-07-11 08:10:17 +08:00
|
|
|
BUFFER_ATOMIC_FADD,
|
|
|
|
BUFFER_ATOMIC_PK_FADD,
|
|
|
|
ATOMIC_PK_FADD,
|
2018-01-19 06:08:53 +08:00
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
LAST_AMDGPU_ISD_NUMBER
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
} // End namespace AMDGPUISD
|
|
|
|
|
|
|
|
} // End namespace llvm
|
|
|
|
|
2014-08-14 00:26:38 +08:00
|
|
|
#endif
|