2012-12-12 05:25:42 +08:00
|
|
|
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
|
|
|
/// \file
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Custom DAG lowering for R600
|
2012-12-12 05:25:42 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
#include "R600ISelLowering.h"
|
2014-06-13 09:32:00 +08:00
|
|
|
#include "AMDGPUFrameLowering.h"
|
|
|
|
#include "AMDGPUSubtarget.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "R600Defines.h"
|
2016-12-10 06:06:55 +08:00
|
|
|
#include "R600FrameLowering.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "R600InstrInfo.h"
|
|
|
|
#include "R600MachineFunctionInfo.h"
|
AMDGPU: Remove #include "MCTargetDesc/AMDGPUMCTargetDesc.h" from common headers
Summary:
MCTargetDesc/AMDGPUMCTargetDesc.h contains enums for all the instruction
and register definitions, which are huge so we only want to include
them where needed.
This will also make it easier if we want to split the R600 and GCN
definitions into separate tablegenerated files.
I was unable to remove AMDGPUMCTargetDesc.h from SIMachineFunctionInfo.h
because it uses some enums from the header to initialize default values
for the SIMachineFunction class, so I ended up having to remove includes of
SIMachineFunctionInfo.h from headers too.
Reviewers: arsenm, nhaehnle
Reviewed By: nhaehnle
Subscribers: MatzeB, kzhuravl, wdng, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46272
llvm-svn: 332930
2018-05-22 10:03:23 +08:00
|
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
2016-12-10 06:06:55 +08:00
|
|
|
#include "Utils/AMDGPUBaseInfo.h"
|
|
|
|
#include "llvm/ADT/APFloat.h"
|
|
|
|
#include "llvm/ADT/APInt.h"
|
|
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
|
|
#include "llvm/ADT/DenseMap.h"
|
|
|
|
#include "llvm/ADT/SmallVector.h"
|
2013-07-23 09:48:05 +08:00
|
|
|
#include "llvm/CodeGen/CallingConvLower.h"
|
2016-12-10 06:06:55 +08:00
|
|
|
#include "llvm/CodeGen/DAGCombine.h"
|
|
|
|
#include "llvm/CodeGen/ISDOpcodes.h"
|
|
|
|
#include "llvm/CodeGen/MachineBasicBlock.h"
|
|
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
2016-12-10 06:06:55 +08:00
|
|
|
#include "llvm/CodeGen/MachineMemOperand.h"
|
2012-12-12 05:25:42 +08:00
|
|
|
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
|
|
|
#include "llvm/CodeGen/SelectionDAG.h"
|
2016-12-10 06:06:55 +08:00
|
|
|
#include "llvm/IR/Constants.h"
|
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
|
|
|
#include "llvm/Support/Casting.h"
|
|
|
|
#include "llvm/Support/Compiler.h"
|
|
|
|
#include "llvm/Support/ErrorHandling.h"
|
2018-03-24 07:58:25 +08:00
|
|
|
#include "llvm/Support/MachineValueType.h"
|
2016-12-10 06:06:55 +08:00
|
|
|
#include <cassert>
|
|
|
|
#include <cstdint>
|
|
|
|
#include <iterator>
|
|
|
|
#include <utility>
|
|
|
|
#include <vector>
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
using namespace llvm;
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
|
|
|
|
CCValAssign::LocInfo LocInfo,
|
|
|
|
ISD::ArgFlagsTy ArgFlags, CCState &State) {
|
|
|
|
MachineFunction &MF = State.getMachineFunction();
|
|
|
|
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
|
|
|
|
|
|
|
|
uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
|
|
|
|
ArgFlags.getOrigAlign());
|
|
|
|
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
#include "R600GenCallingConv.inc"
|
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
|
|
|
|
const R600Subtarget &STI)
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
: AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
|
|
|
|
addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
|
|
|
|
addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
|
|
|
|
addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
|
|
|
|
addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
|
|
|
|
addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
|
|
|
|
addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
|
2013-08-01 23:23:42 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
computeRegisterProperties(Subtarget->getRegisterInfo());
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2016-05-21 10:27:49 +08:00
|
|
|
// Legalize loads and stores to the private address space.
|
|
|
|
setOperationAction(ISD::LOAD, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
|
|
|
|
setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
|
|
|
|
|
|
|
|
// EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
|
|
|
|
// spaces, so it is custom lowered to handle those where it isn't.
|
|
|
|
for (MVT VT : MVT::integer_valuetypes()) {
|
|
|
|
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
|
|
|
|
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
|
|
|
|
setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);
|
|
|
|
|
|
|
|
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
|
|
|
|
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
|
|
|
|
setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
|
|
|
|
|
|
|
|
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
|
|
|
|
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
|
|
|
|
setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
|
|
|
|
}
|
|
|
|
|
2016-06-03 03:54:26 +08:00
|
|
|
// Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
|
|
|
|
setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
|
|
|
|
setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
|
|
|
|
setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
|
|
|
|
|
|
|
|
setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
|
|
|
|
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
|
|
|
|
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
|
|
|
|
|
2016-05-21 10:27:49 +08:00
|
|
|
setOperationAction(ISD::STORE, MVT::i8, Custom);
|
|
|
|
setOperationAction(ISD::STORE, MVT::i32, Custom);
|
|
|
|
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
|
|
|
|
setOperationAction(ISD::STORE, MVT::v4i32, Custom);
|
|
|
|
|
|
|
|
setTruncStoreAction(MVT::i32, MVT::i8, Custom);
|
|
|
|
setTruncStoreAction(MVT::i32, MVT::i16, Custom);
|
2017-01-07 05:00:46 +08:00
|
|
|
// We need to include these since trunc STORES to PRIVATE need
|
|
|
|
// special handling to accommodate RMW
|
|
|
|
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
|
|
|
|
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
|
|
|
|
setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
|
|
|
|
setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
|
|
|
|
setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
|
|
|
|
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
|
|
|
|
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
|
|
|
|
setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
|
|
|
|
setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
|
|
|
|
setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
|
2016-05-21 10:27:49 +08:00
|
|
|
|
2016-06-03 03:54:26 +08:00
|
|
|
// Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
|
|
|
|
setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
|
|
|
|
setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
|
|
|
|
|
2013-09-28 10:50:50 +08:00
|
|
|
// Set condition code actions
|
|
|
|
setCondCodeAction(ISD::SETO, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
|
2013-09-28 10:50:38 +08:00
|
|
|
setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
|
2013-09-28 10:50:50 +08:00
|
|
|
setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
|
2013-09-28 10:50:38 +08:00
|
|
|
setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
|
2013-09-28 10:50:50 +08:00
|
|
|
setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
|
2013-09-28 10:50:38 +08:00
|
|
|
setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
|
|
|
|
|
|
|
|
setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
|
|
|
|
setCondCodeAction(ISD::SETULT, MVT::i32, Expand);
|
|
|
|
|
2013-07-09 23:03:11 +08:00
|
|
|
setOperationAction(ISD::FCOS, MVT::f32, Custom);
|
|
|
|
setOperationAction(ISD::FSIN, MVT::f32, Custom);
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
|
2013-08-01 23:23:42 +08:00
|
|
|
setOperationAction(ISD::SETCC, MVT::v2i32, Expand);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2013-03-08 23:37:07 +08:00
|
|
|
setOperationAction(ISD::BR_CC, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::BR_CC, MVT::f32, Expand);
|
2014-06-24 02:00:55 +08:00
|
|
|
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
setOperationAction(ISD::FSUB, MVT::f32, Expand);
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
setOperationAction(ISD::FCEIL, MVT::f64, Custom);
|
|
|
|
setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
|
|
|
|
setOperationAction(ISD::FRINT, MVT::f64, Custom);
|
|
|
|
setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
|
|
|
|
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
|
|
|
|
|
2013-03-08 23:37:05 +08:00
|
|
|
setOperationAction(ISD::SETCC, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SETCC, MVT::f32, Expand);
|
2012-12-12 05:25:42 +08:00
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
|
2016-07-23 01:01:21 +08:00
|
|
|
setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
|
2014-07-11 06:40:21 +08:00
|
|
|
setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
|
|
|
|
setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2013-09-06 02:38:03 +08:00
|
|
|
setOperationAction(ISD::SELECT, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
|
|
|
|
setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2015-05-01 01:15:56 +08:00
|
|
|
// ADD, SUB overflow.
|
|
|
|
// TODO: turn these into Legal?
|
|
|
|
if (Subtarget->hasCARRY())
|
|
|
|
setOperationAction(ISD::UADDO, MVT::i32, Custom);
|
|
|
|
|
|
|
|
if (Subtarget->hasBORROW())
|
|
|
|
setOperationAction(ISD::USUBO, MVT::i32, Custom);
|
|
|
|
|
2014-04-16 09:41:30 +08:00
|
|
|
// Expand sign extension of vectors
|
|
|
|
if (!Subtarget->hasBFE())
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
|
|
|
|
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);
|
|
|
|
|
|
|
|
if (!Subtarget->hasBFE())
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);
|
|
|
|
|
|
|
|
if (!Subtarget->hasBFE())
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);
|
|
|
|
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);
|
|
|
|
|
|
|
|
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
|
|
|
|
|
2013-02-07 01:32:29 +08:00
|
|
|
setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
|
|
|
|
|
2014-06-18 00:53:14 +08:00
|
|
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
|
|
|
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
|
|
|
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
|
|
|
|
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
|
|
|
|
|
|
|
|
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
|
|
|
|
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
|
|
|
|
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
|
|
|
|
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
|
|
|
|
|
2014-06-18 20:27:13 +08:00
|
|
|
// We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
|
|
|
|
// to be Legal/Custom in order to avoid library calls.
|
|
|
|
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
|
2014-06-18 20:27:15 +08:00
|
|
|
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
|
2014-06-18 20:27:17 +08:00
|
|
|
setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
|
2014-06-18 20:27:13 +08:00
|
|
|
|
2017-12-05 07:07:28 +08:00
|
|
|
if (!Subtarget->hasFMA()) {
|
|
|
|
setOperationAction(ISD::FMA, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::FMA, MVT::f64, Expand);
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
|
|
|
|
// FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
|
|
|
|
// need it for R600.
|
|
|
|
if (!Subtarget->hasFP32Denormals())
|
|
|
|
setOperationAction(ISD::FMAD, MVT::f32, Legal);
|
|
|
|
|
|
|
|
if (!Subtarget->hasBFI()) {
|
|
|
|
// fcopysign can be done in a single instruction with BFI.
|
|
|
|
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
|
|
|
|
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!Subtarget->hasBCNT(32))
|
|
|
|
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
|
|
|
|
|
|
|
|
if (!Subtarget->hasBCNT(64))
|
|
|
|
setOperationAction(ISD::CTPOP, MVT::i64, Expand);
|
|
|
|
|
|
|
|
if (Subtarget->hasFFBH())
|
|
|
|
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
|
|
|
|
|
|
|
|
if (Subtarget->hasFFBL())
|
|
|
|
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
|
|
|
|
|
|
|
|
// FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
|
|
|
|
// need it for R600.
|
|
|
|
if (Subtarget->hasBFE())
|
|
|
|
setHasExtractBitsInsn(true);
|
2017-12-05 07:07:28 +08:00
|
|
|
|
2013-07-11 00:37:07 +08:00
|
|
|
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
|
|
|
|
2014-06-24 02:00:49 +08:00
|
|
|
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
|
|
|
|
for (MVT VT : ScalarIntVTs) {
|
|
|
|
setOperationAction(ISD::ADDC, VT, Expand);
|
|
|
|
setOperationAction(ISD::SUBC, VT, Expand);
|
|
|
|
setOperationAction(ISD::ADDE, VT, Expand);
|
|
|
|
setOperationAction(ISD::SUBE, VT, Expand);
|
|
|
|
}
|
|
|
|
|
2017-01-17 05:20:13 +08:00
|
|
|
// LLVM will expand these to atomic_cmp_swap(0)
|
|
|
|
// and atomic_swap, respectively.
|
|
|
|
setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
|
|
|
|
setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
|
|
|
|
|
2017-04-04 02:08:08 +08:00
|
|
|
// We need to custom lower some of the intrinsics
|
|
|
|
setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
|
|
|
|
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
|
|
|
|
|
2013-08-13 06:33:21 +08:00
|
|
|
setSchedulingPreference(Sched::Source);
|
2016-05-21 10:27:49 +08:00
|
|
|
|
|
|
|
setTargetDAGCombine(ISD::FP_ROUND);
|
|
|
|
setTargetDAGCombine(ISD::FP_TO_SINT);
|
|
|
|
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
|
|
|
|
setTargetDAGCombine(ISD::SELECT_CC);
|
|
|
|
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
|
2016-08-28 03:09:43 +08:00
|
|
|
setTargetDAGCombine(ISD::LOAD);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
|
2015-10-02 01:51:29 +08:00
|
|
|
/// Reports whether the instruction immediately after \p I is R600::RETURN.
///
/// \returns false when \p I is the last instruction of its parent block
/// (there is no following instruction to inspect); otherwise true exactly
/// when the following instruction's opcode is R600::RETURN.
static inline bool isEOP(MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator Following = std::next(I);
  return Following != I->getParent()->end() &&
         Following->getOpcode() == R600::RETURN;
}
|
|
|
|
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineBasicBlock *
|
|
|
|
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
|
|
|
|
MachineBasicBlock *BB) const {
|
2016-12-10 06:06:55 +08:00
|
|
|
MachineFunction *MF = BB->getParent();
|
2012-12-12 05:25:42 +08:00
|
|
|
MachineRegisterInfo &MRI = MF->getRegInfo();
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineBasicBlock::iterator I = MI;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
const R600InstrInfo *TII = Subtarget->getInstrInfo();
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2016-07-01 06:52:52 +08:00
|
|
|
switch (MI.getOpcode()) {
|
2013-08-26 23:05:59 +08:00
|
|
|
default:
|
2013-11-15 08:12:45 +08:00
|
|
|
// Replace LDS_*_RET instruction that don't have any uses with the
|
|
|
|
// equivalent LDS_*_NORET instruction.
|
2016-07-01 06:52:52 +08:00
|
|
|
if (TII->isLDSRetInstr(MI.getOpcode())) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
|
2013-09-06 02:38:09 +08:00
|
|
|
assert(DstIdx != -1);
|
|
|
|
MachineInstrBuilder NewMI;
|
2014-09-11 23:02:54 +08:00
|
|
|
// FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
|
|
|
|
// LDS_1A2D support and remove this special case.
|
2016-07-01 06:52:52 +08:00
|
|
|
if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
MI.getOpcode() == R600::LDS_CMPST_RET)
|
2013-11-15 08:12:45 +08:00
|
|
|
return BB;
|
|
|
|
|
|
|
|
NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
|
2016-07-01 06:52:52 +08:00
|
|
|
for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
|
2017-01-13 17:58:52 +08:00
|
|
|
NewMI.add(MI.getOperand(i));
|
2013-08-26 23:05:59 +08:00
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
|
|
|
|
}
|
|
|
|
break;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::FABS_R600: {
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineInstr *NewMI = TII->buildDefaultInstruction(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
*BB, I, R600::MOV, MI.getOperand(0).getReg(),
|
2016-07-01 06:52:52 +08:00
|
|
|
MI.getOperand(1).getReg());
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::FNEG_R600: {
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineInstr *NewMI = TII->buildDefaultInstruction(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
*BB, I, R600::MOV, MI.getOperand(0).getReg(),
|
2016-07-01 06:52:52 +08:00
|
|
|
MI.getOperand(1).getReg());
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::MASK_WRITE: {
|
2016-07-01 06:52:52 +08:00
|
|
|
unsigned maskedRegister = MI.getOperand(0).getReg();
|
2012-12-12 05:25:42 +08:00
|
|
|
assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
|
|
|
|
MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::MOV_IMM_F32:
|
2016-07-01 06:52:52 +08:00
|
|
|
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
|
|
|
|
.getFPImm()
|
|
|
|
->getValueAPF()
|
|
|
|
.bitcastToAPInt()
|
|
|
|
.getZExtValue());
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
2016-12-10 06:06:55 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::MOV_IMM_I32:
|
2016-07-01 06:52:52 +08:00
|
|
|
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
|
|
|
|
MI.getOperand(1).getImm());
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
2016-12-10 06:06:55 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::MOV_IMM_GLOBAL_ADDR: {
|
2016-05-14 04:39:29 +08:00
|
|
|
//TODO: Perhaps combine this instruction with the next if possible
|
2016-07-01 06:52:52 +08:00
|
|
|
auto MIB = TII->buildDefaultInstruction(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
*BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
|
|
|
|
int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
|
2016-05-14 04:39:29 +08:00
|
|
|
//TODO: Ugh this is rather ugly
|
2016-07-01 06:52:52 +08:00
|
|
|
MIB->getOperand(Idx) = MI.getOperand(1);
|
2016-05-14 04:39:29 +08:00
|
|
|
break;
|
|
|
|
}
|
2016-12-10 06:06:55 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CONST_COPY: {
|
2016-07-01 06:52:52 +08:00
|
|
|
MachineInstr *NewMI = TII->buildDefaultInstruction(
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
*BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
|
|
|
|
TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
|
2016-07-01 06:52:52 +08:00
|
|
|
MI.getOperand(1).getImm());
|
2013-03-05 23:04:55 +08:00
|
|
|
break;
|
|
|
|
}
|
2012-12-12 05:25:42 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::RAT_WRITE_CACHELESS_32_eg:
|
|
|
|
case R600::RAT_WRITE_CACHELESS_64_eg:
|
|
|
|
case R600::RAT_WRITE_CACHELESS_128_eg:
|
2016-07-01 06:52:52 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0))
|
|
|
|
.add(MI.getOperand(1))
|
2016-07-01 06:52:52 +08:00
|
|
|
.addImm(isEOP(I)); // Set End of program bit
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
2016-12-10 06:06:55 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::RAT_STORE_TYPED_eg:
|
2016-07-01 06:52:52 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0))
|
|
|
|
.add(MI.getOperand(1))
|
|
|
|
.add(MI.getOperand(2))
|
2016-07-01 06:52:52 +08:00
|
|
|
.addImm(isEOP(I)); // Set End of program bit
|
2015-10-02 01:51:34 +08:00
|
|
|
break;
|
2016-12-10 06:06:55 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::BRANCH:
|
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0));
|
2016-07-01 06:52:52 +08:00
|
|
|
break;
|
2012-12-12 05:25:42 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::BRANCH_COND_f32: {
|
2012-12-12 05:25:42 +08:00
|
|
|
MachineInstr *NewMI =
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
|
|
|
|
R600::PREDICATE_BIT)
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(1))
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
.addImm(R600::PRED_SETNE)
|
2016-07-01 06:52:52 +08:00
|
|
|
.addImm(0); // Flags
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0))
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
.addReg(R600::PREDICATE_BIT, RegState::Kill);
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::BRANCH_COND_i32: {
|
2012-12-12 05:25:42 +08:00
|
|
|
MachineInstr *NewMI =
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
|
|
|
|
R600::PREDICATE_BIT)
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(1))
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
.addImm(R600::PRED_SETNE_INT)
|
2012-12-12 05:25:42 +08:00
|
|
|
.addImm(0); // Flags
|
2016-06-30 08:01:54 +08:00
|
|
|
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0))
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
.addReg(R600::PREDICATE_BIT, RegState::Kill);
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::EG_ExportSwz:
|
|
|
|
case R600::R600_ExportSwz: {
|
2013-01-24 05:39:49 +08:00
|
|
|
// Instruction is left unmodified if it's not the last one of its type
|
|
|
|
bool isLastInstructionOfItsType = true;
|
2016-07-01 06:52:52 +08:00
|
|
|
unsigned InstExportType = MI.getOperand(1).getImm();
|
2014-03-02 20:27:27 +08:00
|
|
|
for (MachineBasicBlock::iterator NextExportInst = std::next(I),
|
2013-01-24 05:39:49 +08:00
|
|
|
EndBlock = BB->end(); NextExportInst != EndBlock;
|
2014-03-02 20:27:27 +08:00
|
|
|
NextExportInst = std::next(NextExportInst)) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
|
|
|
|
NextExportInst->getOpcode() == R600::R600_ExportSwz) {
|
2013-01-24 05:39:49 +08:00
|
|
|
unsigned CurrentInstExportType = NextExportInst->getOperand(1)
|
|
|
|
.getImm();
|
|
|
|
if (CurrentInstExportType == InstExportType) {
|
|
|
|
isLastInstructionOfItsType = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-10-02 01:51:29 +08:00
|
|
|
bool EOP = isEOP(I);
|
2013-01-24 05:39:49 +08:00
|
|
|
if (!EOP && !isLastInstructionOfItsType)
|
2012-12-12 05:25:42 +08:00
|
|
|
return BB;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
|
2016-07-01 06:52:52 +08:00
|
|
|
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
|
2017-01-13 17:58:52 +08:00
|
|
|
.add(MI.getOperand(0))
|
|
|
|
.add(MI.getOperand(1))
|
|
|
|
.add(MI.getOperand(2))
|
|
|
|
.add(MI.getOperand(3))
|
|
|
|
.add(MI.getOperand(4))
|
|
|
|
.add(MI.getOperand(5))
|
|
|
|
.add(MI.getOperand(6))
|
2016-07-01 06:52:52 +08:00
|
|
|
.addImm(CfInst)
|
|
|
|
.addImm(EOP);
|
2012-12-12 05:25:42 +08:00
|
|
|
break;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::RETURN: {
|
2013-02-06 01:53:52 +08:00
|
|
|
return BB;
|
|
|
|
}
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
|
2016-07-01 06:52:52 +08:00
|
|
|
MI.eraseFromParent();
|
2012-12-12 05:25:42 +08:00
|
|
|
return BB;
|
|
|
|
}
|
|
|
|
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Custom DAG Lowering Operations
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
|
2013-06-28 23:47:08 +08:00
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
|
|
|
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
|
2012-12-12 05:25:42 +08:00
|
|
|
switch (Op.getOpcode()) {
|
|
|
|
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
|
2014-06-18 00:53:14 +08:00
|
|
|
case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
|
|
|
|
case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
|
2014-06-18 20:27:13 +08:00
|
|
|
case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
|
2014-06-18 20:27:17 +08:00
|
|
|
case ISD::SRA_PARTS:
|
2014-06-18 20:27:15 +08:00
|
|
|
case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
|
2015-05-01 01:15:56 +08:00
|
|
|
case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
|
|
|
|
case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
|
2013-07-09 23:03:11 +08:00
|
|
|
case ISD::FCOS:
|
|
|
|
case ISD::FSIN: return LowerTrig(Op, DAG);
|
2012-12-12 05:25:42 +08:00
|
|
|
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
|
|
|
|
case ISD::STORE: return LowerSTORE(Op, DAG);
|
2014-07-08 02:34:45 +08:00
|
|
|
case ISD::LOAD: {
|
|
|
|
SDValue Result = LowerLOAD(Op, DAG);
|
|
|
|
assert((!Result.getNode() ||
|
|
|
|
Result.getNode()->getNumValues() == 2) &&
|
|
|
|
"Load should return a value and a chain");
|
|
|
|
return Result;
|
|
|
|
}
|
|
|
|
|
2014-06-24 02:00:55 +08:00
|
|
|
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
|
2013-06-28 23:47:08 +08:00
|
|
|
case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
|
2016-03-08 05:10:13 +08:00
|
|
|
case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
|
2012-12-12 05:25:42 +08:00
|
|
|
case ISD::INTRINSIC_VOID: {
|
|
|
|
SDValue Chain = Op.getOperand(0);
|
|
|
|
unsigned IntrinsicID =
|
|
|
|
cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
|
|
|
|
switch (IntrinsicID) {
|
2018-06-01 10:19:46 +08:00
|
|
|
case Intrinsic::r600_store_swizzle: {
|
2015-04-28 22:05:47 +08:00
|
|
|
SDLoc DL(Op);
|
2013-02-15 00:55:06 +08:00
|
|
|
const SDValue Args[8] = {
|
|
|
|
Chain,
|
|
|
|
Op.getOperand(2), // Export Value
|
|
|
|
Op.getOperand(3), // ArrayBase
|
|
|
|
Op.getOperand(4), // Type
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, DL, MVT::i32), // SWZ_X
|
|
|
|
DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
|
|
|
|
DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
|
|
|
|
DAG.getConstant(3, DL, MVT::i32) // SWZ_W
|
2013-02-15 00:55:06 +08:00
|
|
|
};
|
2016-12-06 04:23:10 +08:00
|
|
|
return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// default for switch(IntrinsicID)
|
|
|
|
default: break;
|
|
|
|
}
|
|
|
|
// break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
case ISD::INTRINSIC_WO_CHAIN: {
|
|
|
|
unsigned IntrinsicID =
|
|
|
|
cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
|
|
|
|
EVT VT = Op.getValueType();
|
2013-05-25 10:42:55 +08:00
|
|
|
SDLoc DL(Op);
|
2017-04-04 02:08:08 +08:00
|
|
|
switch (IntrinsicID) {
|
2018-06-01 10:19:46 +08:00
|
|
|
case Intrinsic::r600_tex:
|
|
|
|
case Intrinsic::r600_texc: {
|
2013-05-18 00:50:20 +08:00
|
|
|
unsigned TextureOp;
|
|
|
|
switch (IntrinsicID) {
|
2018-06-01 10:19:46 +08:00
|
|
|
case Intrinsic::r600_tex:
|
2013-05-18 00:50:20 +08:00
|
|
|
TextureOp = 0;
|
|
|
|
break;
|
2018-06-01 10:19:46 +08:00
|
|
|
case Intrinsic::r600_texc:
|
2013-05-18 00:50:20 +08:00
|
|
|
TextureOp = 1;
|
|
|
|
break;
|
|
|
|
default:
|
2016-07-27 05:03:38 +08:00
|
|
|
llvm_unreachable("unhandled texture operation");
|
2013-05-18 00:50:20 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
SDValue TexArgs[19] = {
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(TextureOp, DL, MVT::i32),
|
2013-05-18 00:50:20 +08:00
|
|
|
Op.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, DL, MVT::i32),
|
|
|
|
DAG.getConstant(1, DL, MVT::i32),
|
|
|
|
DAG.getConstant(2, DL, MVT::i32),
|
|
|
|
DAG.getConstant(3, DL, MVT::i32),
|
2013-05-18 00:50:20 +08:00
|
|
|
Op.getOperand(2),
|
|
|
|
Op.getOperand(3),
|
|
|
|
Op.getOperand(4),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, DL, MVT::i32),
|
|
|
|
DAG.getConstant(1, DL, MVT::i32),
|
|
|
|
DAG.getConstant(2, DL, MVT::i32),
|
|
|
|
DAG.getConstant(3, DL, MVT::i32),
|
2013-05-18 00:50:20 +08:00
|
|
|
Op.getOperand(5),
|
|
|
|
Op.getOperand(6),
|
|
|
|
Op.getOperand(7),
|
|
|
|
Op.getOperand(8),
|
|
|
|
Op.getOperand(9),
|
|
|
|
Op.getOperand(10)
|
|
|
|
};
|
2014-04-27 02:35:24 +08:00
|
|
|
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
|
2013-05-18 00:50:20 +08:00
|
|
|
}
|
2018-06-01 10:19:46 +08:00
|
|
|
case Intrinsic::r600_dot4: {
|
2013-05-18 00:50:32 +08:00
|
|
|
SDValue Args[8] = {
|
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(1, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(1, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(2, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(2, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(3, DL, MVT::i32)),
|
2013-05-18 00:50:32 +08:00
|
|
|
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(3, DL, MVT::i32))
|
2013-05-18 00:50:32 +08:00
|
|
|
};
|
2014-04-27 02:35:24 +08:00
|
|
|
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
|
2013-05-18 00:50:32 +08:00
|
|
|
}
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2016-07-11 05:20:29 +08:00
|
|
|
case Intrinsic::r600_implicitarg_ptr: {
|
2017-03-27 22:04:01 +08:00
|
|
|
MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
|
2018-06-28 18:18:55 +08:00
|
|
|
uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
|
2016-07-11 05:20:29 +08:00
|
|
|
return DAG.getConstant(ByteOffset, DL, PtrVT);
|
|
|
|
}
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_ngroups_x:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 0);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_ngroups_y:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 1);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_ngroups_z:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 2);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_global_size_x:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 3);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_global_size_y:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 4);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_global_size_z:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 5);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_local_size_x:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 6);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_local_size_y:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 7);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_local_size_z:
|
2012-12-12 05:25:42 +08:00
|
|
|
return LowerImplicitParameter(DAG, VT, DL, 8);
|
|
|
|
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_tgid_x:
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
|
|
|
|
R600::T1_X, VT);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_tgid_y:
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
|
|
|
|
R600::T1_Y, VT);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_tgid_z:
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
|
|
|
|
R600::T1_Z, VT);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_tidig_x:
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
|
|
|
|
R600::T0_X, VT);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_tidig_y:
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
|
|
|
|
R600::T0_Y, VT);
|
2013-05-22 14:37:31 +08:00
|
|
|
case Intrinsic::r600_read_tidig_z:
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
|
|
|
|
R600::T0_Z, VT);
|
2016-01-23 05:30:34 +08:00
|
|
|
|
2016-07-16 05:26:52 +08:00
|
|
|
case Intrinsic::r600_recipsqrt_ieee:
|
|
|
|
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
|
2016-01-23 05:30:34 +08:00
|
|
|
|
2016-07-16 05:26:52 +08:00
|
|
|
case Intrinsic::r600_recipsqrt_clamped:
|
|
|
|
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
|
2017-04-04 02:08:08 +08:00
|
|
|
default:
|
|
|
|
return Op;
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
2016-07-16 05:26:52 +08:00
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
// break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} // end switch(Op.getOpcode())
|
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Provide replacement values for node \p N in \p Results.
///
/// FP_TO_UINT / FP_TO_SINT, SDIVREM and UDIVREM are handled here; every other
/// opcode is forwarded to AMDGPUTargetLowering::ReplaceNodeResults.
void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT: {
    // An i1 result is produced by a dedicated compare-based lowering.
    if (N->getValueType(0) == MVT::i1) {
      SDValue Src = N->getOperand(0);
      const bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
      Results.push_back(IsUnsigned ? lowerFP_TO_UINT(Src, DAG)
                                   : lowerFP_TO_SINT(Src, DAG));
      return;
    }

    // Since we don't care about out of bounds values we can use FP_TO_SINT
    // for uints too. The DAGLegalizer code for uint considers some extra
    // cases which are not necessary here.
    SDValue Expanded;
    if (expandFP_TO_SINT(N, Expanded, DAG))
      Results.push_back(Expanded);
    return;
  }
  case ISD::SDIVREM: {
    // The lowering is driven from result #1 of the node; both values of the
    // lowered node are handed back.
    SDValue Lowered = LowerSDIVREM(SDValue(N, 1), DAG);
    Results.push_back(Lowered);
    Results.push_back(Lowered.getValue(1));
    return;
  }
  case ISD::UDIVREM:
    // LowerUDIVREM64 appends its results directly.
    LowerUDIVREM64(SDValue(N, 0), DAG, Results);
    return;
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  }
}
|
|
|
|
|
2014-06-18 00:53:14 +08:00
|
|
|
/// Rebuild \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node of the same
/// type, whose operands are the individually extracted elements of
/// \p Vector in index order.
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  const EVT VecVT = Vector.getValueType();
  const EVT EltVT = VecVT.getVectorElementType();
  const unsigned NumElts = VecVT.getVectorNumElements();

  SmallVector<SDValue, 8> Elts;
  for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
    SDValue IdxNode =
        DAG.getConstant(Idx, DL, getVectorIdxTy(DAG.getDataLayout()));
    Elts.push_back(
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector, IdxNode));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Elts);
}
|
|
|
|
|
|
|
|
/// Custom lowering for EXTRACT_VECTOR_ELT.
///
/// The node is returned unchanged when the index is a constant or when the
/// source vector is already a BUILD_VERTICAL_VECTOR. Otherwise the source is
/// first converted with vectorToVerticalVector and the extract is re-emitted
/// on the converted vector.
SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);

  if (isa<ConstantSDNode>(Idx))
    return Op;
  if (Vec.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  SDLoc DL(Op);
  SDValue Vertical = vectorToVerticalVector(DAG, Vec);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), Vertical,
                     Idx);
}
|
|
|
|
|
|
|
|
/// Custom lowering for INSERT_VECTOR_ELT.
///
/// The node is returned unchanged when the index is a constant or when the
/// destination vector is already a BUILD_VERTICAL_VECTOR. Otherwise the
/// vector is converted with vectorToVerticalVector, the insert is re-emitted
/// on it, and the result is converted once more.
SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (isa<ConstantSDNode>(Idx))
    return Op;
  if (Vec.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  SDLoc DL(Op);
  SDValue Vertical = vectorToVerticalVector(DAG, Vec);
  SDValue Inserted = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                                 Vertical, Elt, Idx);
  return vectorToVerticalVector(DAG, Inserted);
}
|
|
|
|
|
2016-05-03 02:05:17 +08:00
|
|
|
/// Lower a global address.
///
/// Globals outside the constant address space are delegated to
/// AMDGPUTargetLowering::LowerGlobalAddress. Globals in the constant address
/// space become a target global address wrapped in
/// AMDGPUISD::CONST_DATA_PTR.
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  const bool InConstantAS =
      GSD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS;
  if (!InConstantAS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  SDLoc SL(GSD);
  MVT ConstPtrVT =
      getPointerTy(DAG.getDataLayout(), AMDGPUASI.CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GSD->getGlobal(), SL, ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SL, ConstPtrVT, GA);
}
|
|
|
|
|
2013-07-09 23:03:11 +08:00
|
|
|
/// Lower FCOS / FSIN to the hardware trig nodes COS_HW / SIN_HW.
///
/// On hw >= R700, COS/SIN input must be between -1. and 1., so the argument
/// is range-reduced: TRIG(FRACT(x / 2Pi + 0.5) - 0.5).
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  // FRACT(x * (1 / 2Pi) + 0.5)
  SDValue InvTwoPi = DAG.getConstantFP(0.15915494309, DL, MVT::f32);
  SDValue Scaled = DAG.getNode(ISD::FMUL, DL, VT, Arg, InvTwoPi);
  SDValue Biased = DAG.getNode(ISD::FADD, DL, VT, Scaled,
                               DAG.getConstantFP(0.5, DL, MVT::f32));
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT, Biased);

  unsigned TrigNode;
  if (Op.getOpcode() == ISD::FCOS)
    TrigNode = AMDGPUISD::COS_HW;
  else if (Op.getOpcode() == ISD::FSIN)
    TrigNode = AMDGPUISD::SIN_HW;
  else
    llvm_unreachable("Wrong trig opcode");

  // Re-centre the fractional part around zero before the hardware op.
  SDValue Centered = DAG.getNode(ISD::FADD, DL, VT, FractPart,
                                 DAG.getConstantFP(-0.5, DL, MVT::f32));
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT, Centered);

  if (Gen < R600Subtarget::R700) {
    // On R600 hw, COS/SIN input must be between -Pi and Pi.
    // NOTE(review): the multiply by Pi is applied to the trig *result*
    // here, not to its input - confirm this matches the selection patterns.
    return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                       DAG.getConstantFP(3.14159265359, DL, MVT::f32));
  }
  return TrigVal;
}
|
|
|
|
|
2014-06-18 20:27:13 +08:00
|
|
|
/// Lower a two-part left shift (operands: Lo, Hi, Shift) into single-part
/// shifts, returning MERGE_VALUES(Lo, Hi).
///
/// Two candidate results are computed - one for Shift < Width ("small") and
/// one for Shift >= Width ("big") - and the final parts are chosen with an
/// unsigned Shift < Width select.
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue WidthMinus1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);

  SDValue AmtMinusWidth = DAG.getNode(ISD::SUB, DL, VT, Amt, Width);
  SDValue ComplementAmt = DAG.getNode(ISD::SUB, DL, VT, WidthMinus1, Amt);

  // Bits of Lo that spill into Hi. The shift is split into
  // (Width - 1 - Shift) followed by 1 so that Shift == 0 never turns into a
  // single shift by the full width, which would produce incorrect results.
  // The alternative would be a conditional filtering that special case.
  SDValue Spill = DAG.getNode(ISD::SRL, DL, VT, Lo, ComplementAmt);
  Spill = DAG.getNode(ISD::SRL, DL, VT, Spill, One);

  // Shift < Width.
  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Amt);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Spill);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Amt);

  // Shift >= Width: Lo moves entirely into Hi and Lo becomes zero.
  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, AmtMinusWidth);
  SDValue LoBig = Zero;

  SDValue NewHi = DAG.getSelectCC(DL, Amt, Width, HiSmall, HiBig, ISD::SETULT);
  SDValue NewLo = DAG.getSelectCC(DL, Amt, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), NewLo,
                     NewHi);
}
|
|
|
|
|
|
|
|
/// Lower a two-part right shift (SRL_PARTS or SRA_PARTS; operands: Lo, Hi,
/// Shift) into single-part shifts, returning MERGE_VALUES(Lo, Hi).
///
/// Two candidate results are computed - one for Shift < Width ("small") and
/// one for Shift >= Width ("big") - and the final parts are chosen with an
/// unsigned Shift < Width select.
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  // Arithmetic variants shift the high part with SRA, logical ones with SRL.
  const bool IsArithmetic = Op.getOpcode() == ISD::SRA_PARTS;
  const unsigned HiShiftOpc = IsArithmetic ? ISD::SRA : ISD::SRL;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue WidthMinus1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);

  SDValue AmtMinusWidth = DAG.getNode(ISD::SUB, DL, VT, Amt, Width);
  SDValue ComplementAmt = DAG.getNode(ISD::SUB, DL, VT, WidthMinus1, Amt);

  // Bits of Hi that spill into Lo. The shift is split into
  // (Width - 1 - Shift) followed by 1 so that Shift == 0 never turns into a
  // single shift by the full width, which would produce incorrect results.
  // The alternative would be a conditional filtering that special case.
  SDValue Spill = DAG.getNode(ISD::SHL, DL, VT, Hi, ComplementAmt);
  Spill = DAG.getNode(ISD::SHL, DL, VT, Spill, One);

  // Shift < Width.
  SDValue HiSmall = DAG.getNode(HiShiftOpc, DL, VT, Hi, Amt);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Amt);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Spill);

  // Shift >= Width: Hi moves into Lo; Hi becomes the sign fill (arithmetic)
  // or zero (logical).
  SDValue LoBig = DAG.getNode(HiShiftOpc, DL, VT, Hi, AmtMinusWidth);
  SDValue HiBig =
      IsArithmetic ? DAG.getNode(ISD::SRA, DL, VT, Hi, WidthMinus1) : Zero;

  SDValue NewHi = DAG.getSelectCC(DL, Amt, Width, HiSmall, HiBig, ISD::SETULT);
  SDValue NewLo = DAG.getSelectCC(DL, Amt, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), NewLo,
                     NewHi);
}
|
|
|
|
|
2015-05-01 01:15:56 +08:00
|
|
|
/// Lower an unsigned add/sub-with-overflow node.
///
/// \p mainop is the opcode producing the arithmetic result and \p ovf the
/// opcode producing the overflow indication; both are applied to the node's
/// two operands. The overflow value is sign-extended from i1 within VT and
/// the pair (result, overflow) is returned as MERGE_VALUES.
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  // Overflow indication, with its low bit replicated across the value.
  SDValue OvfBit = DAG.getNode(ovf, DL, VT, LHS, RHS);
  SDValue OvfMask = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OvfBit,
                                DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, LHS, RHS);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res,
                     OvfMask);
}
|
|
|
|
|
2016-07-23 01:01:21 +08:00
|
|
|
/// Lower an FP -> unsigned i1 conversion of \p Op to the comparison
/// (Op == 1.0f), producing an i1 SETCC node.
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue FPOne = DAG.getConstantFP(1.0f, DL, MVT::f32);
  SDValue IsEqual = DAG.getCondCode(ISD::SETEQ);
  return DAG.getNode(ISD::SETCC, DL, MVT::i1, Op, FPOne, IsEqual);
}
|
|
|
|
|
|
|
|
/// Lower an FP -> signed i1 conversion of \p Op to the comparison
/// (Op == -1.0f), producing an i1 SETCC node.
SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue FPMinusOne = DAG.getConstantFP(-1.0f, DL, MVT::f32);
  SDValue IsEqual = DAG.getCondCode(ISD::SETEQ);
  return DAG.getNode(ISD::SETCC, DL, MVT::i1, Op, FPMinusOne, IsEqual);
}
|
|
|
|
|
|
|
|
/// Emit a load of type \p VT for an implicit parameter located
/// \p DwordOffset dwords (i.e. DwordOffset * 4 bytes) into the
/// CONSTANT_BUFFER_0 address space. The load is chained to the DAG entry
/// node.
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  const unsigned ByteOffset = DwordOffset * 4;

  Type *ValueTy = VT.getTypeForEVT(*DAG.getContext());
  PointerType *PtrType =
      PointerType::get(ValueTy, AMDGPUASI.CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit
  // parameters.
  assert(isInt<16>(ByteOffset));

  SDValue Address = DAG.getConstant(ByteOffset, DL, MVT::i32);
  MachinePointerInfo PtrInfo(ConstantPointerNull::get(PtrType));
  return DAG.getLoad(VT, DL, DAG.getEntryNode(), Address, PtrInfo);
}
|
|
|
|
|
|
|
|
/// Return true if \p Op is an integer constant equal to zero or a
/// floating-point constant for which isZero() holds; false for anything
/// else, including non-constants.
bool R600TargetLowering::isZero(SDValue Op) const {
  if (auto *IntCst = dyn_cast<ConstantSDNode>(Op))
    return IntCst->isNullValue();
  if (auto *FPCst = dyn_cast<ConstantFPSDNode>(Op))
    return FPCst->isZero();
  return false;
}
|
|
|
|
|
2016-03-11 16:00:27 +08:00
|
|
|
/// Return true if \p Op is the constant used as "true" here: exactly 1.0
/// for a floating-point constant, otherwise an all-ones integer constant.
bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  auto *FPCst = dyn_cast<ConstantFPSDNode>(Op);
  return FPCst ? FPCst->isExactlyValue(1.0) : isAllOnesConstant(Op);
}
|
|
|
|
|
|
|
|
/// Return true if \p Op is the constant used as "false" here: a zero
/// floating-point constant, otherwise a null integer constant.
bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  auto *FPCst = dyn_cast<ConstantFPSDNode>(Op);
  return FPCst ? FPCst->getValueAPF().isZero() : isNullConstant(Op);
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
/// Custom lowering for SELECT_CC (operands: LHS, RHS, True, False, CC).
///
/// In order, this tries to:
///   1. fold f32 selects into a legacy min/max via combineFMinMaxLegacy;
///   2. canonicalise to a form a SET* instruction can match;
///   3. canonicalise to a form a CND* instruction can match;
///   4. otherwise split the node into two SELECT_CC nodes.
SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    if (SDValue MinMax =
            combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI))
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();
  const bool CompareIsI32 = CompareVT == MVT::i32;

  // Legality of a condition code for the comparison type. The simple type is
  // only queried when a legality check is actually performed.
  auto isLegalCC = [&](ISD::CondCode Code) {
    return isCondCodeLegal(Code, CompareVT.getSimpleVT());
  };

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  const ISD::CondCode OrigCC = cast<CondCodeSDNode>(CC)->get();
  const ISD::CondCode InverseCC = ISD::getSetCCInverse(OrigCC, CompareIsI32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isLegalCC(InverseCC)) {
      std::swap(True, False);
      CC = DAG.getCondCode(InverseCC);
    } else {
      const ISD::CondCode SwappedInverseCC =
          ISD::getSetCCSwappedOperands(InverseCC);
      if (isLegalCC(SwappedInverseCC)) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwappedInverseCC);
      }
    }
  }

  const bool HasHWSelectValues = isHWTrueValue(True) && isHWFalseValue(False);
  if (HasHWSelectValues && (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    const ISD::CondCode CurCC = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(CurCC);
    if (isLegalCC(SwappedCC)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(SwappedCC);
    } else {
      // Try inverting the condition and then swapping the operands
      const ISD::CondCode InvCC =
          ISD::getSetCCInverse(CurCC, CompareVT.isInteger());
      SwappedCC = ISD::getSetCCSwappedOperands(InvCC);
      if (isLegalCC(SwappedCC)) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwappedCC);
      }
    }
  }

  if (isZero(RHS)) {
    ISD::CondCode CndCC = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types.  This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    // "Not equal" conditions are expressed as the inverse condition with the
    // selected values exchanged.
    if (CndCC == ISD::SETONE || CndCC == ISD::SETUNE || CndCC == ISD::SETNE) {
      CndCC = ISD::getSetCCInverse(CndCC, CompareIsI32);
      std::swap(True, False);
    }

    SDValue SelectNode =
        DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, True, False,
                    DAG.getCondCode(CndCC));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;
  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareIsI32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations: the first materialises the comparison as a
  // hardware true/false value, the second selects on that value being
  // different from hardware false.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond, HWFalse, True, False,
                     DAG.getCondCode(ISD::SETNE));
}
|
|
|
|
|
2014-01-25 01:20:08 +08:00
|
|
|
/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
|
2013-02-07 01:32:29 +08:00
|
|
|
/// convert these pointers to a register index. Each register holds
|
|
|
|
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
|
|
|
|
/// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
|
|
|
|
/// for indirect addressing.
|
|
|
|
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
|
|
|
|
unsigned StackWidth,
|
|
|
|
SelectionDAG &DAG) const {
|
|
|
|
unsigned SRLPad;
|
|
|
|
switch(StackWidth) {
|
|
|
|
case 1:
|
|
|
|
SRLPad = 2;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
SRLPad = 3;
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
SRLPad = 4;
|
|
|
|
break;
|
|
|
|
default: llvm_unreachable("Invalid stack width");
|
|
|
|
}
|
|
|
|
|
2015-04-28 22:05:47 +08:00
|
|
|
SDLoc DL(Ptr);
|
|
|
|
return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
|
|
|
|
DAG.getConstant(SRLPad, DL, MVT::i32));
|
2013-02-07 01:32:29 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
void R600TargetLowering::getStackAddress(unsigned StackWidth,
|
|
|
|
unsigned ElemIdx,
|
|
|
|
unsigned &Channel,
|
|
|
|
unsigned &PtrIncr) const {
|
|
|
|
switch (StackWidth) {
|
|
|
|
default:
|
|
|
|
case 1:
|
|
|
|
Channel = 0;
|
|
|
|
if (ElemIdx > 0) {
|
|
|
|
PtrIncr = 1;
|
|
|
|
} else {
|
|
|
|
PtrIncr = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
Channel = ElemIdx % 2;
|
|
|
|
if (ElemIdx == 2) {
|
|
|
|
PtrIncr = 1;
|
|
|
|
} else {
|
|
|
|
PtrIncr = 0;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
case 4:
|
|
|
|
Channel = ElemIdx;
|
|
|
|
PtrIncr = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-11 13:32:46 +08:00
|
|
|
/// Lower an i8 / i16 store to the private address space as a
/// read-modify-write of the containing dword: load the dword, clear the
/// destination bits, OR in the shifted value and store the dword back.
///
/// When the incoming chain is an AMDGPUISD::DUMMY_CHAIN, the load is chained
/// to the dummy's operand instead, and all users of the old dummy chain are
/// redirected to a new dummy chain that depends on the emitted store.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);

  // Mask covering the stored bits, before it is shifted into position.
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  const bool VectorTrunc = OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN;
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;

  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  // Effective byte address: base plus offset when an offset is present.
  SDValue ByteAddr = BasePtr;
  if (!Offset.isUndef())
    ByteAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue DwordPtr = DAG.getNode(ISD::AND, DL, MVT::i32, ByteAddr,
                                 DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Dword = DAG.getLoad(MVT::i32, DL, Chain, DwordPtr, PtrInfo);

  // Continue on the load's output chain.
  Chain = Dword.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, ByteAddr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue KeepMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  KeepMask = DAG.getNOT(DL, KeepMask, MVT::i32);

  // Cleanup the target bits
  Dword = DAG.getNode(ISD::AND, DL, MVT::i32, Dword, KeepMask);

  // Add the new bits
  SDValue Merged = DAG.getNode(ISD::OR, DL, MVT::i32, Dword, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Merged, DwordPtr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
|
|
|
|
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
|
2016-02-11 13:32:46 +08:00
|
|
|
unsigned AS = StoreNode->getAddressSpace();
|
2017-01-07 05:00:46 +08:00
|
|
|
|
|
|
|
SDValue Chain = StoreNode->getChain();
|
|
|
|
SDValue Ptr = StoreNode->getBasePtr();
|
2016-02-11 13:32:46 +08:00
|
|
|
SDValue Value = StoreNode->getValue();
|
2017-01-07 05:00:46 +08:00
|
|
|
|
|
|
|
EVT VT = Value.getValueType();
|
2016-09-03 03:07:06 +08:00
|
|
|
EVT MemVT = StoreNode->getMemoryVT();
|
2017-01-07 05:00:46 +08:00
|
|
|
EVT PtrVT = Ptr.getValueType();
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
SDLoc DL(Op);
|
|
|
|
|
|
|
|
// Neither LOCAL nor PRIVATE can do vectors at the moment
|
2017-03-27 22:04:01 +08:00
|
|
|
if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
|
2017-01-07 05:00:46 +08:00
|
|
|
VT.isVector()) {
|
2017-03-27 22:04:01 +08:00
|
|
|
if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
|
|
|
|
StoreNode->isTruncatingStore()) {
|
2017-01-21 05:24:26 +08:00
|
|
|
// Add an extra level of chain to isolate this vector
|
|
|
|
SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
|
|
|
|
// TODO: can the chain be replaced without creating a new store?
|
|
|
|
SDValue NewStore = DAG.getTruncStore(
|
|
|
|
NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
|
|
|
|
MemVT, StoreNode->getAlignment(),
|
|
|
|
StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
|
|
|
|
StoreNode = cast<StoreSDNode>(NewStore);
|
|
|
|
}
|
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
return scalarizeVectorStore(StoreNode, DAG);
|
2016-02-11 13:32:46 +08:00
|
|
|
}
|
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
unsigned Align = StoreNode->getAlignment();
|
|
|
|
if (Align < MemVT.getStoreSize() &&
|
2016-12-10 06:06:55 +08:00
|
|
|
!allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
|
2016-09-03 03:07:06 +08:00
|
|
|
return expandUnalignedStore(StoreNode, DAG);
|
|
|
|
}
|
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
|
|
|
|
DAG.getConstant(2, DL, PtrVT));
|
2013-08-16 09:12:11 +08:00
|
|
|
|
2017-03-27 22:04:01 +08:00
|
|
|
if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
|
2016-09-03 03:07:06 +08:00
|
|
|
// It is beneficial to create MSKOR here instead of combiner to avoid
|
|
|
|
// artificial dependencies introduced by RMW
|
2013-08-16 09:12:06 +08:00
|
|
|
if (StoreNode->isTruncatingStore()) {
|
2013-08-16 09:12:11 +08:00
|
|
|
assert(VT.bitsLE(MVT::i32));
|
2013-08-16 09:12:06 +08:00
|
|
|
SDValue MaskConstant;
|
|
|
|
if (MemVT == MVT::i8) {
|
2015-04-28 22:05:47 +08:00
|
|
|
MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
|
2013-08-16 09:12:06 +08:00
|
|
|
} else {
|
|
|
|
assert(MemVT == MVT::i16);
|
2016-09-03 03:07:06 +08:00
|
|
|
assert(StoreNode->getAlignment() >= 2);
|
2015-04-28 22:05:47 +08:00
|
|
|
MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
|
2013-08-16 09:12:06 +08:00
|
|
|
}
|
2017-01-07 05:00:46 +08:00
|
|
|
|
|
|
|
SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
|
|
|
|
DAG.getConstant(0x00000003, DL, PtrVT));
|
|
|
|
SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
|
|
|
|
DAG.getConstant(3, DL, VT));
|
|
|
|
|
|
|
|
// Put the mask in correct place
|
|
|
|
SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
|
|
|
|
|
2017-01-21 05:24:26 +08:00
|
|
|
// Put the value bits in correct place
|
2013-08-16 09:12:06 +08:00
|
|
|
SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
|
2017-01-07 05:00:46 +08:00
|
|
|
SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
|
|
|
|
|
2013-08-16 09:12:06 +08:00
|
|
|
// XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
|
|
|
|
// vector instead.
|
|
|
|
SDValue Src[4] = {
|
|
|
|
ShiftedValue,
|
2015-04-28 22:05:47 +08:00
|
|
|
DAG.getConstant(0, DL, MVT::i32),
|
|
|
|
DAG.getConstant(0, DL, MVT::i32),
|
2013-08-16 09:12:06 +08:00
|
|
|
Mask
|
|
|
|
};
|
2016-04-27 05:15:30 +08:00
|
|
|
SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
|
2013-08-16 09:12:06 +08:00
|
|
|
SDValue Args[3] = { Chain, Input, DWordAddr };
|
|
|
|
return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
|
2014-04-27 03:29:41 +08:00
|
|
|
Op->getVTList(), Args, MemVT,
|
2013-08-16 09:12:06 +08:00
|
|
|
StoreNode->getMemOperand());
|
2017-01-07 05:00:46 +08:00
|
|
|
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
|
2013-08-16 09:12:06 +08:00
|
|
|
// Convert pointer from byte address to dword address.
|
2017-01-07 05:00:46 +08:00
|
|
|
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
|
2013-08-16 09:12:06 +08:00
|
|
|
|
|
|
|
if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
|
2013-12-11 05:37:42 +08:00
|
|
|
llvm_unreachable("Truncated and indexed stores not supported yet");
|
2013-08-16 09:12:06 +08:00
|
|
|
} else {
|
|
|
|
Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
|
|
|
|
}
|
|
|
|
return Chain;
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
}
|
2013-02-07 01:32:29 +08:00
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
// GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
|
2017-03-27 22:04:01 +08:00
|
|
|
if (AS != AMDGPUASI.PRIVATE_ADDRESS)
|
2013-02-07 01:32:29 +08:00
|
|
|
return SDValue();
|
|
|
|
|
2016-02-11 13:32:46 +08:00
|
|
|
if (MemVT.bitsLT(MVT::i32))
|
|
|
|
return lowerPrivateTruncStore(StoreNode, DAG);
|
2013-02-07 01:32:29 +08:00
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
// Standard i32+ store, tag it with DWORDADDR to note that the address
|
|
|
|
// has been shifted
|
|
|
|
if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
|
|
|
|
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
|
|
|
|
return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
|
2013-02-07 01:32:29 +08:00
|
|
|
}
|
|
|
|
|
2017-01-07 05:00:46 +08:00
|
|
|
// Tagged i32+ stores will be matched by patterns
|
|
|
|
return SDValue();
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
|
|
|
|
2013-01-23 10:09:06 +08:00
|
|
|
// return (512 + (kc_bank << 12)
|
|
|
|
static int
|
2017-11-02 03:12:38 +08:00
|
|
|
ConstantAddressBlock(unsigned AddressSpace) {
|
2013-01-23 10:09:06 +08:00
|
|
|
switch (AddressSpace) {
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_0:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_1:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_2:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 2;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_3:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 3;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_4:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 4;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_5:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 5;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_6:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 6;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_7:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 7;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_8:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 8;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_9:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 9;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_10:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 10;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_11:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 11;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_12:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 12;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_13:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 13;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_14:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 14;
|
2017-11-02 03:12:38 +08:00
|
|
|
case AMDGPUAS::CONSTANT_BUFFER_15:
|
2013-01-23 10:09:06 +08:00
|
|
|
return 512 + 4096 * 15;
|
|
|
|
default:
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-11 02:21:39 +08:00
|
|
|
/// Lower a sub-dword extending load from the private address space.
///
/// The dword containing the requested bytes is loaded, shifted right so the
/// requested bytes land in the low bits, and then sign- or zero-extended in
/// register.
///
/// \returns the merged pair (extended value, chain of the dword load).
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);
  ISD::LoadExtType Extension = LD->getExtensionType();
  EVT MemVT = LD->getMemoryVT();
  assert(LD->getAlignment() >= MemVT.getStoreSize());

  SDValue Base = LD->getBasePtr();
  SDValue InChain = LD->getChain();
  SDValue Off = LD->getOffset();

  // Fold in the offset operand when the load carries one.
  SDValue ByteAddr =
      Off.isUndef() ? Base : DAG.getNode(ISD::ADD, DL, MVT::i32, Base, Off);

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue DWordMask = DAG.getConstant(0xfffffffc, DL, MVT::i32);
  SDValue DWordPtr = DAG.getNode(ISD::AND, DL, MVT::i32, ByteAddr, DWordMask);

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue DWord = DAG.getLoad(MVT::i32, DL, InChain, DWordPtr, PtrInfo);

  // Get offset within the register.
  SDValue LowBits = DAG.getConstant(0x3, DL, MVT::i32);
  SDValue ByteInDWord = DAG.getNode(ISD::AND, DL, MVT::i32, ByteAddr, LowBits);

  // Bit offset of target byte (ByteInDWord * 8).
  SDValue Three = DAG.getConstant(3, DL, MVT::i32);
  SDValue BitOffset = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteInDWord, Three);

  // Shift the requested bytes down to bit 0.
  SDValue Value = DAG.getNode(ISD::SRL, DL, MVT::i32, DWord, BitOffset);

  // Replace the bits above the loaded element with copies of the sign bit
  // (sign-extending load) or with zeros (any other extension type).
  EVT EltVT = MemVT.getScalarType();
  if (Extension == ISD::SEXTLOAD) {
    SDValue EltVTNode = DAG.getValueType(EltVT);
    Value = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Value, EltVTNode);
  } else {
    Value = DAG.getZeroExtendInReg(Value, DL, EltVT);
  }

  // Result 1 of the dword load is its output chain.
  SDValue Results[] = {Value, DWord.getValue(1)};
  return DAG.getMergeValues(Results, DL);
}
|
|
|
|
|
|
|
|
/// Custom lowering for ISD::LOAD.
///
/// Handles, in order:
///  - sub-dword extending loads from the private address space
///    (lowerPrivateExtLoad),
///  - vector loads from the local/private address spaces (scalarized),
///  - non-extending / zero-extending loads from the constant buffers
///    (rewritten to CONST_ADDRESS nodes),
///  - sign-extending loads (expanded to an any-extending load followed by
///    SIGN_EXTEND_INREG),
///  - private loads whose pointer has not been tagged with DWORDADDR yet.
///
/// \returns the lowered value, or an empty SDValue when nothing has to be
/// done for this load.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  int ConstantBlock = ConstantAddressBlock(AS);
  if (ConstantBlock > -1 &&
      (ExtType == ISD::NON_EXTLOAD || ExtType == ISD::ZEXTLOAD)) {
    SDValue Result;
    // The memory operand is not guaranteed to carry an IR value, and isa<>
    // must not be applied to a null pointer, so test for null first.
    // ConstantExpr is a kind of Constant, so one isa<Constant> check covers
    // both.
    const Value *MemValue = LoadNode->getMemOperand()->getValue();
    if ((MemValue && isa<Constant>(MemValue)) || isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) * 4) + chan) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // A non-constant pointer can't be folded; keep it as a 4-element
      // vector load addressed by (Ptr >> 4) and the constant buffer index.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(AS - AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32));
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (ExtType == ISD::SEXTLOAD) {
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (AS != AMDGPUASI.PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    // Convert the byte address to a dword address and tag it.
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}
|
2012-12-12 05:25:42 +08:00
|
|
|
|
2014-06-24 02:00:55 +08:00
|
|
|
/// Lower BRCOND to AMDGPUISD::BRANCH_COND.
///
/// The incoming operands are (chain, condition, jump); the emitted node takes
/// them as (chain, jump, condition).
SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue InChain = Op.getOperand(0);
  SDValue Condition = Op.getOperand(1);
  SDValue Target = Op.getOperand(2);

  SDLoc DL(Op);
  return DAG.getNode(AMDGPUISD::BRANCH_COND, DL, Op.getValueType(), InChain,
                     Target, Condition);
}
|
|
|
|
|
2016-03-08 05:10:13 +08:00
|
|
|
/// Lower a frame index to a constant.
///
/// The constant is the frame-index reference reported by the frame lowering,
/// multiplied by 4 and by the stack width of the function.
SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *FrameLowering = Subtarget->getFrameLowering();

  unsigned FI = cast<FrameIndexSDNode>(Op)->getIndex();

  // The frame register reported by the query is not needed here.
  unsigned UnusedFrameReg;
  unsigned Slot = FrameLowering->getFrameIndexReference(MF, FI, UnusedFrameReg);

  SDLoc DL(Op);
  return DAG.getConstant(Slot * 4 * FrameLowering->getStackWidth(MF), DL,
                         Op.getValueType());
}
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
|
|
|
|
bool IsVarArg) const {
|
|
|
|
switch (CC) {
|
|
|
|
case CallingConv::AMDGPU_KERNEL:
|
|
|
|
case CallingConv::SPIR_KERNEL:
|
|
|
|
case CallingConv::C:
|
|
|
|
case CallingConv::Fast:
|
|
|
|
case CallingConv::Cold:
|
|
|
|
return CC_R600_Kernel;
|
|
|
|
case CallingConv::AMDGPU_VS:
|
|
|
|
case CallingConv::AMDGPU_GS:
|
|
|
|
case CallingConv::AMDGPU_PS:
|
|
|
|
case CallingConv::AMDGPU_CS:
|
|
|
|
case CallingConv::AMDGPU_HS:
|
|
|
|
case CallingConv::AMDGPU_ES:
|
|
|
|
case CallingConv::AMDGPU_LS:
|
|
|
|
return CC_R600;
|
|
|
|
default:
|
|
|
|
report_fatal_error("Unsupported calling convention.");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
/// XXX Only kernel functions are supported, so we can assume for now that
|
|
|
|
/// every function is a kernel function, but in the future we should use
|
|
|
|
/// separate calling conventions for kernel and non-kernel functions.
|
|
|
|
SDValue R600TargetLowering::LowerFormalArguments(
|
2016-06-12 23:39:02 +08:00
|
|
|
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
|
|
|
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
|
|
|
|
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
|
2013-07-23 09:48:05 +08:00
|
|
|
SmallVector<CCValAssign, 16> ArgLocs;
|
2014-08-07 02:45:26 +08:00
|
|
|
CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
|
|
|
|
*DAG.getContext());
|
2013-11-12 06:10:24 +08:00
|
|
|
MachineFunction &MF = DAG.getMachineFunction();
|
2013-10-23 08:44:32 +08:00
|
|
|
SmallVector<ISD::InputArg, 8> LocalIns;
|
|
|
|
|
2016-09-17 05:53:00 +08:00
|
|
|
if (AMDGPU::isShader(CallConv)) {
|
2017-04-12 06:29:24 +08:00
|
|
|
CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
|
2016-09-17 05:53:00 +08:00
|
|
|
} else {
|
|
|
|
analyzeFormalArgumentsCompute(CCInfo, Ins);
|
|
|
|
}
|
2013-07-23 09:48:05 +08:00
|
|
|
|
2013-07-23 09:48:18 +08:00
|
|
|
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
|
2013-07-23 09:48:05 +08:00
|
|
|
CCValAssign &VA = ArgLocs[i];
|
2014-08-14 02:14:11 +08:00
|
|
|
const ISD::InputArg &In = Ins[i];
|
|
|
|
EVT VT = In.VT;
|
|
|
|
EVT MemVT = VA.getLocVT();
|
|
|
|
if (!VT.isVector() && MemVT.isVector()) {
|
|
|
|
// Get load source type if scalarized.
|
|
|
|
MemVT = MemVT.getVectorElementType();
|
|
|
|
}
|
2013-07-23 09:47:58 +08:00
|
|
|
|
2016-04-07 03:40:20 +08:00
|
|
|
if (AMDGPU::isShader(CallConv)) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
|
2013-11-12 06:10:24 +08:00
|
|
|
SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
|
|
|
|
InVals.push_back(Register);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
|
2017-03-27 22:04:01 +08:00
|
|
|
AMDGPUASI.CONSTANT_BUFFER_0);
|
2013-07-23 09:48:05 +08:00
|
|
|
|
2014-03-18 02:58:11 +08:00
|
|
|
// i64 isn't a legal type, so the register type used ends up as i32, which
|
|
|
|
// isn't expected here. It attempts to create this sextload, but it ends up
|
|
|
|
// being invalid. Somehow this seems to work with i64 arguments, but breaks
|
|
|
|
// for <1 x i64>.
|
|
|
|
|
2013-07-23 09:48:05 +08:00
|
|
|
// The first 36 bytes of the input buffer contains information about
|
|
|
|
// thread group and global sizes.
|
2014-08-14 02:14:11 +08:00
|
|
|
ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
|
|
|
|
if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
|
|
|
|
// FIXME: This should really check the extload type, but the handling of
|
|
|
|
// extload vector parameters seems to be broken.
|
|
|
|
|
|
|
|
// Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
|
|
|
|
Ext = ISD::SEXTLOAD;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Compute the offset from the value.
|
|
|
|
// XXX - I think PartOffset should give you this, but it seems to give the
|
|
|
|
// size of the register which isn't useful.
|
|
|
|
|
2015-02-17 02:10:47 +08:00
|
|
|
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
|
2014-08-14 02:14:11 +08:00
|
|
|
unsigned PartOffset = VA.getLocMemOffset();
|
2018-07-07 01:16:17 +08:00
|
|
|
unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF.getFunction()) +
|
2018-05-30 01:42:50 +08:00
|
|
|
VA.getLocMemOffset();
|
2014-04-12 04:59:54 +08:00
|
|
|
|
2014-08-14 02:14:11 +08:00
|
|
|
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
|
[SelectionDAG] Get rid of bool parameters in SelectionDAG::getLoad, getStore, and friends.
Summary:
Instead, we take a single flags arg (a bitset).
Also add a default 0 alignment, and change the order of arguments so the
alignment comes before the flags.
This greatly simplifies many callsites, and fixes a bug in
AMDGPUISelLowering, wherein the order of the args to getLoad was
inverted. It also greatly simplifies the process of adding another flag
to getLoad.
Reviewers: chandlerc, tstellarAMD
Subscribers: jholewinski, arsenm, jyknight, dsanders, nemanjai, llvm-commits
Differential Revision: http://reviews.llvm.org/D22249
llvm-svn: 275592
2016-07-16 02:27:10 +08:00
|
|
|
SDValue Arg = DAG.getLoad(
|
|
|
|
ISD::UNINDEXED, Ext, VT, DL, Chain,
|
|
|
|
DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
|
[CodeGen] Split out the notions of MI invariance and MI dereferenceability.
Summary:
An IR load can be invariant, dereferenceable, neither, or both. But
currently, MI's notion of invariance is IR-invariant &&
IR-dereferenceable.
This patch splits up the notions of invariance and dereferenceability at
the MI level. It's NFC, so adds some probably-unnecessary
"is-dereferenceable" checks, which we can remove later if desired.
Reviewers: chandlerc, tstellarAMD
Subscribers: jholewinski, arsenm, nemanjai, llvm-commits
Differential Revision: https://reviews.llvm.org/D23371
llvm-svn: 281151
2016-09-11 09:38:58 +08:00
|
|
|
MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
|
|
|
|
MachineMemOperand::MODereferenceable |
|
|
|
|
MachineMemOperand::MOInvariant);
|
2014-04-18 15:40:20 +08:00
|
|
|
|
|
|
|
// 4 is the preferred alignment for the CONSTANT memory space.
|
2012-12-12 05:25:42 +08:00
|
|
|
InVals.push_back(Arg);
|
|
|
|
}
|
|
|
|
return Chain;
|
|
|
|
}
|
|
|
|
|
2015-07-09 10:09:04 +08:00
|
|
|
/// Result type of a SETCC on operands of type \p VT: the same vector shape
/// with integer elements for vector operands, i32 for everything else.
EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (VT.isVector())
    return VT.changeVectorElementTypeToInteger();
  return MVT::i32;
}
|
|
|
|
|
2017-07-11 04:25:54 +08:00
|
|
|
bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
|
|
|
|
const SelectionDAG &DAG) const {
|
2017-05-24 23:59:09 +08:00
|
|
|
// Local and Private addresses do not handle vectors. Limit to i32
|
|
|
|
if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
|
|
|
|
return (MemVT.getSizeInBits() <= 32);
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2016-02-23 05:04:16 +08:00
|
|
|
/// Tell whether a misaligned access of type \p VT is allowed.
///
/// Non-simple types, MVT::Other and types narrower than i32 are rejected and
/// reported as not fast. For every other type the access is reported as fast
/// (a rough estimate) and is allowed only when the type is wider than i32 and
/// \p Align is a multiple of 4.
///
/// \param IsFast optional out-parameter; written only when non-null.
bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
  const bool Rejected =
      !VT.isSimple() || VT == MVT::Other || VT.bitsLT(MVT::i32);

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = !Rejected;

  if (Rejected)
    return false;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}
|
|
|
|
|
2014-04-18 15:40:20 +08:00
|
|
|
/// Rebuild the 4-operand BUILD_VECTOR \p VectorEntry with every element that
/// can be expressed through a swizzle selector replaced by undef.
///
/// \p RemapSwizzle (which must be empty on entry) receives, per channel:
///   7 (SEL_MASK_WRITE) for an undef element,
///   4 (SEL_0) for a floating-point zero constant,
///   5 (SEL_1) for the floating-point constant 1.0,
///   j for an element identical to the earlier element j.
/// Channels that keep their element get no entry.
static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());

  SDValue Elts[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned Chan = 0; Chan != 4; ++Chan) {
    SDValue &Elt = Elts[Chan];

    if (Elt.isUndef()) {
      // We mask write here to teach later passes that this element of the
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to
      // read.
      RemapSwizzle[Chan] = 7; // SEL_MASK_WRITE
    } else if (ConstantFPSDNode *FPConst = dyn_cast<ConstantFPSDNode>(Elt)) {
      // 0.0 and 1.0 are available as dedicated selectors.
      if (FPConst->isZero()) {
        RemapSwizzle[Chan] = 4; // SEL_0
        Elt = DAG.getUNDEF(MVT::f32);
      } else if (FPConst->isExactlyValue(1.0)) {
        RemapSwizzle[Chan] = 5; // SEL_1
        Elt = DAG.getUNDEF(MVT::f32);
      }
    }

    if (Elt.isUndef())
      continue;

    // Point a repeated element at the first channel holding the same value.
    for (unsigned Prev = 0; Prev != Chan; ++Prev) {
      if (Elt != Elts[Prev])
        continue;
      Elt = DAG.getUNDEF(Elt.getValueType());
      RemapSwizzle[Chan] = Prev;
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            Elts);
}
|
|
|
|
|
2013-06-11 21:32:25 +08:00
|
|
|
/// Rebuild the 4-operand BUILD_VECTOR \p VectorEntry so that one
/// EXTRACT_VECTOR_ELT operand is moved to the channel matching its extract
/// index.
///
/// \p RemapSwizzle (which must be empty on entry) is first filled with the
/// identity mapping for channels 0..3. Channels whose operand already is an
/// extract of the same-numbered element are marked unmovable. The first
/// extract operand whose target channel is not unmovable is then swapped into
/// that channel, with the two map entries swapped accordingly; at most one
/// swap is performed.
///
/// Extract operands with a non-constant index, or with an index outside the
/// four channels, are left in place.
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    // dyn_cast yields null for a non-constant index; never dereference it
    // unchecked.
    ConstantSDNode *IdxNode =
        dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1));
    if (IdxNode && IdxNode->getZExtValue() == i)
      isUnmovable[i] = true;
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      continue;
    ConstantSDNode *IdxNode =
        dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1));
    if (!IdxNode)
      continue;
    // The index subscripts the 4-element arrays below, and an unknown key
    // would insert into RemapSwizzle while another reference into it is live.
    if (IdxNode->getZExtValue() >= 4)
      continue;
    unsigned Idx = static_cast<unsigned>(IdxNode->getZExtValue());
    if (isUnmovable[Idx])
      continue;
    // Swap i and Idx
    std::swap(NewBldVec[Idx], NewBldVec[i]);
    std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
    break;
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
|
|
|
|
|
2016-06-12 23:39:02 +08:00
|
|
|
/// Simplify \p BuildVector and update the four swizzle selectors in \p Swz to
/// match.
///
/// Two rewrites are applied in sequence: CompactSwizzlableVector, then
/// ReorganizeVector. After each one, every selector that has an entry in the
/// old -> new remap table produced by that rewrite is replaced by the mapped
/// value.
///
/// \returns the rewritten BUILD_VECTOR.
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  for (unsigned Pass = 0; Pass != 2; ++Pass) {
    // Both rewrites expect an empty table.
    SwizzleRemap.clear();
    if (Pass == 0)
      BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
    else
      BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);

    for (unsigned Chan = 0; Chan != 4; ++Chan) {
      unsigned OldSel = cast<ConstantSDNode>(Swz[Chan])->getZExtValue();
      DenseMap<unsigned, unsigned>::iterator Entry = SwizzleRemap.find(OldSel);
      if (Entry == SwizzleRemap.end())
        continue;
      Swz[Chan] = DAG.getConstant(Entry->second, DL, MVT::i32);
    }
  }

  return BuildVector;
}
|
|
|
|
|
2012-12-12 05:25:42 +08:00
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Custom DAG Optimizations
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
|
|
|
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
|
|
|
|
DAGCombinerInfo &DCI) const {
|
|
|
|
SelectionDAG &DAG = DCI.DAG;
|
2016-08-30 07:21:46 +08:00
|
|
|
SDLoc DL(N);
|
2012-12-12 05:25:42 +08:00
|
|
|
|
|
|
|
switch (N->getOpcode()) {
|
|
|
|
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
|
|
|
|
case ISD::FP_ROUND: {
|
|
|
|
SDValue Arg = N->getOperand(0);
|
|
|
|
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
|
2016-08-30 07:21:46 +08:00
|
|
|
return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
|
2012-12-12 05:25:42 +08:00
|
|
|
Arg.getOperand(0));
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
2013-02-07 22:02:35 +08:00
|
|
|
|
|
|
|
// (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
|
|
|
|
// (i32 select_cc f32, f32, -1, 0 cc)
|
|
|
|
//
|
|
|
|
// Mesa's GLSL frontend generates the above pattern a lot and we can lower
|
|
|
|
// this to one of the SET*_DX10 instructions.
|
|
|
|
case ISD::FP_TO_SINT: {
|
|
|
|
SDValue FNeg = N->getOperand(0);
|
|
|
|
if (FNeg.getOpcode() != ISD::FNEG) {
|
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
SDValue SelectCC = FNeg.getOperand(0);
|
|
|
|
if (SelectCC.getOpcode() != ISD::SELECT_CC ||
|
|
|
|
SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
|
|
|
|
SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
|
|
|
|
!isHWTrueValue(SelectCC.getOperand(2)) ||
|
|
|
|
!isHWFalseValue(SelectCC.getOperand(3))) {
|
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
|
2016-08-30 07:21:46 +08:00
|
|
|
return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
|
2013-02-07 22:02:35 +08:00
|
|
|
SelectCC.getOperand(0), // LHS
|
|
|
|
SelectCC.getOperand(1), // RHS
|
2016-08-30 07:21:46 +08:00
|
|
|
DAG.getConstant(-1, DL, MVT::i32), // True
|
|
|
|
DAG.getConstant(0, DL, MVT::i32), // False
|
2013-02-07 22:02:35 +08:00
|
|
|
SelectCC.getOperand(4)); // CC
|
|
|
|
|
|
|
|
break;
|
|
|
|
}
|
2013-07-30 08:27:16 +08:00
|
|
|
|
2013-10-28 12:07:38 +08:00
|
|
|
// insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
|
|
|
|
// => build_vector elt0, ... , NewEltIdx, ... , eltN
|
2013-07-30 08:27:16 +08:00
|
|
|
case ISD::INSERT_VECTOR_ELT: {
|
|
|
|
SDValue InVec = N->getOperand(0);
|
|
|
|
SDValue InVal = N->getOperand(1);
|
|
|
|
SDValue EltNo = N->getOperand(2);
|
|
|
|
|
|
|
|
// If the inserted element is an UNDEF, just use the input vector.
|
2016-03-15 01:28:46 +08:00
|
|
|
if (InVal.isUndef())
|
2013-07-30 08:27:16 +08:00
|
|
|
return InVec;
|
|
|
|
|
|
|
|
EVT VT = InVec.getValueType();
|
|
|
|
|
|
|
|
// If we can't generate a legal BUILD_VECTOR, exit
|
|
|
|
if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
|
|
|
|
return SDValue();
|
|
|
|
|
|
|
|
// Check that we know which element is being inserted
|
|
|
|
if (!isa<ConstantSDNode>(EltNo))
|
|
|
|
return SDValue();
|
|
|
|
unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
|
|
|
|
|
|
|
|
// Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
|
|
|
|
// be converted to a BUILD_VECTOR). Fill in the Ops vector with the
|
|
|
|
// vector elements.
|
|
|
|
SmallVector<SDValue, 8> Ops;
|
|
|
|
if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
|
|
|
|
Ops.append(InVec.getNode()->op_begin(),
|
|
|
|
InVec.getNode()->op_end());
|
2016-03-15 01:28:46 +08:00
|
|
|
} else if (InVec.isUndef()) {
|
2013-07-30 08:27:16 +08:00
|
|
|
unsigned NElts = VT.getVectorNumElements();
|
|
|
|
Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
|
|
|
|
} else {
|
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
|
|
|
|
// Insert the element
|
|
|
|
if (Elt < Ops.size()) {
|
|
|
|
// All the operands of BUILD_VECTOR must have the same type;
|
|
|
|
// we enforce that here.
|
|
|
|
EVT OpVT = Ops[0].getValueType();
|
|
|
|
if (InVal.getValueType() != OpVT)
|
|
|
|
InVal = OpVT.bitsGT(InVal.getValueType()) ?
|
2016-08-30 07:21:46 +08:00
|
|
|
DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
|
|
|
|
DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
|
2013-07-30 08:27:16 +08:00
|
|
|
Ops[Elt] = InVal;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the new vector
|
2016-08-30 07:21:46 +08:00
|
|
|
return DAG.getBuildVector(VT, DL, Ops);
|
2013-07-30 08:27:16 +08:00
|
|
|
}
|
|
|
|
|
2013-01-23 10:09:06 +08:00
|
|
|
// Extract_vec (Build_vector) generated by custom lowering
|
|
|
|
// also needs to be customly combined
|
|
|
|
case ISD::EXTRACT_VECTOR_ELT: {
|
|
|
|
SDValue Arg = N->getOperand(0);
|
|
|
|
if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
|
|
|
|
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
|
|
|
|
unsigned Element = Const->getZExtValue();
|
|
|
|
return Arg->getOperand(Element);
|
|
|
|
}
|
|
|
|
}
|
2013-02-01 06:11:53 +08:00
|
|
|
if (Arg.getOpcode() == ISD::BITCAST &&
|
2016-09-03 04:13:19 +08:00
|
|
|
Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
|
|
|
|
(Arg.getOperand(0).getValueType().getVectorNumElements() ==
|
|
|
|
Arg.getValueType().getVectorNumElements())) {
|
2013-02-01 06:11:53 +08:00
|
|
|
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
|
|
|
|
unsigned Element = Const->getZExtValue();
|
2016-08-30 07:21:46 +08:00
|
|
|
return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
|
|
|
|
Arg->getOperand(0).getOperand(Element));
|
2013-02-01 06:11:53 +08:00
|
|
|
}
|
|
|
|
}
|
2015-07-16 14:23:12 +08:00
|
|
|
break;
|
2013-01-23 10:09:06 +08:00
|
|
|
}
|
2013-02-07 22:02:35 +08:00
|
|
|
|
|
|
|
case ISD::SELECT_CC: {
|
2014-05-10 00:42:16 +08:00
|
|
|
// Try common optimizations
|
2016-02-10 06:54:12 +08:00
|
|
|
if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
|
2014-05-10 00:42:16 +08:00
|
|
|
return Ret;
|
|
|
|
|
2013-02-07 22:02:35 +08:00
|
|
|
// fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
|
|
|
|
// selectcc x, y, a, b, inv(cc)
|
R600: Optimize another selectcc case
fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
selectcc x, y, a, b, cc
Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 176700
2013-03-08 23:37:11 +08:00
|
|
|
//
|
|
|
|
// fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
|
|
|
|
// selectcc x, y, a, b, cc
|
2013-02-07 22:02:35 +08:00
|
|
|
SDValue LHS = N->getOperand(0);
|
|
|
|
if (LHS.getOpcode() != ISD::SELECT_CC) {
|
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
|
|
|
|
SDValue RHS = N->getOperand(1);
|
|
|
|
SDValue True = N->getOperand(2);
|
|
|
|
SDValue False = N->getOperand(3);
|
R600: Optimize another selectcc case
fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
selectcc x, y, a, b, cc
Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 176700
2013-03-08 23:37:11 +08:00
|
|
|
ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
|
2013-02-07 22:02:35 +08:00
|
|
|
|
|
|
|
if (LHS.getOperand(2).getNode() != True.getNode() ||
|
|
|
|
LHS.getOperand(3).getNode() != False.getNode() ||
|
R600: Optimize another selectcc case
fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
selectcc x, y, a, b, cc
Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 176700
2013-03-08 23:37:11 +08:00
|
|
|
RHS.getNode() != False.getNode()) {
|
2013-02-07 22:02:35 +08:00
|
|
|
return SDValue();
|
|
|
|
}
|
|
|
|
|
R600: Optimize another selectcc case
fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
selectcc x, y, a, b, cc
Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 176700
2013-03-08 23:37:11 +08:00
|
|
|
switch (NCC) {
|
|
|
|
default: return SDValue();
|
|
|
|
case ISD::SETNE: return LHS;
|
|
|
|
case ISD::SETEQ: {
|
|
|
|
ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
|
|
|
|
LHSCC = ISD::getSetCCInverse(LHSCC,
|
|
|
|
LHS.getOperand(0).getValueType().isInteger());
|
2013-09-28 10:50:38 +08:00
|
|
|
if (DCI.isBeforeLegalizeOps() ||
|
|
|
|
isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
|
2016-08-30 07:21:46 +08:00
|
|
|
return DAG.getSelectCC(DL,
|
2013-09-28 10:50:38 +08:00
|
|
|
LHS.getOperand(0),
|
|
|
|
LHS.getOperand(1),
|
|
|
|
LHS.getOperand(2),
|
|
|
|
LHS.getOperand(3),
|
|
|
|
LHSCC);
|
|
|
|
break;
|
2013-02-15 00:55:06 +08:00
|
|
|
}
|
R600: Optimize another selectcc case
fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
selectcc x, y, a, b, cc
Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 176700
2013-03-08 23:37:11 +08:00
|
|
|
}
|
2013-09-28 10:50:38 +08:00
|
|
|
return SDValue();
|
R600: Optimize another selectcc case
fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
selectcc x, y, a, b, cc
Reviewed-by: Christian König <christian.koenig@amd.com>
llvm-svn: 176700
2013-03-08 23:37:11 +08:00
|
|
|
}
|
2013-08-16 09:12:11 +08:00
|
|
|
|
2016-12-06 04:23:10 +08:00
|
|
|
case AMDGPUISD::R600_EXPORT: {
|
2013-02-15 00:55:06 +08:00
|
|
|
SDValue Arg = N->getOperand(1);
|
|
|
|
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
|
|
|
|
break;
|
2013-06-04 23:04:53 +08:00
|
|
|
|
2013-02-15 00:55:06 +08:00
|
|
|
SDValue NewArgs[8] = {
|
|
|
|
N->getOperand(0), // Chain
|
|
|
|
SDValue(),
|
|
|
|
N->getOperand(2), // ArrayBase
|
|
|
|
N->getOperand(3), // Type
|
|
|
|
N->getOperand(4), // SWZ_X
|
|
|
|
N->getOperand(5), // SWZ_Y
|
|
|
|
N->getOperand(6), // SWZ_Z
|
|
|
|
N->getOperand(7) // SWZ_W
|
|
|
|
};
|
2015-04-28 22:05:47 +08:00
|
|
|
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
|
2016-12-06 04:23:10 +08:00
|
|
|
return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
|
2013-02-07 22:02:35 +08:00
|
|
|
}
|
2013-06-04 23:04:53 +08:00
|
|
|
case AMDGPUISD::TEXTURE_FETCH: {
|
|
|
|
SDValue Arg = N->getOperand(1);
|
|
|
|
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
|
|
|
|
break;
|
|
|
|
|
|
|
|
SDValue NewArgs[19] = {
|
|
|
|
N->getOperand(0),
|
|
|
|
N->getOperand(1),
|
|
|
|
N->getOperand(2),
|
|
|
|
N->getOperand(3),
|
|
|
|
N->getOperand(4),
|
|
|
|
N->getOperand(5),
|
|
|
|
N->getOperand(6),
|
|
|
|
N->getOperand(7),
|
|
|
|
N->getOperand(8),
|
|
|
|
N->getOperand(9),
|
|
|
|
N->getOperand(10),
|
|
|
|
N->getOperand(11),
|
|
|
|
N->getOperand(12),
|
|
|
|
N->getOperand(13),
|
|
|
|
N->getOperand(14),
|
|
|
|
N->getOperand(15),
|
|
|
|
N->getOperand(16),
|
|
|
|
N->getOperand(17),
|
|
|
|
N->getOperand(18),
|
|
|
|
};
|
2015-04-28 22:05:47 +08:00
|
|
|
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
|
|
|
|
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
|
2013-06-04 23:04:53 +08:00
|
|
|
}
|
2016-08-30 07:21:46 +08:00
|
|
|
default: break;
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
2014-05-23 02:09:07 +08:00
|
|
|
|
|
|
|
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
|
2012-12-12 05:25:42 +08:00
|
|
|
}
|
2013-09-13 07:44:44 +08:00
|
|
|
|
2016-06-24 14:30:11 +08:00
|
|
|
bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
|
|
|
|
SDValue &Src, SDValue &Neg, SDValue &Abs,
|
|
|
|
SDValue &Sel, SDValue &Imm,
|
|
|
|
SelectionDAG &DAG) const {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
const R600InstrInfo *TII = Subtarget->getInstrInfo();
|
2013-09-13 07:44:44 +08:00
|
|
|
if (!Src.isMachineOpcode())
|
|
|
|
return false;
|
2016-06-24 14:30:11 +08:00
|
|
|
|
2013-09-13 07:44:44 +08:00
|
|
|
switch (Src.getMachineOpcode()) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::FNEG_R600:
|
2013-09-13 07:44:44 +08:00
|
|
|
if (!Neg.getNode())
|
|
|
|
return false;
|
|
|
|
Src = Src.getOperand(0);
|
2015-04-28 22:05:47 +08:00
|
|
|
Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
|
2013-09-13 07:44:44 +08:00
|
|
|
return true;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::FABS_R600:
|
2013-09-13 07:44:44 +08:00
|
|
|
if (!Abs.getNode())
|
|
|
|
return false;
|
|
|
|
Src = Src.getOperand(0);
|
2015-04-28 22:05:47 +08:00
|
|
|
Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
|
2013-09-13 07:44:44 +08:00
|
|
|
return true;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::CONST_COPY: {
|
2013-09-13 07:44:44 +08:00
|
|
|
unsigned Opcode = ParentNode->getMachineOpcode();
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
|
2013-09-13 07:44:44 +08:00
|
|
|
|
|
|
|
if (!Sel.getNode())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
SDValue CstOffset = Src.getOperand(0);
|
|
|
|
if (ParentNode->getValueType(0).isVector())
|
|
|
|
return false;
|
|
|
|
|
|
|
|
// Gather constants values
|
|
|
|
int SrcIndices[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src2),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_W),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_W)
|
2013-09-13 07:44:44 +08:00
|
|
|
};
|
|
|
|
std::vector<unsigned> Consts;
|
2014-05-13 03:23:21 +08:00
|
|
|
for (int OtherSrcIdx : SrcIndices) {
|
2013-09-13 07:44:44 +08:00
|
|
|
int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
|
|
|
|
if (OtherSrcIdx < 0 || OtherSelIdx < 0)
|
|
|
|
continue;
|
|
|
|
if (HasDst) {
|
|
|
|
OtherSrcIdx--;
|
|
|
|
OtherSelIdx--;
|
|
|
|
}
|
|
|
|
if (RegisterSDNode *Reg =
|
|
|
|
dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (Reg->getReg() == R600::ALU_CONST) {
|
2014-05-13 03:26:38 +08:00
|
|
|
ConstantSDNode *Cst
|
|
|
|
= cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
|
2013-09-13 07:44:44 +08:00
|
|
|
Consts.push_back(Cst->getZExtValue());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2014-05-13 04:42:57 +08:00
|
|
|
ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
|
2013-09-13 07:44:44 +08:00
|
|
|
Consts.push_back(Cst->getZExtValue());
|
|
|
|
if (!TII->fitsConstReadLimitations(Consts)) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
Sel = CstOffset;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
|
2013-09-13 07:44:44 +08:00
|
|
|
return true;
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::MOV_IMM_GLOBAL_ADDR:
|
2016-05-14 04:39:31 +08:00
|
|
|
// Check if the Imm slot is used. Taken from below.
|
|
|
|
if (cast<ConstantSDNode>(Imm)->getZExtValue())
|
|
|
|
return false;
|
|
|
|
Imm = Src.getOperand(0);
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
|
2016-05-14 04:39:31 +08:00
|
|
|
return true;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
case R600::MOV_IMM_I32:
|
|
|
|
case R600::MOV_IMM_F32: {
|
|
|
|
unsigned ImmReg = R600::ALU_LITERAL_X;
|
2013-09-13 07:44:53 +08:00
|
|
|
uint64_t ImmValue = 0;
|
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
|
2013-09-13 07:44:53 +08:00
|
|
|
ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
|
|
|
|
float FloatValue = FPC->getValueAPF().convertToFloat();
|
|
|
|
if (FloatValue == 0.0) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
ImmReg = R600::ZERO;
|
2013-09-13 07:44:53 +08:00
|
|
|
} else if (FloatValue == 0.5) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
ImmReg = R600::HALF;
|
2013-09-13 07:44:53 +08:00
|
|
|
} else if (FloatValue == 1.0) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
ImmReg = R600::ONE;
|
2013-09-13 07:44:53 +08:00
|
|
|
} else {
|
|
|
|
ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
|
|
|
|
uint64_t Value = C->getZExtValue();
|
|
|
|
if (Value == 0) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
ImmReg = R600::ZERO;
|
2013-09-13 07:44:53 +08:00
|
|
|
} else if (Value == 1) {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
ImmReg = R600::ONE_INT;
|
2013-09-13 07:44:53 +08:00
|
|
|
} else {
|
|
|
|
ImmValue = Value;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Check that we aren't already using an immediate.
|
|
|
|
// XXX: It's possible for an instruction to have more than one
|
|
|
|
// immediate operand, but this is not supported yet.
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (ImmReg == R600::ALU_LITERAL_X) {
|
2013-09-13 07:44:53 +08:00
|
|
|
if (!Imm.getNode())
|
|
|
|
return false;
|
|
|
|
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
|
|
|
|
assert(C);
|
|
|
|
if (C->getZExtValue())
|
|
|
|
return false;
|
2015-04-28 22:05:47 +08:00
|
|
|
Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
|
2013-09-13 07:44:53 +08:00
|
|
|
}
|
|
|
|
Src = DAG.getRegister(ImmReg, MVT::i32);
|
|
|
|
return true;
|
|
|
|
}
|
2013-09-13 07:44:44 +08:00
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2018-05-01 23:54:18 +08:00
|
|
|
/// Fold the instructions after selecting them
|
2013-09-13 07:44:44 +08:00
|
|
|
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
|
|
|
|
SelectionDAG &DAG) const {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
const R600InstrInfo *TII = Subtarget->getInstrInfo();
|
2013-09-13 07:44:44 +08:00
|
|
|
if (!Node->isMachineOpcode())
|
|
|
|
return Node;
|
2016-06-24 14:30:11 +08:00
|
|
|
|
2013-09-13 07:44:44 +08:00
|
|
|
unsigned Opcode = Node->getMachineOpcode();
|
|
|
|
SDValue FakeOp;
|
|
|
|
|
2015-02-17 23:29:18 +08:00
|
|
|
std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
|
2013-09-13 07:44:44 +08:00
|
|
|
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
if (Opcode == R600::DOT_4) {
|
2013-09-13 07:44:44 +08:00
|
|
|
int OperandIdx[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_W),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_W)
|
2013-10-28 12:07:23 +08:00
|
|
|
};
|
2013-09-13 07:44:44 +08:00
|
|
|
int NegIdx[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
|
2013-09-13 07:44:44 +08:00
|
|
|
};
|
|
|
|
int AbsIdx[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
|
2013-09-13 07:44:44 +08:00
|
|
|
};
|
|
|
|
for (unsigned i = 0; i < 8; i++) {
|
|
|
|
if (OperandIdx[i] < 0)
|
|
|
|
return Node;
|
|
|
|
SDValue &Src = Ops[OperandIdx[i] - 1];
|
|
|
|
SDValue &Neg = Ops[NegIdx[i] - 1];
|
|
|
|
SDValue &Abs = Ops[AbsIdx[i] - 1];
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
|
2013-09-13 07:44:44 +08:00
|
|
|
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
|
|
|
|
if (HasDst)
|
|
|
|
SelIdx--;
|
|
|
|
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
|
2013-09-13 07:44:53 +08:00
|
|
|
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
|
|
|
|
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
|
|
|
}
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
} else if (Opcode == R600::REG_SEQUENCE) {
|
2013-09-13 07:44:53 +08:00
|
|
|
for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
|
|
|
|
SDValue &Src = Ops[i];
|
|
|
|
if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
|
2013-09-13 07:44:44 +08:00
|
|
|
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (!TII->hasInstrModifiers(Opcode))
|
|
|
|
return Node;
|
|
|
|
int OperandIdx[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src2)
|
2013-09-13 07:44:44 +08:00
|
|
|
};
|
|
|
|
int NegIdx[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
|
2013-09-13 07:44:44 +08:00
|
|
|
};
|
|
|
|
int AbsIdx[] = {
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
|
|
|
|
TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
|
2013-09-13 07:44:44 +08:00
|
|
|
-1
|
|
|
|
};
|
|
|
|
for (unsigned i = 0; i < 3; i++) {
|
|
|
|
if (OperandIdx[i] < 0)
|
|
|
|
return Node;
|
|
|
|
SDValue &Src = Ops[OperandIdx[i] - 1];
|
|
|
|
SDValue &Neg = Ops[NegIdx[i] - 1];
|
|
|
|
SDValue FakeAbs;
|
|
|
|
SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
|
2013-09-13 07:44:44 +08:00
|
|
|
int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
|
AMDGPU: Separate R600 and GCN TableGen files
Summary:
We now have two sets of generated TableGen files, one for R600 and one
for GCN, so each sub-target now has its own tables of instructions,
registers, ISel patterns, etc. This should help reduce compile time
since each sub-target now only has to consider information that
is specific to itself. This will also help prevent the R600
sub-target from slowing down new features for GCN, like disassembler
support, GlobalISel, etc.
Reviewers: arsenm, nhaehnle, jvesely
Reviewed By: arsenm
Subscribers: MatzeB, kzhuravl, wdng, mgorny, yaxunl, dstuttard, tpr, t-tye, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D46365
llvm-svn: 335942
2018-06-29 07:47:12 +08:00
|
|
|
int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
|
2013-09-13 07:44:53 +08:00
|
|
|
if (HasDst) {
|
2013-09-13 07:44:44 +08:00
|
|
|
SelIdx--;
|
2013-09-13 07:44:53 +08:00
|
|
|
ImmIdx--;
|
|
|
|
}
|
2013-09-13 07:44:44 +08:00
|
|
|
SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
|
2013-09-13 07:44:53 +08:00
|
|
|
SDValue &Imm = Ops[ImmIdx];
|
|
|
|
if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
|
2013-09-13 07:44:44 +08:00
|
|
|
return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return Node;
|
|
|
|
}
|