Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
//===- AMDGPULegalizerInfo.cpp -----------------------------------*- C++ -*-==//
|
|
|
|
//
|
2019-01-19 16:50:56 +08:00
|
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
/// \file
|
|
|
|
/// This file implements the targeting of the Machinelegalizer class for
|
|
|
|
/// AMDGPU.
|
|
|
|
/// \todo This should be generated by TableGen.
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2019-08-30 04:32:53 +08:00
|
|
|
#if defined(_MSC_VER) || defined(__MINGW32__)
|
|
|
|
// According to Microsoft, one must set _USE_MATH_DEFINES in order to get M_PI
|
|
|
|
// from the Visual C++ cmath / math.h headers:
|
|
|
|
// https://docs.microsoft.com/en-us/cpp/c-runtime-library/math-constants?view=vs-2019
|
|
|
|
#define _USE_MATH_DEFINES
|
|
|
|
#endif
|
|
|
|
|
2018-03-24 07:58:31 +08:00
|
|
|
#include "AMDGPU.h"
|
2018-03-30 01:21:10 +08:00
|
|
|
#include "AMDGPULegalizerInfo.h"
|
2018-03-17 23:17:41 +08:00
|
|
|
#include "AMDGPUTargetMachine.h"
|
2019-02-08 10:40:47 +08:00
|
|
|
#include "SIMachineFunctionInfo.h"
|
2019-07-11 00:31:19 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
|
2019-02-08 10:40:47 +08:00
|
|
|
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
|
2017-11-17 09:07:10 +08:00
|
|
|
#include "llvm/CodeGen/TargetOpcodes.h"
|
2018-03-30 01:21:10 +08:00
|
|
|
#include "llvm/CodeGen/ValueTypes.h"
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
#include "llvm/IR/DerivedTypes.h"
|
2017-06-06 19:49:48 +08:00
|
|
|
#include "llvm/IR/Type.h"
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
|
2019-07-02 02:45:36 +08:00
|
|
|
#define DEBUG_TYPE "amdgpu-legalinfo"
|
|
|
|
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
using namespace llvm;
|
2018-01-30 01:37:29 +08:00
|
|
|
using namespace LegalizeActions;
|
2019-01-25 08:51:00 +08:00
|
|
|
using namespace LegalizeMutations;
|
2019-01-21 03:45:18 +08:00
|
|
|
using namespace LegalityPredicates;
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
|
2019-02-08 03:10:15 +08:00
|
|
|
|
|
|
|
// Matches a type no wider than \p MaxSize bits whose scalar (element) width
// is an even multiple of 32 bits.
static LegalityPredicate isMultiple32(unsigned TypeIdx,
                                      unsigned MaxSize = 512) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    const unsigned ScalarSize = QueryTy.getScalarType().getSizeInBits();
    return QueryTy.getSizeInBits() <= MaxSize && ScalarSize % 32 == 0;
  };
}
|
|
|
|
|
2019-02-12 06:00:39 +08:00
|
|
|
// Matches a vector with an odd element count and sub-32-bit elements, i.e. a
// type that does not pack evenly into 32-bit registers.
static LegalityPredicate isSmallOddVector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    if (!QueryTy.isVector())
      return false;
    return QueryTy.getNumElements() % 2 != 0 &&
           QueryTy.getElementType().getSizeInBits() < 32;
  };
}
|
|
|
|
|
|
|
|
// Mutation: widen the vector at \p TypeIdx by a single element, keeping the
// element type unchanged.
static LegalizeMutation oneMoreElement(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT VecTy = Query.Types[TypeIdx];
    const unsigned WidenedEltCount = VecTy.getNumElements() + 1;
    return std::make_pair(TypeIdx,
                          LLT::vector(WidenedEltCount, VecTy.getElementType()));
  };
}
|
|
|
|
|
2019-02-20 00:30:19 +08:00
|
|
|
// Mutation: shrink the vector at \p TypeIdx so each resulting piece fits in
// 64 bits, producing a scalar when only one element remains.
static LegalizeMutation fewerEltsToSize64Vector(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT VecTy = Query.Types[TypeIdx];
    const LLT EltTy = VecTy.getElementType();
    // Number of 64-bit pieces the full vector occupies, rounded up.
    const unsigned Pieces = (VecTy.getSizeInBits() + 63) / 64;
    // Elements per piece, rounded up so an odd count still divides out.
    const unsigned NewNumElts = (VecTy.getNumElements() + 1) / Pieces;
    return std::make_pair(TypeIdx, LLT::scalarOrVector(NewNumElts, EltTy));
  };
}
|
|
|
|
|
|
|
|
// Matches a vector whose total bit width exceeds \p Size.
static LegalityPredicate vectorWiderThan(unsigned TypeIdx, unsigned Size) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    if (!Ty.isVector())
      return false;
    return Ty.getSizeInBits() > Size;
  };
}
|
|
|
|
|
2019-02-20 01:03:09 +08:00
|
|
|
// Matches a vector with an odd number of elements (any element width).
static LegalityPredicate numElementsNotEven(unsigned TypeIdx) {
  return [=](const LegalityQuery &Query) {
    const LLT Ty = Query.Types[TypeIdx];
    if (!Ty.isVector())
      return false;
    return Ty.getNumElements() % 2 != 0;
  };
}
|
2019-02-12 06:00:39 +08:00
|
|
|
|
2019-07-09 22:17:31 +08:00
|
|
|
// Any combination of 32 or 64-bit elements up to 512 bits, and multiples of
|
|
|
|
// v2s16.
|
|
|
|
static LegalityPredicate isRegisterType(unsigned TypeIdx) {
|
|
|
|
return [=](const LegalityQuery &Query) {
|
|
|
|
const LLT Ty = Query.Types[TypeIdx];
|
|
|
|
if (Ty.isVector()) {
|
|
|
|
const int EltSize = Ty.getElementType().getSizeInBits();
|
|
|
|
return EltSize == 32 || EltSize == 64 ||
|
2019-07-10 06:48:04 +08:00
|
|
|
(EltSize == 16 && Ty.getNumElements() % 2 == 0) ||
|
|
|
|
EltSize == 128 || EltSize == 256;
|
2019-07-09 22:17:31 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
return Ty.getSizeInBits() % 32 == 0 && Ty.getSizeInBits() <= 512;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2019-08-14 00:26:28 +08:00
|
|
|
// Matches when the element type of the (vector) type at \p TypeIdx equals
// \p Type.
// NOTE(review): presumably only applied to rules where the type is a vector —
// getElementType on a scalar is not meaningful; confirm against call sites.
static LegalityPredicate elementTypeIs(unsigned TypeIdx, LLT Type) {
  return [=](const LegalityQuery &Query) {
    const LLT QueryTy = Query.Types[TypeIdx];
    return QueryTy.getElementType() == Type;
  };
}
|
|
|
|
|
2019-07-02 02:49:01 +08:00
|
|
|
AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
|
|
|
|
const GCNTargetMachine &TM)
|
|
|
|
: ST(ST_) {
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
using namespace TargetOpcode;
|
|
|
|
|
2018-03-17 23:17:41 +08:00
|
|
|
auto GetAddrSpacePtr = [&TM](unsigned AS) {
|
|
|
|
return LLT::pointer(AS, TM.getPointerSizeInBits(AS));
|
|
|
|
};
|
|
|
|
|
|
|
|
const LLT S1 = LLT::scalar(1);
|
2019-02-03 08:07:33 +08:00
|
|
|
const LLT S8 = LLT::scalar(8);
|
2019-01-19 05:33:50 +08:00
|
|
|
const LLT S16 = LLT::scalar(16);
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
const LLT S32 = LLT::scalar(32);
|
|
|
|
const LLT S64 = LLT::scalar(64);
|
2019-01-25 10:36:32 +08:00
|
|
|
const LLT S128 = LLT::scalar(128);
|
2019-01-21 02:40:36 +08:00
|
|
|
const LLT S256 = LLT::scalar(256);
|
AMDGPU/GlobalISel: Make IMPLICIT_DEF of all sizes < 512 legal.
Summary:
We could split sizes that are not power of two into smaller sized
G_IMPLICIT_DEF instructions, but this ends up generating
G_MERGE_VALUES instructions which we then have to handle in the instruction
selector. Since G_IMPLICIT_DEF is really a no-op it's easier just to
keep everything that can fit into a register legal.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D48777
llvm-svn: 336041
2018-06-30 12:09:44 +08:00
|
|
|
const LLT S512 = LLT::scalar(512);
|
2018-03-17 23:17:41 +08:00
|
|
|
|
2018-12-21 11:03:11 +08:00
|
|
|
const LLT V2S16 = LLT::vector(2, 16);
|
2019-01-08 09:30:02 +08:00
|
|
|
const LLT V4S16 = LLT::vector(4, 16);
|
2018-12-21 11:03:11 +08:00
|
|
|
|
|
|
|
const LLT V2S32 = LLT::vector(2, 32);
|
|
|
|
const LLT V3S32 = LLT::vector(3, 32);
|
|
|
|
const LLT V4S32 = LLT::vector(4, 32);
|
|
|
|
const LLT V5S32 = LLT::vector(5, 32);
|
|
|
|
const LLT V6S32 = LLT::vector(6, 32);
|
|
|
|
const LLT V7S32 = LLT::vector(7, 32);
|
|
|
|
const LLT V8S32 = LLT::vector(8, 32);
|
|
|
|
const LLT V9S32 = LLT::vector(9, 32);
|
|
|
|
const LLT V10S32 = LLT::vector(10, 32);
|
|
|
|
const LLT V11S32 = LLT::vector(11, 32);
|
|
|
|
const LLT V12S32 = LLT::vector(12, 32);
|
|
|
|
const LLT V13S32 = LLT::vector(13, 32);
|
|
|
|
const LLT V14S32 = LLT::vector(14, 32);
|
|
|
|
const LLT V15S32 = LLT::vector(15, 32);
|
|
|
|
const LLT V16S32 = LLT::vector(16, 32);
|
|
|
|
|
|
|
|
const LLT V2S64 = LLT::vector(2, 64);
|
|
|
|
const LLT V3S64 = LLT::vector(3, 64);
|
|
|
|
const LLT V4S64 = LLT::vector(4, 64);
|
|
|
|
const LLT V5S64 = LLT::vector(5, 64);
|
|
|
|
const LLT V6S64 = LLT::vector(6, 64);
|
|
|
|
const LLT V7S64 = LLT::vector(7, 64);
|
|
|
|
const LLT V8S64 = LLT::vector(8, 64);
|
|
|
|
|
|
|
|
std::initializer_list<LLT> AllS32Vectors =
|
|
|
|
{V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32,
|
|
|
|
V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32};
|
|
|
|
std::initializer_list<LLT> AllS64Vectors =
|
|
|
|
{V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64};
|
|
|
|
|
2018-03-17 23:17:41 +08:00
|
|
|
const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS);
|
|
|
|
const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS);
|
2019-07-20 06:28:44 +08:00
|
|
|
const LLT Constant32Ptr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS_32BIT);
|
2018-03-17 23:17:45 +08:00
|
|
|
const LLT LocalPtr = GetAddrSpacePtr(AMDGPUAS::LOCAL_ADDRESS);
|
2019-07-20 06:28:44 +08:00
|
|
|
const LLT RegionPtr = GetAddrSpacePtr(AMDGPUAS::REGION_ADDRESS);
|
2018-08-31 13:49:54 +08:00
|
|
|
const LLT FlatPtr = GetAddrSpacePtr(AMDGPUAS::FLAT_ADDRESS);
|
|
|
|
const LLT PrivatePtr = GetAddrSpacePtr(AMDGPUAS::PRIVATE_ADDRESS);
|
2018-03-17 23:17:45 +08:00
|
|
|
|
2018-12-14 04:34:15 +08:00
|
|
|
const LLT CodePtr = FlatPtr;
|
|
|
|
|
2019-02-15 06:24:28 +08:00
|
|
|
const std::initializer_list<LLT> AddrSpaces64 = {
|
|
|
|
GlobalPtr, ConstantPtr, FlatPtr
|
|
|
|
};
|
|
|
|
|
|
|
|
const std::initializer_list<LLT> AddrSpaces32 = {
|
2019-07-20 06:28:44 +08:00
|
|
|
LocalPtr, PrivatePtr, Constant32Ptr, RegionPtr
|
2018-03-17 23:17:45 +08:00
|
|
|
};
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
|
2019-07-02 01:35:53 +08:00
|
|
|
const std::initializer_list<LLT> FPTypesBase = {
|
|
|
|
S32, S64
|
|
|
|
};
|
|
|
|
|
|
|
|
const std::initializer_list<LLT> FPTypes16 = {
|
|
|
|
S32, S64, S16
|
|
|
|
};
|
|
|
|
|
2019-07-11 00:31:19 +08:00
|
|
|
const std::initializer_list<LLT> FPTypesPK16 = {
|
|
|
|
S32, S64, S16, V2S16
|
|
|
|
};
|
|
|
|
|
2019-01-08 09:22:47 +08:00
|
|
|
setAction({G_BRCOND, S1}, Legal);
|
|
|
|
|
2019-02-21 23:48:13 +08:00
|
|
|
// TODO: All multiples of 32, vectors of pointers, all v2s16 pairs, more
|
|
|
|
// elements for v3s16
|
|
|
|
getActionDefinitionsBuilder(G_PHI)
|
|
|
|
.legalFor({S32, S64, V2S16, V4S16, S1, S128, S256})
|
|
|
|
.legalFor(AllS32Vectors)
|
|
|
|
.legalFor(AllS64Vectors)
|
|
|
|
.legalFor(AddrSpaces64)
|
|
|
|
.legalFor(AddrSpaces32)
|
|
|
|
.clampScalar(0, S32, S256)
|
|
|
|
.widenScalarToNextPow2(0, 32)
|
2019-02-28 08:16:32 +08:00
|
|
|
.clampMaxNumElements(0, S32, 16)
|
2019-02-28 08:01:05 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
2019-02-21 23:48:13 +08:00
|
|
|
.legalIf(isPointer(0));
|
|
|
|
|
2019-07-02 02:18:55 +08:00
|
|
|
if (ST.has16BitInsts()) {
|
|
|
|
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
|
|
|
|
.legalFor({S32, S16})
|
|
|
|
.clampScalar(0, S16, S32)
|
|
|
|
.scalarize(0);
|
|
|
|
} else {
|
|
|
|
getActionDefinitionsBuilder({G_ADD, G_SUB, G_MUL})
|
|
|
|
.legalFor({S32})
|
|
|
|
.clampScalar(0, S32, S32)
|
|
|
|
.scalarize(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
getActionDefinitionsBuilder({G_UMULH, G_SMULH})
|
2019-01-25 11:23:04 +08:00
|
|
|
.legalFor({S32})
|
2019-01-27 08:52:51 +08:00
|
|
|
.clampScalar(0, S32, S32)
|
2019-01-25 11:23:04 +08:00
|
|
|
.scalarize(0);
|
2018-12-20 09:35:49 +08:00
|
|
|
|
2019-01-27 07:47:07 +08:00
|
|
|
// Report legal for any types we can handle anywhere. For the cases only legal
|
|
|
|
// on the SALU, RegBankSelect will be able to re-legalize.
|
2018-12-20 09:35:49 +08:00
|
|
|
getActionDefinitionsBuilder({G_AND, G_OR, G_XOR})
|
2019-07-16 22:28:30 +08:00
|
|
|
.legalFor({S32, S1, S64, V2S32, S16, V2S16, V4S16})
|
2019-01-27 07:47:07 +08:00
|
|
|
.clampScalar(0, S32, S64)
|
2019-02-20 00:30:19 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
|
|
|
.fewerElementsIf(vectorWiderThan(0, 32), fewerEltsToSize64Vector(0))
|
2019-02-26 05:32:48 +08:00
|
|
|
.widenScalarToNextPow2(0)
|
2019-01-27 07:47:07 +08:00
|
|
|
.scalarize(0);
|
AMDGPU/GlobalISel: Mark 32-bit G_ADD as legal
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, igorb, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D33992
llvm-svn: 305232
2017-06-13 04:54:56 +08:00
|
|
|
|
2019-01-08 09:09:09 +08:00
|
|
|
getActionDefinitionsBuilder({G_UADDO, G_SADDO, G_USUBO, G_SSUBO,
|
|
|
|
G_UADDE, G_SADDE, G_USUBE, G_SSUBE})
|
2019-01-27 07:44:51 +08:00
|
|
|
.legalFor({{S32, S1}})
|
|
|
|
.clampScalar(0, S32, S32);
|
2019-01-08 09:03:58 +08:00
|
|
|
|
2019-01-21 03:45:18 +08:00
|
|
|
getActionDefinitionsBuilder(G_BITCAST)
|
|
|
|
.legalForCartesianProduct({S32, V2S16})
|
|
|
|
.legalForCartesianProduct({S64, V2S32, V4S16})
|
|
|
|
.legalForCartesianProduct({V2S64, V4S32})
|
|
|
|
// Don't worry about the size constraint.
|
|
|
|
.legalIf(all(isPointer(0), isPointer(1)));
|
AMDGPU/GlobalISel: Mark G_BITCAST s32 <--> <2 x s16> legal
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, igorb, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D34129
llvm-svn: 305692
2017-06-19 21:15:45 +08:00
|
|
|
|
2019-09-05 00:19:45 +08:00
|
|
|
getActionDefinitionsBuilder(G_FCONSTANT)
|
|
|
|
.legalFor({S32, S64, S16})
|
|
|
|
.clampScalar(0, S16, S64);
|
AMDGPU/GlobalISel: Make IMPLICIT_DEF of all sizes < 512 legal.
Summary:
We could split sizes that are not power of two into smaller sized
G_IMPLICIT_DEF instructions, but this ends up generating
G_MERGE_VALUES instructions which we then have to handle in the instruction
selector. Since G_IMPLICIT_DEF is really a no-op it's easier just to
keep everything that can fit into a register legal.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D48777
llvm-svn: 336041
2018-06-30 12:09:44 +08:00
|
|
|
|
2018-06-25 23:42:12 +08:00
|
|
|
getActionDefinitionsBuilder(G_IMPLICIT_DEF)
|
2019-09-05 00:19:45 +08:00
|
|
|
.legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr,
|
2019-02-08 03:10:15 +08:00
|
|
|
ConstantPtr, LocalPtr, FlatPtr, PrivatePtr})
|
2019-02-12 06:00:39 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
2019-02-08 03:10:15 +08:00
|
|
|
.clampScalarOrElt(0, S32, S512)
|
2019-02-08 22:46:27 +08:00
|
|
|
.legalIf(isMultiple32(0))
|
2019-02-26 04:46:06 +08:00
|
|
|
.widenScalarToNextPow2(0, 32)
|
|
|
|
.clampMaxNumElements(0, S32, 16);
|
2018-06-25 23:42:12 +08:00
|
|
|
|
2018-03-17 23:17:48 +08:00
|
|
|
|
AMDGPU/GlobalISel: Mark 1-bit integer constants as legal
Summary:
These are mostly legal, but will probably need special lowering for some
cases.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, igorb, dstuttard, tpr, llvm-commits, t-tye
Differential Revision: https://reviews.llvm.org/D33791
llvm-svn: 304628
2017-06-03 09:13:33 +08:00
|
|
|
// FIXME: i1 operands to intrinsics should always be legal, but other i1
|
|
|
|
// values may not be legal. We need to figure out how to distinguish
|
|
|
|
// between these two scenarios.
|
2019-01-19 05:33:50 +08:00
|
|
|
getActionDefinitionsBuilder(G_CONSTANT)
|
2019-09-05 00:19:45 +08:00
|
|
|
.legalFor({S1, S32, S64, S16, GlobalPtr,
|
2019-02-03 07:33:49 +08:00
|
|
|
LocalPtr, ConstantPtr, PrivatePtr, FlatPtr })
|
2019-01-19 05:33:50 +08:00
|
|
|
.clampScalar(0, S32, S64)
|
2019-02-03 07:33:49 +08:00
|
|
|
.widenScalarToNextPow2(0)
|
|
|
|
.legalIf(isPointer(0));
|
2018-03-02 03:16:52 +08:00
|
|
|
|
2018-12-18 17:46:13 +08:00
|
|
|
setAction({G_FRAME_INDEX, PrivatePtr}, Legal);
|
|
|
|
|
2019-02-08 02:03:11 +08:00
|
|
|
auto &FPOpActions = getActionDefinitionsBuilder(
|
2019-02-12 01:05:20 +08:00
|
|
|
{ G_FADD, G_FMUL, G_FNEG, G_FABS, G_FMA, G_FCANONICALIZE})
|
2019-02-08 02:03:11 +08:00
|
|
|
.legalFor({S32, S64});
|
2019-08-30 04:06:48 +08:00
|
|
|
auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
|
|
|
|
.customFor({S32, S64});
|
2019-02-08 02:03:11 +08:00
|
|
|
|
|
|
|
if (ST.has16BitInsts()) {
|
|
|
|
if (ST.hasVOP3PInsts())
|
|
|
|
FPOpActions.legalFor({S16, V2S16});
|
|
|
|
else
|
|
|
|
FPOpActions.legalFor({S16});
|
2019-08-30 04:06:48 +08:00
|
|
|
|
|
|
|
TrigActions.customFor({S16});
|
2019-02-08 02:03:11 +08:00
|
|
|
}
|
|
|
|
|
2019-07-11 00:31:19 +08:00
|
|
|
auto &MinNumMaxNum = getActionDefinitionsBuilder({
|
|
|
|
G_FMINNUM, G_FMAXNUM, G_FMINNUM_IEEE, G_FMAXNUM_IEEE});
|
|
|
|
|
|
|
|
if (ST.hasVOP3PInsts()) {
|
|
|
|
MinNumMaxNum.customFor(FPTypesPK16)
|
|
|
|
.clampMaxNumElements(0, S16, 2)
|
|
|
|
.clampScalar(0, S16, S64)
|
|
|
|
.scalarize(0);
|
|
|
|
} else if (ST.has16BitInsts()) {
|
|
|
|
MinNumMaxNum.customFor(FPTypes16)
|
|
|
|
.clampScalar(0, S16, S64)
|
|
|
|
.scalarize(0);
|
|
|
|
} else {
|
|
|
|
MinNumMaxNum.customFor(FPTypesBase)
|
|
|
|
.clampScalar(0, S32, S64)
|
|
|
|
.scalarize(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
// TODO: Implement
|
|
|
|
getActionDefinitionsBuilder({G_FMINIMUM, G_FMAXIMUM}).lower();
|
|
|
|
|
2019-02-08 02:03:11 +08:00
|
|
|
if (ST.hasVOP3PInsts())
|
|
|
|
FPOpActions.clampMaxNumElements(0, S16, 2);
|
2019-08-30 04:06:48 +08:00
|
|
|
|
2019-02-08 02:03:11 +08:00
|
|
|
FPOpActions
|
|
|
|
.scalarize(0)
|
|
|
|
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
|
AMDGPU/GlobalISel: Mark 32-bit G_FADD as legal
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, igorb, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D38439
llvm-svn: 316815
2017-10-28 07:57:41 +08:00
|
|
|
|
2019-08-30 04:06:48 +08:00
|
|
|
TrigActions
|
|
|
|
.scalarize(0)
|
|
|
|
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
|
|
|
|
|
2019-02-08 02:14:39 +08:00
|
|
|
if (ST.has16BitInsts()) {
|
|
|
|
getActionDefinitionsBuilder(G_FSQRT)
|
|
|
|
.legalFor({S32, S64, S16})
|
|
|
|
.scalarize(0)
|
|
|
|
.clampScalar(0, S16, S64);
|
|
|
|
} else {
|
|
|
|
getActionDefinitionsBuilder(G_FSQRT)
|
|
|
|
.legalFor({S32, S64})
|
|
|
|
.scalarize(0)
|
|
|
|
.clampScalar(0, S32, S64);
|
|
|
|
}
|
|
|
|
|
2018-12-20 08:37:02 +08:00
|
|
|
getActionDefinitionsBuilder(G_FPTRUNC)
|
2019-01-25 12:37:33 +08:00
|
|
|
.legalFor({{S32, S64}, {S16, S32}})
|
|
|
|
.scalarize(0);
|
2018-12-20 08:37:02 +08:00
|
|
|
|
2019-01-21 02:34:24 +08:00
|
|
|
getActionDefinitionsBuilder(G_FPEXT)
|
|
|
|
.legalFor({{S64, S32}, {S32, S16}})
|
2019-01-25 10:36:32 +08:00
|
|
|
.lowerFor({{S64, S16}}) // FIXME: Implement
|
|
|
|
.scalarize(0);
|
2019-01-21 02:34:24 +08:00
|
|
|
|
2019-07-10 07:34:29 +08:00
|
|
|
// TODO: Verify V_BFI_B32 is generated from expanded bit ops.
|
|
|
|
getActionDefinitionsBuilder(G_FCOPYSIGN).lower();
|
2019-05-17 20:19:52 +08:00
|
|
|
|
2019-01-21 03:10:31 +08:00
|
|
|
getActionDefinitionsBuilder(G_FSUB)
|
2019-01-23 04:14:29 +08:00
|
|
|
// Use actual fsub instruction
|
|
|
|
.legalFor({S32})
|
|
|
|
// Must use fadd + fneg
|
|
|
|
.lowerFor({S64, S16, V2S16})
|
2019-01-25 08:51:00 +08:00
|
|
|
.scalarize(0)
|
2019-01-23 04:14:29 +08:00
|
|
|
.clampScalar(0, S32, S64);
|
2018-12-18 17:19:03 +08:00
|
|
|
|
2019-01-21 02:34:24 +08:00
|
|
|
getActionDefinitionsBuilder({G_SEXT, G_ZEXT, G_ANYEXT})
|
2019-01-21 03:28:20 +08:00
|
|
|
.legalFor({{S64, S32}, {S32, S16}, {S64, S16},
|
2019-01-25 10:36:32 +08:00
|
|
|
{S32, S1}, {S64, S1}, {S16, S1},
|
|
|
|
// FIXME: Hack
|
2019-02-26 05:32:48 +08:00
|
|
|
{S64, LLT::scalar(33)},
|
2019-02-03 08:07:33 +08:00
|
|
|
{S32, S8}, {S128, S32}, {S128, S64}, {S32, LLT::scalar(24)}})
|
2019-01-25 10:36:32 +08:00
|
|
|
.scalarize(0);
|
2018-12-13 16:23:51 +08:00
|
|
|
|
2019-01-22 08:20:17 +08:00
|
|
|
getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
|
2019-01-25 12:37:33 +08:00
|
|
|
.legalFor({{S32, S32}, {S64, S32}})
|
2019-05-18 07:05:13 +08:00
|
|
|
.lowerFor({{S32, S64}})
|
2019-05-18 07:05:18 +08:00
|
|
|
.customFor({{S64, S64}})
|
2019-01-25 12:37:33 +08:00
|
|
|
.scalarize(0);
|
2018-03-02 03:04:25 +08:00
|
|
|
|
2019-01-22 08:20:17 +08:00
|
|
|
getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
|
2019-01-25 12:37:33 +08:00
|
|
|
.legalFor({{S32, S32}, {S32, S64}})
|
|
|
|
.scalarize(0);
|
2018-02-07 12:47:59 +08:00
|
|
|
|
2019-05-17 20:20:01 +08:00
|
|
|
getActionDefinitionsBuilder(G_INTRINSIC_ROUND)
|
2019-01-27 08:12:21 +08:00
|
|
|
.legalFor({S32, S64})
|
|
|
|
.scalarize(0);
|
2018-12-21 11:14:45 +08:00
|
|
|
|
2019-05-17 20:19:57 +08:00
|
|
|
if (ST.getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
|
2019-05-17 20:20:05 +08:00
|
|
|
getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
|
2019-05-17 20:19:57 +08:00
|
|
|
.legalFor({S32, S64})
|
|
|
|
.clampScalar(0, S32, S64)
|
|
|
|
.scalarize(0);
|
|
|
|
} else {
|
2019-05-17 20:20:05 +08:00
|
|
|
getActionDefinitionsBuilder({G_INTRINSIC_TRUNC, G_FCEIL, G_FRINT})
|
2019-05-17 20:19:57 +08:00
|
|
|
.legalFor({S32})
|
|
|
|
.customFor({S64})
|
|
|
|
.clampScalar(0, S32, S64)
|
|
|
|
.scalarize(0);
|
|
|
|
}
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
|
2019-02-15 06:24:28 +08:00
|
|
|
getActionDefinitionsBuilder(G_GEP)
|
|
|
|
.legalForCartesianProduct(AddrSpaces64, {S64})
|
|
|
|
.legalForCartesianProduct(AddrSpaces32, {S32})
|
|
|
|
.scalarize(0);
|
2019-01-25 12:54:00 +08:00
|
|
|
|
2018-12-14 04:34:15 +08:00
|
|
|
setAction({G_BLOCK_ADDR, CodePtr}, Legal);
|
|
|
|
|
2019-07-09 22:10:43 +08:00
|
|
|
auto &CmpBuilder =
|
|
|
|
getActionDefinitionsBuilder(G_ICMP)
|
2019-02-03 07:35:15 +08:00
|
|
|
.legalForCartesianProduct(
|
|
|
|
{S1}, {S32, S64, GlobalPtr, LocalPtr, ConstantPtr, PrivatePtr, FlatPtr})
|
2019-07-09 22:10:43 +08:00
|
|
|
.legalFor({{S1, S32}, {S1, S64}});
|
|
|
|
if (ST.has16BitInsts()) {
|
|
|
|
CmpBuilder.legalFor({{S1, S16}});
|
|
|
|
}
|
|
|
|
|
|
|
|
CmpBuilder
|
2019-02-03 07:35:15 +08:00
|
|
|
.widenScalarToNextPow2(1)
|
|
|
|
.clampScalar(1, S32, S64)
|
|
|
|
.scalarize(0)
|
|
|
|
.legalIf(all(typeIs(0, S1), isPointer(1)));
|
|
|
|
|
|
|
|
getActionDefinitionsBuilder(G_FCMP)
|
2019-07-02 01:35:53 +08:00
|
|
|
.legalForCartesianProduct({S1}, ST.has16BitInsts() ? FPTypes16 : FPTypesBase)
|
2019-01-25 10:59:34 +08:00
|
|
|
.widenScalarToNextPow2(1)
|
|
|
|
.clampScalar(1, S32, S64)
|
2019-01-27 07:54:53 +08:00
|
|
|
.scalarize(0);
|
2019-01-25 10:59:34 +08:00
|
|
|
|
2019-01-25 12:03:38 +08:00
|
|
|
// FIXME: fexp, flog2, flog10 needs to be custom lowered.
|
|
|
|
getActionDefinitionsBuilder({G_FPOW, G_FEXP, G_FEXP2,
|
|
|
|
G_FLOG, G_FLOG2, G_FLOG10})
|
|
|
|
.legalFor({S32})
|
|
|
|
.scalarize(0);
|
AMDGPU/GlobalISel: Mark 32-bit G_ICMP as legal
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, igorb, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D33890
llvm-svn: 304797
2017-06-06 22:16:50 +08:00
|
|
|
|
2019-01-31 10:09:57 +08:00
|
|
|
// The 64-bit versions produce 32-bit results, but only on the SALU.
|
|
|
|
getActionDefinitionsBuilder({G_CTLZ, G_CTLZ_ZERO_UNDEF,
|
|
|
|
G_CTTZ, G_CTTZ_ZERO_UNDEF,
|
|
|
|
G_CTPOP})
|
|
|
|
.legalFor({{S32, S32}, {S32, S64}})
|
|
|
|
.clampScalar(0, S32, S32)
|
2019-02-21 00:42:52 +08:00
|
|
|
.clampScalar(1, S32, S64)
|
2019-02-21 23:22:20 +08:00
|
|
|
.scalarize(0)
|
|
|
|
.widenScalarToNextPow2(0, 32)
|
|
|
|
.widenScalarToNextPow2(1, 32);
|
2019-01-31 10:09:57 +08:00
|
|
|
|
2019-01-31 10:34:03 +08:00
|
|
|
// TODO: Expand for > s32
|
2019-09-05 04:46:15 +08:00
|
|
|
getActionDefinitionsBuilder({G_BSWAP, G_BITREVERSE})
|
2019-01-31 10:34:03 +08:00
|
|
|
.legalFor({S32})
|
|
|
|
.clampScalar(0, S32, S32)
|
|
|
|
.scalarize(0);
|
2019-01-31 10:09:57 +08:00
|
|
|
|
2019-05-24 01:58:48 +08:00
|
|
|
if (ST.has16BitInsts()) {
|
|
|
|
if (ST.hasVOP3PInsts()) {
|
|
|
|
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
|
|
|
|
.legalFor({S32, S16, V2S16})
|
|
|
|
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
|
|
|
.clampMaxNumElements(0, S16, 2)
|
|
|
|
.clampScalar(0, S16, S32)
|
|
|
|
.widenScalarToNextPow2(0)
|
|
|
|
.scalarize(0);
|
|
|
|
} else {
|
|
|
|
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
|
|
|
|
.legalFor({S32, S16})
|
|
|
|
.widenScalarToNextPow2(0)
|
|
|
|
.clampScalar(0, S16, S32)
|
|
|
|
.scalarize(0);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
|
|
|
|
.legalFor({S32})
|
|
|
|
.clampScalar(0, S32, S32)
|
|
|
|
.widenScalarToNextPow2(0)
|
|
|
|
.scalarize(0);
|
|
|
|
}
|
2018-12-13 16:23:51 +08:00
|
|
|
|
2019-02-03 07:29:55 +08:00
|
|
|
auto smallerThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
|
|
|
|
return [=](const LegalityQuery &Query) {
|
|
|
|
return Query.Types[TypeIdx0].getSizeInBits() <
|
|
|
|
Query.Types[TypeIdx1].getSizeInBits();
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
auto greaterThan = [](unsigned TypeIdx0, unsigned TypeIdx1) {
|
|
|
|
return [=](const LegalityQuery &Query) {
|
|
|
|
return Query.Types[TypeIdx0].getSizeInBits() >
|
|
|
|
Query.Types[TypeIdx1].getSizeInBits();
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
AMDGPU/GlobalISel: Add support for G_INTTOPTR
Summary: This is a no-op.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D52916
llvm-svn: 343839
2018-10-05 12:34:09 +08:00
|
|
|
getActionDefinitionsBuilder(G_INTTOPTR)
|
2019-02-03 07:29:55 +08:00
|
|
|
// List the common cases
|
2019-02-15 06:24:28 +08:00
|
|
|
.legalForCartesianProduct(AddrSpaces64, {S64})
|
|
|
|
.legalForCartesianProduct(AddrSpaces32, {S32})
|
2019-02-03 07:29:55 +08:00
|
|
|
.scalarize(0)
|
|
|
|
// Accept any address space as long as the size matches
|
|
|
|
.legalIf(sameSize(0, 1))
|
|
|
|
.widenScalarIf(smallerThan(1, 0),
|
|
|
|
[](const LegalityQuery &Query) {
|
|
|
|
return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
|
|
|
|
})
|
|
|
|
.narrowScalarIf(greaterThan(1, 0),
|
|
|
|
[](const LegalityQuery &Query) {
|
|
|
|
return std::make_pair(1, LLT::scalar(Query.Types[0].getSizeInBits()));
|
|
|
|
});
|
2018-03-17 23:17:41 +08:00
|
|
|
|
2018-12-13 16:23:51 +08:00
|
|
|
getActionDefinitionsBuilder(G_PTRTOINT)
|
2019-02-03 07:29:55 +08:00
|
|
|
// List the common cases
|
2019-02-15 06:24:28 +08:00
|
|
|
.legalForCartesianProduct(AddrSpaces64, {S64})
|
|
|
|
.legalForCartesianProduct(AddrSpaces32, {S32})
|
2019-02-03 07:29:55 +08:00
|
|
|
.scalarize(0)
|
|
|
|
// Accept any address space as long as the size matches
|
|
|
|
.legalIf(sameSize(0, 1))
|
|
|
|
.widenScalarIf(smallerThan(0, 1),
|
|
|
|
[](const LegalityQuery &Query) {
|
|
|
|
return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
|
|
|
|
})
|
|
|
|
.narrowScalarIf(
|
|
|
|
greaterThan(0, 1),
|
|
|
|
[](const LegalityQuery &Query) {
|
|
|
|
return std::make_pair(0, LLT::scalar(Query.Types[1].getSizeInBits()));
|
|
|
|
});
|
2018-12-13 16:23:51 +08:00
|
|
|
|
2019-08-28 08:58:24 +08:00
|
|
|
getActionDefinitionsBuilder(G_ADDRSPACE_CAST)
|
|
|
|
.scalarize(0)
|
|
|
|
.custom();
|
2019-02-08 10:40:47 +08:00
|
|
|
|
2019-07-17 02:05:29 +08:00
|
|
|
// TODO: Should load to s16 be legal? Most loads extend to 32-bits, but we
|
|
|
|
// handle some operations by just promoting the register during
|
|
|
|
// selection. There are also d16 loads on GFX9+ which preserve the high bits.
|
2018-03-17 23:17:41 +08:00
|
|
|
getActionDefinitionsBuilder({G_LOAD, G_STORE})
|
2019-01-30 02:13:02 +08:00
|
|
|
.narrowScalarIf([](const LegalityQuery &Query) {
|
|
|
|
unsigned Size = Query.Types[0].getSizeInBits();
|
|
|
|
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
|
|
|
return (Size > 32 && MemSize < Size);
|
|
|
|
},
|
|
|
|
[](const LegalityQuery &Query) {
|
|
|
|
return std::make_pair(0, LLT::scalar(32));
|
|
|
|
})
|
2019-08-01 09:44:22 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
2019-07-02 02:49:01 +08:00
|
|
|
.fewerElementsIf([=](const LegalityQuery &Query) {
|
2019-01-30 10:35:38 +08:00
|
|
|
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
2019-01-31 10:46:05 +08:00
|
|
|
return (MemSize == 96) &&
|
|
|
|
Query.Types[0].isVector() &&
|
2019-06-20 07:54:58 +08:00
|
|
|
!ST.hasDwordx3LoadStores();
|
2019-01-30 10:35:38 +08:00
|
|
|
},
|
|
|
|
[=](const LegalityQuery &Query) {
|
|
|
|
return std::make_pair(0, V2S32);
|
|
|
|
})
|
2019-07-02 02:49:01 +08:00
|
|
|
.legalIf([=](const LegalityQuery &Query) {
|
2018-03-17 23:17:41 +08:00
|
|
|
const LLT &Ty0 = Query.Types[0];
|
|
|
|
|
2019-01-30 02:13:02 +08:00
|
|
|
unsigned Size = Ty0.getSizeInBits();
|
|
|
|
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
|
2019-02-03 07:39:13 +08:00
|
|
|
if (Size < 32 || (Size > 32 && MemSize < Size))
|
2019-01-30 02:13:02 +08:00
|
|
|
return false;
|
|
|
|
|
|
|
|
if (Ty0.isVector() && Size != MemSize)
|
|
|
|
return false;
|
|
|
|
|
2018-03-17 23:17:41 +08:00
|
|
|
// TODO: Decompose private loads into 4-byte components.
|
|
|
|
// TODO: Illegal flat loads on SI
|
2019-01-30 02:13:02 +08:00
|
|
|
switch (MemSize) {
|
|
|
|
case 8:
|
|
|
|
case 16:
|
|
|
|
return Size == 32;
|
2018-03-17 23:17:41 +08:00
|
|
|
case 32:
|
|
|
|
case 64:
|
|
|
|
case 128:
|
|
|
|
return true;
|
|
|
|
|
|
|
|
case 96:
|
2019-06-20 07:54:58 +08:00
|
|
|
return ST.hasDwordx3LoadStores();
|
2018-03-17 23:17:41 +08:00
|
|
|
|
|
|
|
case 256:
|
|
|
|
case 512:
|
AMDGPU/GlobalISel: Add support for wide loads >= 256-bits
Summary:
This adds support for the most commonly used wide load types:
<8xi32>, <16xi32>, <4xi64>, and <8xi64>
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: hiraditya, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, dstuttard, tpr, t-tye, volkan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D57399
llvm-svn: 365586
2019-07-10 08:22:41 +08:00
|
|
|
// TODO: Possibly support loads of i256 and i512 . This will require
|
|
|
|
// adding i256 and i512 types to MVT in order for to be able to use
|
|
|
|
// TableGen.
|
|
|
|
// TODO: Add support for other vector types, this will require
|
|
|
|
// defining more value mappings for the new types.
|
|
|
|
return Ty0.isVector() && (Ty0.getScalarType().getSizeInBits() == 32 ||
|
|
|
|
Ty0.getScalarType().getSizeInBits() == 64);
|
|
|
|
|
2018-03-17 23:17:41 +08:00
|
|
|
default:
|
|
|
|
return false;
|
|
|
|
}
|
2019-01-30 02:13:02 +08:00
|
|
|
})
|
|
|
|
.clampScalar(0, S32, S64);
|
2018-03-17 23:17:41 +08:00
|
|
|
|
|
|
|
|
2019-02-15 06:41:09 +08:00
|
|
|
// FIXME: Handle alignment requirements.
|
2019-01-23 03:02:10 +08:00
|
|
|
auto &ExtLoads = getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
|
2019-02-15 06:41:09 +08:00
|
|
|
.legalForTypesWithMemDesc({
|
|
|
|
{S32, GlobalPtr, 8, 8},
|
|
|
|
{S32, GlobalPtr, 16, 8},
|
|
|
|
{S32, LocalPtr, 8, 8},
|
|
|
|
{S32, LocalPtr, 16, 8},
|
|
|
|
{S32, PrivatePtr, 8, 8},
|
|
|
|
{S32, PrivatePtr, 16, 8}});
|
2019-01-23 03:02:10 +08:00
|
|
|
if (ST.hasFlatAddressSpace()) {
|
2019-02-15 06:41:09 +08:00
|
|
|
ExtLoads.legalForTypesWithMemDesc({{S32, FlatPtr, 8, 8},
|
|
|
|
{S32, FlatPtr, 16, 8}});
|
2019-01-23 03:02:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
ExtLoads.clampScalar(0, S32, S32)
|
|
|
|
.widenScalarToNextPow2(0)
|
|
|
|
.unsupportedIfMemSizeNotPow2()
|
|
|
|
.lower();
|
|
|
|
|
2018-12-20 08:33:49 +08:00
|
|
|
auto &Atomics = getActionDefinitionsBuilder(
|
|
|
|
{G_ATOMICRMW_XCHG, G_ATOMICRMW_ADD, G_ATOMICRMW_SUB,
|
|
|
|
G_ATOMICRMW_AND, G_ATOMICRMW_OR, G_ATOMICRMW_XOR,
|
|
|
|
G_ATOMICRMW_MAX, G_ATOMICRMW_MIN, G_ATOMICRMW_UMAX,
|
|
|
|
G_ATOMICRMW_UMIN, G_ATOMIC_CMPXCHG})
|
|
|
|
.legalFor({{S32, GlobalPtr}, {S32, LocalPtr},
|
|
|
|
{S64, GlobalPtr}, {S64, LocalPtr}});
|
|
|
|
if (ST.hasFlatAddressSpace()) {
|
|
|
|
Atomics.legalFor({{S32, FlatPtr}, {S64, FlatPtr}});
|
|
|
|
}
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
|
2019-08-01 11:33:15 +08:00
|
|
|
getActionDefinitionsBuilder(G_ATOMICRMW_FADD)
|
|
|
|
.legalFor({{S32, LocalPtr}});
|
|
|
|
|
2019-01-19 05:42:55 +08:00
|
|
|
// TODO: Pointer types, any 32-bit or 64-bit vector
|
|
|
|
getActionDefinitionsBuilder(G_SELECT)
|
2019-07-01 23:42:47 +08:00
|
|
|
.legalForCartesianProduct({S32, S64, S16, V2S32, V2S16, V4S16,
|
2019-02-04 22:04:52 +08:00
|
|
|
GlobalPtr, LocalPtr, FlatPtr, PrivatePtr,
|
|
|
|
LLT::vector(2, LocalPtr), LLT::vector(2, PrivatePtr)}, {S1})
|
2019-07-01 23:42:47 +08:00
|
|
|
.clampScalar(0, S16, S64)
|
2019-02-20 01:03:09 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(0), oneMoreElement(0))
|
|
|
|
.fewerElementsIf(numElementsNotEven(0), scalarize(0))
|
2019-01-30 12:19:31 +08:00
|
|
|
.scalarize(1)
|
2019-02-03 07:31:50 +08:00
|
|
|
.clampMaxNumElements(0, S32, 2)
|
|
|
|
.clampMaxNumElements(0, LocalPtr, 2)
|
|
|
|
.clampMaxNumElements(0, PrivatePtr, 2)
|
2019-02-20 01:03:09 +08:00
|
|
|
.scalarize(0)
|
2019-04-05 22:03:04 +08:00
|
|
|
.widenScalarToNextPow2(0)
|
2019-02-03 07:31:50 +08:00
|
|
|
.legalIf(all(isPointer(0), typeIs(1, S1)));
|
AMDGPU/GlobalISel: Mark 32-bit G_SELECT as legal
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: kzhuravl, wdng, nhaehnle, yaxunl, rovka, kristof.beyls, igorb, dstuttard, tpr, t-tye, llvm-commits
Differential Revision: https://reviews.llvm.org/D33949
llvm-svn: 304910
2017-06-07 21:54:51 +08:00
|
|
|
|
2019-01-23 06:00:19 +08:00
|
|
|
// TODO: Only the low 4/5/6 bits of the shift amount are observed, so we can
|
|
|
|
// be more flexible with the shift amount type.
|
|
|
|
auto &Shifts = getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
|
|
|
|
.legalFor({{S32, S32}, {S64, S32}});
|
2019-01-30 11:36:25 +08:00
|
|
|
if (ST.has16BitInsts()) {
|
2019-02-08 01:38:00 +08:00
|
|
|
if (ST.hasVOP3PInsts()) {
|
|
|
|
Shifts.legalFor({{S16, S32}, {S16, S16}, {V2S16, V2S16}})
|
|
|
|
.clampMaxNumElements(0, S16, 2);
|
|
|
|
} else
|
|
|
|
Shifts.legalFor({{S16, S32}, {S16, S16}});
|
2019-02-08 03:37:44 +08:00
|
|
|
|
|
|
|
Shifts.clampScalar(1, S16, S32);
|
2019-01-30 11:36:25 +08:00
|
|
|
Shifts.clampScalar(0, S16, S64);
|
2019-02-08 23:06:24 +08:00
|
|
|
Shifts.widenScalarToNextPow2(0, 16);
|
2019-02-08 03:37:44 +08:00
|
|
|
} else {
|
|
|
|
// Make sure we legalize the shift amount type first, as the general
|
|
|
|
// expansion for the shifted type will produce much worse code if it hasn't
|
|
|
|
// been truncated already.
|
|
|
|
Shifts.clampScalar(1, S32, S32);
|
2019-01-23 06:00:19 +08:00
|
|
|
Shifts.clampScalar(0, S32, S64);
|
2019-02-08 23:06:24 +08:00
|
|
|
Shifts.widenScalarToNextPow2(0, 32);
|
2019-02-08 03:37:44 +08:00
|
|
|
}
|
|
|
|
Shifts.scalarize(0);
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
|
2018-03-12 21:35:53 +08:00
|
|
|
for (unsigned Op : {G_EXTRACT_VECTOR_ELT, G_INSERT_VECTOR_ELT}) {
|
2019-01-23 04:38:15 +08:00
|
|
|
unsigned VecTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 1 : 0;
|
|
|
|
unsigned EltTypeIdx = Op == G_EXTRACT_VECTOR_ELT ? 0 : 1;
|
|
|
|
unsigned IdxTypeIdx = 2;
|
|
|
|
|
2018-03-12 21:35:53 +08:00
|
|
|
getActionDefinitionsBuilder(Op)
|
2019-07-16 03:40:59 +08:00
|
|
|
.customIf([=](const LegalityQuery &Query) {
|
2019-07-16 02:31:10 +08:00
|
|
|
const LLT EltTy = Query.Types[EltTypeIdx];
|
|
|
|
const LLT VecTy = Query.Types[VecTypeIdx];
|
|
|
|
const LLT IdxTy = Query.Types[IdxTypeIdx];
|
|
|
|
return (EltTy.getSizeInBits() == 16 ||
|
|
|
|
EltTy.getSizeInBits() % 32 == 0) &&
|
|
|
|
VecTy.getSizeInBits() % 32 == 0 &&
|
|
|
|
VecTy.getSizeInBits() <= 512 &&
|
|
|
|
IdxTy.getSizeInBits() == 32;
|
2019-01-23 04:38:15 +08:00
|
|
|
})
|
|
|
|
.clampScalar(EltTypeIdx, S32, S64)
|
|
|
|
.clampScalar(VecTypeIdx, S32, S64)
|
|
|
|
.clampScalar(IdxTypeIdx, S32, S32);
|
2018-03-12 21:35:53 +08:00
|
|
|
}
|
|
|
|
|
2019-01-23 04:38:15 +08:00
|
|
|
getActionDefinitionsBuilder(G_EXTRACT_VECTOR_ELT)
|
|
|
|
.unsupportedIf([=](const LegalityQuery &Query) {
|
|
|
|
const LLT &EltTy = Query.Types[1].getElementType();
|
|
|
|
return Query.Types[0] != EltTy;
|
|
|
|
});
|
|
|
|
|
2019-02-21 00:11:22 +08:00
|
|
|
for (unsigned Op : {G_EXTRACT, G_INSERT}) {
|
|
|
|
unsigned BigTyIdx = Op == G_EXTRACT ? 1 : 0;
|
|
|
|
unsigned LitTyIdx = Op == G_EXTRACT ? 0 : 1;
|
|
|
|
|
|
|
|
// FIXME: Doesn't handle extract of illegal sizes.
|
|
|
|
getActionDefinitionsBuilder(Op)
|
2019-02-08 01:25:51 +08:00
|
|
|
.legalIf([=](const LegalityQuery &Query) {
|
2019-02-21 00:11:22 +08:00
|
|
|
const LLT BigTy = Query.Types[BigTyIdx];
|
|
|
|
const LLT LitTy = Query.Types[LitTyIdx];
|
|
|
|
return (BigTy.getSizeInBits() % 32 == 0) &&
|
|
|
|
(LitTy.getSizeInBits() % 16 == 0);
|
|
|
|
})
|
|
|
|
.widenScalarIf(
|
|
|
|
[=](const LegalityQuery &Query) {
|
|
|
|
const LLT BigTy = Query.Types[BigTyIdx];
|
|
|
|
return (BigTy.getScalarSizeInBits() < 16);
|
|
|
|
},
|
|
|
|
LegalizeMutations::widenScalarOrEltToNextPow2(BigTyIdx, 16))
|
2019-02-08 01:25:51 +08:00
|
|
|
.widenScalarIf(
|
2019-02-21 00:11:22 +08:00
|
|
|
[=](const LegalityQuery &Query) {
|
|
|
|
const LLT LitTy = Query.Types[LitTyIdx];
|
|
|
|
return (LitTy.getScalarSizeInBits() < 16);
|
|
|
|
},
|
|
|
|
LegalizeMutations::widenScalarOrEltToNextPow2(LitTyIdx, 16))
|
2019-04-22 23:22:46 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
|
|
|
|
.widenScalarToNextPow2(BigTyIdx, 32);
|
|
|
|
|
2019-02-21 00:11:22 +08:00
|
|
|
}
|
2018-03-06 00:25:15 +08:00
|
|
|
|
2018-12-11 02:44:58 +08:00
|
|
|
getActionDefinitionsBuilder(G_BUILD_VECTOR)
|
2019-01-23 04:14:29 +08:00
|
|
|
.legalForCartesianProduct(AllS32Vectors, {S32})
|
|
|
|
.legalForCartesianProduct(AllS64Vectors, {S64})
|
|
|
|
.clampNumElements(0, V16S32, V16S32)
|
|
|
|
.clampNumElements(0, V2S64, V8S64)
|
|
|
|
.minScalarSameAs(1, 0)
|
2019-07-10 06:48:04 +08:00
|
|
|
.legalIf(isRegisterType(0))
|
|
|
|
.minScalarOrElt(0, S32);
|
2018-12-21 11:03:11 +08:00
|
|
|
|
2019-01-08 09:30:02 +08:00
|
|
|
getActionDefinitionsBuilder(G_CONCAT_VECTORS)
|
2019-07-09 22:17:31 +08:00
|
|
|
.legalIf(isRegisterType(0));
|
2019-01-08 09:30:02 +08:00
|
|
|
|
2019-08-14 00:09:07 +08:00
|
|
|
// TODO: Don't fully scalarize v2s16 pieces
|
|
|
|
getActionDefinitionsBuilder(G_SHUFFLE_VECTOR).lower();
|
|
|
|
|
2018-03-12 21:35:43 +08:00
|
|
|
// Merge/Unmerge
|
|
|
|
for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
|
|
|
|
unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
|
|
|
|
unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
|
|
|
|
|
2019-01-21 02:40:36 +08:00
|
|
|
auto notValidElt = [=](const LegalityQuery &Query, unsigned TypeIdx) {
|
|
|
|
const LLT &Ty = Query.Types[TypeIdx];
|
|
|
|
if (Ty.isVector()) {
|
|
|
|
const LLT &EltTy = Ty.getElementType();
|
|
|
|
if (EltTy.getSizeInBits() < 8 || EltTy.getSizeInBits() > 64)
|
|
|
|
return true;
|
|
|
|
if (!isPowerOf2_32(EltTy.getSizeInBits()))
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
2018-03-12 21:35:43 +08:00
|
|
|
getActionDefinitionsBuilder(Op)
|
2019-01-30 07:17:35 +08:00
|
|
|
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 16)
|
|
|
|
// Clamp the little scalar to s8-s256 and make it a power of 2. It's not
|
|
|
|
// worth considering the multiples of 64 since 2*192 and 2*384 are not
|
|
|
|
// valid.
|
|
|
|
.clampScalar(LitTyIdx, S16, S256)
|
|
|
|
.widenScalarToNextPow2(LitTyIdx, /*Min*/ 32)
|
2019-08-22 00:59:10 +08:00
|
|
|
.moreElementsIf(isSmallOddVector(BigTyIdx), oneMoreElement(BigTyIdx))
|
2019-08-14 00:26:28 +08:00
|
|
|
.fewerElementsIf(all(typeIs(0, S16), vectorWiderThan(1, 32),
|
|
|
|
elementTypeIs(1, S16)),
|
|
|
|
changeTo(1, V2S16))
|
2019-01-21 02:40:36 +08:00
|
|
|
// Break up vectors with weird elements into scalars
|
|
|
|
.fewerElementsIf(
|
|
|
|
[=](const LegalityQuery &Query) { return notValidElt(Query, 0); },
|
2019-01-25 08:51:00 +08:00
|
|
|
scalarize(0))
|
2019-01-21 02:40:36 +08:00
|
|
|
.fewerElementsIf(
|
|
|
|
[=](const LegalityQuery &Query) { return notValidElt(Query, 1); },
|
2019-01-25 08:51:00 +08:00
|
|
|
scalarize(1))
|
2019-01-21 02:40:36 +08:00
|
|
|
.clampScalar(BigTyIdx, S32, S512)
|
2019-08-02 03:10:05 +08:00
|
|
|
.lowerFor({{S16, V2S16}})
|
2019-01-21 02:40:36 +08:00
|
|
|
.widenScalarIf(
|
|
|
|
[=](const LegalityQuery &Query) {
|
|
|
|
const LLT &Ty = Query.Types[BigTyIdx];
|
|
|
|
return !isPowerOf2_32(Ty.getSizeInBits()) &&
|
|
|
|
Ty.getSizeInBits() % 16 != 0;
|
|
|
|
},
|
|
|
|
[=](const LegalityQuery &Query) {
|
|
|
|
// Pick the next power of 2, or a multiple of 64 over 128.
|
|
|
|
// Whichever is smaller.
|
|
|
|
const LLT &Ty = Query.Types[BigTyIdx];
|
|
|
|
unsigned NewSizeInBits = 1 << Log2_32_Ceil(Ty.getSizeInBits() + 1);
|
|
|
|
if (NewSizeInBits >= 256) {
|
|
|
|
unsigned RoundedTo = alignTo<64>(Ty.getSizeInBits() + 1);
|
|
|
|
if (RoundedTo < NewSizeInBits)
|
|
|
|
NewSizeInBits = RoundedTo;
|
|
|
|
}
|
|
|
|
return std::make_pair(BigTyIdx, LLT::scalar(NewSizeInBits));
|
|
|
|
})
|
2018-03-12 21:35:43 +08:00
|
|
|
.legalIf([=](const LegalityQuery &Query) {
|
|
|
|
const LLT &BigTy = Query.Types[BigTyIdx];
|
|
|
|
const LLT &LitTy = Query.Types[LitTyIdx];
|
2019-01-21 02:40:36 +08:00
|
|
|
|
|
|
|
if (BigTy.isVector() && BigTy.getSizeInBits() < 32)
|
|
|
|
return false;
|
|
|
|
if (LitTy.isVector() && LitTy.getSizeInBits() < 32)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
return BigTy.getSizeInBits() % 16 == 0 &&
|
|
|
|
LitTy.getSizeInBits() % 16 == 0 &&
|
2018-03-12 21:35:43 +08:00
|
|
|
BigTy.getSizeInBits() <= 512;
|
|
|
|
})
|
|
|
|
// Any vectors left are the wrong size. Scalarize them.
|
2019-01-25 08:51:00 +08:00
|
|
|
.scalarize(0)
|
|
|
|
.scalarize(1);
|
2018-03-12 21:35:43 +08:00
|
|
|
}
|
|
|
|
|
[globalisel] Add G_SEXT_INREG
Summary:
Targets often have instructions that can sign-extend certain cases faster
than the equivalent shift-left/arithmetic-shift-right. Such cases can be
identified by matching a shift-left/shift-right pair but there are some
issues with this in the context of combines. For example, suppose you can
sign-extend 8-bit up to 32-bit with a target extend instruction.
%1:_(s32) = G_SHL %0:_(s32), i32 24 # (I've inlined the G_CONSTANT for brevity)
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_ASHR %2:_(s32), i32 1
would reasonably combine to:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 25
which no longer matches the special case. If your shifts and extend are
equal cost, this would break even as a pair of shifts but if your shift is
more expensive than the extend then it's cheaper as:
%2:_(s32) = G_SEXT_INREG %0:_(s32), i32 8
%3:_(s32) = G_ASHR %2:_(s32), i32 1
It's possible to match the shift-pair in ISel and emit an extend and ashr.
However, this is far from the only way to break this shift pair and make
it hard to match the extends. Another example is that with the right
known-zeros, this:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 24
%3:_(s32) = G_MUL %2:_(s32), i32 2
can become:
%1:_(s32) = G_SHL %0:_(s32), i32 24
%2:_(s32) = G_ASHR %1:_(s32), i32 23
All upstream targets have been configured to lower it to the current
G_SHL,G_ASHR pair but will likely want to make it legal in some cases to
handle their faster cases.
To follow-up: Provide a way to legalize based on the constant. At the
moment, I'm thinking that the best way to achieve this is to provide the
MI in LegalityQuery but that opens the door to breaking core principles
of the legalizer (legality is not context sensitive). That said, it's
worth noting that looking at other instructions and acting on that
information doesn't violate this principle in itself. It's only a
violation if, at the end of legalization, a pass that checks legality
without being able to see the context would say an instruction might not be
legal. That's a fairly subtle distinction so to give a concrete example,
saying %2 in:
%1 = G_CONSTANT 16
%2 = G_SEXT_INREG %0, %1
is legal is in violation of that principle if the legality of %2 depends
on %1 being constant and/or being 16. However, legalizing to either:
%2 = G_SEXT_INREG %0, 16
or:
%1 = G_CONSTANT 16
%2:_(s32) = G_SHL %0, %1
%3:_(s32) = G_ASHR %2, %1
depending on whether %1 is constant and 16 does not violate that principle
since both outputs are genuinely legal.
Reviewers: bogner, aditya_nandakumar, volkan, aemerson, paquette, arsenm
Subscribers: sdardis, jvesely, wdng, nhaehnle, rovka, kristof.beyls, javed.absar, hiraditya, jrtc27, atanasyan, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61289
llvm-svn: 368487
2019-08-10 05:11:20 +08:00
|
|
|
getActionDefinitionsBuilder(G_SEXT_INREG).lower();
|
|
|
|
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
computeTables();
|
2018-06-01 00:16:48 +08:00
|
|
|
verify(*ST.getInstrInfo());
|
Re-commit AMDGPU/GlobalISel: Add support for simple shaders
Fix build when global-isel is disabled and fix a warning.
Summary: We can select constant/global G_LOAD, global G_STORE, and G_GEP.
Reviewers: qcolombet, MatzeB, t.p.northover, ab, arsenm
Subscribers: mehdi_amini, vkalintiris, kzhuravl, wdng, nhaehnle, mgorny, yaxunl, tony-tye, modocache, llvm-commits, dberris
Differential Revision: https://reviews.llvm.org/D26730
llvm-svn: 293551
2017-01-31 05:56:46 +08:00
|
|
|
}
|
2019-02-08 10:40:47 +08:00
|
|
|
|
|
|
|
// Dispatch an instruction marked .custom() in the legality rules to the
// opcode-specific expansion. Returns true if the instruction was legalized,
// false to report a legalization failure.
bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
                                         MachineRegisterInfo &MRI,
                                         MachineIRBuilder &MIRBuilder,
                                         GISelChangeObserver &Observer) const {
  switch (MI.getOpcode()) {
  case TargetOpcode::G_ADDRSPACE_CAST:
    return legalizeAddrSpaceCast(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FRINT:
    return legalizeFrint(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FCEIL:
    return legalizeFceil(MI, MRI, MIRBuilder);
  case TargetOpcode::G_INTRINSIC_TRUNC:
    return legalizeIntrinsicTrunc(MI, MRI, MIRBuilder);
  case TargetOpcode::G_SITOFP:
    return legalizeITOFP(MI, MRI, MIRBuilder, true);
  case TargetOpcode::G_UITOFP:
    return legalizeITOFP(MI, MRI, MIRBuilder, false);
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FMINNUM_IEEE:
  case TargetOpcode::G_FMAXNUM_IEEE:
    return legalizeMinNumMaxNum(MI, MRI, MIRBuilder);
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
    return legalizeExtractVectorElt(MI, MRI, MIRBuilder);
  case TargetOpcode::G_INSERT_VECTOR_ELT:
    return legalizeInsertVectorElt(MI, MRI, MIRBuilder);
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FCOS:
    return legalizeSinCos(MI, MRI, MIRBuilder);
  default:
    return false;
  }

  llvm_unreachable("expected switch to return");
}
|
|
|
|
|
2019-06-28 09:16:46 +08:00
|
|
|
// Materialize the high 32 bits (the "aperture") of the flat address range
// that corresponds to the given LDS/private address space \p AS. On targets
// with aperture registers this is read via s_getreg; otherwise it is loaded
// from the amd_queue_t structure pointed to by the queue pointer input.
// Returns an invalid Register on failure.
Register AMDGPULegalizerInfo::getSegmentAperture(
  unsigned AS,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const LLT S32 = LLT::scalar(32);

  if (ST.hasApertureRegs()) {
    // FIXME: Use inline constants (src_{shared, private}_base) instead of
    // getreg.
    unsigned Offset = AS == AMDGPUAS::LOCAL_ADDRESS ?
        AMDGPU::Hwreg::OFFSET_SRC_SHARED_BASE :
        AMDGPU::Hwreg::OFFSET_SRC_PRIVATE_BASE;
    unsigned WidthM1 = AS == AMDGPUAS::LOCAL_ADDRESS ?
        AMDGPU::Hwreg::WIDTH_M1_SRC_SHARED_BASE :
        AMDGPU::Hwreg::WIDTH_M1_SRC_PRIVATE_BASE;
    // Pack the hwreg id, bit offset and width-minus-one into the s_getreg
    // immediate encoding.
    unsigned Encoding =
        AMDGPU::Hwreg::ID_MEM_BASES << AMDGPU::Hwreg::ID_SHIFT_ |
        Offset << AMDGPU::Hwreg::OFFSET_SHIFT_ |
        WidthM1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_;

    Register ApertureReg = MRI.createGenericVirtualRegister(S32);
    Register GetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);

    MIRBuilder.buildInstr(AMDGPU::S_GETREG_B32)
      .addDef(GetReg)
      .addImm(Encoding);
    MRI.setType(GetReg, S32);

    // The aperture field holds the high bits; shift it into position.
    auto ShiftAmt = MIRBuilder.buildConstant(S32, WidthM1 + 1);
    MIRBuilder.buildInstr(TargetOpcode::G_SHL)
      .addDef(ApertureReg)
      .addUse(GetReg)
      .addUse(ShiftAmt.getReg(0));

    return ApertureReg;
  }

  Register QueuePtr = MRI.createGenericVirtualRegister(
    LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS, 64));

  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
  if (!loadInputValue(QueuePtr, MIRBuilder, &MFI->getArgInfo().QueuePtr))
    return Register();

  // Offset into amd_queue_t for group_segment_aperture_base_hi /
  // private_segment_aperture_base_hi.
  uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44;

  // FIXME: Don't use undef
  Value *V = UndefValue::get(PointerType::get(
    Type::getInt8Ty(MF.getFunction().getContext()),
    AMDGPUAS::CONSTANT_ADDRESS));

  MachinePointerInfo PtrInfo(V, StructOffset);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo,
    MachineMemOperand::MOLoad |
    MachineMemOperand::MODereferenceable |
    MachineMemOperand::MOInvariant,
    4,
    MinAlign(64, StructOffset));

  Register LoadResult = MRI.createGenericVirtualRegister(S32);
  Register LoadAddr;

  // Load the 32-bit aperture value from queue_ptr + StructOffset.
  MIRBuilder.materializeGEP(LoadAddr, QueuePtr, LLT::scalar(64), StructOffset);
  MIRBuilder.buildLoad(LoadResult, LoadAddr, *MMO);
  return LoadResult;
}
|
|
|
|
|
|
|
|
bool AMDGPULegalizerInfo::legalizeAddrSpaceCast(
|
|
|
|
MachineInstr &MI, MachineRegisterInfo &MRI,
|
|
|
|
MachineIRBuilder &MIRBuilder) const {
|
|
|
|
MachineFunction &MF = MIRBuilder.getMF();
|
|
|
|
|
|
|
|
MIRBuilder.setInstr(MI);
|
|
|
|
|
2019-08-28 08:58:24 +08:00
|
|
|
const LLT S32 = LLT::scalar(32);
|
2019-06-24 23:50:29 +08:00
|
|
|
Register Dst = MI.getOperand(0).getReg();
|
|
|
|
Register Src = MI.getOperand(1).getReg();
|
2019-02-08 10:40:47 +08:00
|
|
|
|
|
|
|
LLT DstTy = MRI.getType(Dst);
|
|
|
|
LLT SrcTy = MRI.getType(Src);
|
|
|
|
unsigned DestAS = DstTy.getAddressSpace();
|
|
|
|
unsigned SrcAS = SrcTy.getAddressSpace();
|
|
|
|
|
|
|
|
// TODO: Avoid reloading from the queue ptr for each cast, or at least each
|
|
|
|
// vector element.
|
|
|
|
assert(!DstTy.isVector());
|
|
|
|
|
|
|
|
const AMDGPUTargetMachine &TM
|
|
|
|
= static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
|
|
|
|
|
|
|
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
|
|
|
if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) {
|
2019-02-08 22:16:11 +08:00
|
|
|
MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BITCAST));
|
2019-02-08 10:40:47 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-08-28 08:58:24 +08:00
|
|
|
if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
|
|
|
|
// Truncate.
|
|
|
|
MIRBuilder.buildExtract(Dst, Src, 0);
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
|
|
|
|
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
|
|
|
|
uint32_t AddrHiVal = Info->get32BitAddressHighBits();
|
|
|
|
|
|
|
|
// FIXME: This is a bit ugly due to creating a merge of 2 pointers to
|
|
|
|
// another. Merge operands are required to be the same type, but creating an
|
|
|
|
// extra ptrtoint would be kind of pointless.
|
|
|
|
auto HighAddr = MIRBuilder.buildConstant(
|
|
|
|
LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
|
|
|
|
MIRBuilder.buildMerge(Dst, {Src, HighAddr.getReg(0)});
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-02-08 10:40:47 +08:00
|
|
|
if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
|
|
|
|
assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
|
|
|
|
DestAS == AMDGPUAS::PRIVATE_ADDRESS);
|
|
|
|
unsigned NullVal = TM.getNullPointerValue(DestAS);
|
|
|
|
|
2019-04-15 13:04:20 +08:00
|
|
|
auto SegmentNull = MIRBuilder.buildConstant(DstTy, NullVal);
|
|
|
|
auto FlatNull = MIRBuilder.buildConstant(SrcTy, 0);
|
2019-02-08 10:40:47 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register PtrLo32 = MRI.createGenericVirtualRegister(DstTy);
|
2019-02-08 10:40:47 +08:00
|
|
|
|
|
|
|
// Extract low 32-bits of the pointer.
|
|
|
|
MIRBuilder.buildExtract(PtrLo32, Src, 0);
|
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
2019-04-15 13:04:20 +08:00
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, FlatNull.getReg(0));
|
|
|
|
MIRBuilder.buildSelect(Dst, CmpRes, PtrLo32, SegmentNull.getReg(0));
|
2019-02-08 10:40:47 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2019-08-28 08:58:24 +08:00
|
|
|
if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS)
|
|
|
|
return false;
|
|
|
|
|
|
|
|
if (!ST.hasFlatAddressSpace())
|
|
|
|
return false;
|
2019-02-08 10:40:47 +08:00
|
|
|
|
2019-04-15 13:04:20 +08:00
|
|
|
auto SegmentNull =
|
|
|
|
MIRBuilder.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
|
|
|
|
auto FlatNull =
|
|
|
|
MIRBuilder.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
|
2019-02-08 10:40:47 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register ApertureReg = getSegmentAperture(DestAS, MRI, MIRBuilder);
|
2019-09-05 10:20:29 +08:00
|
|
|
if (!ApertureReg.isValid())
|
|
|
|
return false;
|
2019-02-08 10:40:47 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register CmpRes = MRI.createGenericVirtualRegister(LLT::scalar(1));
|
2019-04-15 13:04:20 +08:00
|
|
|
MIRBuilder.buildICmp(CmpInst::ICMP_NE, CmpRes, Src, SegmentNull.getReg(0));
|
2019-02-08 10:40:47 +08:00
|
|
|
|
2019-06-25 00:16:12 +08:00
|
|
|
Register BuildPtr = MRI.createGenericVirtualRegister(DstTy);
|
2019-02-08 10:40:47 +08:00
|
|
|
|
|
|
|
// Coerce the type of the low half of the result so we can use merge_values.
|
2019-08-28 08:58:24 +08:00
|
|
|
Register SrcAsInt = MRI.createGenericVirtualRegister(S32);
|
2019-02-08 10:40:47 +08:00
|
|
|
MIRBuilder.buildInstr(TargetOpcode::G_PTRTOINT)
|
|
|
|
.addDef(SrcAsInt)
|
|
|
|
.addUse(Src);
|
|
|
|
|
|
|
|
// TODO: Should we allow mismatched types but matching sizes in merges to
|
|
|
|
// avoid the ptrtoint?
|
|
|
|
MIRBuilder.buildMerge(BuildPtr, {SrcAsInt, ApertureReg});
|
2019-04-15 13:04:20 +08:00
|
|
|
MIRBuilder.buildSelect(Dst, CmpRes, BuildPtr, FlatNull.getReg(0));
|
2019-02-08 10:40:47 +08:00
|
|
|
|
|
|
|
MI.eraseFromParent();
|
|
|
|
return true;
|
|
|
|
}
|
2019-05-17 20:19:57 +08:00
|
|
|
|
|
|
|
// Expand f64 G_FRINT (round to nearest integer) using the classic
// add-then-subtract-2^52 trick, with a magnitude guard so values that are
// already integral (|src| > 2^51) pass through unchanged.
bool AMDGPULegalizerInfo::legalizeFrint(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &MIRBuilder) const {
  MIRBuilder.setInstr(MI);

  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Src);
  assert(Ty.isScalar() && Ty.getSizeInBits() == 64);

  // 2^52: adding/subtracting this forces rounding of the fractional bits.
  APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
  // Largest value whose fraction can still be non-integral.
  APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");

  auto C1 = MIRBuilder.buildFConstant(Ty, C1Val);
  // Match the sign so the trick works for negative inputs too.
  auto CopySign = MIRBuilder.buildFCopysign(Ty, C1, Src);

  // TODO: Should this propagate fast-math-flags?
  auto Tmp1 = MIRBuilder.buildFAdd(Ty, Src, CopySign);
  auto Tmp2 = MIRBuilder.buildFSub(Ty, Tmp1, CopySign);

  auto C2 = MIRBuilder.buildFConstant(Ty, C2Val);
  auto Fabs = MIRBuilder.buildFAbs(Ty, Src);

  // |src| > 2^51 - ulp means src is already an integer (or nan/inf).
  auto Cond = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, LLT::scalar(1), Fabs, C2);
  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), Cond, Src, Tmp2);
  return true;
}
|
|
|
|
|
|
|
|
// Expand f64 G_FCEIL in terms of G_INTRINSIC_TRUNC: truncate toward zero,
// then add 1.0 when the input was positive and not already integral.
bool AMDGPULegalizerInfo::legalizeFceil(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S64 = LLT::scalar(64);

  Register Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // result = trunc(src)
  // if (src > 0.0 && src != result)
  //   result += 1.0

  auto Trunc = B.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC, {S64}, {Src});

  const auto Zero = B.buildFConstant(S64, 0.0);
  const auto One = B.buildFConstant(S64, 1.0);
  auto Lt0 = B.buildFCmp(CmpInst::FCMP_OGT, S1, Src, Zero);
  auto NeTrunc = B.buildFCmp(CmpInst::FCMP_ONE, S1, Src, Trunc);
  auto And = B.buildAnd(S1, Lt0, NeTrunc);
  // Select the adjustment instead of branching.
  auto Add = B.buildSelect(S64, And, One, Zero);

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(MI.getOperand(0).getReg(), Trunc, Add);
  return true;
}
|
2019-05-17 20:20:01 +08:00
|
|
|
|
|
|
|
// Extract the unbiased exponent of an f64 whose high 32 bits are in \p Hi.
// Uses llvm.amdgcn.ubfe(src, offset, width) to pull the 11 exponent bits out
// of the high word, then subtracts the IEEE-754 double bias (1023).
//
// Fix: the visible code never used \p Hi — the ubfe call was missing its
// source operand, leaving a malformed 2-operand intrinsic. Pass Hi as the
// first operand.
static MachineInstrBuilder extractF64Exponent(unsigned Hi,
                                              MachineIRBuilder &B) {
  const unsigned FractBits = 52;
  const unsigned ExpBits = 11;
  LLT S32 = LLT::scalar(32);

  // Exponent field starts at bit 52 of the f64, i.e. bit 20 of the high word.
  auto Const0 = B.buildConstant(S32, FractBits - 32);
  auto Const1 = B.buildConstant(S32, ExpBits);

  auto ExpPart = B.buildIntrinsic(Intrinsic::amdgcn_ubfe, {S32}, false)
    .addUse(Hi)
    .addUse(Const0.getReg(0))
    .addUse(Const1.getReg(0));

  // Remove the exponent bias.
  return B.buildSub(S32, ExpPart, B.buildConstant(S32, 1023));
}
|
|
|
|
|
|
|
|
// Expand s64 G_INTRINSIC_TRUNC using integer bit manipulation: mask off the
// fraction bits that lie below the binary point for the value's exponent.
bool AMDGPULegalizerInfo::legalizeIntrinsicTrunc(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  const LLT S1 = LLT::scalar(1);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  Register Src = MI.getOperand(1).getReg();
  assert(MRI.getType(Src) == S64);

  // TODO: Should this use extract since the low half is unused?
  auto Unmerge = B.buildUnmerge({S32, S32}, Src);
  Register Hi = Unmerge.getReg(1);

  // Extract the upper half, since this is where we will find the sign and
  // exponent.
  auto Exp = extractF64Exponent(Hi, B);

  const unsigned FractBits = 52;

  // Extract the sign bit.
  const auto SignBitMask = B.buildConstant(S32, UINT32_C(1) << 31);
  auto SignBit = B.buildAnd(S32, Hi, SignBitMask);

  const auto FractMask = B.buildConstant(S64, (UINT64_C(1) << FractBits) - 1);

  const auto Zero32 = B.buildConstant(S32, 0);

  // Extend back to 64-bits.
  auto SignBit64 = B.buildMerge(S64, {Zero32.getReg(0), SignBit.getReg(0)});

  // Shift the fraction mask right by the exponent: the bits still set in
  // Shr are below the binary point, so clearing them (And with Not) keeps
  // only the integer part of Src.
  auto Shr = B.buildAShr(S64, FractMask, Exp);
  auto Not = B.buildNot(S64, Shr);
  auto Tmp0 = B.buildAnd(S64, Src, Not);
  auto FiftyOne = B.buildConstant(S32, FractBits - 1);

  // Exp < 0: |value| < 1.0, so the result is just the (signed) zero.
  // Exp > 51: all fraction bits are significant; value is already integral.
  auto ExpLt0 = B.buildICmp(CmpInst::ICMP_SLT, S1, Exp, Zero32);
  auto ExpGt51 = B.buildICmp(CmpInst::ICMP_SGT, S1, Exp, FiftyOne);

  auto Tmp1 = B.buildSelect(S64, ExpLt0, SignBit64, Tmp0);
  B.buildSelect(MI.getOperand(0).getReg(), ExpGt51, Src, Tmp1);
  // NOTE(review): the original instruction is not erased here, unlike
  // legalizeITOFP — confirm the caller removes it.
  return true;
}
|
2019-05-18 07:05:18 +08:00
|
|
|
|
|
|
|
// Lower 64-bit G_SITOFP/G_UITOFP producing f64. Each 32-bit half is
// converted separately and the pieces are recombined as
//   ldexp(cvt(hi), 32) + uitofp(lo).
bool AMDGPULegalizerInfo::legalizeITOFP(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B, bool Signed) const {
  B.setInstr(MI);

  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  const LLT S64 = LLT::scalar(64);
  const LLT S32 = LLT::scalar(32);

  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);

  auto Halves = B.buildUnmerge({S32, S32}, Src);

  // Only the high word carries the sign of the 64-bit integer.
  MachineInstrBuilder HiCvt;
  if (Signed)
    HiCvt = B.buildSITOFP(S64, Halves.getReg(1));
  else
    HiCvt = B.buildUITOFP(S64, Halves.getReg(1));

  // The low word is always treated as unsigned.
  auto LoCvt = B.buildUITOFP(S64, Halves.getReg(0));

  // Scale the converted high word by 2^32 before adding in the low part.
  auto ShiftAmt = B.buildConstant(S32, 32);
  auto ScaledHi = B.buildIntrinsic(Intrinsic::amdgcn_ldexp, {S64}, false)
    .addUse(HiCvt.getReg(0))
    .addUse(ShiftAmt.getReg(0));

  // TODO: Should this propagate fast-math-flags?
  B.buildFAdd(Dst, ScaledHi, LoCvt);
  MI.eraseFromParent();
  return true;
}
|
2019-07-02 02:40:23 +08:00
|
|
|
|
2019-07-11 00:31:19 +08:00
|
|
|
// Decide how G_FMINNUM/G_FMAXNUM and their IEEE variants are handled based
// on the function's ieee_mode setting, lowering the non-IEEE forms through
// LegalizerHelper when ieee_mode is enabled.
bool AMDGPULegalizerInfo::legalizeMinNumMaxNum(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  MachineFunction &MF = B.getMF();
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  const unsigned Opc = MI.getOpcode();
  const bool IsIEEEOp =
      Opc == AMDGPU::G_FMINNUM_IEEE || Opc == AMDGPU::G_FMAXNUM_IEEE;

  // With ieee_mode disabled, plain G_FMINNUM/G_FMAXNUM already have the
  // correct behavior, so only the IEEE variants remain illegal.
  if (!MFI->getMode().IEEE)
    return !IsIEEEOp;

  // In ieee_mode the IEEE variants are fine as-is.
  if (IsIEEEOp)
    return true;

  // Expand the non-IEEE form via the generic lowering, using a throwaway
  // builder/observer pair anchored at this instruction.
  MachineIRBuilder LoweringBuilder(MI);
  GISelObserverWrapper NullObserver;
  LegalizerHelper LowerHelper(MF, NullObserver, LoweringBuilder);
  LoweringBuilder.setMBB(*MI.getParent());
  return LowerHelper.lowerFMinNumMaxNum(MI) == LegalizerHelper::Legalized;
}
|
|
|
|
|
2019-07-16 03:40:59 +08:00
|
|
|
// Custom-legalize G_EXTRACT_VECTOR_ELT with a constant index by rewriting
// it as a G_EXTRACT at the element's bit offset, or G_IMPLICIT_DEF when the
// index is out of range. Dynamic indices are left for register indexing
// during selection.
bool AMDGPULegalizerInfo::legalizeExtractVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  // TODO: Should move some of this into LegalizerHelper.

  // TODO: Promote dynamic indexing of s16 to s32
  // TODO: Dynamic s64 indexing is only legal for SGPR.
  Optional<int64_t> MaybeIdx =
      getConstantVRegVal(MI.getOperand(2).getReg(), MRI);
  if (!MaybeIdx) // Dynamic case will be selected to register indexing.
    return true;

  Register DstReg = MI.getOperand(0).getReg();
  Register VecReg = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(VecReg);
  LLT EltTy = VecTy.getElementType();
  assert(EltTy == MRI.getType(DstReg));

  B.setInstr(MI);

  const int64_t Idx = MaybeIdx.getValue();
  if (Idx < VecTy.getNumElements())
    B.buildExtract(DstReg, VecReg, Idx * EltTy.getSizeInBits());
  else
    B.buildUndef(DstReg);

  MI.eraseFromParent();
  return true;
}
|
|
|
|
|
2019-07-16 03:43:04 +08:00
|
|
|
// Custom-legalize G_INSERT_VECTOR_ELT with a constant index by rewriting it
// as a G_INSERT at the element's bit offset, or G_IMPLICIT_DEF when the
// index is out of range. Dynamic indices are left for register indexing
// during selection.
bool AMDGPULegalizerInfo::legalizeInsertVectorElt(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  // TODO: Should move some of this into LegalizerHelper.

  // TODO: Promote dynamic indexing of s16 to s32
  // TODO: Dynamic s64 indexing is only legal for SGPR.
  Optional<int64_t> MaybeIdx =
      getConstantVRegVal(MI.getOperand(3).getReg(), MRI);
  if (!MaybeIdx) // Dynamic case will be selected to register indexing.
    return true;

  Register DstReg = MI.getOperand(0).getReg();
  Register VecReg = MI.getOperand(1).getReg();
  Register ValReg = MI.getOperand(2).getReg();

  LLT VecTy = MRI.getType(VecReg);
  LLT EltTy = VecTy.getElementType();
  assert(EltTy == MRI.getType(ValReg));

  B.setInstr(MI);

  const int64_t Idx = MaybeIdx.getValue();
  if (Idx < VecTy.getNumElements())
    B.buildInsert(DstReg, VecReg, ValReg, Idx * EltTy.getSizeInBits());
  else
    B.buildUndef(DstReg);

  MI.eraseFromParent();
  return true;
}
|
|
|
|
|
2019-08-30 04:06:48 +08:00
|
|
|
// Lower G_FSIN/G_FCOS to the amdgcn.sin/amdgcn.cos intrinsics. The hardware
// instructions take the input pre-multiplied by 1/(2*pi); subtargets with a
// reduced trig range additionally need the scaled input wrapped into [0, 1)
// via amdgcn.fract.
bool AMDGPULegalizerInfo::legalizeSinCos(
  MachineInstr &MI, MachineRegisterInfo &MRI,
  MachineIRBuilder &B) const {
  B.setInstr(MI);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  unsigned Flags = MI.getFlags();

  auto Scale = B.buildFConstant(Ty, 0.5 / M_PI);
  auto Scaled = B.buildFMul(Ty, SrcReg, Scale, Flags);

  Register TrigVal;
  if (ST.hasTrigReducedRange()) {
    TrigVal = B.buildIntrinsic(Intrinsic::amdgcn_fract, {Ty}, false)
      .addUse(Scaled.getReg(0))
      .setMIFlags(Flags)
      .getReg(0);
  } else {
    TrigVal = Scaled.getReg(0);
  }

  const bool IsSin = MI.getOpcode() == AMDGPU::G_FSIN;
  Intrinsic::ID TrigIntrin =
      IsSin ? Intrinsic::amdgcn_sin : Intrinsic::amdgcn_cos;
  B.buildIntrinsic(TrigIntrin, makeArrayRef<Register>(DstReg), false)
    .addUse(TrigVal)
    .setMIFlags(Flags);
  MI.eraseFromParent();
  return true;
}
|
|
|
|
|
2019-07-02 02:40:23 +08:00
|
|
|
// Return the use branch instruction, otherwise null if the usage is invalid.
|
|
|
|
static MachineInstr *verifyCFIntrinsic(MachineInstr &MI,
|
|
|
|
MachineRegisterInfo &MRI) {
|
|
|
|
Register CondDef = MI.getOperand(0).getReg();
|
|
|
|
if (!MRI.hasOneNonDBGUse(CondDef))
|
|
|
|
return nullptr;
|
|
|
|
|
|
|
|
MachineInstr &UseMI = *MRI.use_instr_nodbg_begin(CondDef);
|
|
|
|
return UseMI.getParent() == MI.getParent() &&
|
|
|
|
UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
|
|
|
|
}
|
|
|
|
|
2019-07-02 02:45:36 +08:00
|
|
|
// Return the virtual register tracking live-in physical register \p Reg,
// creating a fresh generic vreg of type \p Ty and registering the live-in
// mapping if none exists yet.
Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
                                                Register Reg, LLT Ty) const {
  if (Register Existing = MRI.getLiveInVirtReg(Reg))
    return Existing;

  Register Fresh = MRI.createGenericVirtualRegister(Ty);
  MRI.addLiveIn(Reg, Fresh);
  return Fresh;
}
|
|
|
|
|
|
|
|
// Copy the preloaded argument described by \p Arg into \p DstReg, creating
// the live-in copy in the entry block if it does not exist yet.
//
// \return false if the argument is not in a valid register (not handled yet).
bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
                                         const ArgDescriptor *Arg) const {
  if (!Arg->isRegister() || !Arg->getRegister().isValid())
    return false; // TODO: Handle these

  assert(Arg->getRegister().isPhysical());

  MachineRegisterInfo &MRI = *B.getMRI();

  LLT Ty = MRI.getType(DstReg);
  Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);

  if (Arg->isMasked()) {
    // The value occupies a bitfield of the input register: shift it down to
    // bit 0 and mask away the other fields.
    // TODO: Should we try to emit this once in the entry block?
    const LLT S32 = LLT::scalar(32);
    const unsigned Mask = Arg->getMask();
    const unsigned Shift = countTrailingZeros<unsigned>(Mask);

    auto ShiftAmt = B.buildConstant(S32, Shift);
    auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
    B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
  } else
    B.buildCopy(DstReg, LiveIn);

  // Insert the argument copy if it doesn't already exist. Note LiveIn is
  // used above before its def is emitted here; the def is placed at the top
  // of the entry block so it still dominates all uses.
  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
  if (!MRI.getVRegDef(LiveIn)) {
    MachineBasicBlock &EntryMBB = B.getMF().front();
    EntryMBB.addLiveIn(Arg->getRegister());
    // NOTE(review): this moves the builder's insertion point to the entry
    // block and does not restore it — confirm callers reset it afterwards.
    B.setInsertPt(EntryMBB, EntryMBB.begin());
    B.buildCopy(LiveIn, Arg->getRegister());
  }

  return true;
}
|
|
|
|
|
|
|
|
// Replace an intrinsic that reads a preloaded kernel/workitem argument with
// a copy from the corresponding input register, erasing the intrinsic on
// success.
bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
  MachineInstr &MI,
  MachineRegisterInfo &MRI,
  MachineIRBuilder &B,
  AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
  B.setInstr(MI);

  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();

  const ArgDescriptor *Arg;
  const TargetRegisterClass *RC;
  std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
  if (!Arg) {
    LLVM_DEBUG(dbgs() << "Required arg register missing\n");
    return false;
  }

  if (!loadInputValue(MI.getOperand(0).getReg(), B, Arg))
    return false;

  MI.eraseFromParent();
  return true;
}
|
|
|
|
|
[AMDGPU/GlobalISel] Add llvm.amdgcn.fdiv.fast legalization.
Reviewers: arsenm
Reviewed By: arsenm
Subscribers: volkan, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D64966
llvm-svn: 367344
2019-07-31 02:49:16 +08:00
|
|
|
// Lower llvm.amdgcn.fdiv.fast: approximate LHS / RHS with v_rcp_f32, first
// scaling the denominator so the reciprocal stays in its accurate range:
//   s   = |RHS| > 0x1.0p+96 ? 0x1.0p-32 : 1.0
//   res = s * (LHS * rcp(RHS * s))
// (Removed the unused local `const APFloat C0Val(1.0f);` — the constants
// are built directly from their bit patterns below.)
bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI,
                                           MachineRegisterInfo &MRI,
                                           MachineIRBuilder &B) const {
  B.setInstr(MI);
  Register Res = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  uint16_t Flags = MI.getFlags();

  LLT S32 = LLT::scalar(32);
  LLT S1 = LLT::scalar(1);

  auto Abs = B.buildFAbs(S32, RHS, Flags);

  // 0x6f800000 = 0x1.0p+96f, 0x2f800000 = 0x1.0p-32f (as f32 bit patterns).
  auto C0 = B.buildConstant(S32, 0x6f800000);
  auto C1 = B.buildConstant(S32, 0x2f800000);
  auto C2 = B.buildConstant(S32, FloatToBits(1.0f));

  auto CmpRes = B.buildFCmp(CmpInst::FCMP_OGT, S1, Abs, C0, Flags);
  auto Sel = B.buildSelect(S32, CmpRes, C1, C2, Flags);

  // Scale the denominator before taking the reciprocal.
  auto Mul0 = B.buildFMul(S32, RHS, Sel, Flags);

  auto RCP = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {S32}, false)
    .addUse(Mul0.getReg(0))
    .setMIFlags(Flags);

  auto Mul1 = B.buildFMul(S32, LHS, RCP, Flags);

  // Undo the scaling on the final quotient.
  B.buildFMul(Res, Sel, Mul1, Flags);

  MI.eraseFromParent();
  return true;
}
|
|
|
|
|
2019-07-02 02:49:01 +08:00
|
|
|
// Lower llvm.amdgcn.implicitarg.ptr. In a non-entry function the pointer is
// a preloaded argument; in an entry function it is computed as the kernarg
// segment pointer plus the implicit-parameter offset.
bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {
  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
  if (!MFI->isEntryFunction()) {
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
  }

  B.setInstr(MI);

  // Byte offset of the implicit arguments past the explicit kernargs.
  uint64_t Offset =
    ST.getTargetLowering()->getImplicitParameterOffset(
      B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  // Index type sized to match the destination pointer.
  LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());

  const ArgDescriptor *Arg;
  const TargetRegisterClass *RC;
  std::tie(Arg, RC)
    = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  if (!Arg)
    return false;

  Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
  if (!loadInputValue(KernargPtrReg, B, Arg))
    return false;

  // DstReg = KernargPtr + Offset
  B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
  MI.eraseFromParent();
  return true;
}
|
|
|
|
|
2019-07-02 02:40:23 +08:00
|
|
|
// Custom legalization hook for target intrinsics. Returns true if the
// intrinsic is legal (possibly after rewriting in place), false if the
// required lowering could not be performed.
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                            MachineRegisterInfo &MRI,
                                            MachineIRBuilder &B) const {
  // Replace the use G_BRCOND with the exec manipulate and branch pseudos.
  switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) {
  case Intrinsic::amdgcn_if: {
    // Fold the intrinsic plus its single G_BRCOND user into SI_IF.
    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
      const SIRegisterInfo *TRI
        = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

      B.setInstr(*BrCond);
      Register Def = MI.getOperand(1).getReg();
      Register Use = MI.getOperand(3).getReg();
      B.buildInstr(AMDGPU::SI_IF)
        .addDef(Def)
        .addUse(Use)
        .addMBB(BrCond->getOperand(1).getMBB());

      // Both condition values must live in the wave-mask register class.
      MRI.setRegClass(Def, TRI->getWaveMaskRegClass());
      MRI.setRegClass(Use, TRI->getWaveMaskRegClass());
      MI.eraseFromParent();
      BrCond->eraseFromParent();
      return true;
    }

    return false;
  }
  case Intrinsic::amdgcn_loop: {
    // Fold the intrinsic plus its single G_BRCOND user into SI_LOOP.
    if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) {
      const SIRegisterInfo *TRI
        = static_cast<const SIRegisterInfo *>(MRI.getTargetRegisterInfo());

      B.setInstr(*BrCond);
      Register Reg = MI.getOperand(2).getReg();
      B.buildInstr(AMDGPU::SI_LOOP)
        .addUse(Reg)
        .addMBB(BrCond->getOperand(1).getMBB());
      MI.eraseFromParent();
      BrCond->eraseFromParent();
      MRI.setRegClass(Reg, TRI->getWaveMaskRegClass());
      return true;
    }

    return false;
  }
  // The remaining cases read preloaded argument registers.
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return legalizePreloadedArgIntrin(
      MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
  case Intrinsic::amdgcn_implicitarg_ptr:
    return legalizeImplicitArgPtr(MI, MRI, B);
  case Intrinsic::amdgcn_workitem_id_x:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKITEM_ID_X);
  case Intrinsic::amdgcn_workitem_id_y:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
  case Intrinsic::amdgcn_workitem_id_z:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
  case Intrinsic::amdgcn_workgroup_id_x:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_X);
  case Intrinsic::amdgcn_workgroup_id_y:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Y);
  case Intrinsic::amdgcn_workgroup_id_z:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::WORKGROUP_ID_Z);
  case Intrinsic::amdgcn_dispatch_ptr:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::DISPATCH_PTR);
  case Intrinsic::amdgcn_queue_ptr:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::QUEUE_PTR);
  case Intrinsic::amdgcn_implicit_buffer_ptr:
    return legalizePreloadedArgIntrin(
      MI, MRI, B, AMDGPUFunctionArgInfo::IMPLICIT_BUFFER_PTR);
  case Intrinsic::amdgcn_dispatch_id:
    return legalizePreloadedArgIntrin(MI, MRI, B,
                                      AMDGPUFunctionArgInfo::DISPATCH_ID);
  case Intrinsic::amdgcn_fdiv_fast:
    return legalizeFDIVFast(MI, MRI, B);
  default:
    return true;
  }

  // Unreachable: every case above returns; kept as a safe trailing return.
  return true;
}
|