forked from OSchip/llvm-project
Revert "[openmp][nfc] Refactor GridValues"
Failed a nvptx codegen test
This reverts commit 2a47a84b40
.
This commit is contained in:
parent
cd1b950141
commit
b1efeface7
|
@ -210,6 +210,9 @@ protected:
|
|||
unsigned char RegParmMax, SSERegParmMax;
|
||||
TargetCXXABI TheCXXABI;
|
||||
const LangASMap *AddrSpaceMap;
|
||||
const llvm::omp::GV *GridValues =
|
||||
nullptr; // target-specific GPU grid values that must be
|
||||
// consistent between host RTL (plugin), device RTL, and clang.
|
||||
|
||||
mutable StringRef PlatformName;
|
||||
mutable VersionTuple PlatformMinVersion;
|
||||
|
@ -1407,10 +1410,10 @@ public:
|
|||
return LangAS::Default;
|
||||
}
|
||||
|
||||
// access target-specific GPU grid values that must be consistent between
|
||||
// host RTL (plugin), deviceRTL and clang.
|
||||
virtual const llvm::omp::GV &getGridValue() const {
|
||||
llvm_unreachable("getGridValue not implemented on this target");
|
||||
/// Return a target-specific GPU grid values
|
||||
const llvm::omp::GV &getGridValue() const {
|
||||
assert(GridValues != nullptr && "GridValues not initialized");
|
||||
return *GridValues;
|
||||
}
|
||||
|
||||
/// Retrieve the name of the platform as it is used in the
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include "clang/Basic/MacroBuilder.h"
|
||||
#include "clang/Basic/TargetBuiltins.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace clang::targets;
|
||||
|
@ -334,6 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
|
|||
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
|
||||
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
|
||||
: DataLayoutStringR600);
|
||||
GridValues = &llvm::omp::AMDGPUGridValues;
|
||||
|
||||
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
|
||||
!isAMDGCN(Triple));
|
||||
|
|
|
@ -370,10 +370,6 @@ public:
|
|||
return getLangASFromTargetAS(Constant);
|
||||
}
|
||||
|
||||
const llvm::omp::GV &getGridValue() const override {
|
||||
return llvm::omp::AMDGPUGridValues;
|
||||
}
|
||||
|
||||
/// \returns Target specific vtbl ptr address space.
|
||||
unsigned getVtblPtrAddressSpace() const override {
|
||||
return static_cast<unsigned>(Constant);
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
#include "clang/Basic/MacroBuilder.h"
|
||||
#include "clang/Basic/TargetBuiltins.h"
|
||||
#include "llvm/ADT/StringSwitch.h"
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace clang::targets;
|
||||
|
@ -64,6 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
|
|||
TLSSupported = false;
|
||||
VLASupported = false;
|
||||
AddrSpaceMap = &NVPTXAddrSpaceMap;
|
||||
GridValues = &llvm::omp::NVPTXGridValues;
|
||||
UseAddrSpaceMapMangling = true;
|
||||
|
||||
// Define available target features
|
||||
|
|
|
@ -147,10 +147,6 @@ public:
|
|||
Opts["cl_khr_local_int32_extended_atomics"] = true;
|
||||
}
|
||||
|
||||
const llvm::omp::GV &getGridValue() const override {
|
||||
return llvm::omp::NVPTXGridValues;
|
||||
}
|
||||
|
||||
/// \returns If a target requires an address within a target specific address
|
||||
/// space \p AddressSpace to be converted in order to be used, then return the
|
||||
/// corresponding target specific DWARF address space.
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||
#include "llvm/IR/IntrinsicsNVPTX.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
|
||||
using namespace clang;
|
||||
using namespace CodeGen;
|
||||
|
@ -107,7 +106,8 @@ public:
|
|||
/// is the same for all known NVPTX architectures.
|
||||
enum MachineConfiguration : unsigned {
|
||||
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
|
||||
/// specific Grid Values like GV_Warp_Size, GV_Slot_Size
|
||||
/// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
|
||||
/// and GV_Warp_Size_Log2_Mask.
|
||||
|
||||
/// Global memory alignment for performance.
|
||||
GlobalMemoryAlignment = 128,
|
||||
|
@ -535,8 +535,7 @@ public:
|
|||
/// on the NVPTX device, to generate more efficient code.
|
||||
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
unsigned LaneIDBits =
|
||||
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
|
||||
unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
|
||||
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
|
||||
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
|
||||
}
|
||||
|
@ -546,9 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
|
|||
/// on the NVPTX device, to generate more efficient code.
|
||||
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
|
||||
CGBuilderTy &Bld = CGF.Builder;
|
||||
unsigned LaneIDBits =
|
||||
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
|
||||
unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
|
||||
unsigned LaneIDMask =
|
||||
CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
|
||||
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
|
||||
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
|
||||
"nvptx_lane_id");
|
||||
|
|
|
@ -62,13 +62,19 @@ struct GV {
|
|||
const unsigned GV_Slot_Size;
|
||||
/// The default value of maximum number of threads in a worker warp.
|
||||
const unsigned GV_Warp_Size;
|
||||
|
||||
constexpr unsigned warpSlotSize() const {
|
||||
return GV_Warp_Size * GV_Slot_Size;
|
||||
}
|
||||
|
||||
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
|
||||
/// for NVPTX.
|
||||
const unsigned GV_Warp_Size_32;
|
||||
/// The number of bits required to represent the max number of threads in warp
|
||||
const unsigned GV_Warp_Size_Log2;
|
||||
/// GV_Warp_Size * GV_Slot_Size,
|
||||
const unsigned GV_Warp_Slot_Size;
|
||||
/// the maximum number of teams.
|
||||
const unsigned GV_Max_Teams;
|
||||
/// Global Memory Alignment
|
||||
const unsigned GV_Mem_Align;
|
||||
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
|
||||
const unsigned GV_Warp_Size_Log2_Mask;
|
||||
// An alternative to the heavy data sharing infrastructure that uses global
|
||||
// memory is one that uses device __shared__ memory. The amount of such space
|
||||
// (in bytes) reserved by the OpenMP runtime is noted here.
|
||||
|
@ -77,32 +83,47 @@ struct GV {
|
|||
const unsigned GV_Max_WG_Size;
|
||||
// The default maximum team size for a working group
|
||||
const unsigned GV_Default_WG_Size;
|
||||
|
||||
constexpr unsigned maxWarpNumber() const {
|
||||
return GV_Max_WG_Size / GV_Warp_Size;
|
||||
}
|
||||
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
|
||||
const unsigned GV_Max_Warp_Number;
|
||||
/// The slot size that should be reserved for a working warp.
|
||||
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
|
||||
const unsigned GV_Warp_Size_Log2_MaskL;
|
||||
};
|
||||
|
||||
/// For AMDGPU GPUs
|
||||
static constexpr GV AMDGPUGridValues = {
|
||||
448, // GV_Threads
|
||||
256, // GV_Slot_Size
|
||||
64, // GV_Warp_Size
|
||||
128, // GV_Max_Teams
|
||||
896, // GV_SimpleBufferSize
|
||||
1024, // GV_Max_WG_Size,
|
||||
256, // GV_Default_WG_Size
|
||||
448, // GV_Threads
|
||||
256, // GV_Slot_Size
|
||||
64, // GV_Warp_Size
|
||||
32, // GV_Warp_Size_32
|
||||
6, // GV_Warp_Size_Log2
|
||||
64 * 256, // GV_Warp_Slot_Size
|
||||
128, // GV_Max_Teams
|
||||
256, // GV_Mem_Align
|
||||
63, // GV_Warp_Size_Log2_Mask
|
||||
896, // GV_SimpleBufferSize
|
||||
1024, // GV_Max_WG_Size,
|
||||
256, // GV_Defaut_WG_Size
|
||||
1024 / 64, // GV_Max_WG_Size / GV_WarpSize
|
||||
63 // GV_Warp_Size_Log2_MaskL
|
||||
};
|
||||
|
||||
/// For Nvidia GPUs
|
||||
static constexpr GV NVPTXGridValues = {
|
||||
992, // GV_Threads
|
||||
256, // GV_Slot_Size
|
||||
32, // GV_Warp_Size
|
||||
1024, // GV_Max_Teams
|
||||
896, // GV_SimpleBufferSize
|
||||
1024, // GV_Max_WG_Size
|
||||
128, // GV_Default_WG_Size
|
||||
992, // GV_Threads
|
||||
256, // GV_Slot_Size
|
||||
32, // GV_Warp_Size
|
||||
32, // GV_Warp_Size_32
|
||||
5, // GV_Warp_Size_Log2
|
||||
32 * 256, // GV_Warp_Slot_Size
|
||||
1024, // GV_Max_Teams
|
||||
256, // GV_Mem_Align
|
||||
(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
|
||||
896, // GV_SimpleBufferSize
|
||||
1024, // GV_Max_WG_Size
|
||||
128, // GV_Defaut_WG_Size
|
||||
1024 / 32, // GV_Max_WG_Size / GV_WarpSize
|
||||
31 // GV_Warp_Size_Log2_MaskL
|
||||
};
|
||||
|
||||
} // namespace omp
|
||||
|
|
Loading…
Reference in New Issue