Revert "[openmp][nfc] Refactor GridValues"

Failed a nvptx codegen test
This reverts commit 2a47a84b40.
This commit is contained in:
Jon Chesterfield 2021-08-20 18:17:27 +01:00
parent cd1b950141
commit b1efeface7
7 changed files with 60 additions and 42 deletions

View File

@ -210,6 +210,9 @@ protected:
unsigned char RegParmMax, SSERegParmMax; unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI; TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap; const LangASMap *AddrSpaceMap;
const llvm::omp::GV *GridValues =
nullptr; // target-specific GPU grid values that must be
// consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName; mutable StringRef PlatformName;
mutable VersionTuple PlatformMinVersion; mutable VersionTuple PlatformMinVersion;
@ -1407,10 +1410,10 @@ public:
return LangAS::Default; return LangAS::Default;
} }
// access target-specific GPU grid values that must be consistent between /// Return a target-specific GPU grid values
// host RTL (plugin), deviceRTL and clang. const llvm::omp::GV &getGridValue() const {
virtual const llvm::omp::GV &getGridValue() const { assert(GridValues != nullptr && "GridValues not initialized");
llvm_unreachable("getGridValue not implemented on this target"); return *GridValues;
} }
/// Retrieve the name of the platform as it is used in the /// Retrieve the name of the platform as it is used in the

View File

@ -17,6 +17,7 @@
#include "clang/Basic/MacroBuilder.h" #include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringSwitch.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang; using namespace clang;
using namespace clang::targets; using namespace clang::targets;
@ -334,6 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
llvm::AMDGPU::getArchAttrR600(GPUKind)) { llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600); : DataLayoutStringR600);
GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D || setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple)); !isAMDGCN(Triple));

View File

@ -370,10 +370,6 @@ public:
return getLangASFromTargetAS(Constant); return getLangASFromTargetAS(Constant);
} }
const llvm::omp::GV &getGridValue() const override {
return llvm::omp::AMDGPUGridValues;
}
/// \returns Target specific vtbl ptr address space. /// \returns Target specific vtbl ptr address space.
unsigned getVtblPtrAddressSpace() const override { unsigned getVtblPtrAddressSpace() const override {
return static_cast<unsigned>(Constant); return static_cast<unsigned>(Constant);

View File

@ -16,6 +16,7 @@
#include "clang/Basic/MacroBuilder.h" #include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h" #include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/StringSwitch.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang; using namespace clang;
using namespace clang::targets; using namespace clang::targets;
@ -64,6 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
TLSSupported = false; TLSSupported = false;
VLASupported = false; VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap; AddrSpaceMap = &NVPTXAddrSpaceMap;
GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true; UseAddrSpaceMapMangling = true;
// Define available target features // Define available target features

View File

@ -147,10 +147,6 @@ public:
Opts["cl_khr_local_int32_extended_atomics"] = true; Opts["cl_khr_local_int32_extended_atomics"] = true;
} }
const llvm::omp::GV &getGridValue() const override {
return llvm::omp::NVPTXGridValues;
}
/// \returns If a target requires an address within a target specific address /// \returns If a target requires an address within a target specific address
/// space \p AddressSpace to be converted in order to be used, then return the /// space \p AddressSpace to be converted in order to be used, then return the
/// corresponding target specific DWARF address space. /// corresponding target specific DWARF address space.

View File

@ -22,7 +22,6 @@
#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h" #include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsNVPTX.h" #include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/MathExtras.h"
using namespace clang; using namespace clang;
using namespace CodeGen; using namespace CodeGen;
@ -107,7 +106,8 @@ public:
/// is the same for all known NVPTX architectures. /// is the same for all known NVPTX architectures.
enum MachineConfiguration : unsigned { enum MachineConfiguration : unsigned {
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target /// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
/// specific Grid Values like GV_Warp_Size, GV_Slot_Size /// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
/// and GV_Warp_Size_Log2_Mask.
/// Global memory alignment for performance. /// Global memory alignment for performance.
GlobalMemoryAlignment = 128, GlobalMemoryAlignment = 128,
@ -535,8 +535,7 @@ public:
/// on the NVPTX device, to generate more efficient code. /// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) { static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder; CGBuilderTy &Bld = CGF.Builder;
unsigned LaneIDBits = unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id"); return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
} }
@ -546,9 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
/// on the NVPTX device, to generate more efficient code. /// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) { static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder; CGBuilderTy &Bld = CGF.Builder;
unsigned LaneIDBits = unsigned LaneIDMask =
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size); CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime()); auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask), return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id"); "nvptx_lane_id");

View File

@ -62,13 +62,19 @@ struct GV {
const unsigned GV_Slot_Size; const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp. /// The default value of maximum number of threads in a worker warp.
const unsigned GV_Warp_Size; const unsigned GV_Warp_Size;
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
constexpr unsigned warpSlotSize() const { /// for NVPTX.
return GV_Warp_Size * GV_Slot_Size; const unsigned GV_Warp_Size_32;
} /// The number of bits required to represent the max number of threads in warp
const unsigned GV_Warp_Size_Log2;
/// GV_Warp_Size * GV_Slot_Size,
const unsigned GV_Warp_Slot_Size;
/// the maximum number of teams. /// the maximum number of teams.
const unsigned GV_Max_Teams; const unsigned GV_Max_Teams;
/// Global Memory Alignment
const unsigned GV_Mem_Align;
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global // An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space // memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here. // (in bytes) reserved by the OpenMP runtime is noted here.
@ -77,32 +83,47 @@ struct GV {
const unsigned GV_Max_WG_Size; const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group // The default maximum team size for a working group
const unsigned GV_Default_WG_Size; const unsigned GV_Default_WG_Size;
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
constexpr unsigned maxWarpNumber() const { const unsigned GV_Max_Warp_Number;
return GV_Max_WG_Size / GV_Warp_Size; /// The slot size that should be reserved for a working warp.
} /// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
const unsigned GV_Warp_Size_Log2_MaskL;
}; };
/// For AMDGPU GPUs /// For AMDGPU GPUs
static constexpr GV AMDGPUGridValues = { static constexpr GV AMDGPUGridValues = {
448, // GV_Threads 448, // GV_Threads
256, // GV_Slot_Size 256, // GV_Slot_Size
64, // GV_Warp_Size 64, // GV_Warp_Size
128, // GV_Max_Teams 32, // GV_Warp_Size_32
896, // GV_SimpleBufferSize 6, // GV_Warp_Size_Log2
1024, // GV_Max_WG_Size, 64 * 256, // GV_Warp_Slot_Size
256, // GV_Default_WG_Size 128, // GV_Max_Teams
256, // GV_Mem_Align
63, // GV_Warp_Size_Log2_Mask
896, // GV_SimpleBufferSize
1024, // GV_Max_WG_Size,
256, // GV_Defaut_WG_Size
1024 / 64, // GV_Max_WG_Size / GV_WarpSize
63 // GV_Warp_Size_Log2_MaskL
}; };
/// For Nvidia GPUs /// For Nvidia GPUs
static constexpr GV NVPTXGridValues = { static constexpr GV NVPTXGridValues = {
992, // GV_Threads 992, // GV_Threads
256, // GV_Slot_Size 256, // GV_Slot_Size
32, // GV_Warp_Size 32, // GV_Warp_Size
1024, // GV_Max_Teams 32, // GV_Warp_Size_32
896, // GV_SimpleBufferSize 5, // GV_Warp_Size_Log2
1024, // GV_Max_WG_Size 32 * 256, // GV_Warp_Slot_Size
128, // GV_Default_WG_Size 1024, // GV_Max_Teams
256, // GV_Mem_Align
(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
896, // GV_SimpleBufferSize
1024, // GV_Max_WG_Size
128, // GV_Defaut_WG_Size
1024 / 32, // GV_Max_WG_Size / GV_WarpSize
31 // GV_Warp_Size_Log2_MaskL
}; };
} // namespace omp } // namespace omp