Revert "[openmp][nfc] Refactor GridValues"

Failed an nvptx codegen test
This reverts commit 2a47a84b40.
Jon Chesterfield 2021-08-20 18:17:27 +01:00
parent cd1b950141
commit b1efeface7
7 changed files with 60 additions and 42 deletions

clang/include/clang/Basic/TargetInfo.h

@@ -210,6 +210,9 @@ protected:
unsigned char RegParmMax, SSERegParmMax;
TargetCXXABI TheCXXABI;
const LangASMap *AddrSpaceMap;
const llvm::omp::GV *GridValues =
nullptr; // target-specific GPU grid values that must be
// consistent between host RTL (plugin), device RTL, and clang.
mutable StringRef PlatformName;
mutable VersionTuple PlatformMinVersion;
@@ -1407,10 +1410,10 @@ public:
return LangAS::Default;
}
// access target-specific GPU grid values that must be consistent between
// host RTL (plugin), deviceRTL and clang.
virtual const llvm::omp::GV &getGridValue() const {
llvm_unreachable("getGridValue not implemented on this target");
/// Return the target-specific GPU grid values.
const llvm::omp::GV &getGridValue() const {
assert(GridValues != nullptr && "GridValues not initialized");
return *GridValues;
}
/// Retrieve the name of the platform as it is used in the
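
The member and accessor restored here work as a pair: each GPU target constructor points GridValues at its per-target table (see the AMDGPU and NVPTX constructor hunks below), and generic code dereferences it through getGridValue(). A minimal standalone sketch of that pattern, using simplified stand-in types rather than the real clang classes:

#include <cassert>

// Simplified stand-ins for llvm::omp::GV and the per-target tables.
struct GV { unsigned GV_Warp_Size; };
inline constexpr GV AMDGPUGridValues{64};
inline constexpr GV NVPTXGridValues{32};

class TargetInfo {
protected:
  const GV *GridValues = nullptr; // GPU targets point this at their table

public:
  const GV &getGridValue() const {
    assert(GridValues != nullptr && "GridValues not initialized");
    return *GridValues;
  }
};

class AMDGPUTargetInfo : public TargetInfo {
public:
  AMDGPUTargetInfo() { GridValues = &AMDGPUGridValues; } // mirrors the AMDGPU constructor hunk below
};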

clang/lib/Basic/Targets/AMDGPU.cpp

@@ -17,6 +17,7 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang;
using namespace clang::targets;
@@ -334,6 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
: DataLayoutStringR600);
GridValues = &llvm::omp::AMDGPUGridValues;
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
!isAMDGCN(Triple));

clang/lib/Basic/Targets/AMDGPU.h

@@ -370,10 +370,6 @@ public:
return getLangASFromTargetAS(Constant);
}
const llvm::omp::GV &getGridValue() const override {
return llvm::omp::AMDGPUGridValues;
}
/// \returns Target specific vtbl ptr address space.
unsigned getVtblPtrAddressSpace() const override {
return static_cast<unsigned>(Constant);
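
The override deleted just above came from the reverted refactor, which dispatched through the type system instead of through a data member: the base TargetInfo declares a virtual getGridValue() whose body is llvm_unreachable, and each GPU target overrides it to return its table, which also lets non-GPU targets drop the pointer member re-added in the TargetInfo.h hunk above. A rough standalone sketch of that shape, again with simplified stand-in types rather than the real classes:

#include <cstdlib>

struct GV { unsigned GV_Warp_Size; };
inline constexpr GV AMDGPUGridValues{64};

class TargetInfo {
public:
  virtual ~TargetInfo() = default;
  // The base version is never meant to run (llvm_unreachable in the real header).
  virtual const GV &getGridValue() const { std::abort(); }
};

class AMDGPUTargetInfo : public TargetInfo {
public:
  const GV &getGridValue() const override { return AMDGPUGridValues; }
};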

clang/lib/Basic/Targets/NVPTX.cpp

@@ -16,6 +16,7 @@
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
using namespace clang;
using namespace clang::targets;
@@ -64,6 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
TLSSupported = false;
VLASupported = false;
AddrSpaceMap = &NVPTXAddrSpaceMap;
GridValues = &llvm::omp::NVPTXGridValues;
UseAddrSpaceMapMangling = true;
// Define available target features

clang/lib/Basic/Targets/NVPTX.h

@@ -147,10 +147,6 @@ public:
Opts["cl_khr_local_int32_extended_atomics"] = true;
}
const llvm::omp::GV &getGridValue() const override {
return llvm::omp::NVPTXGridValues;
}
/// \returns If a target requires an address within a target specific address
/// space \p AddressSpace to be converted in order to be used, then return the
/// corresponding target specific DWARF address space.

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

@@ -22,7 +22,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/Support/MathExtras.h"
using namespace clang;
using namespace CodeGen;
@@ -107,7 +106,8 @@ public:
/// is the same for all known NVPTX architectures.
enum MachineConfiguration : unsigned {
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
/// specific Grid Values like GV_Warp_Size, GV_Slot_Size
/// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
/// and GV_Warp_Size_Log2_Mask.
/// Global memory alignment for performance.
GlobalMemoryAlignment = 128,
@@ -535,8 +535,7 @@ public:
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
unsigned LaneIDBits =
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
}
@@ -546,9 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
/// on the NVPTX device, to generate more efficient code.
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
CGBuilderTy &Bld = CGF.Builder;
unsigned LaneIDBits =
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
unsigned LaneIDMask =
CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
"nvptx_lane_id");

llvm/include/llvm/Frontend/OpenMP/OMPGridValues.h

@@ -62,13 +62,19 @@ struct GV {
const unsigned GV_Slot_Size;
/// The default value of maximum number of threads in a worker warp.
const unsigned GV_Warp_Size;
constexpr unsigned warpSlotSize() const {
return GV_Warp_Size * GV_Slot_Size;
}
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
/// for NVPTX.
const unsigned GV_Warp_Size_32;
/// The number of bits required to represent the max number of threads in warp
const unsigned GV_Warp_Size_Log2;
/// GV_Warp_Size * GV_Slot_Size,
const unsigned GV_Warp_Slot_Size;
/// the maximum number of teams.
const unsigned GV_Max_Teams;
/// Global Memory Alignment
const unsigned GV_Mem_Align;
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
const unsigned GV_Warp_Size_Log2_Mask;
// An alternative to the heavy data sharing infrastructure that uses global
// memory is one that uses device __shared__ memory. The amount of such space
// (in bytes) reserved by the OpenMP runtime is noted here.
@@ -77,32 +83,47 @@ struct GV {
const unsigned GV_Max_WG_Size;
// The default maximum team size for a working group
const unsigned GV_Default_WG_Size;
constexpr unsigned maxWarpNumber() const {
return GV_Max_WG_Size / GV_Warp_Size;
}
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
const unsigned GV_Max_Warp_Number;
/// The slot size that should be reserved for a working warp.
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
const unsigned GV_Warp_Size_Log2_MaskL;
};
/// For AMDGPU GPUs
static constexpr GV AMDGPUGridValues = {
448, // GV_Threads
256, // GV_Slot_Size
64, // GV_Warp_Size
128, // GV_Max_Teams
896, // GV_SimpleBufferSize
1024, // GV_Max_WG_Size,
256, // GV_Default_WG_Size
448, // GV_Threads
256, // GV_Slot_Size
64, // GV_Warp_Size
32, // GV_Warp_Size_32
6, // GV_Warp_Size_Log2
64 * 256, // GV_Warp_Slot_Size
128, // GV_Max_Teams
256, // GV_Mem_Align
63, // GV_Warp_Size_Log2_Mask
896, // GV_SimpleBufferSize
1024, // GV_Max_WG_Size,
256, // GV_Default_WG_Size
1024 / 64, // GV_Max_WG_Size / GV_WarpSize
63 // GV_Warp_Size_Log2_MaskL
};
/// For Nvidia GPUs
static constexpr GV NVPTXGridValues = {
992, // GV_Threads
256, // GV_Slot_Size
32, // GV_Warp_Size
1024, // GV_Max_Teams
896, // GV_SimpleBufferSize
1024, // GV_Max_WG_Size
128, // GV_Default_WG_Size
992, // GV_Threads
256, // GV_Slot_Size
32, // GV_Warp_Size
32, // GV_Warp_Size_32
5, // GV_Warp_Size_Log2
32 * 256, // GV_Warp_Slot_Size
1024, // GV_Max_Teams
256, // GV_Mem_Align
(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
896, // GV_SimpleBufferSize
1024, // GV_Max_WG_Size
128, // GV_Default_WG_Size
1024 / 32, // GV_Max_WG_Size / GV_WarpSize
31 // GV_Warp_Size_Log2_MaskL
};
} // namespace omp
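
The restored initializers store values that the reverted constexpr helpers (warpSlotSize, maxWarpNumber) computed on demand, so the rows have to stay internally consistent by hand. A few spot checks over the literal values above, in plain C++; note that the stored Log2 masks are simply GV_Warp_Size - 1, and that the struct comment's (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2)) only works out for the 32-wide case:

// The Log2 fields match the warp sizes: 2^6 == 64 (AMDGPU), 2^5 == 32 (NVPTX).
static_assert((1u << 6) == 64 && (1u << 5) == 32, "GV_Warp_Size_Log2");
// The mask fields equal GV_Warp_Size - 1 on both targets.
static_assert(64 - 1 == 63 && 32 - 1 == 31, "GV_Warp_Size_Log2_Mask / _MaskL");
// The expression spelled out in the NVPTX table evaluates to the same 31.
static_assert((~0u >> (32 - 5)) == 31, "NVPTX mask expression");
// Derived sizes now stored as plain fields: warp slot size and max warp count.
static_assert(64 * 256 == 16384 && 1024 / 64 == 16, "AMDGPU derived fields");
static_assert(32 * 256 == 8192 && 1024 / 32 == 32, "NVPTX derived fields");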