forked from OSchip/llvm-project
Revert "[openmp][nfc] Refactor GridValues"
Failed a nvptx codegen test
This reverts commit 2a47a84b40
.
This commit is contained in:
parent
cd1b950141
commit
b1efeface7
|
@ -210,6 +210,9 @@ protected:
|
||||||
unsigned char RegParmMax, SSERegParmMax;
|
unsigned char RegParmMax, SSERegParmMax;
|
||||||
TargetCXXABI TheCXXABI;
|
TargetCXXABI TheCXXABI;
|
||||||
const LangASMap *AddrSpaceMap;
|
const LangASMap *AddrSpaceMap;
|
||||||
|
const llvm::omp::GV *GridValues =
|
||||||
|
nullptr; // target-specific GPU grid values that must be
|
||||||
|
// consistent between host RTL (plugin), device RTL, and clang.
|
||||||
|
|
||||||
mutable StringRef PlatformName;
|
mutable StringRef PlatformName;
|
||||||
mutable VersionTuple PlatformMinVersion;
|
mutable VersionTuple PlatformMinVersion;
|
||||||
|
@ -1407,10 +1410,10 @@ public:
|
||||||
return LangAS::Default;
|
return LangAS::Default;
|
||||||
}
|
}
|
||||||
|
|
||||||
// access target-specific GPU grid values that must be consistent between
|
/// Return a target-specific GPU grid values
|
||||||
// host RTL (plugin), deviceRTL and clang.
|
const llvm::omp::GV &getGridValue() const {
|
||||||
virtual const llvm::omp::GV &getGridValue() const {
|
assert(GridValues != nullptr && "GridValues not initialized");
|
||||||
llvm_unreachable("getGridValue not implemented on this target");
|
return *GridValues;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retrieve the name of the platform as it is used in the
|
/// Retrieve the name of the platform as it is used in the
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include "clang/Basic/MacroBuilder.h"
|
#include "clang/Basic/MacroBuilder.h"
|
||||||
#include "clang/Basic/TargetBuiltins.h"
|
#include "clang/Basic/TargetBuiltins.h"
|
||||||
#include "llvm/ADT/StringSwitch.h"
|
#include "llvm/ADT/StringSwitch.h"
|
||||||
|
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||||
|
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
using namespace clang::targets;
|
using namespace clang::targets;
|
||||||
|
@ -334,6 +335,7 @@ AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
|
||||||
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
|
llvm::AMDGPU::getArchAttrR600(GPUKind)) {
|
||||||
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
|
resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
|
||||||
: DataLayoutStringR600);
|
: DataLayoutStringR600);
|
||||||
|
GridValues = &llvm::omp::AMDGPUGridValues;
|
||||||
|
|
||||||
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
|
setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
|
||||||
!isAMDGCN(Triple));
|
!isAMDGCN(Triple));
|
||||||
|
|
|
@ -370,10 +370,6 @@ public:
|
||||||
return getLangASFromTargetAS(Constant);
|
return getLangASFromTargetAS(Constant);
|
||||||
}
|
}
|
||||||
|
|
||||||
const llvm::omp::GV &getGridValue() const override {
|
|
||||||
return llvm::omp::AMDGPUGridValues;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns Target specific vtbl ptr address space.
|
/// \returns Target specific vtbl ptr address space.
|
||||||
unsigned getVtblPtrAddressSpace() const override {
|
unsigned getVtblPtrAddressSpace() const override {
|
||||||
return static_cast<unsigned>(Constant);
|
return static_cast<unsigned>(Constant);
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
#include "clang/Basic/MacroBuilder.h"
|
#include "clang/Basic/MacroBuilder.h"
|
||||||
#include "clang/Basic/TargetBuiltins.h"
|
#include "clang/Basic/TargetBuiltins.h"
|
||||||
#include "llvm/ADT/StringSwitch.h"
|
#include "llvm/ADT/StringSwitch.h"
|
||||||
|
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||||
|
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
using namespace clang::targets;
|
using namespace clang::targets;
|
||||||
|
@ -64,6 +65,7 @@ NVPTXTargetInfo::NVPTXTargetInfo(const llvm::Triple &Triple,
|
||||||
TLSSupported = false;
|
TLSSupported = false;
|
||||||
VLASupported = false;
|
VLASupported = false;
|
||||||
AddrSpaceMap = &NVPTXAddrSpaceMap;
|
AddrSpaceMap = &NVPTXAddrSpaceMap;
|
||||||
|
GridValues = &llvm::omp::NVPTXGridValues;
|
||||||
UseAddrSpaceMapMangling = true;
|
UseAddrSpaceMapMangling = true;
|
||||||
|
|
||||||
// Define available target features
|
// Define available target features
|
||||||
|
|
|
@ -147,10 +147,6 @@ public:
|
||||||
Opts["cl_khr_local_int32_extended_atomics"] = true;
|
Opts["cl_khr_local_int32_extended_atomics"] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
const llvm::omp::GV &getGridValue() const override {
|
|
||||||
return llvm::omp::NVPTXGridValues;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// \returns If a target requires an address within a target specific address
|
/// \returns If a target requires an address within a target specific address
|
||||||
/// space \p AddressSpace to be converted in order to be used, then return the
|
/// space \p AddressSpace to be converted in order to be used, then return the
|
||||||
/// corresponding target specific DWARF address space.
|
/// corresponding target specific DWARF address space.
|
||||||
|
|
|
@ -22,7 +22,6 @@
|
||||||
#include "llvm/ADT/SmallPtrSet.h"
|
#include "llvm/ADT/SmallPtrSet.h"
|
||||||
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
#include "llvm/Frontend/OpenMP/OMPGridValues.h"
|
||||||
#include "llvm/IR/IntrinsicsNVPTX.h"
|
#include "llvm/IR/IntrinsicsNVPTX.h"
|
||||||
#include "llvm/Support/MathExtras.h"
|
|
||||||
|
|
||||||
using namespace clang;
|
using namespace clang;
|
||||||
using namespace CodeGen;
|
using namespace CodeGen;
|
||||||
|
@ -107,7 +106,8 @@ public:
|
||||||
/// is the same for all known NVPTX architectures.
|
/// is the same for all known NVPTX architectures.
|
||||||
enum MachineConfiguration : unsigned {
|
enum MachineConfiguration : unsigned {
|
||||||
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
|
/// See "llvm/Frontend/OpenMP/OMPGridValues.h" for various related target
|
||||||
/// specific Grid Values like GV_Warp_Size, GV_Slot_Size
|
/// specific Grid Values like GV_Warp_Size, GV_Warp_Size_Log2,
|
||||||
|
/// and GV_Warp_Size_Log2_Mask.
|
||||||
|
|
||||||
/// Global memory alignment for performance.
|
/// Global memory alignment for performance.
|
||||||
GlobalMemoryAlignment = 128,
|
GlobalMemoryAlignment = 128,
|
||||||
|
@ -535,8 +535,7 @@ public:
|
||||||
/// on the NVPTX device, to generate more efficient code.
|
/// on the NVPTX device, to generate more efficient code.
|
||||||
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
|
static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
|
||||||
CGBuilderTy &Bld = CGF.Builder;
|
CGBuilderTy &Bld = CGF.Builder;
|
||||||
unsigned LaneIDBits =
|
unsigned LaneIDBits = CGF.getTarget().getGridValue().GV_Warp_Size_Log2;
|
||||||
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
|
|
||||||
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
|
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
|
||||||
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
|
return Bld.CreateAShr(RT.getGPUThreadID(CGF), LaneIDBits, "nvptx_warp_id");
|
||||||
}
|
}
|
||||||
|
@ -546,9 +545,8 @@ static llvm::Value *getNVPTXWarpID(CodeGenFunction &CGF) {
|
||||||
/// on the NVPTX device, to generate more efficient code.
|
/// on the NVPTX device, to generate more efficient code.
|
||||||
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
|
static llvm::Value *getNVPTXLaneID(CodeGenFunction &CGF) {
|
||||||
CGBuilderTy &Bld = CGF.Builder;
|
CGBuilderTy &Bld = CGF.Builder;
|
||||||
unsigned LaneIDBits =
|
unsigned LaneIDMask =
|
||||||
llvm::Log2_32(CGF.getTarget().getGridValue().GV_Warp_Size);
|
CGF.getContext().getTargetInfo().getGridValue().GV_Warp_Size_Log2_Mask;
|
||||||
unsigned LaneIDMask = ~0 >> (32u - LaneIDBits);
|
|
||||||
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
|
auto &RT = static_cast<CGOpenMPRuntimeGPU &>(CGF.CGM.getOpenMPRuntime());
|
||||||
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
|
return Bld.CreateAnd(RT.getGPUThreadID(CGF), Bld.getInt32(LaneIDMask),
|
||||||
"nvptx_lane_id");
|
"nvptx_lane_id");
|
||||||
|
|
|
@ -62,13 +62,19 @@ struct GV {
|
||||||
const unsigned GV_Slot_Size;
|
const unsigned GV_Slot_Size;
|
||||||
/// The default value of maximum number of threads in a worker warp.
|
/// The default value of maximum number of threads in a worker warp.
|
||||||
const unsigned GV_Warp_Size;
|
const unsigned GV_Warp_Size;
|
||||||
|
/// Alternate warp size for some AMDGCN architectures. Same as GV_Warp_Size
|
||||||
constexpr unsigned warpSlotSize() const {
|
/// for NVPTX.
|
||||||
return GV_Warp_Size * GV_Slot_Size;
|
const unsigned GV_Warp_Size_32;
|
||||||
}
|
/// The number of bits required to represent the max number of threads in warp
|
||||||
|
const unsigned GV_Warp_Size_Log2;
|
||||||
|
/// GV_Warp_Size * GV_Slot_Size,
|
||||||
|
const unsigned GV_Warp_Slot_Size;
|
||||||
/// the maximum number of teams.
|
/// the maximum number of teams.
|
||||||
const unsigned GV_Max_Teams;
|
const unsigned GV_Max_Teams;
|
||||||
|
/// Global Memory Alignment
|
||||||
|
const unsigned GV_Mem_Align;
|
||||||
|
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
|
||||||
|
const unsigned GV_Warp_Size_Log2_Mask;
|
||||||
// An alternative to the heavy data sharing infrastructure that uses global
|
// An alternative to the heavy data sharing infrastructure that uses global
|
||||||
// memory is one that uses device __shared__ memory. The amount of such space
|
// memory is one that uses device __shared__ memory. The amount of such space
|
||||||
// (in bytes) reserved by the OpenMP runtime is noted here.
|
// (in bytes) reserved by the OpenMP runtime is noted here.
|
||||||
|
@ -77,32 +83,47 @@ struct GV {
|
||||||
const unsigned GV_Max_WG_Size;
|
const unsigned GV_Max_WG_Size;
|
||||||
// The default maximum team size for a working group
|
// The default maximum team size for a working group
|
||||||
const unsigned GV_Default_WG_Size;
|
const unsigned GV_Default_WG_Size;
|
||||||
|
// This is GV_Max_WG_Size / GV_WarpSize. 32 for NVPTX and 16 for AMDGCN.
|
||||||
constexpr unsigned maxWarpNumber() const {
|
const unsigned GV_Max_Warp_Number;
|
||||||
return GV_Max_WG_Size / GV_Warp_Size;
|
/// The slot size that should be reserved for a working warp.
|
||||||
}
|
/// (~0u >> (GV_Warp_Size - GV_Warp_Size_Log2))
|
||||||
|
const unsigned GV_Warp_Size_Log2_MaskL;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// For AMDGPU GPUs
|
/// For AMDGPU GPUs
|
||||||
static constexpr GV AMDGPUGridValues = {
|
static constexpr GV AMDGPUGridValues = {
|
||||||
448, // GV_Threads
|
448, // GV_Threads
|
||||||
256, // GV_Slot_Size
|
256, // GV_Slot_Size
|
||||||
64, // GV_Warp_Size
|
64, // GV_Warp_Size
|
||||||
128, // GV_Max_Teams
|
32, // GV_Warp_Size_32
|
||||||
896, // GV_SimpleBufferSize
|
6, // GV_Warp_Size_Log2
|
||||||
1024, // GV_Max_WG_Size,
|
64 * 256, // GV_Warp_Slot_Size
|
||||||
256, // GV_Default_WG_Size
|
128, // GV_Max_Teams
|
||||||
|
256, // GV_Mem_Align
|
||||||
|
63, // GV_Warp_Size_Log2_Mask
|
||||||
|
896, // GV_SimpleBufferSize
|
||||||
|
1024, // GV_Max_WG_Size,
|
||||||
|
256, // GV_Defaut_WG_Size
|
||||||
|
1024 / 64, // GV_Max_WG_Size / GV_WarpSize
|
||||||
|
63 // GV_Warp_Size_Log2_MaskL
|
||||||
};
|
};
|
||||||
|
|
||||||
/// For Nvidia GPUs
|
/// For Nvidia GPUs
|
||||||
static constexpr GV NVPTXGridValues = {
|
static constexpr GV NVPTXGridValues = {
|
||||||
992, // GV_Threads
|
992, // GV_Threads
|
||||||
256, // GV_Slot_Size
|
256, // GV_Slot_Size
|
||||||
32, // GV_Warp_Size
|
32, // GV_Warp_Size
|
||||||
1024, // GV_Max_Teams
|
32, // GV_Warp_Size_32
|
||||||
896, // GV_SimpleBufferSize
|
5, // GV_Warp_Size_Log2
|
||||||
1024, // GV_Max_WG_Size
|
32 * 256, // GV_Warp_Slot_Size
|
||||||
128, // GV_Default_WG_Size
|
1024, // GV_Max_Teams
|
||||||
|
256, // GV_Mem_Align
|
||||||
|
(~0u >> (32 - 5)), // GV_Warp_Size_Log2_Mask
|
||||||
|
896, // GV_SimpleBufferSize
|
||||||
|
1024, // GV_Max_WG_Size
|
||||||
|
128, // GV_Defaut_WG_Size
|
||||||
|
1024 / 32, // GV_Max_WG_Size / GV_WarpSize
|
||||||
|
31 // GV_Warp_Size_Log2_MaskL
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace omp
|
} // namespace omp
|
||||||
|
|
Loading…
Reference in New Issue