forked from OSchip/llvm-project
[OpenMP] Improve alignment handling in the new device runtime
This commit is contained in:
parent
cbb709e251
commit
ed7ec860f0
|
@ -63,6 +63,15 @@ inline uint32_t popc(uint64_t V) {
|
||||||
return __builtin_popcountl(V);
|
return __builtin_popcountl(V);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return \p V aligned "upwards" according to \p Align, that is, the smallest
/// multiple of \p Align that is greater than or equal to \p V.
///
/// \p Align is converted to \p Ty1 first, so the whole computation is carried
/// out in the type of \p V and the result does not depend on the signedness or
/// width of \p Ty2. \p Align must be positive; a zero \p Align is a division by
/// zero.
template <typename Ty1, typename Ty2> inline Ty1 align_up(Ty1 V, Ty2 Align) {
  const Ty1 A = Ty1(Align);
  const Ty1 Rem = V % A;
  // Only add the distance to the next multiple instead of using the usual
  // `(V + A - 1) / A * A`. That form overflows in `V + A - 1` even if the
  // aligned value is representable, and it rounds negative values the wrong
  // way because integer division truncates toward zero.
  //
  // Rem > 0: V is positive and unaligned, step up to the next multiple.
  // Rem <= 0: V is already aligned (Rem == 0) or negative, in which case
  //           dropping the (negative) remainder already moves upwards.
  return Rem > 0 ? V + (A - Rem) : V - Rem;
}
|
||||||
|
/// Return \p V aligned "downwards" according to \p Align, that is, the largest
/// multiple of \p Align that is less than or equal to \p V.
///
/// \p Align is converted to \p Ty1 first, so the whole computation is carried
/// out in the type of \p V and the result does not depend on the signedness or
/// width of \p Ty2. \p Align must be positive; a zero \p Align is a division by
/// zero.
template <typename Ty1, typename Ty2> inline Ty1 align_down(Ty1 V, Ty2 Align) {
  const Ty1 A = Ty1(Align);
  const Ty1 Rem = V % A;
  // Dropping the remainder rounds toward zero, which is "down" for
  // non-negative values but "up" for negative ones.
  const Ty1 TowardZero = V - Rem;
  // If removing the remainder increased the value, V was negative and
  // unaligned; step one more alignment unit down. Written as a comparison
  // against V so that no sign test is performed on unsigned types.
  return TowardZero > V ? TowardZero - A : TowardZero;
}
|
||||||
|
|
||||||
#define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
|
#define OMP_LIKELY(EXPR) __builtin_expect((bool)(EXPR), true)
|
||||||
#define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
|
#define OMP_UNLIKELY(EXPR) __builtin_expect((bool)(EXPR), false)
|
||||||
|
|
||||||
|
|
|
@ -85,8 +85,8 @@ private:
|
||||||
/// Compute the size of the storage space reserved for a thread.
|
/// Compute the size of the storage space reserved for a thread.
|
||||||
uint32_t computeThreadStorageTotal() {
|
uint32_t computeThreadStorageTotal() {
|
||||||
uint32_t NumLanesInBlock = mapping::getNumberOfProcessorElements();
|
uint32_t NumLanesInBlock = mapping::getNumberOfProcessorElements();
|
||||||
return (state::SharedScratchpadSize - NumLanesInBlock + 1) /
|
return utils::align_down((state::SharedScratchpadSize / NumLanesInBlock),
|
||||||
NumLanesInBlock;
|
Alignment);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the top address of the warp data stack, that is the first address
|
/// Return the top address of the warp data stack, that is the first address
|
||||||
|
@ -114,7 +114,7 @@ void SharedMemorySmartStackTy::init(bool IsSPMD) {
|
||||||
|
|
||||||
void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
|
void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
|
||||||
// First align the number of requested bytes.
|
// First align the number of requested bytes.
|
||||||
uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment;
|
uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
|
||||||
|
|
||||||
uint32_t StorageTotal = computeThreadStorageTotal();
|
uint32_t StorageTotal = computeThreadStorageTotal();
|
||||||
|
|
||||||
|
@ -136,7 +136,7 @@ void *SharedMemorySmartStackTy::push(uint64_t Bytes) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
|
void SharedMemorySmartStackTy::pop(void *Ptr, uint32_t Bytes) {
|
||||||
uint64_t AlignedBytes = (Bytes + (Alignment - 1)) / Alignment * Alignment;
|
uint64_t AlignedBytes = utils::align_up(Bytes, Alignment);
|
||||||
if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) {
|
if (Ptr >= &Data[0] && Ptr < &Data[state::SharedScratchpadSize]) {
|
||||||
int TId = mapping::getThreadIdInBlock();
|
int TId = mapping::getThreadIdInBlock();
|
||||||
Usage[TId] -= AlignedBytes;
|
Usage[TId] -= AlignedBytes;
|
||||||
|
|
Loading…
Reference in New Issue