!46383 Bugfix: GPU hash table operators produce incorrect results on GPU 3090
Merge pull request !46383 from zyli2020/r2.0.0-alpha
This commit is contained in:
commit
02473f3389
|
@ -35,6 +35,9 @@ if(ENABLE_GPU)
|
|||
string(REPLACE "-arch=sm_53;" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
|
||||
string(REPLACE "-gencode=arch=compute_53,code=sm_53;" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
|
||||
list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_70,code=sm_70)
|
||||
list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_75,code=sm_75)
|
||||
list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_80,code=sm_80)
|
||||
list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_86,code=compute_86)
|
||||
list(APPEND CUDA_NVCC_FLAGS -std=c++17)
|
||||
list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS)
|
||||
cuda_add_library(gpu_hash_table STATIC ${CUDA_SRC_LIST})
|
||||
|
|
|
@ -268,7 +268,9 @@ bool GPUHashTable<Key, Value, Allocator>::Insert(const Key *keys, size_t key_num
|
|||
|
||||
// 2. Insert values into map by indices in blocks.
|
||||
size_t total_insert_size = value_dim_ * key_num;
|
||||
InsertValues<<<GET_BLOCKS(total_insert_size), GET_THREADS, 0, cuda_stream>>>(
|
||||
auto block_size = GET_THREADS_MAXSIZE(kBlockSize);
|
||||
auto grid_size = CUDA_BLOCKS_CAL(GET_CTX_DEVICE_ID, total_insert_size, block_size);
|
||||
InsertValues<<<grid_size, block_size, 0, cuda_stream>>>(
|
||||
value_dim_, total_insert_size, indices, value, elements_per_block_, lookup_cnts_ptr_, min_lookup_cnt_before_permit_,
|
||||
global_timestamp_, update_timestamps_ptr_, idle_flags_ptr_, blocks_ptr_);
|
||||
|
||||
|
|
|
@ -219,7 +219,7 @@ class GPUHashTable : public HashTable<Key, Value> {
|
|||
size_t capacity_{0};
|
||||
|
||||
// The number of elements of one block.
|
||||
static const size_t elements_per_block_{kInitialCapacity};
|
||||
static constexpr size_t elements_per_block_{kInitialCapacity};
|
||||
|
||||
// Record the number of successfully inserted keys.
|
||||
cuda::atomic<std::size_t, cuda::thread_scope_device> *insert_success_number_{nullptr};
|
||||
|
@ -230,7 +230,7 @@ class GPUHashTable : public HashTable<Key, Value> {
|
|||
curandStatePhilox4_32_10_t *random_gen_state_{nullptr};
|
||||
|
||||
// The block size used to launch cuda kernel for inserting normal distribution random values.
|
||||
int random_gen_threads_per_block_{GET_THREADS};
|
||||
int random_gen_threads_per_block_{kBlockSize};
|
||||
// The grid size used to launch cuda kernel for inserting normal distribution random values.
|
||||
int random_gen_block_count_{(kMaxThreadsPerBlockRandomGen - 1) / random_gen_threads_per_block_ + 1};
|
||||
|
||||
|
|
Loading…
Reference in New Issue