!46383 Bugfix: GPU hash table operators produce incorrect results on 3090 GPUs

Merge pull request !46383 from zyli2020/r2.0.0-alpha
This commit is contained in:
i-robot 2022-12-07 07:57:54 +00:00 committed by Gitee
commit 02473f3389
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
3 changed files with 8 additions and 3 deletions

View File

@@ -35,6 +35,9 @@ if(ENABLE_GPU)
string(REPLACE "-arch=sm_53;" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
string(REPLACE "-gencode=arch=compute_53,code=sm_53;" "" CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_70,code=sm_70)
+list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_75,code=sm_75)
+list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_80,code=sm_80)
+list(APPEND CUDA_NVCC_FLAGS -gencode=arch=compute_86,code=compute_86)
list(APPEND CUDA_NVCC_FLAGS -std=c++17)
list(REMOVE_DUPLICATES CUDA_NVCC_FLAGS)
cuda_add_library(gpu_hash_table STATIC ${CUDA_SRC_LIST})

View File

@@ -268,7 +268,9 @@ bool GPUHashTable<Key, Value, Allocator>::Insert(const Key *keys, size_t key_num
// 2. Insert values into map by indices in blocks.
size_t total_insert_size = value_dim_ * key_num;
-InsertValues<<<GET_BLOCKS(total_insert_size), GET_THREADS, 0, cuda_stream>>>(
+auto block_size = GET_THREADS_MAXSIZE(kBlockSize);
+auto grid_size = CUDA_BLOCKS_CAL(GET_CTX_DEVICE_ID, total_insert_size, block_size);
+InsertValues<<<grid_size, block_size, 0, cuda_stream>>>(
value_dim_, total_insert_size, indices, value, elements_per_block_, lookup_cnts_ptr_, min_lookup_cnt_before_permit_,
global_timestamp_, update_timestamps_ptr_, idle_flags_ptr_, blocks_ptr_);

View File

@@ -219,7 +219,7 @@ class GPUHashTable : public HashTable<Key, Value> {
size_t capacity_{0};
// The number of elements of one block.
-static const size_t elements_per_block_{kInitialCapacity};
+static constexpr size_t elements_per_block_{kInitialCapacity};
// Record the number of successfully inserted keys.
cuda::atomic<std::size_t, cuda::thread_scope_device> *insert_success_number_{nullptr};
@@ -230,7 +230,7 @@ class GPUHashTable : public HashTable<Key, Value> {
curandStatePhilox4_32_10_t *random_gen_state_{nullptr};
// The block size used to launch cuda kernel for inserting normal distribution random values.
-int random_gen_threads_per_block_{GET_THREADS};
+int random_gen_threads_per_block_{kBlockSize};
// The grid size used to launch cuda kernel for inserting normal distribution random values.
int random_gen_block_count_{(kMaxThreadsPerBlockRandomGen - 1) / random_gen_threads_per_block_ + 1};