forked from mindspore-Ecosystem/mindspore
!8502 GPU optimize check sm warning log
From: @VectorSL
Reviewed-by: @cristoval
Signed-off-by: @cristoval
commit fb3a7b4d5a
@@ -127,13 +127,15 @@ std::pair<bool, size_t> GpuKernelFactory::GpuKernelAttrCheck(const std::string &
     auto attr_size = (&(iter->second))->at(attr_index).first.GetInputSize();
     // data type matching check of all input parameters of kernel
     for (size_t input_index = 0; input_index < kernel_info->GetInputNum(); input_index++) {
-      if (marjor_sm < RECOMMEND_SM && kernel_info->GetInputDeviceType(input_index) == kNumberTypeFloat16) {
+      const bool check_sm = mindspore::device::gpu::CudaCommon::GetInstance().check_sm();
+      if (check_sm && marjor_sm < RECOMMEND_SM && kernel_info->GetInputDeviceType(input_index) == kNumberTypeFloat16) {
         if (marjor_sm < MINIUM_SM) {
           MS_LOG(EXCEPTION) << "Half precision ops can be used on Devices which computing capacity is >= " << MINIUM_SM
                             << ", but the current device's computing capacity is " << marjor_sm;
         }
         MS_LOG(WARNING) << "It is recommended to use devices with a computing capacity >= " << RECOMMEND_SM
                         << ", but the current device's computing capacity is " << marjor_sm;
+        mindspore::device::gpu::CudaCommon::GetInstance().set_check_sm(false);
       }
       if (kernel_info->GetInputDeviceType(input_index) !=
           (iter->second)[attr_index].first.GetInputAttr(input_index % attr_size).first) {
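Net effect of this hunk: the compute-capability check is now gated on check_sm() and the flag is cleared right after the warning is printed, so the message is logged once per process instead of once for every float16 input of every kernel. Below is a minimal standalone sketch of that warn-once flow, not the MindSpore implementation: kMinimumSm/kRecommendSm are placeholder constants standing in for MINIUM_SM/RECOMMEND_SM (values assumed), a function-local static bool stands in for the CudaCommon flag, and plain iostream/exceptions replace MS_LOG.

#include <iostream>
#include <stdexcept>
#include <string>

constexpr int kMinimumSm = 5;    // placeholder for MINIUM_SM (value assumed)
constexpr int kRecommendSm = 7;  // placeholder for RECOMMEND_SM (value assumed)

// Mirrors the patched control flow: hard error below the minimum compute
// capability, a one-time warning below the recommended capability.
void CheckHalfPrecisionSupport(int major_sm) {
  static bool check_sm = true;  // stands in for CudaCommon::GetInstance().check_sm()
  if (check_sm && major_sm < kRecommendSm) {
    if (major_sm < kMinimumSm) {
      throw std::runtime_error("half precision ops need compute capability >= " +
                               std::to_string(kMinimumSm));
    }
    std::cout << "warning: recommended compute capability is >= " << kRecommendSm
              << ", current device has " << major_sm << "\n";
    check_sm = false;  // stands in for set_check_sm(false): warn only once
  }
}

int main() {
  CheckHalfPrecisionSupport(6);  // prints the warning
  CheckHalfPrecisionSupport(6);  // silent: the flag was already cleared
  return 0;
}

The commit keeps the flag on the CudaCommon singleton rather than in a function-local static so that other call sites can query or reset it.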
@@ -31,6 +31,8 @@ class CudaCommon {
     return std::min(((total_threads - 1) / threads_per_block_) + 1, max_blocks_);
   }
   size_t share_memory_size() const { return max_share_memory_; }
+  void set_check_sm(const bool &flag) { check_sm_ = flag; }
+  bool check_sm() const { return check_sm_; }

   static CudaCommon &GetInstance() {
     static CudaCommon instance;
@@ -55,6 +57,7 @@ class CudaCommon {
   int threads_per_block_;
   int major_sm_;
   size_t max_share_memory_;
+  bool check_sm_{true};
 };
 #define GET_BLOCKS(total_threads) mindspore::device::gpu::CudaCommon::GetInstance().blocks_num(total_threads)
 #define GET_THREADS mindspore::device::gpu::CudaCommon::GetInstance().threads_num()
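On the header side the change only adds a check_sm_ member, default-initialized to true, plus a getter and setter on the existing CudaCommon Meyers singleton. A condensed sketch of that shape follows (the class name GpuState and the main() usage are illustrative, not from the commit); taking the setter argument by value (bool flag) would be the more conventional signature than const bool &flag, though the behavior is identical.

#include <cassert>

// Placeholder for the CudaCommon singleton; only the new flag and its accessors are shown.
class GpuState {
 public:
  static GpuState &GetInstance() {
    static GpuState instance;  // Meyers singleton: constructed on first use
    return instance;
  }
  void set_check_sm(bool flag) { check_sm_ = flag; }
  bool check_sm() const { return check_sm_; }

 private:
  GpuState() = default;
  bool check_sm_{true};  // default member initializer: the check starts enabled
};

int main() {
  assert(GpuState::GetInstance().check_sm());   // enabled by default
  GpuState::GetInstance().set_check_sm(false);  // e.g. after the first warning is logged
  assert(!GpuState::GetInstance().check_sm());  // later callers see the cleared flag
  return 0;
}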