!58010 fix opencl resize failed
Merge pull request !58010 from liyan2022/master
This commit is contained in: commit eb51dea967
@@ -411,3 +411,6 @@ mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/
 mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fractional_max_pool_3d_with_fixed_ksize_proto.cc:ge::IMPLEMT_COMMON_INFERFUNC
 mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/sspaddmm.cc:ge::CUST_IMPLEMT_INFERFUNC
 mindspore/mindspore/ccsrc/plugin/device/ascend/kernel/aicpu/aicpu_ops/customize/op_proto/fractional_max_pool_3d_grad_with_fixed_ksize_proto.cc:ge::IMPLEMT_COMMON_INFERFUNC
+mindspore/mindspore/lite/src/litert/kernel/opencl/kernel/conv2d.cc:mindspore::kernel::UseWinograd4x4To6x6
+mindspore/mindspore/lite/src/litert/kernel/opencl/kernel/fullconnection.cc:mindspore::kernel::FullConnectionOpenCLKernel::CheckSpecs
@@ -467,7 +467,7 @@ int Conv2DOpenCLKernel::InitFilter() {
     }
   }
 
-  FreeStoredData(stored_filter_);
+  FreeStoredData(&stored_filter_);
   return RET_OK;
 }
 
@@ -523,7 +523,7 @@ int Conv2DOpenCLKernel::InitBias() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019 Huawei Technologies Co., Ltd
+ * Copyright 2019-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -94,8 +94,10 @@ int Conv2dTransposeOpenCLKernel::SetGlobalLocal() {
   int oh = out_tensors_[0]->shape()[1];
   int ow = out_tensors_[0]->shape()[2];
   local_size_ = {16, 1, 16};
-  global_size_ = {(size_t)UP_ROUND(UP_DIV(oh, 2), stride_h), (size_t)UP_ROUND(UP_DIV(ow, 2), stride_w),
-                  (size_t)co4 * (size_t)n};
+  // static_cast<size_t>()
+  global_size_ = {static_cast<size_t>(UP_ROUND(UP_DIV(oh, 2), stride_h)),
+                  static_cast<size_t>(UP_ROUND(UP_DIV(ow, 2), stride_w)),
+                  static_cast<size_t>(co4) * static_cast<size_t>(n)};
   AlignGlobalLocal(global_size_, local_size_);
 
   return RET_OK;
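The hunk above swaps the C-style (size_t) casts for static_cast<size_t> when building the global work size. A minimal standalone sketch of the same substitution follows; UP_DIV and UP_ROUND are re-declared locally only so the snippet compiles and are assumptions, not the MindSpore macros:

#include <cstddef>

// Local stand-ins for the rounding helpers so the snippet is self-contained.
#define UP_DIV(x, y) (((x) + (y) - 1) / (y))
#define UP_ROUND(x, y) ((((x) + (y) - 1) / (y)) * (y))

int main() {
  int oh = 33;
  int stride_h = 2;
  // Old form: a C-style cast silently allows more conversions than intended.
  size_t a = (size_t)UP_ROUND(UP_DIV(oh, 2), stride_h);
  // New form: static_cast states the intent and is checked by the compiler.
  size_t b = static_cast<size_t>(UP_ROUND(UP_DIV(oh, 2), stride_h));
  return (a == b) ? 0 : 1;
}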
@@ -236,7 +238,7 @@ int Conv2dTransposeOpenCLKernel::InitFilter() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_weight_);
+  FreeStoredData(&stored_weight_);
   return RET_OK;
 }
 
@@ -290,7 +292,7 @@ int Conv2dTransposeOpenCLKernel::InitBias() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019 Huawei Technologies Co., Ltd
+ * Copyright 2019-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -189,7 +189,7 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
     ConvertFilter(origin_weight, temp_filter.data(), src_type, dst_type, plane_in, plane_out, out_info.C);
     if (filter_type_ == MemType::IMG) {
       size_t img_dtype = ocl_runtime_->GetFp16Enable() ? CL_HALF_FLOAT : CL_FLOAT;
-      ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype};
+      ImageSize img_size{static_cast<size_t>(plane_out) / C4NUM, static_cast<size_t>(out_info.N) * CO4, img_dtype};
       packed_weight_ = allocator->Malloc(img_size, temp_filter.data());
 
     } else {
@@ -199,7 +199,7 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
     MS_LOG(ERROR) << "Malloc failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_weight_);
+  FreeStoredData(&stored_weight_);
   return RET_OK;
 }
 #else
@@ -236,7 +236,7 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
     ConvertFilter(origin_weight, temp_filter.data(), src_type, dst_type, plane_in, plane_out, out_info.C);
     if (filter_type_ == MemType::IMG) {
      size_t img_dtype = CL_FLOAT;
-      ImageSize img_size{(size_t)plane_out / C4NUM, (size_t)out_info.N * CO4, img_dtype};
+      ImageSize img_size{static_cast<size_t>(plane_out) / C4NUM, static_cast<size_t>(out_info.N) * CO4, img_dtype};
       packed_weight_ = allocator->Malloc(img_size, temp_filter.data());
 
     } else {
@@ -246,7 +246,7 @@ int DepthwiseConv2dOpenCLKernel::InitWeights() {
     MS_LOG(ERROR) << "Malloc data failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_weight_);
+  FreeStoredData(&stored_weight_);
   return RET_OK;
 }
 #endif
@@ -292,7 +292,7 @@ int DepthwiseConv2dOpenCLKernel::InitBias() {
     return RET_ERROR;
   }
 
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 #else
@@ -325,7 +325,7 @@ int DepthwiseConv2dOpenCLKernel::InitBias() {
     return RET_ERROR;
   }
 
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 #endif
@@ -395,8 +395,8 @@ int DepthwiseConv2dOpenCLKernel::SetGlobalLocal() {
   auto out_info = GpuTensorInfo(out_tensors_[0]);
   // set global
   size_t CO4 = UP_DIV(out_info.C, C4NUM * block_size_.C);
-  global_size_ = {CO4, (size_t)UP_DIV(out_info.W, block_size_.W),
-                  (size_t)UP_DIV(out_info.H, block_size_.H) * out_info.N};
+  global_size_ = {CO4, static_cast<size_t>(UP_DIV(out_info.W, block_size_.W)),
+                  static_cast<size_t>(UP_DIV(out_info.H, block_size_.H)) * out_info.N};
   // set local
   int local_max = filter_type_ == MemType::IMG ? 64 : 128;  // IMG : 64, BUFFER : 128
   if (ocl_runtime_->DeviceComputeUnits() > 16) {  // Max Device Compute Units : 16
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019 Huawei Technologies Co., Ltd
+ * Copyright 2019-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -63,7 +63,9 @@ int FullConnectionOpenCLKernel::CheckSpecs() {
   if (input_nhw < N_) {
     MS_LOG(WARNING) << "Unsupported fullconnection shape";
   }
-  if (!in_tensors_.at(kWeightIndex)->IsConst()) {
+  auto weight_tensor = in_tensors_.at(kWeightIndex)->ConvertToTensorC();
+  bool is_const = (weight_tensor->category_ == ConstTensor || weight_tensor->category_ == ConstScalar);
+  if (!is_const) {
     weight_var_ = true;
     if (!param->b_transpose_) {
       MS_LOG(WARNING) << "If fullconnection input weight is not constant, b_transpose_ should be true.";
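In the hunk above, the constness test switches from IsConst() to inspecting the converted TensorC's category field. A hedged sketch of that kind of category check follows; the enum and struct are stand-ins for illustration, not the real lite runtime definitions:

// Stand-in types for illustration only; the actual TensorC and category
// values live in the lite runtime and differ from these.
enum Category { ConstTensor, ConstScalar, VarTensor };
struct TensorC {
  Category category_;
};

// A weight is treated as constant if it is a const tensor or a const scalar,
// mirroring the check added in FullConnectionOpenCLKernel::CheckSpecs.
bool IsConstWeight(const TensorC *weight_tensor) {
  return weight_tensor != nullptr &&
         (weight_tensor->category_ == ConstTensor || weight_tensor->category_ == ConstScalar);
}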
@@ -123,9 +125,11 @@ int FullConnectionOpenCLKernel::Prepare() {
 
 int FullConnectionOpenCLKernel::InitWeights() {
   if (!weight_var_) {
-    auto ret = InitFilter();
-    if (ret != RET_OK) {
-      return ret;
+    if (!(stored_weight_ == nullptr && in_tensors_.at(kWeightIndex)->data() == nullptr)) {
+      auto ret = InitFilter();
+      if (ret != RET_OK) {
+        return ret;
+      }
     }
   }
   return InitBias();
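The guard added above skips filter packing when neither a stored copy of the weight nor the weight tensor's own buffer exists yet. A condensed, hypothetical sketch of that control flow follows; the names and return codes are placeholders, not the kernel's real interface:

// Hypothetical sketch: only pack the filter when some weight data is actually
// available, either the copy captured earlier (stored_weight) or the tensor's
// current buffer (tensor_data); otherwise defer, instead of dereferencing a
// null pointer when the kernel is re-prepared after a resize.
constexpr int kRetOk = 0;

int PackFilterSketch(const void *src) { return (src != nullptr) ? kRetOk : -1; }

int InitWeightsSketch(const void *stored_weight, const void *tensor_data, bool weight_is_var) {
  if (!weight_is_var) {
    if (!(stored_weight == nullptr && tensor_data == nullptr)) {
      const void *src = (stored_weight != nullptr) ? stored_weight : tensor_data;
      int ret = PackFilterSketch(src);
      if (ret != kRetOk) {
        return ret;
      }
    }
  }
  return kRetOk;  // bias initialization would follow here
}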
@@ -153,7 +157,7 @@ int FullConnectionOpenCLKernel::InitFilter() {
   auto padWeightFp16 = reinterpret_cast<float16_t *>(padWeight_);
   memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size);
   void *src_data = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data() : stored_weight_;
-  MS_ASSERT(src_data);
+  CHECK_NULL_RETURN(src_data);
   auto originWeightFp32 = reinterpret_cast<float *>(src_data);
   auto originWeightFp16 = reinterpret_cast<float16_t *>(src_data);
   bool isModelFp16 = in_tensors_.at(kWeightIndex)->data_type() == kNumberTypeFloat16;
@@ -199,7 +203,7 @@ int FullConnectionOpenCLKernel::InitFilter() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_weight_);
+  FreeStoredData(&stored_weight_);
   return RET_OK;
 }
 
@@ -230,7 +234,7 @@ int FullConnectionOpenCLKernel::InitBias() {
   memset(bias_, 0x00, co4 * C4NUM * dtype_size);
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data() : stored_bias_;
-    MS_ASSERT(src_data);
+    CHECK_NULL_RETURN(src_data);
     if (in_tensors_[kBiasIndex]->data_type() == kNumberTypeFloat32 && enable_fp16_) {
       for (int i = 0; i < CO_; i++) {
         reinterpret_cast<float16_t *>(bias_)[i] = reinterpret_cast<float *>(src_data)[i];
@@ -247,7 +251,7 @@ int FullConnectionOpenCLKernel::InitBias() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 #else
@@ -271,7 +275,7 @@ int FullConnectionOpenCLKernel::InitFilter() {
   auto padWeight = reinterpret_cast<float *>(padWeight_);
   memset(padWeight_, 0x00, nhw_remainder * intensor_shape.Slice * co4 * C4NUM * C4NUM * dtype_size);
   void *src_data = stored_weight_ == nullptr ? in_tensors_.at(kWeightIndex)->data() : stored_weight_;
-  MS_ASSERT(src_data);
+  CHECK_NULL_RETURN(src_data);
   auto originWeight = reinterpret_cast<float *>(src_data);
 
   // pad weight
@@ -303,7 +307,7 @@ int FullConnectionOpenCLKernel::InitFilter() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_weight_);
+  FreeStoredData(&stored_weight_);
   return RET_OK;
 }
 
@@ -331,14 +335,14 @@ int FullConnectionOpenCLKernel::InitBias() {
   memset(bias_, 0x00, co4 * C4NUM * dtype_size);
   if (in_tensors_.size() == INPUT_TENSOR_SIZE_3) {
     void *src_data = stored_bias_ == nullptr ? in_tensors_.at(kBiasIndex)->data() : stored_bias_;
-    MS_ASSERT(src_data);
+    CHECK_NULL_RETURN(src_data);
     memcpy(bias_, src_data, CO_ * dtype_size);
   }
   if (allocator->UnmapBuffer(bias_) != RET_OK) {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 #endif
@@ -1,5 +1,5 @@
 /**
- * Copyright 2019 Huawei Technologies Co., Ltd
+ * Copyright 2019-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -262,7 +262,7 @@ int MatMulOpenCLKernel::InitWeights() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_weight_);
+  FreeStoredData(&stored_weight_);
   return InitBias();
 }
 
@@ -309,7 +309,7 @@ int MatMulOpenCLKernel::InitBias() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 #else
@@ -342,7 +342,7 @@ int MatMulOpenCLKernel::InitBias() {
     MS_LOG(ERROR) << "UnmapBuffer failed.";
     return RET_ERROR;
   }
-  FreeStoredData(stored_bias_);
+  FreeStoredData(&stored_bias_);
   return RET_OK;
 }
 #endif
@@ -186,7 +186,7 @@ int WinogradOpenCLKernel::InitFilter() {
       return RET_ERROR;
     }
   }
-  FreeStoredData(stored_filter_);
+  FreeStoredData(&stored_filter_);
   return RET_OK;
 }
 
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020-2022 Huawei Technologies Co., Ltd
+ * Copyright 2020-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -379,9 +379,10 @@ void *StoreTensorData(lite::Tensor *tensor) {
   return nullptr;
 }
 
-void FreeStoredData(void *data) {
-  if (data != nullptr) {
-    free(data);
+void FreeStoredData(void **data) {
+  if (*data != nullptr) {
+    free(*data);
+    *data = nullptr;
   }
 }
 
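The new signature takes the address of the caller's stored pointer, so the helper can both free the buffer and reset the pointer; a later call (for example after Resize re-runs weight or bias initialization) then cannot free or read a dangling value. A minimal standalone sketch of the same pattern, with hypothetical names rather than the MindSpore helpers:

#include <cstdlib>

// Free a stored buffer through a pointer-to-pointer and clear the caller's
// copy, so repeated calls become harmless no-ops instead of double frees.
static void FreeStoredSketch(void **data) {
  if (data == nullptr || *data == nullptr) {
    return;
  }
  free(*data);
  *data = nullptr;
}

int main() {
  void *stored = malloc(16);
  FreeStoredSketch(&stored);  // frees the buffer and nulls 'stored'
  FreeStoredSketch(&stored);  // safe: nothing left to free
  return (stored == nullptr) ? 0 : 1;
}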
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2023 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -61,7 +61,7 @@ int CheckParamLikeTensor(const std::string &kernel_name, const std::string &tens
 
 void *StoreTensorData(lite::Tensor *tensor);
 
-void FreeStoredData(void *data);
+void FreeStoredData(void **data);
 
 std::vector<std::string> CreateBuildOptionsExtByDType(TypeId type_id);
 
@@ -329,6 +329,10 @@ void TestMain(const std::vector<ArgsTupleWithDtype> &input_infos, std::tuple<std
 
   // check result
   ocl_runtime->GetAllocator()->MapBuffer(output.data(), CL_MAP_READ, nullptr, true);
+  if (output.data() == nullptr) {
+    MS_LOG(ERROR) << "output data null.";
+    return;
+  }
   CompareOutput<float>(output.data(), expect_data, output.ElementsNum(), atol, rtol, print_data);
   ocl_runtime->GetAllocator()->UnmapBuffer(output.data());
 
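The test harness now verifies the mapped output pointer before comparing. A hedged sketch of that map, check, compare, unmap shape follows; the allocator here is a placeholder, not the real lite OpenCL runtime API:

#include <cstdio>
#include <vector>

// Placeholder allocator: stands in for mapping a device buffer into host
// memory; the real OpenCL allocator interface differs.
struct FakeAllocator {
  void *MapBuffer(void *host_ptr) { return host_ptr; }
  void UnmapBuffer(void *) {}
};

void CheckResultSketch(FakeAllocator *alloc, std::vector<float> *output, const float *expect) {
  float *mapped = static_cast<float *>(alloc->MapBuffer(output->data()));
  if (mapped == nullptr) {
    std::fprintf(stderr, "output data null.\n");
    return;  // bail out instead of comparing against a null pointer
  }
  for (size_t i = 0; i < output->size(); ++i) {
    if (mapped[i] != expect[i]) {
      std::fprintf(stderr, "mismatch at index %zu\n", i);
    }
  }
  alloc->UnmapBuffer(mapped);
}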