forked from mindspore-Ecosystem/mindspore
fix bug: conv2d performance, profiling mode
This commit is contained in:
parent a76668ce84
commit 2802572a62
@@ -30,11 +30,11 @@ __constant sampler_t smp_zero = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP
   exp1 = exp(-data); \
   data = (exp0 - exp1) / (exp0 + exp1);

-#define DO_LEAKY_RELU(data)        \
-  if (data.x < 0) data.x *= alpha; \
-  if (data.y < 0) data.y *= alpha; \
-  if (data.z < 0) data.z *= alpha; \
-  if (data.w < 0) data.w *= alpha;
+#define DO_LEAKY_RELU(data)                      \
+  data.x = data.x > 0 ? data.x : data.x * alpha; \
+  data.y = data.y > 0 ? data.y : data.y * alpha; \
+  data.z = data.z > 0 ? data.z : data.z * alpha; \
+  data.w = data.w > 0 ? data.w : data.w * alpha;

 __kernel void Conv2D_H1W1C1(__read_only image2d_t input, __write_only image2d_t output, __global FLT4 *weight,
                             __global FLT4 *bias, int4 input_shape, int4 output_shape, int4 kernel_stride, int4 pad,
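For clarity, a minimal host-side C++ sketch of the DO_LEAKY_RELU rewrite above (Float4 and the function names are illustrative, not from the patch). Both forms compute the same result; the ternary form is branchless and typically lowers to a select on the GPU, which is presumably the conv2d performance part of this fix.

#include <cstdio>

struct Float4 { float x, y, z, w; };

// Old style: conditional in-place multiply per component (may branch).
void LeakyReluBranchy(Float4 &d, float alpha) {
  if (d.x < 0) d.x *= alpha;
  if (d.y < 0) d.y *= alpha;
  if (d.z < 0) d.z *= alpha;
  if (d.w < 0) d.w *= alpha;
}

// New style: branchless select via the conditional operator.
void LeakyReluSelect(Float4 &d, float alpha) {
  d.x = d.x > 0 ? d.x : d.x * alpha;
  d.y = d.y > 0 ? d.y : d.y * alpha;
  d.z = d.z > 0 ? d.z : d.z * alpha;
  d.w = d.w > 0 ? d.w : d.w * alpha;
}

int main() {
  Float4 a{1.0f, -2.0f, 3.0f, -4.0f};
  Float4 b = a;
  LeakyReluBranchy(a, 0.1f);
  LeakyReluSelect(b, 0.1f);
  std::printf("%g %g %g %g\n", a.x, a.y, a.z, a.w);  // 1 -0.2 3 -0.4
  std::printf("%g %g %g %g\n", b.x, b.y, b.z, b.w);  // same values
  return 0;
}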
@@ -229,6 +229,7 @@ int OpenCLSubGraph::Init() {
     return ret;
   }
   auto opencl_exec = reinterpret_cast<lite::opencl::OpenCLExecutor *>(executor_);
+  // If tuning_mode is DEFAULT, just malloc memory for reuse.
   ret = opencl_exec->RunOrTune(in_tensors_, out_tensors_, nodes_, allocator_, nullptr, nullptr, true);
   if (ret != RET_OK) {
     MS_LOG(ERROR) << "Run opencl executor failed: " << ret;
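The hunk above runs the executor once from Init() with the last argument (is_tune) set to true, so kernels are tuned and their buffers are allocated up front for reuse during inference. A self-contained C++ sketch of that two-pass pattern (types and names are illustrative, not MindSpore Lite API):

#include <iostream>
#include <vector>

struct KernelStub { const char *name; };

class ExecutorSketch {
 public:
  // is_tune=true: tuning/warm-up pass run at subgraph Init() time (selects
  // work sizes, allocates reusable memory); is_tune=false: normal inference.
  int RunOrTune(std::vector<KernelStub> &kernels, bool is_tune) {
    for (auto &k : kernels) {
      std::cout << (is_tune ? "tune " : "run ") << k.name << "\n";
    }
    return 0;  // RET_OK
  }
};

int main() {
  std::vector<KernelStub> kernels{{"conv2d"}, {"relu"}};
  ExecutorSketch exec;
  exec.RunOrTune(kernels, true);   // called once from subgraph Init()
  exec.RunOrTune(kernels, false);  // called for each inference
  return 0;
}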
@@ -31,8 +31,11 @@ int OpenCLExecutor::RunOrTune(std::vector<Tensor *> &inputs, std::vector<Tensor
                               std::vector<kernel::LiteKernel *> &kernels, Allocator *allocator,
                               const KernelCallBack &before, const KernelCallBack &after, bool is_tune) {
   int ret{RET_OK};
-  ocl_runtime.GetInstance()->SetProfiling(is_tune);
-
+  auto opencl_runtime_ins = ocl_runtime.GetInstance();
+  auto profiling_tmp = opencl_runtime_ins->isProfiling();
+  if (is_tune) {
+    opencl_runtime_ins->SetProfiling(true);
+  }
   kernel::LiteKernelUtil::InitTensorRefCount(kernels);
   for (auto *kernel : kernels) {
     MS_ASSERT(kernel);
@@ -79,6 +82,10 @@ int OpenCLExecutor::RunOrTune(std::vector<Tensor *> &inputs, std::vector<Tensor
         MS_LOG(ERROR) << "run kernel failed, name: " << kernel->name();
         return ret;
       }
+      if (profiling_tmp) {
+        MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str()
+                     << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms";
+      }
     }
     if (after != nullptr) {
       if (!after(TensorVectorCast(kernel->in_tensors()), TensorVectorCast(kernel->out_tensors()), callbackParam)) {
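The per-kernel time logged above comes from OpenCL event profiling, which is only available when profiling is enabled on the command queue, and a profiling queue adds overhead; that is why the flag is now checked at runtime instead of being forced on or off. A background sketch using the standard OpenCL host API (generic OpenCL, not the MindSpore Lite implementation of GetProfilingTimeMs):

#include <CL/cl.h>

// Returns the device-side execution time of a completed kernel event in ms.
// Valid only if the queue was created with CL_QUEUE_PROFILING_ENABLE.
double KernelTimeMs(cl_event ev) {
  cl_ulong start = 0, end = 0;
  clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_START, sizeof(start), &start, nullptr);
  clGetEventProfilingInfo(ev, CL_PROFILING_COMMAND_END, sizeof(end), &end, nullptr);
  return static_cast<double>(end - start) * 1e-6;  // nanoseconds -> milliseconds
}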
@@ -92,16 +99,8 @@ int OpenCLExecutor::RunOrTune(std::vector<Tensor *> &inputs, std::vector<Tensor
       MS_LOG(WARNING) << "DecOutTensorRefCount for kernel" << kernel->name() << " failed";
     }
   }
-#ifdef Debug
-    MS_LOG(INFO) << "OpenCl kernel " << kernel->name() << "(" << kernel->type_str()
-                 << ") execute time is: " << op_kernel->GetProfilingTimeMs() << "ms";
-#endif
   }
-#ifdef Debug
-  ocl_runtime.GetInstance()->SetProfiling(true);
-#else
-  ocl_runtime.GetInstance()->SetProfiling(false);
-#endif
+  opencl_runtime_ins->SetProfiling(profiling_tmp);
   return ret;
 }
 }  // namespace mindspore::lite::opencl
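Taken together with the earlier hunk, the fix saves the runtime's current profiling flag (profiling_tmp), forces profiling on only for the tuning pass, and restores the saved value before returning, instead of unconditionally resetting it via #ifdef Debug; a profiling mode enabled by the caller therefore survives tuning. The same save-and-restore can be written as a small RAII guard; this is only an illustrative sketch reusing the isProfiling/SetProfiling names from the diff, not code from the patch:

template <typename Runtime>
class ProfilingGuard {
 public:
  ProfilingGuard(Runtime *rt, bool force_on) : rt_(rt), saved_(rt->isProfiling()) {
    if (force_on) rt_->SetProfiling(true);  // e.g. force_on = is_tune
  }
  ~ProfilingGuard() { rt_->SetProfiling(saved_); }  // restore on every return path

 private:
  Runtime *rt_;
  bool saved_;
};

// Usage sketch inside RunOrTune (runtime type name assumed):
//   ProfilingGuard<OpenCLRuntime> guard(ocl_runtime.GetInstance(), is_tune);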