From 4368d9c158f966c7b5090609db6cb87c4aa33841 Mon Sep 17 00:00:00 2001
From: zhujingxuan
Date: Fri, 9 Jul 2021 16:47:53 +0800
Subject: [PATCH] fix vector push_back realloc memory issue
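
Repeatedly calling push_back() on a default-constructed std::vector inside a
hot loop can trigger a reallocation, and a copy of all existing elements,
every time the capacity is exceeded. This patch pre-sizes or reserve()s the
vectors used in the CPU kernels so that at most one allocation happens, and
lets helpers that previously filled an output-parameter vector return the
vector by value instead, relying on copy elision / NRVO so no extra copy or
move is made.

Illustrative sketch of the pattern only; `values`, `n`, and `buf` are
placeholder names, not code from this patch:

    // before: capacity grows on demand, possibly reallocating mid-loop
    std::vector<float> buf;
    for (size_t k = 0; k < n; ++k) {
      buf.push_back(values[k]);
    }

    // after: a single allocation up front, elements written in place
    std::vector<float> buf(n);
    for (size_t k = 0; k < n; ++k) {
      buf[k] = values[k];
    }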
---
 .../kernel_compiler/cpu/argmax_cpu_kernel.cc  |  4 +--
 .../cpu/argmin_with_value_cpu_kernel.cc       |  4 +--
 .../kernel_compiler/cpu/concat_cpu_kernel.cc  |  3 +-
 .../backend/kernel_compiler/cpu/cpu_kernel.cc |  7 ++---
 .../kernel_compiler/cpu/cpu_kernel_factory.cc |  3 +-
 .../cpu/l2normalize_grad_cpu_kernel.cc        | 31 +++++++++++--------
 .../cpu/l2normalize_grad_cpu_kernel.h         |  4 +--
 .../cpu/map_cache_idx_cpu_kernel.cc           |  4 +--
 .../cpu/masked_select_cpu_kernel.cc           |  4 +--
 .../cpu/mkldnn/matmul_cpu_kernel.cc           |  2 +-
 .../cpu/pad_and_shift_cpu_kernel.cc           |  4 +--
 .../cpu/sub_and_filter_cpu_kernel.cc          |  4 +--
 .../kernel_compiler/cpu/unique_cpu_kernel.cc  |  4 +--
 13 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc
index 28f9c3d17b1..ff33cc751ca 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_cpu_kernel.cc
@@ -79,14 +79,14 @@ bool ArgmaxCPUKernel::Launch(const std::vector &inputs, c
   auto input = reinterpret_cast<T *>(inputs[0]->addr);
   auto output = reinterpret_cast<int32_t *>(outputs[0]->addr);

+  std::vector<float> array_axis(dim_axis_);
   for (size_t i = 0; i < num_before_axis_; i++) {
     size_t src_index_i = i * dim_axis_ * num_after_axis_;
     for (size_t j = 0; j < num_after_axis_; j++) {
-      std::vector<float> array_axis;
       size_t src_index_j = src_index_i + j;
       for (size_t k = 0; k < dim_axis_; k++) {
         size_t src_index_k = k * num_after_axis_ + src_index_j;
-        array_axis.push_back(static_cast<float>(input[src_index_k]));
+        array_axis[k] = static_cast<float>(input[src_index_k]);
       }
       auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
       auto max_index = static_cast<int32_t>(std::distance(array_axis.begin(), max_ops));
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/argmin_with_value_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmin_with_value_cpu_kernel.cc
index 5f8de4101a9..104854229f2 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/argmin_with_value_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmin_with_value_cpu_kernel.cc
@@ -82,14 +82,14 @@ bool ArgMinWithValueCPUKernel::Launch(const std::vector &
   auto output0 = reinterpret_cast<int32_t *>(outputs[0]->addr);
   auto output1 = reinterpret_cast<T *>(outputs[1]->addr);

+  std::vector<float> array_axis(dim_axis_);
   for (size_t i = 0; i < num_before_axis_; i++) {
     size_t src_index_i = i * dim_axis_ * num_after_axis_;
     for (size_t j = 0; j < num_after_axis_; j++) {
-      std::vector<float> array_axis;
       size_t src_index_j = src_index_i + j;
       for (size_t k = 0; k < dim_axis_; k++) {
         size_t src_index_k = k * num_after_axis_ + src_index_j;
-        array_axis.push_back(static_cast<float>(input[src_index_k]));
+        array_axis[k] = static_cast<float>(input[src_index_k]);
       }
       auto min_ops = std::min_element(array_axis.begin(), array_axis.end());
       auto min_index = static_cast<int32_t>(std::distance(array_axis.begin(), min_ops));
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc
index 20f7a18a048..d723407ae93 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/concat_cpu_kernel.cc
@@ -40,10 +40,11 @@ bool ConcatCPUKernel::Launch(const std::vector &inputs, c
   }
   size_t input_num = AnfAlgo::GetInputTensorNum(node_);
   std::vector<std::vector<size_t>> input_flat_shape_list;
+  input_flat_shape_list.reserve(input_num);
   for (size_t i = 0; i < input_num; i++) {
     auto input_shape_i = AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
     auto flat_shape = CPUKernelUtils::FlatShapeByAxis(input_shape_i, axis_);
-    input_flat_shape_list.push_back(flat_shape);
+    input_flat_shape_list.emplace_back(flat_shape);
   }

   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc
index 6be00ca19cb..cfea6b42976 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.cc
@@ -110,7 +110,6 @@ std::vector CPUKernelUtils::FlatShapeByAxis(const std::vector &s
   }
   size_t dim_row = 1;
   size_t dim_col = 1;
-  std::vector<size_t> flat_shape;
   for (size_t i = 0; i < shape.size(); ++i) {
     if (SizeToInt(i) < axis) {
       dim_row *= shape[i];
@@ -118,9 +117,9 @@ std::vector CPUKernelUtils::FlatShapeByAxis(const std::vector &s
       dim_col *= shape[i];
     }
   }
-  flat_shape.push_back(dim_row);
-  flat_shape.push_back(dim_col);
-  return flat_shape;
+  // refer to copy elision: https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning the local vector by value does not construct or move an extra vector
+  return std::vector<size_t>{dim_row, dim_col};
 }

 BroadcastIterator::BroadcastIterator(std::vector<size_t> input_shape_a, std::vector<size_t> input_shape_b,
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
index be9b8a5d933..12ae560be86 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel_factory.cc
@@ -157,9 +157,10 @@ std::vector CPUKernelFactory::GetSupportedKernelAttrList(const std::
     MS_LOG(EXCEPTION) << "Not registered CPU kernel: op[" << kernel_name << "]!";
   }
   auto creators = iter->second;
+  result.reserve(creators.size());
   for (size_t index = 0; index < creators.size(); ++index) {
     auto attr_creator = creators[index];
-    result.push_back(attr_creator.first);
+    result.emplace_back(attr_creator.first);
   }
   return result;
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.cc
index 6c187b96a05..a4096a64251 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.cc
@@ -52,14 +52,10 @@ bool L2NormalizeGradCPUKernel::Launch(const std::vector &inputs,
   auto output_size = outputs[0]->size / sizeof(T);
   auto task = [&](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      std::vector<size_t> high_dim_index;
-      OneDimIndexToHighDimIndex(i, &high_dim_index);
-      std::vector<T> input_x_vector;
-      GetVector(&input_x_vector, high_dim_index, input_x);
-      std::vector<T> dout_vector;
-      GetVector(&dout_vector, high_dim_index, dout);
-      std::vector<T> y_vector;
-      GetVector(&y_vector, high_dim_index, y);
+      std::vector<size_t> high_dim_index = OneDimIndexToHighDimIndex(i);
+      std::vector<T> input_x_vector = GetVector(high_dim_index, input_x);
+      std::vector<T> dout_vector = GetVector(high_dim_index, dout);
+      std::vector<T> y_vector = GetVector(high_dim_index, y);
       GetOutput(input_x_vector, y_vector, dout_vector, high_dim_index, &output[i]);
     }
   };
@@ -95,11 +91,16 @@ void L2NormalizeGradCPUKernel::CheckIONumber(const CNodePtr &kernel_node) {
 }

 template <typename T>
-void L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index) {
+std::vector<size_t> L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index) {
+  std::vector<size_t> high_dim_index;
+  high_dim_index.reserve(dim_elem_num_list_.size());
   for (const auto &item : dim_elem_num_list_) {
-    high_dim_index->push_back(one_dim_index / item);
+    high_dim_index.push_back(one_dim_index / item);
     one_dim_index %= item;
   }
+  // refer to copy elision: https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning the local vector by value does not construct or move an extra vector
+  return high_dim_index;
 }

 template <typename T>
@@ -113,16 +114,20 @@ void L2NormalizeGradCPUKernel::HighDimIndexToOneDimIndex(size_t *one_dim_inde
 }

 template <typename T>
-void L2NormalizeGradCPUKernel<T>::GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index,
-                                            const T *x) {
+std::vector<T> L2NormalizeGradCPUKernel<T>::GetVector(const std::vector<size_t> &high_dim_index, const T *x) {
   auto x_shape = input_shape_list_[0];
+  std::vector<T> x_vector;
+  x_vector.reserve(x_shape[axis_]);
   for (size_t i = 0; i < x_shape[axis_]; i++) {
     size_t oneDimIndex = 0;
     std::vector<size_t> tmp_high_dim_index = high_dim_index;
     tmp_high_dim_index[axis_] = i;
     HighDimIndexToOneDimIndex(&oneDimIndex, tmp_high_dim_index);
-    x_vector->push_back(x[oneDimIndex]);
+    x_vector.emplace_back(x[oneDimIndex]);
   }
+  // refer to copy elision: https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning the local vector by value does not construct or move an extra vector
+  return x_vector;
 }

 template <typename T>
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h
index 982f1e67896..e1cfe3cbb46 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/l2normalize_grad_cpu_kernel.h
@@ -38,9 +38,9 @@ class L2NormalizeGradCPUKernel : public CPUKernel {
  private:
   void CheckInputShape(const std::vector<size_t> &output_shape);
   void CheckIONumber(const CNodePtr &kernel_node);
-  void OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index);
+  std::vector<size_t> OneDimIndexToHighDimIndex(size_t one_dim_index);
   void HighDimIndexToOneDimIndex(size_t *one_dim_index, const std::vector<size_t> &high_dim_index);
-  void GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index, const T *x);
+  std::vector<T> GetVector(const std::vector<size_t> &high_dim_index, const T *x);
   void GetSumOfProduct(const std::vector<T> &x_vector, const std::vector<T> &y_vector, T *ss);
   void GetOutput(const std::vector<T> &input_x_vector, const std::vector<T> &y_vector,
                  const std::vector<T> &dout_vector, const std::vector<size_t> &high_dim_index, T *output);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.cc
index 97680cbe29f..4453226ec0e 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/map_cache_idx_cpu_kernel.cc
@@ -46,10 +46,10 @@ int Compress(HashmapEntry *entry_p, const size_t &length, T entry) {
 void UpdateShape(size_t miss_count, const CNodePtr &node_) {
   std::vector<size_t> out_shape;
   out_shape.emplace_back(miss_count);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node_, 0), out_shape, out_shape, out_shape},
                                       node_.get());
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.cc
index 9154b9363c9..7bd02061f93 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/masked_select_cpu_kernel.cc
@@ -70,10 +70,10 @@ bool MaskedSelectCPUKernel::Launch(const std::vector &inp
     }
     std::vector<size_t> out_shape;
     out_shape.emplace_back(j);
-    std::vector<TypeId> dtypes;
     size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+    std::vector<TypeId> dtypes(output_num);
     for (size_t i = 0; i < output_num; i++) {
-      dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+      dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
     }
     AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
   }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc
index a13d842a702..1fd8ddfe341 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/mkldnn/matmul_cpu_kernel.cc
@@ -227,7 +227,7 @@ void MatMulCPUKernel::ParallelRun(float *output) {
   std::vector<common::Task> tasks;
   size_t thread_index = 0;
   while (thread_index < thread_count_) {
-    tasks.push_back(std::bind(&MatMulCPUKernel::FloatRun, this, thread_index));
+    tasks.emplace_back(std::bind(&MatMulCPUKernel::FloatRun, this, thread_index));
     thread_index++;
   }
   (void)common::ThreadPool::GetInstance().SyncRun(tasks);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_and_shift_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_and_shift_cpu_kernel.cc
index 981679a7755..3b1d72e785d 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_and_shift_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/pad_and_shift_cpu_kernel.cc
@@ -76,14 +76,14 @@ void PadAndShiftCPUKernel::LaunchKernel(const std::vector &inputs,
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(output_size);
-  std::vector<TypeId> dtypes;
   auto node_ = node_wpt_.lock();
   if (!node_) {
     MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
   }
   auto output_nums = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_nums);
   for (size_t i = 0; i < output_nums; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.cc
index cd8940b6692..eca8fa0cafb 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/sub_and_filter_cpu_kernel.cc
@@ -72,10 +72,10 @@ void SubAndFilterCPUKernel::LaunchKernel(const std::vector &inputs,
   MS_LOG(INFO) << "SubAndFilter output count is " << count;
   std::vector<size_t> out_shape;
   out_shape.emplace_back(count);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node_.get());
 }
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc
index daa9930e9e8..fbbc223dd46 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc
@@ -57,10 +57,10 @@ bool UniqueCPUKernel::Launch(const std::vector &inputs,
     }
     std::vector<size_t> out_shape;
     out_shape.emplace_back(output_size_);
-    std::vector<TypeId> dtypes;
     size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+    std::vector<TypeId> dtypes(output_num);
     for (size_t i = 0; i < output_num; i++) {
-      dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+      dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
     }
     AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, AnfAlgo::GetOutputInferShape(node_, 1)}, node_.get());
   }