!20241 fix vector push_back realloc memory issue

Merge pull request !20241 from zhujingxuan/master
i-robot 2021-07-14 09:14:16 +00:00 committed by Gitee
commit 1767f3acf7
13 changed files with 42 additions and 36 deletions
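
The hunks below all apply one of two patterns: where the element count is known before a loop, the vector is sized once up front (sizing constructor or reserve()) so that push_back no longer triggers repeated reallocations and element copies as capacity grows; where a helper filled a vector through an out-parameter, it now returns the vector by value and relies on copy elision. A minimal standalone sketch of the sizing pattern (build and make_value are illustrative placeholders, not code from this PR):

#include <cstddef>
#include <vector>

// Stand-in for a per-element producer such as AnfAlgo::GetOutputDeviceDataType.
int make_value(std::size_t i) { return static_cast<int>(i) * 2; }

std::vector<int> build(std::size_t n) {
  // Before: a default-constructed vector grows on demand, and push_back may
  // reallocate and copy all existing elements several times.
  //   std::vector<int> out;
  //   for (std::size_t i = 0; i < n; i++) out.push_back(make_value(i));

  // After: allocate the final size once, then assign by index.
  std::vector<int> out(n);
  for (std::size_t i = 0; i < n; i++) {
    out[i] = make_value(i);
  }
  return out;
}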

@@ -79,14 +79,14 @@ bool ArgmaxCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, c
   auto input = reinterpret_cast<T *>(inputs[0]->addr);
   auto output = reinterpret_cast<int32_t *>(outputs[0]->addr);
+  std::vector<float> array_axis(dim_axis_);
   for (size_t i = 0; i < num_before_axis_; i++) {
     size_t src_index_i = i * dim_axis_ * num_after_axis_;
     for (size_t j = 0; j < num_after_axis_; j++) {
-      std::vector<float> array_axis;
       size_t src_index_j = src_index_i + j;
       for (size_t k = 0; k < dim_axis_; k++) {
         size_t src_index_k = k * num_after_axis_ + src_index_j;
-        array_axis.push_back(static_cast<float>(input[src_index_k]));
+        array_axis[k] = static_cast<float>(input[src_index_k]);
       }
       auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
       auto max_index = static_cast<int32_t>(std::distance(array_axis.begin(), max_ops));

@@ -82,14 +82,14 @@ bool ArgMinWithValueCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &
   auto output0 = reinterpret_cast<int32_t *>(outputs[0]->addr);
   auto output1 = reinterpret_cast<T *>(outputs[1]->addr);
+  std::vector<float> array_axis(dim_axis_);
   for (size_t i = 0; i < num_before_axis_; i++) {
     size_t src_index_i = i * dim_axis_ * num_after_axis_;
     for (size_t j = 0; j < num_after_axis_; j++) {
-      std::vector<float> array_axis;
       size_t src_index_j = src_index_i + j;
       for (size_t k = 0; k < dim_axis_; k++) {
         size_t src_index_k = k * num_after_axis_ + src_index_j;
-        array_axis.push_back(static_cast<float>(input[src_index_k]));
+        array_axis[k] = static_cast<float>(input[src_index_k]);
       }
       auto min_ops = std::min_element(array_axis.begin(), array_axis.end());
       auto min_index = static_cast<int32_t>(std::distance(array_axis.begin(), min_ops));

@@ -40,10 +40,11 @@ bool ConcatCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, c
   }
   size_t input_num = AnfAlgo::GetInputTensorNum(node_);
   std::vector<std::vector<size_t>> input_flat_shape_list;
+  input_flat_shape_list.reserve(input_num);
   for (size_t i = 0; i < input_num; i++) {
     auto input_shape_i = AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
     auto flat_shape = CPUKernelUtils::FlatShapeByAxis(input_shape_i, axis_);
-    input_flat_shape_list.push_back(flat_shape);
+    input_flat_shape_list.emplace_back(flat_shape);
   }
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);

@@ -110,7 +110,6 @@ std::vector<size_t> CPUKernelUtils::FlatShapeByAxis(const std::vector<size_t> &s
   }
   size_t dim_row = 1;
   size_t dim_col = 1;
-  std::vector<size_t> flat_shape;
   for (size_t i = 0; i < shape.size(); ++i) {
     if (SizeToInt(i) < axis) {
       dim_row *= shape[i];
@@ -118,9 +117,9 @@ std::vector<size_t> CPUKernelUtils::FlatShapeByAxis(const std::vector<size_t> &s
       dim_col *= shape[i];
     }
   }
-  flat_shape.push_back(dim_row);
-  flat_shape.push_back(dim_col);
-  return flat_shape;
+  // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning a vector won't cause extra vector constructed or moved
+  return std::vector<size_t>{dim_row, dim_col};
 }
 
 BroadcastIterator::BroadcastIterator(std::vector<size_t> input_shape_a, std::vector<size_t> input_shape_b,

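The copy-elision comments added above rest on two facts: since C++17, returning a prvalue such as std::vector<size_t>{dim_row, dim_col} is guaranteed to construct the result directly in the caller, and returning a named local is normally elided via NRVO, costing at worst one move and never a deep copy. A small self-contained illustration (the Probe type is invented for demonstration):

#include <cstdio>

// Probe logs its special member calls so elision is observable.
struct Probe {
  Probe() { std::puts("construct"); }
  Probe(const Probe &) { std::puts("copy"); }
  Probe(Probe &&) noexcept { std::puts("move"); }
};

Probe MakeProbe() {
  Probe p;   // named local, like high_dim_index or x_vector below
  return p;  // NRVO in practice; at worst one move, never a copy
}

int main() {
  Probe p = MakeProbe();  // typically prints only "construct"
  return 0;
}
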
@@ -157,9 +157,10 @@ std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::
     MS_LOG(EXCEPTION) << "Not registered CPU kernel: op[" << kernel_name << "]!";
   }
   auto creators = iter->second;
+  result.reserve(creators.size());
   for (size_t index = 0; index < creators.size(); ++index) {
     auto attr_creator = creators[index];
-    result.push_back(attr_creator.first);
+    result.emplace_back(attr_creator.first);
   }
   return result;
 }

@@ -52,14 +52,10 @@ bool L2NormalizeGradCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
   auto output_size = outputs[0]->size / sizeof(T);
   auto task = [&](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      std::vector<size_t> high_dim_index;
-      OneDimIndexToHighDimIndex(i, &high_dim_index);
-      std::vector<T> input_x_vector;
-      GetVector(&input_x_vector, high_dim_index, input_x);
-      std::vector<T> dout_vector;
-      GetVector(&dout_vector, high_dim_index, dout);
-      std::vector<T> y_vector;
-      GetVector(&y_vector, high_dim_index, y);
+      std::vector<size_t> high_dim_index = OneDimIndexToHighDimIndex(i);
+      std::vector<T> input_x_vector = GetVector(high_dim_index, input_x);
+      std::vector<T> dout_vector = GetVector(high_dim_index, dout);
+      std::vector<T> y_vector = GetVector(high_dim_index, y);
       GetOutput(input_x_vector, y_vector, dout_vector, high_dim_index, &output[i]);
     }
   };
@@ -95,11 +91,16 @@ void L2NormalizeGradCPUKernel<T>::CheckIONumber(const CNodePtr &kernel_node) {
 }
 
 template <typename T>
-void L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index) {
+std::vector<size_t> L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index) {
+  std::vector<size_t> high_dim_index;
+  high_dim_index.reserve(dim_elem_num_list_.size());
   for (const auto &item : dim_elem_num_list_) {
-    high_dim_index->push_back(one_dim_index / item);
+    high_dim_index.push_back(one_dim_index / item);
     one_dim_index %= item;
   }
+  // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning a vector won't cause extra vector constructed or moved
+  return high_dim_index;
 }
 
 template <typename T>
@@ -113,16 +114,20 @@ void L2NormalizeGradCPUKernel<T>::HighDimIndexToOneDimIndex(size_t *one_dim_inde
 }
 
 template <typename T>
-void L2NormalizeGradCPUKernel<T>::GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index,
-                                            const T *x) {
+std::vector<T> L2NormalizeGradCPUKernel<T>::GetVector(const std::vector<size_t> &high_dim_index, const T *x) {
   auto x_shape = input_shape_list_[0];
+  std::vector<T> x_vector;
+  x_vector.reserve(x_shape[axis_]);
   for (size_t i = 0; i < x_shape[axis_]; i++) {
     size_t oneDimIndex = 0;
     std::vector<size_t> tmp_high_dim_index = high_dim_index;
     tmp_high_dim_index[axis_] = i;
     HighDimIndexToOneDimIndex(&oneDimIndex, tmp_high_dim_index);
-    x_vector->push_back(x[oneDimIndex]);
+    x_vector.emplace_back(x[oneDimIndex]);
   }
+  // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning a vector won't cause extra vector constructed or moved
+  return x_vector;
 }
 
 template <typename T>

@@ -38,9 +38,9 @@ class L2NormalizeGradCPUKernel : public CPUKernel {
  private:
   void CheckInputShape(const std::vector<size_t> &output_shape);
   void CheckIONumber(const CNodePtr &kernel_node);
-  void OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index);
+  std::vector<size_t> OneDimIndexToHighDimIndex(size_t one_dim_index);
   void HighDimIndexToOneDimIndex(size_t *one_dim_index, const std::vector<size_t> &high_dim_index);
-  void GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index, const T *x);
+  std::vector<T> GetVector(const std::vector<size_t> &high_dim_index, const T *x);
   void GetSumOfProduct(const std::vector<T> &x_vector, const std::vector<T> &y_vector, T *ss);
   void GetOutput(const std::vector<T> &input_x_vector, const std::vector<T> &y_vector,
                  const std::vector<T> &dout_vector, const std::vector<size_t> &high_dim_index, T *output);

@@ -46,10 +46,10 @@ int Compress(HashmapEntry<T> *entry_p, const size_t &length, T entry) {
 void UpdateShape(size_t miss_count, const CNodePtr &node_) {
   std::vector<size_t> out_shape;
   out_shape.emplace_back(miss_count);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node_, 0), out_shape, out_shape, out_shape},
                                       node_.get());

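In this hunk and the similar ones below, switching from push_back to indexed assignment is required for correctness, not just speed: once dtypes is constructed with output_num elements, push_back would append after them, leaving a vector of twice the intended length with a default-initialized first half. A standalone sketch of the pitfall and the two correct pairings (not code from this PR):

#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  const std::size_t n = 4;

  // Wrong mix: sizing constructor plus push_back yields 2 * n elements,
  // the first n of them value-initialized to 0.
  std::vector<int> wrong(n);
  for (std::size_t i = 0; i < n; i++) wrong.push_back(1);
  assert(wrong.size() == 2 * n);

  // Correct: size once and assign by index ...
  std::vector<int> sized(n);
  for (std::size_t i = 0; i < n; i++) sized[i] = 1;
  assert(sized.size() == n);

  // ... or reserve capacity and keep push_back.
  std::vector<int> reserved;
  reserved.reserve(n);
  for (std::size_t i = 0; i < n; i++) reserved.push_back(1);
  assert(reserved.size() == n);
  return 0;
}
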
@@ -70,10 +70,10 @@ bool MaskedSelectCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(j);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
 }

@@ -227,7 +227,7 @@ void MatMulCPUKernel::ParallelRun(float *output) {
   std::vector<common::Task> tasks;
   size_t thread_index = 0;
   while (thread_index < thread_count_) {
-    tasks.push_back(std::bind(&MatMulCPUKernel::FloatRun, this, thread_index));
+    tasks.emplace_back(std::bind(&MatMulCPUKernel::FloatRun, this, thread_index));
     thread_index++;
   }
   (void)common::ThreadPool::GetInstance().SyncRun(tasks);

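On the push_back-to-emplace_back swaps in this PR: for an existing lvalue such as attr_creator.first or flat_shape, emplace_back copy-constructs exactly as push_back would, and for the temporary returned by std::bind both move-construct, so these swaps are stylistic; emplace_back only avoids work when constructor arguments are forwarded directly. A tiny standalone sketch:

#include <string>
#include <utility>
#include <vector>

int main() {
  std::vector<std::string> v;
  v.reserve(4);

  std::string s(40, 'x');
  v.push_back(s);             // copies s
  v.emplace_back(s);          // also copies s: no gain over push_back
  v.emplace_back(40, 'x');    // constructs in place, no temporary string
  v.push_back(std::move(s));  // moving is the other way to avoid a copy
  return 0;
}
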
@@ -76,14 +76,14 @@ void PadAndShiftCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(output_size);
-  std::vector<TypeId> dtypes;
   auto node_ = node_wpt_.lock();
   if (!node_) {
     MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
   }
   auto output_nums = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_nums);
   for (size_t i = 0; i < output_nums; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
 }

@@ -72,10 +72,10 @@ void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   MS_LOG(INFO) << "SubAndFilter output count is " << count;
   std::vector<size_t> out_shape;
   out_shape.emplace_back(count);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node_.get());
 }

@@ -57,10 +57,10 @@ bool UniqueCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(output_size_);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, AnfAlgo::GetOutputInferShape(node_, 1)}, node_.get());
 }