forked from mindspore-Ecosystem/mindspore
!20241 fix vector push_back realloc memory issue
Merge pull request !20241 from zhujingxuan/master
commit 1767f3acf7
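The change is mechanical but repeated across many CPU kernels: vectors that were grown element by element with push_back are now either reserve()d up front or constructed at their final size and filled by index, so the buffer is allocated once instead of being reallocated as it grows. A minimal standalone sketch of the difference (not MindSpore code; it counts reallocation points by watching capacity):

    // Sketch: push_back without reserve reallocates whenever size hits
    // capacity; a pre-sized vector allocates exactly once.
    #include <cstdio>
    #include <vector>

    int main() {
      const size_t n = 1000;

      // Before: capacity grows geometrically; each overflow reallocates and
      // moves every element already stored.
      std::vector<float> grown;
      size_t reallocs = 0;
      for (size_t i = 0; i < n; i++) {
        if (grown.size() == grown.capacity()) {
          reallocs++;  // the next push_back must acquire a larger buffer
        }
        grown.push_back(static_cast<float>(i));
      }

      // After: one allocation up front, then plain element assignment.
      std::vector<float> sized(n);
      for (size_t i = 0; i < n; i++) {
        sized[i] = static_cast<float>(i);
      }

      printf("reallocations without reserve: %zu\n", reallocs);
      return 0;
    }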
@@ -79,14 +79,14 @@ bool ArgmaxCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, c
   auto input = reinterpret_cast<T *>(inputs[0]->addr);
   auto output = reinterpret_cast<int32_t *>(outputs[0]->addr);
 
+  std::vector<float> array_axis(dim_axis_);
   for (size_t i = 0; i < num_before_axis_; i++) {
     size_t src_index_i = i * dim_axis_ * num_after_axis_;
     for (size_t j = 0; j < num_after_axis_; j++) {
-      std::vector<float> array_axis;
       size_t src_index_j = src_index_i + j;
       for (size_t k = 0; k < dim_axis_; k++) {
         size_t src_index_k = k * num_after_axis_ + src_index_j;
-        array_axis.push_back(static_cast<float>(input[src_index_k]));
+        array_axis[k] = static_cast<float>(input[src_index_k]);
       }
       auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
       auto max_index = static_cast<int32_t>(std::distance(array_axis.begin(), max_ops));

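Beyond pre-sizing, the Argmax hunk above (and the ArgMinWithValue hunk below) hoists the scratch vector out of the per-element loops, so one allocation serves the whole launch; every pass overwrites all dim_axis_ slots, which is what makes the reuse safe. A hedged micro-sketch of that shape, with invented data:

    // Sketch of the hoist: allocate once, overwrite every slot per row.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main() {
      const size_t rows = 3, dim = 4;
      const float input[rows * dim] = {1, 7, 2, 0, 5, 5, 9, 1, 3, 8, 8, 2};

      std::vector<float> array_axis(dim);  // hoisted: one allocation, not one per row
      for (size_t i = 0; i < rows; i++) {
        for (size_t k = 0; k < dim; k++) {
          array_axis[k] = input[i * dim + k];  // overwrite, no push_back
        }
        auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
        printf("argmax(row %zu) = %td\n", i, std::distance(array_axis.begin(), max_ops));
      }
      return 0;
    }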
@@ -82,14 +82,14 @@ bool ArgMinWithValueCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &
   auto output0 = reinterpret_cast<int32_t *>(outputs[0]->addr);
   auto output1 = reinterpret_cast<T *>(outputs[1]->addr);
 
+  std::vector<float> array_axis(dim_axis_);
   for (size_t i = 0; i < num_before_axis_; i++) {
     size_t src_index_i = i * dim_axis_ * num_after_axis_;
     for (size_t j = 0; j < num_after_axis_; j++) {
-      std::vector<float> array_axis;
       size_t src_index_j = src_index_i + j;
       for (size_t k = 0; k < dim_axis_; k++) {
         size_t src_index_k = k * num_after_axis_ + src_index_j;
-        array_axis.push_back(static_cast<float>(input[src_index_k]));
+        array_axis[k] = static_cast<float>(input[src_index_k]);
       }
       auto min_ops = std::min_element(array_axis.begin(), array_axis.end());
       auto min_index = static_cast<int32_t>(std::distance(array_axis.begin(), min_ops));

@@ -40,10 +40,11 @@ bool ConcatCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs, c
   }
   size_t input_num = AnfAlgo::GetInputTensorNum(node_);
   std::vector<std::vector<size_t>> input_flat_shape_list;
+  input_flat_shape_list.reserve(input_num);
   for (size_t i = 0; i < input_num; i++) {
     auto input_shape_i = AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
     auto flat_shape = CPUKernelUtils::FlatShapeByAxis(input_shape_i, axis_);
-    input_flat_shape_list.push_back(flat_shape);
+    input_flat_shape_list.emplace_back(flat_shape);
   }
 
   auto output_addr = reinterpret_cast<T *>(outputs[0]->addr);

@@ -110,7 +110,6 @@ std::vector<size_t> CPUKernelUtils::FlatShapeByAxis(const std::vector<size_t> &s
   }
   size_t dim_row = 1;
   size_t dim_col = 1;
-  std::vector<size_t> flat_shape;
   for (size_t i = 0; i < shape.size(); ++i) {
     if (SizeToInt(i) < axis) {
       dim_row *= shape[i];
@@ -118,9 +117,9 @@ std::vector<size_t> CPUKernelUtils::FlatShapeByAxis(const std::vector<size_t> &s
       dim_col *= shape[i];
     }
   }
-  flat_shape.push_back(dim_row);
-  flat_shape.push_back(dim_col);
-  return flat_shape;
+  // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning a vector won't cause extra vector constructed or moved
+  return std::vector<size_t>{dim_row, dim_col};
 }
 
 BroadcastIterator::BroadcastIterator(std::vector<size_t> input_shape_a, std::vector<size_t> input_shape_b,

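The FlatShapeByAxis rewrite relies on copy elision, as its added comment says: since C++17, returning a prvalue such as the braced temporary constructs the result directly in the caller's storage, so returning by value costs no extra vector. A standalone sketch of the idiom (hypothetical function name):

    #include <cstdio>
    #include <vector>

    // Returns by value; with guaranteed copy elision (C++17) the braced
    // temporary is constructed directly in the caller's object.
    std::vector<size_t> MakeFlatShape(size_t dim_row, size_t dim_col) {
      return std::vector<size_t>{dim_row, dim_col};
    }

    int main() {
      std::vector<size_t> flat = MakeFlatShape(6, 20);  // no extra vector copied or moved
      printf("%zu x %zu\n", flat[0], flat[1]);
      return 0;
    }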
@@ -157,9 +157,10 @@ std::vector<KernelAttr> CPUKernelFactory::GetSupportedKernelAttrList(const std::
     MS_LOG(EXCEPTION) << "Not registered CPU kernel: op[" << kernel_name << "]!";
   }
   auto creators = iter->second;
+  result.reserve(creators.size());
   for (size_t index = 0; index < creators.size(); ++index) {
     auto attr_creator = creators[index];
-    result.push_back(attr_creator.first);
+    result.emplace_back(attr_creator.first);
   }
   return result;
 }

@@ -52,14 +52,10 @@ bool L2NormalizeGradCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs,
   auto output_size = outputs[0]->size / sizeof(T);
   auto task = [&](size_t start, size_t end) {
     for (size_t i = start; i < end; i++) {
-      std::vector<size_t> high_dim_index;
-      OneDimIndexToHighDimIndex(i, &high_dim_index);
-      std::vector<T> input_x_vector;
-      GetVector(&input_x_vector, high_dim_index, input_x);
-      std::vector<T> dout_vector;
-      GetVector(&dout_vector, high_dim_index, dout);
-      std::vector<T> y_vector;
-      GetVector(&y_vector, high_dim_index, y);
+      std::vector<size_t> high_dim_index = OneDimIndexToHighDimIndex(i);
+      std::vector<T> input_x_vector = GetVector(high_dim_index, input_x);
+      std::vector<T> dout_vector = GetVector(high_dim_index, dout);
+      std::vector<T> y_vector = GetVector(high_dim_index, y);
       GetOutput(input_x_vector, y_vector, dout_vector, high_dim_index, &output[i]);
     }
   };
@@ -95,11 +91,16 @@ void L2NormalizeGradCPUKernel<T>::CheckIONumber(const CNodePtr &kernel_node) {
 }
 
 template <typename T>
-void L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index) {
+std::vector<size_t> L2NormalizeGradCPUKernel<T>::OneDimIndexToHighDimIndex(size_t one_dim_index) {
+  std::vector<size_t> high_dim_index;
+  high_dim_index.reserve(dim_elem_num_list_.size());
   for (const auto &item : dim_elem_num_list_) {
-    high_dim_index->push_back(one_dim_index / item);
+    high_dim_index.push_back(one_dim_index / item);
     one_dim_index %= item;
   }
+  // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning a vector won't cause extra vector constructed or moved
+  return high_dim_index;
 }
 
 template <typename T>
@@ -113,16 +114,20 @@ void L2NormalizeGradCPUKernel<T>::HighDimIndexToOneDimIndex(size_t *one_dim_inde
 }
 
 template <typename T>
-void L2NormalizeGradCPUKernel<T>::GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index,
-                                            const T *x) {
+std::vector<T> L2NormalizeGradCPUKernel<T>::GetVector(const std::vector<size_t> &high_dim_index, const T *x) {
   auto x_shape = input_shape_list_[0];
+  std::vector<T> x_vector;
+  x_vector.reserve(x_shape[axis_]);
   for (size_t i = 0; i < x_shape[axis_]; i++) {
     size_t oneDimIndex = 0;
     std::vector<size_t> tmp_high_dim_index = high_dim_index;
     tmp_high_dim_index[axis_] = i;
     HighDimIndexToOneDimIndex(&oneDimIndex, tmp_high_dim_index);
-    x_vector->push_back(x[oneDimIndex]);
+    x_vector.emplace_back(x[oneDimIndex]);
   }
+  // referred to Copy elision https://en.cppreference.com/w/cpp/language/copy_elision
+  // returning a vector won't cause extra vector constructed or moved
+  return x_vector;
 }
 
 template <typename T>

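The same trade is applied to the helper signatures above: out-parameters filled through pointers become values that are built, reserved, and returned. A hedged before/after sketch of the call-site change, with invented names and simplified logic:

    #include <vector>

    // Old style: caller default-constructs, callee fills through a pointer.
    static void GetIndexOld(size_t i, std::vector<size_t> *out) {
      out->push_back(i / 10);
      out->push_back(i % 10);
    }

    // New style: callee reserves, fills, and returns; NRVO/move keeps it cheap.
    static std::vector<size_t> GetIndexNew(size_t i) {
      std::vector<size_t> index;
      index.reserve(2);
      index.push_back(i / 10);
      index.push_back(i % 10);
      return index;  // no extra vector copied
    }

    int main() {
      std::vector<size_t> a;
      GetIndexOld(42, &a);                      // old: declare, then fill via pointer
      std::vector<size_t> b = GetIndexNew(42);  // new: single initialization
      return static_cast<int>(a[0] + b[1]);
    }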
@@ -38,9 +38,9 @@ class L2NormalizeGradCPUKernel : public CPUKernel {
  private:
   void CheckInputShape(const std::vector<size_t> &output_shape);
   void CheckIONumber(const CNodePtr &kernel_node);
-  void OneDimIndexToHighDimIndex(size_t one_dim_index, std::vector<size_t> *high_dim_index);
+  std::vector<size_t> OneDimIndexToHighDimIndex(size_t one_dim_index);
   void HighDimIndexToOneDimIndex(size_t *one_dim_index, const std::vector<size_t> &high_dim_index);
-  void GetVector(std::vector<T> *x_vector, const std::vector<size_t> &high_dim_index, const T *x);
+  std::vector<T> GetVector(const std::vector<size_t> &high_dim_index, const T *x);
   void GetSumOfProduct(const std::vector<T> &x_vector, const std::vector<T> &y_vector, T *ss);
   void GetOutput(const std::vector<T> &input_x_vector, const std::vector<T> &y_vector,
                  const std::vector<T> &dout_vector, const std::vector<size_t> &high_dim_index, T *output);

@@ -46,10 +46,10 @@ int Compress(HashmapEntry<T> *entry_p, const size_t &length, T entry) {
 void UpdateShape(size_t miss_count, const CNodePtr &node_) {
   std::vector<size_t> out_shape;
   out_shape.emplace_back(miss_count);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {AnfAlgo::GetOutputInferShape(node_, 0), out_shape, out_shape, out_shape},
                                       node_.get());

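This hunk and the MaskedSelect, PadAndShift, SubAndFilter, and Unique hunks below all switch dtypes to a sized constructor with indexed assignment. The two halves of the edit must travel together: a vector constructed with a size already holds that many value-initialized elements, so keeping push_back would append after them. A minimal sketch of that pitfall (plain int standing in for TypeId):

    #include <cassert>
    #include <vector>

    int main() {
      const size_t n = 4;

      // Bug pattern if only the constructor were changed: std::vector<int>
      // wrong(n) already holds n zeros, so push_back grows it to 2 * n.
      std::vector<int> wrong(n);
      for (size_t i = 0; i < n; i++) {
        wrong.push_back(static_cast<int>(i));
      }
      assert(wrong.size() == 2 * n);

      // The paired fix: overwrite in place, size stays n.
      std::vector<int> right(n);
      for (size_t i = 0; i < n; i++) {
        right[i] = static_cast<int>(i);
      }
      assert(right.size() == n);
      return 0;
    }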
@@ -70,10 +70,10 @@ bool MaskedSelectCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inp
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(j);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
 }

@@ -227,7 +227,7 @@ void MatMulCPUKernel::ParallelRun(float *output) {
   std::vector<common::Task> tasks;
   size_t thread_index = 0;
   while (thread_index < thread_count_) {
-    tasks.push_back(std::bind(&MatMulCPUKernel::FloatRun, this, thread_index));
+    tasks.emplace_back(std::bind(&MatMulCPUKernel::FloatRun, this, thread_index));
     thread_index++;
   }
   (void)common::ThreadPool::GetInstance().SyncRun(tasks);

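In the MatMul hunk only push_back becomes emplace_back: push_back must first materialize a temporary task object from the bind expression and then move it into the vector, while emplace_back forwards the expression and constructs the element in place. A hedged sketch, with std::function standing in for common::Task:

    #include <functional>
    #include <vector>

    static int Run(int thread_index) { return thread_index; }

    int main() {
      std::vector<std::function<int()>> tasks;
      tasks.reserve(2);
      tasks.push_back(std::bind(Run, 0));     // temporary std::function, then move
      tasks.emplace_back(std::bind(Run, 1));  // constructed directly in the vector
      return tasks[0]() + tasks[1]();
    }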
@@ -76,14 +76,14 @@ void PadAndShiftCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(output_size);
-  std::vector<TypeId> dtypes;
   auto node_ = node_wpt_.lock();
   if (!node_) {
     MS_LOG(EXCEPTION) << "node_wpt_ is expired.";
   }
   auto output_nums = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_nums);
   for (size_t i = 0; i < output_nums; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape}, node_.get());
 }

@@ -72,10 +72,10 @@ void SubAndFilterCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
   MS_LOG(INFO) << "SubAndFilter output count is " << count;
   std::vector<size_t> out_shape;
   out_shape.emplace_back(count);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, out_shape}, node_.get());
 }

@@ -57,10 +57,10 @@ bool UniqueCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
   }
   std::vector<size_t> out_shape;
   out_shape.emplace_back(output_size_);
-  std::vector<TypeId> dtypes;
   size_t output_num = AnfAlgo::GetOutputTensorNum(node_);
+  std::vector<TypeId> dtypes(output_num);
   for (size_t i = 0; i < output_num; i++) {
-    dtypes.push_back(AnfAlgo::GetOutputDeviceDataType(node_, i));
+    dtypes[i] = AnfAlgo::GetOutputDeviceDataType(node_, i);
   }
   AnfAlgo::SetOutputInferTypeAndShape(dtypes, {out_shape, AnfAlgo::GetOutputInferShape(node_, 1)}, node_.get());
 }