From cd8674328f2ca7b5af787e46585fa20bd28ff970 Mon Sep 17 00:00:00 2001
From: hanhuifeng2020
Date: Thu, 15 Sep 2022 16:14:03 +0800
Subject: [PATCH] support DynamicBroadcastGradientArgs op on cpu

---
 .../dynamic_broadcast_grad_args_cpu_kernel.cc | 216 ++++++++++++++++++
 .../dynamic_broadcast_grad_args_cpu_kernel.h  |  82 +++++++
 2 files changed, 298 insertions(+)
 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.cc
 create mode 100644 mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.h

diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.cc b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.cc
new file mode 100644
index 00000000000..3b8603a7e3e
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.cc
@@ -0,0 +1,216 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <algorithm>
+#include <vector>
+
+#include "mindspore/core/ops/dynamic_broadcast_gradient_args.h"
+#include "plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.h"
+#include "plugin/device/cpu/hal/device/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+namespace {
+constexpr size_t kDynamicBroadcastGradientArgsInputsNum = 2;
+constexpr size_t kDynamicBroadcastGradientArgsOutputsNum = 2;
+constexpr char kKernelName[] = "DynamicBroadcastGradientArgs";
+using KernelRunFunc = DynamicBroadcastGradientArgsCpuKernelMod::KernelRunFunc;
+}  // namespace
+
+template <typename T>
+void AddGradReduceIdx(std::vector<std::vector<T>> *grad_reduce_idx, std::vector<bool> cur_one, bool none_one,
+                      const size_t max_rank, size_t j) {
+  MS_EXCEPTION_IF_NULL(grad_reduce_idx);
+  for (size_t i = 0; i < kDynamicBroadcastGradientArgsInputsNum; i++) {
+    if (cur_one[i] && !none_one) {
+      (void)(*grad_reduce_idx)[i].emplace_back(SizeToLong(max_rank - 1 - j));
+    }
+  }
+}
+
+template <typename T>
+std::vector<std::vector<T>> GetGradIndex(const std::vector<std::vector<T>> &reverse_shapes, const size_t max_rank) {
+  std::vector<std::vector<T>> grad_reduce_index(kDynamicBroadcastGradientArgsInputsNum);
+  std::vector<bool> pre_one(kDynamicBroadcastGradientArgsInputsNum);
+  std::vector<bool> cur_one(kDynamicBroadcastGradientArgsInputsNum);
+  for (size_t i = 0; i < kDynamicBroadcastGradientArgsInputsNum; i++) {
+    pre_one[i] = false;
+    cur_one[i] = false;
+  }
+  bool set_one = false;
+  for (size_t j = 0; j < max_rank; j++) {
+    int out_dim = -1;
+    bool out_dim_set = false;
+    bool none_one = true;
+    for (size_t i = 0; i < kDynamicBroadcastGradientArgsInputsNum; i++) {
+      if (reverse_shapes[i][j] == 1) {
+        cur_one[i] = true;
+        none_one = false;
+      } else {
+        cur_one[i] = false;
+        if (!out_dim_set || reverse_shapes[i][j] == static_cast<T>(out_dim)) {
+          out_dim = static_cast<int>(reverse_shapes[i][j]);
+          out_dim_set = true;
+        } else {
+          MS_LOG(EXCEPTION) << "Cannot broadcast inputs[0] and inputs[1].";
+        }
+      }
+    }
+    if (!out_dim_set) {
+      for (size_t i = 0; i < kDynamicBroadcastGradientArgsInputsNum; i++) {
+        (void)grad_reduce_index[i].emplace_back(max_rank - 1 - j);
+      }
+      continue;
+    } else if (std::equal(cur_one.begin(), cur_one.end(), pre_one.begin()) && set_one) {
+      AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j);
+    } else {
+      AddGradReduceIdx(&grad_reduce_index, cur_one, none_one, max_rank, j);
+    }
+    set_one = true;
+    for (size_t i = 0; i < kDynamicBroadcastGradientArgsInputsNum; i++) {
+      pre_one[i] = cur_one[i];
+    }
+  }
+  return grad_reduce_index;
+}
+
+template <typename T, typename S>
+size_t SetOutputValue(S *addr, const std::vector<T> &grad_reduce_idx, size_t input_num) {
+  size_t index_num = grad_reduce_idx.size();
+  for (size_t i = 0; i < index_num; i++) {
+    addr[i] = static_cast<S>(grad_reduce_idx[index_num - 1 - i]);
+  }
+
+  return index_num;
+}
+
+template <typename T, typename S>
+bool DynamicBroadcastGradientArgsCpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
+                                                            const std::vector<AddressPtr> &,
+                                                            const std::vector<AddressPtr> &outputs) {
+  const T *s0_addr = reinterpret_cast<const T *>(inputs[0]->addr);
+  const T *s1_addr = reinterpret_cast<const T *>(inputs[1]->addr);
+  S *r0_addr = reinterpret_cast<S *>(outputs[0]->addr);
+  S *r1_addr = reinterpret_cast<S *>(outputs[1]->addr);
+  std::vector<size_t> ranks = {input_size_list_[0] / sizeof(T), input_size_list_[1] / sizeof(T)};
+
+  std::vector<std::vector<T>> grad_reduce_idx(kDynamicBroadcastGradientArgsInputsNum);
+  bool all_equal = true;
+  size_t max_rank = ranks[0] > ranks[1] ? ranks[0] : ranks[1];
+  size_t min_rank = ranks[0] < ranks[1] ? ranks[0] : ranks[1];
+  for (size_t i = 0; i < min_rank; i++) {
+    if (s0_addr[i] != s1_addr[i]) {
+      all_equal = false;
+      break;
+    }
+  }
+  if (!all_equal) {
+    // Reverse shapes
+    std::vector<std::vector<T>> reverse_shapes(kDynamicBroadcastGradientArgsInputsNum);
+    for (size_t j = 0; j < ranks[0]; j++) {
+      reverse_shapes[0].push_back(s0_addr[ranks[0] - j - 1]);
+    }
+    if (reverse_shapes[0].size() < max_rank) {
+      reverse_shapes[0].resize(max_rank, 1);
+    }
+
+    for (size_t j = 0; j < ranks[1]; j++) {
+      reverse_shapes[1].push_back(s1_addr[ranks[1] - j - 1]);
+    }
+    if (reverse_shapes[1].size() < max_rank) {
+      reverse_shapes[1].resize(max_rank, 1);
+    }
+
+    grad_reduce_idx = GetGradIndex(reverse_shapes, max_rank);
+  }
+
+  r0_size_ = SetOutputValue(r0_addr, grad_reduce_idx[0], input_size_list_[0] / sizeof(T));
+  r1_size_ = SetOutputValue(r1_addr, grad_reduce_idx[1], input_size_list_[1] / sizeof(T));
+
+  return true;
+}
+
+bool DynamicBroadcastGradientArgsCpuKernelMod::Init(const BaseOperatorPtr &base_operator,
+                                                    const std::vector<KernelTensorPtr> &inputs,
+                                                    const std::vector<KernelTensorPtr> &outputs) {
+  auto kernel_ptr = std::dynamic_pointer_cast<ops::DynamicBroadcastGradientArgs>(base_operator);
+  if (!kernel_ptr) {
+    MS_LOG(ERROR) << "Cast DynamicBroadcastGradientArgs op failed!";
+    return false;
+  }
+  kernel_name_ = kernel_ptr->name();
+
+  if (inputs.size() != kDynamicBroadcastGradientArgsInputsNum ||
+      outputs.size() != kDynamicBroadcastGradientArgsOutputsNum) {
+    MS_LOG(ERROR) << kernel_name_ << ": input and output size should be " << kDynamicBroadcastGradientArgsInputsNum
+                  << " and " << kDynamicBroadcastGradientArgsOutputsNum << ", but got " << inputs.size() << " and "
+                  << outputs.size();
+    return false;
+  }
+
+  if (!MatchKernelFunc(base_operator, inputs, outputs)) {
+    return false;
+  }
+
+  return true;
+}
+
+int DynamicBroadcastGradientArgsCpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
+                                                     const std::vector<KernelTensorPtr> &inputs,
+                                                     const std::vector<KernelTensorPtr> &outputs,
+                                                     const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
+  if (KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost) == static_cast<int>(KRET_RESIZE_FAILED)) {
+    MS_LOG(WARNING) << kernel_name_ << " resize failed.";
+    return static_cast<int>(KRET_RESIZE_FAILED);
+  }
+  // Keep the output tensors so the real output shapes can be written back after launch.
+  outputs_ = outputs;
+  is_need_retrieve_output_shape_ = true;
+
+  return static_cast<int>(KRET_OK);
+}
+
+const std::vector<std::pair<KernelAttr, KernelRunFunc>> &DynamicBroadcastGradientArgsCpuKernelMod::GetFuncList() const {
+  static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeInt32)
+       .AddInputAttr(kNumberTypeInt32)
+       .AddOutputAttr(kNumberTypeInt32)
+       .AddOutputAttr(kNumberTypeInt32),
+     &DynamicBroadcastGradientArgsCpuKernelMod::LaunchKernel<int32_t, int32_t>},
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeInt32)
+       .AddInputAttr(kNumberTypeInt32)
+       .AddOutputAttr(kNumberTypeInt64)
+       .AddOutputAttr(kNumberTypeInt64),
+     &DynamicBroadcastGradientArgsCpuKernelMod::LaunchKernel<int32_t, int64_t>},
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeInt64)
+       .AddInputAttr(kNumberTypeInt64)
+       .AddOutputAttr(kNumberTypeInt32)
+       .AddOutputAttr(kNumberTypeInt32),
+     &DynamicBroadcastGradientArgsCpuKernelMod::LaunchKernel<int64_t, int32_t>},
+    {KernelAttr()
+       .AddInputAttr(kNumberTypeInt64)
+       .AddInputAttr(kNumberTypeInt64)
+       .AddOutputAttr(kNumberTypeInt64)
+       .AddOutputAttr(kNumberTypeInt64),
+     &DynamicBroadcastGradientArgsCpuKernelMod::LaunchKernel<int64_t, int64_t>},
+  };
+  return func_list;
+}
+MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, DynamicBroadcastGradientArgs, DynamicBroadcastGradientArgsCpuKernelMod);
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.h b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.h
new file mode 100644
index 00000000000..4d5f01e8333
--- /dev/null
+++ b/mindspore/ccsrc/plugin/device/cpu/kernel/dynamic_broadcast_grad_args_cpu_kernel.h
@@ -0,0 +1,82 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_DYN_BROADCAST_GRAD_ARGS_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_DYN_BROADCAST_GRAD_ARGS_KERNEL_H_
+
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "plugin/device/cpu/kernel/cpu_kernel.h"
+#include "plugin/factory/ms_factory.h"
+
+namespace mindspore {
+namespace kernel {
+class DynamicBroadcastGradientArgsCpuKernelMod : public NativeCpuKernelMod,
+                                                 public MatchKernelHelper<DynamicBroadcastGradientArgsCpuKernelMod> {
+ public:
+  DynamicBroadcastGradientArgsCpuKernelMod() : r0_size_(0), r1_size_(0) { ResetResource(); }
+  ~DynamicBroadcastGradientArgsCpuKernelMod() override = default;
+
+  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+            const std::vector<KernelTensorPtr> &outputs) override;
+
+  int Resize(
+    const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
+    const std::vector<KernelTensorPtr> &outputs,
+    const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost = std::map<uint32_t, tensor::TensorPtr>()) override;
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+              const std::vector<AddressPtr> &outputs) override {
+    return kernel_func_(this, inputs, workspace, outputs);
+  }
+
+  const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;
+
+  std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }
+
+  void ResetResource() noexcept {
+    input_size_list_.clear();
+    output_size_list_.clear();
+    workspace_size_list_.clear();
+  }
+
+  std::vector<KernelTensorPtr> GetOutputs() override {
+    ShapeVector r0_shape{SizeToLong(r0_size_)};
+    ShapeVector r1_shape{SizeToLong(r1_size_)};
+
+    outputs_[0]->SetShapeVector(r0_shape);
+    outputs_[1]->SetShapeVector(r1_shape);
+
+    return outputs_;
+  }
+
+ protected:
+  template <typename T, typename S>
+  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
+                    const std::vector<AddressPtr> &outputs);
+
+  size_t r0_size_;
+  size_t r1_size_;
+  std::vector<KernelTensorPtr> outputs_;
+};
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_DYN_BROADCAST_GRAD_ARGS_KERNEL_H_
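
Note (not part of the patch): the following is a minimal standalone C++ sketch of the semantics the kernel above implements, useful for sanity-checking its outputs. For two broadcast-compatible input shapes it returns, per input, the output axes over which that input's gradient must be summed: every right-aligned position where the input contributes a size-1 dimension or no dimension at all. The helper name BroadcastGradientReduceAxes is hypothetical, and the sketch assumes the shapes are already broadcast-compatible (no validation is performed).

// Illustration only; independent of the MindSpore kernel registration above.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical helper: axes of the broadcast output over which the gradient
// w.r.t. `shape` has to be reduced, given the other operand's shape `other`.
std::vector<int64_t> BroadcastGradientReduceAxes(const std::vector<int64_t> &shape,
                                                 const std::vector<int64_t> &other) {
  const size_t max_rank = std::max(shape.size(), other.size());
  const size_t pad = max_rank - shape.size();  // leading dimensions `shape` is missing
  std::vector<int64_t> axes;
  for (size_t axis = 0; axis < max_rank; ++axis) {
    const int64_t dim = axis < pad ? 1 : shape[axis - pad];
    if (dim == 1) {  // missing or size-1 dims are broadcast, so the gradient is summed there
      axes.push_back(static_cast<int64_t>(axis));
    }
  }
  return axes;
}

int main() {
  const std::vector<int64_t> s0{8, 1, 6};
  const std::vector<int64_t> s1{7, 1};
  // For these shapes the kernel's outputs would be r0 = [1] and r1 = [0, 2].
  for (int64_t a : BroadcastGradientReduceAxes(s0, s1)) std::cout << "r0 axis " << a << '\n';
  for (int64_t a : BroadcastGradientReduceAxes(s1, s0)) std::cout << "r1 axis " << a << '\n';
  return 0;
}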