forked from mindspore-Ecosystem/mindspore
!44458 Refactor AdaptiveAvgPool2DKernelMod/AngleAtomEnergyGpuKernelMod/ConcatCpuKernelMod/ConcatV2FwdGpuKernelMod
Merge pull request !44458 from hanhuifeng/kernel_mod_new
commit 5fe262a7c3
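Editor's note: the diff below moves these kernels off the deprecated CNode-based interface (InitKernel(const CNodePtr &) on the DeprecatedNative*KernelMod base classes) onto the new BaseOperator-based interface, where Init matches a KernelAttr against a static function table, Resize recomputes shape-dependent state, and Launch forwards to the dtype-specific LaunchKernel selected at Init time. The following is a minimal, self-contained sketch of that dispatch pattern only; SimpleConcatKernelMod and its simplified types are illustrative stand-ins, not the real MindSpore classes or signatures.

// Sketch of the KernelAttr -> member-function dispatch used by the new-style kernels in this commit.
// All types here are simplified placeholders for illustration.
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class SimpleConcatKernelMod {
 public:
  using LaunchFunc = std::function<bool(SimpleConcatKernelMod *, const std::vector<const void *> &, void *)>;

  // Init: pick the launch function that matches the requested dtype (done once).
  bool Init(const std::string &dtype) {
    for (const auto &[attr, func] : GetFuncList()) {
      if (attr == dtype) {
        kernel_func_ = func;
        return true;
      }
    }
    return false;  // unsupported dtype, mirrors the "does not support this kernel data type" path
  }

  // Resize: recompute per-call shape information (runs again when input shapes change).
  int Resize(const std::vector<size_t> &input_lens) {
    input_lens_ = input_lens;
    return 0;  // KRET_OK
  }

  // Launch: forward to the dtype-specific implementation chosen in Init.
  bool Launch(const std::vector<const void *> &inputs, void *output) {
    return kernel_func_(this, inputs, output);
  }

 private:
  // Table of {dtype attr, launch function}, analogous to the func_list_ / GetFuncList() tables below.
  static const std::vector<std::pair<std::string, LaunchFunc>> &GetFuncList() {
    static const std::vector<std::pair<std::string, LaunchFunc>> func_list = {
        {"float32", &SimpleConcatKernelMod::LaunchKernel<float>},
        {"int32", &SimpleConcatKernelMod::LaunchKernel<int32_t>},
    };
    return func_list;
  }

  // Type-specific body: concatenate the flat inputs into the output buffer.
  template <typename T>
  bool LaunchKernel(const std::vector<const void *> &inputs, void *output) {
    T *out = static_cast<T *>(output);
    for (size_t i = 0; i < inputs.size(); ++i) {
      const T *in = static_cast<const T *>(inputs[i]);
      for (size_t j = 0; j < input_lens_[i]; ++j) {
        *out++ = in[j];
      }
    }
    return true;
  }

  LaunchFunc kernel_func_;
  std::vector<size_t> input_lens_;
};

int main() {
  SimpleConcatKernelMod kernel;
  if (!kernel.Init("float32")) return 1;
  kernel.Resize({2, 3});
  float a[] = {1.f, 2.f}, b[] = {3.f, 4.f, 5.f}, out[5] = {};
  kernel.Launch({a, b}, out);
  for (float v : out) std::cout << v << ' ';  // prints: 1 2 3 4 5
  std::cout << '\n';
  return 0;
}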
@ -17,47 +17,59 @@
#include "plugin/device/cpu/kernel/concat_cpu_kernel.h"
#include <algorithm>
#include <utility>
#include <map>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "mindspore/core/ops/concat.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kConcatOutputsNum = 1;
} // namespace
void ConcatCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
cnode_ptr_ = kernel_node;
axis_ = LongToInt(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS));
auto input_1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_1_shape.size());
}

auto kernel_attr = GetKernelAttrFromNode(kernel_node);
bool ConcatCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match) {
MS_LOG(EXCEPTION) << "Concat does not support this kernel data type: " << kernel_attr;
}

kernel_func_ = func_list_[index].second;
kernel_name_ = base_operator->name();
auto kernel_ptr = std::dynamic_pointer_cast<ops::Concat>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
ori_axis_ = kernel_ptr->get_axis();
return true;
}

int ConcatCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
inputs_shape_.clear();
for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape_.push_back(inputs[i]->GetShapeVector());
}
axis_ = ori_axis_;
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(inputs_shape_[0].size());
}
return KRET_OK;
}

template <typename T>
bool ConcatCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
auto node_ = cnode_ptr_.lock();
if (!node_) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cnode_ptr_(kernel_node) is expired. Error no: " << node_;
}
const size_t input_num = common::AnfAlgo::GetInputTensorNum(node_);
const size_t input_num = inputs.size();
CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConcatOutputsNum, kernel_name_);

std::vector<ShapeVector> input_flat_shape_list;
input_flat_shape_list.reserve(input_num);
for (size_t i = 0; i < input_num; i++) {
auto input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
auto input_shape_i = inputs_shape_[i];
auto flat_shape = CPUKernelUtils::FlatShapeByAxis(input_shape_i, axis_);
(void)input_flat_shape_list.emplace_back(flat_shape);
}
@ -14,12 +14,13 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONCAT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONCAT_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CONCAT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CONCAT_CPU_KERNEL_H_

#include <vector>
#include <utility>
#include <complex>
#include <map>

#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"
@ -29,12 +30,16 @@ namespace kernel {
using complex64 = std::complex<float>;
using complex128 = std::complex<double>;

class ConcatCpuKernelMod : public DeprecatedNativeCpuKernelMod {
class ConcatCpuKernelMod : public NativeCpuKernelMod {
public:
ConcatCpuKernelMod() = default;
~ConcatCpuKernelMod() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override {
@ -50,9 +55,11 @@ class ConcatCpuKernelMod : public DeprecatedNativeCpuKernelMod {
const std::vector<kernel::AddressPtr> &)>;
static std::vector<std::pair<KernelAttr, ConcatFunc>> func_list_;
ConcatFunc kernel_func_;
int ori_axis_{0};
int axis_{0};
std::vector<ShapeVector> inputs_shape_;
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONCAT_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CONCAT_CPU_KERNEL_H_
@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <utility>
#include <map>
#include "plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
namespace mindspore {
@ -21,50 +23,138 @@ namespace kernel {
template <typename T>
using Complex = mindspore::utils::Complex<T>;

MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
ConcatV2FwdGpuKernelMod, Complex<double>)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
ConcatV2FwdGpuKernelMod, Complex<float>)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
ConcatV2FwdGpuKernelMod, double)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ConcatV2FwdGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
ConcatV2FwdGpuKernelMod, half)
const std::vector<std::pair<KernelAttr, ConcatV2FwdGpuKernelMod::KernelRunFunc>> &ConcatV2FwdGpuKernelMod::GetFuncList()
const {
static const std::vector<std::pair<KernelAttr, ConcatV2FwdGpuKernelMod::KernelRunFunc>> func_list = {
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
&ConcatV2FwdGpuKernelMod::LaunchKernel<Complex<double>>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<Complex<float>>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<double>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&ConcatV2FwdGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&ConcatV2FwdGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<int64_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
&ConcatV2FwdGpuKernelMod::LaunchKernel<int32_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
&ConcatV2FwdGpuKernelMod::LaunchKernel<int16_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
&ConcatV2FwdGpuKernelMod::LaunchKernel<char>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uint64_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uint>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uint16_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uchar>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
&ConcatV2FwdGpuKernelMod::LaunchKernel<bool>}};
return func_list;
}

MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ConcatV2FwdGpuKernelMod, int64_t)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ConcatV2FwdGpuKernelMod, int)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
ConcatV2FwdGpuKernelMod, short) // NOLINT
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
ConcatV2FwdGpuKernelMod, char)
bool ConcatV2FwdGpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
return kernel_func_(this, inputs, workspace, outputs);
}

MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
ConcatV2FwdGpuKernelMod, uint64_t)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
ConcatV2FwdGpuKernelMod, uint)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
ConcatV2FwdGpuKernelMod, uint16_t)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
ConcatV2FwdGpuKernelMod, uchar)
template <typename T>
bool ConcatV2FwdGpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
if (input_num_ == 0) {
return true;
}
T *output = GetDeviceAddress<T>(outputs, 0);
T **inputs_device = GetDeviceAddress<T *>(workspace, 0);
int *len_axis_device = GetDeviceAddress<int>(workspace, 1);
for (int i = 0; i < input_num_; i++) {
auto input_index = not_null_input_index_[i];
inputs_host_[i] = GetDeviceAddress<T>(inputs, input_index);
}
CHECK_CUDA_RET_WITH_ERROR_NOTRACE(
cudaMemcpyAsync(inputs_device, inputs_host_.data(), sizeof(T *) * input_num_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr_)),
"ConcatV2 opt cudaMemcpyAsync inputs failed");
CHECK_CUDA_RET_WITH_ERROR_NOTRACE(
cudaMemcpyAsync(len_axis_device, len_axis_.data(), sizeof(int) * input_num_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr_)),
"ConcatV2 opt cudaMemcpyAsync length on axis failed");
output_size_ = output_size_list_[0] / sizeof(T);
ConcatKernel(output_size_, input_num_, all_size_before_axis_, all_size_axis_, len_axis_device, inputs_device, output,
reinterpret_cast<cudaStream_t>(stream_ptr_));
return true;
}

MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
ConcatV2FwdGpuKernelMod, bool)
bool ConcatV2FwdGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
kernel_name_ = base_operator->name();
auto prim = base_operator->GetPrim();
MS_EXCEPTION_IF_NULL(prim);
ori_axis_ = GetValue<int64_t>(prim->GetAttr("axis"));
origin_data_format_ = GetValue<std::string>(prim->GetAttr("operator_origin_format"));
len_axis_.resize(inputs.size());
return true;
}

int ConcatV2FwdGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
auto input_0_shape = inputs[0]->GetDeviceShapeAdaptively();
int dims = SizeToInt(input_0_shape.size());
axis_ = ori_axis_;
if (axis_ < -dims || axis_ >= dims) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' must be in the range [-" << dims << "," << dims
<< "), but got " << axis_;
}
if (axis_ < 0) {
axis_ += dims;
}
auto input_format = mindspore::FormatEnumToString(inputs[0]->GetFormat());
axis_ = AxisTransform(origin_data_format_, input_format, axis_);

not_null_input_index_.clear();
len_axis_.clear();
input_num_ = inputs.size();
for (int i = 0; i < input_num_; i++) {
auto input_shape = inputs[i]->GetDeviceShapeAdaptively();
auto is_null_input = CHECK_NULL_INPUT(input_shape);
if (!is_null_input) {
not_null_input_index_.push_back(i);
len_axis_.push_back(LongToInt(input_shape[axis_]));
}
}
input_num_ = not_null_input_index_.size();
workspace_size_list_.push_back(sizeof(void *) * input_num_);
workspace_size_list_.push_back(sizeof(int) * input_num_);
inputs_host_.resize(input_num_);

auto output_shape = outputs[0]->GetDeviceShapeAdaptively();
all_size_before_axis_ = 1;
all_size_axis_ = 1;
for (int i = 0; i < SizeToInt(output_shape.size()); i++) {
if (i > axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
all_size_axis_ *= LongToInt(output_shape[i]);
}
if (i == axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
}
}
return KRET_OK;
}

MS_KERNEL_FACTORY_REG(NativeGpuKernelMod, Concat, ConcatV2FwdGpuKernelMod);
} // namespace kernel
} // namespace mindspore
@ -14,152 +14,58 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_ARRAYS_CONCATV2_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_ARRAYS_CONCATV2_GPU_KERNEL_H_

#include <vector>
#include <string>
#include <memory>
#include <utility>
#include <map>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/concatv2_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T>
class ConcatV2FwdGpuKernelMod : public DeprecatedNativeGpuKernelMod {
class ConcatV2FwdGpuKernelMod : public NativeGpuKernelMod, public MatchKernelHelper<ConcatV2FwdGpuKernelMod> {
public:
ConcatV2FwdGpuKernelMod()
: axis_(0),
input_num_(1),
output_size_(0),
all_size_before_axis_(1),
all_size_axis_(1),
kernel_name_("ConcatV2"),
inputs_host_(nullptr),
len_axis_(nullptr) {}
ConcatV2FwdGpuKernelMod() = default;
~ConcatV2FwdGpuKernelMod() override = default;

const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (input_num_ == 0) {
return true;
}
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

T *output = GetDeviceAddress<T>(outputs, 0);
T **inputs_device = GetDeviceAddress<T *>(workspace, 0);
int *len_axis_device = GetDeviceAddress<int>(workspace, 1);
int current_dim = 0;
for (size_t i = 0; i < inputs.size(); i++) {
T *input = GetPossiblyNullDeviceAddress<T>(inputs, i);
if (input != nullptr) {
inputs_host_[current_dim] = input;
current_dim++;
}
}
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(inputs_device, inputs_host_.get(), sizeof(T *) * input_num_,
cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
"ConcatV2 opt cudaMemcpyAsync inputs failed");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(len_axis_device, len_axis_.get(), sizeof(int) * input_num_,
cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
"ConcatV2 opt cudaMemcpyAsync length on axis failed");
ConcatKernel(output_size_, input_num_, all_size_before_axis_, all_size_axis_, len_axis_device, inputs_device,
output, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
if (!CheckParam(kernel_node)) {
return false;
}
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
int dims = SizeToInt(input_shape.size());
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < -dims || axis_ >= dims) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' must be in the range [-" << dims << "," << dims
<< "), but got " << axis_;
}
if (axis_ < 0) {
axis_ += dims;
}
auto origin_data_format = AnfAlgo::GetOriginDataFormat(kernel_node);
auto input_format = AnfAlgo::GetInputFormat(kernel_node, 0);
axis_ = AxisTransform(origin_data_format, input_format, axis_);

input_num_ = SizeToInt(common::AnfAlgo::GetInputTensorNum(kernel_node));
inputs_host_ = std::make_unique<T *[]>(input_num_);
len_axis_ = std::make_unique<int[]>(input_num_);
int current_dim = 0;
for (int i = 0; i < input_num_; i++) {
size_t input_size = 1;
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i);
for (size_t j = 0; j < input_shape.size(); j++) {
input_size *= static_cast<size_t>(input_shape[j]);
}

if (input_size == 0) {
input_num_--;
} else {
input_size_list_.push_back(input_size * sizeof(T));
len_axis_[current_dim] = LongToInt(input_shape[axis_]);
current_dim++;
}
}
workspace_size_list_.push_back(sizeof(T *) * input_num_);
workspace_size_list_.push_back(sizeof(int) * input_num_);

auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
output_size_ = SizeOf(output_shape);
for (int i = 0; i < SizeToInt(output_shape.size()); i++) {
if (i > axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
all_size_axis_ *= LongToInt(output_shape[i]);
}
if (i == axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
}
}
output_size_list_.push_back(output_size_ * sizeof(T));
InitSizeLists();
return true;
}

void ResetResource() noexcept override {
ResetSizeLists();
axis_ = 0;
input_num_ = 1;
output_size_ = 0;
all_size_before_axis_ = 1;
all_size_axis_ = 1;
kernel_name_ = "ConcatV2";
inputs_host_ = nullptr;
len_axis_ = nullptr;
}

protected:
void InitSizeLists() override {}
std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

private:
bool CheckParam(const CNodePtr &kernel_node) {
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num;
}
return true;
}
int axis_;
int input_num_;
size_t output_size_;
int all_size_before_axis_;
int all_size_axis_;
std::string kernel_name_;
std::unique_ptr<T *[]> inputs_host_;
std::unique_ptr<int[]> len_axis_;
template <typename T>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

int axis_{0};
int ori_axis_{0};
int input_num_{1};
size_t output_size_{0};
int all_size_before_axis_{1};
int all_size_axis_{1};
std::string kernel_name_{"ConcatV2"};
std::vector<int> not_null_input_index_;
std::vector<int> len_axis_;
std::vector<void *> inputs_host_;
std::string origin_data_format_;
void *stream_ptr_{nullptr};
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_ARRAYS_CONCATV2_GPU_KERNEL_H_
@ -189,6 +189,7 @@ bool SliceGpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std:
bool SliceGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
auto kernel_ptr = std::dynamic_pointer_cast<ops::Slice>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
kernel_name_ = kernel_ptr->name();
auto tensor_attr = GetKernelAttrFromTensors(inputs, outputs);
auto [is_match, index] = MatchKernelAttr(tensor_attr, GetOpSupport());
@ -15,17 +15,92 @@
*/

#include "plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_gpu_kernel.h"
#include "ops/adaptive_avg_pool_2d.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(AdaptiveAvgPool2D,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
AdaptiveAvgPool2DKernelMod, half)
MS_REG_GPU_KERNEL_ONE(AdaptiveAvgPool2D,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
AdaptiveAvgPool2DKernelMod, float)
MS_REG_GPU_KERNEL_ONE(AdaptiveAvgPool2D,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
AdaptiveAvgPool2DKernelMod, double)
constexpr uint kNumberTwo = 2;
constexpr uint kNumberThree = 3;

using KernelRunFunc = AdaptiveAvgPool2DKernelMod::KernelRunFunc;
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &AdaptiveAvgPool2DKernelMod::GetFuncList() const {
static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&AdaptiveAvgPool2DKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&AdaptiveAvgPool2DKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&AdaptiveAvgPool2DKernelMod::LaunchKernel<double>}};
return func_list;
}

template <typename T>
bool AdaptiveAvgPool2DKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);

ApplyAdaptiveAvgPool2D(size_, input_height_, input_width_, output_height_, output_width_, input_addr, output_addr,
reinterpret_cast<cudaStream_t>(stream_ptr_));

return true;
}

bool AdaptiveAvgPool2DKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
return kernel_func_(this, inputs, workspace, outputs);
}

bool AdaptiveAvgPool2DKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
kernel_name_ = base_operator->name();
size_t input_num = inputs.size();
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs must be 1, but got " << input_num;
}
return true;
}

int AdaptiveAvgPool2DKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}

auto input_shape = inputs[0]->GetShapeVector();
auto output_shape = outputs[0]->GetShapeVector();
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
return KRET_OK;
}
len_ = static_cast<uint>(input_shape.size());
if (len_ < kNumberTwo) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than " << kNumberTwo
<< ", but got " << len_;
}
input_height_ = static_cast<uint>(input_shape[len_ - kNumberTwo]);
input_width_ = static_cast<uint>(input_shape[len_ - 1]);
size_ = static_cast<uint>(len_ == kNumberThree ? input_shape[0] : input_shape[0] * input_shape[1]);

uint out_len = static_cast<uint>(output_shape.size());
if (out_len < kNumberTwo) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than " << kNumberTwo
<< ", but got " << out_len;
}
output_height_ = static_cast<uint>(output_shape[out_len - kNumberTwo]);
output_width_ = static_cast<uint>(output_shape[out_len - 1]);
return KRET_OK;
}

MS_KERNEL_FACTORY_REG(NativeGpuKernelMod, AdaptiveAvgPool2D, AdaptiveAvgPool2DKernelMod);
} // namespace kernel
} // namespace mindspore
@ -14,115 +14,56 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAPTIVEAVGPOOL2D_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAPTIVEAVGPOOL2D_GPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_NN_ADAPTIVE_AVG_POOL2D_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_NN_ADAPTIVE_AVG_POOL2D_GPU_KERNEL_H_

#include <vector>
#include <string>
#include <algorithm>
#include <utility>
#include <map>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/adaptive_avg_pool2d_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T>
class AdaptiveAvgPool2DKernelMod : public DeprecatedNativeGpuKernelMod {

class AdaptiveAvgPool2DKernelMod : public NativeGpuKernelMod, public MatchKernelHelper<AdaptiveAvgPool2DKernelMod> {
public:
AdaptiveAvgPool2DKernelMod()
: input_size_(0),
output_size_(0),
len(0),
input_height(0),
input_width(0),
output_height(0),
output_width(0),
size(0),
is_null_input_(false),
kernel_name_("AdaptiveAvgPool2D") {}
AdaptiveAvgPool2DKernelMod() = default;
~AdaptiveAvgPool2DKernelMod() override = default;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

ApplyAdaptiveAvgPool2D(size, input_height, input_width, output_height, output_width, input_addr, output_addr,
reinterpret_cast<cudaStream_t>(stream_ptr));
std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

return true;
}
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
auto shape_addr = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "output_size");
kernel_node_ = kernel_node;
if (shape_addr.size() == 1) {
output_height = shape_addr[0];
output_width = shape_addr[0];
} else if (shape_addr.size() == 2) {
output_height = static_cast<uint>(shape_addr[0]);
output_width = static_cast<uint>(shape_addr[1]);
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'output_size' must be 1 or 2, but got "
<< shape_addr.size();
}
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs must be 1, but got " << input_num;
}

input_size_ = sizeof(T);
output_size_ = sizeof(T);

auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, output_shape})) {
InitSizeLists();
return true;
}
len = static_cast<uint>(input_shape.size());

if (len < 2) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< len;
}

input_height = static_cast<uint>(input_shape[len - 2]);
input_width = static_cast<uint>(input_shape[len - 1]);
size = static_cast<uint>(len == 3 ? input_shape[0] : input_shape[0] * input_shape[1]);
input_size_ *= SizeOf(input_shape);
output_size_ *= SizeOf(output_shape);

InitSizeLists();
return true;
}

protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
}
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

private:
size_t input_size_;
size_t output_size_;
uint len;
uint input_height;
uint input_width;
uint output_height;
uint output_width;
uint size;
bool is_null_input_;
std::string kernel_name_;
template <typename T>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

uint len_{0};
uint input_height_{0};
uint input_width_{0};
uint output_height_{0};
uint output_width_{0};
uint size_{0};
bool is_null_input_{false};
std::string kernel_name_{"AdaptiveAvgPool2D"};
void *stream_ptr_{nullptr};
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAPTIVEAVGPOOL2D_GPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_NN_ADAPTIVE_AVG_POOL2D_GPU_KERNEL_H_
@ -15,19 +15,74 @@
*/

#include "plugin/device/gpu/kernel/sponge/angle/angle_atom_energy_kernel.h"
#include "ops/angle_atom_energy.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(AngleAtomEnergy,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
AngleAtomEnergyGpuKernelMod, float, int)
using KernelRunFunc = AngleAtomEnergyGpuKernelMod::KernelRunFunc;
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &AngleAtomEnergyGpuKernelMod::GetFuncList() const {
static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
{KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
&AngleAtomEnergyGpuKernelMod::LaunchKernel<float, int>}};
return func_list;
}

bool AngleAtomEnergyGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
auto kernel_ptr = std::dynamic_pointer_cast<ops::AngleAtomEnergy>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
kernel_name_ = kernel_ptr->name();
angle_numbers_ = static_cast<int>(kernel_ptr->get_angle_numbers());
return true;
}

int AngleAtomEnergyGpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
auto shape_uint_crd = inputs[0]->GetShapeVector();
ele_uint_crd_ = SizeOf(shape_uint_crd);
return KRET_OK;
}

template <typename T, typename T1>
bool AngleAtomEnergyGpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
auto scaler_f = GetDeviceAddress<T>(inputs, 1);
auto atom_a = GetDeviceAddress<const T1>(inputs, 2);
auto atom_b = GetDeviceAddress<const T1>(inputs, 3);
auto atom_c = GetDeviceAddress<const T1>(inputs, 4);
auto angle_k = GetDeviceAddress<T>(inputs, 5);
auto angle_theta0 = GetDeviceAddress<T>(inputs, 6);

auto ene = GetDeviceAddress<T>(outputs, 0);
AngleAtomEnergy(angle_numbers_, ele_uint_crd_, uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0,
ene, reinterpret_cast<cudaStream_t>(stream_ptr_));
return true;
}

bool AngleAtomEnergyGpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
return kernel_func_(this, inputs, workspace, outputs);
}

MS_KERNEL_FACTORY_REG(NativeGpuKernelMod, AngleAtomEnergy, AngleAtomEnergyGpuKernelMod);
} // namespace kernel
} // namespace mindspore
@ -14,87 +14,49 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include <utility>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class AngleAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod {
class AngleAtomEnergyGpuKernelMod : public NativeGpuKernelMod, public MatchKernelHelper<AngleAtomEnergyGpuKernelMod> {
public:
AngleAtomEnergyGpuKernelMod() : ele_uint_crd(1) {}
AngleAtomEnergyGpuKernelMod() = default;
~AngleAtomEnergyGpuKernelMod() override = default;

bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
angle_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "angle_numbers"));
auto shape_uint_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_scaler = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_atom_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_atom_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_atom_c = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_angle_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_angle_theta0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

ele_uint_crd *= SizeOf(shape_uint_crd);
ele_scaler *= SizeOf(shape_scaler);
ele_atom_a *= SizeOf(shape_atom_a);
ele_atom_b *= SizeOf(shape_atom_b);
ele_atom_c *= SizeOf(shape_atom_c);
ele_angle_k *= SizeOf(shape_angle_k);
ele_angle_theta0 *= SizeOf(shape_angle_theta0);
std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); };

InitSizeLists();
return true;
}
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
auto scaler_f = GetDeviceAddress<T>(inputs, 1);
auto atom_a = GetDeviceAddress<const T1>(inputs, 2);
auto atom_b = GetDeviceAddress<const T1>(inputs, 3);
auto atom_c = GetDeviceAddress<const T1>(inputs, 4);
auto angle_k = GetDeviceAddress<T>(inputs, 5);
auto angle_theta0 = GetDeviceAddress<T>(inputs, 6);
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

auto ene = GetDeviceAddress<T>(outputs, 0);
AngleAtomEnergy(angle_numbers, ele_uint_crd, uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0,
ene, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}

protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_uint_crd * sizeof(T1));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(ele_atom_a * sizeof(T1));
input_size_list_.push_back(ele_atom_b * sizeof(T1));
input_size_list_.push_back(ele_atom_c * sizeof(T1));
input_size_list_.push_back(ele_angle_k * sizeof(T));
input_size_list_.push_back(ele_angle_theta0 * sizeof(T));

output_size_list_.push_back(ele_uint_crd * sizeof(T));
}
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

private:
size_t ele_uint_crd = 1;
size_t ele_scaler = 1;
size_t ele_atom_a = 1;
size_t ele_atom_b = 1;
size_t ele_atom_c = 1;
size_t ele_angle_k = 1;
size_t ele_angle_theta0 = 1;
template <typename T, typename T1>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

int angle_numbers;
int angle_numbers_{0};
size_t ele_uint_crd_{1};
void *stream_ptr_{nullptr};
};
} // namespace kernel
} // namespace mindspore
#endif

#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
@ -0,0 +1,39 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_H_
#define MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_H_
#include <memory>
#include <vector>

#include "ops/base_operator.h"
#include "mindapi/base/types.h"
#include "ops/adaptive_avg_pool_2d_v1.h"

namespace mindspore {
namespace ops {
constexpr auto kNameAdaptiveAvgPool2D = "AdaptiveAvgPool2D";
class MIND_API AdaptiveAvgPool2D : public AdaptiveAvgPool2DV1 {
public:
MIND_API_BASE_MEMBER(AdaptiveAvgPool2D);

/// \brief Constructor.
AdaptiveAvgPool2D() : AdaptiveAvgPool2DV1(kNameAdaptiveAvgPool2D) {}
};
} // namespace ops
} // namespace mindspore

#endif // MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_H_
@ -15,6 +15,7 @@
*/

#include "ops/adaptive_avg_pool_2d_v1.h"
#include "ops/adaptive_avg_pool_2d.h"

#include <algorithm>
#include <set>
@ -34,10 +35,18 @@ abstract::ShapePtr AdaptiveAvgPool2DV1InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
auto op_name = primitive->name();
auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
const int64_t input_num_dims = SizeToLong(x_shape.size());
CheckAndConvertUtils::CheckInRange("dim of x", input_num_dims, kIncludeBoth, {3, 4}, op_name);
for (size_t i = 0; i < x_shape.size(); i++) {
CheckAndConvertUtils::CheckInteger(std::to_string(i) + "th dimension of x", x_shape[i], kGreaterEqual, 1, op_name);
if (!IsDynamicRank(x_shape)) {
const int64_t input_num_dims = SizeToLong(x_shape.size());
CheckAndConvertUtils::CheckInRange("dim of x", input_num_dims, kIncludeBoth, {3, 4}, op_name);
} else {
return std::make_shared<abstract::Shape>(x_shape);
}

if (!IsDynamicShape(x_shape)) {
for (size_t i = 0; i < x_shape.size(); i++) {
CheckAndConvertUtils::CheckInteger(std::to_string(i) + "th dimension of x", x_shape[i], kGreaterEqual, 1,
op_name);
}
}

const auto &output_size_ptr = primitive->GetAttr("output_size");
@ -67,6 +76,7 @@ TypePtr AdaptiveAvgPool2DV1InferType(const PrimitivePtr &primitive, const std::v
} // namespace

MIND_API_OPERATOR_IMPL(AdaptiveAvgPool2DV1, BaseOperator);
MIND_API_OPERATOR_IMPL(AdaptiveAvgPool2D, AdaptiveAvgPool2DV1);
AbstractBasePtr AdaptiveAvgPool2DV1Infer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
@ -76,8 +86,8 @@ AbstractBasePtr AdaptiveAvgPool2DV1Infer(const abstract::AnalysisEnginePtr &, co
auto shapes = AdaptiveAvgPool2DV1InferShape(primitive, input_args);
return abstract::MakeAbstract(shapes, types);
}

REGISTER_PRIMITIVE_EVAL_IMPL(AdaptiveAvgPool2DV1, prim::kPrimAdaptiveAvgPool2DV1, AdaptiveAvgPool2DV1Infer, nullptr,
true);
REGISTER_PRIMITIVE_EVAL_IMPL(AdaptiveAvgPool2D, prim::kPrimAdaptiveAvgPool2D, AdaptiveAvgPool2DV1Infer, nullptr, true);
} // namespace ops
} // namespace mindspore
@ -18,6 +18,7 @@
#define MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_V1_H_
#include <memory>
#include <vector>
#include <string>

#include "ops/base_operator.h"
#include "mindapi/base/types.h"
@ -29,6 +30,7 @@ class MIND_API AdaptiveAvgPool2DV1 : public BaseOperator {
public:
MIND_API_BASE_MEMBER(AdaptiveAvgPool2DV1);
AdaptiveAvgPool2DV1() : BaseOperator(kNameAdaptiveAvgPool2DV1) { InitIOName({"x"}, {"y"}); }
explicit AdaptiveAvgPool2DV1(const std::string &kName) : BaseOperator(kName) { InitIOName({"x"}, {"y"}); }
};
abstract::AbstractBasePtr AdaptiveAvgPool2DV1Infer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<abstract::AbstractBasePtr> &input_args);
@ -0,0 +1,105 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "ops/angle_atom_energy.h"

#include <set>

#include "ops/op_utils.h"
#include "utils/check_convert_utils.h"
#include "mindapi/src/helper.h"

namespace mindspore {
namespace ops {
MIND_API_OPERATOR_IMPL(AngleAtomEnergy, BaseOperator);
class AngleAtomEnergyInfer : public abstract::OpInferBase {
public:
BaseShapePtr InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const override {
auto prim_name = primitive->name();
(void)CheckAndConvertUtils::CheckInteger("input number", SizeToLong(input_args.size()), kEqual, kInputNum,
prim_name);
auto uint_crd_f_shape_ptr = input_args[kInputIndex0]->BuildShape();
auto uint_crd_f_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(uint_crd_f_shape_ptr)[kShape];
if (!IsDynamic(uint_crd_f_shape)) {
(void)CheckAndConvertUtils::CheckInteger("uint_crd_f_shape", SizeToLong(uint_crd_f_shape.size()), kEqual, kTwo,
prim_name);
(void)CheckAndConvertUtils::CheckInteger("uint_crd_f_shape[1]", SizeToLong(uint_crd_f_shape[1]), kEqual, kThree,
prim_name);
}
auto scaler_f_shape_ptr = input_args[kInputIndex1]->BuildShape();
auto scaler_f_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(scaler_f_shape_ptr)[kShape];
(void)CheckAndConvertUtils::CheckInteger("scaler_f_shape", SizeToLong(scaler_f_shape.size()), kEqual, 1, prim_name);
if (!IsDynamic(scaler_f_shape)) {
(void)CheckAndConvertUtils::CheckInteger("scaler_f_shape", SizeToLong(scaler_f_shape[0]), kEqual, kThree,
prim_name);
}
auto angle_numbers = GetValue<int64_t>(primitive->GetAttr("angle_numbers"));
for (size_t input_index = 2; input_index < kInputNum; ++input_index) {
auto cur_input_shape_ptr = input_args[input_index]->BuildShape();
auto cur_input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(cur_input_shape_ptr)[kShape];
(void)CheckAndConvertUtils::CheckInteger("input_dim", SizeToLong(cur_input_shape.size()), kEqual, 1, prim_name);
if (!IsDynamic(cur_input_shape)) {
(void)CheckAndConvertUtils::CheckInteger("input_shape", SizeToLong(cur_input_shape[0]), kEqual, angle_numbers,
prim_name);
}
}
ShapeVector out_shape{uint_crd_f_shape[0]};
return std::make_shared<abstract::Shape>(out_shape);
}

TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override {
auto prim_name = primitive->name();
std::set<TypePtr> uint32_type = {kUInt32};
auto uint_crd_f_dtype = input_args[kInputIndex0]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("uint_crd_f", uint_crd_f_dtype, uint32_type, prim_name);
std::set<TypePtr> float32_type = {kFloat32};
auto scaler_f_type = input_args[kInputIndex1]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("scaler_f", scaler_f_type, float32_type, prim_name);
std::set<TypePtr> int32_type = {kInt32};
auto atom_a_type = input_args[kInputIndex2]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("atom_a", atom_a_type, int32_type, prim_name);
auto atom_b_type = input_args[kInputIndex3]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("atom_b", atom_b_type, int32_type, prim_name);
auto atom_c_type = input_args[kInputIndex4]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("atom_c", atom_c_type, int32_type, prim_name);
auto angle_k_type = input_args[kInputIndex5]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("angle_k", angle_k_type, float32_type, prim_name);
auto angle_theta0_type = input_args[kInputIndex6]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("angle_theta0", angle_theta0_type, float32_type, prim_name);
return angle_k_type;
}

private:
static constexpr size_t kInputNum = 7;
static constexpr size_t kTwo = 2;
static constexpr size_t kThree = 3;
};

void AngleAtomEnergy::Init(const int64_t angle_numbers) { this->set_angle_numbers(angle_numbers); }

void AngleAtomEnergy::set_angle_numbers(const int64_t angle_numbers) {
(void)this->AddAttr("angle_numbers", api::MakeValue(angle_numbers));
}

int64_t AngleAtomEnergy::get_angle_numbers() const {
auto value_ptr = GetAttr("angle_numbers");
return GetValue<int64_t>(value_ptr);
}

REGISTER_PRIMITIVE_OP_INFER_IMPL(AngleAtomEnergy, prim::kPrimAngleAtomEnergy, AngleAtomEnergyInfer, false);
} // namespace ops
} // namespace mindspore
@ -0,0 +1,51 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CORE_OPS_ANGLE_ATOM_ENERGY_H_
#define MINDSPORE_CORE_OPS_ANGLE_ATOM_ENERGY_H_

#include <map>
#include <vector>
#include <string>
#include <memory>

#include "ops/base_operator.h"
#include "mindapi/base/types.h"

namespace mindspore {
namespace ops {
constexpr auto kNameAngleAtomEnergy = "AngleAtomEnergy";
/// \brief AngleAtomEnergy operation. Refer to Python API @ref mindspore.ops.AngleAtomEnergy for more details.
class MIND_API AngleAtomEnergy : public BaseOperator {
public:
MIND_API_BASE_MEMBER(AngleAtomEnergy);
/// \brief Constructor.
AngleAtomEnergy() : BaseOperator(kNameAngleAtomEnergy) {
InitIOName({"uint_crd_f", "scaler_f", "atom_a", "atom_b", "atom_c", "angle_k", "angle_theta0"}, {"ene"});
}
/// \brief Init. Refer to the parameters of Python API @ref mindspore.ops.AngleAtomEnergy for the inputs.
void Init(const int64_t angle_numbers);
/// \brief Set angle_numbers.
void set_angle_numbers(const int64_t angle_numbers);
/// \brief Get angle_numbers.
///
/// \return angle_numbers.
int64_t get_angle_numbers() const;
};
} // namespace ops
} // namespace mindspore

#endif // MINDSPORE_CORE_OPS_ANGLE_ATOM_ENERGY_H_
@ -741,6 +741,7 @@ GVAR_DEF(PrimitivePtr, kPrimApplyGradientDescent, std::make_shared<Primitive>("A
GVAR_DEF(PrimitivePtr, kPrimApplyPowerSignD, std::make_shared<Primitive>("ApplyPowerSign"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool3D, std::make_shared<Primitive>("AdaptiveAvgPool3D"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool3DGrad, std::make_shared<Primitive>("AdaptiveAvgPool3DGrad"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool2D, std::make_shared<Primitive>("AdaptiveAvgPool2D"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool2DV1, std::make_shared<Primitive>("AdaptiveAvgPool2DV1"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool2DGradV1, std::make_shared<Primitive>("AdaptiveAvgPool2DGradV1"));
GVAR_DEF(PrimitivePtr, kPrimBesselI0e, std::make_shared<Primitive>("BesselI0e"));

@ -1563,6 +1564,9 @@ GVAR_DEF(PrimitivePtr, kPrimAdamApplyOneWithDecayAssign, std::make_shared<Primit
// OCR Ops
GVAR_DEF(PrimitivePtr, kPrimOCRRecognitionPreHandle, std::make_shared<Primitive>("OCRRecognitionPreHandle"));

// Sponge Ops
GVAR_DEF(PrimitivePtr, kPrimAngleAtomEnergy, std::make_shared<Primitive>("AngleAtomEnergy"));

class DoSignaturePrimitive : public Primitive {
 public:
  explicit DoSignaturePrimitive(const std::string &name, const ValuePtr &function)
@ -211,80 +211,6 @@ class AdaptiveAvgPool3D(Primitive):
        self.init_prim_io_names(inputs=['x'], outputs=['y'])


class AdaptiveAvgPool2D(PrimitiveWithInfer):
    r"""
    2D adaptive average pooling for temporal data.

    Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more detail.

    Supported Platforms:
        ``GPU``

    Examples:
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]]]
        >>> # case 2: output_size=2
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[4.5 5.5]]
          [[4.5 5.5]]
          [[4.5 5.5]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveAvgPool2D."""
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        if isinstance(output_size, tuple):
            validator.check_int(len(output_size), 2, Rel.EQ, 'length of output_size', self.name)
        self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size

    def infer_shape(self, x_shape):
        if len(x_shape) <= len(self.output_size):
            raise ValueError("input_x {} dimension must be larger than output_size {} "
                             "dimension".format(x_shape, self.output_size))
        validator.check_int(len(x_shape), 5, Rel.LT, 'input_x_dimensions', self.name)
        for input_x_dimension in x_shape:
            validator.check_int(input_x_dimension, 0, Rel.GT, 'input_x dimension', self.name)
        zipped = zip(self.output_size, x_shape[-len(self.output_size):])
        out_size = [i if i is not None else j for i, j in zipped]
        for item in out_size:
            validator.check_value_type("item of output_size", item, [int], self.name)
        self.add_prim_attr('output_size', out_size)
        output_shape = x_shape[:len(x_shape) - len(out_size)] + out_size
        return output_shape

    def infer_dtype(self, x_dtype):
        validator.check_tensor_dtype_valid("x_dtype", x_dtype, [mstype.float16, mstype.float32, mstype.float64],
                                           self.name)
        return x_dtype

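The shape rule implemented by the removed infer_shape above is easy to check by hand; the following standalone sketch (not part of this commit, with sizes taken from case 1 of the docstring) reruns the same arithmetic in plain Python.

x_shape = [1, 3, 3, 3]        # NCHW shape of the docstring's input_x
output_size = (None, 2)       # None keeps the corresponding input dimension
zipped = zip(output_size, x_shape[-len(output_size):])
out_size = [i if i is not None else j for i, j in zipped]
output_shape = x_shape[:len(x_shape) - len(out_size)] + out_size
print(out_size)       # [3, 2]
print(output_shape)   # [1, 3, 3, 2], matching the (None, 2) output printed in case 1
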
class AdaptiveAvgPool2DV1(Primitive):
    r"""
    AdaptiveAvgPool2DV1 operation.

@ -393,6 +319,57 @@ class AdaptiveAvgPool2DV1(Primitive):
        self.add_prim_attr('output_size', self.output_size)


class AdaptiveAvgPool2D(AdaptiveAvgPool2DV1):
    r"""
    2D adaptive average pooling for temporal data.

    Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more detail.

    Supported Platforms:
        ``GPU``

    Examples:
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]]]
        >>> # case 2: output_size=2
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[4.5 5.5]]
          [[4.5 5.5]]
          [[4.5 5.5]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveAvgPool2D."""
        super(AdaptiveAvgPool2D, self).__init__(output_size)

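After this refactor, AdaptiveAvgPool2D is only a thin wrapper that forwards output_size to AdaptiveAvgPool2DV1, so both primitives accept the same argument forms. The sketch below is not part of this commit; it assumes both classes are exported under mindspore.ops and that a GPU backend is available, and it reuses case 1 of the docstring.

import numpy as np
import mindspore
from mindspore import Tensor, ops

# Same 1 x 3 x 3 x 3 input as docstring case 1.
input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]] * 3]),
                 mindspore.float32)
pool = ops.AdaptiveAvgPool2D((None, 2))
print(isinstance(pool, ops.AdaptiveAvgPool2DV1))   # True: the subclass delegates to V1
print(pool(input_x).shape)                         # (1, 3, 3, 2), as in case 1
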
class AdaptiveMaxPool2D(Primitive):
    r"""
    AdaptiveMaxPool2D operation.


@ -20,7 +20,7 @@ Note:

import math

from ..primitive import PrimitiveWithInfer, prim_attr_register
from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register
from ..._checkparam import Rel
from ..._checkparam import Validator as validator
from ...common import dtype as mstype
@ -1053,7 +1053,7 @@ class AngleEnergy(PrimitiveWithInfer):
        return angle_k_type


class AngleAtomEnergy(PrimitiveWithInfer):
class AngleAtomEnergy(Primitive):
    """
    Add the potential energy caused by angle terms to the total potential
    energy of each atom. Assume the number of angles is m and the

@ -1103,39 +1103,6 @@ class AngleAtomEnergy(PrimitiveWithInfer):
                                outputs=['ene'])
        self.add_prim_attr('angle_numbers', self.angle_numbers)

    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
                    angle_theta0_shape):
        cls_name = self.name
        n = uint_crd_f_shape[0]
        m = self.angle_numbers
        validator.check_int(len(uint_crd_f_shape), 2, Rel.EQ, "uint_crd_f_dim", cls_name)
        validator.check_int(len(scaler_f_shape), 1, Rel.EQ, "scaler_f_dim", cls_name)
        validator.check_int(len(atom_a_shape), 1, Rel.EQ, "atom_a_dim", cls_name)
        validator.check_int(len(atom_b_shape), 1, Rel.EQ, "atom_b_dim", cls_name)
        validator.check_int(len(atom_c_shape), 1, Rel.EQ, "atom_c_dim", cls_name)
        validator.check_int(len(angle_k_shape), 1, Rel.EQ, "angle_k_dim", cls_name)
        validator.check_int(len(angle_theta0_shape), 1, Rel.EQ, "angle_theta0_dim", cls_name)

        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape[1]", cls_name)
        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
        validator.check_int(atom_a_shape[0], m, Rel.EQ, "atom_a_shape", cls_name)
        validator.check_int(atom_b_shape[0], m, Rel.EQ, "atom_b_shape", cls_name)
        validator.check_int(atom_c_shape[0], m, Rel.EQ, "atom_c_shape", cls_name)
        validator.check_int(angle_k_shape[0], m, Rel.EQ, "angle_k_shape", cls_name)
        validator.check_int(angle_theta0_shape[0], m, Rel.EQ, "angle_theta0_shape", cls_name)
        return [n,]

    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
                    angle_theta0_type):
        validator.check_tensor_dtype_valid('uint_crd_f', uint_crd_f_dtype, [mstype.uint32], self.name)
        validator.check_tensor_dtype_valid('scaler_f', scaler_f_type, [mstype.float32], self.name)
        validator.check_tensor_dtype_valid('atom_a', atom_a_type, [mstype.int32], self.name)
        validator.check_tensor_dtype_valid('atom_b', atom_b_type, [mstype.int32], self.name)
        validator.check_tensor_dtype_valid('atom_c', atom_c_type, [mstype.int32], self.name)
        validator.check_tensor_dtype_valid('angle_k', angle_k_type, [mstype.float32], self.name)
        validator.check_tensor_dtype_valid('angle_theta0', angle_theta0_type, [mstype.float32], self.name)
        return angle_k_type

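The infer_shape and infer_dtype methods removed above document the call contract that now lives in the C++ infer implementation. The hedged sketch below reconstructs that contract with illustrative sizes (n=4 atoms, m=2 angles); it assumes the primitive is reachable as mindspore.ops.AngleAtomEnergy, as the new ops header's doc comment suggests, and that a GPU backend is available since this is a Sponge op.

import numpy as np
import mindspore
from mindspore import Tensor, ops

n, m = 4, 2                                   # n atoms, m angles (illustrative sizes)
angle_atom_energy = ops.AngleAtomEnergy(angle_numbers=m)
ene = angle_atom_energy(
    Tensor(np.zeros((n, 3), np.uint32)),      # uint_crd_f:   (n, 3), uint32
    Tensor(np.ones(3, np.float32)),           # scaler_f:     (3,),   float32
    Tensor(np.array([0, 1], np.int32)),       # atom_a:       (m,),   int32
    Tensor(np.array([1, 2], np.int32)),       # atom_b:       (m,),   int32
    Tensor(np.array([2, 3], np.int32)),       # atom_c:       (m,),   int32
    Tensor(np.ones(m, np.float32)),           # angle_k:      (m,),   float32
    Tensor(np.zeros(m, np.float32)),          # angle_theta0: (m,),   float32
)
print(ene.shape, ene.dtype)                   # (4,) Float32, matching the removed infer rules
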
class AngleForceWithAtomEnergy(PrimitiveWithInfer):
    """