forked from mindspore-Ecosystem/mindspore
!33220 [MS][OPS]add new cpu operator Ger
Merge pull request !33220 from KXiong/master
This commit is contained in:
commit
845e9c5451
|
@ -0,0 +1,146 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "plugin/device/cpu/kernel/ger_cpu_kernel.h"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <functional>
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
|
||||
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
// Number of input tensors the Ger kernel is checked against.
constexpr size_t kGerInputsNum = 2;
// Number of output tensors the Ger kernel is checked against.
constexpr size_t kGerOutputsNum = 1;
|
||||
|
||||
template <typename T>
|
||||
class GerCpuTypeFunc : public CpuKernelFunc {
|
||||
public:
|
||||
GerCpuTypeFunc() = default;
|
||||
~GerCpuTypeFunc() override = default;
|
||||
bool RunFunc(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs) override {
|
||||
const auto *input1 = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
const auto *input2 = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
auto *output = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
compute_func_(this, input1, input2, output);
|
||||
return true;
|
||||
}
|
||||
|
||||
void InitFunc(const CNodePtr &kernel_node) override {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
CHECK_KERNEL_INPUTS_NUM(input_num, kGerInputsNum, common::AnfAlgo::GetCNodeName(kernel_node));
|
||||
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
CHECK_KERNEL_OUTPUTS_NUM(output_num, kGerOutputsNum, common::AnfAlgo::GetCNodeName(kernel_node));
|
||||
input_type_1_ = AnfAlgo::GetInputDeviceDataType(kernel_node, 0);
|
||||
input_type_2_ = AnfAlgo::GetOutputDeviceDataType(kernel_node, 0);
|
||||
if (input_type_1_ != input_type_2_) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_
|
||||
<< "', input1 and input2 must have the same type. But got input1 type " << input_type_1_
|
||||
<< ", input2 type " << input_type_2_;
|
||||
}
|
||||
input_shape_1_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
input_shape_2_ = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
output_shape_ = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
|
||||
compute_func_ = &GerCpuTypeFunc<T>::Compute;
|
||||
}
|
||||
|
||||
private:
|
||||
std::string kernel_name_;
|
||||
TypeId input_type_1_{kTypeUnknown};
|
||||
TypeId input_type_2_{kTypeUnknown};
|
||||
std::vector<size_t> input_shape_1_;
|
||||
std::vector<size_t> input_shape_2_;
|
||||
std::vector<size_t> output_shape_;
|
||||
|
||||
void Compute(const T *input1, const T *input2, T *output);
|
||||
|
||||
using TypeComputeFunc = std::function<void(GerCpuTypeFunc *, const T *, const T *, T *)>;
|
||||
TypeComputeFunc compute_func_{nullptr};
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
void GerCpuTypeFunc<T>::Compute(const T *input1, const T *input2, T *output) {
|
||||
if (output_shape_.size() == 0) {
|
||||
(void)output_shape_.insert(output_shape_.begin(), 1);
|
||||
}
|
||||
size_t output_size_ = 1;
|
||||
for (size_t i = 0; i < output_shape_.size(); ++i) {
|
||||
output_size_ *= output_shape_[i];
|
||||
}
|
||||
size_t input2_size_ = input_shape_2_[0];
|
||||
auto task = [&input1, &input2, &output, input2_size_](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
size_t input1_index = static_cast<size_t>(i / input2_size_);
|
||||
size_t input2_index = static_cast<size_t>(i % input2_size_);
|
||||
output[i] = static_cast<T>(input1[input1_index] * input2[input2_index]);
|
||||
}
|
||||
};
|
||||
ParallelLaunchAutoSearch(task, output_size_, this, ¶llel_search_info_);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::shared_ptr<CpuKernelFunc> SpecializeGerFunc() {
|
||||
return std::make_shared<GerCpuTypeFunc<T>>();
|
||||
}
|
||||
using GerCpuFuncCreator = std::function<std::shared_ptr<CpuKernelFunc>()>;
|
||||
static std::vector<std::pair<KernelAttr, GerCpuFuncCreator>> kernel_attr_lists = {
|
||||
{{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
||||
SpecializeGerFunc<float16>},
|
||||
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
SpecializeGerFunc<float>}}};
|
||||
} // namespace
|
||||
|
||||
void GerCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
|
||||
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
if (kernel_name_ != kernel_type_) {
|
||||
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', kernel type should be '" << kernel_name_ << "', but got "
|
||||
<< kernel_type_;
|
||||
}
|
||||
|
||||
auto kernel_attr = GetKernelAttrFromNode(kernel_node);
|
||||
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
|
||||
if (!is_match) {
|
||||
MS_LOG(EXCEPTION) << "'" << kernel_name_ << "' does not support this kernel data type: " << kernel_attr;
|
||||
}
|
||||
|
||||
func_obj_ = kernel_attr_lists[index].second();
|
||||
func_obj_->InitFunc(kernel_node);
|
||||
}
|
||||
|
||||
std::vector<KernelAttr> GerCpuKernelMod::GetOpSupport() {
|
||||
std::vector<KernelAttr> support_list;
|
||||
(void)std::transform(kernel_attr_lists.begin(), kernel_attr_lists.end(), std::back_inserter(support_list),
|
||||
[](const std::pair<KernelAttr, GerCpuFuncCreator> &pair) { return pair.first; });
|
||||
|
||||
return support_list;
|
||||
}
|
||||
|
||||
// Registers a creator for "Ger" that builds a GerCpuKernelMod tagged with the primitive's name.
MS_KERNEL_FACTORY_REG_BY_CREATOR(NativeCpuKernelMod, Ger, []() {
  const auto &op_name = prim::kPrimGer->name();
  return std::make_shared<GerCpuKernelMod>(op_name);
});
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,54 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GER_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GER_CPU_KERNEL_H_
|
||||
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <complex>
|
||||
|
||||
#include "plugin/device/cpu/kernel/cpu_kernel.h"
|
||||
#include "plugin/factory/ms_factory.h"
|
||||
#include "plugin/device/cpu/kernel/nnacl/arithmetic.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
class GerCpuKernelMod : public DeprecatedNativeCpuKernelMod {
|
||||
public:
|
||||
GerCpuKernelMod() = default;
|
||||
explicit GerCpuKernelMod(const std::string &kernel_type) : kernel_type_(kernel_type) {}
|
||||
~GerCpuKernelMod() override = default;
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override {
|
||||
return func_obj_->RunFunc(inputs, workspace, outputs);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<KernelAttr> GetOpSupport() override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<CpuKernelFunc> func_obj_;
|
||||
std::string kernel_type_{"Unknown"};
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_GER_CPU_KERNEL_H_
|
|
@ -0,0 +1,77 @@
|
|||
# Copyright 2022 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore import context
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
# Module-wide setup: select graph mode with the CPU device target.
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
|
||||
|
||||
|
||||
class NetGer(nn.Cell):
    """Minimal cell that wraps a single Ger primitive."""

    def __init__(self):
        """Create the wrapped Ger operator."""
        super().__init__()
        self.ger = P.Ger()

    def construct(self, x, y):
        """Apply Ger to x and y and return the result."""
        return self.ger(x, y)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_ger_float16():
    """
    Feature: Ger cpu kernel
    Description: run Ger on float16 vectors of length 4 and 3 and compare with the numpy outer product.
    Expectation: Success.
    """
    lhs = np.array([1, 2, 3, 4], dtype=np.float16)
    rhs = np.array([1, 2, 3], dtype=np.float16)
    output = NetGer()(Tensor(lhs), Tensor(rhs))
    print(output)
    # Reference result: 4x3 matrix with entry (i, j) equal to lhs[i] * rhs[j].
    expect = np.outer(lhs, rhs)
    assert np.allclose(output.asnumpy(), expect)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_ger_float32():
    """
    Feature: Ger cpu kernel
    Description: run Ger on float32 vectors of length 4 and 3 and compare with the numpy outer product.
    Expectation: Success.
    """
    lhs = np.array([1, 2, 3, 4], dtype=np.float32)
    rhs = np.array([1, 2, 3], dtype=np.float32)
    output = NetGer()(Tensor(lhs), Tensor(rhs))
    print(output)
    # Reference result: 4x3 matrix with entry (i, j) equal to lhs[i] * rhs[j].
    expect = np.outer(lhs, rhs)
    assert np.allclose(output.asnumpy(), expect)
|
Loading…
Reference in New Issue