forked from mindspore-Ecosystem/mindspore
!13559 broadcast_to op supported on cpu
From: @wangyanling10 Reviewed-by: Signed-off-by:
This commit is contained in:
commit
04e3dbaad0
|
@ -0,0 +1,121 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "backend/kernel_compiler/cpu/broadcast_to_cpu_kernel.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void BroadcastToCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||||
|
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||||
|
input_shape_ = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||||
|
output_shape_ = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||||
|
|
||||||
|
size_t offset = output_shape_.size() - input_shape_.size();
|
||||||
|
for (size_t i = 0; i < offset; ++i) {
|
||||||
|
input_shape_.insert(input_shape_.begin(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < input_shape_.size(); ++i) {
|
||||||
|
if (output_shape_[i] < input_shape_[i] || output_shape_[i] % input_shape_[i] != 0) {
|
||||||
|
MS_LOG(EXCEPTION) << "Cannot broadcast input tensor with shape " << input_shape_ << " to "
|
||||||
|
<< "output tensor with shape " << output_shape_
|
||||||
|
<< ". Output shape must be the integer times of input shape at the " << i << " dim!";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (size_t j = 0; j < output_shape_.size(); j++) {
|
||||||
|
nums_ *= output_shape_[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
tmp_ptr_ = reinterpret_cast<T *>(malloc(nums_ * sizeof(T)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// BroadcastTo
|
||||||
|
template <typename T>
|
||||||
|
void BroadcastToCPUKernel<T>::BroadcastToImpl(size_t dim) {
|
||||||
|
if (dim == output_shape_.size() - 1) {
|
||||||
|
size_t input_nums = 1;
|
||||||
|
for (size_t j = 0; j < input_shape_.size() - 1; ++j) {
|
||||||
|
input_nums *= input_shape_[j];
|
||||||
|
}
|
||||||
|
size_t rate = output_shape_[dim] / input_shape_[dim];
|
||||||
|
|
||||||
|
for (size_t j = 0; j < input_nums; ++j) {
|
||||||
|
T *in_ptr = input_ptr_ + input_shape_[dim] * j;
|
||||||
|
for (size_t i = 0; i < rate; ++i) {
|
||||||
|
T *out_ptr = tmp_ptr_ + (j * rate + i) * input_shape_[dim];
|
||||||
|
memcpy_s(out_ptr, input_shape_[dim] * sizeof(T), in_ptr, input_shape_[dim] * sizeof(T));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size_t elems = input_shape_[dim] * rate * input_nums;
|
||||||
|
memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
BroadcastToImpl(dim + 1);
|
||||||
|
|
||||||
|
size_t rate = output_shape_[dim] / input_shape_[dim];
|
||||||
|
if (rate > 1) {
|
||||||
|
size_t elems_nums = 1;
|
||||||
|
for (size_t j = output_shape_.size() - 1; j > dim; --j) {
|
||||||
|
elems_nums *= output_shape_[j];
|
||||||
|
}
|
||||||
|
size_t input_nums = 1;
|
||||||
|
for (size_t j = 0; j < dim; ++j) {
|
||||||
|
input_nums *= input_shape_[j];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t j = 0; j < input_nums; ++j) {
|
||||||
|
T *in_ptr = output_ptr_ + elems_nums * j;
|
||||||
|
for (size_t i = 0; i < rate; ++i) {
|
||||||
|
T *out_ptr = tmp_ptr_ + (j * rate + i) * elems_nums;
|
||||||
|
memcpy_s(out_ptr, elems_nums * sizeof(T), in_ptr, elems_nums * sizeof(T));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
size_t elems = elems_nums * rate * input_nums;
|
||||||
|
memcpy_s(output_ptr_, elems * sizeof(T), tmp_ptr_, elems * sizeof(T));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
bool BroadcastToCPUKernel<T>::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||||
|
const std::vector<AddressPtr> &outputs) {
|
||||||
|
if (inputs.size() != 1 || outputs.size() != 1) {
|
||||||
|
MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((inputs[0] == nullptr) || (inputs[0]->size == 0)) {
|
||||||
|
MS_LOG(EXCEPTION) << "Input data is NULL!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((outputs[0] == nullptr) || (outputs[0]->size == 0)) {
|
||||||
|
MS_LOG(EXCEPTION) << "Output data is NULL!";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
input_ptr_ = reinterpret_cast<T *>(inputs[0]->addr);
|
||||||
|
output_ptr_ = reinterpret_cast<T *>(outputs[0]->addr);
|
||||||
|
|
||||||
|
BroadcastToImpl(0);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,65 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2021Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_BROADCAST_TO_CPU_KERNEL_H
|
||||||
|
#define MINDSPORE_BROADCAST_TO_CPU_KERNEL_H
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <memory>
|
||||||
|
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||||
|
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace kernel {
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
class BroadcastToCPUKernel : public CPUKernel {
|
||||||
|
public:
|
||||||
|
BroadcastToCPUKernel() = default;
|
||||||
|
~BroadcastToCPUKernel() override {
|
||||||
|
if (tmp_ptr_ != nullptr) {
|
||||||
|
free(tmp_ptr_);
|
||||||
|
tmp_ptr_ = nullptr;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||||
|
const std::vector<AddressPtr> &outputs) override;
|
||||||
|
void InitKernel(const CNodePtr &kernel_node) override;
|
||||||
|
|
||||||
|
void BroadcastToImpl(size_t dim);
|
||||||
|
|
||||||
|
size_t Index(const size_t &index, const size_t &dim) { return dim == 1 ? 0 : index; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::vector<size_t> input_shape_;
|
||||||
|
std::vector<size_t> output_shape_;
|
||||||
|
size_t nums_{1};
|
||||||
|
T *input_ptr_{nullptr};
|
||||||
|
T *output_ptr_{nullptr};
|
||||||
|
T *tmp_ptr_{nullptr};
|
||||||
|
};
|
||||||
|
|
||||||
|
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||||
|
BroadcastToCPUKernel<float>);
|
||||||
|
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
||||||
|
BroadcastToCPUKernel<int>);
|
||||||
|
MS_REG_CPU_KERNEL(BroadcastTo, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
|
||||||
|
BroadcastToCPUKernel<bool>);
|
||||||
|
} // namespace kernel
|
||||||
|
} // namespace mindspore
|
||||||
|
|
||||||
|
#endif // MINDSPORE_BROADCAST_TO_CPU_KERNEL_H
|
|
@ -118,7 +118,7 @@ class SequentialCell(Cell):
|
||||||
TypeError: If the type of the `args` is not list or OrderedDict.
|
TypeError: If the type of the `args` is not list or OrderedDict.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> conv = nn.Conv2d(3, 2, 3, pad_mode='valid', weight_init="ones")
|
>>> conv = nn.Conv2d(3, 2, 3, pad_mode='valid', weight_init="ones")
|
||||||
|
|
|
@ -555,7 +555,7 @@ class Conv2dTranspose(_Conv):
|
||||||
ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
|
ValueError: If `pad_mode` is not equal to 'pad' and `padding` is not equal to (0, 0, 0, 0).
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
|
>>> net = nn.Conv2dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
|
||||||
|
@ -740,7 +740,7 @@ class Conv1dTranspose(_Conv):
|
||||||
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
|
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
|
>>> net = nn.Conv1dTranspose(3, 64, 4, has_bias=False, weight_init='normal', pad_mode='pad')
|
||||||
|
|
|
@ -81,7 +81,7 @@ class Embedding(Cell):
|
||||||
ValueError: If `padding_idx` is an int which not in range [0, `vocab_size`].
|
ValueError: If `padding_idx` is an int which not in range [0, `vocab_size`].
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> net = nn.Embedding(20000, 768, True)
|
>>> net = nn.Embedding(20000, 768, True)
|
||||||
|
|
|
@ -226,7 +226,7 @@ class SSIM(Cell):
|
||||||
ValueError: If `filter_size` is less than 0.
|
ValueError: If `filter_size` is less than 0.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> net = nn.SSIM()
|
>>> net = nn.SSIM()
|
||||||
|
@ -417,7 +417,7 @@ class PSNR(Cell):
|
||||||
ValueError: If length of shape of `img1` or `img2` is not equal to 4.
|
ValueError: If length of shape of `img1` or `img2` is not equal to 4.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> net = nn.PSNR()
|
>>> net = nn.PSNR()
|
||||||
|
|
|
@ -78,7 +78,7 @@ class ReduceLogSumExp(Cell):
|
||||||
TypeError: If dtype of `x` is neither float16 nor float32.
|
TypeError: If dtype of `x` is neither float16 nor float32.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32))
|
>>> input_x = Tensor(np.random.randn(3, 4, 5, 6).astype(np.float32))
|
||||||
|
@ -926,7 +926,7 @@ class Moments(Cell):
|
||||||
TypeError: If dtype of `input_x` is neither float16 nor float32.
|
TypeError: If dtype of `input_x` is neither float16 nor float32.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> net = nn.Moments(axis=3, keep_dims=True)
|
>>> net = nn.Moments(axis=3, keep_dims=True)
|
||||||
|
|
|
@ -293,7 +293,7 @@ class FakeQuantWithMinMaxObserver(UniformQuantObserver):
|
||||||
TypeError: If `quant_delay` is not greater than or equal to 0.
|
TypeError: If `quant_delay` is not greater than or equal to 0.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> fake_quant = nn.FakeQuantWithMinMaxObserver()
|
>>> fake_quant = nn.FakeQuantWithMinMaxObserver()
|
||||||
|
@ -448,7 +448,7 @@ class Conv2dBnFoldQuantOneConv(Cell):
|
||||||
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
|
ValueError: If `pad_mode` is not one of 'same', 'valid', 'pad'.
|
||||||
|
|
||||||
Supported Platforms:
|
Supported Platforms:
|
||||||
``Ascend`` ``GPU``
|
``Ascend`` ``GPU`` ``CPU``
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
>>> qconfig = compression.quant.create_quant_config()
|
>>> qconfig = compression.quant.create_quant_config()
|
||||||
|
|
|
@ -0,0 +1,95 @@
|
||||||
|
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
import mindspore.context as context
|
||||||
|
from mindspore.common.tensor import Tensor
|
||||||
|
from mindspore.ops import operations as P
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.level0
|
||||||
|
@pytest.mark.platform_x86_gpu_training
|
||||||
|
@pytest.mark.env_onecard
|
||||||
|
def test_broadcast():
|
||||||
|
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||||
|
|
||||||
|
shape = (4, 5, 2, 3, 4, 5, 6)
|
||||||
|
x_np = np.random.rand(2, 3, 1, 5, 1).astype(np.float32)
|
||||||
|
output = P.BroadcastTo(shape)(Tensor(x_np))
|
||||||
|
expect = np.broadcast_to(x_np, shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
shape = (3, 4, 5, 6)
|
||||||
|
x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
|
||||||
|
output = P.BroadcastTo(shape)(Tensor(x_np))
|
||||||
|
expect = np.broadcast_to(x_np, shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
|
||||||
|
output = P.BroadcastTo(shape)(Tensor(x1_np))
|
||||||
|
expect = np.broadcast_to(x1_np, shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
shape = (2, 3, 4, 5)
|
||||||
|
x1_np = np.random.rand(4, 5).astype(np.float32)
|
||||||
|
output = P.BroadcastTo(shape)(Tensor(x1_np))
|
||||||
|
expect = np.broadcast_to(x1_np, shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.level0
|
||||||
|
@pytest.mark.platform_x86_gpu_training
|
||||||
|
@pytest.mark.env_onecard
|
||||||
|
def test_broadcast_dyn_init():
|
||||||
|
"""
|
||||||
|
Test running the op with -1's in the init shape to support varied inputs.
|
||||||
|
"""
|
||||||
|
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||||
|
|
||||||
|
ms_shape = (-1, 4, 5, 6)
|
||||||
|
np_shape = (3, 4, 5, 6)
|
||||||
|
x_np = np.random.rand(3, 1, 5, 1).astype(np.float32)
|
||||||
|
output = P.BroadcastTo(ms_shape)(Tensor(x_np))
|
||||||
|
expect = np.broadcast_to(x_np, np_shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
x1_np = np.random.rand(3, 1, 5, 1).astype(np.float16)
|
||||||
|
output = P.BroadcastTo(ms_shape)(Tensor(x1_np))
|
||||||
|
expect = np.broadcast_to(x1_np, np_shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
ms_shape = (2, 3, -1, 5)
|
||||||
|
np_shape = (2, 3, 4, 5)
|
||||||
|
x1_np = np.random.rand(4, 5).astype(np.float32)
|
||||||
|
output = P.BroadcastTo(ms_shape)(Tensor(x1_np))
|
||||||
|
expect = np.broadcast_to(x1_np, np_shape)
|
||||||
|
assert np.allclose(output.asnumpy(), expect)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.level0
|
||||||
|
@pytest.mark.platform_x86_gpu_training
|
||||||
|
@pytest.mark.env_onecard
|
||||||
|
def test_broadcast_dyn_invalid_init():
|
||||||
|
"""
|
||||||
|
Test running the op with -1's in the init shape in incorrect positions.
|
||||||
|
Expected to fail.
|
||||||
|
"""
|
||||||
|
context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
|
||||||
|
ms_shape = (2, -1, 4, 5)
|
||||||
|
x_np = np.random.rand(4, 5).astype(np.float32)
|
||||||
|
with pytest.raises(ValueError):
|
||||||
|
P.BroadcastTo(ms_shape)(Tensor(x_np))
|
Loading…
Reference in New Issue