forked from mindspore-Ecosystem/mindspore
!20476 Adding 9 object-detection operators in CPU
Merge pull request !20476 from huangbo/object_detection_2
This commit is contained in:
commit
66f4756555
|
@ -0,0 +1,105 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
// Returns the total number of elements described by `shape`:
// the product of all dimensions (1 for an empty shape).
size_t get_element_num(const std::vector<size_t> &shape) {
  size_t element_count = 1;
  for (const auto dim : shape) {
    element_count *= dim;
  }
  return element_count;
}
|
||||
|
||||
template <typename T>
|
||||
bool check_validation(const std::vector<size_t> &shape, const size_t num_before_axis, const size_t num_after_axis,
|
||||
const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (inputs.size() != 1 || outputs.size() != 2) {
|
||||
MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
|
||||
return false;
|
||||
}
|
||||
size_t data_size = sizeof(T);
|
||||
size_t input_size = get_element_num(shape) * data_size;
|
||||
size_t output_num = num_before_axis * num_after_axis;
|
||||
size_t out0_size = output_num * sizeof(int);
|
||||
size_t out1_size = output_num * data_size;
|
||||
if (inputs[0]->size != input_size || outputs[0]->size != out0_size || outputs[1]->size != out1_size) {
|
||||
MS_LOG(EXCEPTION) << "Invalid input or output data size!";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
void ArgMaxWithValueCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
size_t shape_len = shape_.size();
|
||||
int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
|
||||
axis += static_cast<int64_t>(shape_len);
|
||||
if (axis < 0) {
|
||||
MS_LOG(EXCEPTION) << "Invalid axis:" << axis << ", should in range [-1, " << (shape_len - 1) << "]";
|
||||
}
|
||||
axis = axis % static_cast<int64_t>(shape_len);
|
||||
num_before_axis_ = 1;
|
||||
num_after_axis_ = 1;
|
||||
for (size_t i = 0; i < shape_len; i++) {
|
||||
if (static_cast<int64_t>(i) < axis) {
|
||||
num_before_axis_ *= shape_[i];
|
||||
} else if (static_cast<int64_t>(i) > axis) {
|
||||
num_after_axis_ *= shape_[i];
|
||||
}
|
||||
}
|
||||
dim_axis_ = shape_[axis];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool ArgMaxWithValueCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (!check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto input = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto output0 = reinterpret_cast<int32_t *>(outputs[0]->addr);
|
||||
auto output1 = reinterpret_cast<T *>(outputs[1]->addr);
|
||||
|
||||
for (size_t i = 0; i < num_before_axis_; i++) {
|
||||
size_t src_index_i = i * dim_axis_ * num_after_axis_;
|
||||
for (size_t j = 0; j < num_after_axis_; j++) {
|
||||
std::vector<float> array_axis;
|
||||
size_t src_index_j = src_index_i + j;
|
||||
for (size_t k = 0; k < dim_axis_; k++) {
|
||||
size_t src_index_k = k * num_after_axis_ + src_index_j;
|
||||
array_axis.push_back(static_cast<float>(input[src_index_k]));
|
||||
}
|
||||
auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
|
||||
auto max_index = static_cast<int32_t>(std::distance(array_axis.begin(), max_ops));
|
||||
auto dst_index = i * num_after_axis_ + j;
|
||||
output0[dst_index] = max_index;
|
||||
auto src_index = IntToSize(max_index) * num_after_axis_ + src_index_j;
|
||||
output1[dst_index] = input[src_index];
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class ArgMaxWithValueCPUKernel : public CPUKernel {
|
||||
public:
|
||||
ArgMaxWithValueCPUKernel() = default;
|
||||
~ArgMaxWithValueCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
private:
|
||||
std::vector<size_t> shape_;
|
||||
size_t num_before_axis_;
|
||||
size_t num_after_axis_;
|
||||
size_t dim_axis_;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL_T(ArgMaxWithValue, KernelAttr(), ArgMaxWithValueCPUKernel, float);
|
||||
MS_REG_CPU_KERNEL_T(ArgMaxWithValue, KernelAttr(), ArgMaxWithValueCPUKernel, float16);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
|
|
@ -0,0 +1,142 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.h"
#include <cmath>
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
template <typename T>
|
||||
void BoundingBoxDecodeCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(ERROR) << "Input num is " << input_num << ", but BoundingBoxDecode needs 2 inputs.";
|
||||
}
|
||||
|
||||
const size_t coordinate_size = 4;
|
||||
if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueTuple>() ||
|
||||
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
|
||||
means_ = AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, "means");
|
||||
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
|
||||
float mean = AnfAlgo::GetNodeAttr<float>(kernel_node, "means");
|
||||
for (size_t i = 0; i < coordinate_size; i++) {
|
||||
means_.emplace_back(mean);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
|
||||
}
|
||||
|
||||
if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueTuple>() ||
|
||||
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
|
||||
stds_ = AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, "stds");
|
||||
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
|
||||
float std = AnfAlgo::GetNodeAttr<float>(kernel_node, "stds");
|
||||
for (size_t i = 0; i < coordinate_size; i++) {
|
||||
stds_.emplace_back(std);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
|
||||
}
|
||||
|
||||
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
|
||||
MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
|
||||
}
|
||||
|
||||
std::vector<int64_t> max_shape_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "max_shape");
|
||||
(void)std::transform(max_shape_me.begin(), max_shape_me.end(), std::back_inserter(max_shape_),
|
||||
[](const int64_t &value) { return static_cast<int>(value); });
|
||||
wh_ratio_clip_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "wh_ratio_clip");
|
||||
|
||||
if (max_shape_.size() < 2) {
|
||||
MS_LOG(EXCEPTION) << "The size of max_shape is less than 2.";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool BoundingBoxDecodeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
auto anchor_box = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto deltas = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
auto bboxes = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
|
||||
T ms1 = static_cast<T>(max_shape_[0]);
|
||||
T ms2 = static_cast<T>(max_shape_[1]);
|
||||
|
||||
if (inputs[0]->size != inputs[1]->size) {
|
||||
MS_LOG(ERROR) << "Anchor box size must be equal to deltas box size: " << inputs[1]->size << ", but got"
|
||||
<< inputs[0]->size;
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t coordinate = 4;
|
||||
const size_t block_size = inputs[0]->size / sizeof(T);
|
||||
if ((block_size % coordinate) != 0) {
|
||||
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t elem_num = block_size / coordinate;
|
||||
auto task = [&](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
const size_t left_x = i * 4;
|
||||
const size_t left_y = i * 4 + 1;
|
||||
const size_t right_x = i * 4 + 2;
|
||||
const size_t right_y = i * 4 + 3;
|
||||
|
||||
T dx = deltas[left_x] * static_cast<T>(stds_[0]) + static_cast<T>(means_[0]);
|
||||
T dy = deltas[left_y] * static_cast<T>(stds_[1]) + static_cast<T>(means_[1]);
|
||||
T dw = deltas[right_x] * static_cast<T>(stds_[2]) + static_cast<T>(means_[2]);
|
||||
T dh = deltas[right_y] * static_cast<T>(stds_[3]) + static_cast<T>(means_[3]);
|
||||
|
||||
T max_ratio = static_cast<T>(abs(log(wh_ratio_clip_)));
|
||||
|
||||
dw = dw > max_ratio ? max_ratio : (dw < (-max_ratio) ? (-max_ratio) : dw);
|
||||
dh = dh > max_ratio ? max_ratio : (dh < (-max_ratio) ? (-max_ratio) : dh);
|
||||
|
||||
T px = (anchor_box[left_x] + anchor_box[right_x]) * static_cast<T>(0.5);
|
||||
T py = (anchor_box[left_y] + anchor_box[right_y]) * static_cast<T>(0.5);
|
||||
T pw = anchor_box[right_x] - anchor_box[left_x] + static_cast<T>(1.0);
|
||||
T ph = anchor_box[right_y] - anchor_box[left_y] + static_cast<T>(1.0);
|
||||
|
||||
T gx = px + pw * dx;
|
||||
T gy = py + ph * dy;
|
||||
T gw = pw * exp(dw);
|
||||
T gh = ph * exp(dh);
|
||||
|
||||
T x1 = gx - gw * static_cast<T>(0.5) + static_cast<T>(0.5);
|
||||
T y1 = gy - gh * static_cast<T>(0.5) + static_cast<T>(0.5);
|
||||
T x2 = gx + gw * static_cast<T>(0.5) - static_cast<T>(0.5);
|
||||
T y2 = gy + gh * static_cast<T>(0.5) - static_cast<T>(0.5);
|
||||
|
||||
x1 = x1 > ms2 ? ms2 : (x1 < static_cast<T>(0) ? static_cast<T>(0) : x1);
|
||||
y1 = y1 > ms1 ? ms1 : (y1 < static_cast<T>(0) ? static_cast<T>(0) : y1);
|
||||
x2 = x2 > ms2 ? ms2 : (x2 < static_cast<T>(0) ? static_cast<T>(0) : x2);
|
||||
y2 = y2 > ms1 ? ms1 : (y2 < static_cast<T>(0) ? static_cast<T>(0) : y2);
|
||||
|
||||
bboxes[left_x] = x1;
|
||||
bboxes[left_y] = y1;
|
||||
bboxes[right_x] = x2;
|
||||
bboxes[right_y] = y2;
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, elem_num);
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,56 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class BoundingBoxDecodeCPUKernel : public CPUKernel {
|
||||
public:
|
||||
BoundingBoxDecodeCPUKernel() = default;
|
||||
~BoundingBoxDecodeCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
private:
|
||||
std::vector<float> means_;
|
||||
std::vector<float> stds_;
|
||||
std::vector<int> max_shape_;
|
||||
float wh_ratio_clip_;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL_T(
|
||||
BoundingBoxDecode,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
BoundingBoxDecodeCPUKernel, float);
|
||||
|
||||
MS_REG_CPU_KERNEL_T(
|
||||
BoundingBoxDecode,
|
||||
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
|
||||
BoundingBoxDecodeCPUKernel, float16);
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_
|
|
@ -0,0 +1,115 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
template <typename T>
|
||||
void BoundingBoxEncodeCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(ERROR) << "Input num is " << input_num << ", but BoundingBoxEncode needs 2 inputs.";
|
||||
}
|
||||
|
||||
const size_t coordinate_size = 4;
|
||||
if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueTuple>() ||
|
||||
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<ValueList>()) {
|
||||
means_ = AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, "means");
|
||||
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa<FloatImm>()) {
|
||||
float mean = AnfAlgo::GetNodeAttr<float>(kernel_node, "means");
|
||||
for (size_t i = 0; i < coordinate_size; i++) {
|
||||
means_.emplace_back(mean);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Attribute means type is invalid.";
|
||||
}
|
||||
|
||||
if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueTuple>() ||
|
||||
AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<ValueList>()) {
|
||||
stds_ = AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, "stds");
|
||||
} else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa<FloatImm>()) {
|
||||
float std = AnfAlgo::GetNodeAttr<float>(kernel_node, "stds");
|
||||
for (size_t i = 0; i < coordinate_size; i++) {
|
||||
stds_.emplace_back(std);
|
||||
}
|
||||
} else {
|
||||
MS_LOG(EXCEPTION) << "Attribute stds type is invalid.";
|
||||
}
|
||||
|
||||
if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
|
||||
MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool BoundingBoxEncodeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
auto anchor_box = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto groundtruth_box = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
auto deltas = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
|
||||
if (inputs[0]->size != inputs[1]->size) {
|
||||
MS_LOG(ERROR) << "Anchor box size must be equal to groundtruth box size: " << inputs[1]->size << ", but got"
|
||||
<< inputs[0]->size;
|
||||
return false;
|
||||
}
|
||||
|
||||
const size_t coordinate = 4;
|
||||
const size_t block_size = inputs[0]->size / sizeof(T);
|
||||
if ((block_size % coordinate) != 0) {
|
||||
MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t elem_num = block_size / coordinate;
|
||||
auto task = [&](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
const size_t left_x = i * 4;
|
||||
const size_t left_y = i * 4 + 1;
|
||||
const size_t right_x = i * 4 + 2;
|
||||
const size_t right_y = i * 4 + 3;
|
||||
|
||||
T px = (anchor_box[left_x] + anchor_box[right_x]) * static_cast<T>(0.5);
|
||||
T py = (anchor_box[left_y] + anchor_box[right_y]) * static_cast<T>(0.5);
|
||||
T pw = anchor_box[right_x] - anchor_box[left_x] + static_cast<T>(1.0);
|
||||
T ph = anchor_box[right_y] - anchor_box[left_y] + static_cast<T>(1.0);
|
||||
|
||||
T gx = (groundtruth_box[left_x] + groundtruth_box[right_x]) * static_cast<T>(0.5);
|
||||
T gy = (groundtruth_box[left_y] + groundtruth_box[right_y]) * static_cast<T>(0.5);
|
||||
T gw = groundtruth_box[right_x] - groundtruth_box[left_x] + static_cast<T>(1.0);
|
||||
T gh = groundtruth_box[right_y] - groundtruth_box[left_y] + static_cast<T>(1.0);
|
||||
|
||||
T dx = (gx - px) / pw;
|
||||
T dy = (gy - py) / ph;
|
||||
T dw = log(gw / pw);
|
||||
T dh = log(gh / ph);
|
||||
|
||||
deltas[left_x] = (dx - static_cast<T>(means_[0])) / static_cast<T>(stds_[0]);
|
||||
deltas[left_y] = (dy - static_cast<T>(means_[1])) / static_cast<T>(stds_[1]);
|
||||
deltas[right_x] = (dw - static_cast<T>(means_[2])) / static_cast<T>(stds_[2]);
|
||||
deltas[right_y] = (dh - static_cast<T>(means_[3])) / static_cast<T>(stds_[3]);
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, elem_num);
|
||||
|
||||
return true;
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,54 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// CPU kernel for the BoundingBoxEncode operator: converts pairs of
// (anchor, groundtruth) corner boxes into normalized regression deltas.
template <typename T>
class BoundingBoxEncodeCPUKernel : public CPUKernel {
 public:
  BoundingBoxEncodeCPUKernel() = default;
  ~BoundingBoxEncodeCPUKernel() override = default;

  // Reads the "means"/"stds" attributes (scalar or 4-element sequence).
  void InitKernel(const CNodePtr &kernel_node) override;

  // inputs: anchor boxes, groundtruth boxes; outputs: encoded deltas.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  std::vector<float> means_;  // per-coordinate delta means (size >= 4)
  std::vector<float> stds_;   // per-coordinate delta stds (size >= 4)
};

MS_REG_CPU_KERNEL_T(
  BoundingBoxEncode,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  BoundingBoxEncodeCPUKernel, float);

MS_REG_CPU_KERNEL_T(
  BoundingBoxEncode,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  BoundingBoxEncodeCPUKernel, float16);
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_
|
|
@ -0,0 +1,84 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/check_valid_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
constexpr size_t kInputSize = 2;
|
||||
constexpr size_t kOutputSize = 1;
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
void CheckValidCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
anchor_box_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
img_metas_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool CheckValidCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
CheckParams(inputs, outputs);
|
||||
auto anchor_box = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto img_metas = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
auto output = reinterpret_cast<bool *>(outputs[0]->addr);
|
||||
const size_t coordinate = 4;
|
||||
const size_t elem_num = inputs[0]->size / sizeof(T) / coordinate;
|
||||
|
||||
auto task = [&](size_t start, size_t end) {
|
||||
for (size_t i = start; i < end; i++) {
|
||||
const size_t left_x = i * 4;
|
||||
const size_t left_y = i * 4 + 1;
|
||||
const size_t right_x = i * 4 + 2;
|
||||
const size_t right_y = i * 4 + 3;
|
||||
|
||||
bool valid_flag = false;
|
||||
valid_flag |= !(anchor_box[left_x] >= static_cast<T>(0.0));
|
||||
valid_flag |= !(anchor_box[left_y] >= static_cast<T>(0.0));
|
||||
valid_flag |= !(img_metas[1] * img_metas[2] - static_cast<T>(1.0) >= anchor_box[right_x]);
|
||||
valid_flag |= !(img_metas[0] * img_metas[2] - static_cast<T>(1.0) >= anchor_box[right_y]);
|
||||
|
||||
output[i] = !valid_flag;
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, elem_num);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void CheckValidCPUKernel<T>::CheckParams(const std::vector<AddressPtr> &inputs,
|
||||
const std::vector<AddressPtr> &outputs) {
|
||||
// inputs: anchor_box, img_metas
|
||||
if (inputs.size() != kInputSize) {
|
||||
MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but CheckValid needs " << kInputSize << " inputs.";
|
||||
}
|
||||
|
||||
// outputs: valid
|
||||
if (outputs.size() != kOutputSize) {
|
||||
MS_LOG(EXCEPTION) << "Output number is: " << outputs.size() << ", but CheckValid needs " << kOutputSize
|
||||
<< "outputs.";
|
||||
}
|
||||
if (outputs[0]->size / sizeof(bool) != inputs[0]->size / sizeof(T) / 4) {
|
||||
MS_LOG(EXCEPTION) << "The output dimensions must match the dimensions of img_metas.";
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,61 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// CPU kernel for the CheckValid operator: flags, per anchor box, whether the
// box lies fully inside the image bounds described by img_metas.
template <typename T>
class CheckValidCPUKernel : public CPUKernel {
 public:
  CheckValidCPUKernel() = default;
  ~CheckValidCPUKernel() override = default;

  // Caches the input device shapes.
  void InitKernel(const CNodePtr &kernel_node) override;

  // inputs: anchor boxes, img_metas; outputs: one bool per box.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // Throws if input/output counts or buffer sizes violate the contract.
  void CheckParams(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
  std::vector<size_t> anchor_box_shape_;  // device shape of input 0
  std::vector<size_t> img_metas_shape_;   // device shape of input 1
};

MS_REG_CPU_KERNEL_T(
  CheckValid,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
  CheckValidCPUKernel, float);

MS_REG_CPU_KERNEL_T(
  CheckValid,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeBool),
  CheckValidCPUKernel, float16);

MS_REG_CPU_KERNEL_T(
  CheckValid, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeBool),
  CheckValidCPUKernel, int16_t);

MS_REG_CPU_KERNEL_T(
  CheckValid, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeBool),
  CheckValidCPUKernel, uint8_t);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_
|
|
@ -0,0 +1,219 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
// Validates the CropAndResize node (4 inputs: image, boxes, box_index,
// crop_size; 1 output) and caches the dimensions and attributes Launch needs.
// NOTE(review): shape checks use MS_LOG(ERROR), which logs but does not abort
// (unlike MS_LOG(EXCEPTION) used elsewhere in this commit) -- confirm this is
// intentional, since initialization continues with the bad shapes.
template <typename T>
void CropAndResizeCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 4) {
    MS_LOG(ERROR) << "Input num is " << input_num << ", but CropAndResize needs 4 inputs.";
  }

  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {
    MS_LOG(ERROR) << "Output num is " << output_num << ", but CropAndResize needs 1 output.";
  }

  // input image: must be a 4-D tensor.
  auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  size_t input_image_shape_len = input_image_shape.size();
  if (input_image_shape_len != 4) {
    MS_LOG(ERROR) << "Image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4
                  << "-D image tensor.";
  }

  // Indices [1]/[2] as height/width imply an NHWC layout -- TODO confirm.
  input_height_ = input_image_shape[1];
  input_width_ = input_image_shape[2];

  // input boxes: rank-2 (presumably [num_boxes, 4] -- TODO confirm).
  auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  size_t input_boxes_shape_len = input_boxes_shape.size();
  if (input_boxes_shape_len != 2) {
    MS_LOG(ERROR) << "Box is rank " << input_boxes_shape_len << ", but CropAndResize supports only rank " << 2
                  << "for boxes.";
  }

  // input box_index: rank-1, one batch index per box.
  auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
  size_t input_box_index_shape_len = input_box_index_shape.size();
  if (input_box_index_shape_len != 1) {
    MS_LOG(ERROR) << "Box index is rank " << input_box_index_shape_len << ", but CropAndResize supports only rank " << 1
                  << "for box_index.";
  }

  // input crop_size: rank-1 with exactly two values (target height, width).
  auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
  size_t input_crop_size_shape_len = input_crop_size_shape.size();
  if (input_crop_size_shape_len != 1) {
    MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank "
                  << 1 << "for Crop_size.";
  }
  if (input_crop_size_shape[0] != 2) {
    MS_LOG(ERROR) << "Crop_size is size " << input_crop_size_shape[0] << "-D, but CropAndResize supports only size "
                  << 2 << "for Crop_size.";
  }

  // output: total element count across all dimensions.
  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  auto output_shape_len = output_shape.size();
  output_size_ = 1;
  for (size_t i = 0; i < output_shape_len; i++) {
    output_size_ *= output_shape[i];
  }

  // set expected output params (output presumed [num_boxes, h, w, channels]).
  final_height_ = output_shape[1];
  final_width_ = output_shape[2];
  channel_ = output_shape[3];

  // get op parameters: interpolation method is stored as a small int code
  // (1 = bilinear, 2 = nearest, 3 = anything else, treated as bilinear-v2).
  string method = AnfAlgo::GetNodeAttr<string>(kernel_node, "method");
  if (method == "bilinear") {
    method_ = 1;
  } else if (method == "nearest") {
    method_ = 2;
  } else {  // bilinear-v2
    method_ = 3;
  }
  // Value used for samples that fall outside the source box.
  extrapolation_value_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "extrapolation_value");
}
|
||||
|
||||
template <typename T>
|
||||
bool CropAndResizeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
auto *input_image = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto *input_boxes = reinterpret_cast<float *>(inputs[1]->addr);
|
||||
auto *input_box_index = reinterpret_cast<int *>(inputs[2]->addr);
|
||||
auto *output = reinterpret_cast<float *>(outputs[0]->addr);
|
||||
|
||||
auto task = [&](size_t start, size_t end) {
|
||||
for (size_t pos = start; pos < end; pos++) {
|
||||
size_t pos_temp = pos;
|
||||
const int pos_channel = pos_temp % channel_;
|
||||
pos_temp = pos_temp / channel_;
|
||||
const int pos_x = pos_temp % final_width_;
|
||||
pos_temp = pos_temp / final_width_;
|
||||
const int pos_y = pos_temp % final_height_;
|
||||
const int pos_image_idx = pos_temp / final_height_;
|
||||
const int box_index = input_box_index[pos_image_idx];
|
||||
|
||||
// crop values
|
||||
const float y1 = input_boxes[4 * pos_image_idx + 0];
|
||||
const float x1 = input_boxes[4 * pos_image_idx + 1];
|
||||
const float y2 = input_boxes[4 * pos_image_idx + 2];
|
||||
const float x2 = input_boxes[4 * pos_image_idx + 3];
|
||||
|
||||
// set scale and target pixels
|
||||
float scale_height = final_height_ > 1 ? (y2 - y1) * (input_height_ - 1) / (final_height_ - 1) : 0;
|
||||
float scale_width = final_width_ > 1 ? (x2 - x1) * (input_width_ - 1) / (final_width_ - 1) : 0;
|
||||
float target_y =
|
||||
final_height_ > 1 ? y1 * (input_height_ - 1) + pos_y * scale_height : 0.5 * (y1 + y2) + (input_height_ - 1);
|
||||
float target_x =
|
||||
final_width_ > 1 ? x1 * (input_width_ - 1) + pos_x * scale_width : 0.5 * (x1 + x2) + (input_width_ - 1);
|
||||
|
||||
// use extrapolation value if out of range
|
||||
if (((target_x < 0) || (target_x > input_width_ - 1)) || ((target_y < 0) || (target_y > input_height_ - 1))) {
|
||||
if ((method_ == 1) || (method_ == 2)) {
|
||||
output[pos] = extrapolation_value_;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (method_ == 1) {
|
||||
// Bilinear
|
||||
const int top_y_index = floorf(target_y);
|
||||
const int bottom_y_index = ceilf(target_y);
|
||||
const int left_x_index = floorf(target_x);
|
||||
const int right_x_index = ceilf(target_x);
|
||||
const float y_lerp = target_y - top_y_index;
|
||||
const float x_lerp = target_x - left_x_index;
|
||||
const float top_left = static_cast<float>(
|
||||
input_image[((box_index * input_height_ + top_y_index) * input_width_ + left_x_index) * channel_ +
|
||||
pos_channel]);
|
||||
const float top_right = static_cast<float>(
|
||||
input_image[((box_index * input_height_ + top_y_index) * input_width_ + right_x_index) * channel_ +
|
||||
pos_channel]);
|
||||
const float bottom_left = static_cast<float>(
|
||||
input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + left_x_index) * channel_ +
|
||||
pos_channel]);
|
||||
const float bottom_right = static_cast<float>(
|
||||
input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + right_x_index) * channel_ +
|
||||
pos_channel]);
|
||||
const float top = top_left + (top_right - top_left) * x_lerp;
|
||||
const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
|
||||
output[pos] = top + (bottom - top) * y_lerp;
|
||||
} else if (method_ == 3) {
|
||||
int y1h = static_cast<int>(y1 * input_height_);
|
||||
int x1w = static_cast<int>(x1 * input_width_);
|
||||
int y2h = static_cast<int>(y2 * input_height_);
|
||||
int x2w = static_cast<int>(x2 * input_width_);
|
||||
int w = ((x2w - x1w + 1) > 1) ? x2w - x1w + 1 : 1;
|
||||
int h = ((y2h - y1h + 1) > 1) ? y2h - y1h + 1 : 1;
|
||||
|
||||
float y_point = (pos_y + 0.5) * (h / static_cast<float>(final_height_)) - 0.5;
|
||||
int top_y_index = floorf(y_point);
|
||||
top_y_index = std::min(std::max(0, top_y_index), h - 1);
|
||||
|
||||
int bottom_y_index = ceilf(y_point);
|
||||
bottom_y_index = std::min(std::max(0, bottom_y_index), h - 1);
|
||||
|
||||
float x_point = (pos_x + 0.5) * (w / static_cast<float>(final_width_)) - 0.5;
|
||||
int left_x_index = floorf(x_point);
|
||||
left_x_index = std::min(std::max(0, left_x_index), w - 1);
|
||||
|
||||
int right_x_index = ceilf(x_point);
|
||||
right_x_index = std::min(std::max(0, right_x_index), w - 1);
|
||||
|
||||
const float y_lerp = y_point - top_y_index;
|
||||
const float x_lerp = x_point - left_x_index;
|
||||
const int y_top_index = box_index * input_height_ + y1h + top_y_index;
|
||||
const int y_bottom_index = box_index * input_height_ + y1h + bottom_y_index;
|
||||
|
||||
const float top_left =
|
||||
static_cast<float>(input_image[(y_top_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]);
|
||||
const float top_right =
|
||||
static_cast<float>(input_image[(y_top_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]);
|
||||
const float bottom_left = static_cast<float>(
|
||||
input_image[(y_bottom_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]);
|
||||
const float bottom_right = static_cast<float>(
|
||||
input_image[(y_bottom_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]);
|
||||
|
||||
float ret = top_left * (1 - y_lerp) * (1 - x_lerp) + bottom_right * y_lerp * x_lerp +
|
||||
top_right * (1 - y_lerp) * x_lerp + bottom_left * y_lerp * (1 - x_lerp);
|
||||
output[pos] = ret;
|
||||
} else {
|
||||
// Nearest Neighbour
|
||||
const int closest_x_index = roundf(target_x);
|
||||
const int closest_y_index = roundf(target_y);
|
||||
const float val = static_cast<float>(
|
||||
input_image[((box_index * input_height_ + closest_y_index) * input_width_ + closest_x_index) * channel_ +
|
||||
pos_channel]);
|
||||
output[pos] = val;
|
||||
}
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, output_size_);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,213 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class CropAndResizeCPUKernel : public CPUKernel {
|
||||
public:
|
||||
CropAndResizeCPUKernel() = default;
|
||||
~CropAndResizeCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
private:
|
||||
int method_;
|
||||
float extrapolation_value_;
|
||||
int input_crop_size_;
|
||||
int output_size_;
|
||||
int input_height_;
|
||||
int input_width_;
|
||||
int final_height_;
|
||||
int final_width_;
|
||||
int channel_;
|
||||
};
|
||||
|
||||
// Kernel registrations for CropAndResize: one entry per supported pairing of
// image dtype (input 0) and box_index dtype (input 2: Int32 or Int64).
// Boxes (input 1) are always Float32 and the output is always Float32.
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float16);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float16);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, double);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, double);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int8_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int8_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int16_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int16_t);
|
||||
|
||||
// Bug fix: this entry was an exact duplicate of the Int8/Int32 registration
// above. It is meant to be the Int32-image / Int32-box_index variant, which
// was otherwise missing (only the Int32/Int64 pairing was registered).
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int32_t);
|
||||
|
||||
// Remaining CropAndResize registrations: Int32 (Int64 box_index), Int64,
// UInt8 and UInt16 image dtypes, each with Int32 and Int64 box_index.
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int32_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int64_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int64_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint8_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint8_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint16_t);

MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint16_t);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_
|
|
@ -0,0 +1,243 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
// Round v up to the next power of two (v itself if already a power of two).
// Classic bit-smearing trick: propagate the highest set bit of (v - 1) into
// every lower bit, then add one.
int NmsRoundUpPower2(int v) {
  --v;
  for (int shift = 1; shift <= 16; shift <<= 1) {
    v |= v >> shift;
  }
  return v + 1;
}
|
||||
|
||||
// Exchange the values pointed to by lhs and rhs.
template <typename T>
void Swap(T *lhs, T *rhs) {
  T tmp = *lhs;
  *lhs = *rhs;
  *rhs = tmp;
}
|
||||
|
||||
// Sorting function based on BitonicSort from TopK kernel.
// Sorts `index_buff` ascending by box score (column 4 of each `box_size`-wide
// row of `input`), using `data_buff` as the key array. Both buffers are
// `ceil_power2` long; slots past `inner` are padded with +max so they sink to
// the end. `outer` is part of the shared signature but unused here.
template <typename T>
void NMSWithMaskCPUKernel<T>::NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2,
                                                        T *input, T *data_buff, int *index_buff, int box_size) {
  // Phase 1: load the sort keys (scores) and identity indices, padding the
  // tail (i >= inner) with the maximum representable key.
  auto task1 = [&](int start, int end) {
    for (int i = start; i < end; i++) {
      data_buff[i] = (i < inner) ? input[(i * box_size) + 4] : std::numeric_limits<T>::max();
      index_buff[i] = i;
    }
  };
  CPUKernelUtils::ParallelFor(task1, ceil_power2);

  // Phase 2: bitonic sorting network. `i` is the size of the bitonic
  // sequences being merged, `j` the compare-exchange distance. Each inner
  // sweep is data-parallel: element `tid` is compared with `tid ^ j`.
  for (size_t i = 2; i <= static_cast<size_t>(ceil_power2); i <<= 1) {
    for (size_t j = (i >> 1); j > 0; j >>= 1) {
      auto task2 = [&](size_t start, size_t end) {
        for (size_t tid = start; tid < end; tid++) {
          size_t tid_comp = tid ^ j;
          if (tid_comp > tid) {  // each pair is handled once, by its lower index
            if ((tid & i) == 0) {
              // ascending half of the bitonic sequence
              if (data_buff[tid] > data_buff[tid_comp]) {
                Swap(&data_buff[tid], &data_buff[tid_comp]);
                Swap(&index_buff[tid], &index_buff[tid_comp]);
              }
            } else {
              // descending half of the bitonic sequence
              if (data_buff[tid] < data_buff[tid_comp]) {
                Swap(&data_buff[tid], &data_buff[tid_comp]);
                Swap(&index_buff[tid], &index_buff[tid_comp]);
              }
            }
          }
        }
      };
      CPUKernelUtils::ParallelFor(task2, ceil_power2);
    }
  }
}
||||
|
||||
// Initialize per row mask array to all true
|
||||
template <typename T>
|
||||
void NMSWithMaskCPUKernel<T>::MaskInit(int numSq, bool *row_mask) {
|
||||
auto task = [&](int start, int end) {
|
||||
for (int mat_pos = start; mat_pos < end; mat_pos++) {
|
||||
row_mask[mat_pos] = true;
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, numSq);
|
||||
}
|
||||
|
||||
// copy data from input to output array sorted by indices returned from bitonic sort
// flips boxes if asked to, default - false -> if (x1/y1 > x2/y2)
template <typename T>
void NMSWithMaskCPUKernel<T>::PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_size,
                                             bool flip_mode) {
  auto task = [&](int start, int end) {
    for (int box_num = start; box_num < end; box_num++) {
      // index_buff is sorted ascending by score; read it back-to-front so the
      // output is ordered highest-score first.
      int correct_index = index_buff[(num - 1) - box_num];  // flip the array around
      int correct_arr_start = correct_index * box_size;
      int current_arr_start = box_num * box_size;
      if (flip_mode) {  // flip boxes so corners are ordered
        // check x: ensure x1 <= x2 in the copied box
        if (data_in[correct_arr_start + 0] > data_in[correct_arr_start + 2]) {
          data_out[current_arr_start + 0] = data_in[correct_arr_start + 2];
          data_out[current_arr_start + 2] = data_in[correct_arr_start + 0];
        } else {
          data_out[current_arr_start + 0] = data_in[correct_arr_start + 0];
          data_out[current_arr_start + 2] = data_in[correct_arr_start + 2];
        }
        // check y: ensure y1 <= y2 in the copied box
        if (data_in[correct_arr_start + 1] > data_in[correct_arr_start + 3]) {
          data_out[current_arr_start + 1] = data_in[correct_arr_start + 3];
          data_out[current_arr_start + 3] = data_in[correct_arr_start + 1];
        } else {
          data_out[current_arr_start + 1] = data_in[correct_arr_start + 1];
          data_out[current_arr_start + 3] = data_in[correct_arr_start + 3];
        }
        // slot 4 (the score) is copied through unchanged
        data_out[current_arr_start + 4] = data_in[correct_arr_start + 4];
      } else {  // default behaviour, don't flip
        for (int x = 0; x < 5; x++) {
          data_out[current_arr_start + x] = data_in[correct_arr_start + x];
        }
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, num);
}
|
||||
|
||||
// populated return mask (init to all true) and return index array
|
||||
template <typename T>
|
||||
void NMSWithMaskCPUKernel<T>::Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size) {
|
||||
auto task = [&](int start, int end) {
|
||||
for (int box_num = start; box_num < end; box_num++) {
|
||||
sel_idx[box_num] = box_num;
|
||||
sel_boxes[box_num] = true;
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, num);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool NMSWithMaskCPUKernel<T>::IouDecision(T *output, int box_A_ix, int box_B_ix, int box_A_start, int box_B_start,
|
||||
float IOU_value) {
|
||||
T x_1 = std::max(output[box_A_start + 0], output[box_B_start + 0]);
|
||||
T y_1 = std::max(output[box_A_start + 1], output[box_B_start + 1]);
|
||||
T x_2 = std::min(output[box_A_start + 2], output[box_B_start + 2]);
|
||||
T y_2 = std::min(output[box_A_start + 3], output[box_B_start + 3]);
|
||||
T width = std::max(x_2 - x_1, T(0)); // in case of no overlap
|
||||
T height = std::max(y_2 - y_1, T(0));
|
||||
|
||||
T area1 = (output[box_A_start + 2] - output[box_A_start + 0]) * (output[box_A_start + 3] - output[box_A_start + 1]);
|
||||
T area2 = (output[box_B_start + 2] - output[box_B_start + 0]) * (output[box_B_start + 3] - output[box_B_start + 1]);
|
||||
|
||||
T combined_area = area1 + area2;
|
||||
return !(((width * height) / (combined_area - (width * height))) > static_cast<T>(IOU_value));
|
||||
}
|
||||
|
||||
// Run parallel NMS pass
|
||||
// Every position in the row_mask array is updated wit correct IOU decision after being init to all True
|
||||
template <typename T>
|
||||
void NMSWithMaskCPUKernel<T>::NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_size,
|
||||
bool *row_mask) {
|
||||
auto task = [&](int start, int end) {
|
||||
for (int mask_index = start; mask_index < end; mask_index++) {
|
||||
int box_i = mask_index / num; // row in 2d row_mask array
|
||||
int box_j = mask_index % num; // col in 2d row_mask array
|
||||
if (box_j > box_i) { // skip when box_j index lower/equal to box_i - will remain true
|
||||
int box_i_start_index = box_i * box_size; // adjust starting indices
|
||||
int box_j_start_index = box_j * box_size;
|
||||
row_mask[mask_index] = IouDecision(output, box_i, box_j, box_i_start_index, box_j_start_index, IOU_value);
|
||||
}
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, num * num);
|
||||
}
|
||||
|
||||
// Reduce pass runs on 1 block to allow thread sync
|
||||
template <typename T>
|
||||
void NMSWithMaskCPUKernel<T>::ReducePass(const int num, bool *sel_boxes, bool *row_mask) {
|
||||
// loop over every box in order of high to low confidence score
|
||||
for (int i = 0; i < num - 1; ++i) {
|
||||
if (!sel_boxes[i]) {
|
||||
continue;
|
||||
}
|
||||
// every thread handles a different set of boxes (per all boxes in order)
|
||||
auto task = [&](int start, int end) {
|
||||
for (int j = start; j < end; j++) {
|
||||
sel_boxes[j] = sel_boxes[j] && row_mask[i * num + j];
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, num);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void NMSWithMaskCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
iou_value_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "iou_threshold");
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input num is " << input_num << ", but NMSWithMask needs 1 input.";
|
||||
}
|
||||
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 3) {
|
||||
MS_LOG(ERROR) << "Output num is " << output_num << ", but NMSWithMask needs 3 outputs.";
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void NMSWithMaskCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
|
||||
CPUKernel::InitInputOutputSize(kernel_node);
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
num_input_ = input_shape[0]; // Get N values in [N, 5] data.
|
||||
ceil_power_2 = NmsRoundUpPower2(num_input_);
|
||||
|
||||
workspace_size_list_.push_back(ceil_power_2 * sizeof(T)); // data buff
|
||||
workspace_size_list_.push_back(ceil_power_2 * sizeof(int)); // index buff
|
||||
workspace_size_list_.push_back(num_input_ * num_input_ * sizeof(bool)); // mask list
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool NMSWithMaskCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &workspace,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
auto input = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto data_buff = reinterpret_cast<T *>(workspace[0]->addr);
|
||||
auto index_buff = reinterpret_cast<int *>(workspace[1]->addr);
|
||||
auto row_mask = reinterpret_cast<bool *>(workspace[2]->addr);
|
||||
auto output = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
auto sel_idx = reinterpret_cast<int *>(outputs[1]->addr);
|
||||
auto sel_boxes = reinterpret_cast<bool *>(outputs[2]->addr);
|
||||
|
||||
NmsBitonicSortByKeyKernel(1, num_input_, ceil_power_2, input, data_buff, index_buff, box_size_);
|
||||
int total_val = num_input_ * num_input_;
|
||||
MaskInit(total_val, row_mask);
|
||||
PopulateOutput(input, output, index_buff, num_input_, box_size_, false);
|
||||
Preprocess(num_input_, sel_idx, sel_boxes, output, box_size_);
|
||||
NmsPass(num_input_, iou_value_, output, sel_boxes, box_size_, row_mask);
|
||||
ReducePass(num_input_, sel_boxes, row_mask);
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,80 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class NMSWithMaskCPUKernel : public CPUKernel {
|
||||
public:
|
||||
NMSWithMaskCPUKernel() = default;
|
||||
~NMSWithMaskCPUKernel() override = default;
|
||||
|
||||
void InitKernel(const CNodePtr &kernel_node) override;
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
void InitInputOutputSize(const CNodePtr &kernel_node) override;
|
||||
|
||||
private:
|
||||
void NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2, T *input, T *data_buff,
|
||||
int *index_buff, int box_size);
|
||||
|
||||
void MaskInit(int numSq, bool *row_mask);
|
||||
|
||||
void PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_size, bool flip_mode);
|
||||
|
||||
void Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size);
|
||||
|
||||
bool IouDecision(T *output, int box_A_ix, int box_B_ix, int box_A_start, int box_B_start, float IOU_value);
|
||||
|
||||
void NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_size, bool *row_mask);
|
||||
|
||||
void ReducePass(const int num, bool *sel_boxes, bool *row_mask);
|
||||
|
||||
int num_input_;
|
||||
float iou_value_;
|
||||
size_t ceil_power_2;
|
||||
static const int box_size_ = 5; // pre_defined box width
|
||||
};
|
||||
|
||||
// Register NMSWithMask for float32 and float16 boxes; outputs are the sorted
// boxes (same dtype as the input), selected indices (Int32) and the
// selection mask (Bool).
MS_REG_CPU_KERNEL_T(NMSWithMask,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddOutputAttr(kNumberTypeFloat32)
                      .AddOutputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeBool),
                    NMSWithMaskCPUKernel, float);

MS_REG_CPU_KERNEL_T(NMSWithMask,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddOutputAttr(kNumberTypeFloat16)
                      .AddOutputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeBool),
                    NMSWithMaskCPUKernel, float16);
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_
|
|
@ -0,0 +1,225 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#define BLOCKSIZE 256
|
||||
#define MAX_DIMENSION 5
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
// Convert flat element indices into multi-dimensional coordinates.
//
// dims:              shape of the input tensor.
// output_length:     number of flat indices to convert.
// input_dim_size:    rank of the input tensor (number of dims entries used).
// input_total_count: total number of elements in the input tensor.
// tmp_output:        flat indices, length output_length.
// output:            destination; receives output_length * input_dim_size
//                    coordinate values.
void ParseOutputCoordinate(std::vector<int64_t> dims, int32_t output_length, int32_t input_dim_size,
                           int32_t input_total_count, const int *tmp_output, int *output) {
  int it = 0;
  // Stride of dimension 0: elements spanned by one step along the first axis.
  int column = input_total_count / dims[0];
  for (int i = 0; i < output_length; i++) {
    int32_t tmp_output_number = tmp_output[i];
    int tmp_column = column;  // stride of the dimension currently decoded
    for (int j = 0; j < input_dim_size; j++) {
      if (j == input_dim_size - 1) {
        // Innermost dimension: the remaining value IS the final coordinate.
        output[it++] = tmp_output_number;
        continue;
      }
      // Bug fix: divide/mod by the current dimension's stride (tmp_column),
      // not by the outermost stride (column). The previous code reused
      // `column` every iteration, producing wrong coordinates for any input
      // of rank >= 3; tmp_column was computed but never used.
      output[it++] = tmp_output_number / tmp_column;
      tmp_output_number = tmp_output_number % tmp_column;
      tmp_column = tmp_column / dims[j + 1];
    }
  }
}
|
||||
|
||||
// Determine how many entries the outputs carry and how many of them are real
// (non-zero) samples. padding_flag is set when the requested count exceeds
// the number of non-zero candidates, i.e. the tail must be padded.
// A negative count is a caller error and raises an exception.
void GetOutputLength(bool *padding_flag, int32_t *output_length, int32_t *output_non_zero_length, int32_t count,
                     int32_t non_zero_num) {
  if (count < 0) {
    MS_LOG(EXCEPTION) << "Input count must be greater than or equal to 0, but is " << count;
  }
  // count == 0 means "take every non-zero element".
  *padding_flag = (count > non_zero_num);
  *output_length = (count == 0) ? non_zero_num : count;
  *output_non_zero_length = (count == 0 || count > non_zero_num) ? non_zero_num : count;
}
|
||||
|
||||
// Multiply the first input_dim_size extents of dims_ into *input_total_count
// (the caller seeds it, typically with 1).
void GetInputTotalCount(const std::vector<int64_t> &dims_, int32_t *input_total_count, const int32_t &input_dim_size) {
  int32_t product = *input_total_count;
  for (int32_t axis = 0; axis < input_dim_size; ++axis) {
    product *= dims_[axis];
  }
  *input_total_count = product;
}
|
||||
|
||||
// Finalises the two kernel outputs: zeroes the coordinate slots of padded
// (invalid) samples and converts the per-sample int flags in `mask_dim` into
// the boolean `mask` output, forcing padded slots to false.
void UpdateOutput(const std::vector<int64_t> &dims_, const int32_t &non_zero_num, const int32_t &count_,
                  const int32_t &output_length, const int *mask_dim, int32_t *output_coordinate, bool *mask) {
  const int32_t rank = static_cast<int32_t>(dims_.size());
  // Coordinates beyond the real samples are padding: zero them out.
  int32_t pad_pos = non_zero_num * rank;
  const int32_t pad_end = count_ * rank;
  while (pad_pos < pad_end) {
    output_coordinate[pad_pos++] = 0;
  }
  // Translate the int validity flags into booleans.
  for (int32_t idx = 0; idx < output_length; ++idx) {
    mask[idx] = (mask_dim[idx] != 0);
  }
  // Padded samples are never valid.
  for (int32_t idx = non_zero_num; idx < count_; ++idx) {
    mask[idx] = false;
  }
}
|
||||
|
||||
// Reads the node's shapes and attributes (seed, seed2, count) and caches the
// input dimensions in dims_ for Launch.
void RandomChoiceWithMaskCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  // RandomChoiceWithMask takes exactly one boolean input tensor.
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {
    // NOTE(review): only logs an error; initialisation continues regardless.
    MS_LOG(ERROR) << "Input num is " << input_num << ", but RandomChoiceWithMask needs 1 input.";
  }

  // ... and produces two outputs: coordinates and a validity mask.
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 2) {
    MS_LOG(ERROR) << "Output num is " << output_num << ", but RandomChoiceWithMask needs 2 outputs.";
  }

  // Only ranks 1..MAX_DIMENSION (5) are supported.
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  input_shape_size_ = input_shape.size();
  if (input_shape_size_ < 1 || input_shape_size_ > MAX_DIMENSION) {
    MS_LOG(ERROR) << "Input is " << input_shape_size_
                  << "-D, but RandomChoiceWithMask supports only 1-D to 5-D inputs.";
  }

  // Attribute values arrive as int64 and are narrowed to int.
  seed_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed"));
  seed2_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed2"));
  count_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "count"));

  MS_LOG(INFO) << "This op attr count is " << count_;

  // Cache all input dimensions in order. With input_num == 1 this is simply
  // the shape of the single input tensor.
  for (size_t i = 0; i < input_num; i++) {
    auto input_i_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
    for (size_t j = 0; j < input_i_shape.size(); j++) {
      dims_.emplace_back(input_i_shape[j]);
    }
  }
}
|
||||
|
||||
// Samples (with replacement) up to count_ positions whose input value is
// true, writing their multi-dimensional coordinates to outputs[0] and a
// per-sample validity mask to outputs[1]. Slots beyond the number of true
// elements are zero-padded and masked false.
//
// Fixes versus the original:
//  - memset_s was called with the element count as the byte size, leaving
//    most of the int buffers uninitialised; zero-initialised std::vector
//    buffers replace the raw new[]/delete[] arrays (also removing the
//    repeated manual cleanup and leak risk on exceptions).
//  - the overflow guard computed output_length * input_dim_size in 32-bit
//    arithmetic before checking it; the product is now formed in 64-bit.
//  - unreachable `return false` statements after MS_LOG(EXCEPTION) (which
//    throws) were removed.
//  - the random distribution is only constructed when there is at least one
//    non-zero element, avoiding uniform_int_distribution(0, -1).
bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                           const std::vector<kernel::AddressPtr> &,
                                           const std::vector<kernel::AddressPtr> &outputs) {
  auto *input = reinterpret_cast<bool *>(inputs[0]->addr);
  auto *output_coordinate = reinterpret_cast<int32_t *>(outputs[0]->addr);
  auto *mask = reinterpret_cast<bool *>(outputs[1]->addr);
  int32_t input_dim_size = static_cast<int32_t>(dims_.size());

  if (input_dim_size < 1 || input_dim_size > MAX_DIMENSION) {
    MS_LOG(EXCEPTION) << "Input dim size is " << input_dim_size << ", which is not supported.";
  }

  // seed2 takes priority over seed; when both are 0, draw from the engine.
  int seedc = seed2_ != 0 ? seed2_ : (seed_ != 0 ? seed_ : generator_());

  int32_t input_total_count = 1;
  GetInputTotalCount(dims_, &input_total_count, input_dim_size);

  // Collect the flat indices of all true elements.
  std::vector<int> input_dim(input_total_count);
  int32_t non_zero_num = 0;
  for (int32_t i = 0; i < input_total_count; i++) {
    if (input[i]) {
      input_dim[non_zero_num++] = i;
    }
  }

  bool padding_flag = false;
  int32_t output_length = 0;
  int32_t output_non_zero_length = 0;
  GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num);

  // tmp_output holds the sampled flat indices; mask_dim flags which output
  // slots hold a real sample (1) versus padding (0). Value-initialisation
  // zeroes both, so no explicit padding pass is needed.
  std::vector<int> tmp_output(output_length, 0);
  std::vector<int> mask_dim(output_length, 0);

  if (output_non_zero_length > 0) {
    // Sample with replacement from the collected non-zero positions.
    std::mt19937 gen(seedc);
    std::uniform_int_distribution<> dis(0, non_zero_num - 1);
    for (int32_t i = 0; i < output_non_zero_length; i++) {
      tmp_output[i] = input_dim[dis(gen)];
      mask_dim[i] = 1;
    }
  }

  // Guard against 32-bit overflow before sizing the coordinate buffer.
  int64_t total_coordinates = static_cast<int64_t>(output_length) * input_dim_size;
  if (total_coordinates >= INT_MAX || total_coordinates < 0) {
    MS_LOG(EXCEPTION) << "Output size exceed INT_MAX";
  }
  int32_t copy_output_length = static_cast<int32_t>(total_coordinates);

  std::vector<int> output(copy_output_length, 0);
  ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output.data(), output.data());

  // Copy at most count_ * rank coordinates into the kernel's output buffer.
  int32_t actual_output_length = count_ * static_cast<int32_t>(dims_.size());
  copy_output_length = std::min(actual_output_length, copy_output_length);
  if (INT_MAX / static_cast<int>(sizeof(int32_t)) < copy_output_length) {
    MS_LOG(EXCEPTION) << "The output length is out of range!";
  }
  int32_t copy_output_bytes = copy_output_length * sizeof(int32_t);
  if (copy_output_bytes > 0) {
    memcpy_s(output_coordinate, copy_output_bytes, output.data(), copy_output_bytes);
  }
  UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim.data(), output_coordinate, mask);

  return true;
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,55 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <random>
|
||||
#include <algorithm>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
// CPU kernel that randomly samples coordinates of true elements from a
// boolean tensor, emitting the coordinates and a validity mask.
class RandomChoiceWithMaskCPUKernel : public CPUKernel {
 public:
  RandomChoiceWithMaskCPUKernel() = default;
  ~RandomChoiceWithMaskCPUKernel() override = default;

  // Reads seed/seed2/count attributes and caches the input shape.
  void InitKernel(const CNodePtr &kernel_node) override;

  // Runs the sampling; the unnamed middle parameter is the workspace list.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // "count" attribute: number of samples to emit (0 = all non-zero).
  int32_t count_{0};
  // Cached input tensor dimensions.
  std::vector<int64_t> dims_;
  int input_shape_size_{0};
  // "seed"/"seed2" attributes; seed2 takes precedence in Launch.
  int seed_{0};
  int seed2_{0};
  int input_size_{1};
  // Fallback RNG used when both seeds are zero.
  std::mt19937 generator_;
};

// Register the kernel: bool input -> (int32 coordinates, bool mask).
MS_REG_CPU_KERNEL(
  RandomChoiceWithMask,
  KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
  RandomChoiceWithMaskCPUKernel);
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_
|
|
@ -0,0 +1,223 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/cpu/roi_align_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
constexpr size_t kInputSize = 2;
|
||||
constexpr size_t kOutputSize = 1;
|
||||
} // namespace
|
||||
|
||||
// Caches the feature-map/rois shapes and the ROIAlign attributes.
template <typename T>
void ROIAlignCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  // Get the input shapes
  auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);

  // The feature map must be 4-D (N, C, H, W).
  auto x_shape_size = x_shape.size();
  if (x_shape_size != 4) {
    // NOTE(review): only logs an error; initialisation continues regardless.
    MS_LOG(ERROR) << "x shape size is " << x_shape_size << ", but should be 4.";
  }

  channels_ = x_shape[1];
  height_ = x_shape[2];
  width_ = x_shape[3];

  // rois is (rows, cols): one box per row, 4 or 5 columns (an optional
  // leading batch index, see bin_box).
  roi_rows_ = rois_shape[0];
  roi_cols_ = rois_shape[1];

  // Attributes arrive as int64/float and are narrowed to the member types.
  pooled_height_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_height"));
  pooled_width_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_width"));
  spatial_scale_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "spatial_scale"));
  sample_num_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "sample_num"));
  roi_end_mode_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "roi_end_mode"));
}
|
||||
|
||||
// Forward ROIAlign: for each output element (n, c, ph, pw), averages
// bilinearly-interpolated samples over the roi bin's sampling grid.
template <typename T>
bool ROIAlignCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                  const std::vector<kernel::AddressPtr> &,
                                  const std::vector<kernel::AddressPtr> &outputs) {
  const T *input = reinterpret_cast<T *>(inputs[0]->addr);
  const T *rois = reinterpret_cast<T *>(inputs[1]->addr);
  auto out_data = reinterpret_cast<T *>(outputs[0]->addr);

  // One work item per output element.
  size_t elem_num = roi_rows_ * channels_ * pooled_height_ * pooled_width_;
  auto task = [&](size_t start, size_t end) {
    for (size_t thread_idx = start; thread_idx < end; thread_idx++) {
      int n = thread_idx / pooled_width_ / pooled_height_ / channels_;
      const T *roi_box = rois + n * roi_cols_;
      // Skip rois whose coordinates 1 and 3 are both ~0 (degenerate box).
      // NOTE(review): skipped elements leave out_data[thread_idx] unwritten —
      // presumably the output buffer is zero-initialised upstream; confirm.
      if (roi_box[1] < static_cast<T>(0.001) && roi_box[3] < static_cast<T>(0.001) &&
          roi_box[1] > static_cast<T>(-0.001) && roi_box[3] > static_cast<T>(-0.001)) {
        continue;
      }
      int offset = -1;
      int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w;
      T bin_size_h, bin_size_w, roi_start_h, roi_start_w;

      // Decode the flat index and derive the roi bin geometry.
      bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_,
              pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h,
              &bin_size_w, &roi_start_h, &roi_start_w);

      // (n, c, ph, pw) is the base param of pooled map
      const T count_points_in_grid_cell = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w);

      // Average the bilinear samples over the bin's grid.
      T accumulate_val = static_cast<T>(0.);
      for (int iy = 0; iy < roi_bin_grid_h; iy++) {
        // Shift half point RIGHT for y / x, while previous scaled roi shift half point LEFT
        const T y = roi_start_h + static_cast<T>(ph) * bin_size_h +
                    static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h);
        for (int ix = 0; ix < roi_bin_grid_w; ix++) {
          const T x = roi_start_w + static_cast<T>(pw) * bin_size_w +
                      static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
          // bilinear interpolate by shifted y / x
          // calculate bilinear interpolation
          int x_low = 0, y_low = 0, x_high = 0, y_high = 0;
          T w1, w2, w3, w4;
          bilinear_interpolate(height_, width_, y, x, &x_low, &y_low, &x_high, &y_high, &w1, &w2, &w3, &w4);
          // Out-of-range samples (corner indices -1) contribute nothing.
          if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0 && y_low < height_ && y_high < height_ &&
              x_low < width_ && x_high < width_) {
            T v1 = input[offset + y_low * width_ + x_low];
            T v2 = input[offset + y_low * width_ + x_high];
            T v3 = input[offset + y_high * width_ + x_low];
            T v4 = input[offset + y_high * width_ + x_high];

            T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
            accumulate_val += val;
          }
        }
      }
      accumulate_val /= count_points_in_grid_cell;

      out_data[thread_idx] = accumulate_val;
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);

  return true;
}
|
||||
|
||||
template <typename T>
|
||||
void ROIAlignCPUKernel<T>::CheckParam(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (inputs.size() != kInputSize) {
|
||||
MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but ROIAlign needs " << kInputSize << " inputs.";
|
||||
}
|
||||
|
||||
if (outputs.size() != kOutputSize) {
|
||||
MS_LOG(EXCEPTION) << "Output number is: " << outputs.size() << ", but ROIAlign needs " << kOutputSize << "outputs.";
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the four neighbouring integer grid points of (y, x) and their
// bilinear interpolation weights. Samples outside [-1, height/width] get zero
// weights and all corner indices set to -1 so callers can skip them.
template <typename T>
void ROIAlignCPUKernel<T>::bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low,
                                                int *x_high, int *y_high, T *w1, T *w2, T *w3, T *w4) {
  // Tolerance for treating near-zero coordinates as exactly zero.
  constexpr float eps = 0.00007;
  if (y < static_cast<T>(-1.0) || y > static_cast<T>(height) || x < static_cast<T>(-1.0) || x > static_cast<T>(width)) {
    *w1 = *w2 = *w3 = *w4 = static_cast<T>(0);
    *x_low = *x_high = *y_low = *y_high = -1;
    return;
  }

  // low bounder is at least zero
  y = y <= static_cast<T>(.0) ? static_cast<T>(.0) : y;
  x = x <= static_cast<T>(.0) ? static_cast<T>(.0) : x;

  // top left point
  *y_low = (y <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(y)));
  *x_low = (x <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(x)));

  // bottom right point; clamp to the last row/column at the border (both
  // corners collapse onto the edge so the weights still sum to 1).
  if (*y_low >= height - 1) {
    *y_high = *y_low = height - 1;
    y = static_cast<T>(*y_low);
  } else {
    *y_high = *y_low + 1;
  }

  if (*x_low >= width - 1) {
    *x_high = *x_low = width - 1;
    x = static_cast<T>(*x_low);
  } else {
    *x_high = *x_low + 1;
  }

  // distance to nearest points
  T lx, ly, hx, hy;
  ly = y - static_cast<T>(*y_low), lx = x - static_cast<T>(*x_low);
  hy = static_cast<T>(1.) - ly, hx = static_cast<T>(1.) - lx;

  // weight is evaluated by the distance to point away.
  // the closer to point home, the more weight, the farther to point away.
  *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx;
  return;
}
|
||||
|
||||
// Decodes flat output index `thread_idx` into (n, c, ph, pw), scales the
// n-th roi box into feature-map coordinates, and derives the bin geometry:
// bin sizes, the input offset of the (batch, channel) plane, and the number
// of sampling-grid points per bin.
template <typename T>
void ROIAlignCPUKernel<T>::bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale,
                                   const int sample_num, int roi_end_mode, const int channels, const int height,
                                   const int width, const int pooled_height, const int pooled_width, int *offset,
                                   int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, int *roi_bin_grid_w,
                                   T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w) {
  // (n, c, ph, pw) is the base param of pooled map
  *pw = thread_idx % pooled_width;
  *ph = (thread_idx / pooled_width) % pooled_height;
  *c = (thread_idx / pooled_width / pooled_height) % channels;
  *n = thread_idx / pooled_width / pooled_height / channels;

  // Roi has
  // 1. 4 points, or
  // 2. indicator + 4 points (1 + 4)
  const T *roi_box = roi_boxes + (*n) * roi_cols;
  int roi_batch_ind = 0;
  if (roi_cols == 5) {
    // First column is the batch index; +0.00007 guards the float->int round.
    roi_batch_ind = static_cast<int>(rint(static_cast<float>(roi_box[0]) + static_cast<float>(0.00007)));
    roi_box++;
  }

  // Scale and shift ROI
  *roi_start_w = roi_box[0] * spatial_scale;
  *roi_start_h = roi_box[1] * spatial_scale;
  // roi_end_mode shifts the end coordinate (+1 in mode 1) before scaling.
  T roi_end_w = (roi_box[2] + static_cast<T>(roi_end_mode)) * spatial_scale;
  T roi_end_h = (roi_box[3] + static_cast<T>(roi_end_mode)) * spatial_scale;

  // New ROI height/width
  T roi_width = roi_end_w - (*roi_start_w);
  T roi_height = roi_end_h - (*roi_start_h);

  if (roi_end_mode == 0) {  // backward compatibility
    // Force malformed ROIs to be 1x1
    roi_width = roi_width > static_cast<T>(1.0) ? roi_width : static_cast<T>(1.0);
    roi_height = roi_height > static_cast<T>(1.0) ? roi_height : static_cast<T>(1.0);
  }

  // ratio of roi / pooled
  *bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
  *bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

  // Start of the (batch, channel) plane inside the input tensor.
  *offset = (roi_batch_ind * channels + (*c)) * height * width;

  // grid (int) by Sample ratio if defined, otherwise by pooled H/W
  *roi_bin_grid_h = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_height / static_cast<T>(pooled_height)));
  *roi_bin_grid_w = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_width / static_cast<T>(pooled_width)));
  return;
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,72 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// CPU implementation of the ROIAlign operator (region-of-interest pooling
// with bilinear sampling). T is the element type (float or float16).
template <typename T>
class ROIAlignCPUKernel : public CPUKernel {
 public:
  ROIAlignCPUKernel() = default;
  ~ROIAlignCPUKernel() override = default;

  // Caches shapes and attributes (pooled size, spatial scale, sample num, ...).
  void InitKernel(const CNodePtr &kernel_node) override;

  // Computes the pooled output; the unnamed middle parameter is workspace.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // Validates runtime input/output counts.
  void CheckParam(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);

  // Corner indices and weights for bilinear sampling at (y, x).
  void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, int *x_high,
                            int *y_high, T *w1, T *w2, T *w3, T *w4);

  // Decodes a flat output index and derives the roi bin geometry.
  void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, const int sample_num,
               int roi_end_mode, const int channels, const int height, const int width, const int pooled_height,
               const int pooled_width, int *offset, int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h,
               int *roi_bin_grid_w, T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w);

  // Attributes read in InitKernel.
  int pooled_height_;
  int pooled_width_;
  T spatial_scale_;
  int sample_num_;
  int roi_end_mode_;

  // Cached shapes: rois is (roi_rows_, roi_cols_); the feature map is
  // (N, channels_, height_, width_).
  int roi_rows_;
  int roi_cols_;
  int channels_;
  int height_;
  int width_;
};

// Register float32 and float16 variants of the kernel.
MS_REG_CPU_KERNEL_T(
  ROIAlign,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ROIAlignCPUKernel, float);

MS_REG_CPU_KERNEL_T(
  ROIAlign,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  ROIAlignCPUKernel, float16);
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_
|
|
@ -0,0 +1,280 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.h"
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
|
||||
// Lock-free atomic add of `val` into *address. U must be an unsigned integer
// type with the same byte width as T: the value is bit-reinterpreted as a U
// word so it can be updated with a compare-and-swap loop.
template <typename T, typename U>
void AtomicAddTask(T *address, T val) {
  U *word_ptr = reinterpret_cast<U *>(address);
  U observed = *word_ptr;
  U expected;
  T updated;
  do {
    expected = observed;
    // Reinterpret the observed word as T, add, then reinterpret the sum back
    // to a word for the CAS.
    updated = *reinterpret_cast<T *>(&expected) + val;
    observed = __sync_val_compare_and_swap(word_ptr, expected, *reinterpret_cast<U *>(&updated));
    // Retry until no other thread modified the word between read and swap.
  } while (expected != observed);
}
|
||||
|
||||
template <typename T>
|
||||
void AtomicAdd(T *address, T val) {
|
||||
switch (sizeof(T)) {
|
||||
case 1: {
|
||||
AtomicAddTask<T, uint8_t>(address, val);
|
||||
break;
|
||||
}
|
||||
case 2: {
|
||||
AtomicAddTask<T, uint16_t>(address, val);
|
||||
break;
|
||||
}
|
||||
case 4: {
|
||||
AtomicAddTask<T, uint32_t>(address, val);
|
||||
break;
|
||||
}
|
||||
case 8: {
|
||||
AtomicAddTask<T, uint64_t>(address, val);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void ROIAlignGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
|
||||
// Get the number of the input args
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(ERROR) << "Input number is: " << input_num << ", but ROIAlignGrad needs 2 inputs.";
|
||||
}
|
||||
|
||||
// Get the number of the output args
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is: " << output_num << ", but ROIAlignGrad needs 1 output.";
|
||||
}
|
||||
|
||||
// Get the input shapes
|
||||
auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto dy_shape_size = dy_shape.size();
|
||||
if (dy_shape_size != 4) {
|
||||
MS_LOG(ERROR) << "dy shape size is " << dy_shape_size << ", but should be 4.";
|
||||
}
|
||||
}
|
||||
|
||||
// Caches the rois shape, the xdiff (output gradient) shape and the ROIAlign
// attributes needed by Launch.
template <typename T>
void ROIAlignGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  CheckParam(kernel_node);

  // rois is (rows, cols): one box per row, 4 or 5 columns (an optional
  // leading batch index, see bin_box).
  auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  roi_rows_ = rois_shape[0];
  roi_cols_ = rois_shape[1];

  // "xdiff_shape" gives the (N, C, H, W) shape of the gradient to produce;
  // attribute values arrive as int64 and are narrowed to int.
  std::vector<int64_t> xdiff_shape_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "xdiff_shape");
  (void)std::transform(xdiff_shape_me.begin(), xdiff_shape_me.end(), std::back_inserter(xdiff_shape_),
                       [](const int64_t &value) { return static_cast<int>(value); });
  pooled_height_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_height"));
  pooled_width_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_width"));
  spatial_scale_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "spatial_scale"));
  sample_num_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "sample_num"));
  // The backward pass always uses end mode 1 (end coordinate shifted by +1
  // before scaling in bin_box).
  roi_end_mode_ = 1;

  batch_size_ = xdiff_shape_[0];
  channels_ = xdiff_shape_[1];
  height_ = xdiff_shape_[2];
  width_ = xdiff_shape_[3];
}
|
||||
|
||||
// Backward ROIAlign: zero-fills dx, then scatters each pooled gradient value
// back to the four bilinear-interpolation corners of every sampling point,
// using atomic adds because bins of different rois may overlap.
template <typename T>
bool ROIAlignGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  const T *dy = reinterpret_cast<T *>(inputs[0]->addr);
  const T *rois = reinterpret_cast<T *>(inputs[1]->addr);
  T *dx = reinterpret_cast<T *>(outputs[0]->addr);

  // Pass 1: zero-initialise the whole gradient buffer in parallel.
  size_t size_init = batch_size_ * channels_ * height_ * width_;
  auto task1 = [&](size_t start, size_t end) {
    for (size_t thread_idx = start; thread_idx < end; thread_idx++) {
      dx[thread_idx] = static_cast<T>(0.);
    }
  };
  CPUKernelUtils::ParallelFor(task1, size_init);

  // Pass 2: one work item per pooled element (n, c, ph, pw).
  size_t elem_num = roi_rows_ * channels_ * pooled_height_ * pooled_width_;
  auto task2 = [&](size_t start, size_t end) {
    for (size_t thread_idx = start; thread_idx < end; thread_idx++) {
      int n = thread_idx / pooled_width_ / pooled_height_ / channels_;
      const T *roi_box = rois + n * roi_cols_;
      // Skip rois whose coordinates 1 and 3 are both ~0 (degenerate box);
      // such rois contribute no gradient.
      if (roi_box[1] < static_cast<T>(0.001) && roi_box[3] < static_cast<T>(0.001) &&
          roi_box[1] > static_cast<T>(-0.001) && roi_box[3] > static_cast<T>(-0.001)) {
        continue;
      }
      int offset = -1;
      int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w;
      T bin_size_h, bin_size_w, roi_start_h, roi_start_w;

      // Decode the flat index and derive the roi bin geometry.
      bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_,
              pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h,
              &bin_size_w, &roi_start_h, &roi_start_w);

      // (n, c, ph, pw) is the base param of pooled map
      const T count_points_in_grid_cell = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w);

      // Upstream gradient for this pooled element.
      int top_offset = (n * channels_ + c) * pooled_height_ * pooled_width_;
      const T *offset_top_diff = dy + top_offset;
      const T top_diff_this_bin = offset_top_diff[ph * pooled_width_ + pw];

      for (int iy = 0; iy < roi_bin_grid_h; iy++) {
        // Shift half point RIGHT for y / x, while previous scaled roi shift half point LEFT
        const T y = roi_start_h + static_cast<T>(ph) * bin_size_h +
                    static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h);
        for (int ix = 0; ix < roi_bin_grid_w; ix++) {
          const T x = roi_start_w + static_cast<T>(pw) * bin_size_w +
                      static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
          // bilinear interpolate by shifted y / x
          // calculate bilinear interpolation
          int x_low = 0, y_low = 0, x_high = 0, y_high = 0;
          T w1, w2, w3, w4;
          bilinear_interpolate(height_, width_, y, x, &x_low, &y_low, &x_high, &y_high, &w1, &w2, &w3, &w4);
          // Out-of-range samples (corner indices -1) contribute nothing.
          if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0 && y_low < height_ && y_high < height_ &&
              x_low < width_ && x_high < width_) {
            // Split the (averaged) upstream gradient across the 4 corners by
            // their bilinear weights.
            T g1 = top_diff_this_bin * w1 / count_points_in_grid_cell;
            T g2 = top_diff_this_bin * w2 / count_points_in_grid_cell;
            T g3 = top_diff_this_bin * w3 / count_points_in_grid_cell;
            T g4 = top_diff_this_bin * w4 / count_points_in_grid_cell;

            T *dx_1 = dx + offset + y_low * width_ + x_low;
            T *dx_2 = dx + offset + y_low * width_ + x_high;
            T *dx_3 = dx + offset + y_high * width_ + x_low;
            T *dx_4 = dx + offset + y_high * width_ + x_high;

            // Atomic: different work items may hit the same dx element.
            AtomicAdd(dx_1, g1);
            AtomicAdd(dx_2, g2);
            AtomicAdd(dx_3, g3);
            AtomicAdd(dx_4, g4);
          }
        }
      }
    }
  };
  CPUKernelUtils::ParallelFor(task2, elem_num);
  return true;
}
|
||||
|
||||
// Computes the four neighbouring integer grid points of (y, x) and their
// bilinear interpolation weights (identical to the forward kernel's helper).
// Samples outside [-1, height/width] get zero weights and corner indices -1.
template <typename T>
void ROIAlignGradCPUKernel<T>::bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low,
                                                    int *x_high, int *y_high, T *w1, T *w2, T *w3, T *w4) {
  // Tolerance for treating near-zero coordinates as exactly zero.
  constexpr float eps = 0.00007;
  if (y < static_cast<T>(-1.0) || y > static_cast<T>(height) || x < static_cast<T>(-1.0) || x > static_cast<T>(width)) {
    *w1 = *w2 = *w3 = *w4 = static_cast<T>(0);
    *x_low = *x_high = *y_low = *y_high = -1;
    return;
  }

  // low bounder is at least zero
  y = y <= static_cast<T>(.0) ? static_cast<T>(.0) : y;
  x = x <= static_cast<T>(.0) ? static_cast<T>(.0) : x;

  // top left point
  *y_low = (y <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(y)));
  *x_low = (x <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(x)));

  // bottom right point; clamp to the last row/column at the border (both
  // corners collapse onto the edge so the weights still sum to 1).
  if (*y_low >= height - 1) {
    *y_high = *y_low = height - 1;
    y = static_cast<T>(*y_low);
  } else {
    *y_high = *y_low + 1;
  }

  if (*x_low >= width - 1) {
    *x_high = *x_low = width - 1;
    x = static_cast<T>(*x_low);
  } else {
    *x_high = *x_low + 1;
  }

  // distance to nearest points
  T lx, ly, hx, hy;
  ly = y - static_cast<T>(*y_low), lx = x - static_cast<T>(*x_low);
  hy = static_cast<T>(1.) - ly, hx = static_cast<T>(1.) - lx;

  // weight is evaluated by the distance to point away.
  // the closer to point home, the more weight, the farther to point away.
  *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx;
  return;
}
|
||||
|
||||
// Decodes flat output index `thread_idx` into (n, c, ph, pw), scales the
// n-th roi box into feature-map coordinates, and derives the bin geometry
// (identical to the forward kernel's helper).
template <typename T>
void ROIAlignGradCPUKernel<T>::bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale,
                                       const int sample_num, int roi_end_mode, const int channels, const int height,
                                       const int width, const int pooled_height, const int pooled_width, int *offset,
                                       int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, int *roi_bin_grid_w,
                                       T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w) {
  // (n, c, ph, pw) is the base param of pooled map
  *pw = thread_idx % pooled_width;
  *ph = (thread_idx / pooled_width) % pooled_height;
  *c = (thread_idx / pooled_width / pooled_height) % channels;
  *n = thread_idx / pooled_width / pooled_height / channels;

  // Roi has
  // 1. 4 points, or
  // 2. indicator + 4 points (1 + 4)
  const T *roi_box = roi_boxes + (*n) * roi_cols;
  int roi_batch_ind = 0;
  if (roi_cols == 5) {
    // First column is the batch index; +0.00007 guards the float->int round.
    roi_batch_ind = static_cast<int>(rint(static_cast<float>(roi_box[0]) + static_cast<float>(0.00007)));
    roi_box++;
  }

  // Scale and shift ROI
  *roi_start_w = roi_box[0] * spatial_scale;
  *roi_start_h = roi_box[1] * spatial_scale;
  // roi_end_mode shifts the end coordinate (+1 in mode 1) before scaling.
  T roi_end_w = (roi_box[2] + static_cast<T>(roi_end_mode)) * spatial_scale;
  T roi_end_h = (roi_box[3] + static_cast<T>(roi_end_mode)) * spatial_scale;

  // New ROI height/width
  T roi_width = roi_end_w - (*roi_start_w);
  T roi_height = roi_end_h - (*roi_start_h);

  if (roi_end_mode == 0) {  // backward compatibility
    // Force malformed ROIs to be 1x1
    roi_width = roi_width > static_cast<T>(1.0) ? roi_width : static_cast<T>(1.0);
    roi_height = roi_height > static_cast<T>(1.0) ? roi_height : static_cast<T>(1.0);
  }

  // ratio of roi / pooled
  *bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
  *bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);

  // Start of the (batch, channel) plane inside the gradient tensor.
  *offset = (roi_batch_ind * channels + (*c)) * height * width;

  // grid (int) by Sample ratio if defined, otherwise by pooled H/W
  *roi_bin_grid_h = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_height / static_cast<T>(pooled_height)));
  *roi_bin_grid_w = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_width / static_cast<T>(pooled_width)));
  return;
}
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,75 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_
// NOTE: the guard previously read ..._ROI_ALIGN_CPU_KERNEL_H_ (missing GRAD),
// which collides with the forward roi_align_cpu_kernel.h guard and silently
// drops one of the two headers when both are included in a TU.
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"

namespace mindspore {
namespace kernel {
// CPU kernel computing the gradient of ROIAlign: scatters the pooled-map
// gradient back into the feature-map gradient using bilinear weights.
template <typename T>
class ROIAlignGradCPUKernel : public CPUKernel {
 public:
  ROIAlignGradCPUKernel() = default;
  ~ROIAlignGradCPUKernel() override = default;

  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  void CheckParam(const CNodePtr &kernel_node);

  // Computes the four bilinear corner points and weights for sample (y, x).
  void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, int *x_high,
                            int *y_high, T *w1, T *w2, T *w3, T *w4);

  // Decomposes a flat pooled-map index into (n, c, ph, pw) and derives the
  // sampling-grid geometry for that ROI bin.
  void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, const int sample_num,
               int roi_end_mode, const int channels, const int height, const int width, const int pooled_height,
               const int pooled_width, int *offset, int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h,
               int *roi_bin_grid_w, T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w);

  std::vector<int> xdiff_shape_;
  int pooled_height_;
  int pooled_width_;
  T spatial_scale_;
  int sample_num_;
  int roi_end_mode_;

  int roi_rows_;
  int roi_cols_;
  int batch_size_;
  int channels_;
  int height_;
  int width_;
};

MS_REG_CPU_KERNEL_T(
  ROIAlignGrad,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ROIAlignGradCPUKernel, float);

MS_REG_CPU_KERNEL_T(
  ROIAlignGrad,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  ROIAlignGradCPUKernel, float16);

}  // namespace kernel
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_
|
|
@ -0,0 +1,127 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.h"
|
||||
#include <string>
|
||||
#include "runtime/device/cpu/cpu_device_address.h"
|
||||
#include "common/thread_pool.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
namespace {
|
||||
template <typename S, typename T>
|
||||
void Compute(const ComputeParams<S, T> *params, const size_t start, const size_t end) {
|
||||
MS_EXCEPTION_IF_NULL(params);
|
||||
T *target = params->target_;
|
||||
S *indices = params->indices_;
|
||||
T *updates = params->updates_;
|
||||
std::vector<int> *out_strides = params->out_strides_;
|
||||
MS_EXCEPTION_IF_NULL(out_strides);
|
||||
|
||||
for (size_t i = start; i < end; ++i) {
|
||||
int offset = 0;
|
||||
for (int j = 0; j < params->indices_unit_rank_; ++j) {
|
||||
auto index = indices[i * params->indices_unit_rank_ + j];
|
||||
if (index < 0) {
|
||||
MS_LOG(EXCEPTION) << "Indices contains element " << index << " less than 0.";
|
||||
}
|
||||
offset += index * out_strides->at(j) * params->unit_size_;
|
||||
}
|
||||
target[offset] += updates[params->unit_size_ * i];
|
||||
}
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Validates the indices/updates/output shapes and precomputes the quantities
// Launch needs: indices_unit_rank_ (length of one index tuple), unit_size_
// (elements per update unit), num_units_ (number of units) and out_strides_
// (output strides per indexed dimension, measured in units).
template <typename S, typename T>
void ScatterNdCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) {
  Check(kernel_node);
  auto shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto updates_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  // The last indices dimension gives how many output dims one tuple addresses.
  auto indices_unit_rank = indices_shape.back();
  if (indices_unit_rank > shape.size()) {
    MS_LOG(EXCEPTION) << "Value of last dimension of indices is greater than shape rank";
  }
  if (indices_shape.size() < 2) {
    MS_LOG(EXCEPTION) << "Indices has dimension less than 2";
  }
  // updates must be indices_shape[:-1] ++ shape[indices_unit_rank:].
  if (updates_shape.size() != indices_shape.size() - 1 + shape.size() - indices_unit_rank) {
    MS_LOG(EXCEPTION) << "The ranks of update and indices are inconsistent";
  }
  for (size_t i = 0; i < indices_shape.size() - 1; ++i) {
    if (updates_shape[i] != indices_shape[i]) {
      MS_LOG(EXCEPTION) << "The shape of updates and indices are different in dimension " << i << " .";
    }
  }
  indices_unit_rank_ = SizeToInt(indices_unit_rank);
  // unit_size_: product of the trailing (non-indexed) updates dimensions.
  unit_size_ = 1;
  for (size_t i = indices_shape.size() - 1; i < updates_shape.size(); ++i) {
    unit_size_ *= SizeToInt(updates_shape[i]);
  }
  // num_units_: product of the leading updates dimensions (one per index tuple).
  num_units_ = 1;
  num_units_ *= updates_shape[indices_shape.size() - 2];
  for (int i = SizeToInt(indices_shape.size()) - 3; i >= 0; i--) {
    num_units_ *= updates_shape[i];
  }
  // Row-major strides over the first indices_unit_rank_ output dimensions,
  // built innermost-first and then reversed into dimension order.
  int out_stride = 1;
  out_strides_.push_back(out_stride);
  for (int i = indices_unit_rank_ - 2; i >= 0; i--) {
    out_stride *= shape[i + 1];
    out_strides_.push_back(out_stride);
  }
  reverse(out_strides_.begin(), out_strides_.end());
}
|
||||
|
||||
template <typename S, typename T>
|
||||
bool ScatterNdCPUKernel<S, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> &,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
auto target = reinterpret_cast<T *>(outputs[0]->addr);
|
||||
auto target_init = memset_s(target, outputs[0]->size / sizeof(T), static_cast<T>(0.0), outputs[0]->size / sizeof(T));
|
||||
if (target_init != EOK) {
|
||||
MS_LOG(EXCEPTION) << "ScatterNdCPUKernel Launch task memset failed.";
|
||||
}
|
||||
ComputeParams<S, T> params;
|
||||
params.target_ = target;
|
||||
params.indices_ = reinterpret_cast<S *>(inputs[0]->addr);
|
||||
params.updates_ = reinterpret_cast<T *>(inputs[1]->addr);
|
||||
params.target_mem_size_ = outputs[0]->size;
|
||||
params.unit_size_ = unit_size_;
|
||||
params.indices_unit_rank_ = indices_unit_rank_;
|
||||
params.out_strides_ = &out_strides_;
|
||||
|
||||
auto task = [&](size_t start, size_t end) {
|
||||
for (size_t idx = start; idx < end; idx++) {
|
||||
Compute<S, T>(¶ms, idx, idx + 1);
|
||||
}
|
||||
};
|
||||
CPUKernelUtils::ParallelFor(task, num_units_);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename S, typename T>
|
||||
void ScatterNdCPUKernel<S, T>::Check(const CNodePtr &kernel_node) {
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 2) {
|
||||
MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterNd needs 2 input.";
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterNd needs 1 output.";
|
||||
}
|
||||
}
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,150 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
|
||||
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// Per-launch scatter parameters shared by all worker threads.
template <typename S, typename T>
struct ComputeParams {
  T *target_{nullptr};    // output buffer the updates are scattered into
  S *indices_{nullptr};   // flattened index tuples, one tuple per update unit
  T *updates_{nullptr};   // flattened updates, unit_size_ elements per unit
  int unit_size_{0};      // number of elements in one update unit
  int indices_unit_rank_{0};  // entries in one index tuple (last dim of indices)
  std::vector<int> *out_strides_{nullptr};  // output strides in units, per indexed dim
  size_t target_mem_size_{0};  // output buffer size in bytes
};
|
||||
|
||||
// CPU kernel for the ScatterNd operator: builds a zero tensor of the inferred
// output shape and scatter-adds `updates` at the positions given by `indices`.
// S: index element type; T: data element type.
template <typename S, typename T>
class ScatterNdCPUKernel : public CPUKernel {
 public:
  ScatterNdCPUKernel() = default;
  ~ScatterNdCPUKernel() override = default;

  // Validates input/output shapes and precomputes unit sizes and strides.
  void InitKernel(const CNodePtr &kernel_node) override;

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  void Check(const CNodePtr &kernel_node);

  int unit_size_{0};          // elements per update unit (trailing update dims)
  size_t num_units_{0};       // number of update units (leading indices dims)
  int indices_unit_rank_{0};  // last dimension of the indices tensor
  std::vector<int> out_strides_;  // per-indexed-dim output strides, in units
};
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
|
||||
ScatterNdCPUKernel, int64_t, double);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterNdCPUKernel, int64_t, float);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
|
||||
ScatterNdCPUKernel, int64_t, int64_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterNdCPUKernel, int64_t, int32_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
|
||||
ScatterNdCPUKernel, int64_t, int16_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterNdCPUKernel, int64_t, int8_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
|
||||
ScatterNdCPUKernel, int64_t, uint64_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
|
||||
ScatterNdCPUKernel, int64_t, uint32_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
|
||||
ScatterNdCPUKernel, int64_t, uint16_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterNdCPUKernel, int64_t, uint8_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
|
||||
ScatterNdCPUKernel, int32_t, double);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterNdCPUKernel, int32_t, float);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
|
||||
ScatterNdCPUKernel, int32_t, int64_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterNdCPUKernel, int32_t, int32_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
|
||||
ScatterNdCPUKernel, int32_t, int16_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterNdCPUKernel, int32_t, int8_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
|
||||
ScatterNdCPUKernel, int32_t, uint64_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
|
||||
ScatterNdCPUKernel, int32_t, uint32_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd,
|
||||
KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
|
||||
ScatterNdCPUKernel, int32_t, uint16_t);
|
||||
|
||||
MS_REG_CPU_KERNEL_T_S(
|
||||
ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterNdCPUKernel, int32_t, uint8_t);
|
||||
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_
|
|
@ -24,6 +24,7 @@ from .split import _split_cpu
|
|||
from .adam import _adam_cpu
|
||||
from .arg_max import _arg_max_cpu
|
||||
from .arg_min_with_value import _arg_min_with_value_cpu
|
||||
from .arg_max_with_value import _arg_max_with_value_cpu
|
||||
from .bias_add import _bias_add_cpu
|
||||
from .bias_add_grad import _bias_add_grad_cpu
|
||||
from .dropout import _dropout_cpu
|
||||
|
|
|
@ -0,0 +1,31 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
"""ArgMaxWithValue op"""
|
||||
from mindspore.ops.op_info_register import op_info_register, CpuRegOp, DataType
|
||||
|
||||
# CPU registration for ArgMaxWithValue: one required input tensor and two
# required outputs (index tensor, max-value tensor). Indices are always int32;
# values mirror the input dtype (float32 or float16).
arg_max_with_value_op_info = CpuRegOp("ArgMaxWithValue") \
    .input(0, "x", "required") \
    .output(0, "indice", "required") \
    .output(1, "values", "required") \
    .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \
    .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \
    .get_op_info()


@op_info_register(arg_max_with_value_op_info)
def _arg_max_with_value_cpu():
    """ArgMaxWithValue cpu register"""
    return
|
|
@ -1785,7 +1785,7 @@ class ArgMaxWithValue(PrimitiveWithInfer):
|
|||
TypeError: If `axis` is not an int.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> input_x = Tensor(np.array([0.0, 0.4, 0.6, 0.7, 0.1]), mindspore.float32)
|
||||
|
@ -3484,7 +3484,7 @@ class ScatterNd(PrimitiveWithInfer):
|
|||
ValueError: If any element of `shape` is less than 1.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> op = ops.ScatterNd()
|
||||
|
|
|
@ -59,7 +59,7 @@ class CropAndResize(PrimitiveWithInfer):
|
|||
ValueError: If `method` is not one of 'bilinear', 'nearest', 'bilinear_v2'.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> class CropAndResizeNet(nn.Cell):
|
||||
|
|
|
@ -4257,7 +4257,7 @@ class NMSWithMask(PrimitiveWithInfer):
|
|||
Tensor is not float16 or float32.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> bbox = np.array([[100.0, 100.0, 50.0, 68.0, 0.63], [150.0, 75.0, 165.0, 115.0, 0.55],
|
||||
|
|
|
@ -4262,7 +4262,7 @@ class ROIAlign(PrimitiveWithInfer):
|
|||
TypeError: If `features` or `rois` is not a Tensor.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> input_tensor = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32)
|
||||
|
|
|
@ -168,7 +168,7 @@ class BoundingBoxEncode(PrimitiveWithInfer):
|
|||
TypeError: If `anchor_box` or `groundtruth_box` is not a Tensor.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> anchor_box = Tensor([[2, 2, 2, 3], [2, 2, 2, 3]], mindspore.float32)
|
||||
|
@ -230,7 +230,7 @@ class BoundingBoxDecode(PrimitiveWithInfer):
|
|||
TypeError: If `anchor_box` or `deltas` is not a Tensor.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> anchor_box = Tensor([[4, 1, 2, 1], [2, 2, 2, 3]], mindspore.float32)
|
||||
|
@ -293,7 +293,7 @@ class CheckValid(PrimitiveWithInfer):
|
|||
TypeError: If dtype of `bboxes` or `img_metas` is neither float16 nor float32.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> import mindspore
|
||||
|
|
|
@ -404,7 +404,7 @@ class RandomChoiceWithMask(PrimitiveWithInfer):
|
|||
TypeError: If `input_x` is not a Tensor.
|
||||
|
||||
Supported Platforms:
|
||||
``Ascend`` ``GPU``
|
||||
``Ascend`` ``GPU`` ``CPU``
|
||||
|
||||
Examples:
|
||||
>>> rnd_choice_mask = ops.RandomChoiceWithMask()
|
||||
|
|
|
@ -0,0 +1,146 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetArgmaxWithValue(nn.Cell):
    # Bundles three ArgMaxWithValue ops to exercise axis=0, axis=-1 and the
    # default axis in a single forward pass.
    def __init__(self):
        super(NetArgmaxWithValue, self).__init__()
        axis1 = 0
        axis2 = -1
        self.argmax1 = P.ArgMaxWithValue(axis1)
        self.argmax2 = P.ArgMaxWithValue(axis2)
        self.argmax3 = P.ArgMaxWithValue()

    def construct(self, x):
        # Each element of the returned tuple is an (indices, values) pair.
        return (self.argmax1(x), self.argmax2(x), self.argmax3(x))
|
||||
|
||||
|
||||
class NetArgmaxWithValueBig(nn.Cell):
    # Single ArgMaxWithValue over a configurable axis (used for 3-D inputs).
    def __init__(self, axis=0):
        super(NetArgmaxWithValueBig, self).__init__()
        self.argmax = P.ArgMaxWithValue(axis)

    def construct(self, x):
        # Returns an (indices, values) pair.
        return self.argmax(x)
|
||||
|
||||
|
||||
def argmaxwithvalue_base(data_type):
    """Run the three-axis ArgMaxWithValue net on a fixed 4x3 matrix and check
    indices and values on CPU in both PyNative and Graph mode."""
    x = Tensor(np.array([[1., 20., 5.],
                         [67., 8., 9.],
                         [130., 24., 15.],
                         [0.3, -0.4, -15.]]).astype(data_type))
    # (expected indices, expected values) for axis=0 and for axis=-1.
    expect_axis0 = (np.array([2, 2, 2]).astype(data_type),
                    np.array([130, 24, 15]).astype(data_type))
    expect_axis_last = (np.array([1, 0, 0, 0]).astype(data_type),
                        np.array([20, 67, 130, 0.3]).astype(data_type))

    for mode in (context.PYNATIVE_MODE, context.GRAPH_MODE):
        context.set_context(mode=mode, device_target="CPU")
        out_axis0, out_axis_last, out_default = NetArgmaxWithValue()(x)
        # The default axis matches axis=0.
        for out, (indices, values) in ((out_axis0, expect_axis0),
                                       (out_axis_last, expect_axis_last),
                                       (out_default, expect_axis0)):
            assert (out[0].asnumpy() == indices).all()
            assert (out[1].asnumpy() == values).all()
|
||||
|
||||
|
||||
def argmaxwithvalue_3d(data_type, shape_x):
    """Check ArgMaxWithValue over every axis of a random 3-D tensor against
    numpy's argmax/maximum.reduce."""
    np.random.seed(2)
    x_np = np.random.random(shape_x).astype(data_type)
    x = Tensor(x_np)

    for axis in (0, 1, 2):
        indices, values = NetArgmaxWithValueBig(axis)(x)
        assert (indices.asnumpy() == np.argmax(x_np, axis=axis)).all()
        assert (values.asnumpy() == np.maximum.reduce(x_np, axis)).all()
|
||||
|
||||
|
||||
# Pytest entry points: each runs the CPU ArgMaxWithValue kernel and checks it
# against the numpy reference in the helpers above.
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_base_float32():
    argmaxwithvalue_base(np.float32)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_base_float16():
    argmaxwithvalue_base(np.float16)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_3d_float32():
    # Exercised in both PyNative and Graph mode.
    shape_x = (2, 32, 256)
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_3d_float16():
    shape_x = (2, 64, 128)
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float16, shape_x)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_3d_big_float32():
    # Degenerate last axis (length 1) with a large leading volume.
    shape_x = (128, 1024, 1)
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)
|
|
@ -0,0 +1,60 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetBoundingBoxDecode(nn.Cell):
    # Thin wrapper around P.BoundingBoxDecode with a fixed max_shape and
    # wh_ratio_clip so the test only varies means/stds.
    def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)):
        super(NetBoundingBoxDecode, self).__init__()
        self.decode = P.BoundingBoxDecode(max_shape=(768, 1280), means=means, stds=stds,
                                          wh_ratio_clip=0.016)

    def construct(self, anchor, groundtruth):
        return self.decode(anchor, groundtruth)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_boundingbox_decode():
    """BoundingBoxDecode on CPU must reproduce the precomputed deltas in both
    Graph and PyNative mode."""
    anchor = np.array([[4, 1, 2, 1], [2, 2, 2, 3]], np.float32)
    deltas = np.array([[3, 1, 2, 2], [1, 2, 1, 4]], np.float32)
    means = (0.1, 0.1, 0.2, 0.2)
    stds = (2.0, 2.0, 3.0, 3.0)
    anchor_box = Tensor(anchor, mindspore.float32)
    deltas_box = Tensor(deltas, mindspore.float32)
    expect_deltas = np.array([[28.6500, 0.0000, 0.0000, 33.8500],
                              [0.0000, 0.0000, 15.8663, 72.7000]], np.float32)

    error = np.ones(shape=[2, 4]) * 1.0e-4

    for mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=mode, device_target='CPU')
        output = NetBoundingBoxDecode(means, stds)(anchor_box, deltas_box)
        diff = output.asnumpy() - expect_deltas
        assert np.all(abs(diff) < error)
|
|
@ -0,0 +1,80 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetBoundingBoxEncode(nn.Cell):
    # Thin wrapper around P.BoundingBoxEncode so the test only varies means/stds.
    def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)):
        super(NetBoundingBoxEncode, self).__init__()
        self.encode = P.BoundingBoxEncode(means=means, stds=stds)

    def construct(self, anchor, groundtruth):
        return self.encode(anchor, groundtruth)
|
||||
|
||||
def bbox2delta(proposals, gt, means, stds):
    """Numpy reference for box regression encoding.

    Converts corner-format boxes (x1, y1, x2, y2) to (dx, dy, dw, dh) deltas
    from `proposals` to `gt`, then normalizes each component by the given
    per-component means and stds. Broadcasts over leading dimensions.
    """
    def center_size(boxes):
        # Corner format -> center (cx, cy) and +1-convention width/height.
        cx = (boxes[..., 0] + boxes[..., 2]) * 0.5
        cy = (boxes[..., 1] + boxes[..., 3]) * 0.5
        w = boxes[..., 2] - boxes[..., 0] + 1.0
        h = boxes[..., 3] - boxes[..., 1] + 1.0
        return cx, cy, w, h

    px, py, pw, ph = center_size(proposals)
    gx, gy, gw, gh = center_size(gt)

    raw = [(gx - px) / pw,
           (gy - py) / ph,
           np.log(gw / pw),
           np.log(gh / ph)]
    means = np.array(means, np.float32)
    stds = np.array(stds, np.float32)
    deltas = np.stack([(raw[k] - means[k]) / stds[k] for k in range(4)], axis=-1)

    return deltas
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_boundingbox_encode():
    """BoundingBoxEncode on CPU matches the numpy reference (bbox2delta)
    in both graph and pynative modes."""
    means = (0.1, 0.1, 0.2, 0.2)
    stds = (2.0, 2.0, 3.0, 3.0)
    anchor = np.array([[4, 1, 6, 9], [2, 5, 5, 9]]).astype(np.float32)
    gt = np.array([[3, 2, 7, 7], [1, 5, 5, 8]]).astype(np.float32)
    anchor_box = Tensor(anchor, mindspore.float32)
    groundtruth_box = Tensor(gt, mindspore.float32)
    expect_deltas = bbox2delta(anchor, gt, means, stds)
    error = np.ones(shape=[2, 4]) * 1.0e-6

    # Exercise the operator in both execution modes, graph mode first.
    for mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=mode, device_target='CPU')
        boundingbox_encode = NetBoundingBoxEncode(means, stds)
        output = boundingbox_encode(anchor_box, groundtruth_box)
        diff = output.asnumpy() - expect_deltas
        assert np.all(abs(diff) < error)
|
|
@ -0,0 +1,86 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class NetCheckValid(nn.Cell):
    """Thin Cell wrapper around P.CheckValid for testing."""

    def __init__(self):
        super(NetCheckValid, self).__init__()
        self.valid = P.CheckValid()

    def construct(self, anchor, image_metas):
        # Returns a bool mask: True where the anchor box lies inside the image.
        return self.valid(anchor, image_metas)
|
||||
|
||||
def check_valid(nptype):
    """Run CheckValid on CPU for the given numpy dtype and verify the mask
    in both graph and pynative modes.

    Args:
        nptype: numpy dtype used for the anchor and image_metas fixtures.

    Only the first box fits inside the 768x1280 image, so the expected
    validity mask is [True, False, False].
    """
    anchor = np.array([[50, 0, 100, 700], [-2, 2, 8, 100], [10, 20, 300, 2000]], nptype)
    image_metas = np.array([768, 1280, 1], nptype)
    anchor_box = Tensor(anchor)
    image_metas_box = Tensor(image_metas)
    # np.bool was removed in NumPy 1.24; np.bool_ produces the same bool dtype.
    expect = np.array([True, False, False], np.bool_)

    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    boundingbox_decode = NetCheckValid()
    output = boundingbox_decode(anchor_box, image_metas_box)
    assert np.array_equal(output.asnumpy(), expect)

    context.set_context(mode=context.PYNATIVE_MODE, device_target='CPU')
    boundingbox_decode = NetCheckValid()
    output = boundingbox_decode(anchor_box, image_metas_box)
    assert np.array_equal(output.asnumpy(), expect)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_float32():
    """CheckValid CPU kernel handles float32 inputs."""
    check_valid(np.float32)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_float16():
    """CheckValid CPU kernel handles float16 inputs."""
    check_valid(np.float16)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_int16():
    """CheckValid CPU kernel handles int16 inputs."""
    check_valid(np.int16)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_uint8():
    """CheckValid CPU kernel handles uint8 inputs.

    uint8 gets a dedicated fixture because the shared one in check_valid
    uses coordinates (e.g. 700, 2000) that do not fit an unsigned 8-bit
    range. Only the third box sticks out of the 76x128 image.
    """
    anchor = np.array([[5, 0, 10, 70], [2, 2, 8, 10], [1, 2, 30, 200]], np.uint8)
    image_metas = np.array([76, 128, 1], np.uint8)
    anchor_box = Tensor(anchor)
    image_metas_box = Tensor(image_metas)
    # np.bool was removed in NumPy 1.24; np.bool_ produces the same bool dtype.
    expect = np.array([True, True, False], np.bool_)

    context.set_context(mode=context.GRAPH_MODE, device_target='CPU')
    boundingbox_decode = NetCheckValid()
    output = boundingbox_decode(anchor_box, image_metas_box)
    assert np.array_equal(output.asnumpy(), expect)

    context.set_context(mode=context.PYNATIVE_MODE, device_target='CPU')
    boundingbox_decode = NetCheckValid()
    output = boundingbox_decode(anchor_box, image_metas_box)
    assert np.array_equal(output.asnumpy(), expect)
|
|
@ -0,0 +1,423 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from mindspore import context, Tensor
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore import nn
|
||||
|
||||
|
||||
class NetCropAndResize(nn.Cell):
    """Thin Cell wrapper around P.CropAndResize for testing.

    method_: interpolation mode ("bilinear", "nearest", "bilinear_v2").
    extrapolation_value_: value filled in for sample points outside the image.
    """

    def __init__(self, method_="bilinear", extrapolation_value_=0.0):
        super(NetCropAndResize, self).__init__()
        self.op = P.CropAndResize(
            method=method_, extrapolation_value=extrapolation_value_)

    def construct(self, image, boxes, box_index, channel):
        # NOTE(review): callers pass crop_size as the `channel` argument —
        # it is forwarded as the op's crop size.
        return self.op(image, boxes, box_index, channel)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int8_bilinear(datatype=np.int8):
    """CropAndResize, bilinear method, int8 image, graph mode.

    int8 wraps around: arange values overflow, hence negative expectations.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 32
    image_width = 18
    channels = 2
    crop_size = (5, 3)
    total_values = batch_size * image_height * image_width * channels
    # NHWC ramp image so every sampled value is predictable.
    input_data = np.arange(0, total_values).reshape(
        (batch_size, image_height, image_width, channels))
    # Boxes are normalized (y1, x1, y2, x2); the first is vertically flipped.
    input_boxes = np.array(
        [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear", 0.5)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[-111.0, -110.0], [-119.5, -118.5], [-128.0, -127.0]],
                                 [[28.5, 29.5], [20.0, 21.0], [11.5, 12.5]],
                                 [[-88.0, -87.0], [-96.5, -95.5], [-41.0, -40.0]],
                                 [[51.5, 52.5], [43.0, 44.0], [34.5, 35.5]],
                                 [[-65.0, -64.0], [-73.5, -72.5], [-82.0, -81.0]]],
                                [[[0.0, 1.0], [29.75, 30.75], [0.5, 0.5]],
                                 [[-46.75, -45.75], [-17.0, -16.0], [0.5, 0.5]],
                                 [[-93.5, -92.5], [-63.75, -62.75], [0.5, 0.5]],
                                 [[3.75, 4.75], [-110.5, -109.5], [0.5, 0.5]],
                                 [[69.0, 70.0], [98.75, 99.75], [0.5, 0.5]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int16_nearest(datatype=np.int16):
    """CropAndResize, nearest method, int16 image, pynative mode."""
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 32
    image_width = 18
    channels = 2
    crop_size = (5, 3)
    total_values = batch_size * image_height * image_width * channels
    # NHWC ramp image so every sampled value is predictable.
    input_data = np.arange(0, total_values).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    # 0.5 is the extrapolation value for out-of-image samples.
    net = NetCropAndResize("nearest", 0.5)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[1170.0, 1171.0], [1160.0, 1161.0], [1152.0, 1153.0]],
                                 [[1314.0, 1315.0], [1304.0, 1305.0], [1296.0, 1297.0]],
                                 [[1458.0, 1459.0], [1448.0, 1449.0], [1440.0, 1441.0]],
                                 [[1602.0, 1603.0], [1592.0, 1593.0], [1584.0, 1585.0]],
                                 [[1746.0, 1747.0], [1736.0, 1737.0], [1728.0, 1729.0]]],
                                [[[0.0, 1.0], [30.0, 31.0], [0.5, 0.5]],
                                 [[216.0, 217.0], [246.0, 247.0], [0.5, 0.5]],
                                 [[432.0, 433.0], [462.0, 463.0], [0.5, 0.5]],
                                 [[612.0, 613.0], [642.0, 643.0], [0.5, 0.5]],
                                 [[828.0, 829.0], [858.0, 859.0], [0.5, 0.5]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int32_bilinear_v2(datatype=np.int32):
    """CropAndResize, bilinear_v2 method, int32 image, graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 32
    image_width = 18
    channels = 2
    crop_size = (5, 3)
    # Non-zero offset so expectations are not trivially symmetric with
    # the other dtype variants.
    offset = 8795
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear_v2", 0.369)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[10008.199, 10009.199], [10008.2, 10009.2], [10008.199, 10009.2]],
                                 [[10130.6, 10131.6], [10130.6, 10131.6], [10130.601, 10131.6]],
                                 [[10253, 10253.999], [10253, 10254], [10253, 10254]],
                                 [[10375.4, 10376.398], [10375.4, 10376.4], [10375.4, 10376.399]],
                                 [[10497.799, 10498.799], [10497.801, 10498.8], [10497.8, 10498.8]]],
                                [[[8876.667, 8877.667], [8898, 8899], [8919.334, 8920.333]],
                                 [[9056.667, 9057.667], [9078, 9079], [9099.333, 9100.333]],
                                 [[9236.667, 9237.667], [9258, 9259], [9279.333, 9280.333]],
                                 [[9416.667, 9417.667], [9438, 9439], [9459.333, 9460.333]],
                                 [[9596.667, 9597.667], [9618, 9619], [9639.333, 9640.334]]]]).astype(
                                     np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_float16_nearest(datatype=np.float16):
    """CropAndResize, nearest method, float16 image, pynative mode.

    float16 has limited precision, so some expected values are rounded
    (e.g. 7384.0 instead of an exact ramp value).
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 40
    channels = 3
    crop_size = (5, 3)
    offset = 0
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("nearest", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[7380.0, 7380.0, 7384.0], [7352.0, 7352.0, 7352.0],
                                  [7320.0, 7320.0, 7320.0]],
                                 [[8224.0, 8224.0, 8224.0], [8192.0, 8192.0, 8192.0],
                                  [8160.0, 8160.0, 8160.0]],
                                 [[8944.0, 8944.0, 8944.0], [8912.0, 8912.0, 8912.0],
                                  [8880.0, 8880.0, 8880.0]],
                                 [[9664.0, 9664.0, 9664.0], [9632.0, 9632.0, 9632.0],
                                  [9600.0, 9600.0, 9600.0]],
                                 [[10496.0, 10504.0, 10504.0], [10472.0, 10472.0, 10472.0],
                                  [10440.0, 10440.0, 10440.0]]],
                                [[[12.0, 13.0, 14.0], [108.0, 109.0, 110.0], [0.0, 0.0, 0.0]],
                                 [[1092.0, 1093.0, 1094.0], [1188.0, 1189.0, 1190.0], [0.0, 0.0, 0.0]],
                                 [[2172.0, 2172.0, 2174.0], [2268.0, 2268.0, 2270.0], [0.0, 0.0, 0.0]],
                                 [[3372.0, 3372.0, 3374.0], [3468.0, 3468.0, 3470.0], [0.0, 0.0, 0.0]],
                                 [[4452.0, 4452.0, 4456.0], [4548.0, 4548.0, 4552.0],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_float32_bilinear(datatype=np.float32):
    """CropAndResize, bilinear method, float32 image, graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 512
    image_width = 256
    channels = 3
    crop_size = (5, 3)
    offset = 5000
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    # Second box extends past the right edge (x2 = 1.75), exercising the
    # extrapolation value (0.0) in the last crop column.
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[488861.53, 488862.53, 488863.53],
                                  [488670.28, 488671.28, 488672.28],
                                  [488479.03, 488480.03, 488481.03]],
                                 [[539879.75, 539880.75, 539881.75],
                                  [539688.5, 539689.5, 539690.5],
                                  [539497.25, 539498.25, 539499.25]],
                                 [[590898.0, 590899.0, 590900.0], [590706.75, 590707.75, 590708.75],
                                  [590515.5, 590516.5, 590517.5]],
                                 [[641916.25, 641917.25, 641918.25], [641725.0, 641726.0, 641727.0],
                                  [641533.75, 641534.75, 641535.75]],
                                 [[692934.5, 692935.5, 692936.5], [692743.25, 692744.25, 692745.25],
                                  [692552.0, 692553.0, 692554.0]]],
                                [[[5076.5, 5077.5, 5078.5], [5707.625, 5708.625, 5709.625], [0.0, 0.0, 0.0]],
                                 [[78660.5, 78661.5, 78662.5], [79291.625, 79292.625, 79293.625], [0.0, 0.0, 0.0]],
                                 [[152244.5, 152245.5, 152246.5], [152875.625, 152876.625, 152877.625],
                                  [0.0, 0.0, 0.0]],
                                 [[225828.5, 225829.5, 225830.5], [226459.625, 226460.625, 226461.625],
                                  [0.0, 0.0, 0.0]],
                                 [[299412.5, 299413.5, 299414.5], [300043.625, 300044.625, 300045.625],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_float64_nearest(datatype=np.float64):
    """CropAndResize, nearest method, float64 image, pynative mode."""
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 25
    channels = 3
    crop_size = (5, 3)
    offset = 7549
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("nearest", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[12160.0, 12161.0, 12162.0], [12142.0, 12143.0, 12144.0],
                                  [12124.0, 12125.0, 12126.0]],
                                 [[12685.0, 12686.0, 12687.0], [12667.0, 12668.0, 12669.0],
                                  [12649.0, 12650.0, 12651.0]],
                                 [[13135.0, 13136.0, 13137.0], [13117.0, 13118.0, 13119.0],
                                  [13099.0, 13100.0, 13101.0]],
                                 [[13585.0, 13586.0, 13587.0], [13567.0, 13568.0, 13569.0],
                                  [13549.0, 13550.0, 13551.0]],
                                 [[14110.0, 14111.0, 14112.0], [14092.0, 14093.0, 14094.0],
                                  [14074.0, 14075.0, 14076.0]]],
                                [[[7555.0, 7556.0, 7557.0], [7615.0, 7616.0, 7617.0], [0.0, 0.0, 0.0]],
                                 [[8230.0, 8231.0, 8232.0], [8290.0, 8291.0, 8292.0], [0.0, 0.0, 0.0]],
                                 [[8905.0, 8906.0, 8907.0], [8965.0, 8966.0, 8967.0], [0.0, 0.0, 0.0]],
                                 [[9655.0, 9656.0, 9657.0], [9715.0, 9716.0, 9717.0], [0.0, 0.0, 0.0]],
                                 [[10330.0, 10331.0, 10332.0], [10390.0, 10391.0, 10392.0],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int64_bilinearv2(datatype=np.int64):
    """CropAndResize, bilinear_v2 method, int64 image, graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 25
    channels = 3
    crop_size = (5, 3)
    offset = 7549
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear_v2", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[12324.999, 12326, 12327], [12325, 12326, 12327],
                                  [12325, 12326, 12327.001]],
                                 [[12730, 12730.999, 12732], [12730, 12731, 12732],
                                  [12730, 12731, 12732]],
                                 [[13134.999, 13136, 13136.998], [13135, 13136, 13137],
                                  [13135, 13136, 13137]],
                                 [[13540, 13540.999, 13541.999], [13540, 13541, 13542],
                                  [13540, 13541, 13542]],
                                 [[13944.999, 13945.999, 13946.999], [13945, 13946.001, 13947],
                                  [13945, 13946, 13947]]],
                                [[[7822, 7823, 7824], [7864, 7865, 7866], [7906, 7907, 7908]],
                                 [[8392, 8393, 8394], [8434, 8435, 8436], [8476, 8477, 8478]],
                                 [[8962, 8963, 8964], [9004, 9005, 9006], [9046, 9047, 9048]],
                                 [[9531.999, 9533.001, 9534], [9574, 9575, 9576], [9616, 9617, 9618.001]],
                                 [[10102, 10103, 10104], [10144, 10145, 10146],
                                  [10186, 10187, 10188]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_uint8_nearest(datatype=np.uint8):
    """CropAndResize, nearest method, uint8 image, pynative mode.

    Uses a small 7x5 image so the ramp values stay inside uint8 range.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 7
    image_width = 5
    channels = 2
    crop_size = (5, 3)
    offset = 0
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("nearest", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[84.0, 85.0], [82.0, 83.0], [80.0, 81.0]],
                                 [[94.0, 95.0], [92.0, 93.0], [90.0, 91.0]],
                                 [[104.0, 105.0], [102.0, 103.0], [100.0, 101.0]],
                                 [[114.0, 115.0], [112.0, 113.0], [110.0, 111.0]],
                                 [[124.0, 125.0], [122.0, 123.0], [120.0, 121.0]]],
                                [[[0.0, 1.0], [8.0, 9.0], [0.0, 0.0]],
                                 [[10.0, 11.0], [18.0, 19.0], [0.0, 0.0]],
                                 [[20.0, 21.0], [28.0, 29.0], [0.0, 0.0]],
                                 [[30.0, 31.0], [38.0, 39.0], [0.0, 0.0]],
                                 [[50.0, 51.0], [58.0, 59.0], [0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_uint16_bilinear(datatype=np.uint16):
    """CropAndResize, bilinear method, uint16 image, graph mode."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 30
    channels = 3
    crop_size = (5, 3)
    offset = 0
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    # Full-precision float32 expectations (bilinear interpolation of the ramp).
    expected_output = np.array([[[[5557.7998046875, 5558.7998046875, 5559.7998046875],
                                  [5536.0498046875, 5537.0498046875, 5538.0498046875],
                                  [5514.2998046875, 5515.2998046875, 5516.2998046875]],
                                 [[6131.10009765625, 6132.10009765625, 6133.10009765625],
                                  [6109.35009765625, 6110.35009765625, 6111.35009765625],
                                  [6087.60009765625, 6088.60009765625, 6089.60009765625]],
                                 [[6704.39990234375, 6705.39990234375, 6706.39990234375],
                                  [6682.64990234375, 6683.64990234375, 6684.64990234375],
                                  [6660.89990234375, 6661.89990234375, 6662.89990234375]],
                                 [[7277.7001953125, 7278.7001953125, 7279.7001953125],
                                  [7255.9501953125, 7256.9501953125, 7257.9501953125],
                                  [7234.2001953125, 7235.2001953125, 7236.2001953125]],
                                 [[7851.0, 7852.0, 7853.0], [7829.25, 7830.25, 7831.25],
                                  [7807.5, 7808.5, 7809.5]]],
                                [[[8.700000762939453, 9.700000762939453, 10.700000762939453],
                                  [80.4749984741211, 81.4749984741211, 82.4749984741211],
                                  [0.0, 0.0, 0.0]],
                                 [[835.5750122070312, 836.5750122070312, 837.5750122070312],
                                  [907.3499755859375, 908.3499755859375, 909.3499755859375], [0.0, 0.0, 0.0]],
                                 [[1662.449951171875, 1663.449951171875, 1664.449951171875],
                                  [1734.2249755859375, 1735.2249755859375, 1736.2249755859375],
                                  [0.0, 0.0, 0.0]],
                                 [[2489.324951171875, 2490.324951171875, 2491.324951171875],
                                  [2561.10009765625, 2562.10009765625, 2563.10009765625], [0.0, 0.0, 0.0]],
                                 [[3316.199951171875, 3317.199951171875, 3318.199951171875],
                                  [3387.97509765625, 3388.97509765625, 3389.97509765625],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
|
|
@ -0,0 +1,109 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
def runMSRun(op, bbox):
    """Run an NMSWithMask op on `bbox` rows ([x1, y1, x2, y2, score]) and
    return the surviving boxes and their scores.

    Returns:
        (kept_boxes, kept_scores): coordinates (N, 4) and scores (N,) of the
        rows whose output mask is True.
    """
    inputs = Tensor(bbox, mindspore.float32)
    box, _, mask = op(inputs)
    box_np = box.asnumpy()
    keep_idx = np.where(mask.asnumpy())
    kept = box_np[keep_idx]
    # First four columns are coordinates, the last column is the score.
    return kept[:, 0:4], kept[:, -1]
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_check_order():
    """NMSWithMask must emit its output rows sorted by descending score."""
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    nms_op = P.NMSWithMask(0.5)
    for _ in range(10):
        count = 4000
        box = np.random.randint(1, 100, size=(count, 4))
        # Convert (x, y, w, h)-style offsets into corner coordinates.
        box[:, 2] += box[:, 0]
        box[:, 3] += box[:, 1]
        unsorted_scores = np.random.rand(count, 1)
        bbox = Tensor(np.hstack((box, unsorted_scores)), dtype=mindspore.float32)
        prop, _, _ = nms_op(bbox)
        # Compare the operator's score column against a manual descending sort.
        ms_sorted_scores = prop.asnumpy()[:, -1]
        np_sorted_scores = np.sort(unsorted_scores, axis=0)[::-1][:, 0]
        np.testing.assert_array_almost_equal(
            ms_sorted_scores, np_sorted_scores)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_edge_case_1():
    """Each box appears twice with different scores; only the higher-scoring
    copy of each (plus non-overlapping survivors) must be kept."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # CASE 1 - FULL OVERLAP BOXES - Every box is duplicated and has a different score
    nms_op1 = P.NMSWithMask(0.3)
    bbox1 = [[12, 4, 33, 17, 0.6], [20, 11, 38, 23, 0.1], [20, 10, 45, 26, 0.9], [15, 17, 35, 38, 0.5],
             [10, 20, 30, 40, 0.4], [35, 35, 89, 90, 0.8], [12, 4, 33, 17, 0.3], [20, 11, 38, 23, 0.2],
             [20, 10, 45, 26, 0.1], [15, 17, 35, 38, 0.8], [10, 20, 30, 40, 0.41], [35, 35, 89, 90, 0.82]]
    # Survivors come out in descending-score order.
    expected_bbox = np.array([[20., 10., 45., 26.],
                              [35., 35., 89., 90.],
                              [15., 17., 35., 38.],
                              [12., 4., 33., 17.]])
    expected_score = np.array([0.9, 0.82, 0.8, 0.6])

    sel_rows, sel_score = runMSRun(nms_op1, bbox1)
    np.testing.assert_almost_equal(sel_rows, expected_bbox)
    np.testing.assert_almost_equal(sel_score, expected_score)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_edge_case_2():
    """Zero-area boxes with valid scores: both survive (no overlap area)."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # CASE 2 - 0 value boxes - with valid scores
    nms_op2 = P.NMSWithMask(0.5)
    bbox2 = [[0, 0, 0, 0, 0.6], [0, 0, 0, 0, 0.1]]
    expected_bbox = np.array([[0., 0., 0., 0.],
                              [0., 0., 0., 0.]])
    expected_score = np.array([0.6, 0.1])

    sel_rows, sel_score = runMSRun(nms_op2, bbox2)
    np.testing.assert_almost_equal(sel_rows, expected_bbox)
    np.testing.assert_almost_equal(sel_score, expected_score)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_edge_case_3():
    """Malformed boxes where x2 < x1 or y2 < y1 must pass through unchanged."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # CASE 3 - x2/x1 and y2/y1 sequence out of place
    nms_op3 = P.NMSWithMask(0.7)
    bbox3 = [[70, 70, 45, 75, 0.6], [30, 33, 43, 29, 0.1]]
    expected_bbox = np.array([[70., 70., 45., 75.],
                              [30., 33., 43., 29.]])
    expected_score = np.array([0.6, 0.1])

    sel_rows, sel_score = runMSRun(nms_op3, bbox3)
    np.testing.assert_almost_equal(sel_rows, expected_bbox)
    np.testing.assert_almost_equal(sel_score, expected_score)
|
|
@ -0,0 +1,121 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class RCWM_count_in(nn.Cell):
    """RandomChoiceWithMask with a fixed seed and count=4 (fewer samples
    than the nonzero elements in the test fixtures)."""

    def __init__(self):
        super(RCWM_count_in, self).__init__()
        self.RCWM_count_in = P.RandomChoiceWithMask(count=4, seed=1)

    def construct(self, x):
        return self.RCWM_count_in(x)
|
||||
|
||||
|
||||
class RCWM_count_out(nn.Cell):
    """RandomChoiceWithMask with a fixed seed and count=10 (more samples
    than the nonzero elements in the test fixtures)."""

    def __init__(self):
        super(RCWM_count_out, self).__init__()
        self.RCWM_count_out = P.RandomChoiceWithMask(count=10, seed=1)

    def construct(self, x):
        return self.RCWM_count_out(x)
|
||||
|
||||
|
||||
class RCWM_3D(nn.Cell):
    """RandomChoiceWithMask (count=10, seed=1) applied to a 3-D bool input."""

    def __init__(self):
        super(RCWM_3D, self).__init__()
        self.RCWM_3D = P.RandomChoiceWithMask(count=10, seed=1)

    def construct(self, x):
        return self.RCWM_3D(x)
|
||||
|
||||
|
||||
class RCWM_1D(nn.Cell):
    """RandomChoiceWithMask (count=10, seed=9) applied to a 1-D bool input."""

    def __init__(self):
        super(RCWM_1D, self).__init__()
        self.RCWM_1D = P.RandomChoiceWithMask(count=10, seed=9)

    def construct(self, x):
        return self.RCWM_1D(x)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_3D():
    """RandomChoiceWithMask on an all-True 3D mask: checks output shapes.

    Each sampled index has one coordinate per input dimension, so the
    index output is (count, 3) and the validity mask is (count,).
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
    # equivalent dtype and works on all NumPy versions.
    input_tensor = Tensor(np.ones([3, 4, 5]).astype(bool))
    expect1 = (10, 3)
    expect2 = (10,)
    rcwm = RCWM_3D()
    output1, output2 = rcwm(input_tensor)
    assert output1.shape == expect1
    assert output2.shape == expect2
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_count_out():
    """count=10 exceeds the number of True entries: outputs padded to count."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
    # equivalent dtype and works on all NumPy versions.
    input_tensor = Tensor(np.array([[1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1],
                                    [0, 0, 0, 1]]).astype(bool))
    expect1 = (10, 2)  # (count, input rank)
    expect2 = (10,)    # validity mask, one flag per sampled index
    rcwm = RCWM_count_out()
    output1, output2 = rcwm(input_tensor)
    assert output1.shape == expect1
    assert output2.shape == expect2
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_count_in():
    """count=4 is below the number of True entries: outputs sized by count."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
    # equivalent dtype and works on all NumPy versions.
    input_tensor = Tensor(np.array([[1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1],
                                    [0, 0, 0, 1]]).astype(bool))
    expect1 = (4, 2)  # (count, input rank)
    expect2 = (4,)    # validity mask, one flag per sampled index
    rcwm = RCWM_count_in()
    output1, output2 = rcwm(input_tensor)
    assert output1.shape == expect1
    assert output2.shape == expect2
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_1D():
    """Checks the exact sampled indices and mask for a 1D input.

    The expected values are tied to the CPU kernel's RNG with seed=9;
    mask entries beyond the 8 available True positions are False.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # `np.bool` was removed in NumPy 1.24; the builtin `bool` is the
    # equivalent dtype and works on all NumPy versions.
    input_tensor = Tensor(
        np.array([1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1]).astype(bool))
    expect_index = np.array([[0], [7], [9], [8], [8], [0],
                             [2], [7], [0], [0]]).astype(np.int32)
    expect_mask = np.array(
        [True, True, True, True, True, True, True, True, False, False])
    rcwm = RCWM_1D()
    output1, output2 = rcwm(input_tensor)
    print(output1.asnumpy())
    print(output2)
    assert np.array_equal(output1.asnumpy(), expect_index)
    assert np.array_equal(output2.asnumpy(), expect_mask)
|
|
@ -0,0 +1,75 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops.operations import _grad_ops as G
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")


class NetROIAlignGrad(nn.Cell):
    """Thin Cell wrapper exposing the ROIAlignGrad backward op."""

    def __init__(self, xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num):
        super(NetROIAlignGrad, self).__init__()
        self.roiAlignGrad = G.ROIAlignGrad(
            xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num)

    def construct(self, dy, rois):
        return self.roiAlignGrad(dy, rois)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_roi_align_grad():
    """ROIAlignGrad distributes each pooled-cell gradient over its input bin."""

    def roi_align_grad_case(data_type):
        rois = Tensor(np.array([[0, -2.0, -2.0, 21.0, 21.0]], data_type))

        # 3x3 gradient w.r.t. the pooled output; every row is (.1, .2, .3).
        dy = Tensor(np.array([[[[.1, .2, .3]] * 3]], data_type))

        xdiff_shape = (1, 1, 6, 6)
        pooled_height, pooled_width, spatial_scale, sample_num = 3, 3, 0.25, 2
        context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

        roi_align_grad = NetROIAlignGrad(
            xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num)
        output = roi_align_grad(dy, rois)

        # Each 2x2 input bin receives its column's gradient split evenly,
        # so every row of the 6x6 result is the same.
        expect = np.tile([0.025, 0.025, 0.05, 0.05, 0.075, 0.075], (1, 1, 6, 1))
        np.testing.assert_almost_equal(output.asnumpy(), expect, decimal=4)

    roi_align_grad_case(np.float32)
    roi_align_grad_case(np.float16)
|
|
@ -0,0 +1,75 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
import mindspore.context as context
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_roi_align():
    """ROIAlign forward on CPU for both roi_end_mode values and sample counts."""

    def roi_align_case(data_type):
        context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
        # 6x6 feature map holding the values 1..36 in row-major order.
        x = Tensor(np.arange(1, 37, dtype=data_type).reshape(1, 1, 6, 6))

        # test case 1: roi_end_mode=1
        rois = Tensor(np.array([[0, -2.0, -2.0, 21.0, 21.0]], data_type))
        roi_align = P.ROIAlign(3, 3, 0.25, 2, 1)
        output = roi_align(x, rois)
        expect = [[[[4.5, 6.5, 8.5],
                    [16.5, 18.5, 20.5],
                    [28.5, 30.5, 32.5]]]]
        assert (output.asnumpy() == expect).all()

        # test case 2: roi_end_mode=0 with a slightly larger ROI
        rois = Tensor(np.array([[0, -2.0, -2.0, 22.0, 22.0]], data_type))
        roi_align = P.ROIAlign(3, 3, 0.25, 2, 0)
        output = roi_align(x, rois)
        expect = [[[[4.5, 6.5, 8.5],
                    [16.5, 18.5, 20.5],
                    [28.5, 30.5, 32.5]]]]
        assert (output.asnumpy() == expect).all()

        # test case 3: adaptive sampling (sample_num=-1), 2x2 pooling
        rois = Tensor(np.array([[0, -2.0, -2.0, 22.0, 22.0]], data_type))
        roi_align = P.ROIAlign(2, 2, 1.0, -1, 0)
        output = roi_align(x, rois)
        expect = [[[[6.295, 0.],
                    [0., 0.]]]]
        np.testing.assert_almost_equal(output.asnumpy(), expect, decimal=2)

    roi_align_case(np.float32)
    roi_align_case(np.float16)
|
|
@ -0,0 +1,142 @@
|
|||
# Copyright 2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
import numpy as np
|
||||
import pytest
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.ops import operations as P
|
||||
|
||||
|
||||
class Net(nn.Cell):
    """Cell wrapper: scatters `update` at `indices` into zeros of `_shape`."""

    def __init__(self, _shape):
        super(Net, self).__init__()
        self.scatternd = P.ScatterNd()
        self.shape = _shape

    def construct(self, indices, update):
        return self.scatternd(indices, update, self.shape)
|
||||
|
||||
|
||||
def scatternd_net(indices, update, _shape, expect):
    """Runs ScatterNd on the CPU kernel and compares against `expect`.

    Passes when every element differs from `expect` by strictly less
    than 1e-6.
    """
    net = Net(_shape)
    output = net(Tensor(indices), Tensor(update))
    diff = np.abs(output.asnumpy() - expect)
    assert np.all(diff < 1.0e-6)
|
||||
|
||||
def scatternd_positive(nptype):
    """ScatterNd accumulation with duplicate indices, positive result.

    The original body duplicated the whole case for int32 and int64
    index tensors; a loop over the index dtypes removes the copy.

    Args:
        nptype: numpy dtype used for the update values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

    shape = (2, 2)
    # Index [0, 1] appears four times: 3.2 + 5.3 - 2.2 - 1.0 == 5.3.
    expect = np.array([[0., 5.3],
                       [0., 1.1]]).astype(nptype)
    for index_type in (np.int32, np.int64):
        arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(index_type)
        arr_update = np.array([3.2, 1.1, 5.3, -2.2, -1.0]).astype(nptype)
        scatternd_net(arr_indices, arr_update, shape, expect)
|
||||
|
||||
def scatternd_negative(nptype):
    """ScatterNd accumulation with duplicate indices, negative result.

    The original body duplicated the whole case for int32 and int64
    index tensors; a loop over the index dtypes removes the copy.

    Args:
        nptype: numpy dtype used for the update values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

    shape = (2, 2)
    # Index [1, 0] appears four times: -13.4 + 5.1 - 12.1 - 1.0 == -21.4.
    expect = np.array([[0., 0.],
                       [-21.4, -3.1]]).astype(nptype)
    for index_type in (np.int32, np.int64):
        arr_indices = np.array([[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]]).astype(index_type)
        arr_update = np.array([-13.4, -3.1, 5.1, -12.1, -1.0]).astype(nptype)
        scatternd_net(arr_indices, arr_update, shape, expect)
|
||||
|
||||
def scatternd_positive_uint(nptype):
    """ScatterNd accumulation with an unsigned update dtype.

    The float updates truncate when cast to `nptype`
    (3.2, 5.3, 3.8, 1.2 -> 3, 5, 3, 1), so index [0, 1] sums to 12.
    The original body duplicated the whole case for int32 and int64
    index tensors; a loop over the index dtypes removes the copy.

    Args:
        nptype: unsigned numpy dtype used for the update values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")

    shape = (2, 2)
    expect = np.array([[0., 12.],
                       [0., 1.]]).astype(nptype)
    for index_type in (np.int32, np.int64):
        arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(index_type)
        arr_update = np.array([3.2, 1.1, 5.3, 3.8, 1.2]).astype(nptype)
        scatternd_net(arr_indices, arr_update, shape, expect)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_float64():
    """ScatterNd with float64 updates, positive and negative cases."""
    scatternd_positive(np.float64)
    scatternd_negative(np.float64)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_float32():
    """ScatterNd with float32 updates, positive and negative cases."""
    scatternd_positive(np.float32)
    scatternd_negative(np.float32)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_int64():
    """ScatterNd with int64 updates, positive and negative cases."""
    scatternd_positive(np.int64)
    scatternd_negative(np.int64)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_int16():
    """ScatterNd with int16 updates, positive and negative cases."""
    scatternd_positive(np.int16)
    scatternd_negative(np.int16)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint64():
    """ScatterNd with uint64 updates (positive-only case)."""
    scatternd_positive_uint(np.uint64)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint32():
    """ScatterNd with uint32 updates (positive-only case)."""
    scatternd_positive_uint(np.uint32)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint16():
    """ScatterNd with uint16 updates (positive-only case)."""
    scatternd_positive_uint(np.uint16)


@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint8():
    """ScatterNd with uint8 updates (positive-only case)."""
    scatternd_positive_uint(np.uint8)
|
Loading…
Reference in New Issue