!20476 Adding 9 object-detection operators in CPU

Merge pull request !20476 from huangbo/object_detection_2
This commit is contained in:
i-robot 2021-07-22 11:43:18 +00:00 committed by Gitee
commit 66f4756555
38 changed files with 3987 additions and 9 deletions

View File

@ -0,0 +1,105 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.h"
#include <functional>
#include <numeric>
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
namespace {
// Total number of elements described by a tensor shape: the product of all
// dimensions. An empty shape (scalar) yields 1, the multiplicative identity.
size_t get_element_num(const std::vector<size_t> &shape) {
  return std::accumulate(shape.begin(), shape.end(), static_cast<size_t>(1), std::multiplies<size_t>());
}
template <typename T>
bool check_validation(const std::vector<size_t> &shape, const size_t num_before_axis, const size_t num_after_axis,
                      const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) {
  // Verify address-list arity and exact byte sizes before any raw buffer access.
  const bool arity_ok = (inputs.size() == 1) && (outputs.size() == 2);
  if (!arity_ok) {
    MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
    return false;
  }
  const size_t output_num = num_before_axis * num_after_axis;
  const size_t expect_input = get_element_num(shape) * sizeof(T);
  const size_t expect_index = output_num * sizeof(int);  // output 0: argmax indices
  const size_t expect_value = output_num * sizeof(T);    // output 1: max values
  const bool sizes_ok =
    inputs[0]->size == expect_input && outputs[0]->size == expect_index && outputs[1]->size == expect_value;
  if (!sizes_ok) {
    MS_LOG(EXCEPTION) << "Invalid input or output data size!";
    return false;
  }
  return true;
}
} // namespace
template <typename T>
void ArgMaxWithValueCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  // Reads the input device shape and the "axis" attribute, then pre-computes the
  // element counts before/after the reduction axis that Launch iterates over.
  MS_EXCEPTION_IF_NULL(kernel_node);
  shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  const int64_t rank = static_cast<int64_t>(shape_.size());
  int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
  // A valid axis lies in [-rank, rank - 1]. The previous check only rejected
  // values below -rank and the trailing modulo silently wrapped axis >= rank
  // back into range (e.g. axis == rank became 0); it also hit '% 0' for rank 0.
  if (axis < -rank || axis >= rank) {
    MS_LOG(EXCEPTION) << "Invalid axis:" << axis << ", should be in range [" << -rank << ", " << (rank - 1) << "]";
  }
  if (axis < 0) {
    axis += rank;  // normalize a negative axis to its positive equivalent
  }
  num_before_axis_ = 1;
  num_after_axis_ = 1;
  for (size_t i = 0; i < shape_.size(); i++) {
    if (static_cast<int64_t>(i) < axis) {
      num_before_axis_ *= shape_[i];
    } else if (static_cast<int64_t>(i) > axis) {
      num_after_axis_ *= shape_[i];
    }
  }
  dim_axis_ = shape_[axis];
}
template <typename T>
bool ArgMaxWithValueCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                         const std::vector<kernel::AddressPtr> &,
                                         const std::vector<kernel::AddressPtr> &outputs) {
  // For every (before-axis, after-axis) pair, writes the index (output 0, int32)
  // and the value (output 1, T) of the maximum element along the reduction axis.
  if (!check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) {
    return false;
  }
  const auto *input = reinterpret_cast<T *>(inputs[0]->addr);
  auto *index_out = reinterpret_cast<int32_t *>(outputs[0]->addr);
  auto *value_out = reinterpret_cast<T *>(outputs[1]->addr);
  for (size_t i = 0; i < num_before_axis_; i++) {
    const size_t base = i * dim_axis_ * num_after_axis_;
    for (size_t j = 0; j < num_after_axis_; j++) {
      const size_t src_j = base + j;
      // Scan the axis directly instead of materializing a temporary
      // std::vector<float> per output element (the original allocated on the
      // heap for every (i, j) pair). Comparison stays in float, as before, so
      // float16 inputs behave identically.
      size_t best_k = 0;
      float best_val = static_cast<float>(input[src_j]);
      for (size_t k = 1; k < dim_axis_; k++) {
        const float candidate = static_cast<float>(input[k * num_after_axis_ + src_j]);
        // Strict '>' keeps the first maximum, matching std::max_element.
        if (candidate > best_val) {
          best_val = candidate;
          best_k = k;
        }
      }
      const size_t dst = i * num_after_axis_ + j;
      index_out[dst] = static_cast<int32_t>(best_k);
      value_out[dst] = input[best_k * num_after_axis_ + src_j];
    }
  }
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,50 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
#include <vector>
#include <map>
#include <memory>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for the ArgMaxWithValue op: along a given axis it produces both
// the index (int32) and the value (T) of the maximum element.
template <typename T>
class ArgMaxWithValueCPUKernel : public CPUKernel {
 public:
  ArgMaxWithValueCPUKernel() = default;
  ~ArgMaxWithValueCPUKernel() override = default;

  // Caches the input device shape and pre-computes the before/after-axis
  // element counts from the "axis" attribute.
  void InitKernel(const CNodePtr &kernel_node) override;

  // inputs: [x]; outputs: [index (int32), value (T)]. The workspace list is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  std::vector<size_t> shape_;  // device shape of input 0
  size_t num_before_axis_;     // product of dimensions before the reduction axis
  size_t num_after_axis_;      // product of dimensions after the reduction axis
  size_t dim_axis_;            // extent of the reduction axis
};
MS_REG_CPU_KERNEL_T(ArgMaxWithValue, KernelAttr(), ArgMaxWithValueCPUKernel, float);
MS_REG_CPU_KERNEL_T(ArgMaxWithValue, KernelAttr(), ArgMaxWithValueCPUKernel, float16);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_

View File

@ -0,0 +1,142 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.h"
#include <cmath>
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
template <typename T>
void BoundingBoxDecodeCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  // Parses the op attributes: per-coordinate "means"/"stds" (a tuple/list taken
  // verbatim, or a scalar broadcast to all 4 coordinates), the "max_shape"
  // clipping bounds, and "wh_ratio_clip".
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {
    // NOTE(review): logs and falls through, matching the sibling kernels;
    // Launch re-validates buffer sizes before use.
    MS_LOG(ERROR) << "Input num is " << input_num << ", but BoundingBoxDecode needs 2 inputs.";
  }
  const size_t coordinate_size = 4;
  auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
  MS_EXCEPTION_IF_NULL(prim);
  // Shared parser for "means"/"stds". Fetches the primitive once instead of the
  // repeated GetCNodePrimitive()->GetAttr() chains, and checks the attribute for
  // null before use (the original dereferenced a possibly-null GetAttr result).
  auto read_coordinate_attr = [&prim, &kernel_node, coordinate_size](const std::string &name) -> std::vector<float> {
    auto value = prim->GetAttr(name);
    MS_EXCEPTION_IF_NULL(value);
    if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
      return AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, name);
    }
    if (value->isa<FloatImm>()) {
      float scalar = AnfAlgo::GetNodeAttr<float>(kernel_node, name);
      return std::vector<float>(coordinate_size, scalar);  // broadcast scalar to 4 coords
    }
    MS_LOG(EXCEPTION) << "Attribute " << name << " type is invalid.";
    return {};
  };
  means_ = read_coordinate_attr("means");
  stds_ = read_coordinate_attr("stds");
  if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
    MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
  }
  std::vector<int64_t> max_shape_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "max_shape");
  (void)std::transform(max_shape_me.begin(), max_shape_me.end(), std::back_inserter(max_shape_),
                       [](const int64_t &value) { return static_cast<int>(value); });
  wh_ratio_clip_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "wh_ratio_clip");
  if (max_shape_.size() < 2) {
    MS_LOG(EXCEPTION) << "The size of max_shape is less than 2.";
  }
}
template <typename T>
bool BoundingBoxDecodeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                           const std::vector<kernel::AddressPtr> &,
                                           const std::vector<kernel::AddressPtr> &outputs) {
  // Decodes regression deltas against anchor boxes: un-whiten the deltas with
  // means/stds, clamp the log-size deltas, shift/scale the anchor center+size,
  // then clip the resulting (x1, y1, x2, y2) box to the image bounds.
  if (inputs[0]->size != inputs[1]->size) {
    MS_LOG(ERROR) << "Anchor box size must be equal to deltas box size: " << inputs[1]->size << ", but got"
                  << inputs[0]->size;
    return false;
  }
  const size_t coordinate = 4;  // each box is (x1, y1, x2, y2)
  const size_t block_size = inputs[0]->size / sizeof(T);
  if ((block_size % coordinate) != 0) {
    MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
    return false;
  }
  // Cast buffers only after the size checks above succeed.
  auto anchor_box = reinterpret_cast<T *>(inputs[0]->addr);
  auto deltas = reinterpret_cast<T *>(inputs[1]->addr);
  auto bboxes = reinterpret_cast<T *>(outputs[0]->addr);
  T ms1 = static_cast<T>(max_shape_[0]);  // y (height) clipping bound
  T ms2 = static_cast<T>(max_shape_[1]);  // x (width) clipping bound
  // Loop-invariant clamp bound, hoisted out of the parallel body. std::abs and
  // std::log keep the computation in floating point; the original unqualified
  // abs() could resolve to the C int overload and truncate the bound.
  const T max_ratio = static_cast<T>(std::abs(std::log(wh_ratio_clip_)));
  size_t elem_num = block_size / coordinate;
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      const size_t left_x = i * 4;
      const size_t left_y = i * 4 + 1;
      const size_t right_x = i * 4 + 2;
      const size_t right_y = i * 4 + 3;
      // Un-whiten the deltas.
      T dx = deltas[left_x] * static_cast<T>(stds_[0]) + static_cast<T>(means_[0]);
      T dy = deltas[left_y] * static_cast<T>(stds_[1]) + static_cast<T>(means_[1]);
      T dw = deltas[right_x] * static_cast<T>(stds_[2]) + static_cast<T>(means_[2]);
      T dh = deltas[right_y] * static_cast<T>(stds_[3]) + static_cast<T>(means_[3]);
      // Clamp the size deltas so exp() below stays bounded.
      dw = dw > max_ratio ? max_ratio : (dw < (-max_ratio) ? (-max_ratio) : dw);
      dh = dh > max_ratio ? max_ratio : (dh < (-max_ratio) ? (-max_ratio) : dh);
      // Anchor center and size.
      T px = (anchor_box[left_x] + anchor_box[right_x]) * static_cast<T>(0.5);
      T py = (anchor_box[left_y] + anchor_box[right_y]) * static_cast<T>(0.5);
      T pw = anchor_box[right_x] - anchor_box[left_x] + static_cast<T>(1.0);
      T ph = anchor_box[right_y] - anchor_box[left_y] + static_cast<T>(1.0);
      // Decoded center and size.
      T gx = px + pw * dx;
      T gy = py + ph * dy;
      T gw = pw * exp(dw);
      T gh = ph * exp(dh);
      // Back to corner coordinates, clipped to [0, max_shape).
      T x1 = gx - gw * static_cast<T>(0.5) + static_cast<T>(0.5);
      T y1 = gy - gh * static_cast<T>(0.5) + static_cast<T>(0.5);
      T x2 = gx + gw * static_cast<T>(0.5) - static_cast<T>(0.5);
      T y2 = gy + gh * static_cast<T>(0.5) - static_cast<T>(0.5);
      x1 = x1 > ms2 ? ms2 : (x1 < static_cast<T>(0) ? static_cast<T>(0) : x1);
      y1 = y1 > ms1 ? ms1 : (y1 < static_cast<T>(0) ? static_cast<T>(0) : y1);
      x2 = x2 > ms2 ? ms2 : (x2 < static_cast<T>(0) ? static_cast<T>(0) : x2);
      y2 = y2 > ms1 ? ms1 : (y2 < static_cast<T>(0) ? static_cast<T>(0) : y2);
      bboxes[left_x] = x1;
      bboxes[left_y] = y1;
      bboxes[right_x] = x2;
      bboxes[right_y] = y2;
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,56 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for BoundingBoxDecode: converts anchor boxes plus regression
// deltas into absolute (x1, y1, x2, y2) boxes clipped to "max_shape".
template <typename T>
class BoundingBoxDecodeCPUKernel : public CPUKernel {
 public:
  BoundingBoxDecodeCPUKernel() = default;
  ~BoundingBoxDecodeCPUKernel() override = default;

  // Reads the "means"/"stds"/"max_shape"/"wh_ratio_clip" attributes.
  void InitKernel(const CNodePtr &kernel_node) override;

  // inputs: [anchor_box, deltas] (both N x 4); output: [bboxes] (N x 4).
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  std::vector<float> means_;    // per-coordinate delta means (scalar attr is broadcast)
  std::vector<float> stds_;     // per-coordinate delta stds (scalar attr is broadcast)
  std::vector<int> max_shape_;  // [0] clips y coordinates, [1] clips x coordinates
  float wh_ratio_clip_;         // bound for the width/height log-delta clamp
};
MS_REG_CPU_KERNEL_T(
BoundingBoxDecode,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BoundingBoxDecodeCPUKernel, float);
MS_REG_CPU_KERNEL_T(
BoundingBoxDecode,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BoundingBoxDecodeCPUKernel, float16);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_

View File

@ -0,0 +1,115 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.h"
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
template <typename T>
void BoundingBoxEncodeCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  // Parses the "means"/"stds" attributes: a tuple/list is taken verbatim, a
  // scalar is broadcast to all 4 box coordinates.
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {
    // NOTE(review): logs and falls through, matching the sibling kernels;
    // Launch re-validates buffer sizes before use.
    MS_LOG(ERROR) << "Input num is " << input_num << ", but BoundingBoxEncode needs 2 inputs.";
  }
  const size_t coordinate_size = 4;
  auto prim = AnfAlgo::GetCNodePrimitive(kernel_node);
  MS_EXCEPTION_IF_NULL(prim);
  // Shared parser for "means"/"stds". Fetches the primitive once instead of the
  // repeated GetCNodePrimitive()->GetAttr() chains, and checks the attribute for
  // null before use (the original dereferenced a possibly-null GetAttr result).
  auto read_coordinate_attr = [&prim, &kernel_node, coordinate_size](const std::string &name) -> std::vector<float> {
    auto value = prim->GetAttr(name);
    MS_EXCEPTION_IF_NULL(value);
    if (value->isa<ValueTuple>() || value->isa<ValueList>()) {
      return AnfAlgo::GetNodeAttr<std::vector<float>>(kernel_node, name);
    }
    if (value->isa<FloatImm>()) {
      float scalar = AnfAlgo::GetNodeAttr<float>(kernel_node, name);
      return std::vector<float>(coordinate_size, scalar);  // broadcast scalar to 4 coords
    }
    MS_LOG(EXCEPTION) << "Attribute " << name << " type is invalid.";
    return {};
  };
  means_ = read_coordinate_attr("means");
  stds_ = read_coordinate_attr("stds");
  if (means_.size() < coordinate_size || stds_.size() < coordinate_size) {
    MS_LOG(EXCEPTION) << "The size of means or stds is less than 4.";
  }
}
template <typename T>
bool BoundingBoxEncodeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                           const std::vector<kernel::AddressPtr> &,
                                           const std::vector<kernel::AddressPtr> &outputs) {
  // Encodes groundtruth boxes relative to anchor boxes as whitened deltas:
  // center offsets normalized by anchor size, log-ratios of sizes, then
  // (delta - mean) / std per coordinate.
  auto anchor_box = reinterpret_cast<T *>(inputs[0]->addr);
  auto groundtruth_box = reinterpret_cast<T *>(inputs[1]->addr);
  auto deltas = reinterpret_cast<T *>(outputs[0]->addr);
  if (inputs[0]->size != inputs[1]->size) {
    MS_LOG(ERROR) << "Anchor box size must be equal to groundtruth box size: " << inputs[1]->size << ", but got"
                  << inputs[0]->size;
    return false;
  }
  const size_t coordinate = 4;  // each box is (x1, y1, x2, y2)
  const size_t block_size = inputs[0]->size / sizeof(T);
  if ((block_size % coordinate) != 0) {
    MS_LOG(ERROR) << "The size of the box must be a multiple of 4.";
    return false;
  }
  size_t elem_num = block_size / coordinate;
  auto task = [&](size_t start, size_t end) {
    const T half = static_cast<T>(0.5);
    const T one = static_cast<T>(1.0);
    for (size_t box = start; box < end; box++) {
      const size_t x1 = box * 4;      // left x
      const size_t y1 = box * 4 + 1;  // left y
      const size_t x2 = box * 4 + 2;  // right x
      const size_t y2 = box * 4 + 3;  // right y
      // Anchor center and size.
      T px = (anchor_box[x1] + anchor_box[x2]) * half;
      T py = (anchor_box[y1] + anchor_box[y2]) * half;
      T pw = anchor_box[x2] - anchor_box[x1] + one;
      T ph = anchor_box[y2] - anchor_box[y1] + one;
      // Groundtruth center and size.
      T gx = (groundtruth_box[x1] + groundtruth_box[x2]) * half;
      T gy = (groundtruth_box[y1] + groundtruth_box[y2]) * half;
      T gw = groundtruth_box[x2] - groundtruth_box[x1] + one;
      T gh = groundtruth_box[y2] - groundtruth_box[y1] + one;
      // Raw deltas.
      T dx = (gx - px) / pw;
      T dy = (gy - py) / ph;
      T dw = log(gw / pw);
      T dh = log(gh / ph);
      // Whiten with the configured means/stds.
      deltas[x1] = (dx - static_cast<T>(means_[0])) / static_cast<T>(stds_[0]);
      deltas[y1] = (dy - static_cast<T>(means_[1])) / static_cast<T>(stds_[1]);
      deltas[x2] = (dw - static_cast<T>(means_[2])) / static_cast<T>(stds_[2]);
      deltas[y2] = (dh - static_cast<T>(means_[3])) / static_cast<T>(stds_[3]);
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,54 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for BoundingBoxEncode: converts (anchor, groundtruth) box pairs
// into whitened regression deltas (center offsets and log-size ratios).
template <typename T>
class BoundingBoxEncodeCPUKernel : public CPUKernel {
 public:
  BoundingBoxEncodeCPUKernel() = default;
  ~BoundingBoxEncodeCPUKernel() override = default;

  // Reads the "means"/"stds" attributes (tuple/list, or scalar broadcast to 4).
  void InitKernel(const CNodePtr &kernel_node) override;

  // inputs: [anchor_box, groundtruth_box] (both N x 4); output: [deltas] (N x 4).
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  std::vector<float> means_;  // per-coordinate delta means
  std::vector<float> stds_;   // per-coordinate delta stds
};
MS_REG_CPU_KERNEL_T(
BoundingBoxEncode,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
BoundingBoxEncodeCPUKernel, float);
MS_REG_CPU_KERNEL_T(
BoundingBoxEncode,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
BoundingBoxEncodeCPUKernel, float16);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_

View File

@ -0,0 +1,84 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/check_valid_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kInputSize = 2;
constexpr size_t kOutputSize = 1;
} // namespace
template <typename T>
void CheckValidCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  // Cache the device shapes of both inputs (anchor boxes and image metadata).
  MS_EXCEPTION_IF_NULL(kernel_node);
  img_metas_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1);
  anchor_box_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
}
template <typename T>
bool CheckValidCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                    const std::vector<kernel::AddressPtr> &,
                                    const std::vector<kernel::AddressPtr> &outputs) {
  // Writes one bool per box: true iff the box lies fully inside the image
  // bounds derived from img_metas (presumably [height, width, scale] — the
  // x bound is metas[1] * metas[2] - 1, the y bound metas[0] * metas[2] - 1;
  // confirm against the op's Python definition).
  CheckParams(inputs, outputs);
  auto boxes = reinterpret_cast<T *>(inputs[0]->addr);
  auto metas = reinterpret_cast<T *>(inputs[1]->addr);
  auto out = reinterpret_cast<bool *>(outputs[0]->addr);
  const size_t coordinate = 4;
  const size_t elem_num = inputs[0]->size / sizeof(T) / coordinate;
  auto task = [&](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      const size_t offset = i * 4;  // (x1, y1, x2, y2) of box i
      const T zero = static_cast<T>(0.0);
      const T max_x = metas[1] * metas[2] - static_cast<T>(1.0);
      const T max_y = metas[0] * metas[2] - static_cast<T>(1.0);
      // De Morgan of the original "any constraint violated" accumulation:
      // identical comparisons, expressed as a direct conjunction.
      out[i] = (boxes[offset] >= zero) && (boxes[offset + 1] >= zero) && (max_x >= boxes[offset + 2]) &&
               (max_y >= boxes[offset + 3]);
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);
  return true;
}
template <typename T>
void CheckValidCPUKernel<T>::CheckParams(const std::vector<AddressPtr> &inputs,
                                         const std::vector<AddressPtr> &outputs) {
  // inputs: anchor_box, img_metas
  if (inputs.size() != kInputSize) {
    MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but CheckValid needs " << kInputSize << " inputs.";
  }
  // outputs: valid
  if (outputs.size() != kOutputSize) {
    // Fixed the missing space before "outputs." (previously printed "1outputs.").
    MS_LOG(EXCEPTION) << "Output number is: " << outputs.size() << ", but CheckValid needs " << kOutputSize
                      << " outputs.";
  }
  // One bool per box: the output element count must equal the number of
  // 4-coordinate boxes in anchor_box (input 0). The old message incorrectly
  // blamed img_metas, which is not part of this comparison.
  if (outputs[0]->size / sizeof(bool) != inputs[0]->size / sizeof(T) / 4) {
    MS_LOG(EXCEPTION) << "The output size must equal the number of anchor boxes (anchor_box elements / 4).";
  }
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,61 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for CheckValid: for each anchor box, outputs a bool that is true
// iff the box lies fully inside the image described by img_metas.
template <typename T>
class CheckValidCPUKernel : public CPUKernel {
 public:
  CheckValidCPUKernel() = default;
  ~CheckValidCPUKernel() override = default;

  // Caches the device shapes of the two inputs.
  void InitKernel(const CNodePtr &kernel_node) override;

  // inputs: [anchor_box (N x 4), img_metas]; output: [valid (N bools)].
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;

 private:
  // Validates input/output counts and that the output size matches the box count.
  void CheckParams(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
  std::vector<size_t> anchor_box_shape_;  // cached shape of input 0 (not read by Launch)
  std::vector<size_t> img_metas_shape_;   // cached shape of input 1 (not read by Launch)
};
MS_REG_CPU_KERNEL_T(
CheckValid,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool),
CheckValidCPUKernel, float);
MS_REG_CPU_KERNEL_T(
CheckValid,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeBool),
CheckValidCPUKernel, float16);
MS_REG_CPU_KERNEL_T(
CheckValid, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeBool),
CheckValidCPUKernel, int16_t);
MS_REG_CPU_KERNEL_T(
CheckValid, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeBool),
CheckValidCPUKernel, uint8_t);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_

View File

@ -0,0 +1,219 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
// Parses and validates the static op info for CropAndResize: the ranks of the
// image/boxes/box_index/crop_size inputs, the output geometry, and the
// "method"/"extrapolation_value" attributes consumed by Launch.
template <typename T>
void CropAndResizeCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 4) {
    // NOTE(review): MS_LOG(ERROR) does not abort — execution continues below.
    MS_LOG(ERROR) << "Input num is " << input_num << ", but CropAndResize needs 4 inputs.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {
    MS_LOG(ERROR) << "Output num is " << output_num << ", but CropAndResize needs 1 output.";
  }
  // input image
  auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  size_t input_image_shape_len = input_image_shape.size();
  if (input_image_shape_len != 4) {
    MS_LOG(ERROR) << "Image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4
                  << "-D image tensor.";
  }
  // NOTE(review): the indexing below assumes the rank-4 check passed; if it
  // failed we only logged, so this may read past the end of the shape vector.
  // Dimensions [1]/[2] are treated as height/width (NHWC layout — TODO confirm).
  input_height_ = input_image_shape[1];
  input_width_ = input_image_shape[2];
  // input boxes
  auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  size_t input_boxes_shape_len = input_boxes_shape.size();
  if (input_boxes_shape_len != 2) {
    MS_LOG(ERROR) << "Box is rank " << input_boxes_shape_len << ", but CropAndResize supports only rank " << 2
                  << "for boxes.";
  }
  // input box_index
  auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
  size_t input_box_index_shape_len = input_box_index_shape.size();
  if (input_box_index_shape_len != 1) {
    MS_LOG(ERROR) << "Box index is rank " << input_box_index_shape_len << ", but CropAndResize supports only rank " << 1
                  << "for box_index.";
  }
  // input crop_size
  auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
  size_t input_crop_size_shape_len = input_crop_size_shape.size();
  if (input_crop_size_shape_len != 1) {
    MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank "
                  << 1 << "for Crop_size.";
  }
  if (input_crop_size_shape[0] != 2) {
    MS_LOG(ERROR) << "Crop_size is size " << input_crop_size_shape[0] << "-D, but CropAndResize supports only size "
                  << 2 << "for Crop_size.";
  }
  // output
  auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  auto output_shape_len = output_shape.size();
  // Total number of output elements; drives the ParallelFor range in Launch.
  output_size_ = 1;
  for (size_t i = 0; i < output_shape_len; i++) {
    output_size_ *= output_shape[i];
  }
  // set expected output params
  // NOTE(review): again assumes a rank-4 output shape; only logged above if not.
  final_height_ = output_shape[1];
  final_width_ = output_shape[2];
  channel_ = output_shape[3];
  // get op parameters
  // method_ encoding used by Launch: 1 = bilinear, 2 = nearest,
  // 3 = anything else (treated as bilinear-v2).
  string method = AnfAlgo::GetNodeAttr<string>(kernel_node, "method");
  if (method == "bilinear") {
    method_ = 1;
  } else if (method == "nearest") {
    method_ = 2;
  } else {  // bilinear-v2
    method_ = 3;
  }
  // Value written for samples that fall outside the source image (methods 1 and 2).
  extrapolation_value_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "extrapolation_value");
}
// Crops each box out of its source image and resizes it to
// (final_height_, final_width_) using the configured interpolation method.
// inputs: [image (T), boxes (float, N x 4 as y1,x1,y2,x2 normalized coords),
//          box_index (int)]; output: [crops (float)].
template <typename T>
bool CropAndResizeCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                       const std::vector<kernel::AddressPtr> &,
                                       const std::vector<kernel::AddressPtr> &outputs) {
  auto *input_image = reinterpret_cast<T *>(inputs[0]->addr);
  auto *input_boxes = reinterpret_cast<float *>(inputs[1]->addr);
  auto *input_box_index = reinterpret_cast<int *>(inputs[2]->addr);
  auto *output = reinterpret_cast<float *>(outputs[0]->addr);
  auto task = [&](size_t start, size_t end) {
    for (size_t pos = start; pos < end; pos++) {
      // Decompose the flat output offset into (image, y, x, channel) — the
      // output is laid out with channel fastest, i.e. NHWC order.
      size_t pos_temp = pos;
      const int pos_channel = pos_temp % channel_;
      pos_temp = pos_temp / channel_;
      const int pos_x = pos_temp % final_width_;
      pos_temp = pos_temp / final_width_;
      const int pos_y = pos_temp % final_height_;
      const int pos_image_idx = pos_temp / final_height_;
      const int box_index = input_box_index[pos_image_idx];
      // crop values
      const float y1 = input_boxes[4 * pos_image_idx + 0];
      const float x1 = input_boxes[4 * pos_image_idx + 1];
      const float y2 = input_boxes[4 * pos_image_idx + 2];
      const float x2 = input_boxes[4 * pos_image_idx + 3];
      // set scale and target pixels
      // Map output pixel (pos_y, pos_x) to fractional source coordinates.
      float scale_height = final_height_ > 1 ? (y2 - y1) * (input_height_ - 1) / (final_height_ - 1) : 0;
      float scale_width = final_width_ > 1 ? (x2 - x1) * (input_width_ - 1) / (final_width_ - 1) : 0;
      // NOTE(review): in the size-1 branch, "0.5 * (y1 + y2) + (input_height_ - 1)"
      // looks like it should be "* (input_height_ - 1)" (cf. TensorFlow's
      // CropAndResize, which scales the box center into image coordinates) —
      // confirm before relying on 1x1 crops.
      float target_y =
        final_height_ > 1 ? y1 * (input_height_ - 1) + pos_y * scale_height : 0.5 * (y1 + y2) + (input_height_ - 1);
      float target_x =
        final_width_ > 1 ? x1 * (input_width_ - 1) + pos_x * scale_width : 0.5 * (x1 + x2) + (input_width_ - 1);
      // use extrapolation value if out of range
      // Only the bilinear (1) and nearest (2) methods extrapolate; bilinear-v2
      // falls through and samples with clamped indices below.
      if (((target_x < 0) || (target_x > input_width_ - 1)) || ((target_y < 0) || (target_y > input_height_ - 1))) {
        if ((method_ == 1) || (method_ == 2)) {
          output[pos] = extrapolation_value_;
          continue;
        }
      }
      if (method_ == 1) {
        // Bilinear
        // Interpolate between the four neighbouring source pixels.
        const int top_y_index = floorf(target_y);
        const int bottom_y_index = ceilf(target_y);
        const int left_x_index = floorf(target_x);
        const int right_x_index = ceilf(target_x);
        const float y_lerp = target_y - top_y_index;
        const float x_lerp = target_x - left_x_index;
        const float top_left = static_cast<float>(
          input_image[((box_index * input_height_ + top_y_index) * input_width_ + left_x_index) * channel_ +
                      pos_channel]);
        const float top_right = static_cast<float>(
          input_image[((box_index * input_height_ + top_y_index) * input_width_ + right_x_index) * channel_ +
                      pos_channel]);
        const float bottom_left = static_cast<float>(
          input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + left_x_index) * channel_ +
                      pos_channel]);
        const float bottom_right = static_cast<float>(
          input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + right_x_index) * channel_ +
                      pos_channel]);
        // Lerp horizontally on both rows, then vertically between the rows.
        const float top = top_left + (top_right - top_left) * x_lerp;
        const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
        output[pos] = top + (bottom - top) * y_lerp;
      } else if (method_ == 3) {
        // Bilinear-v2: work in the sub-image defined by the box corners
        // (in absolute pixels), clamping sample indices to its extent.
        int y1h = static_cast<int>(y1 * input_height_);
        int x1w = static_cast<int>(x1 * input_width_);
        int y2h = static_cast<int>(y2 * input_height_);
        int x2w = static_cast<int>(x2 * input_width_);
        int w = ((x2w - x1w + 1) > 1) ? x2w - x1w + 1 : 1;  // box width, at least 1
        int h = ((y2h - y1h + 1) > 1) ? y2h - y1h + 1 : 1;  // box height, at least 1
        // Half-pixel-centered sample position inside the box.
        float y_point = (pos_y + 0.5) * (h / static_cast<float>(final_height_)) - 0.5;
        int top_y_index = floorf(y_point);
        top_y_index = std::min(std::max(0, top_y_index), h - 1);
        int bottom_y_index = ceilf(y_point);
        bottom_y_index = std::min(std::max(0, bottom_y_index), h - 1);
        float x_point = (pos_x + 0.5) * (w / static_cast<float>(final_width_)) - 0.5;
        int left_x_index = floorf(x_point);
        left_x_index = std::min(std::max(0, left_x_index), w - 1);
        int right_x_index = ceilf(x_point);
        right_x_index = std::min(std::max(0, right_x_index), w - 1);
        const float y_lerp = y_point - top_y_index;
        const float x_lerp = x_point - left_x_index;
        const int y_top_index = box_index * input_height_ + y1h + top_y_index;
        const int y_bottom_index = box_index * input_height_ + y1h + bottom_y_index;
        const float top_left =
          static_cast<float>(input_image[(y_top_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]);
        const float top_right =
          static_cast<float>(input_image[(y_top_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]);
        const float bottom_left = static_cast<float>(
          input_image[(y_bottom_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]);
        const float bottom_right = static_cast<float>(
          input_image[(y_bottom_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]);
        // Standard bilinear blend of the four clamped samples.
        float ret = top_left * (1 - y_lerp) * (1 - x_lerp) + bottom_right * y_lerp * x_lerp +
                    top_right * (1 - y_lerp) * x_lerp + bottom_left * y_lerp * (1 - x_lerp);
        output[pos] = ret;
      } else {
        // Nearest Neighbour
        const int closest_x_index = roundf(target_x);
        const int closest_y_index = roundf(target_y);
        const float val = static_cast<float>(
          input_image[((box_index * input_height_ + closest_y_index) * input_width_ + closest_x_index) * channel_ +
                      pos_channel]);
        output[pos] = val;
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, output_size_);
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,213 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_
#include <vector>
#include <string>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for the CropAndResize operator: crops boxes out of a batch of
// images and resizes each crop to a fixed (final_height_, final_width_) size.
template <typename T>
class CropAndResizeCPUKernel : public CPUKernel {
 public:
  CropAndResizeCPUKernel() = default;
  ~CropAndResizeCPUKernel() override = default;
  // Reads op attributes and input/output shapes from the graph node.
  void InitKernel(const CNodePtr &kernel_node) override;
  // Computes the cropped-and-resized output; the workspace argument is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;
 private:
  int method_;                 // resize method: 1 = bilinear, 3 = bilinear-v2, otherwise nearest neighbour
  float extrapolation_value_;  // value emitted for sample points falling outside the input image
  int input_crop_size_;        // assumed: number of crop boxes — TODO confirm against InitKernel (not in view)
  int output_size_;            // total number of output elements (parallel-for bound in Launch)
  int input_height_;           // source image height
  int input_width_;            // source image width
  int final_height_;           // resized crop height
  int final_width_;            // resized crop width
  int channel_;                // number of channels
};
// Each supported image dtype is registered twice so the crop_size input may be
// either int32 or int64. Fixed: the int32-image + int32-crop_size entry was a
// copy-paste duplicate of the int8 registration, leaving int32/int32 unhandled.
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float16);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float16);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, float);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, double);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, double);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int8_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int8_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int16_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int16_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int32_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int64_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeInt64)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, int64_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint8_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt8)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint8_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint16_t);
MS_REG_CPU_KERNEL_T(CropAndResize,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeUInt16)
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddInputAttr(kNumberTypeInt32)
                      .AddInputAttr(kNumberTypeInt64)
                      .AddOutputAttr(kNumberTypeFloat32),
                    CropAndResizeCPUKernel, uint16_t);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_

View File

@ -0,0 +1,243 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
// Returns the smallest power of two that is >= v (0 stays 0).
int NmsRoundUpPower2(int v) {
  --v;
  // Smear the highest set bit into every lower position, then add one.
  for (int shift = 1; shift <= 16; shift <<= 1) {
    v |= v >> shift;
  }
  return ++v;
}
// Exchanges the values pointed to by lhs and rhs.
template <typename T>
void Swap(T *lhs, T *rhs) {
  T saved = *lhs;
  *lhs = *rhs;
  *rhs = saved;
}
// Sorting function based on BitonicSort from TopK kernel.
// Sorts box scores (column 4 of each 5-wide box row) into data_buff, carrying
// the original box indices along in index_buff. Both buffers are padded from
// `inner` up to `ceil_power2` with max-value sentinels so the bitonic network
// operates on a power-of-two size. `outer` is unused here — presumably kept
// for signature parity with the GPU kernel; TODO confirm.
template <typename T>
void NMSWithMaskCPUKernel<T>::NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2,
                                                        T *input, T *data_buff, int *index_buff, int box_size) {
  // Load keys: real scores for i < inner, +max sentinels for the padding tail
  // so padded slots sort to the end.
  auto task1 = [&](int start, int end) {
    for (int i = start; i < end; i++) {
      data_buff[i] = (i < inner) ? input[(i * box_size) + 4] : std::numeric_limits<T>::max();
      index_buff[i] = i;
    }
  };
  CPUKernelUtils::ParallelFor(task1, ceil_power2);
  // Classic bitonic sort: i is the size of the bitonic sequences being merged,
  // j the compare-exchange distance within each merge step.
  for (size_t i = 2; i <= static_cast<size_t>(ceil_power2); i <<= 1) {
    for (size_t j = (i >> 1); j > 0; j >>= 1) {
      auto task2 = [&](size_t start, size_t end) {
        for (size_t tid = start; tid < end; tid++) {
          size_t tid_comp = tid ^ j;
          if (tid_comp > tid) {  // each pair is handled once, by its lower index
            if ((tid & i) == 0) {
              // ascending half of the bitonic sequence
              if (data_buff[tid] > data_buff[tid_comp]) {
                Swap(&data_buff[tid], &data_buff[tid_comp]);
                Swap(&index_buff[tid], &index_buff[tid_comp]);
              }
            } else {
              // descending half
              if (data_buff[tid] < data_buff[tid_comp]) {
                Swap(&data_buff[tid], &data_buff[tid_comp]);
                Swap(&index_buff[tid], &index_buff[tid_comp]);
              }
            }
          }
        }
      };
      CPUKernelUtils::ParallelFor(task2, ceil_power2);
    }
  }
}
// Set every entry of the numSq-element pairwise mask to true before NmsPass
// overwrites the upper triangle with real IoU decisions.
template <typename T>
void NMSWithMaskCPUKernel<T>::MaskInit(int numSq, bool *row_mask) {
  auto fill_true = [row_mask](int begin, int stop) {
    for (int idx = begin; idx < stop; ++idx) {
      row_mask[idx] = true;
    }
  };
  CPUKernelUtils::ParallelFor(fill_true, numSq);
}
// copy data from input to output array sorted by indices returned from bitonic sort
// flips boxes if asked to, default - false -> if (x1/y1 > x2/y2)
template <typename T>
void NMSWithMaskCPUKernel<T>::PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_size,
                                             bool flip_mode) {
  auto task = [&](int start, int end) {
    for (int box_num = start; box_num < end; box_num++) {
      // index_buff holds the ascending-score permutation; reading it back to
      // front emits boxes in descending-score order.
      int correct_index = index_buff[(num - 1) - box_num];  // flip the array around
      int correct_arr_start = correct_index * box_size;
      int current_arr_start = box_num * box_size;
      if (flip_mode) {  // flip boxes
        // check x: write (min, max) into (x1, x2)
        if (data_in[correct_arr_start + 0] > data_in[correct_arr_start + 2]) {
          data_out[current_arr_start + 0] = data_in[correct_arr_start + 2];
          data_out[current_arr_start + 2] = data_in[correct_arr_start + 0];
        } else {
          data_out[current_arr_start + 0] = data_in[correct_arr_start + 0];
          data_out[current_arr_start + 2] = data_in[correct_arr_start + 2];
        }
        // check y: write (min, max) into (y1, y2)
        if (data_in[correct_arr_start + 1] > data_in[correct_arr_start + 3]) {
          data_out[current_arr_start + 1] = data_in[correct_arr_start + 3];
          data_out[current_arr_start + 3] = data_in[correct_arr_start + 1];
        } else {
          data_out[current_arr_start + 1] = data_in[correct_arr_start + 1];
          data_out[current_arr_start + 3] = data_in[correct_arr_start + 3];
        }
        // the score (column 4) travels with the box unchanged
        data_out[current_arr_start + 4] = data_in[correct_arr_start + 4];
      } else {  // default behaviour, don't flip
        for (int x = 0; x < 5; x++) {
          data_out[current_arr_start + x] = data_in[correct_arr_start + x];
        }
      }
    }
  };
  CPUKernelUtils::ParallelFor(task, num);
}
// Initialize the returned index array (identity) and the selection mask (all
// true); NmsPass/ReducePass later clear suppressed entries.
// `output` and `box_size` are accepted but unused here.
template <typename T>
void NMSWithMaskCPUKernel<T>::Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size) {
  auto seed_outputs = [sel_idx, sel_boxes](int begin, int stop) {
    for (int idx = begin; idx < stop; ++idx) {
      sel_idx[idx] = idx;
      sel_boxes[idx] = true;
    }
  };
  CPUKernelUtils::ParallelFor(seed_outputs, num);
}
// Returns true when BOTH boxes may be kept, i.e. their intersection-over-union
// does not exceed IOU_value. box_A_ix / box_B_ix are unused; the boxes are
// addressed via their flat start offsets into `output`.
template <typename T>
bool NMSWithMaskCPUKernel<T>::IouDecision(T *output, int box_A_ix, int box_B_ix, int box_A_start, int box_B_start,
                                          float IOU_value) {
  // Corners of the intersection rectangle.
  const T inter_x1 = std::max(output[box_A_start + 0], output[box_B_start + 0]);
  const T inter_y1 = std::max(output[box_A_start + 1], output[box_B_start + 1]);
  const T inter_x2 = std::min(output[box_A_start + 2], output[box_B_start + 2]);
  const T inter_y2 = std::min(output[box_A_start + 3], output[box_B_start + 3]);
  // Clamp to zero so disjoint boxes yield an empty intersection.
  const T inter_w = std::max(inter_x2 - inter_x1, T(0));
  const T inter_h = std::max(inter_y2 - inter_y1, T(0));
  const T inter_area = inter_w * inter_h;
  const T area_a =
    (output[box_A_start + 2] - output[box_A_start + 0]) * (output[box_A_start + 3] - output[box_A_start + 1]);
  const T area_b =
    (output[box_B_start + 2] - output[box_B_start + 0]) * (output[box_B_start + 3] - output[box_B_start + 1]);
  // IoU = intersection / (areaA + areaB - intersection).
  return !(inter_area / (area_a + area_b - inter_area) > static_cast<T>(IOU_value));
}
// Run the parallel NMS pass: every cell of the num x num row_mask matrix is
// updated with the correct IoU decision after having been initialized to true.
// Only the upper triangle (col > row) is evaluated; the rest keeps its initial
// true value. `sel_boxes` is accepted but unused here.
template <typename T>
void NMSWithMaskCPUKernel<T>::NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_size,
                                      bool *row_mask) {
  auto task = [&](int begin, int stop) {
    for (int cell = begin; cell < stop; ++cell) {
      const int row_box = cell / num;  // row in the 2-D row_mask matrix
      const int col_box = cell % num;  // column in the 2-D row_mask matrix
      if (col_box <= row_box) {
        continue;  // lower triangle / diagonal stays true
      }
      row_mask[cell] = IouDecision(output, row_box, col_box, row_box * box_size, col_box * box_size, IOU_value);
    }
  };
  CPUKernelUtils::ParallelFor(task, num * num);
}
// Reduce pass: fold the pairwise decisions into per-box selection flags.
// Boxes are walked from highest to lowest confidence; each still-selected box
// vetoes every later box its row of row_mask marks as overlapping too much.
template <typename T>
void NMSWithMaskCPUKernel<T>::ReducePass(const int num, bool *sel_boxes, bool *row_mask) {
  for (int keeper = 0; keeper < num - 1; ++keeper) {
    if (!sel_boxes[keeper]) {
      continue;  // already suppressed boxes veto nothing
    }
    const bool *mask_row = row_mask + keeper * num;
    auto apply_veto = [sel_boxes, mask_row](int begin, int stop) {
      for (int candidate = begin; candidate < stop; ++candidate) {
        sel_boxes[candidate] = sel_boxes[candidate] && mask_row[candidate];
      }
    };
    CPUKernelUtils::ParallelFor(apply_veto, num);
  }
}
// Reads the iou_threshold attribute and sanity-checks the node's operand counts.
template <typename T>
void NMSWithMaskCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  // IoU above this threshold suppresses the lower-confidence box.
  iou_value_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "iou_threshold");
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {
    // NOTE(review): only logs — execution continues on a malformed graph;
    // consider MS_LOG(EXCEPTION) instead.
    MS_LOG(ERROR) << "Input num is " << input_num << ", but NMSWithMask needs 1 input.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 3) {
    MS_LOG(ERROR) << "Output num is " << output_num << ", but NMSWithMask needs 3 outputs.";
  }
}
// Registers the three workspace buffers used by Launch: bitonic-sort keys,
// sort index permutation, and the N x N pairwise IoU mask.
template <typename T>
void NMSWithMaskCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  num_input_ = input_shape[0];  // Get N values in [N, 5] data.
  // The bitonic sort operates on power-of-two sized buffers.
  ceil_power_2 = NmsRoundUpPower2(num_input_);
  workspace_size_list_.push_back(ceil_power_2 * sizeof(T));    // data buff
  workspace_size_list_.push_back(ceil_power_2 * sizeof(int));  // index buff
  workspace_size_list_.push_back(num_input_ * num_input_ * sizeof(bool));  // mask list
}
// Runs NMS: sort boxes by score, compute pairwise IoU decisions, then reduce
// them into the per-box selection mask.
template <typename T>
bool NMSWithMaskCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                     const std::vector<kernel::AddressPtr> &workspace,
                                     const std::vector<kernel::AddressPtr> &outputs) {
  auto input = reinterpret_cast<T *>(inputs[0]->addr);             // (N, 5) boxes: x1, y1, x2, y2, score
  auto data_buff = reinterpret_cast<T *>(workspace[0]->addr);      // sort keys, padded to ceil_power_2
  auto index_buff = reinterpret_cast<int *>(workspace[1]->addr);   // sort index permutation
  auto row_mask = reinterpret_cast<bool *>(workspace[2]->addr);    // N x N pairwise keep/suppress decisions
  auto output = reinterpret_cast<T *>(outputs[0]->addr);           // boxes, descending-score order
  auto sel_idx = reinterpret_cast<int *>(outputs[1]->addr);        // identity indices 0..N-1
  auto sel_boxes = reinterpret_cast<bool *>(outputs[2]->addr);     // true if the box survives NMS
  // Pipeline: sort -> init mask -> emit sorted boxes -> init outputs ->
  // pairwise IoU pass -> reduction into sel_boxes.
  NmsBitonicSortByKeyKernel(1, num_input_, ceil_power_2, input, data_buff, index_buff, box_size_);
  int total_val = num_input_ * num_input_;
  MaskInit(total_val, row_mask);
  PopulateOutput(input, output, index_buff, num_input_, box_size_, false);
  Preprocess(num_input_, sel_idx, sel_boxes, output, box_size_);
  NmsPass(num_input_, iou_value_, output, sel_boxes, box_size_, row_mask);
  ReducePass(num_input_, sel_boxes, row_mask);
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,80 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include <limits>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for NMSWithMask: sorts (N, 5) boxes by confidence score and
// suppresses overlapping boxes whose IoU exceeds the iou_threshold attribute.
template <typename T>
class NMSWithMaskCPUKernel : public CPUKernel {
 public:
  NMSWithMaskCPUKernel() = default;
  ~NMSWithMaskCPUKernel() override = default;
  void InitKernel(const CNodePtr &kernel_node) override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;
  // Registers workspace buffers: sort keys, sort indices, pairwise IoU mask.
  void InitInputOutputSize(const CNodePtr &kernel_node) override;
 private:
  // Bitonic sort of box scores with index permutation, padded to ceil_power2.
  void NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2, T *input, T *data_buff,
                                 int *index_buff, int box_size);
  // Fill the numSq-element pairwise mask with true.
  void MaskInit(int numSq, bool *row_mask);
  // Copy boxes into the output in sorted order, optionally normalizing flipped corners.
  void PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_size, bool flip_mode);
  // Initialize the output index array and selection mask.
  void Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size);
  // True when both boxes may be kept (IoU not above IOU_value).
  bool IouDecision(T *output, int box_A_ix, int box_B_ix, int box_A_start, int box_B_start, float IOU_value);
  // Compute pairwise keep/suppress decisions into row_mask.
  void NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_size, bool *row_mask);
  // Fold row_mask into the final per-box selection flags.
  void ReducePass(const int num, bool *sel_boxes, bool *row_mask);
  int num_input_;       // N, number of input boxes
  float iou_value_;     // iou_threshold attribute
  size_t ceil_power_2;  // num_input_ rounded up to a power of two
  static const int box_size_ = 5;  // pre_defined box width
};
// Float32 and float16 variants. Outputs: sorted boxes, selected box indices
// (int32), and the post-NMS selection mask (bool).
MS_REG_CPU_KERNEL_T(NMSWithMask,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat32)
                      .AddOutputAttr(kNumberTypeFloat32)
                      .AddOutputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeBool),
                    NMSWithMaskCPUKernel, float);
MS_REG_CPU_KERNEL_T(NMSWithMask,
                    KernelAttr()
                      .AddInputAttr(kNumberTypeFloat16)
                      .AddOutputAttr(kNumberTypeFloat16)
                      .AddOutputAttr(kNumberTypeInt32)
                      .AddOutputAttr(kNumberTypeBool),
                    NMSWithMaskCPUKernel, float16);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_

View File

@ -0,0 +1,225 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
#define BLOCKSIZE 256
#define MAX_DIMENSION 5
namespace mindspore {
namespace kernel {
// Converts the flat (linearized) indices in `tmp_output` into multi-dimensional
// coordinates, written consecutively into `output` (input_dim_size ints each).
// dims: input tensor shape; output_length: number of indices to convert;
// input_total_count: product of dims.
void ParseOutputCoordinate(std::vector<int64_t> dims, int32_t output_length, int32_t input_dim_size,
                           int32_t input_total_count, const int *tmp_output, int *output) {
  int it = 0;
  int column = input_total_count / dims[0];  // stride of the outermost axis
  for (int i = 0; i < output_length; i++) {
    int32_t tmp_output_number = tmp_output[i];
    int tmp_column = column;  // stride of the axis currently being extracted
    for (int j = 0; j < input_dim_size; j++) {
      if (j == input_dim_size - 1) {
        // Innermost axis: the remaining value is the final coordinate.
        output[it++] = tmp_output_number;
        continue;
      }
      // Bug fix: divide by the CURRENT axis stride (tmp_column), not by the
      // outermost stride `column`, so tensors of rank >= 3 decompose correctly.
      output[it++] = tmp_output_number / tmp_column;
      tmp_output_number = tmp_output_number % tmp_column;
      tmp_column = tmp_column / dims[j + 1];
    }
  }
}
// Decides how many output slots are used and whether zero-padding is needed:
//   count == 0             -> emit every non-zero element, no padding;
//   0 < count <= non_zero  -> emit exactly `count` elements, no padding;
//   count > non_zero       -> emit `count` slots, padding past the real ones;
//   count < 0              -> invalid, raises.
void GetOutputLength(bool *padding_flag, int32_t *output_length, int32_t *output_non_zero_length, int32_t count,
                     int32_t non_zero_num) {
  if (count < 0) {
    MS_LOG(EXCEPTION) << "Input count must be greater than or equal to 0, but is " << count;
  } else if (count == 0) {
    *padding_flag = false;
    *output_length = non_zero_num;
    *output_non_zero_length = non_zero_num;
  } else if (count <= non_zero_num) {
    *padding_flag = false;
    *output_length = count;
    *output_non_zero_length = count;
  } else {
    *padding_flag = true;
    *output_length = count;
    *output_non_zero_length = non_zero_num;
  }
}
// Accumulates the product of the first `input_dim_size` dimensions into
// *input_total_count (the caller initializes it, normally to 1).
void GetInputTotalCount(const std::vector<int64_t> &dims_, int32_t *input_total_count, const int32_t &input_dim_size) {
  int32_t axis = 0;
  while (axis < input_dim_size) {
    *input_total_count *= dims_[axis];
    ++axis;
  }
}
// Zero-pads the coordinate slots beyond the real samples and builds the
// boolean validity mask: true for sampled positions, false for padded ones.
void UpdateOutput(const std::vector<int64_t> &dims_, const int32_t &non_zero_num, const int32_t &count_,
                  const int32_t &output_length, const int *mask_dim, int32_t *output_coordinate, bool *mask) {
  const int32_t rank = static_cast<int32_t>(dims_.size());
  // Clear coordinate slots that were not filled with real samples.
  for (int32_t pos = non_zero_num * rank; pos < count_ * rank; ++pos) {
    output_coordinate[pos] = 0;
  }
  // Copy the per-sample validity flags ...
  for (int32_t pos = 0; pos < output_length; ++pos) {
    mask[pos] = (mask_dim[pos] != 0);
  }
  // ... and force the padded tail to invalid.
  for (int32_t pos = non_zero_num; pos < count_; ++pos) {
    mask[pos] = false;
  }
}
// Validates operand counts, reads the seed/seed2/count attributes, and records
// the input tensor shape.
void RandomChoiceWithMaskCPUKernel::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 1) {
    // NOTE(review): only logs — execution continues; consider MS_LOG(EXCEPTION).
    MS_LOG(ERROR) << "Input num is " << input_num << ", but RandomChoiceWithMask needs 1 input.";
  }
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 2) {
    MS_LOG(ERROR) << "Output num is " << output_num << ", but RandomChoiceWithMask needs 2 outputs.";
  }
  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  input_shape_size_ = input_shape.size();
  if (input_shape_size_ < 1 || input_shape_size_ > MAX_DIMENSION) {
    MS_LOG(ERROR) << "Input is " << input_shape_size_
                  << "-D, but RandomChoiceWithMask supports only 1-D to 5-D inputs.";
  }
  // RNG seeds and the number of samples to draw, from the op attributes.
  seed_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed"));
  seed2_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "seed2"));
  count_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "count"));
  MS_LOG(INFO) << "This op attr count is " << count_;
  // Record the input shape (input_num is 1, so this is just the first input's dims).
  for (size_t i = 0; i < input_num; i++) {
    auto input_i_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i);
    for (size_t j = 0; j < input_i_shape.size(); j++) {
      dims_.emplace_back(input_i_shape[j]);
    }
  }
}
// Samples (with replacement) the flat indices of true entries of the boolean
// input, converts them to multi-dimensional coordinates, and emits a validity
// mask. Fixes over the original implementation:
//  - memset_s was called with ELEMENT counts as the byte-count arguments, so
//    only a quarter of each int buffer was actually cleared;
//  - MS_LOG(EXCEPTION) throws, so every delete[] placed after it was dead code
//    and each error path leaked all prior allocations;
//  - raw new[]/delete[] replaced with RAII std::vector buffers.
bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                           const std::vector<kernel::AddressPtr> &,
                                           const std::vector<kernel::AddressPtr> &outputs) {
  auto *input = reinterpret_cast<bool *>(inputs[0]->addr);
  auto *output_coordinate = reinterpret_cast<int32_t *>(outputs[0]->addr);
  auto *mask = reinterpret_cast<bool *>(outputs[1]->addr);
  int32_t input_dim_size = dims_.size();
  if (input_dim_size < 1 || input_dim_size > 5) {
    MS_LOG(EXCEPTION) << "Input dim size is " << input_dim_size << ", which is not supported.";
  }
  // seed2 has priority over seed; fall back to the generator when both are 0.
  int seedc = seed2_ != 0 ? seed2_ : (seed_ != 0 ? seed_ : generator_());
  int32_t input_total_count = 1;
  GetInputTotalCount(dims_, &input_total_count, input_dim_size);
  // Flat indices of every true element of the input mask.
  std::vector<int> input_dim;
  input_dim.reserve(input_total_count);
  for (int32_t i = 0; i < input_total_count; i++) {
    if (input[i] != 0) {
      input_dim.push_back(i);
    }
  }
  int32_t non_zero_num = static_cast<int32_t>(input_dim.size());
  bool padding_flag = false;
  int32_t output_length = 0;
  int32_t output_non_zero_length = 0;
  GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num);
  if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) {
    MS_LOG(EXCEPTION) << "Output size exceed INT_MAX";
  }
  // Zero-initialized vectors: the padding tail needs no separate clearing pass.
  std::vector<int> tmp_output(output_length, 0);
  std::vector<int> mask_dim(output_length, 0);
  if (output_non_zero_length > 0) {
    // Sample (with replacement) from the collected non-zero indices.
    std::mt19937 gen(seedc);
    std::uniform_int_distribution<> dis(0, non_zero_num - 1);
    for (int32_t i = 0; i < output_non_zero_length; i++) {
      tmp_output[i] = input_dim[dis(gen)];
      mask_dim[i] = 1;
    }
  }
  int32_t copy_output_length = output_length * input_dim_size;
  std::vector<int> output(copy_output_length, 0);
  ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output.data(), output.data());
  // The output tensor holds exactly count_ * rank coordinates; never copy more.
  int32_t actual_output_length = count_ * dims_.size();
  copy_output_length = std::min(actual_output_length, copy_output_length);
  if (INT_MAX / static_cast<int>(sizeof(int32_t)) < copy_output_length) {
    MS_LOG(EXCEPTION) << "The output length is out of range!";
  }
  int32_t copy_output_bytes = copy_output_length * static_cast<int32_t>(sizeof(int32_t));
  if (copy_output_bytes > 0) {
    (void)memcpy_s(output_coordinate, copy_output_bytes, output.data(), copy_output_bytes);
  }
  UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim.data(), output_coordinate, mask);
  return true;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,55 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_
#include <vector>
#include <random>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for RandomChoiceWithMask: randomly samples coordinates of true
// entries from a boolean tensor and returns them with a validity mask.
class RandomChoiceWithMaskCPUKernel : public CPUKernel {
 public:
  RandomChoiceWithMaskCPUKernel() = default;
  ~RandomChoiceWithMaskCPUKernel() override = default;
  void InitKernel(const CNodePtr &kernel_node) override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;
 private:
  int32_t count_{0};           // number of samples to emit (0 = every non-zero entry)
  std::vector<int64_t> dims_;  // input tensor shape
  int input_shape_size_{0};    // input rank (1..5 supported)
  int seed_{0};                // fallback RNG seed
  int seed2_{0};               // preferred RNG seed (used when non-zero)
  int input_size_{1};          // NOTE(review): never written in the visible sources — possibly unused
  std::mt19937 generator_;     // derives a seed when both seed attrs are 0
};
// Input: boolean tensor. Outputs: sampled coordinates (int32) and their validity mask (bool).
MS_REG_CPU_KERNEL(
  RandomChoiceWithMask,
  KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool),
  RandomChoiceWithMaskCPUKernel);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_

View File

@ -0,0 +1,223 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/roi_align_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
namespace {
// Expected operand counts for ROIAlign: inputs are (features, rois), one output.
constexpr size_t kInputSize = 2;
constexpr size_t kOutputSize = 1;
}  // namespace
// Caches the feature/ROI shapes and the pooling attributes from the graph node.
template <typename T>
void ROIAlignCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  // Get the input shapes
  auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);     // features, indexed as (N, C, H, W)
  auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);  // rois: (roi_rows, roi_cols)
  auto x_shape_size = x_shape.size();
  if (x_shape_size != 4) {
    // NOTE(review): only logs — execution continues and the indexing below would
    // read out of bounds on a non-4D shape; consider MS_LOG(EXCEPTION).
    MS_LOG(ERROR) << "x shape size is " << x_shape_size << ", but should be 4.";
  }
  channels_ = x_shape[1];
  height_ = x_shape[2];
  width_ = x_shape[3];
  roi_rows_ = rois_shape[0];
  roi_cols_ = rois_shape[1];
  // Pooling attributes from the front-end op definition.
  pooled_height_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_height"));
  pooled_width_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_width"));
  spatial_scale_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "spatial_scale"));
  sample_num_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "sample_num"));
  roi_end_mode_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "roi_end_mode"));
}
// Computes ROIAlign: for each output element, averages bilinear samples taken
// on a roi_bin_grid_h x roi_bin_grid_w grid inside the corresponding bin.
template <typename T>
bool ROIAlignCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                  const std::vector<kernel::AddressPtr> &,
                                  const std::vector<kernel::AddressPtr> &outputs) {
  const T *input = reinterpret_cast<T *>(inputs[0]->addr);
  const T *rois = reinterpret_cast<T *>(inputs[1]->addr);
  auto out_data = reinterpret_cast<T *>(outputs[0]->addr);
  // One work item per output element, flattened over (n, c, ph, pw).
  size_t elem_num = roi_rows_ * channels_ * pooled_height_ * pooled_width_;
  auto task = [&](size_t start, size_t end) {
    for (size_t thread_idx = start; thread_idx < end; thread_idx++) {
      int n = thread_idx / pooled_width_ / pooled_height_ / channels_;  // ROI row index
      const T *roi_box = rois + n * roi_cols_;
      // Skip ROI rows whose roi_box[1] and roi_box[3] are within +/-0.001 of
      // zero (padding rows). NOTE(review): the output element is then left
      // unwritten, which assumes the output buffer is pre-zeroed — confirm.
      if (roi_box[1] < static_cast<T>(0.001) && roi_box[3] < static_cast<T>(0.001) &&
          roi_box[1] > static_cast<T>(-0.001) && roi_box[3] > static_cast<T>(-0.001)) {
        continue;
      }
      int offset = -1;
      int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w;
      T bin_size_h, bin_size_w, roi_start_h, roi_start_w;
      // Decode (n, c, ph, pw), the bin geometry and the base input offset for
      // this output element (helper defined elsewhere in this file).
      bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_,
              pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h,
              &bin_size_w, &roi_start_h, &roi_start_w);
      // (n, c, ph, pw) is the base param of pooled map
      const T count_points_in_grid_cell = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w);
      T accumulate_val = static_cast<T>(0.);
      for (int iy = 0; iy < roi_bin_grid_h; iy++) {
        // Shift half point RIGHT for y / x, while previous scaled roi shift half point LEFT
        const T y = roi_start_h + static_cast<T>(ph) * bin_size_h +
                    static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h);
        for (int ix = 0; ix < roi_bin_grid_w; ix++) {
          const T x = roi_start_w + static_cast<T>(pw) * bin_size_w +
                      static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
          // bilinear interpolate by shifted y / x
          // calculate bilinear interpolation
          int x_low = 0, y_low = 0, x_high = 0, y_high = 0;
          T w1, w2, w3, w4;
          bilinear_interpolate(height_, width_, y, x, &x_low, &y_low, &x_high, &y_high, &w1, &w2, &w3, &w4);
          // Out-of-range samples come back with corner indices of -1 and are skipped.
          if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0 && y_low < height_ && y_high < height_ &&
              x_low < width_ && x_high < width_) {
            T v1 = input[offset + y_low * width_ + x_low];
            T v2 = input[offset + y_low * width_ + x_high];
            T v3 = input[offset + y_high * width_ + x_low];
            T v4 = input[offset + y_high * width_ + x_high];
            T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4);
            accumulate_val += val;
          }
        }
      }
      // Average over the sampling grid.
      accumulate_val /= count_points_in_grid_cell;
      out_data[thread_idx] = accumulate_val;
    }
  };
  CPUKernelUtils::ParallelFor(task, elem_num);
  return true;
}
// Validates the number of input/output addresses handed to Launch.
// @param inputs  must contain exactly kInputSize entries (features, rois).
// @param outputs must contain exactly kOutputSize entries.
// Aborts via MS_LOG(EXCEPTION) on a mismatch.
template <typename T>
void ROIAlignCPUKernel<T>::CheckParam(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  if (inputs.size() != kInputSize) {
    MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but ROIAlign needs " << kInputSize << " inputs.";
  }
  if (outputs.size() != kOutputSize) {
    // Fix: the original message lacked the space before "outputs.", producing e.g. "needs 1outputs.".
    MS_LOG(EXCEPTION) << "Output number is: " << outputs.size() << ", but ROIAlign needs " << kOutputSize
                      << " outputs.";
  }
}
// Computes the four corner indices (x_low/x_high, y_low/y_high) and bilinear
// weights (w1..w4) for sampling the feature map at fractional point (y, x).
// Points more than one pixel outside the image get zero weights and -1
// indices so the caller can skip them.
template <typename T>
void ROIAlignCPUKernel<T>::bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low,
                                                int *x_high, int *y_high, T *w1, T *w2, T *w3, T *w4) {
  constexpr float eps = 0.00007;  // tolerance for low-precision (e.g. float16) rounding near zero
  if (y < static_cast<T>(-1.0) || y > static_cast<T>(height) || x < static_cast<T>(-1.0) || x > static_cast<T>(width)) {
    *w1 = *w2 = *w3 = *w4 = static_cast<T>(0);
    *x_low = *x_high = *y_low = *y_high = -1;
    return;
  }
  // low bounder is at least zero
  y = y <= static_cast<T>(.0) ? static_cast<T>(.0) : y;
  x = x <= static_cast<T>(.0) ? static_cast<T>(.0) : x;
  // top left point
  *y_low = (y <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(y)));
  *x_low = (x <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(x)));
  // bottom right point
  if (*y_low >= height - 1) {
    *y_high = *y_low = height - 1;  // clamp to last row; interpolation degenerates along y
    y = static_cast<T>(*y_low);
  } else {
    *y_high = *y_low + 1;
  }
  if (*x_low >= width - 1) {
    *x_high = *x_low = width - 1;  // clamp to last column; interpolation degenerates along x
    x = static_cast<T>(*x_low);
  } else {
    *x_high = *x_low + 1;
  }
  // distance to nearest points
  T lx, ly, hx, hy;
  ly = y - static_cast<T>(*y_low), lx = x - static_cast<T>(*x_low);
  hy = static_cast<T>(1.) - ly, hx = static_cast<T>(1.) - lx;
  // weight is evaluated by the distance to point away.
  // the closer to point home, the more weight, the farther to point away.
  *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx;
  return;
}
// Decodes flat output index `thread_idx` into pooled coordinates (n, c, ph, pw)
// and derives this ROI bin's sampling geometry: the feature-map offset of the
// (batch, channel) plane, the bin size, the ROI origin, and the number of
// sampling points per bin side.
template <typename T>
void ROIAlignCPUKernel<T>::bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale,
                                   const int sample_num, int roi_end_mode, const int channels, const int height,
                                   const int width, const int pooled_height, const int pooled_width, int *offset,
                                   int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, int *roi_bin_grid_w,
                                   T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w) {
  // (n, c, ph, pw) is the base param of pooled map
  *pw = thread_idx % pooled_width;
  *ph = (thread_idx / pooled_width) % pooled_height;
  *c = (thread_idx / pooled_width / pooled_height) % channels;
  *n = thread_idx / pooled_width / pooled_height / channels;
  // Roi has
  // 1. 4 points, or
  // 2. indicator + 4 points (1 + 4)
  const T *roi_box = roi_boxes + (*n) * roi_cols;
  int roi_batch_ind = 0;
  if (roi_cols == 5) {
    // First column carries the batch index; rint plus a small eps guards
    // against downward rounding when T is a low-precision float.
    roi_batch_ind = static_cast<int>(rint(static_cast<float>(roi_box[0]) + static_cast<float>(0.00007)));
    roi_box++;
  }
  // Scale and shift ROI
  *roi_start_w = roi_box[0] * spatial_scale;
  *roi_start_h = roi_box[1] * spatial_scale;
  T roi_end_w = (roi_box[2] + static_cast<T>(roi_end_mode)) * spatial_scale;
  T roi_end_h = (roi_box[3] + static_cast<T>(roi_end_mode)) * spatial_scale;
  // New ROI height/width
  T roi_width = roi_end_w - (*roi_start_w);
  T roi_height = roi_end_h - (*roi_start_h);
  if (roi_end_mode == 0) {  // backward compatibility
    // Force malformed ROIs to be 1x1
    roi_width = roi_width > static_cast<T>(1.0) ? roi_width : static_cast<T>(1.0);
    roi_height = roi_height > static_cast<T>(1.0) ? roi_height : static_cast<T>(1.0);
  }
  // ratio of roi / pooled
  *bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
  *bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
  *offset = (roi_batch_ind * channels + (*c)) * height * width;  // start of the (batch, channel) plane
  // grid (int) by Sample ratio if defined, otherwise by pooled H/W
  *roi_bin_grid_h = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_height / static_cast<T>(pooled_height)));
  *roi_bin_grid_w = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_width / static_cast<T>(pooled_width)));
  return;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,72 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU implementation of the ROIAlign operator: averages bilinearly sampled
// feature-map values inside each pooled ROI bin.
template <typename T>
class ROIAlignCPUKernel : public CPUKernel {
 public:
  ROIAlignCPUKernel() = default;
  ~ROIAlignCPUKernel() override = default;
  // Reads shapes and node attributes (pooled size, spatial scale, sample num, end mode).
  void InitKernel(const CNodePtr &kernel_node) override;
  // Computes the pooled output; the unnamed middle parameter (workspace) is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;
 private:
  // Validates input/output counts against kInputSize/kOutputSize.
  void CheckParam(const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs);
  // Corner indices and weights for bilinear sampling at fractional (y, x).
  void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, int *x_high,
                            int *y_high, T *w1, T *w2, T *w3, T *w4);
  // Decodes one flat output index into ROI bin geometry and the sampling grid.
  void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, const int sample_num,
               int roi_end_mode, const int channels, const int height, const int width, const int pooled_height,
               const int pooled_width, int *offset, int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h,
               int *roi_bin_grid_w, T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w);
  int pooled_height_;  // pooled output rows
  int pooled_width_;   // pooled output cols
  T spatial_scale_;    // scale multiplied onto ROI coordinates in bin_box
  int sample_num_;     // samples per bin side; <= 0 means derive from bin size
  int roi_end_mode_;   // added to the ROI end coordinates; 0 enables legacy 1x1 clamping
  int roi_rows_;       // number of ROI boxes
  int roi_cols_;       // values per ROI row: 4 (box) or 5 (batch indicator + box)
  int channels_;       // feature-map channels
  int height_;         // feature-map height
  int width_;          // feature-map width
};
// Register the float32 and float16 variants of the ROIAlign CPU kernel.
MS_REG_CPU_KERNEL_T(
  ROIAlign,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ROIAlignCPUKernel, float);
MS_REG_CPU_KERNEL_T(
  ROIAlign,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  ROIAlignCPUKernel, float16);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_

View File

@ -0,0 +1,280 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.h"
#include "runtime/device/cpu/cpu_device_address.h"
namespace mindspore {
namespace kernel {
// Helper for AtomicAdd: performs `*address += val` lock-free. The payload is
// reinterpreted as an unsigned integer U of identical width and a
// compare-and-swap is retried until no other thread intervened.
template <typename T, typename U>
void AtomicAddTask(T *address, T val) {
  U *target_bits = reinterpret_cast<U *>(address);
  U observed = *target_bits;
  while (true) {
    U expected = observed;
    T next_value = *reinterpret_cast<T *>(&expected) + static_cast<T>(val);
    U next_bits = *reinterpret_cast<U *>(&next_value);
    observed = __sync_val_compare_and_swap(target_bits, expected, next_bits);
    if (observed == expected) {
      break;  // no concurrent writer raced us; the addition landed
    }
  }
}
// Atomically adds `val` to `*address`, dispatching on the byte width of T to
// the unsigned carrier type of the same size. Widths other than 1/2/4/8 are
// silently ignored, matching the switch-based dispatch this replaces.
template <typename T>
void AtomicAdd(T *address, T val) {
  const auto width = sizeof(T);
  if (width == 1) {
    AtomicAddTask<T, uint8_t>(address, val);
  } else if (width == 2) {
    AtomicAddTask<T, uint16_t>(address, val);
  } else if (width == 4) {
    AtomicAddTask<T, uint32_t>(address, val);
  } else if (width == 8) {
    AtomicAddTask<T, uint64_t>(address, val);
  }
}
// Validates input/output counts and the rank of the dy input.
// Fix: failures previously used MS_LOG(ERROR), which only logs and lets
// InitKernel continue with invalid shapes; the sibling CPU kernels in this
// change abort with MS_LOG(EXCEPTION), so do the same here.
template <typename T>
void ROIAlignGradCPUKernel<T>::CheckParam(const CNodePtr &kernel_node) {
  // Get the number of the input args
  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
  if (input_num != 2) {
    MS_LOG(EXCEPTION) << "Input number is: " << input_num << ", but ROIAlignGrad needs 2 inputs.";
  }
  // Get the number of the output args
  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (output_num != 1) {
    MS_LOG(EXCEPTION) << "Output number is: " << output_num << ", but ROIAlignGrad needs 1 output.";
  }
  // Get the input shapes
  auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto dy_shape_size = dy_shape.size();
  if (dy_shape_size != 4) {
    MS_LOG(EXCEPTION) << "dy shape size is " << dy_shape_size << ", but should be 4.";
  }
}
// Reads node attributes and caches the shape bookkeeping used by Launch.
// Fix: validate the ranks of rois and xdiff_shape before indexing into them;
// the original dereferenced rois_shape[0..1] and xdiff_shape_[0..3] unchecked.
template <typename T>
void ROIAlignGradCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
  MS_EXCEPTION_IF_NULL(kernel_node);
  CheckParam(kernel_node);
  auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  if (rois_shape.size() < 2) {
    MS_LOG(EXCEPTION) << "rois shape size is " << rois_shape.size() << ", but should be 2.";
  }
  roi_rows_ = rois_shape[0];  // number of ROI boxes
  roi_cols_ = rois_shape[1];  // 4 (box) or 5 (batch indicator + box)
  std::vector<int64_t> xdiff_shape_me = AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "xdiff_shape");
  (void)std::transform(xdiff_shape_me.begin(), xdiff_shape_me.end(), std::back_inserter(xdiff_shape_),
                       [](const int64_t &value) { return static_cast<int>(value); });
  if (xdiff_shape_.size() < 4) {
    MS_LOG(EXCEPTION) << "xdiff_shape size is " << xdiff_shape_.size() << ", but should be 4.";
  }
  pooled_height_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_height"));
  pooled_width_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "pooled_width"));
  spatial_scale_ = static_cast<T>(AnfAlgo::GetNodeAttr<float>(kernel_node, "spatial_scale"));
  sample_num_ = static_cast<int>(AnfAlgo::GetNodeAttr<int64_t>(kernel_node, "sample_num"));
  roi_end_mode_ = 1;  // the gradient kernel always uses the add-1 end mode
  // xdiff_shape is the NCHW shape of the dx output.
  batch_size_ = xdiff_shape_[0];
  channels_ = xdiff_shape_[1];
  height_ = xdiff_shape_[2];
  width_ = xdiff_shape_[3];
}
// Backward ROIAlign: zero-fills dx, then redistributes each dy element to the
// four feature-map corners of every sampled point, using the same bilinear
// weights as the forward pass. AtomicAdd is needed because overlapping ROIs
// make many threads write to the same dx locations.
template <typename T>
bool ROIAlignGradCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  const T *dy = reinterpret_cast<T *>(inputs[0]->addr);    // incoming gradient, shape (rois, C, ph, pw)
  const T *rois = reinterpret_cast<T *>(inputs[1]->addr);  // ROI boxes, one row per ROI
  T *dx = reinterpret_cast<T *>(outputs[0]->addr);         // feature-map gradient, NCHW
  // Pass 1: zero-initialize the whole dx buffer in parallel.
  size_t size_init = batch_size_ * channels_ * height_ * width_;
  auto task1 = [&](size_t start, size_t end) {
    for (size_t thread_idx = start; thread_idx < end; thread_idx++) {
      dx[thread_idx] = static_cast<T>(0.);
    }
  };
  CPUKernelUtils::ParallelFor(task1, size_init);
  // Pass 2: one work item per dy element.
  size_t elem_num = roi_rows_ * channels_ * pooled_height_ * pooled_width_;
  auto task2 = [&](size_t start, size_t end) {
    for (size_t thread_idx = start; thread_idx < end; thread_idx++) {
      int n = thread_idx / pooled_width_ / pooled_height_ / channels_;  // ROI index of this element
      const T *roi_box = rois + n * roi_cols_;
      // Skip boxes whose 2nd and 4th fields are numerically zero (within 1e-3);
      // mirrors the forward kernel's degenerate-box test.
      if (roi_box[1] < static_cast<T>(0.001) && roi_box[3] < static_cast<T>(0.001) &&
          roi_box[1] > static_cast<T>(-0.001) && roi_box[3] > static_cast<T>(-0.001)) {
        continue;
      }
      int offset = -1;
      int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w;
      T bin_size_h, bin_size_w, roi_start_h, roi_start_w;
      // Decode (n, c, ph, pw), the channel-plane offset, bin geometry and sampling grid.
      bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_,
              pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h,
              &bin_size_w, &roi_start_h, &roi_start_w);
      // (n, c, ph, pw) is the base param of pooled map
      const T count_points_in_grid_cell = static_cast<T>(roi_bin_grid_h * roi_bin_grid_w);
      int top_offset = (n * channels_ + c) * pooled_height_ * pooled_width_;
      const T *offset_top_diff = dy + top_offset;
      const T top_diff_this_bin = offset_top_diff[ph * pooled_width_ + pw];  // gradient of this pooled bin
      for (int iy = 0; iy < roi_bin_grid_h; iy++) {
        // Shift half point RIGHT for y / x, while previous scaled roi shift half point LEFT
        const T y = roi_start_h + static_cast<T>(ph) * bin_size_h +
                    static_cast<T>(iy + .5f) * bin_size_h / static_cast<T>(roi_bin_grid_h);
        for (int ix = 0; ix < roi_bin_grid_w; ix++) {
          const T x = roi_start_w + static_cast<T>(pw) * bin_size_w +
                      static_cast<T>(ix + .5f) * bin_size_w / static_cast<T>(roi_bin_grid_w);
          // bilinear interpolate by shifted y / x
          // calculate bilinear interpolation
          int x_low = 0, y_low = 0, x_high = 0, y_high = 0;
          T w1, w2, w3, w4;
          bilinear_interpolate(height_, width_, y, x, &x_low, &y_low, &x_high, &y_high, &w1, &w2, &w3, &w4);
          // Out-of-image samples come back with indices -1 and zero weights; skip them.
          if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0 && y_low < height_ && y_high < height_ &&
              x_low < width_ && x_high < width_) {
            // Split the bin gradient across the four corners; divide by the
            // number of sampled points to invert the forward averaging.
            T g1 = top_diff_this_bin * w1 / count_points_in_grid_cell;
            T g2 = top_diff_this_bin * w2 / count_points_in_grid_cell;
            T g3 = top_diff_this_bin * w3 / count_points_in_grid_cell;
            T g4 = top_diff_this_bin * w4 / count_points_in_grid_cell;
            T *dx_1 = dx + offset + y_low * width_ + x_low;
            T *dx_2 = dx + offset + y_low * width_ + x_high;
            T *dx_3 = dx + offset + y_high * width_ + x_low;
            T *dx_4 = dx + offset + y_high * width_ + x_high;
            AtomicAdd(dx_1, g1);
            AtomicAdd(dx_2, g2);
            AtomicAdd(dx_3, g3);
            AtomicAdd(dx_4, g4);
          }
        }
      }
    }
  };
  CPUKernelUtils::ParallelFor(task2, elem_num);
  return true;
}
// Computes the four corner indices (x_low/x_high, y_low/y_high) and bilinear
// weights (w1..w4) for sampling at fractional point (y, x). Points more than
// one pixel outside the image get zero weights and -1 indices so the caller
// can skip them.
template <typename T>
void ROIAlignGradCPUKernel<T>::bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low,
                                                    int *x_high, int *y_high, T *w1, T *w2, T *w3, T *w4) {
  constexpr float eps = 0.00007;  // tolerance for low-precision (e.g. float16) rounding near zero
  if (y < static_cast<T>(-1.0) || y > static_cast<T>(height) || x < static_cast<T>(-1.0) || x > static_cast<T>(width)) {
    *w1 = *w2 = *w3 = *w4 = static_cast<T>(0);
    *x_low = *x_high = *y_low = *y_high = -1;
    return;
  }
  // low bounder is at least zero
  y = y <= static_cast<T>(.0) ? static_cast<T>(.0) : y;
  x = x <= static_cast<T>(.0) ? static_cast<T>(.0) : x;
  // top left point
  *y_low = (y <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(y)));
  *x_low = (x <= static_cast<T>(eps) ? 0 : static_cast<int>(floor(x)));
  // bottom right point
  if (*y_low >= height - 1) {
    *y_high = *y_low = height - 1;  // clamp to last row; interpolation degenerates along y
    y = static_cast<T>(*y_low);
  } else {
    *y_high = *y_low + 1;
  }
  if (*x_low >= width - 1) {
    *x_high = *x_low = width - 1;  // clamp to last column; interpolation degenerates along x
    x = static_cast<T>(*x_low);
  } else {
    *x_high = *x_low + 1;
  }
  // distance to nearest points
  T lx, ly, hx, hy;
  ly = y - static_cast<T>(*y_low), lx = x - static_cast<T>(*x_low);
  hy = static_cast<T>(1.) - ly, hx = static_cast<T>(1.) - lx;
  // weight is evaluated by the distance to point away.
  // the closer to point home, the more weight, the farther to point away.
  *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx;
  return;
}
// Decodes flat index `thread_idx` into pooled coordinates (n, c, ph, pw) and
// derives this ROI bin's sampling geometry: the feature-map offset of the
// (batch, channel) plane, the bin size, the ROI origin, and the number of
// sampling points per bin side.
template <typename T>
void ROIAlignGradCPUKernel<T>::bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale,
                                       const int sample_num, int roi_end_mode, const int channels, const int height,
                                       const int width, const int pooled_height, const int pooled_width, int *offset,
                                       int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, int *roi_bin_grid_w,
                                       T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w) {
  // (n, c, ph, pw) is the base param of pooled map
  *pw = thread_idx % pooled_width;
  *ph = (thread_idx / pooled_width) % pooled_height;
  *c = (thread_idx / pooled_width / pooled_height) % channels;
  *n = thread_idx / pooled_width / pooled_height / channels;
  // Roi has
  // 1. 4 points, or
  // 2. indicator + 4 points (1 + 4)
  const T *roi_box = roi_boxes + (*n) * roi_cols;
  int roi_batch_ind = 0;
  if (roi_cols == 5) {
    // First column carries the batch index; rint plus a small eps guards
    // against downward rounding when T is a low-precision float.
    roi_batch_ind = static_cast<int>(rint(static_cast<float>(roi_box[0]) + static_cast<float>(0.00007)));
    roi_box++;
  }
  // Scale and shift ROI
  *roi_start_w = roi_box[0] * spatial_scale;
  *roi_start_h = roi_box[1] * spatial_scale;
  T roi_end_w = (roi_box[2] + static_cast<T>(roi_end_mode)) * spatial_scale;
  T roi_end_h = (roi_box[3] + static_cast<T>(roi_end_mode)) * spatial_scale;
  // New ROI height/width
  T roi_width = roi_end_w - (*roi_start_w);
  T roi_height = roi_end_h - (*roi_start_h);
  if (roi_end_mode == 0) {  // backward compatibility
    // Force malformed ROIs to be 1x1
    roi_width = roi_width > static_cast<T>(1.0) ? roi_width : static_cast<T>(1.0);
    roi_height = roi_height > static_cast<T>(1.0) ? roi_height : static_cast<T>(1.0);
  }
  // ratio of roi / pooled
  *bin_size_h = static_cast<T>(roi_height) / static_cast<T>(pooled_height);
  *bin_size_w = static_cast<T>(roi_width) / static_cast<T>(pooled_width);
  *offset = (roi_batch_ind * channels + (*c)) * height * width;  // start of the (batch, channel) plane
  // grid (int) by Sample ratio if defined, otherwise by pooled H/W
  *roi_bin_grid_h = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_height / static_cast<T>(pooled_height)));
  *roi_bin_grid_w = (sample_num > 0) ? sample_num : static_cast<int>(floor(roi_width / static_cast<T>(pooled_width)));
  return;
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,75 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Fix: the include guard previously reused
// MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_, the
// same symbol as roi_align_cpu_kernel.h, so including both headers in one
// translation unit would silently drop this one's contents. The closing
// #endif comment already named the GRAD symbol; the guard now matches it.
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// CPU kernel for the gradient of ROIAlign: scatters the incoming gradient dy
// back onto the feature-map gradient dx using bilinear weights.
template <typename T>
class ROIAlignGradCPUKernel : public CPUKernel {
 public:
  ROIAlignGradCPUKernel() = default;
  ~ROIAlignGradCPUKernel() override = default;
  // Reads shapes and node attributes (xdiff_shape, pooled size, scale, sample num).
  void InitKernel(const CNodePtr &kernel_node) override;
  // Computes dx from dy and rois; the unnamed middle parameter (workspace) is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;
 private:
  // Validates input/output counts and the rank of dy.
  void CheckParam(const CNodePtr &kernel_node);
  // Corner indices and weights for bilinear sampling at fractional (y, x).
  void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, int *x_high,
                            int *y_high, T *w1, T *w2, T *w3, T *w4);
  // Decodes one flat dy index into ROI bin geometry and the sampling grid.
  void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, const int sample_num,
               int roi_end_mode, const int channels, const int height, const int width, const int pooled_height,
               const int pooled_width, int *offset, int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h,
               int *roi_bin_grid_w, T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w);
  std::vector<int> xdiff_shape_;  // NCHW shape of the dx output
  int pooled_height_;             // pooled rows of dy
  int pooled_width_;              // pooled cols of dy
  T spatial_scale_;               // scale multiplied onto ROI coordinates in bin_box
  int sample_num_;                // samples per bin side; <= 0 means derive from bin size
  int roi_end_mode_;              // fixed to 1 in InitKernel (add-1 end mode)
  int roi_rows_;                  // number of ROI boxes
  int roi_cols_;                  // 4 (box) or 5 (batch indicator + box)
  int batch_size_;                // xdiff_shape_[0]
  int channels_;                  // xdiff_shape_[1]
  int height_;                    // xdiff_shape_[2]
  int width_;                     // xdiff_shape_[3]
};
// Register the float32 and float16 variants of the ROIAlignGrad CPU kernel.
MS_REG_CPU_KERNEL_T(
  ROIAlignGrad,
  KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ROIAlignGradCPUKernel, float);
MS_REG_CPU_KERNEL_T(
  ROIAlignGrad,
  KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
  ROIAlignGradCPUKernel, float16);
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_

View File

@ -0,0 +1,127 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.h"
#include <string>
#include "runtime/device/cpu/cpu_device_address.h"
#include "common/thread_pool.h"
namespace mindspore {
namespace kernel {
namespace {
// Scatters the update units with row indices in [start, end) into `target`.
// Each indices row of indices_unit_rank_ integers selects a slice of the
// output; the whole corresponding update unit (unit_size_ scalars) is
// accumulated into it, so duplicate indices add up (ScatterNd semantics).
// Fix: the original wrote only `target[offset] += updates[unit_size_ * i]`,
// dropping every scalar but the first whenever unit_size_ > 1 (i.e. whenever
// the updates carry inner dimensions).
template <typename S, typename T>
void Compute(const ComputeParams<S, T> *params, const size_t start, const size_t end) {
  MS_EXCEPTION_IF_NULL(params);
  T *target = params->target_;
  S *indices = params->indices_;
  T *updates = params->updates_;
  std::vector<int> *out_strides = params->out_strides_;
  MS_EXCEPTION_IF_NULL(out_strides);
  for (size_t i = start; i < end; ++i) {
    // Decode the flat element offset of this unit from its index row.
    int offset = 0;
    for (int j = 0; j < params->indices_unit_rank_; ++j) {
      auto index = indices[i * params->indices_unit_rank_ + j];
      if (index < 0) {
        MS_LOG(EXCEPTION) << "Indices contains element " << index << " less than 0.";
      }
      offset += index * out_strides->at(j) * params->unit_size_;
    }
    // Accumulate the entire inner unit, not just its first scalar.
    for (int k = 0; k < params->unit_size_; ++k) {
      target[offset + k] += updates[params->unit_size_ * i + k];
    }
  }
}
} // namespace
// Derives the scatter geometry from the node's inferred shapes:
//   indices: [d0, ..., dk, indices_unit_rank]  (last dim indexes output dims)
//   updates: [d0, ..., dk, <inner output dims>]
// and caches unit_size_ (scalars per update unit), num_units_ (number of
// index rows) and out_strides_ (row-major strides over the indexed dims).
template <typename S, typename T>
void ScatterNdCPUKernel<S, T>::InitKernel(const CNodePtr &kernel_node) {
  Check(kernel_node);
  auto shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
  auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
  auto updates_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
  auto indices_unit_rank = indices_shape.back();
  if (indices_unit_rank > shape.size()) {
    MS_LOG(EXCEPTION) << "Value of last dimension of indices is greater than shape rank";
  }
  if (indices_shape.size() < 2) {
    MS_LOG(EXCEPTION) << "Indices has dimension less than 2";
  }
  // Required rank relation: updates.rank == indices.rank - 1 + shape.rank - indices_unit_rank.
  if (updates_shape.size() != indices_shape.size() - 1 + shape.size() - indices_unit_rank) {
    MS_LOG(EXCEPTION) << "The ranks of update and indices are inconsistent";
  }
  // Leading dims of updates must match the leading dims of indices.
  for (size_t i = 0; i < indices_shape.size() - 1; ++i) {
    if (updates_shape[i] != indices_shape[i]) {
      MS_LOG(EXCEPTION) << "The shape of updates and indices are different in dimension " << i << " .";
    }
  }
  indices_unit_rank_ = SizeToInt(indices_unit_rank);
  // unit_size_: scalars per update unit (product of the trailing updates dims).
  unit_size_ = 1;
  for (size_t i = indices_shape.size() - 1; i < updates_shape.size(); ++i) {
    unit_size_ *= SizeToInt(updates_shape[i]);
  }
  // num_units_: number of index rows (product of the leading updates dims).
  num_units_ = 1;
  num_units_ *= updates_shape[indices_shape.size() - 2];
  for (int i = SizeToInt(indices_shape.size()) - 3; i >= 0; i--) {
    num_units_ *= updates_shape[i];
  }
  // Row-major strides over the first indices_unit_rank_ output dimensions,
  // built innermost-first and then reversed into index order.
  int out_stride = 1;
  out_strides_.push_back(out_stride);
  for (int i = indices_unit_rank_ - 2; i >= 0; i--) {
    out_stride *= shape[i + 1];
    out_strides_.push_back(out_stride);
  }
  reverse(out_strides_.begin(), out_strides_.end());
}
// Zero-fills the output, then scatters every update unit in parallel.
// Fix: memset_s takes its destination size and fill count in BYTES; the
// original passed the element count (size / sizeof(T)) for both, so only
// 1/sizeof(T) of the output buffer was actually cleared and the rest kept
// stale memory contents.
template <typename S, typename T>
bool ScatterNdCPUKernel<S, T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                      const std::vector<kernel::AddressPtr> &,
                                      const std::vector<kernel::AddressPtr> &outputs) {
  auto target = reinterpret_cast<T *>(outputs[0]->addr);
  auto target_init = memset_s(target, outputs[0]->size, 0, outputs[0]->size);
  if (target_init != EOK) {
    MS_LOG(EXCEPTION) << "ScatterNdCPUKernel Launch task memset failed.";
  }
  // Bundle the non-owning views the element-wise Compute helper needs.
  ComputeParams<S, T> params;
  params.target_ = target;
  params.indices_ = reinterpret_cast<S *>(inputs[0]->addr);
  params.updates_ = reinterpret_cast<T *>(inputs[1]->addr);
  params.target_mem_size_ = outputs[0]->size;
  params.unit_size_ = unit_size_;
  params.indices_unit_rank_ = indices_unit_rank_;
  params.out_strides_ = &out_strides_;
  // One Compute call per index row; rows may collide, but += on the same
  // element is the documented accumulate behavior of ScatterNd.
  auto task = [&](size_t start, size_t end) {
    for (size_t idx = start; idx < end; idx++) {
      Compute<S, T>(&params, idx, idx + 1);
    }
  };
  CPUKernelUtils::ParallelFor(task, num_units_);
  return true;
}
// Verifies that the ScatterNd node carries exactly two inputs and one output.
template <typename S, typename T>
void ScatterNdCPUKernel<S, T>::Check(const CNodePtr &kernel_node) {
  const size_t in_count = AnfAlgo::GetInputTensorNum(kernel_node);
  if (in_count != 2) {
    MS_LOG(EXCEPTION) << "Input number is " << in_count << ", but ScatterNd needs 2 input.";
  }
  const size_t out_count = AnfAlgo::GetOutputTensorNum(kernel_node);
  if (out_count != 1) {
    MS_LOG(EXCEPTION) << "Output number is " << out_count << ", but ScatterNd needs 1 output.";
  }
}
} // namespace kernel
} // namespace mindspore

View File

@ -0,0 +1,150 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_
#include <vector>
#include <unordered_map>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// Plain-data bundle handed to the element-wise Compute helper. All pointers
// are non-owning views into kernel launch buffers owned by the runtime.
template <typename S, typename T>
struct ComputeParams {
  T *target_{nullptr};                      // output buffer (accumulated into)
  S *indices_{nullptr};                     // index rows, indices_unit_rank_ values per row
  T *updates_{nullptr};                     // update units, unit_size_ scalars per row
  int unit_size_{0};                        // scalars per update unit
  int indices_unit_rank_{0};                // integers per index row
  std::vector<int> *out_strides_{nullptr};  // strides over the indexed output dims
  size_t target_mem_size_{0};               // output buffer size in bytes
};
// CPU implementation of the ScatterNd operator: scatters `updates` into a
// zero-initialized output at the positions given by `indices`.
// S: index element type; T: data element type.
template <typename S, typename T>
class ScatterNdCPUKernel : public CPUKernel {
 public:
  ScatterNdCPUKernel() = default;
  ~ScatterNdCPUKernel() override = default;
  // Derives unit_size_/num_units_/out_strides_ from the inferred shapes.
  void InitKernel(const CNodePtr &kernel_node) override;
  // Clears the output and performs the scatter; the unnamed middle parameter (workspace) is unused.
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs) override;
 private:
  // Validates input/output arity of the node.
  void Check(const CNodePtr &kernel_node);
  int unit_size_{0};              // scalars per update unit
  size_t num_units_{0};           // number of index rows
  int indices_unit_rank_{0};      // integers per index row
  std::vector<int> out_strides_;  // strides over the indexed output dims
};
// Register ScatterNd for every supported (index type, data type) pair:
// int64/int32 indices combined with floating, signed and unsigned data types.
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
  ScatterNdCPUKernel, int64_t, double);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ScatterNdCPUKernel, int64_t, float);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
  ScatterNdCPUKernel, int64_t, int64_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  ScatterNdCPUKernel, int64_t, int32_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
  ScatterNdCPUKernel, int64_t, int16_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
  ScatterNdCPUKernel, int64_t, int8_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
  ScatterNdCPUKernel, int64_t, uint64_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
  ScatterNdCPUKernel, int64_t, uint32_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
  ScatterNdCPUKernel, int64_t, uint16_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
  ScatterNdCPUKernel, int64_t, uint8_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
  ScatterNdCPUKernel, int32_t, double);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
  ScatterNdCPUKernel, int32_t, float);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
  ScatterNdCPUKernel, int32_t, int64_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
  ScatterNdCPUKernel, int32_t, int32_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
  ScatterNdCPUKernel, int32_t, int16_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
  ScatterNdCPUKernel, int32_t, int8_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
  ScatterNdCPUKernel, int32_t, uint64_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
  ScatterNdCPUKernel, int32_t, uint32_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd,
  KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
  ScatterNdCPUKernel, int32_t, uint16_t);
MS_REG_CPU_KERNEL_T_S(
  ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
  ScatterNdCPUKernel, int32_t, uint8_t);
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_

View File

@ -24,6 +24,7 @@ from .split import _split_cpu
from .adam import _adam_cpu
from .arg_max import _arg_max_cpu
from .arg_min_with_value import _arg_min_with_value_cpu
from .arg_max_with_value import _arg_max_with_value_cpu
from .bias_add import _bias_add_cpu
from .bias_add_grad import _bias_add_grad_cpu
from .dropout import _dropout_cpu

View File

@ -0,0 +1,31 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ArgMaxWithValue op"""
from mindspore.ops.op_info_register import op_info_register, CpuRegOp, DataType
# CPU registration metadata for ArgMaxWithValue: one required input `x` and
# two required outputs — `indice` (argmax positions, int32) and `values`
# (max values, same dtype as `x`) — for float32 and float16 inputs.
arg_max_with_value_op_info = CpuRegOp("ArgMaxWithValue") \
    .input(0, "x", "required") \
    .output(0, "indice", "required") \
    .output(1, "values", "required") \
    .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \
    .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \
    .get_op_info()
@op_info_register(arg_max_with_value_op_info)
def _arg_max_with_value_cpu():
    """ArgMaxWithValue cpu register"""
    return

View File

@ -1785,7 +1785,7 @@ class ArgMaxWithValue(PrimitiveWithInfer):
TypeError: If `axis` is not an int.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> input_x = Tensor(np.array([0.0, 0.4, 0.6, 0.7, 0.1]), mindspore.float32)
@ -3484,7 +3484,7 @@ class ScatterNd(PrimitiveWithInfer):
ValueError: If any element of `shape` is less than 1.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> op = ops.ScatterNd()

View File

@ -59,7 +59,7 @@ class CropAndResize(PrimitiveWithInfer):
ValueError: If `method` is not one of 'bilinear', 'nearest', 'bilinear_v2'.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> class CropAndResizeNet(nn.Cell):

View File

@ -4257,7 +4257,7 @@ class NMSWithMask(PrimitiveWithInfer):
Tensor is not float16 or float32.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> bbox = np.array([[100.0, 100.0, 50.0, 68.0, 0.63], [150.0, 75.0, 165.0, 115.0, 0.55],

View File

@ -4262,7 +4262,7 @@ class ROIAlign(PrimitiveWithInfer):
TypeError: If `features` or `rois` is not a Tensor.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> input_tensor = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32)

View File

@ -168,7 +168,7 @@ class BoundingBoxEncode(PrimitiveWithInfer):
TypeError: If `anchor_box` or `groundtruth_box` is not a Tensor.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> anchor_box = Tensor([[2, 2, 2, 3], [2, 2, 2, 3]], mindspore.float32)
@ -230,7 +230,7 @@ class BoundingBoxDecode(PrimitiveWithInfer):
TypeError: If `anchor_box` or `deltas` is not a Tensor.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> anchor_box = Tensor([[4, 1, 2, 1], [2, 2, 2, 3]], mindspore.float32)
@ -293,7 +293,7 @@ class CheckValid(PrimitiveWithInfer):
TypeError: If dtype of `bboxes` or `img_metas` is neither float16 nor float32.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> import mindspore

View File

@ -404,7 +404,7 @@ class RandomChoiceWithMask(PrimitiveWithInfer):
TypeError: If `input_x` is not a Tensor.
Supported Platforms:
``Ascend`` ``GPU``
``Ascend`` ``GPU`` ``CPU``
Examples:
>>> rnd_choice_mask = ops.RandomChoiceWithMask()

View File

@ -0,0 +1,146 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetArgmaxWithValue(nn.Cell):
    """Cell running ArgMaxWithValue three ways: axis 0, axis -1, default axis."""

    def __init__(self):
        super(NetArgmaxWithValue, self).__init__()
        self.argmax1 = P.ArgMaxWithValue(0)
        self.argmax2 = P.ArgMaxWithValue(-1)
        self.argmax3 = P.ArgMaxWithValue()

    def construct(self, x):
        # Each op returns an (indices, values) pair.
        return (self.argmax1(x), self.argmax2(x), self.argmax3(x))
class NetArgmaxWithValueBig(nn.Cell):
    """Cell running a single ArgMaxWithValue over a configurable axis."""

    def __init__(self, axis=0):
        super(NetArgmaxWithValueBig, self).__init__()
        self.argmax = P.ArgMaxWithValue(axis)

    def construct(self, x):
        return self.argmax(x)
def argmaxwithvalue_base(data_type):
    """Check ArgMaxWithValue on a fixed 4x3 matrix in PyNative then graph mode.

    Verifies indices and values for axis 0 (outputs 0 and 2, since the
    default axis is also 0 here) and axis -1 (output 1).
    """
    x = Tensor(np.array([[1., 20., 5.],
                         [67., 8., 9.],
                         [130., 24., 15.],
                         [0.3, -0.4, -15.]]).astype(data_type))
    expect_idx_ax0 = np.array([2, 2, 2]).astype(data_type)
    expect_idx_ax1 = np.array([1, 0, 0, 0]).astype(data_type)
    expect_val_ax0 = np.array([130, 24, 15]).astype(data_type)
    expect_val_ax1 = np.array([20, 67, 130, 0.3]).astype(data_type)
    # Same checks in both execution modes, PyNative first as in the original run order.
    for mode in (context.PYNATIVE_MODE, context.GRAPH_MODE):
        context.set_context(mode=mode, device_target="CPU")
        output = NetArgmaxWithValue()(x)
        assert (output[0][0].asnumpy() == expect_idx_ax0).all()
        assert (output[0][1].asnumpy() == expect_val_ax0).all()
        assert (output[1][0].asnumpy() == expect_idx_ax1).all()
        assert (output[1][1].asnumpy() == expect_val_ax1).all()
        assert (output[2][0].asnumpy() == expect_idx_ax0).all()
        assert (output[2][1].asnumpy() == expect_val_ax0).all()
def argmaxwithvalue_3d(data_type, shape_x):
    """Compare ArgMaxWithValue with numpy argmax/maximum.reduce on a random 3-D tensor.

    Uses a fixed seed so the input is reproducible; checks every axis in turn.
    """
    np.random.seed(2)
    x_np = np.random.random(shape_x).astype(data_type)
    x = Tensor(x_np)
    for axis in (0, 1, 2):
        indices, values = NetArgmaxWithValueBig(axis)(x)
        assert (indices.asnumpy() == np.argmax(x_np, axis=axis)).all()
        assert (values.asnumpy() == np.maximum.reduce(x_np, axis)).all()
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_base_float32():
    """ArgMaxWithValue base case with float32 input."""
    argmaxwithvalue_base(np.float32)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_base_float16():
    """ArgMaxWithValue base case with float16 input."""
    argmaxwithvalue_base(np.float16)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_3d_float32():
    """3-D ArgMaxWithValue, float32, in both PyNative and graph mode."""
    shape_x = (2, 32, 256)
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_3d_float16():
    """3-D ArgMaxWithValue, float16, graph mode only."""
    shape_x = (2, 64, 128)
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float16, shape_x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_argmaxwithvalue_3d_big_float32():
    """Larger 3-D ArgMaxWithValue (with a size-1 trailing axis), both modes."""
    shape_x = (128, 1024, 1)
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    argmaxwithvalue_3d(np.float32, shape_x)

View File

@ -0,0 +1,60 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetBoundingBoxDecode(nn.Cell):
    """Cell wrapping BoundingBoxDecode with fixed max_shape and wh_ratio_clip."""

    def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)):
        super(NetBoundingBoxDecode, self).__init__()
        self.decode = P.BoundingBoxDecode(max_shape=(768, 1280),
                                          means=means,
                                          stds=stds,
                                          wh_ratio_clip=0.016)

    def construct(self, anchor, groundtruth):
        return self.decode(anchor, groundtruth)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_boundingbox_decode():
    """BoundingBoxDecode matches precomputed deltas in graph and PyNative mode."""
    means = (0.1, 0.1, 0.2, 0.2)
    stds = (2.0, 2.0, 3.0, 3.0)
    anchor_box = Tensor(np.array([[4, 1, 2, 1], [2, 2, 2, 3]], np.float32),
                        mindspore.float32)
    deltas_box = Tensor(np.array([[3, 1, 2, 2], [1, 2, 1, 4]], np.float32),
                        mindspore.float32)
    # Precomputed expected boxes for these means/stds and the cell's max_shape.
    expect_deltas = np.array([[28.6500, 0.0000, 0.0000, 33.8500],
                              [0.0000, 0.0000, 15.8663, 72.7000]], np.float32)
    tolerance = np.ones(shape=[2, 4]) * 1.0e-4
    # Graph mode first, then PyNative, preserving the original run order.
    for mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=mode, device_target='CPU')
        net = NetBoundingBoxDecode(means, stds)
        diff = net(anchor_box, deltas_box).asnumpy() - expect_deltas
        assert np.all(abs(diff) < tolerance)

View File

@ -0,0 +1,80 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetBoundingBoxEncode(nn.Cell):
    """Cell wrapping BoundingBoxEncode with configurable means and stds."""

    def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)):
        super(NetBoundingBoxEncode, self).__init__()
        self.encode = P.BoundingBoxEncode(means=means, stds=stds)

    def construct(self, anchor, groundtruth):
        return self.encode(anchor, groundtruth)
def bbox2delta(proposals, gt, means, stds):
    """Numpy reference for box encoding: regression deltas from proposals to gt.

    Boxes are (x1, y1, x2, y2) corner format along the last axis; sizes are
    inclusive (x2 - x1 + 1). Returns (dx, dy, dw, dh) stacked on the last
    axis, normalized by the given means and stds.
    """
    # Centers and inclusive sizes of the proposal boxes.
    pcx = (proposals[..., 0] + proposals[..., 2]) * 0.5
    pcy = (proposals[..., 1] + proposals[..., 3]) * 0.5
    pw = proposals[..., 2] - proposals[..., 0] + 1.0
    ph = proposals[..., 3] - proposals[..., 1] + 1.0
    # Centers and inclusive sizes of the ground-truth boxes.
    gcx = (gt[..., 0] + gt[..., 2]) * 0.5
    gcy = (gt[..., 1] + gt[..., 3]) * 0.5
    gw = gt[..., 2] - gt[..., 0] + 1.0
    gh = gt[..., 3] - gt[..., 1] + 1.0
    means = np.array(means, np.float32)
    stds = np.array(stds, np.float32)
    # Raw deltas: normalized center offsets and log size ratios.
    raw = [(gcx - pcx) / pw, (gcy - pcy) / ph, np.log(gw / pw), np.log(gh / ph)]
    return np.stack([(d - m) / s for d, m, s in zip(raw, means, stds)], axis=-1)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_boundingbox_encode():
    """BoundingBoxEncode agrees with the bbox2delta numpy reference."""
    anchor = np.array([[4, 1, 6, 9], [2, 5, 5, 9]]).astype(np.float32)
    gt = np.array([[3, 2, 7, 7], [1, 5, 5, 8]]).astype(np.float32)
    means = (0.1, 0.1, 0.2, 0.2)
    stds = (2.0, 2.0, 3.0, 3.0)
    anchor_box = Tensor(anchor, mindspore.float32)
    groundtruth_box = Tensor(gt, mindspore.float32)
    expect_deltas = bbox2delta(anchor, gt, means, stds)
    tolerance = np.ones(shape=[2, 4]) * 1.0e-6
    # Graph mode first, then PyNative, preserving the original run order.
    for mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=mode, device_target='CPU')
        net = NetBoundingBoxEncode(means, stds)
        diff = net(anchor_box, groundtruth_box).asnumpy() - expect_deltas
        assert np.all(abs(diff) < tolerance)

View File

@ -0,0 +1,86 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class NetCheckValid(nn.Cell):
    """Cell wrapping the CheckValid operator."""

    def __init__(self):
        super(NetCheckValid, self).__init__()
        self.check_valid = P.CheckValid()

    def construct(self, anchor, image_metas):
        return self.check_valid(anchor, image_metas)
def check_valid(nptype):
    """Run CheckValid on a fixed set of boxes for the given numpy dtype.

    Three anchors are tested against image metas (768, 1280, ratio 1); only
    the first box is expected valid, the second has a negative coordinate and
    the third exceeds the image bounds. Checked in graph then PyNative mode.
    """
    anchor = np.array([[50, 0, 100, 700], [-2, 2, 8, 100], [10, 20, 300, 2000]], nptype)
    image_metas = np.array([768, 1280, 1], nptype)
    anchor_box = Tensor(anchor)
    image_metas_box = Tensor(image_metas)
    # np.bool was deprecated in NumPy 1.20 and removed in 1.24; use np.bool_.
    expect = np.array([True, False, False], np.bool_)
    for mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=mode, device_target='CPU')
        net = NetCheckValid()
        output = net(anchor_box, image_metas_box)
        assert np.array_equal(output.asnumpy(), expect)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_float32():
    """CheckValid with float32 boxes."""
    check_valid(np.float32)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_float16():
    """CheckValid with float16 boxes."""
    check_valid(np.float16)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_int16():
    """CheckValid with int16 boxes."""
    check_valid(np.int16)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_check_valid_uint8():
    """CheckValid with uint8 boxes against (76, 128, 1) image metas.

    Kept separate from check_valid() because uint8 cannot represent the
    negative coordinate used there; here the third box exceeds the image
    bounds instead. Checked in graph then PyNative mode.
    """
    anchor = np.array([[5, 0, 10, 70], [2, 2, 8, 10], [1, 2, 30, 200]], np.uint8)
    image_metas = np.array([76, 128, 1], np.uint8)
    anchor_box = Tensor(anchor)
    image_metas_box = Tensor(image_metas)
    # np.bool was deprecated in NumPy 1.20 and removed in 1.24; use np.bool_.
    expect = np.array([True, True, False], np.bool_)
    for mode in (context.GRAPH_MODE, context.PYNATIVE_MODE):
        context.set_context(mode=mode, device_target='CPU')
        net = NetCheckValid()
        output = net(anchor_box, image_metas_box)
        assert np.array_equal(output.asnumpy(), expect)

View File

@ -0,0 +1,423 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
from mindspore import context, Tensor
from mindspore.ops import operations as P
from mindspore import nn
class NetCropAndResize(nn.Cell):
    """Cell wrapping CropAndResize with a chosen method and extrapolation value."""

    def __init__(self, method_="bilinear", extrapolation_value_=0.0):
        super(NetCropAndResize, self).__init__()
        self.crop_and_resize = P.CropAndResize(method=method_,
                                               extrapolation_value=extrapolation_value_)

    def construct(self, image, boxes, box_index, channel):
        return self.crop_and_resize(image, boxes, box_index, channel)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int8_bilinear(datatype=np.int8):
    """CropAndResize, int8 ramp input, bilinear method, graph mode.

    The expected tensor is precomputed for this exact input ramp; the
    negative entries come from casting the 0..N ramp to int8.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 32
    image_width = 18
    channels = 2
    crop_size = (5, 3)
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0, total_values).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    # Extrapolation value 0.5 fills sample points outside the source image.
    net = NetCropAndResize("bilinear", 0.5)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[-111.0, -110.0], [-119.5, -118.5], [-128.0, -127.0]],
                                 [[28.5, 29.5], [20.0, 21.0], [11.5, 12.5]],
                                 [[-88.0, -87.0], [-96.5, -95.5], [-41.0, -40.0]],
                                 [[51.5, 52.5], [43.0, 44.0], [34.5, 35.5]],
                                 [[-65.0, -64.0], [-73.5, -72.5], [-82.0, -81.0]]],
                                [[[0.0, 1.0], [29.75, 30.75], [0.5, 0.5]],
                                 [[-46.75, -45.75], [-17.0, -16.0], [0.5, 0.5]],
                                 [[-93.5, -92.5], [-63.75, -62.75], [0.5, 0.5]],
                                 [[3.75, 4.75], [-110.5, -109.5], [0.5, 0.5]],
                                 [[69.0, 70.0], [98.75, 99.75], [0.5, 0.5]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int16_nearest(datatype=np.int16):
    """CropAndResize, int16 ramp input, nearest-neighbor method, PyNative mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 32
    image_width = 18
    channels = 2
    crop_size = (5, 3)
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0, total_values).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    # Extrapolation value 0.5 fills sample points outside the source image.
    net = NetCropAndResize("nearest", 0.5)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[1170.0, 1171.0], [1160.0, 1161.0], [1152.0, 1153.0]],
                                 [[1314.0, 1315.0], [1304.0, 1305.0], [1296.0, 1297.0]],
                                 [[1458.0, 1459.0], [1448.0, 1449.0], [1440.0, 1441.0]],
                                 [[1602.0, 1603.0], [1592.0, 1593.0], [1584.0, 1585.0]],
                                 [[1746.0, 1747.0], [1736.0, 1737.0], [1728.0, 1729.0]]],
                                [[[0.0, 1.0], [30.0, 31.0], [0.5, 0.5]],
                                 [[216.0, 217.0], [246.0, 247.0], [0.5, 0.5]],
                                 [[432.0, 433.0], [462.0, 463.0], [0.5, 0.5]],
                                 [[612.0, 613.0], [642.0, 643.0], [0.5, 0.5]],
                                 [[828.0, 829.0], [858.0, 859.0], [0.5, 0.5]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int32_bilinear_v2(datatype=np.int32):
    """CropAndResize, int32 offset ramp input, bilinear_v2 method, graph mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 32
    image_width = 18
    channels = 2
    crop_size = (5, 3)
    # Offset shifts the ramp so values are distinct from the other tests.
    offset = 8795
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear_v2", 0.369)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[10008.199, 10009.199], [10008.2, 10009.2], [10008.199, 10009.2]],
                                 [[10130.6, 10131.6], [10130.6, 10131.6], [10130.601, 10131.6]],
                                 [[10253, 10253.999], [10253, 10254], [10253, 10254]],
                                 [[10375.4, 10376.398], [10375.4, 10376.4], [10375.4, 10376.399]],
                                 [[10497.799, 10498.799], [10497.801, 10498.8], [10497.8, 10498.8]]],
                                [[[8876.667, 8877.667], [8898, 8899], [8919.334, 8920.333]],
                                 [[9056.667, 9057.667], [9078, 9079], [9099.333, 9100.333]],
                                 [[9236.667, 9237.667], [9258, 9259], [9279.333, 9280.333]],
                                 [[9416.667, 9417.667], [9438, 9439], [9459.333, 9460.333]],
                                 [[9596.667, 9597.667], [9618, 9619], [9639.333, 9640.334]]]]).astype(
                                     np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_float16_nearest(datatype=np.float16):
    """CropAndResize, float16 ramp input, nearest-neighbor method, PyNative mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 40
    channels = 3
    crop_size = (5, 3)
    offset = 0
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("nearest", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[7380.0, 7380.0, 7384.0], [7352.0, 7352.0, 7352.0],
                                  [7320.0, 7320.0, 7320.0]],
                                 [[8224.0, 8224.0, 8224.0], [8192.0, 8192.0, 8192.0],
                                  [8160.0, 8160.0, 8160.0]],
                                 [[8944.0, 8944.0, 8944.0], [8912.0, 8912.0, 8912.0],
                                  [8880.0, 8880.0, 8880.0]],
                                 [[9664.0, 9664.0, 9664.0], [9632.0, 9632.0, 9632.0],
                                  [9600.0, 9600.0, 9600.0]],
                                 [[10496.0, 10504.0, 10504.0], [10472.0, 10472.0, 10472.0],
                                  [10440.0, 10440.0, 10440.0]]],
                                [[[12.0, 13.0, 14.0], [108.0, 109.0, 110.0], [0.0, 0.0, 0.0]],
                                 [[1092.0, 1093.0, 1094.0], [1188.0, 1189.0, 1190.0], [0.0, 0.0, 0.0]],
                                 [[2172.0, 2172.0, 2174.0], [2268.0, 2268.0, 2270.0], [0.0, 0.0, 0.0]],
                                 [[3372.0, 3372.0, 3374.0], [3468.0, 3468.0, 3470.0], [0.0, 0.0, 0.0]],
                                 [[4452.0, 4452.0, 4456.0], [4548.0, 4548.0, 4552.0],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_float32_bilinear(datatype=np.float32):
    """CropAndResize, float32 offset ramp on a larger image, bilinear, graph mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 512
    image_width = 256
    channels = 3
    crop_size = (5, 3)
    offset = 5000
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[488861.53, 488862.53, 488863.53],
                                  [488670.28, 488671.28, 488672.28],
                                  [488479.03, 488480.03, 488481.03]],
                                 [[539879.75, 539880.75, 539881.75],
                                  [539688.5, 539689.5, 539690.5],
                                  [539497.25, 539498.25, 539499.25]],
                                 [[590898.0, 590899.0, 590900.0], [590706.75, 590707.75, 590708.75],
                                  [590515.5, 590516.5, 590517.5]],
                                 [[641916.25, 641917.25, 641918.25], [641725.0, 641726.0, 641727.0],
                                  [641533.75, 641534.75, 641535.75]],
                                 [[692934.5, 692935.5, 692936.5], [692743.25, 692744.25, 692745.25],
                                  [692552.0, 692553.0, 692554.0]]],
                                [[[5076.5, 5077.5, 5078.5], [5707.625, 5708.625, 5709.625], [0.0, 0.0, 0.0]],
                                 [[78660.5, 78661.5, 78662.5], [79291.625, 79292.625, 79293.625], [0.0, 0.0, 0.0]],
                                 [[152244.5, 152245.5, 152246.5], [152875.625, 152876.625, 152877.625],
                                  [0.0, 0.0, 0.0]],
                                 [[225828.5, 225829.5, 225830.5], [226459.625, 226460.625, 226461.625],
                                  [0.0, 0.0, 0.0]],
                                 [[299412.5, 299413.5, 299414.5], [300043.625, 300044.625, 300045.625],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_float64_nearest(datatype=np.float64):
    """CropAndResize, float64 offset ramp input, nearest-neighbor, PyNative mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 25
    channels = 3
    crop_size = (5, 3)
    offset = 7549
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("nearest", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[12160.0, 12161.0, 12162.0], [12142.0, 12143.0, 12144.0],
                                  [12124.0, 12125.0, 12126.0]],
                                 [[12685.0, 12686.0, 12687.0], [12667.0, 12668.0, 12669.0],
                                  [12649.0, 12650.0, 12651.0]],
                                 [[13135.0, 13136.0, 13137.0], [13117.0, 13118.0, 13119.0],
                                  [13099.0, 13100.0, 13101.0]],
                                 [[13585.0, 13586.0, 13587.0], [13567.0, 13568.0, 13569.0],
                                  [13549.0, 13550.0, 13551.0]],
                                 [[14110.0, 14111.0, 14112.0], [14092.0, 14093.0, 14094.0],
                                  [14074.0, 14075.0, 14076.0]]],
                                [[[7555.0, 7556.0, 7557.0], [7615.0, 7616.0, 7617.0], [0.0, 0.0, 0.0]],
                                 [[8230.0, 8231.0, 8232.0], [8290.0, 8291.0, 8292.0], [0.0, 0.0, 0.0]],
                                 [[8905.0, 8906.0, 8907.0], [8965.0, 8966.0, 8967.0], [0.0, 0.0, 0.0]],
                                 [[9655.0, 9656.0, 9657.0], [9715.0, 9716.0, 9717.0], [0.0, 0.0, 0.0]],
                                 [[10330.0, 10331.0, 10332.0], [10390.0, 10391.0, 10392.0],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_int64_bilinearv2(datatype=np.int64):
    """CropAndResize, int64 offset ramp input, bilinear_v2 method, graph mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 25
    channels = 3
    crop_size = (5, 3)
    offset = 7549
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear_v2", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[12324.999, 12326, 12327], [12325, 12326, 12327],
                                  [12325, 12326, 12327.001]],
                                 [[12730, 12730.999, 12732], [12730, 12731, 12732],
                                  [12730, 12731, 12732]],
                                 [[13134.999, 13136, 13136.998], [13135, 13136, 13137],
                                  [13135, 13136, 13137]],
                                 [[13540, 13540.999, 13541.999], [13540, 13541, 13542],
                                  [13540, 13541, 13542]],
                                 [[13944.999, 13945.999, 13946.999], [13945, 13946.001, 13947],
                                  [13945, 13946, 13947]]],
                                [[[7822, 7823, 7824], [7864, 7865, 7866], [7906, 7907, 7908]],
                                 [[8392, 8393, 8394], [8434, 8435, 8436], [8476, 8477, 8478]],
                                 [[8962, 8963, 8964], [9004, 9005, 9006], [9046, 9047, 9048]],
                                 [[9531.999, 9533.001, 9534], [9574, 9575, 9576], [9616, 9617, 9618.001]],
                                 [[10102, 10103, 10104], [10144, 10145, 10146],
                                  [10186, 10187, 10188]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_uint8_nearest(datatype=np.uint8):
    """CropAndResize, uint8 ramp on a tiny image, nearest-neighbor, PyNative mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    batch_size = 2
    image_height = 7
    image_width = 5
    channels = 2
    crop_size = (5, 3)
    offset = 0
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("nearest", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[84.0, 85.0], [82.0, 83.0], [80.0, 81.0]],
                                 [[94.0, 95.0], [92.0, 93.0], [90.0, 91.0]],
                                 [[104.0, 105.0], [102.0, 103.0], [100.0, 101.0]],
                                 [[114.0, 115.0], [112.0, 113.0], [110.0, 111.0]],
                                 [[124.0, 125.0], [122.0, 123.0], [120.0, 121.0]]],
                                [[[0.0, 1.0], [8.0, 9.0], [0.0, 0.0]],
                                 [[10.0, 11.0], [18.0, 19.0], [0.0, 0.0]],
                                 [[20.0, 21.0], [28.0, 29.0], [0.0, 0.0]],
                                 [[30.0, 31.0], [38.0, 39.0], [0.0, 0.0]],
                                 [[50.0, 51.0], [58.0, 59.0], [0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_crop_and_resize_uint16_bilinear(datatype=np.uint16):
    """CropAndResize, uint16 ramp input, bilinear method, graph mode.

    Expected values precomputed for this fixed input ramp and box set.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    batch_size = 2
    image_height = 50
    image_width = 30
    channels = 3
    crop_size = (5, 3)
    offset = 0
    total_values = batch_size * image_height * image_width * channels
    input_data = np.arange(0 + offset, total_values + offset).reshape(
        (batch_size, image_height, image_width, channels))
    input_boxes = np.array(
        [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32)
    input_box_index = np.array([1, 0]).astype(np.int32)
    input_data_tensor = Tensor(input_data.astype(datatype))
    input_boxes_tensor = Tensor(input_boxes)
    input_box_index_tensor = Tensor(input_box_index)
    net = NetCropAndResize("bilinear", 0.0)
    output = net(input_data_tensor, input_boxes_tensor,
                 input_box_index_tensor, crop_size)
    output_ms = output.asnumpy()
    expected_output = np.array([[[[5557.7998046875, 5558.7998046875, 5559.7998046875],
                                  [5536.0498046875, 5537.0498046875, 5538.0498046875],
                                  [5514.2998046875, 5515.2998046875, 5516.2998046875]],
                                 [[6131.10009765625, 6132.10009765625, 6133.10009765625],
                                  [6109.35009765625, 6110.35009765625, 6111.35009765625],
                                  [6087.60009765625, 6088.60009765625, 6089.60009765625]],
                                 [[6704.39990234375, 6705.39990234375, 6706.39990234375],
                                  [6682.64990234375, 6683.64990234375, 6684.64990234375],
                                  [6660.89990234375, 6661.89990234375, 6662.89990234375]],
                                 [[7277.7001953125, 7278.7001953125, 7279.7001953125],
                                  [7255.9501953125, 7256.9501953125, 7257.9501953125],
                                  [7234.2001953125, 7235.2001953125, 7236.2001953125]],
                                 [[7851.0, 7852.0, 7853.0], [7829.25, 7830.25, 7831.25],
                                  [7807.5, 7808.5, 7809.5]]],
                                [[[8.700000762939453, 9.700000762939453, 10.700000762939453],
                                  [80.4749984741211, 81.4749984741211, 82.4749984741211],
                                  [0.0, 0.0, 0.0]],
                                 [[835.5750122070312, 836.5750122070312, 837.5750122070312],
                                  [907.3499755859375, 908.3499755859375, 909.3499755859375], [0.0, 0.0, 0.0]],
                                 [[1662.449951171875, 1663.449951171875, 1664.449951171875],
                                  [1734.2249755859375, 1735.2249755859375, 1736.2249755859375],
                                  [0.0, 0.0, 0.0]],
                                 [[2489.324951171875, 2490.324951171875, 2491.324951171875],
                                  [2561.10009765625, 2562.10009765625, 2563.10009765625], [0.0, 0.0, 0.0]],
                                 [[3316.199951171875, 3317.199951171875, 3318.199951171875],
                                  [3387.97509765625, 3388.97509765625, 3389.97509765625],
                                  [0.0, 0.0, 0.0]]]]).astype(np.float32)
    error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6
    diff = output_ms - expected_output
    assert np.all(abs(diff) < error)

View File

@ -0,0 +1,109 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore
from mindspore import Tensor
from mindspore.ops import operations as P
def runMSRun(op, bbox):
    """Run an NMSWithMask op on raw bbox rows; return surviving boxes and their scores.

    Args:
        op: an instantiated P.NMSWithMask operator.
        bbox: nested list of rows [x1, y1, x2, y2, score].

    Returns:
        (kept_rows, kept_scores): numpy arrays filtered by the op's keep-mask.
    """
    out_boxes, _, out_mask = op(Tensor(bbox, mindspore.float32))
    boxes_np = out_boxes.asnumpy()
    # The mask marks which sorted proposals survived suppression.
    keep = np.where(out_mask.asnumpy())
    kept = boxes_np[keep]
    return kept[:, 0:4], kept[:, -1]
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_check_order():
    """NMSWithMask must emit its proposals sorted by score, descending."""
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
    nms_op = P.NMSWithMask(0.5)
    for _ in range(10):
        count = 4000
        corners = np.random.randint(1, 100, size=(count, 4))
        # Turn (x, y, w, h) offsets into (x1, y1, x2, y2) corners.
        corners[:, 2] += corners[:, 0]
        corners[:, 3] += corners[:, 1]
        raw_scores = np.random.rand(count, 1)
        stacked = Tensor(np.hstack((corners, raw_scores)), dtype=mindspore.float32)
        prop, _, _ = nms_op(stacked)
        scores_from_op = prop.asnumpy()[:, -1]  # last column holds the score
        scores_expected = np.sort(raw_scores, axis=0)[::-1][:, 0]  # manual descending sort
        np.testing.assert_array_almost_equal(scores_from_op, scores_expected)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_edge_case_1():
    """CASE 1 - fully overlapping boxes: each box is duplicated with a different
    score, so only the highest-scoring copy of each must survive."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    boxes = [[12, 4, 33, 17, 0.6], [20, 11, 38, 23, 0.1], [20, 10, 45, 26, 0.9], [15, 17, 35, 38, 0.5],
             [10, 20, 30, 40, 0.4], [35, 35, 89, 90, 0.8], [12, 4, 33, 17, 0.3], [20, 11, 38, 23, 0.2],
             [20, 10, 45, 26, 0.1], [15, 17, 35, 38, 0.8], [10, 20, 30, 40, 0.41], [35, 35, 89, 90, 0.82]]
    want_rows = np.array([[20., 10., 45., 26.],
                          [35., 35., 89., 90.],
                          [15., 17., 35., 38.],
                          [12., 4., 33., 17.]])
    want_scores = np.array([0.9, 0.82, 0.8, 0.6])
    got_rows, got_scores = runMSRun(P.NMSWithMask(0.3), boxes)
    np.testing.assert_almost_equal(got_rows, want_rows)
    np.testing.assert_almost_equal(got_scores, want_scores)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_edge_case_2():
    """CASE 2 - degenerate zero-area boxes with valid scores: nothing overlaps,
    so every box is kept, ordered by score."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    boxes = [[0, 0, 0, 0, 0.6], [0, 0, 0, 0, 0.1]]
    want_rows = np.array([[0., 0., 0., 0.],
                          [0., 0., 0., 0.]])
    want_scores = np.array([0.6, 0.1])
    got_rows, got_scores = runMSRun(P.NMSWithMask(0.5), boxes)
    np.testing.assert_almost_equal(got_rows, want_rows)
    np.testing.assert_almost_equal(got_scores, want_scores)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_nms_with_mask_edge_case_3():
    """CASE 3 - coordinates out of canonical order (x2 < x1 / y2 < y1): the op
    must still keep both boxes, ordered by score."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    boxes = [[70, 70, 45, 75, 0.6], [30, 33, 43, 29, 0.1]]
    want_rows = np.array([[70., 70., 45., 75.],
                          [30., 33., 43., 29.]])
    want_scores = np.array([0.6, 0.1])
    got_rows, got_scores = runMSRun(P.NMSWithMask(0.7), boxes)
    np.testing.assert_almost_equal(got_rows, want_rows)
    np.testing.assert_almost_equal(got_scores, want_scores)

View File

@ -0,0 +1,121 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class RCWM_count_in(nn.Cell):
    """RandomChoiceWithMask whose count (4) is below the number of valid entries."""

    def __init__(self):
        super(RCWM_count_in, self).__init__()
        self.op = P.RandomChoiceWithMask(count=4, seed=1)

    def construct(self, x):
        return self.op(x)
class RCWM_count_out(nn.Cell):
    """RandomChoiceWithMask whose count (10) exceeds the number of valid entries."""

    def __init__(self):
        super(RCWM_count_out, self).__init__()
        self.op = P.RandomChoiceWithMask(count=10, seed=1)

    def construct(self, x):
        return self.op(x)
class RCWM_3D(nn.Cell):
    """RandomChoiceWithMask wrapper used with a 3-D boolean input mask."""

    def __init__(self):
        super(RCWM_3D, self).__init__()
        self.op = P.RandomChoiceWithMask(count=10, seed=1)

    def construct(self, x):
        return self.op(x)
class RCWM_1D(nn.Cell):
    """RandomChoiceWithMask wrapper used with a 1-D boolean input mask."""

    def __init__(self):
        super(RCWM_1D, self).__init__()
        self.op = P.RandomChoiceWithMask(count=10, seed=9)

    def construct(self, x):
        return self.op(x)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_3D():
    """RandomChoiceWithMask on a 3-D mask: outputs are (count, rank) indices and a (count,) mask."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # np.bool was deprecated in NumPy 1.20 and removed in 1.24; np.bool_ is the
    # supported spelling and behaves identically here.
    input_tensor = Tensor(np.ones([3, 4, 5]).astype(np.bool_))
    expect1 = (10, 3)
    expect2 = (10,)
    rcwm = RCWM_3D()
    output1, output2 = rcwm(input_tensor)
    assert output1.shape == expect1
    assert output2.shape == expect2
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_count_out():
    """count (10) larger than valid entries: output shapes still honor count."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # np.bool was deprecated in NumPy 1.20 and removed in 1.24; use np.bool_.
    input_tensor = Tensor(np.array([[1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1],
                                    [0, 0, 0, 1]]).astype(np.bool_))
    expect1 = (10, 2)
    expect2 = (10,)
    rcwm = RCWM_count_out()
    output1, output2 = rcwm(input_tensor)
    assert output1.shape == expect1
    assert output2.shape == expect2
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_count_in():
    """count (4) smaller than valid entries: output shapes match count exactly."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # np.bool was deprecated in NumPy 1.20 and removed in 1.24; use np.bool_.
    input_tensor = Tensor(np.array([[1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1],
                                    [0, 0, 0, 1]]).astype(np.bool_))
    expect1 = (4, 2)
    expect2 = (4,)
    rcwm = RCWM_count_in()
    output1, output2 = rcwm(input_tensor)
    assert output1.shape == expect1
    assert output2.shape == expect2
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_RCWM_1D():
    """1-D mask with a fixed seed (9): checks the exact sampled indices and mask."""
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    # np.bool was deprecated in NumPy 1.20 and removed in 1.24; use np.bool_.
    input_tensor = Tensor(
        np.array([1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1]).astype(np.bool_))
    expect_index = np.array([[0], [7], [9], [8], [8], [0],
                             [2], [7], [0], [0]]).astype(np.int32)
    expect_mask = np.array(
        [True, True, True, True, True, True, True, True, False, False])
    rcwm = RCWM_1D()
    output1, output2 = rcwm(input_tensor)
    # Debug prints removed: assertions below already pin the full outputs.
    assert np.array_equal(output1.asnumpy(), expect_index)
    assert np.array_equal(output2.asnumpy(), expect_mask)

View File

@ -0,0 +1,75 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops.operations import _grad_ops as G
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
class NetROIAlignGrad(nn.Cell):
    """Thin Cell wrapper around the ROIAlignGrad gradient operator."""

    def __init__(self, xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num):
        super(NetROIAlignGrad, self).__init__()
        self.roi_align_grad = G.ROIAlignGrad(xdiff_shape, pooled_height,
                                             pooled_width, spatial_scale, sample_num)

    def construct(self, dy, rois):
        return self.roi_align_grad(dy, rois)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_roi_align_grad():
    """ROIAlignGrad backward on CPU: the incoming gradient is spread uniformly
    over each pooled bin of the 6x6 feature map, for float32 and float16."""
    def roi_align_grad_case(data_type):
        context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
        rois = Tensor(np.array([[0, -2.0, -2.0, 21.0, 21.0]], data_type))
        # dy is a (1, 1, 3, 3) gradient whose rows are all [.1, .2, .3].
        dy = Tensor(np.tile(np.array([.1, .2, .3], data_type), (1, 1, 3, 1)))
        grad_net = NetROIAlignGrad(
            (1, 1, 6, 6),  # xdiff_shape
            3,             # pooled_height
            3,             # pooled_width
            0.25,          # spatial_scale
            2)             # sample_num
        xdiff = grad_net(dy, rois)
        # Every output row is identical: each pooled column's gradient is split
        # evenly across the two feature columns it covers.
        row = [0.025, 0.025, 0.05, 0.05, 0.075, 0.075]
        expect = np.tile(np.array(row), (1, 1, 6, 1))
        np.testing.assert_almost_equal(xdiff.asnumpy(), expect, decimal=4)

    roi_align_grad_case(np.float32)
    roi_align_grad_case(np.float16)

View File

@ -0,0 +1,75 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
from mindspore import Tensor
from mindspore.ops import operations as P
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_roi_align():
    """ROIAlign forward on CPU across three pooling configurations, for
    float32 and float16 inputs."""
    def roi_align_case(data_type):
        context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
        # (1, 1, 6, 6) feature map holding 1..36 row-major.
        features = Tensor(np.arange(1, 37, dtype=data_type).reshape(1, 1, 6, 6))

        # case 1: 3x3 pooling, scale 0.25, 2 samples, roi_end_mode = 1
        rois = Tensor(np.array([[0, -2.0, -2.0, 21.0, 21.0]], data_type))
        op = P.ROIAlign(3, 3, 0.25, 2, 1)
        expect = [[[[4.5, 6.5, 8.5],
                    [16.5, 18.5, 20.5],
                    [28.5, 30.5, 32.5]]]]
        assert (op(features, rois).asnumpy() == expect).all()

        # case 2: slightly larger ROI with roi_end_mode = 0 gives the same bins
        rois = Tensor(np.array([[0, -2.0, -2.0, 22.0, 22.0]], data_type))
        op = P.ROIAlign(3, 3, 0.25, 2, 0)
        assert (op(features, rois).asnumpy() == expect).all()

        # case 3: 2x2 pooling at full scale with adaptive sampling (sample_num = -1)
        op = P.ROIAlign(2, 2, 1.0, -1, 0)
        expect3 = [[[[6.295, 0.],
                     [0., 0.]]]]
        np.testing.assert_almost_equal(op(features, rois).asnumpy(), expect3, decimal=2)

    roi_align_case(np.float32)
    roi_align_case(np.float16)

View File

@ -0,0 +1,142 @@
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
class Net(nn.Cell):
    """Applies ScatterNd with a fixed output shape supplied at construction."""

    def __init__(self, _shape):
        super(Net, self).__init__()
        self.out_shape = _shape
        self.scatter_nd = P.ScatterNd()

    def construct(self, indices, update):
        return self.scatter_nd(indices, update, self.out_shape)
def scatternd_net(indices, update, _shape, expect):
    """Run ScatterNd through Net and compare the result to expect within 1e-6."""
    net = Net(_shape)
    result = net(Tensor(indices), Tensor(update)).asnumpy()
    tolerance = np.ones(shape=result.shape) * 1.0e-6
    delta = result - expect
    # Both checks together enforce |delta| < tolerance elementwise.
    assert np.all(delta < tolerance)
    assert np.all(-delta < tolerance)
def scatternd_positive(nptype):
    """ScatterNd accumulates duplicate indices: positive/mixed updates.

    The original repeated the whole case verbatim for int32 and int64 indices;
    the loop keeps both checks while removing the duplication.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    shape = (2, 2)
    # [0, 1] receives 3.2 + 5.3 - 2.2 - 1.0 = 5.3; [1, 1] receives 1.1.
    expect = np.array([[0., 5.3],
                       [0., 1.1]]).astype(nptype)
    for index_type in (np.int32, np.int64):
        arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(index_type)
        arr_update = np.array([3.2, 1.1, 5.3, -2.2, -1.0]).astype(nptype)
        scatternd_net(arr_indices, arr_update, shape, expect)
def scatternd_negative(nptype):
    """ScatterNd accumulates duplicate indices: negative updates.

    Deduplicated: the int32 and int64 index cases were identical copy-pastes.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    shape = (2, 2)
    # [1, 0] receives -13.4 + 5.1 - 12.1 - 1.0 = -21.4; [1, 1] receives -3.1.
    expect = np.array([[0., 0.],
                       [-21.4, -3.1]]).astype(nptype)
    for index_type in (np.int32, np.int64):
        arr_indices = np.array([[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]]).astype(index_type)
        arr_update = np.array([-13.4, -3.1, 5.1, -12.1, -1.0]).astype(nptype)
        scatternd_net(arr_indices, arr_update, shape, expect)
def scatternd_positive_uint(nptype):
    """ScatterNd with unsigned dtypes: updates truncate to 3, 1, 5, 3, 1 before
    accumulation, so [0, 1] sums to 12 and [1, 1] to 1.

    Deduplicated: the int32 and int64 index cases were identical copy-pastes.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    shape = (2, 2)
    expect = np.array([[0., 12.],
                       [0., 1.]]).astype(nptype)
    for index_type in (np.int32, np.int64):
        arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(index_type)
        arr_update = np.array([3.2, 1.1, 5.3, 3.8, 1.2]).astype(nptype)
        scatternd_net(arr_indices, arr_update, shape, expect)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_float64():
    """ScatterNd CPU: float64 data, positive and negative accumulation."""
    scatternd_positive(np.float64)
    scatternd_negative(np.float64)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_float32():
    """ScatterNd CPU: float32 data, positive and negative accumulation."""
    scatternd_positive(np.float32)
    scatternd_negative(np.float32)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_int64():
    """ScatterNd CPU: int64 data, positive and negative accumulation."""
    scatternd_positive(np.int64)
    scatternd_negative(np.int64)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_int16():
    """ScatterNd CPU: int16 data, positive and negative accumulation."""
    scatternd_positive(np.int16)
    scatternd_negative(np.int16)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint64():
    """ScatterNd CPU: uint64 data (positive updates only)."""
    scatternd_positive_uint(np.uint64)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint32():
    """ScatterNd CPU: uint32 data (positive updates only)."""
    scatternd_positive_uint(np.uint32)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint16():
    """ScatterNd CPU: uint16 data (positive updates only)."""
    scatternd_positive_uint(np.uint16)
@pytest.mark.level0
@pytest.mark.platform_x86_cpu
@pytest.mark.env_onecard
def test_scatternd_uint8():
    """ScatterNd CPU: uint8 data (positive updates only)."""
    scatternd_positive_uint(np.uint8)