diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.cc
new file mode 100644
index 00000000000..b6b598ba2c2
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.cc
@@ -0,0 +1,105 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.h"
+#include "runtime/device/cpu/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+namespace {
+size_t get_element_num(const std::vector<size_t> &shape) {
+  size_t size = 1;
+  for (size_t i = 0; i < shape.size(); i++) {
+    size *= shape[i];
+  }
+  return size;
+}
+
+template <typename T>
+bool check_validation(const std::vector<size_t> &shape, const size_t num_before_axis, const size_t num_after_axis,
+                      const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) {
+  if (inputs.size() != 1 || outputs.size() != 2) {
+    MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
+    return false;
+  }
+  size_t data_size = sizeof(T);
+  size_t input_size = get_element_num(shape) * data_size;
+  size_t output_num = num_before_axis * num_after_axis;
+  size_t out0_size = output_num * sizeof(int);
+  size_t out1_size = output_num * data_size;
+  if (inputs[0]->size != input_size || outputs[0]->size != out0_size || outputs[1]->size != out1_size) {
+    MS_LOG(EXCEPTION) << "Invalid input or output data size!";
+    return false;
+  }
+  return true;
+}
+}  // namespace
+
+template <typename T>
+void ArgMaxWithValueCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
+  size_t shape_len = shape_.size();
+  int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
+  axis += static_cast<int64_t>(shape_len);
+  if (axis < 0) {
+    MS_LOG(EXCEPTION) << "Invalid axis:" << axis << ", should be in range [-1, " << (shape_len - 1) << "]";
+  }
+  axis = axis % static_cast<int64_t>(shape_len);
+  num_before_axis_ = 1;
+  num_after_axis_ = 1;
+  for (size_t i = 0; i < shape_len; i++) {
+    if (static_cast<int64_t>(i) < axis) {
+      num_before_axis_ *= shape_[i];
+    } else if (static_cast<int64_t>(i) > axis) {
+      num_after_axis_ *= shape_[i];
+    }
+  }
+  dim_axis_ = shape_[axis];
+}
+
+template <typename T>
+bool ArgMaxWithValueCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                                         const std::vector<kernel::AddressPtr> &,
+                                         const std::vector<kernel::AddressPtr> &outputs) {
+  if (!check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) {
+    return false;
+  }
+
+  auto input = reinterpret_cast<T *>(inputs[0]->addr);
+  auto output0 = reinterpret_cast<int *>(outputs[0]->addr);
+  auto output1 = reinterpret_cast<T *>(outputs[1]->addr);
+
+  for (size_t i = 0; i < num_before_axis_; i++) {
+    size_t src_index_i = i * dim_axis_ * num_after_axis_;
+    for (size_t j = 0; j < num_after_axis_; j++) {
+      std::vector<float> array_axis;
+      size_t src_index_j = src_index_i + j;
+      for (size_t k = 0; k < dim_axis_; k++) {
+        size_t src_index_k = k * num_after_axis_ + src_index_j;
+        array_axis.push_back(static_cast<float>(input[src_index_k]));
+      }
+      auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
+      auto max_index = static_cast<int>(std::distance(array_axis.begin(), max_ops));
+      auto dst_index = i * num_after_axis_ + j;
+      output0[dst_index] = max_index;
+      auto src_index = IntToSize(max_index) * num_after_axis_ + src_index_j;
+      output1[dst_index] = input[src_index];
+    }
+  }
+  return true;
+}
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.h
new file mode 100644
index 00000000000..5f3664b934e
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/argmax_with_value_cpu_kernel.h
@@ -0,0 +1,50 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
+#include <vector>
+#include <memory>
+#include <string>
+#include <algorithm>
+#include "backend/kernel_compiler/cpu/cpu_kernel.h"
+#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
+
+namespace mindspore {
+namespace kernel {
+template <typename T>
+class ArgMaxWithValueCPUKernel : public CPUKernel {
+ public:
+  ArgMaxWithValueCPUKernel() = default;
+  ~ArgMaxWithValueCPUKernel() override = default;
+
+  void InitKernel(const CNodePtr &kernel_node) override;
+
+  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
+              const std::vector<AddressPtr> &outputs) override;
+
+ private:
+  std::vector<size_t> shape_;
+  size_t num_before_axis_;
+  size_t num_after_axis_;
+  size_t dim_axis_;
+};
+
+MS_REG_CPU_KERNEL_T(ArgMaxWithValue, KernelAttr(), ArgMaxWithValueCPUKernel, float);
+MS_REG_CPU_KERNEL_T(ArgMaxWithValue, KernelAttr(), ArgMaxWithValueCPUKernel, float16);
+}  // namespace kernel
+}  // namespace mindspore
+
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ARGMAXWITHVALUE_CPU_KERNEL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.cc
new file mode 100644
index 00000000000..2cc26d9af31
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.cc
@@ -0,0 +1,142 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#include "backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { + +template +void BoundingBoxDecodeCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(ERROR) << "Input num is " << input_num << ", but BoundingBoxDecode needs 2 inputs."; + } + + const size_t coordinate_size = 4; + if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa() || + AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa()) { + means_ = AnfAlgo::GetNodeAttr>(kernel_node, "means"); + } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa()) { + float mean = AnfAlgo::GetNodeAttr(kernel_node, "means"); + for (size_t i = 0; i < coordinate_size; i++) { + means_.emplace_back(mean); + } + } else { + MS_LOG(EXCEPTION) << "Attribute means type is invalid."; + } + + if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa() || + AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa()) { + stds_ = AnfAlgo::GetNodeAttr>(kernel_node, "stds"); + } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa()) { + float std = AnfAlgo::GetNodeAttr(kernel_node, "stds"); + for (size_t i = 0; i < coordinate_size; i++) { + stds_.emplace_back(std); + } + } else { + MS_LOG(EXCEPTION) << "Attribute stds type is invalid."; + } + + if (means_.size() < coordinate_size || stds_.size() < coordinate_size) { + MS_LOG(EXCEPTION) << "The size of means or stds is less than 4."; + } + + std::vector max_shape_me = AnfAlgo::GetNodeAttr>(kernel_node, "max_shape"); + (void)std::transform(max_shape_me.begin(), max_shape_me.end(), std::back_inserter(max_shape_), + [](const int64_t &value) { return static_cast(value); }); + wh_ratio_clip_ = AnfAlgo::GetNodeAttr(kernel_node, "wh_ratio_clip"); + + if (max_shape_.size() < 2) { + MS_LOG(EXCEPTION) << "The size of max_shape is less than 2."; + } +} + +template +bool BoundingBoxDecodeCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + auto anchor_box = reinterpret_cast(inputs[0]->addr); + auto deltas = reinterpret_cast(inputs[1]->addr); + auto bboxes = reinterpret_cast(outputs[0]->addr); + + T ms1 = static_cast(max_shape_[0]); + T ms2 = static_cast(max_shape_[1]); + + if (inputs[0]->size != inputs[1]->size) { + MS_LOG(ERROR) << "Anchor box size must be equal to deltas box size: " << inputs[1]->size << ", but got" + << inputs[0]->size; + return false; + } + + const size_t coordinate = 4; + const size_t block_size = inputs[0]->size / sizeof(T); + if ((block_size % coordinate) != 0) { + MS_LOG(ERROR) << "The size of the box must be a multiple of 4."; + return false; + } + + size_t elem_num = block_size / coordinate; + auto task = [&](size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + const size_t left_x = i * 4; + const size_t left_y = i * 4 + 1; + const size_t right_x = i * 4 + 2; + const size_t right_y = i * 4 + 3; + + T dx = deltas[left_x] * static_cast(stds_[0]) + static_cast(means_[0]); + T dy = deltas[left_y] * static_cast(stds_[1]) + static_cast(means_[1]); + T dw = deltas[right_x] * static_cast(stds_[2]) + static_cast(means_[2]); + T dh = deltas[right_y] * static_cast(stds_[3]) + static_cast(means_[3]); + + T max_ratio = static_cast(abs(log(wh_ratio_clip_))); + + dw = dw > max_ratio ? max_ratio : (dw < (-max_ratio) ? 
(-max_ratio) : dw); + dh = dh > max_ratio ? max_ratio : (dh < (-max_ratio) ? (-max_ratio) : dh); + + T px = (anchor_box[left_x] + anchor_box[right_x]) * static_cast(0.5); + T py = (anchor_box[left_y] + anchor_box[right_y]) * static_cast(0.5); + T pw = anchor_box[right_x] - anchor_box[left_x] + static_cast(1.0); + T ph = anchor_box[right_y] - anchor_box[left_y] + static_cast(1.0); + + T gx = px + pw * dx; + T gy = py + ph * dy; + T gw = pw * exp(dw); + T gh = ph * exp(dh); + + T x1 = gx - gw * static_cast(0.5) + static_cast(0.5); + T y1 = gy - gh * static_cast(0.5) + static_cast(0.5); + T x2 = gx + gw * static_cast(0.5) - static_cast(0.5); + T y2 = gy + gh * static_cast(0.5) - static_cast(0.5); + + x1 = x1 > ms2 ? ms2 : (x1 < static_cast(0) ? static_cast(0) : x1); + y1 = y1 > ms1 ? ms1 : (y1 < static_cast(0) ? static_cast(0) : y1); + x2 = x2 > ms2 ? ms2 : (x2 < static_cast(0) ? static_cast(0) : x2); + y2 = y2 > ms1 ? ms1 : (y2 < static_cast(0) ? static_cast(0) : y2); + + bboxes[left_x] = x1; + bboxes[left_y] = y1; + bboxes[right_x] = x2; + bboxes[right_y] = y2; + } + }; + CPUKernelUtils::ParallelFor(task, elem_num); + + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.h new file mode 100644 index 00000000000..ddb452134e1 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_decode_cpu_kernel.h @@ -0,0 +1,56 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class BoundingBoxDecodeCPUKernel : public CPUKernel { + public: + BoundingBoxDecodeCPUKernel() = default; + ~BoundingBoxDecodeCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + std::vector means_; + std::vector stds_; + std::vector max_shape_; + float wh_ratio_clip_; +}; + +MS_REG_CPU_KERNEL_T( + BoundingBoxDecode, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + BoundingBoxDecodeCPUKernel, float); + +MS_REG_CPU_KERNEL_T( + BoundingBoxDecode, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + BoundingBoxDecodeCPUKernel, float16); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_DECODE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.cc new file mode 100644 index 00000000000..4f160d130cf --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.cc @@ -0,0 +1,115 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { + +template +void BoundingBoxEncodeCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(ERROR) << "Input num is " << input_num << ", but BoundingBoxEncode needs 2 inputs."; + } + + const size_t coordinate_size = 4; + if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa() || + AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa()) { + means_ = AnfAlgo::GetNodeAttr>(kernel_node, "means"); + } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("means")->isa()) { + float mean = AnfAlgo::GetNodeAttr(kernel_node, "means"); + for (size_t i = 0; i < coordinate_size; i++) { + means_.emplace_back(mean); + } + } else { + MS_LOG(EXCEPTION) << "Attribute means type is invalid."; + } + + if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa() || + AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa()) { + stds_ = AnfAlgo::GetNodeAttr>(kernel_node, "stds"); + } else if (AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("stds")->isa()) { + float std = AnfAlgo::GetNodeAttr(kernel_node, "stds"); + for (size_t i = 0; i < coordinate_size; i++) { + stds_.emplace_back(std); + } + } else { + MS_LOG(EXCEPTION) << "Attribute stds type is invalid."; + } + + if (means_.size() < coordinate_size || stds_.size() < coordinate_size) { + MS_LOG(EXCEPTION) << "The size of means or stds is less than 4."; + } +} + +template +bool BoundingBoxEncodeCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + auto anchor_box = reinterpret_cast(inputs[0]->addr); + auto groundtruth_box = reinterpret_cast(inputs[1]->addr); + auto deltas = reinterpret_cast(outputs[0]->addr); + + if (inputs[0]->size != inputs[1]->size) { + MS_LOG(ERROR) << "Anchor box size must be equal to groundtruth box size: " << inputs[1]->size << ", but got" + << inputs[0]->size; + return false; + } + + const size_t coordinate = 4; + const size_t block_size = inputs[0]->size / sizeof(T); + if ((block_size % coordinate) != 0) { + MS_LOG(ERROR) << "The size of the box must be a multiple of 4."; + return false; + } + + size_t elem_num = block_size / coordinate; + auto task = [&](size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + const size_t left_x = i * 4; + const size_t left_y = i * 4 + 1; + const size_t right_x = i * 4 + 2; + const size_t right_y = i * 4 + 3; + + T px = (anchor_box[left_x] + anchor_box[right_x]) * static_cast(0.5); + T py = (anchor_box[left_y] + anchor_box[right_y]) * static_cast(0.5); + T pw = anchor_box[right_x] - anchor_box[left_x] + static_cast(1.0); + T ph = anchor_box[right_y] - anchor_box[left_y] + static_cast(1.0); + + T gx = (groundtruth_box[left_x] + groundtruth_box[right_x]) * static_cast(0.5); + T gy = (groundtruth_box[left_y] + groundtruth_box[right_y]) * static_cast(0.5); + T gw = groundtruth_box[right_x] - groundtruth_box[left_x] + static_cast(1.0); + T gh = groundtruth_box[right_y] - groundtruth_box[left_y] + static_cast(1.0); + + T dx = (gx - px) / pw; + T dy = (gy - py) / ph; + T dw = log(gw / pw); + T dh = log(gh / ph); + + deltas[left_x] = (dx - static_cast(means_[0])) / static_cast(stds_[0]); + deltas[left_y] = (dy - static_cast(means_[1])) / static_cast(stds_[1]); + deltas[right_x] = (dw - 
static_cast(means_[2])) / static_cast(stds_[2]); + deltas[right_y] = (dh - static_cast(means_[3])) / static_cast(stds_[3]); + } + }; + CPUKernelUtils::ParallelFor(task, elem_num); + + return true; +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.h new file mode 100644 index 00000000000..76ab0d415a8 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/boundingbox_encode_cpu_kernel.h @@ -0,0 +1,54 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class BoundingBoxEncodeCPUKernel : public CPUKernel { + public: + BoundingBoxEncodeCPUKernel() = default; + ~BoundingBoxEncodeCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + std::vector means_; + std::vector stds_; +}; + +MS_REG_CPU_KERNEL_T( + BoundingBoxEncode, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + BoundingBoxEncodeCPUKernel, float); + +MS_REG_CPU_KERNEL_T( + BoundingBoxEncode, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + BoundingBoxEncodeCPUKernel, float16); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_BOUNDINGBOX_ENCODE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/check_valid_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/check_valid_cpu_kernel.cc new file mode 100644 index 00000000000..233e1265e36 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/check_valid_cpu_kernel.cc @@ -0,0 +1,84 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/check_valid_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr size_t kInputSize = 2; +constexpr size_t kOutputSize = 1; +} // namespace + +template +void CheckValidCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + anchor_box_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0); + img_metas_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 1); +} + +template +bool CheckValidCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + CheckParams(inputs, outputs); + auto anchor_box = reinterpret_cast(inputs[0]->addr); + auto img_metas = reinterpret_cast(inputs[1]->addr); + auto output = reinterpret_cast(outputs[0]->addr); + const size_t coordinate = 4; + const size_t elem_num = inputs[0]->size / sizeof(T) / coordinate; + + auto task = [&](size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + const size_t left_x = i * 4; + const size_t left_y = i * 4 + 1; + const size_t right_x = i * 4 + 2; + const size_t right_y = i * 4 + 3; + + bool valid_flag = false; + valid_flag |= !(anchor_box[left_x] >= static_cast(0.0)); + valid_flag |= !(anchor_box[left_y] >= static_cast(0.0)); + valid_flag |= !(img_metas[1] * img_metas[2] - static_cast(1.0) >= anchor_box[right_x]); + valid_flag |= !(img_metas[0] * img_metas[2] - static_cast(1.0) >= anchor_box[right_y]); + + output[i] = !valid_flag; + } + }; + CPUKernelUtils::ParallelFor(task, elem_num); + + return true; +} + +template +void CheckValidCPUKernel::CheckParams(const std::vector &inputs, + const std::vector &outputs) { + // inputs: anchor_box, img_metas + if (inputs.size() != kInputSize) { + MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but CheckValid needs " << kInputSize << " inputs."; + } + + // outputs: valid + if (outputs.size() != kOutputSize) { + MS_LOG(EXCEPTION) << "Output number is: " << outputs.size() << ", but CheckValid needs " << kOutputSize + << "outputs."; + } + if (outputs[0]->size / sizeof(bool) != inputs[0]->size / sizeof(T) / 4) { + MS_LOG(EXCEPTION) << "The output dimensions must match the dimensions of img_metas."; + } +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/check_valid_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/check_valid_cpu_kernel.h new file mode 100644 index 00000000000..0260e722182 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/check_valid_cpu_kernel.h @@ -0,0 +1,61 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_ +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class CheckValidCPUKernel : public CPUKernel { + public: + CheckValidCPUKernel() = default; + ~CheckValidCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + void CheckParams(const std::vector &inputs, const std::vector &outputs); + std::vector anchor_box_shape_; + std::vector img_metas_shape_; +}; + +MS_REG_CPU_KERNEL_T( + CheckValid, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeBool), + CheckValidCPUKernel, float); + +MS_REG_CPU_KERNEL_T( + CheckValid, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeBool), + CheckValidCPUKernel, float16); + +MS_REG_CPU_KERNEL_T( + CheckValid, KernelAttr().AddInputAttr(kNumberTypeInt16).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeBool), + CheckValidCPUKernel, int16_t); + +MS_REG_CPU_KERNEL_T( + CheckValid, KernelAttr().AddInputAttr(kNumberTypeUInt8).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeBool), + CheckValidCPUKernel, uint8_t); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CHECK_VALID_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc new file mode 100644 index 00000000000..cc8d37147ec --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.cc @@ -0,0 +1,219 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { + +template +void CropAndResizeCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 4) { + MS_LOG(ERROR) << "Input num is " << input_num << ", but CropAndResize needs 4 inputs."; + } + + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 1) { + MS_LOG(ERROR) << "Output num is " << output_num << ", but CropAndResize needs 1 output."; + } + + // input image + auto input_image_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + size_t input_image_shape_len = input_image_shape.size(); + if (input_image_shape_len != 4) { + MS_LOG(ERROR) << "Image tensor is " << input_image_shape_len << "-D, but CropAndResize supports only " << 4 + << "-D image tensor."; + } + + input_height_ = input_image_shape[1]; + input_width_ = input_image_shape[2]; + + // input boxes + auto input_boxes_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + size_t input_boxes_shape_len = input_boxes_shape.size(); + if (input_boxes_shape_len != 2) { + MS_LOG(ERROR) << "Box is rank " << input_boxes_shape_len << ", but CropAndResize supports only rank " << 2 + << "for boxes."; + } + + // input box_index + auto input_box_index_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2); + size_t input_box_index_shape_len = input_box_index_shape.size(); + if (input_box_index_shape_len != 1) { + MS_LOG(ERROR) << "Box index is rank " << input_box_index_shape_len << ", but CropAndResize supports only rank " << 1 + << "for box_index."; + } + + // input crop_size + auto input_crop_size_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3); + size_t input_crop_size_shape_len = input_crop_size_shape.size(); + if (input_crop_size_shape_len != 1) { + MS_LOG(ERROR) << "Crop_size is rank " << input_crop_size_shape_len << "-D, but CropAndResize supports only rank " + << 1 << "for Crop_size."; + } + if (input_crop_size_shape[0] != 2) { + MS_LOG(ERROR) << "Crop_size is size " << input_crop_size_shape[0] << "-D, but CropAndResize supports only size " + << 2 << "for Crop_size."; + } + + // output + auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto output_shape_len = output_shape.size(); + output_size_ = 1; + for (size_t i = 0; i < output_shape_len; i++) { + output_size_ *= output_shape[i]; + } + + // set expected output params + final_height_ = output_shape[1]; + final_width_ = output_shape[2]; + channel_ = output_shape[3]; + + // get op parameters + string method = AnfAlgo::GetNodeAttr(kernel_node, "method"); + if (method == "bilinear") { + method_ = 1; + } else if (method == "nearest") { + method_ = 2; + } else { // bilinear-v2 + method_ = 3; + } + extrapolation_value_ = AnfAlgo::GetNodeAttr(kernel_node, "extrapolation_value"); +} + +template +bool CropAndResizeCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + auto *input_image = reinterpret_cast(inputs[0]->addr); + auto *input_boxes = reinterpret_cast(inputs[1]->addr); + auto *input_box_index = reinterpret_cast(inputs[2]->addr); + auto *output = reinterpret_cast(outputs[0]->addr); + + auto task = [&](size_t start, size_t end) { + for (size_t pos = start; pos < end; pos++) { + size_t pos_temp = pos; + const int pos_channel = pos_temp % channel_; + pos_temp = pos_temp / 
channel_; + const int pos_x = pos_temp % final_width_; + pos_temp = pos_temp / final_width_; + const int pos_y = pos_temp % final_height_; + const int pos_image_idx = pos_temp / final_height_; + const int box_index = input_box_index[pos_image_idx]; + + // crop values + const float y1 = input_boxes[4 * pos_image_idx + 0]; + const float x1 = input_boxes[4 * pos_image_idx + 1]; + const float y2 = input_boxes[4 * pos_image_idx + 2]; + const float x2 = input_boxes[4 * pos_image_idx + 3]; + + // set scale and target pixels + float scale_height = final_height_ > 1 ? (y2 - y1) * (input_height_ - 1) / (final_height_ - 1) : 0; + float scale_width = final_width_ > 1 ? (x2 - x1) * (input_width_ - 1) / (final_width_ - 1) : 0; + float target_y = + final_height_ > 1 ? y1 * (input_height_ - 1) + pos_y * scale_height : 0.5 * (y1 + y2) + (input_height_ - 1); + float target_x = + final_width_ > 1 ? x1 * (input_width_ - 1) + pos_x * scale_width : 0.5 * (x1 + x2) + (input_width_ - 1); + + // use extrapolation value if out of range + if (((target_x < 0) || (target_x > input_width_ - 1)) || ((target_y < 0) || (target_y > input_height_ - 1))) { + if ((method_ == 1) || (method_ == 2)) { + output[pos] = extrapolation_value_; + continue; + } + } + + if (method_ == 1) { + // Bilinear + const int top_y_index = floorf(target_y); + const int bottom_y_index = ceilf(target_y); + const int left_x_index = floorf(target_x); + const int right_x_index = ceilf(target_x); + const float y_lerp = target_y - top_y_index; + const float x_lerp = target_x - left_x_index; + const float top_left = static_cast( + input_image[((box_index * input_height_ + top_y_index) * input_width_ + left_x_index) * channel_ + + pos_channel]); + const float top_right = static_cast( + input_image[((box_index * input_height_ + top_y_index) * input_width_ + right_x_index) * channel_ + + pos_channel]); + const float bottom_left = static_cast( + input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + left_x_index) * channel_ + + pos_channel]); + const float bottom_right = static_cast( + input_image[((box_index * input_height_ + bottom_y_index) * input_width_ + right_x_index) * channel_ + + pos_channel]); + const float top = top_left + (top_right - top_left) * x_lerp; + const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp; + output[pos] = top + (bottom - top) * y_lerp; + } else if (method_ == 3) { + int y1h = static_cast(y1 * input_height_); + int x1w = static_cast(x1 * input_width_); + int y2h = static_cast(y2 * input_height_); + int x2w = static_cast(x2 * input_width_); + int w = ((x2w - x1w + 1) > 1) ? x2w - x1w + 1 : 1; + int h = ((y2h - y1h + 1) > 1) ? 
y2h - y1h + 1 : 1; + + float y_point = (pos_y + 0.5) * (h / static_cast(final_height_)) - 0.5; + int top_y_index = floorf(y_point); + top_y_index = std::min(std::max(0, top_y_index), h - 1); + + int bottom_y_index = ceilf(y_point); + bottom_y_index = std::min(std::max(0, bottom_y_index), h - 1); + + float x_point = (pos_x + 0.5) * (w / static_cast(final_width_)) - 0.5; + int left_x_index = floorf(x_point); + left_x_index = std::min(std::max(0, left_x_index), w - 1); + + int right_x_index = ceilf(x_point); + right_x_index = std::min(std::max(0, right_x_index), w - 1); + + const float y_lerp = y_point - top_y_index; + const float x_lerp = x_point - left_x_index; + const int y_top_index = box_index * input_height_ + y1h + top_y_index; + const int y_bottom_index = box_index * input_height_ + y1h + bottom_y_index; + + const float top_left = + static_cast(input_image[(y_top_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]); + const float top_right = + static_cast(input_image[(y_top_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]); + const float bottom_left = static_cast( + input_image[(y_bottom_index * input_width_ + x1w + left_x_index) * channel_ + pos_channel]); + const float bottom_right = static_cast( + input_image[(y_bottom_index * input_width_ + x1w + right_x_index) * channel_ + pos_channel]); + + float ret = top_left * (1 - y_lerp) * (1 - x_lerp) + bottom_right * y_lerp * x_lerp + + top_right * (1 - y_lerp) * x_lerp + bottom_left * y_lerp * (1 - x_lerp); + output[pos] = ret; + } else { + // Nearest Neighbour + const int closest_x_index = roundf(target_x); + const int closest_y_index = roundf(target_y); + const float val = static_cast( + input_image[((box_index * input_height_ + closest_y_index) * input_width_ + closest_x_index) * channel_ + + pos_channel]); + output[pos] = val; + } + } + }; + CPUKernelUtils::ParallelFor(task, output_size_); + return true; +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h new file mode 100644 index 00000000000..62c43c35317 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/crop_and_resize_cpu_kernel.h @@ -0,0 +1,213 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class CropAndResizeCPUKernel : public CPUKernel { + public: + CropAndResizeCPUKernel() = default; + ~CropAndResizeCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + int method_; + float extrapolation_value_; + int input_crop_size_; + int output_size_; + int input_height_; + int input_width_; + int final_height_; + int final_width_; + int channel_; +}; + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, float16); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, float16); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, float); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, float); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, double); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, double); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int8_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int8_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int16_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int16_t); + 
+MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int8_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int32_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int64_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, int64_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, uint8_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, uint8_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeUInt16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, uint16_t); + +MS_REG_CPU_KERNEL_T(CropAndResize, + KernelAttr() + .AddInputAttr(kNumberTypeUInt16) + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddOutputAttr(kNumberTypeFloat32), + CropAndResizeCPUKernel, uint16_t); +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CROP_AND_RESIZE_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.cc new file mode 100644 index 00000000000..b2cc5f667db --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.cc @@ -0,0 +1,243 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.h"
+#include "runtime/device/cpu/cpu_device_address.h"
+
+namespace mindspore {
+namespace kernel {
+
+int NmsRoundUpPower2(int v) {
+  v--;
+  v |= v >> 1;
+  v |= v >> 2;
+  v |= v >> 4;
+  v |= v >> 8;
+  v |= v >> 16;
+  v++;
+  return v;
+}
+
+template <typename T>
+void Swap(T *lhs, T *rhs) {
+  T tmp = lhs[0];
+  lhs[0] = rhs[0];
+  rhs[0] = tmp;
+}
+
+// Sorting function based on BitonicSort from TopK kernel
+template <typename T>
+void NMSWithMaskCPUKernel<T>::NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2,
+                                                        T *input, T *data_buff, int *index_buff, int box_size) {
+  auto task1 = [&](int start, int end) {
+    for (int i = start; i < end; i++) {
+      data_buff[i] = (i < inner) ? input[(i * box_size) + 4] : std::numeric_limits<T>::max();
+      index_buff[i] = i;
+    }
+  };
+  CPUKernelUtils::ParallelFor(task1, ceil_power2);
+
+  for (size_t i = 2; i <= static_cast<size_t>(ceil_power2); i <<= 1) {
+    for (size_t j = (i >> 1); j > 0; j >>= 1) {
+      auto task2 = [&](size_t start, size_t end) {
+        for (size_t tid = start; tid < end; tid++) {
+          size_t tid_comp = tid ^ j;
+          if (tid_comp > tid) {
+            if ((tid & i) == 0) {
+              if (data_buff[tid] > data_buff[tid_comp]) {
+                Swap(&data_buff[tid], &data_buff[tid_comp]);
+                Swap(&index_buff[tid], &index_buff[tid_comp]);
+              }
+            } else {
+              if (data_buff[tid] < data_buff[tid_comp]) {
+                Swap(&data_buff[tid], &data_buff[tid_comp]);
+                Swap(&index_buff[tid], &index_buff[tid_comp]);
+              }
+            }
+          }
+        }
+      };
+      CPUKernelUtils::ParallelFor(task2, ceil_power2);
+    }
+  }
+}
+
+// Initialize per row mask array to all true
+template <typename T>
+void NMSWithMaskCPUKernel<T>::MaskInit(int numSq, bool *row_mask) {
+  auto task = [&](int start, int end) {
+    for (int mat_pos = start; mat_pos < end; mat_pos++) {
+      row_mask[mat_pos] = true;
+    }
+  };
+  CPUKernelUtils::ParallelFor(task, numSq);
+}
+
+// Copy data from input to output array sorted by indices returned from bitonic sort.
+// Flips boxes if asked to, default - false -> if (x1/y1 > x2/y2)
+template <typename T>
+void NMSWithMaskCPUKernel<T>::PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_size,
+                                             bool flip_mode) {
+  auto task = [&](int start, int end) {
+    for (int box_num = start; box_num < end; box_num++) {
+      int correct_index = index_buff[(num - 1) - box_num];  // flip the array around
+      int correct_arr_start = correct_index * box_size;
+      int current_arr_start = box_num * box_size;
+      if (flip_mode) {  // flip boxes
+        // check x
+        if (data_in[correct_arr_start + 0] > data_in[correct_arr_start + 2]) {
+          data_out[current_arr_start + 0] = data_in[correct_arr_start + 2];
+          data_out[current_arr_start + 2] = data_in[correct_arr_start + 0];
+        } else {
+          data_out[current_arr_start + 0] = data_in[correct_arr_start + 0];
+          data_out[current_arr_start + 2] = data_in[correct_arr_start + 2];
+        }
+        // check y
+        if (data_in[correct_arr_start + 1] > data_in[correct_arr_start + 3]) {
+          data_out[current_arr_start + 1] = data_in[correct_arr_start + 3];
+          data_out[current_arr_start + 3] = data_in[correct_arr_start + 1];
+        } else {
+          data_out[current_arr_start + 1] = data_in[correct_arr_start + 1];
+          data_out[current_arr_start + 3] = data_in[correct_arr_start + 3];
+        }
+        data_out[current_arr_start + 4] = data_in[correct_arr_start + 4];
+      } else {  // default behaviour, don't flip
+        for (int x = 0; x < 5; x++) {
+          data_out[current_arr_start + x] = data_in[correct_arr_start + x];
+        }
+      }
+    }
+  };
+  CPUKernelUtils::ParallelFor(task, num);
+}
+
+// Populate the return mask (init to all true) and the return index array
+template <typename T>
+void NMSWithMaskCPUKernel<T>::Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size) {
+  auto task = [&](int start, int end) {
+    for (int box_num = start; box_num < end; box_num++) {
+      sel_idx[box_num] = box_num;
+      sel_boxes[box_num] = true;
+    }
+  };
+  CPUKernelUtils::ParallelFor(task, num);
+}
+
+template <typename T>
+bool NMSWithMaskCPUKernel<T>::IouDecision(T *output, int box_A_ix, int box_B_ix, int box_A_start, int box_B_start,
+                                          float IOU_value) {
+  T x_1 = std::max(output[box_A_start + 0], output[box_B_start + 0]);
+  T y_1 = std::max(output[box_A_start + 1], output[box_B_start + 1]);
+  T x_2 = std::min(output[box_A_start + 2], output[box_B_start + 2]);
+  T y_2 = std::min(output[box_A_start + 3], output[box_B_start + 3]);
+  T width = std::max(x_2 - x_1, T(0));  // in case of no overlap
+  T height = std::max(y_2 - y_1, T(0));
+
+  T area1 = (output[box_A_start + 2] - output[box_A_start + 0]) * (output[box_A_start + 3] - output[box_A_start + 1]);
+  T area2 = (output[box_B_start + 2] - output[box_B_start + 0]) * (output[box_B_start + 3] - output[box_B_start + 1]);
+
+  T combined_area = area1 + area2;
+  return !(((width * height) / (combined_area - (width * height))) > static_cast<T>(IOU_value));
+}
+
+// Run parallel NMS pass
+// Every position in the row_mask array is updated with the correct IOU decision after being init to all True
+template <typename T>
+void NMSWithMaskCPUKernel<T>::NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_size,
+                                      bool *row_mask) {
+  auto task = [&](int start, int end) {
+    for (int mask_index = start; mask_index < end; mask_index++) {
+      int box_i = mask_index / num;  // row in 2d row_mask array
+      int box_j = mask_index % num;  // col in 2d row_mask array
+      if (box_j > box_i) {  // skip when box_j index lower/equal to box_i - will remain true
+        int box_i_start_index = box_i * box_size;  // adjust starting indices
+        int box_j_start_index = box_j * box_size;
+        row_mask[mask_index] = IouDecision(output, box_i, box_j, box_i_start_index, box_j_start_index, IOU_value);
+      }
+    }
+  };
+  CPUKernelUtils::ParallelFor(task, num * num);
+}
+
+// Reduce pass runs on 1 block to allow thread sync
+template <typename T>
+void NMSWithMaskCPUKernel<T>::ReducePass(const int num, bool *sel_boxes, bool *row_mask) {
+  // loop over every box in order of high to low confidence score
+  for (int i = 0; i < num - 1; ++i) {
+    if (!sel_boxes[i]) {
+      continue;
+    }
+    // every thread handles a different set of boxes (per all boxes in order)
+    auto task = [&](int start, int end) {
+      for (int j = start; j < end; j++) {
+        sel_boxes[j] = sel_boxes[j] && row_mask[i * num + j];
+      }
+    };
+    CPUKernelUtils::ParallelFor(task, num);
+  }
+}
+
+template <typename T>
+void NMSWithMaskCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
+  MS_EXCEPTION_IF_NULL(kernel_node);
+  iou_value_ = AnfAlgo::GetNodeAttr<float>(kernel_node, "iou_threshold");
+  size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
+  if (input_num != 1) {
+    MS_LOG(ERROR) << "Input num is " << input_num << ", but NMSWithMask needs 1 input.";
+  }
+
+  size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
+  if (output_num != 3) {
+    MS_LOG(ERROR) << "Output num is " << output_num << ", but NMSWithMask needs 3 outputs.";
+  }
+}
+
+template <typename T>
+void NMSWithMaskCPUKernel<T>::InitInputOutputSize(const CNodePtr &kernel_node) {
+  CPUKernel::InitInputOutputSize(kernel_node);
+  auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
+  num_input_ = input_shape[0];  // Get N values in [N, 5] data.
+  ceil_power_2 = NmsRoundUpPower2(num_input_);
+
+  workspace_size_list_.push_back(ceil_power_2 * sizeof(T));                 // data buff
+  workspace_size_list_.push_back(ceil_power_2 * sizeof(int));               // index buff
+  workspace_size_list_.push_back(num_input_ * num_input_ * sizeof(bool));   // mask list
+}
+
+template <typename T>
+bool NMSWithMaskCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
+                                     const std::vector<kernel::AddressPtr> &workspace,
+                                     const std::vector<kernel::AddressPtr> &outputs) {
+  auto input = reinterpret_cast<T *>(inputs[0]->addr);
+  auto data_buff = reinterpret_cast<T *>(workspace[0]->addr);
+  auto index_buff = reinterpret_cast<int *>(workspace[1]->addr);
+  auto row_mask = reinterpret_cast<bool *>(workspace[2]->addr);
+  auto output = reinterpret_cast<T *>(outputs[0]->addr);
+  auto sel_idx = reinterpret_cast<int *>(outputs[1]->addr);
+  auto sel_boxes = reinterpret_cast<bool *>(outputs[2]->addr);
+
+  NmsBitonicSortByKeyKernel(1, num_input_, ceil_power_2, input, data_buff, index_buff, box_size_);
+  int total_val = num_input_ * num_input_;
+  MaskInit(total_val, row_mask);
+  PopulateOutput(input, output, index_buff, num_input_, box_size_, false);
+  Preprocess(num_input_, sel_idx, sel_boxes, output, box_size_);
+  NmsPass(num_input_, iou_value_, output, sel_boxes, box_size_, row_mask);
+  ReducePass(num_input_, sel_boxes, row_mask);
+  return true;
+}
+
+}  // namespace kernel
+}  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.h
new file mode 100644
index 00000000000..3f073757047
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/nms_with_mask_cpu_kernel.h
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class NMSWithMaskCPUKernel : public CPUKernel { + public: + NMSWithMaskCPUKernel() = default; + ~NMSWithMaskCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &workspace, + const std::vector &outputs) override; + + void InitInputOutputSize(const CNodePtr &kernel_node) override; + + private: + void NmsBitonicSortByKeyKernel(const int outer, const int inner, const int ceil_power2, T *input, T *data_buff, + int *index_buff, int box_size); + + void MaskInit(int numSq, bool *row_mask); + + void PopulateOutput(T *data_in, T *data_out, int *index_buff, const int num, int box_size, bool flip_mode); + + void Preprocess(const int num, int *sel_idx, bool *sel_boxes, T *output, int box_size); + + bool IouDecision(T *output, int box_A_ix, int box_B_ix, int box_A_start, int box_B_start, float IOU_value); + + void NmsPass(const int num, const float IOU_value, T *output, bool *sel_boxes, int box_size, bool *row_mask); + + void ReducePass(const int num, bool *sel_boxes, bool *row_mask); + + int num_input_; + float iou_value_; + size_t ceil_power_2; + static const int box_size_ = 5; // pre_defined box width +}; + +MS_REG_CPU_KERNEL_T(NMSWithMask, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeBool), + NMSWithMaskCPUKernel, float); + +MS_REG_CPU_KERNEL_T(NMSWithMask, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeBool), + NMSWithMaskCPUKernel, float16); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_NMS_WITH_MASK_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc new file mode 100644 index 00000000000..921f2811cbb --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.cc @@ -0,0 +1,225 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" +#define BLOCKSIZE 256 +#define MAX_DIMENSION 5 + +namespace mindspore { +namespace kernel { + +void ParseOutputCoordinate(std::vector dims, int32_t output_length, int32_t input_dim_size, + int32_t input_total_count, const int *tmp_output, int *output) { + int it = 0; + int column = input_total_count / dims[0]; + for (int i = 0; i < output_length; i++) { + int32_t tmp_output_number = tmp_output[i]; + int tmp_column = column; + for (int j = 0; j < input_dim_size; j++) { + if (j == input_dim_size - 1) { + output[it++] = tmp_output_number; + continue; + } + output[it++] = tmp_output_number / column; + tmp_output_number = tmp_output_number % column; + tmp_column = tmp_column / dims[j + 1]; + } + } +} + +void GetOutputLength(bool *padding_flag, int32_t *output_length, int32_t *output_non_zero_length, int32_t count, + int32_t non_zero_num) { + if (count == 0) { + *padding_flag = false; + *output_length = non_zero_num; + *output_non_zero_length = non_zero_num; + } else if (count > 0 && count <= non_zero_num) { + *padding_flag = false; + *output_length = count; + *output_non_zero_length = count; + } else if (count > non_zero_num) { + *padding_flag = true; + *output_length = count; + *output_non_zero_length = non_zero_num; + } else { + MS_LOG(EXCEPTION) << "Input count must be greater than or equal to 0, but is " << count; + } +} + +void GetInputTotalCount(const std::vector &dims_, int32_t *input_total_count, const int32_t &input_dim_size) { + for (int32_t i = 0; i < input_dim_size; i++) { + *input_total_count *= dims_[i]; + } +} + +void UpdateOutput(const std::vector &dims_, const int32_t &non_zero_num, const int32_t &count_, + const int32_t &output_length, const int *mask_dim, int32_t *output_coordinate, bool *mask) { + for (int32_t i = non_zero_num * dims_.size(); i < static_cast(count_ * dims_.size()); i++) { + output_coordinate[i] = 0; + } + for (int32_t i = 0; i < output_length; i++) { + mask[i] = static_cast(mask_dim[i]); + } + for (int32_t i = non_zero_num; i < count_; i++) { + mask[i] = false; + } +} + +void RandomChoiceWithMaskCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 1) { + MS_LOG(ERROR) << "Input num is " << input_num << ", but RandomChoiceWithMask needs 1 input."; + } + + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 2) { + MS_LOG(ERROR) << "Output num is " << output_num << ", but RandomChoiceWithMask needs 2 outputs."; + } + + auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + input_shape_size_ = input_shape.size(); + if (input_shape_size_ < 1 || input_shape_size_ > MAX_DIMENSION) { + MS_LOG(ERROR) << "Input is " << input_shape_size_ + << "-D, but RandomChoiceWithMask supports only 1-D to 5-D inputs."; + } + + seed_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "seed")); + seed2_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "seed2")); + count_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "count")); + + MS_LOG(INFO) << "This op attr count is " << count_; + + for (size_t i = 0; i < input_num; i++) { + auto input_i_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, i); + for (size_t j = 0; j < input_i_shape.size(); j++) { + dims_.emplace_back(input_i_shape[j]); + } + } +} + +bool RandomChoiceWithMaskCPUKernel::Launch(const std::vector &inputs, + const 
std::vector &, + const std::vector &outputs) { + auto *input = reinterpret_cast(inputs[0]->addr); + auto *output_coordinate = reinterpret_cast(outputs[0]->addr); + auto *mask = reinterpret_cast(outputs[1]->addr); + int32_t input_dim_size = dims_.size(); + int32_t non_zero_num = 0; + int32_t input_total_count = 1; + + if (input_dim_size < 1 || input_dim_size > 5) { + MS_LOG(EXCEPTION) << "Input dim size is " << input_dim_size << ", which is not supported."; + } + + int seedc = seed2_ != 0 ? seed2_ : (seed_ != 0 ? seed_ : generator_()); + GetInputTotalCount(dims_, &input_total_count, input_dim_size); + int *input_dim = new (std::nothrow) int[input_total_count]; + if (input_dim == nullptr) { + MS_LOG(EXCEPTION) << "Malloc memory failed!"; + return false; + } + for (int32_t i = 0; i < input_total_count; i++) { + if (input[i] != 0) { + input_dim[non_zero_num] = i; + non_zero_num++; + } + } + + bool padding_flag = false; + int32_t output_length = 0; + int32_t output_non_zero_length = 0; + GetOutputLength(&padding_flag, &output_length, &output_non_zero_length, count_, non_zero_num); + int *tmp_output = new (std::nothrow) int[output_length]; + if (tmp_output == nullptr) { + MS_LOG(EXCEPTION) << "Malloc memory failed!"; + delete[] input_dim; + return false; + } + + std::mt19937 gen(seedc); + std::uniform_int_distribution<> dis(0, non_zero_num - 1); + int *mask_dim = new (std::nothrow) int[output_length]; + if (mask_dim == nullptr) { + MS_LOG(EXCEPTION) << "Malloc memory failed!"; + delete[] input_dim; + delete[] tmp_output; + return false; + } + (void)memset_s(mask_dim, output_length, 0X00, output_length); + (void)memset_s(tmp_output, output_length, 0X00, output_length); + + for (int32_t i = 0; i < output_non_zero_length; i++) { + int32_t mean = dis(gen); + tmp_output[i] = input_dim[mean]; + mask_dim[i] = 1; + } + if (padding_flag) { + int32_t index = 0; + for (int32_t i = output_length - 1; i > non_zero_num; i--) { + tmp_output[non_zero_num + index] = 0; + mask_dim[non_zero_num + index] = 0; + index++; + } + } + + int32_t copy_output_length = 0; + if (output_length * input_dim_size >= INT_MAX || output_length * input_dim_size < 0) { + MS_LOG(EXCEPTION) << "Output size exceed INT_MAX"; + delete[] input_dim; + delete[] tmp_output; + delete[] mask_dim; + return false; + } + + copy_output_length = output_length * input_dim_size; + int *output = new (std::nothrow) int[copy_output_length]; + if (output == nullptr) { + MS_LOG(EXCEPTION) << "Malloc memory failed!"; + delete[] input_dim; + delete[] tmp_output; + delete[] mask_dim; + return false; + } + (void)memset_s(output, copy_output_length, 0X00, copy_output_length); + ParseOutputCoordinate(dims_, output_length, input_dim_size, input_total_count, tmp_output, output); + + int32_t actual_output_length = count_ * dims_.size(); + copy_output_length = std::min(actual_output_length, copy_output_length); + int32_t copy_output_bytes = 0; + if (INT_MAX / static_cast(sizeof(int32_t)) < copy_output_length) { + MS_LOG(EXCEPTION) << "The output length is out of range!"; + delete[] input_dim; + delete[] mask_dim; + delete[] tmp_output; + delete[] output; + return false; + } + + copy_output_bytes = copy_output_length * sizeof(int32_t); + memcpy_s(output_coordinate, copy_output_bytes, output, copy_output_bytes); + UpdateOutput(dims_, non_zero_num, count_, output_length, mask_dim, output_coordinate, mask); + delete[] input_dim; + delete[] mask_dim; + delete[] tmp_output; + delete[] output; + + return true; +} + +} // namespace kernel +} // namespace mindspore diff 
--git a/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.h new file mode 100644 index 00000000000..d91ec02aca9 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/random_choice_with_mask_cpu_kernel.h @@ -0,0 +1,55 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_ +#include +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { + +class RandomChoiceWithMaskCPUKernel : public CPUKernel { + public: + RandomChoiceWithMaskCPUKernel() = default; + ~RandomChoiceWithMaskCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + int32_t count_{0}; + std::vector dims_; + int input_shape_size_{0}; + int seed_{0}; + int seed2_{0}; + int input_size_{1}; + std::mt19937 generator_; +}; + +MS_REG_CPU_KERNEL( + RandomChoiceWithMask, + KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeBool), + RandomChoiceWithMaskCPUKernel); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_RANDOM_CHOICE_WITH_MASK_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_cpu_kernel.cc new file mode 100644 index 00000000000..64084808067 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_cpu_kernel.cc @@ -0,0 +1,223 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
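Editorial note: the RandomChoiceWithMask kernel and header above sample up to `count` coordinates of nonzero (True) entries from a boolean input, sampling indices with replacement and padding with zero coordinates and a False mask when fewer nonzero entries exist. A minimal numpy sketch of that contract is below; it ignores the seed/seed2 selection logic and the count == 0 case (where the kernel returns all nonzero entries), and the function name is illustrative only, so the exact permutation will not match the operator.

    import numpy as np

    def random_choice_with_mask_reference(x, count, seed=0):
        """Pick `count` coordinates of True entries; pad with zeros and a False mask."""
        rng = np.random.default_rng(seed)
        nonzero = np.argwhere(x)                   # (non_zero_num, ndim) coordinates
        non_zero_num = nonzero.shape[0]
        coords = np.zeros((count, x.ndim), dtype=np.int32)
        mask = np.zeros(count, dtype=bool)
        take = min(count, non_zero_num)
        if non_zero_num > 0:
            picked = rng.integers(0, non_zero_num, size=take)   # with replacement
            coords[:take] = nonzero[picked]
            mask[:take] = True
        return coords, mask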
+ */ + +#include "backend/kernel_compiler/cpu/roi_align_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { +namespace { +constexpr size_t kInputSize = 2; +constexpr size_t kOutputSize = 1; +} // namespace + +template +void ROIAlignCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + // Get the input shapes + auto x_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + + auto x_shape_size = x_shape.size(); + if (x_shape_size != 4) { + MS_LOG(ERROR) << "x shape size is " << x_shape_size << ", but should be 4."; + } + + channels_ = x_shape[1]; + height_ = x_shape[2]; + width_ = x_shape[3]; + + roi_rows_ = rois_shape[0]; + roi_cols_ = rois_shape[1]; + + pooled_height_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "pooled_height")); + pooled_width_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "pooled_width")); + spatial_scale_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "spatial_scale")); + sample_num_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "sample_num")); + roi_end_mode_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "roi_end_mode")); +} + +template +bool ROIAlignCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + const T *input = reinterpret_cast(inputs[0]->addr); + const T *rois = reinterpret_cast(inputs[1]->addr); + auto out_data = reinterpret_cast(outputs[0]->addr); + + size_t elem_num = roi_rows_ * channels_ * pooled_height_ * pooled_width_; + auto task = [&](size_t start, size_t end) { + for (size_t thread_idx = start; thread_idx < end; thread_idx++) { + int n = thread_idx / pooled_width_ / pooled_height_ / channels_; + const T *roi_box = rois + n * roi_cols_; + if (roi_box[1] < static_cast(0.001) && roi_box[3] < static_cast(0.001) && + roi_box[1] > static_cast(-0.001) && roi_box[3] > static_cast(-0.001)) { + continue; + } + int offset = -1; + int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w; + T bin_size_h, bin_size_w, roi_start_h, roi_start_w; + + bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_, + pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h, + &bin_size_w, &roi_start_h, &roi_start_w); + + // (n, c, ph, pw) is the base param of pooled map + const T count_points_in_grid_cell = static_cast(roi_bin_grid_h * roi_bin_grid_w); + + T accumulate_val = static_cast(0.); + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + // Shift half point RIGHT for y / x, while previous scaled roi shift half point LEFT + const T y = roi_start_h + static_cast(ph) * bin_size_h + + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + static_cast(pw) * bin_size_w + + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); + // bilinear interpolate by shifted y / x + // calculate bilinear interpolation + int x_low = 0, y_low = 0, x_high = 0, y_high = 0; + T w1, w2, w3, w4; + bilinear_interpolate(height_, width_, y, x, &x_low, &y_low, &x_high, &y_high, &w1, &w2, &w3, &w4); + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0 && y_low < height_ && y_high < height_ && + x_low < width_ && x_high < width_) { + T v1 = input[offset + y_low * width_ + x_low]; + T v2 = input[offset + y_low * width_ + x_high]; + T v3 = input[offset + y_high * width_ + x_low]; + 
T v4 = input[offset + y_high * width_ + x_high]; + + T val = (w1 * v1 + w2 * v2 + w3 * v3 + w4 * v4); + accumulate_val += val; + } + } + } + accumulate_val /= count_points_in_grid_cell; + + out_data[thread_idx] = accumulate_val; + } + }; + CPUKernelUtils::ParallelFor(task, elem_num); + + return true; +} + +template +void ROIAlignCPUKernel::CheckParam(const std::vector &inputs, + const std::vector &outputs) { + if (inputs.size() != kInputSize) { + MS_LOG(EXCEPTION) << "Input number is: " << inputs.size() << ", but ROIAlign needs " << kInputSize << " inputs."; + } + + if (outputs.size() != kOutputSize) { + MS_LOG(EXCEPTION) << "Output number is: " << outputs.size() << ", but ROIAlign needs " << kOutputSize << "outputs."; + } +} + +template +void ROIAlignCPUKernel::bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, + int *x_high, int *y_high, T *w1, T *w2, T *w3, T *w4) { + constexpr float eps = 0.00007; + if (y < static_cast(-1.0) || y > static_cast(height) || x < static_cast(-1.0) || x > static_cast(width)) { + *w1 = *w2 = *w3 = *w4 = static_cast(0); + *x_low = *x_high = *y_low = *y_high = -1; + return; + } + + // low bounder is at least zero + y = y <= static_cast(.0) ? static_cast(.0) : y; + x = x <= static_cast(.0) ? static_cast(.0) : x; + + // top left point + *y_low = (y <= static_cast(eps) ? 0 : static_cast(floor(y))); + *x_low = (x <= static_cast(eps) ? 0 : static_cast(floor(x))); + + // bottom right point + if (*y_low >= height - 1) { + *y_high = *y_low = height - 1; + y = static_cast(*y_low); + } else { + *y_high = *y_low + 1; + } + + if (*x_low >= width - 1) { + *x_high = *x_low = width - 1; + x = static_cast(*x_low); + } else { + *x_high = *x_low + 1; + } + + // distance to nearest points + T lx, ly, hx, hy; + ly = y - static_cast(*y_low), lx = x - static_cast(*x_low); + hy = static_cast(1.) - ly, hx = static_cast(1.) - lx; + + // weight is evaluated by the distance to point away. + // the closer to point home, the more weight, the farther to point away. + *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx; + return; +} + +template +void ROIAlignCPUKernel::bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, + const int sample_num, int roi_end_mode, const int channels, const int height, + const int width, const int pooled_height, const int pooled_width, int *offset, + int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, int *roi_bin_grid_w, + T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w) { + // (n, c, ph, pw) is the base param of pooled map + *pw = thread_idx % pooled_width; + *ph = (thread_idx / pooled_width) % pooled_height; + *c = (thread_idx / pooled_width / pooled_height) % channels; + *n = thread_idx / pooled_width / pooled_height / channels; + + // Roi has + // 1. 4 points, or + // 2. 
indicator + 4 points (1 + 4) + const T *roi_box = roi_boxes + (*n) * roi_cols; + int roi_batch_ind = 0; + if (roi_cols == 5) { + roi_batch_ind = static_cast(rint(static_cast(roi_box[0]) + static_cast(0.00007))); + roi_box++; + } + + // Scale and shift ROI + *roi_start_w = roi_box[0] * spatial_scale; + *roi_start_h = roi_box[1] * spatial_scale; + T roi_end_w = (roi_box[2] + static_cast(roi_end_mode)) * spatial_scale; + T roi_end_h = (roi_box[3] + static_cast(roi_end_mode)) * spatial_scale; + + // New ROI height/width + T roi_width = roi_end_w - (*roi_start_w); + T roi_height = roi_end_h - (*roi_start_h); + + if (roi_end_mode == 0) { // backward compatibility + // Force malformed ROIs to be 1x1 + roi_width = roi_width > static_cast(1.0) ? roi_width : static_cast(1.0); + roi_height = roi_height > static_cast(1.0) ? roi_height : static_cast(1.0); + } + + // ratio of roi / pooled + *bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + *bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + *offset = (roi_batch_ind * channels + (*c)) * height * width; + + // grid (int) by Sample ratio if defined, otherwise by pooled H/W + *roi_bin_grid_h = (sample_num > 0) ? sample_num : static_cast(floor(roi_height / static_cast(pooled_height))); + *roi_bin_grid_w = (sample_num > 0) ? sample_num : static_cast(floor(roi_width / static_cast(pooled_width))); + return; +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_cpu_kernel.h new file mode 100644 index 00000000000..72821dfb836 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_cpu_kernel.h @@ -0,0 +1,72 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
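Editorial note: bilinear_interpolate in the ROIAlign kernel above clamps a sample point into the feature map and splits it into four corner weights, w1 for (y_low, x_low) through w4 for (y_high, x_high). A plain numpy rendering of the same weight computation is sketched below; the boundary clamping is simplified and the kernel's eps handling is omitted, and bilinear_weights is an illustrative name, not part of the patch.

    import numpy as np

    def bilinear_weights(height, width, y, x):
        """Return corner indices and weights (w1, w2, w3, w4) for one sample point."""
        if y < -1.0 or y > height or x < -1.0 or x > width:
            return None                    # a point outside the feature map contributes 0
        y, x = max(y, 0.0), max(x, 0.0)
        y_low, x_low = min(int(np.floor(y)), height - 1), min(int(np.floor(x)), width - 1)
        y_high, x_high = min(y_low + 1, height - 1), min(x_low + 1, width - 1)
        ly, lx = y - y_low, x - x_low
        hy, hx = 1.0 - ly, 1.0 - lx
        # w1: top-left, w2: top-right, w3: bottom-left, w4: bottom-right
        return (y_low, x_low, y_high, x_high), (hy * hx, hy * lx, ly * hx, ly * lx)

    # Example: bilinear_weights(4, 4, 1.25, 2.5) gives corners (1, 2, 2, 3)
    # and weights (0.375, 0.375, 0.125, 0.125), which sum to 1.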
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_ +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class ROIAlignCPUKernel : public CPUKernel { + public: + ROIAlignCPUKernel() = default; + ~ROIAlignCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + void CheckParam(const std::vector &inputs, const std::vector &outputs); + + void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, int *x_high, + int *y_high, T *w1, T *w2, T *w3, T *w4); + + void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, const int sample_num, + int roi_end_mode, const int channels, const int height, const int width, const int pooled_height, + const int pooled_width, int *offset, int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, + int *roi_bin_grid_w, T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w); + + int pooled_height_; + int pooled_width_; + T spatial_scale_; + int sample_num_; + int roi_end_mode_; + + int roi_rows_; + int roi_cols_; + int channels_; + int height_; + int width_; +}; + +MS_REG_CPU_KERNEL_T( + ROIAlign, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ROIAlignCPUKernel, float); + +MS_REG_CPU_KERNEL_T( + ROIAlign, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ROIAlignCPUKernel, float16); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.cc new file mode 100644 index 00000000000..e361c46450a --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.cc @@ -0,0 +1,280 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
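Editorial note: bin_box in the kernel above maps one pooled-output index to a sampling grid: the ROI corners are scaled by spatial_scale, the ROI is split into pooled_height x pooled_width bins, and each bin gets roi_bin_grid_h x roi_bin_grid_w sample points. The numpy sketch below shows only that geometry for a single ROI; it omits roi_end_mode and the batch-index column, the 1x1 clamp shown applies to the roi_end_mode == 0 path, and roi_bin_geometry is an illustrative name.

    import numpy as np

    def roi_bin_geometry(roi, spatial_scale, pooled_height, pooled_width, sample_num):
        """roi = [x1, y1, x2, y2] in image coordinates; return bin sizes and grid counts."""
        x1, y1, x2, y2 = (float(v) * spatial_scale for v in roi)
        roi_width = max(x2 - x1, 1.0)              # malformed ROIs forced to 1x1
        roi_height = max(y2 - y1, 1.0)
        bin_h = roi_height / pooled_height
        bin_w = roi_width / pooled_width
        grid_h = sample_num if sample_num > 0 else int(np.floor(bin_h))
        grid_w = sample_num if sample_num > 0 else int(np.floor(bin_w))
        return (y1, x1), (bin_h, bin_w), (grid_h, grid_w)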
+ */ + +#include "backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.h" +#include "runtime/device/cpu/cpu_device_address.h" + +namespace mindspore { +namespace kernel { + +template +void AtomicAddTask(T *address, T val) { + auto *address_as_ull = reinterpret_cast(address); + U old = *address_as_ull; + U assumed; + T desired; + T *assumed_t = NULL; + U *desired_u = NULL; + do { + assumed = old; + assumed_t = reinterpret_cast(&assumed); + desired_u = reinterpret_cast(&desired); + desired = *assumed_t + static_cast(val); + old = __sync_val_compare_and_swap(address_as_ull, assumed, *desired_u); + } while (assumed != old); +} + +template +void AtomicAdd(T *address, T val) { + switch (sizeof(T)) { + case 1: { + AtomicAddTask(address, val); + break; + } + case 2: { + AtomicAddTask(address, val); + break; + } + case 4: { + AtomicAddTask(address, val); + break; + } + case 8: { + AtomicAddTask(address, val); + break; + } + } +} + +template +void ROIAlignGradCPUKernel::CheckParam(const CNodePtr &kernel_node) { + // Get the number of the input args + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(ERROR) << "Input number is: " << input_num << ", but ROIAlignGrad needs 2 inputs."; + } + + // Get the number of the output args + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 1) { + MS_LOG(ERROR) << "Output number is: " << output_num << ", but ROIAlignGrad needs 1 output."; + } + + // Get the input shapes + auto dy_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto dy_shape_size = dy_shape.size(); + if (dy_shape_size != 4) { + MS_LOG(ERROR) << "dy shape size is " << dy_shape_size << ", but should be 4."; + } +} + +template +void ROIAlignGradCPUKernel::InitKernel(const CNodePtr &kernel_node) { + MS_EXCEPTION_IF_NULL(kernel_node); + CheckParam(kernel_node); + + auto rois_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + roi_rows_ = rois_shape[0]; + roi_cols_ = rois_shape[1]; + + std::vector xdiff_shape_me = AnfAlgo::GetNodeAttr>(kernel_node, "xdiff_shape"); + (void)std::transform(xdiff_shape_me.begin(), xdiff_shape_me.end(), std::back_inserter(xdiff_shape_), + [](const int64_t &value) { return static_cast(value); }); + pooled_height_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "pooled_height")); + pooled_width_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "pooled_width")); + spatial_scale_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "spatial_scale")); + sample_num_ = static_cast(AnfAlgo::GetNodeAttr(kernel_node, "sample_num")); + roi_end_mode_ = 1; + + batch_size_ = xdiff_shape_[0]; + channels_ = xdiff_shape_[1]; + height_ = xdiff_shape_[2]; + width_ = xdiff_shape_[3]; +} + +template +bool ROIAlignGradCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + const T *dy = reinterpret_cast(inputs[0]->addr); + const T *rois = reinterpret_cast(inputs[1]->addr); + T *dx = reinterpret_cast(outputs[0]->addr); + + size_t size_init = batch_size_ * channels_ * height_ * width_; + auto task1 = [&](size_t start, size_t end) { + for (size_t thread_idx = start; thread_idx < end; thread_idx++) { + dx[thread_idx] = static_cast(0.); + } + }; + CPUKernelUtils::ParallelFor(task1, size_init); + + size_t elem_num = roi_rows_ * channels_ * pooled_height_ * pooled_width_; + auto task2 = [&](size_t start, size_t end) { + for (size_t thread_idx = start; thread_idx < end; thread_idx++) { + int n = thread_idx / pooled_width_ / pooled_height_ / channels_; + const T 
*roi_box = rois + n * roi_cols_; + if (roi_box[1] < static_cast(0.001) && roi_box[3] < static_cast(0.001) && + roi_box[1] > static_cast(-0.001) && roi_box[3] > static_cast(-0.001)) { + continue; + } + int offset = -1; + int c, ph, pw, roi_bin_grid_h, roi_bin_grid_w; + T bin_size_h, bin_size_w, roi_start_h, roi_start_w; + + bin_box(thread_idx, rois, roi_cols_, spatial_scale_, sample_num_, roi_end_mode_, channels_, height_, width_, + pooled_height_, pooled_width_, &offset, &n, &c, &ph, &pw, &roi_bin_grid_h, &roi_bin_grid_w, &bin_size_h, + &bin_size_w, &roi_start_h, &roi_start_w); + + // (n, c, ph, pw) is the base param of pooled map + const T count_points_in_grid_cell = static_cast(roi_bin_grid_h * roi_bin_grid_w); + + int top_offset = (n * channels_ + c) * pooled_height_ * pooled_width_; + const T *offset_top_diff = dy + top_offset; + const T top_diff_this_bin = offset_top_diff[ph * pooled_width_ + pw]; + + for (int iy = 0; iy < roi_bin_grid_h; iy++) { + // Shift half point RIGHT for y / x, while previous scaled roi shift half point LEFT + const T y = roi_start_h + static_cast(ph) * bin_size_h + + static_cast(iy + .5f) * bin_size_h / static_cast(roi_bin_grid_h); + for (int ix = 0; ix < roi_bin_grid_w; ix++) { + const T x = roi_start_w + static_cast(pw) * bin_size_w + + static_cast(ix + .5f) * bin_size_w / static_cast(roi_bin_grid_w); + // bilinear interpolate by shifted y / x + // calculate bilinear interpolation + int x_low = 0, y_low = 0, x_high = 0, y_high = 0; + T w1, w2, w3, w4; + bilinear_interpolate(height_, width_, y, x, &x_low, &y_low, &x_high, &y_high, &w1, &w2, &w3, &w4); + if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0 && y_low < height_ && y_high < height_ && + x_low < width_ && x_high < width_) { + T g1 = top_diff_this_bin * w1 / count_points_in_grid_cell; + T g2 = top_diff_this_bin * w2 / count_points_in_grid_cell; + T g3 = top_diff_this_bin * w3 / count_points_in_grid_cell; + T g4 = top_diff_this_bin * w4 / count_points_in_grid_cell; + + T *dx_1 = dx + offset + y_low * width_ + x_low; + T *dx_2 = dx + offset + y_low * width_ + x_high; + T *dx_3 = dx + offset + y_high * width_ + x_low; + T *dx_4 = dx + offset + y_high * width_ + x_high; + + AtomicAdd(dx_1, g1); + AtomicAdd(dx_2, g2); + AtomicAdd(dx_3, g3); + AtomicAdd(dx_4, g4); + } + } + } + } + }; + CPUKernelUtils::ParallelFor(task2, elem_num); + return true; +} + +template +void ROIAlignGradCPUKernel::bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, + int *x_high, int *y_high, T *w1, T *w2, T *w3, T *w4) { + constexpr float eps = 0.00007; + if (y < static_cast(-1.0) || y > static_cast(height) || x < static_cast(-1.0) || x > static_cast(width)) { + *w1 = *w2 = *w3 = *w4 = static_cast(0); + *x_low = *x_high = *y_low = *y_high = -1; + return; + } + + // low bounder is at least zero + y = y <= static_cast(.0) ? static_cast(.0) : y; + x = x <= static_cast(.0) ? static_cast(.0) : x; + + // top left point + *y_low = (y <= static_cast(eps) ? 0 : static_cast(floor(y))); + *x_low = (x <= static_cast(eps) ? 0 : static_cast(floor(x))); + + // bottom right point + if (*y_low >= height - 1) { + *y_high = *y_low = height - 1; + y = static_cast(*y_low); + } else { + *y_high = *y_low + 1; + } + + if (*x_low >= width - 1) { + *x_high = *x_low = width - 1; + x = static_cast(*x_low); + } else { + *x_high = *x_low + 1; + } + + // distance to nearest points + T lx, ly, hx, hy; + ly = y - static_cast(*y_low), lx = x - static_cast(*x_low); + hy = static_cast(1.) - ly, hx = static_cast(1.) 
- lx; + + // weight is evaluated by the distance to point away. + // the closer to point home, the more weight, the farther to point away. + *w1 = hy * hx, *w2 = hy * lx, *w3 = ly * hx, *w4 = ly * lx; + return; +} + +template +void ROIAlignGradCPUKernel::bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, + const int sample_num, int roi_end_mode, const int channels, const int height, + const int width, const int pooled_height, const int pooled_width, int *offset, + int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, int *roi_bin_grid_w, + T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w) { + // (n, c, ph, pw) is the base param of pooled map + *pw = thread_idx % pooled_width; + *ph = (thread_idx / pooled_width) % pooled_height; + *c = (thread_idx / pooled_width / pooled_height) % channels; + *n = thread_idx / pooled_width / pooled_height / channels; + + // Roi has + // 1. 4 points, or + // 2. indicator + 4 points (1 + 4) + const T *roi_box = roi_boxes + (*n) * roi_cols; + int roi_batch_ind = 0; + if (roi_cols == 5) { + roi_batch_ind = static_cast(rint(static_cast(roi_box[0]) + static_cast(0.00007))); + roi_box++; + } + + // Scale and shift ROI + *roi_start_w = roi_box[0] * spatial_scale; + *roi_start_h = roi_box[1] * spatial_scale; + T roi_end_w = (roi_box[2] + static_cast(roi_end_mode)) * spatial_scale; + T roi_end_h = (roi_box[3] + static_cast(roi_end_mode)) * spatial_scale; + + // New ROI height/width + T roi_width = roi_end_w - (*roi_start_w); + T roi_height = roi_end_h - (*roi_start_h); + + if (roi_end_mode == 0) { // backward compatibility + // Force malformed ROIs to be 1x1 + roi_width = roi_width > static_cast(1.0) ? roi_width : static_cast(1.0); + roi_height = roi_height > static_cast(1.0) ? roi_height : static_cast(1.0); + } + + // ratio of roi / pooled + *bin_size_h = static_cast(roi_height) / static_cast(pooled_height); + *bin_size_w = static_cast(roi_width) / static_cast(pooled_width); + + *offset = (roi_batch_ind * channels + (*c)) * height * width; + + // grid (int) by Sample ratio if defined, otherwise by pooled H/W + *roi_bin_grid_h = (sample_num > 0) ? sample_num : static_cast(floor(roi_height / static_cast(pooled_height))); + *roi_bin_grid_w = (sample_num > 0) ? sample_num : static_cast(floor(roi_width / static_cast(pooled_width))); + return; +} + +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.h new file mode 100644 index 00000000000..56f3b558368 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/roi_align_grad_cpu_kernel.h @@ -0,0 +1,75 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
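Editorial note: ROIAlignGrad above scatters each pooled cell's gradient back to the four bilinear corners, and uses the CAS-based AtomicAdd so that parallel threads can accumulate into the same dx element safely. Functionally the accumulation is an indexed scatter-add; a single-sample numpy sketch is below, assuming the corner indices and weights were already computed as in bilinear_interpolate, with scatter_bin_gradient as an illustrative name.

    import numpy as np

    def scatter_bin_gradient(dx, dy_bin, corners, weights, count):
        """Accumulate one sample's share of a bin gradient into the 2-D dx slice."""
        y_low, x_low, y_high, x_high = corners
        w1, w2, w3, w4 = weights
        grads = np.array([w1, w2, w3, w4]) * dy_bin / count
        rows = np.array([y_low, y_low, y_high, y_high])
        cols = np.array([x_low, x_high, x_low, x_high])
        # np.add.at is an unbuffered scatter-add, the numpy analogue of AtomicAdd here
        np.add.at(dx, (rows, cols), grads)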
+ */ +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +class ROIAlignGradCPUKernel : public CPUKernel { + public: + ROIAlignGradCPUKernel() = default; + ~ROIAlignGradCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + void CheckParam(const CNodePtr &kernel_node); + + void bilinear_interpolate(const int height, const int width, T y, T x, int *x_low, int *y_low, int *x_high, + int *y_high, T *w1, T *w2, T *w3, T *w4); + + void bin_box(int thread_idx, const T *roi_boxes, int roi_cols, const T spatial_scale, const int sample_num, + int roi_end_mode, const int channels, const int height, const int width, const int pooled_height, + const int pooled_width, int *offset, int *n, int *c, int *ph, int *pw, int *roi_bin_grid_h, + int *roi_bin_grid_w, T *bin_size_h, T *bin_size_w, T *roi_start_h, T *roi_start_w); + + std::vector xdiff_shape_; + int pooled_height_; + int pooled_width_; + T spatial_scale_; + int sample_num_; + int roi_end_mode_; + + int roi_rows_; + int roi_cols_; + int batch_size_; + int channels_; + int height_; + int width_; +}; + +MS_REG_CPU_KERNEL_T( + ROIAlignGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ROIAlignGradCPUKernel, float); + +MS_REG_CPU_KERNEL_T( + ROIAlignGrad, + KernelAttr().AddInputAttr(kNumberTypeFloat16).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16), + ROIAlignGradCPUKernel, float16); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_ROI_ALIGN_GRAD_CPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.cc new file mode 100644 index 00000000000..4b14ad9d1ae --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.cc @@ -0,0 +1,127 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.h" +#include +#include "runtime/device/cpu/cpu_device_address.h" +#include "common/thread_pool.h" + +namespace mindspore { +namespace kernel { +namespace { +template +void Compute(const ComputeParams *params, const size_t start, const size_t end) { + MS_EXCEPTION_IF_NULL(params); + T *target = params->target_; + S *indices = params->indices_; + T *updates = params->updates_; + std::vector *out_strides = params->out_strides_; + MS_EXCEPTION_IF_NULL(out_strides); + + for (size_t i = start; i < end; ++i) { + int offset = 0; + for (int j = 0; j < params->indices_unit_rank_; ++j) { + auto index = indices[i * params->indices_unit_rank_ + j]; + if (index < 0) { + MS_LOG(EXCEPTION) << "Indices contains element " << index << " less than 0."; + } + offset += index * out_strides->at(j) * params->unit_size_; + } + target[offset] += updates[params->unit_size_ * i]; + } +} +} // namespace + +template +void ScatterNdCPUKernel::InitKernel(const CNodePtr &kernel_node) { + Check(kernel_node); + auto shape = AnfAlgo::GetOutputInferShape(kernel_node, 0); + auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + auto updates_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1); + auto indices_unit_rank = indices_shape.back(); + if (indices_unit_rank > shape.size()) { + MS_LOG(EXCEPTION) << "Value of last dimension of indices is greater than shape rank"; + } + if (indices_shape.size() < 2) { + MS_LOG(EXCEPTION) << "Indices has dimension less than 2"; + } + if (updates_shape.size() != indices_shape.size() - 1 + shape.size() - indices_unit_rank) { + MS_LOG(EXCEPTION) << "The ranks of update and indices are inconsistent"; + } + for (size_t i = 0; i < indices_shape.size() - 1; ++i) { + if (updates_shape[i] != indices_shape[i]) { + MS_LOG(EXCEPTION) << "The shape of updates and indices are different in dimension " << i << " ."; + } + } + indices_unit_rank_ = SizeToInt(indices_unit_rank); + unit_size_ = 1; + for (size_t i = indices_shape.size() - 1; i < updates_shape.size(); ++i) { + unit_size_ *= SizeToInt(updates_shape[i]); + } + num_units_ = 1; + num_units_ *= updates_shape[indices_shape.size() - 2]; + for (int i = SizeToInt(indices_shape.size()) - 3; i >= 0; i--) { + num_units_ *= updates_shape[i]; + } + int out_stride = 1; + out_strides_.push_back(out_stride); + for (int i = indices_unit_rank_ - 2; i >= 0; i--) { + out_stride *= shape[i + 1]; + out_strides_.push_back(out_stride); + } + reverse(out_strides_.begin(), out_strides_.end()); +} + +template +bool ScatterNdCPUKernel::Launch(const std::vector &inputs, + const std::vector &, + const std::vector &outputs) { + auto target = reinterpret_cast(outputs[0]->addr); + auto target_init = memset_s(target, outputs[0]->size / sizeof(T), static_cast(0.0), outputs[0]->size / sizeof(T)); + if (target_init != EOK) { + MS_LOG(EXCEPTION) << "ScatterNdCPUKernel Launch task memset failed."; + } + ComputeParams params; + params.target_ = target; + params.indices_ = reinterpret_cast(inputs[0]->addr); + params.updates_ = reinterpret_cast(inputs[1]->addr); + params.target_mem_size_ = outputs[0]->size; + params.unit_size_ = unit_size_; + params.indices_unit_rank_ = indices_unit_rank_; + params.out_strides_ = &out_strides_; + + auto task = [&](size_t start, size_t end) { + for (size_t idx = start; idx < end; idx++) { + Compute(¶ms, idx, idx + 1); + } + }; + CPUKernelUtils::ParallelFor(task, num_units_); + return true; +} + +template +void ScatterNdCPUKernel::Check(const CNodePtr 
&kernel_node) { + size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node); + if (input_num != 2) { + MS_LOG(EXCEPTION) << "Input number is " << input_num << ", but ScatterNd needs 2 input."; + } + size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); + if (output_num != 1) { + MS_LOG(EXCEPTION) << "Output number is " << output_num << ", but ScatterNd needs 1 output."; + } +} +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.h new file mode 100644 index 00000000000..c4f66e1d337 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/scatter_nd_cpu_kernel.h @@ -0,0 +1,150 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_ +#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_ +#include +#include +#include "backend/kernel_compiler/cpu/cpu_kernel.h" +#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h" + +namespace mindspore { +namespace kernel { +template +struct ComputeParams { + T *target_{nullptr}; + S *indices_{nullptr}; + T *updates_{nullptr}; + int unit_size_{0}; + int indices_unit_rank_{0}; + std::vector *out_strides_{nullptr}; + size_t target_mem_size_{0}; +}; + +template +class ScatterNdCPUKernel : public CPUKernel { + public: + ScatterNdCPUKernel() = default; + ~ScatterNdCPUKernel() override = default; + + void InitKernel(const CNodePtr &kernel_node) override; + + bool Launch(const std::vector &inputs, const std::vector &, + const std::vector &outputs) override; + + private: + void Check(const CNodePtr &kernel_node); + + int unit_size_{0}; + size_t num_units_{0}; + int indices_unit_rank_{0}; + std::vector out_strides_; +}; + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), + ScatterNdCPUKernel, int64_t, double); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ScatterNdCPUKernel, int64_t, float); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ScatterNdCPUKernel, int64_t, int64_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ScatterNdCPUKernel, int64_t, int32_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), + ScatterNdCPUKernel, int64_t, int16_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), + ScatterNdCPUKernel, int64_t, int8_t); + 
+MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), + ScatterNdCPUKernel, int64_t, uint64_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), + ScatterNdCPUKernel, int64_t, uint32_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), + ScatterNdCPUKernel, int64_t, uint16_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt64).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), + ScatterNdCPUKernel, int64_t, uint8_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64), + ScatterNdCPUKernel, int32_t, double); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32), + ScatterNdCPUKernel, int32_t, float); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), + ScatterNdCPUKernel, int32_t, int64_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32), + ScatterNdCPUKernel, int32_t, int32_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16), + ScatterNdCPUKernel, int32_t, int16_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8), + ScatterNdCPUKernel, int32_t, int8_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64), + ScatterNdCPUKernel, int32_t, uint64_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32), + ScatterNdCPUKernel, int32_t, uint32_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, + KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16), + ScatterNdCPUKernel, int32_t, uint16_t); + +MS_REG_CPU_KERNEL_T_S( + ScatterNd, KernelAttr().AddInputAttr(kNumberTypeInt32).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8), + ScatterNdCPUKernel, int32_t, uint8_t); + +} // namespace kernel +} // namespace mindspore + +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_SCATTER_ND_CPU_KERNEL_H_ diff --git a/mindspore/ops/_op_impl/cpu/__init__.py b/mindspore/ops/_op_impl/cpu/__init__.py index ad6a7e95137..6ac57186a4b 100644 --- a/mindspore/ops/_op_impl/cpu/__init__.py +++ b/mindspore/ops/_op_impl/cpu/__init__.py @@ -24,6 +24,7 @@ from .split import _split_cpu from .adam import _adam_cpu from .arg_max import _arg_max_cpu from .arg_min_with_value import _arg_min_with_value_cpu +from .arg_max_with_value import _arg_max_with_value_cpu from .bias_add import _bias_add_cpu from .bias_add_grad import _bias_add_grad_cpu from .dropout import _dropout_cpu diff --git a/mindspore/ops/_op_impl/cpu/arg_max_with_value.py b/mindspore/ops/_op_impl/cpu/arg_max_with_value.py new file mode 100644 index 00000000000..c4b38615f07 --- /dev/null +++ 
b/mindspore/ops/_op_impl/cpu/arg_max_with_value.py @@ -0,0 +1,31 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""ArgMaxWithValue op""" +from mindspore.ops.op_info_register import op_info_register, CpuRegOp, DataType + +arg_max_with_value_op_info = CpuRegOp("ArgMaxWithValue") \ + .input(0, "x", "required") \ + .output(0, "indice", "required") \ + .output(1, "values", "required") \ + .dtype_format(DataType.F32_Default, DataType.I32_Default, DataType.F32_Default) \ + .dtype_format(DataType.F16_Default, DataType.I32_Default, DataType.F16_Default) \ + .get_op_info() + + +@op_info_register(arg_max_with_value_op_info) +def _arg_max_with_value_cpu(): + """ArgMaxWithValue cpu register""" + return diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index b45b871c83c..6ec5981485e 100755 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -1785,7 +1785,7 @@ class ArgMaxWithValue(PrimitiveWithInfer): TypeError: If `axis` is not an int. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> input_x = Tensor(np.array([0.0, 0.4, 0.6, 0.7, 0.1]), mindspore.float32) @@ -3484,7 +3484,7 @@ class ScatterNd(PrimitiveWithInfer): ValueError: If any element of `shape` is less than 1. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> op = ops.ScatterNd() diff --git a/mindspore/ops/operations/image_ops.py b/mindspore/ops/operations/image_ops.py index 6c5486c11fc..36b8b0fa988 100644 --- a/mindspore/ops/operations/image_ops.py +++ b/mindspore/ops/operations/image_ops.py @@ -59,7 +59,7 @@ class CropAndResize(PrimitiveWithInfer): ValueError: If `method` is not one of 'bilinear', 'nearest', 'bilinear_v2'. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> class CropAndResizeNet(nn.Cell): diff --git a/mindspore/ops/operations/math_ops.py b/mindspore/ops/operations/math_ops.py index 6d36ede148d..08700bdb80a 100644 --- a/mindspore/ops/operations/math_ops.py +++ b/mindspore/ops/operations/math_ops.py @@ -4257,7 +4257,7 @@ class NMSWithMask(PrimitiveWithInfer): Tensor is not float16 or float32. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> bbox = np.array([[100.0, 100.0, 50.0, 68.0, 0.63], [150.0, 75.0, 165.0, 115.0, 0.55], diff --git a/mindspore/ops/operations/nn_ops.py b/mindspore/ops/operations/nn_ops.py index 7d98934f242..be50a2a9f79 100755 --- a/mindspore/ops/operations/nn_ops.py +++ b/mindspore/ops/operations/nn_ops.py @@ -4262,7 +4262,7 @@ class ROIAlign(PrimitiveWithInfer): TypeError: If `features` or `rois` is not a Tensor. 
Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> input_tensor = Tensor(np.array([[[[1., 2.], [3., 4.]]]]), mindspore.float32) diff --git a/mindspore/ops/operations/other_ops.py b/mindspore/ops/operations/other_ops.py index e0a2507a502..34d4eecb136 100644 --- a/mindspore/ops/operations/other_ops.py +++ b/mindspore/ops/operations/other_ops.py @@ -168,7 +168,7 @@ class BoundingBoxEncode(PrimitiveWithInfer): TypeError: If `anchor_box` or `groundtruth_box` is not a Tensor. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> anchor_box = Tensor([[2, 2, 2, 3], [2, 2, 2, 3]], mindspore.float32) @@ -230,7 +230,7 @@ class BoundingBoxDecode(PrimitiveWithInfer): TypeError: If `anchor_box` or `deltas` is not a Tensor. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> anchor_box = Tensor([[4, 1, 2, 1], [2, 2, 2, 3]], mindspore.float32) @@ -293,7 +293,7 @@ class CheckValid(PrimitiveWithInfer): TypeError: If dtype of `bboxes` or `img_metas` is neither float16 nor float32. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> import mindspore diff --git a/mindspore/ops/operations/random_ops.py b/mindspore/ops/operations/random_ops.py index 541637912d1..3c310cbce45 100644 --- a/mindspore/ops/operations/random_ops.py +++ b/mindspore/ops/operations/random_ops.py @@ -404,7 +404,7 @@ class RandomChoiceWithMask(PrimitiveWithInfer): TypeError: If `input_x` is not a Tensor. Supported Platforms: - ``Ascend`` ``GPU`` + ``Ascend`` ``GPU`` ``CPU`` Examples: >>> rnd_choice_mask = ops.RandomChoiceWithMask() diff --git a/tests/st/ops/cpu/test_argmaxwithvalue_op.py b/tests/st/ops/cpu/test_argmaxwithvalue_op.py new file mode 100644 index 00000000000..6393e10c2f6 --- /dev/null +++ b/tests/st/ops/cpu/test_argmaxwithvalue_op.py @@ -0,0 +1,146 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetArgmaxWithValue(nn.Cell): + def __init__(self): + super(NetArgmaxWithValue, self).__init__() + axis1 = 0 + axis2 = -1 + self.argmax1 = P.ArgMaxWithValue(axis1) + self.argmax2 = P.ArgMaxWithValue(axis2) + self.argmax3 = P.ArgMaxWithValue() + + def construct(self, x): + return (self.argmax1(x), self.argmax2(x), self.argmax3(x)) + + +class NetArgmaxWithValueBig(nn.Cell): + def __init__(self, axis=0): + super(NetArgmaxWithValueBig, self).__init__() + self.argmax = P.ArgMaxWithValue(axis) + + def construct(self, x): + return self.argmax(x) + + +def argmaxwithvalue_base(data_type): + x = Tensor(np.array([[1., 20., 5.], + [67., 8., 9.], + [130., 24., 15.], + [0.3, -0.4, -15.]]).astype(data_type)) + expect1 = np.array([2, 2, 2]).astype(data_type) + expect2 = np.array([1, 0, 0, 0]).astype(data_type) + expect11 = np.array([130, 24, 15]).astype(data_type) + expect22 = np.array([20, 67, 130, 0.3]).astype(data_type) + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + argmax = NetArgmaxWithValue() + output = argmax(x) + assert (output[0][0].asnumpy() == expect1).all() + assert (output[0][1].asnumpy() == expect11).all() + assert (output[1][0].asnumpy() == expect2).all() + assert (output[1][1].asnumpy() == expect22).all() + assert (output[2][0].asnumpy() == expect1).all() + assert (output[2][1].asnumpy() == expect11).all() + + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + argmax = NetArgmaxWithValue() + output = argmax(x) + assert (output[0][0].asnumpy() == expect1).all() + assert (output[0][1].asnumpy() == expect11).all() + assert (output[1][0].asnumpy() == expect2).all() + assert (output[1][1].asnumpy() == expect22).all() + assert (output[2][0].asnumpy() == expect1).all() + assert (output[2][1].asnumpy() == expect11).all() + + +def argmaxwithvalue_3d(data_type, shape_x): + np.random.seed(2) + x_np = np.random.random(shape_x).astype(data_type) + x = Tensor(x_np) + + argmax = NetArgmaxWithValueBig(0) + output = argmax(x) + expect1 = np.argmax(x_np, axis=0) + expect2 = np.maximum.reduce(x_np, 0) + assert (output[0].asnumpy() == expect1).all() + assert (output[1].asnumpy() == expect2).all() + + argmax = NetArgmaxWithValueBig(1) + output = argmax(x) + expect1 = np.argmax(x_np, axis=1) + expect2 = np.maximum.reduce(x_np, 1) + assert (output[0].asnumpy() == expect1).all() + assert (output[1].asnumpy() == expect2).all() + + argmax = NetArgmaxWithValueBig(2) + output = argmax(x) + expect1 = np.argmax(x_np, axis=2) + expect2 = np.maximum.reduce(x_np, 2) + assert (output[0].asnumpy() == expect1).all() + assert (output[1].asnumpy() == expect2).all() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_argmaxwithvalue_base_float32(): + argmaxwithvalue_base(np.float32) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_argmaxwithvalue_base_float16(): + argmaxwithvalue_base(np.float16) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_argmaxwithvalue_3d_float32(): + shape_x = (2, 32, 256) + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + argmaxwithvalue_3d(np.float32, shape_x) + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + 
argmaxwithvalue_3d(np.float32, shape_x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_argmaxwithvalue_3d_float16(): + shape_x = (2, 64, 128) + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + argmaxwithvalue_3d(np.float16, shape_x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_argmaxwithvalue_3d_big_float32(): + shape_x = (128, 1024, 1) + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + argmaxwithvalue_3d(np.float32, shape_x) + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + argmaxwithvalue_3d(np.float32, shape_x) diff --git a/tests/st/ops/cpu/test_boundingbox_decode_op.py b/tests/st/ops/cpu/test_boundingbox_decode_op.py new file mode 100644 index 00000000000..59c88c10460 --- /dev/null +++ b/tests/st/ops/cpu/test_boundingbox_decode_op.py @@ -0,0 +1,60 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetBoundingBoxDecode(nn.Cell): + def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)): + super(NetBoundingBoxDecode, self).__init__() + self.decode = P.BoundingBoxDecode(max_shape=(768, 1280), means=means, stds=stds, + wh_ratio_clip=0.016) + + def construct(self, anchor, groundtruth): + return self.decode(anchor, groundtruth) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_boundingbox_decode(): + anchor = np.array([[4, 1, 2, 1], [2, 2, 2, 3]], np.float32) + deltas = np.array([[3, 1, 2, 2], [1, 2, 1, 4]], np.float32) + means = (0.1, 0.1, 0.2, 0.2) + stds = (2.0, 2.0, 3.0, 3.0) + anchor_box = Tensor(anchor, mindspore.float32) + deltas_box = Tensor(deltas, mindspore.float32) + expect_deltas = np.array([[28.6500, 0.0000, 0.0000, 33.8500], + [0.0000, 0.0000, 15.8663, 72.7000]], np.float32) + + error = np.ones(shape=[2, 4]) * 1.0e-4 + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + boundingbox_decode = NetBoundingBoxDecode(means, stds) + output = boundingbox_decode(anchor_box, deltas_box) + diff = output.asnumpy() - expect_deltas + assert np.all(abs(diff) < error) + + context.set_context(mode=context.PYNATIVE_MODE, device_target='CPU') + boundingbox_decode = NetBoundingBoxDecode(means, stds) + output = boundingbox_decode(anchor_box, deltas_box) + diff = output.asnumpy() - expect_deltas + assert np.all(abs(diff) < error) diff --git a/tests/st/ops/cpu/test_boundingbox_encode_op.py b/tests/st/ops/cpu/test_boundingbox_encode_op.py new file mode 100644 index 00000000000..918fd9d7b77 --- /dev/null +++ b/tests/st/ops/cpu/test_boundingbox_encode_op.py @@ -0,0 +1,80 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetBoundingBoxEncode(nn.Cell): + def __init__(self, means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)): + super(NetBoundingBoxEncode, self).__init__() + self.encode = P.BoundingBoxEncode(means=means, stds=stds) + + def construct(self, anchor, groundtruth): + return self.encode(anchor, groundtruth) + +def bbox2delta(proposals, gt, means, stds): + px = (proposals[..., 0] + proposals[..., 2]) * 0.5 + py = (proposals[..., 1] + proposals[..., 3]) * 0.5 + pw = proposals[..., 2] - proposals[..., 0] + 1.0 + ph = proposals[..., 3] - proposals[..., 1] + 1.0 + + gx = (gt[..., 0] + gt[..., 2]) * 0.5 + gy = (gt[..., 1] + gt[..., 3]) * 0.5 + gw = gt[..., 2] - gt[..., 0] + 1.0 + gh = gt[..., 3] - gt[..., 1] + 1.0 + + dx = (gx - px) / pw + dy = (gy - py) / ph + dw = np.log(gw / pw) + dh = np.log(gh / ph) + means = np.array(means, np.float32) + stds = np.array(stds, np.float32) + deltas = np.stack([(dx - means[0]) / stds[0], (dy - means[1]) / stds[1], + (dw - means[2]) / stds[2], (dh - means[3]) / stds[3]], axis=-1) + + return deltas + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_boundingbox_encode(): + anchor = np.array([[4, 1, 6, 9], [2, 5, 5, 9]]).astype(np.float32) + gt = np.array([[3, 2, 7, 7], [1, 5, 5, 8]]).astype(np.float32) + means = (0.1, 0.1, 0.2, 0.2) + stds = (2.0, 2.0, 3.0, 3.0) + anchor_box = Tensor(anchor, mindspore.float32) + groundtruth_box = Tensor(gt, mindspore.float32) + expect_deltas = bbox2delta(anchor, gt, means, stds) + + error = np.ones(shape=[2, 4]) * 1.0e-6 + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + boundingbox_encode = NetBoundingBoxEncode(means, stds) + output = boundingbox_encode(anchor_box, groundtruth_box) + diff = output.asnumpy() - expect_deltas + assert np.all(abs(diff) < error) + + context.set_context(mode=context.PYNATIVE_MODE, device_target='CPU') + boundingbox_encode = NetBoundingBoxEncode(means, stds) + output = boundingbox_encode(anchor_box, groundtruth_box) + diff = output.asnumpy() - expect_deltas + assert np.all(abs(diff) < error) diff --git a/tests/st/ops/cpu/test_check_valid_op.py b/tests/st/ops/cpu/test_check_valid_op.py new file mode 100644 index 00000000000..b3ed60476e9 --- /dev/null +++ b/tests/st/ops/cpu/test_check_valid_op.py @@ -0,0 +1,86 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
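Editorial note: bbox2delta above is the numpy reference the encode test checks against. As a worked check of that reference (values rounded, and assuming bbox2delta from the test above is in scope), the first anchor/ground-truth pair used in test_boundingbox_encode works out as follows:

    import numpy as np

    # anchor center (5, 5), size (3, 9); gt center (5, 4.5), size (5, 6)
    anchor = np.array([[4., 1., 6., 9.]], np.float32)
    gt = np.array([[3., 2., 7., 7.]], np.float32)
    deltas = bbox2delta(anchor, gt, means=(0.1, 0.1, 0.2, 0.2), stds=(2.0, 2.0, 3.0, 3.0))
    # approximately [[-0.05, -0.0778, 0.1036, -0.2018]]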
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class NetCheckValid(nn.Cell): + def __init__(self): + super(NetCheckValid, self).__init__() + self.valid = P.CheckValid() + + def construct(self, anchor, image_metas): + return self.valid(anchor, image_metas) + +def check_valid(nptype): + anchor = np.array([[50, 0, 100, 700], [-2, 2, 8, 100], [10, 20, 300, 2000]], nptype) + image_metas = np.array([768, 1280, 1], nptype) + anchor_box = Tensor(anchor) + image_metas_box = Tensor(image_metas) + expect = np.array([True, False, False], np.bool) + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + boundingbox_decode = NetCheckValid() + output = boundingbox_decode(anchor_box, image_metas_box) + assert np.array_equal(output.asnumpy(), expect) + + context.set_context(mode=context.PYNATIVE_MODE, device_target='CPU') + boundingbox_decode = NetCheckValid() + output = boundingbox_decode(anchor_box, image_metas_box) + assert np.array_equal(output.asnumpy(), expect) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_check_valid_float32(): + check_valid(np.float32) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_check_valid_float16(): + check_valid(np.float16) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_check_valid_int16(): + check_valid(np.int16) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_check_valid_uint8(): + anchor = np.array([[5, 0, 10, 70], [2, 2, 8, 10], [1, 2, 30, 200]], np.uint8) + image_metas = np.array([76, 128, 1], np.uint8) + anchor_box = Tensor(anchor) + image_metas_box = Tensor(image_metas) + expect = np.array([True, True, False], np.bool) + + context.set_context(mode=context.GRAPH_MODE, device_target='CPU') + boundingbox_decode = NetCheckValid() + output = boundingbox_decode(anchor_box, image_metas_box) + assert np.array_equal(output.asnumpy(), expect) + + context.set_context(mode=context.PYNATIVE_MODE, device_target='CPU') + boundingbox_decode = NetCheckValid() + output = boundingbox_decode(anchor_box, image_metas_box) + assert np.array_equal(output.asnumpy(), expect) diff --git a/tests/st/ops/cpu/test_crop_and_resize_op.py b/tests/st/ops/cpu/test_crop_and_resize_op.py new file mode 100644 index 00000000000..29e456b738f --- /dev/null +++ b/tests/st/ops/cpu/test_crop_and_resize_op.py @@ -0,0 +1,423 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +import numpy as np +import pytest + +from mindspore import context, Tensor +from mindspore.ops import operations as P +from mindspore import nn + + +class NetCropAndResize(nn.Cell): + def __init__(self, method_="bilinear", extrapolation_value_=0.0): + super(NetCropAndResize, self).__init__() + self.op = P.CropAndResize( + method=method_, extrapolation_value=extrapolation_value_) + + def construct(self, image, boxes, box_index, channel): + return self.op(image, boxes, box_index, channel) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_int8_bilinear(datatype=np.int8): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + batch_size = 2 + image_height = 32 + image_width = 18 + channels = 2 + crop_size = (5, 3) + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0, total_values).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("bilinear", 0.5) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[-111.0, -110.0], [-119.5, -118.5], [-128.0, -127.0]], + [[28.5, 29.5], [20.0, 21.0], [11.5, 12.5]], + [[-88.0, -87.0], [-96.5, -95.5], [-41.0, -40.0]], + [[51.5, 52.5], [43.0, 44.0], [34.5, 35.5]], + [[-65.0, -64.0], [-73.5, -72.5], [-82.0, -81.0]]], + [[[0.0, 1.0], [29.75, 30.75], [0.5, 0.5]], + [[-46.75, -45.75], [-17.0, -16.0], [0.5, 0.5]], + [[-93.5, -92.5], [-63.75, -62.75], [0.5, 0.5]], + [[3.75, 4.75], [-110.5, -109.5], [0.5, 0.5]], + [[69.0, 70.0], [98.75, 99.75], [0.5, 0.5]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_int16_nearest(datatype=np.int16): + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + batch_size = 2 + image_height = 32 + image_width = 18 + channels = 2 + crop_size = (5, 3) + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0, total_values).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("nearest", 0.5) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[1170.0, 1171.0], [1160.0, 1161.0], [1152.0, 1153.0]], + [[1314.0, 1315.0], [1304.0, 1305.0], [1296.0, 1297.0]], + [[1458.0, 1459.0], [1448.0, 1449.0], [1440.0, 1441.0]], + [[1602.0, 1603.0], [1592.0, 1593.0], [1584.0, 1585.0]], + [[1746.0, 1747.0], [1736.0, 1737.0], [1728.0, 1729.0]]], + [[[0.0, 
1.0], [30.0, 31.0], [0.5, 0.5]], + [[216.0, 217.0], [246.0, 247.0], [0.5, 0.5]], + [[432.0, 433.0], [462.0, 463.0], [0.5, 0.5]], + [[612.0, 613.0], [642.0, 643.0], [0.5, 0.5]], + [[828.0, 829.0], [858.0, 859.0], [0.5, 0.5]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_int32_bilinear_v2(datatype=np.int32): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + batch_size = 2 + image_height = 32 + image_width = 18 + channels = 2 + crop_size = (5, 3) + offset = 8795 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0, 0.5, 0.5, 0.0], [0, 0, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("bilinear_v2", 0.369) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[10008.199, 10009.199], [10008.2, 10009.2], [10008.199, 10009.2]], + [[10130.6, 10131.6], [10130.6, 10131.6], [10130.601, 10131.6]], + [[10253, 10253.999], [10253, 10254], [10253, 10254]], + [[10375.4, 10376.398], [10375.4, 10376.4], [10375.4, 10376.399]], + [[10497.799, 10498.799], [10497.801, 10498.8], [10497.8, 10498.8]]], + [[[8876.667, 8877.667], [8898, 8899], [8919.334, 8920.333]], + [[9056.667, 9057.667], [9078, 9079], [9099.333, 9100.333]], + [[9236.667, 9237.667], [9258, 9259], [9279.333, 9280.333]], + [[9416.667, 9417.667], [9438, 9439], [9459.333, 9460.333]], + [[9596.667, 9597.667], [9618, 9619], [9639.333, 9640.334]]]]).astype( + np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_float16_nearest(datatype=np.float16): + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + batch_size = 2 + image_height = 50 + image_width = 40 + channels = 3 + crop_size = (5, 3) + offset = 0 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("nearest", 0.0) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[7380.0, 7380.0, 7384.0], [7352.0, 7352.0, 7352.0], + [7320.0, 7320.0, 7320.0]], + [[8224.0, 8224.0, 8224.0], [8192.0, 8192.0, 8192.0], + [8160.0, 8160.0, 8160.0]], + [[8944.0, 8944.0, 8944.0], [8912.0, 8912.0, 8912.0], + [8880.0, 8880.0, 8880.0]], + [[9664.0, 9664.0, 9664.0], [9632.0, 9632.0, 9632.0], + [9600.0, 9600.0, 9600.0]], + [[10496.0, 10504.0, 10504.0], 
[10472.0, 10472.0, 10472.0], + [10440.0, 10440.0, 10440.0]]], + [[[12.0, 13.0, 14.0], [108.0, 109.0, 110.0], [0.0, 0.0, 0.0]], + [[1092.0, 1093.0, 1094.0], [1188.0, 1189.0, 1190.0], [0.0, 0.0, 0.0]], + [[2172.0, 2172.0, 2174.0], [2268.0, 2268.0, 2270.0], [0.0, 0.0, 0.0]], + [[3372.0, 3372.0, 3374.0], [3468.0, 3468.0, 3470.0], [0.0, 0.0, 0.0]], + [[4452.0, 4452.0, 4456.0], [4548.0, 4548.0, 4552.0], + [0.0, 0.0, 0.0]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_float32_bilinear(datatype=np.float32): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + batch_size = 2 + image_height = 512 + image_width = 256 + channels = 3 + crop_size = (5, 3) + offset = 5000 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("bilinear", 0.0) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[488861.53, 488862.53, 488863.53], + [488670.28, 488671.28, 488672.28], + [488479.03, 488480.03, 488481.03]], + [[539879.75, 539880.75, 539881.75], + [539688.5, 539689.5, 539690.5], + [539497.25, 539498.25, 539499.25]], + [[590898.0, 590899.0, 590900.0], [590706.75, 590707.75, 590708.75], + [590515.5, 590516.5, 590517.5]], + [[641916.25, 641917.25, 641918.25], [641725.0, 641726.0, 641727.0], + [641533.75, 641534.75, 641535.75]], + [[692934.5, 692935.5, 692936.5], [692743.25, 692744.25, 692745.25], + [692552.0, 692553.0, 692554.0]]], + [[[5076.5, 5077.5, 5078.5], [5707.625, 5708.625, 5709.625], [0.0, 0.0, 0.0]], + [[78660.5, 78661.5, 78662.5], [79291.625, 79292.625, 79293.625], [0.0, 0.0, 0.0]], + [[152244.5, 152245.5, 152246.5], [152875.625, 152876.625, 152877.625], + [0.0, 0.0, 0.0]], + [[225828.5, 225829.5, 225830.5], [226459.625, 226460.625, 226461.625], + [0.0, 0.0, 0.0]], + [[299412.5, 299413.5, 299414.5], [300043.625, 300044.625, 300045.625], + [0.0, 0.0, 0.0]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_float64_nearest(datatype=np.float64): + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + batch_size = 2 + image_height = 50 + image_width = 25 + channels = 3 + crop_size = (5, 3) + offset = 7549 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = 
NetCropAndResize("nearest", 0.0) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[12160.0, 12161.0, 12162.0], [12142.0, 12143.0, 12144.0], + [12124.0, 12125.0, 12126.0]], + [[12685.0, 12686.0, 12687.0], [12667.0, 12668.0, 12669.0], + [12649.0, 12650.0, 12651.0]], + [[13135.0, 13136.0, 13137.0], [13117.0, 13118.0, 13119.0], + [13099.0, 13100.0, 13101.0]], + [[13585.0, 13586.0, 13587.0], [13567.0, 13568.0, 13569.0], + [13549.0, 13550.0, 13551.0]], + [[14110.0, 14111.0, 14112.0], [14092.0, 14093.0, 14094.0], + [14074.0, 14075.0, 14076.0]]], + [[[7555.0, 7556.0, 7557.0], [7615.0, 7616.0, 7617.0], [0.0, 0.0, 0.0]], + [[8230.0, 8231.0, 8232.0], [8290.0, 8291.0, 8292.0], [0.0, 0.0, 0.0]], + [[8905.0, 8906.0, 8907.0], [8965.0, 8966.0, 8967.0], [0.0, 0.0, 0.0]], + [[9655.0, 9656.0, 9657.0], [9715.0, 9716.0, 9717.0], [0.0, 0.0, 0.0]], + [[10330.0, 10331.0, 10332.0], [10390.0, 10391.0, 10392.0], + [0.0, 0.0, 0.0]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_int64_bilinearv2(datatype=np.int64): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + batch_size = 2 + image_height = 50 + image_width = 25 + channels = 3 + crop_size = (5, 3) + offset = 7549 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("bilinear_v2", 0.0) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[12324.999, 12326, 12327], [12325, 12326, 12327], + [12325, 12326, 12327.001]], + [[12730, 12730.999, 12732], [12730, 12731, 12732], + [12730, 12731, 12732]], + [[13134.999, 13136, 13136.998], [13135, 13136, 13137], + [13135, 13136, 13137]], + [[13540, 13540.999, 13541.999], [13540, 13541, 13542], + [13540, 13541, 13542]], + [[13944.999, 13945.999, 13946.999], [13945, 13946.001, 13947], + [13945, 13946, 13947]]], + [[[7822, 7823, 7824], [7864, 7865, 7866], [7906, 7907, 7908]], + [[8392, 8393, 8394], [8434, 8435, 8436], [8476, 8477, 8478]], + [[8962, 8963, 8964], [9004, 9005, 9006], [9046, 9047, 9048]], + [[9531.999, 9533.001, 9534], [9574, 9575, 9576], [9616, 9617, 9618.001]], + [[10102, 10103, 10104], [10144, 10145, 10146], + [10186, 10187, 10188]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_uint8_nearest(datatype=np.uint8): + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + batch_size = 2 + image_height = 7 + image_width = 5 + channels = 2 + crop_size = (5, 3) + offset = 0 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( 
+ (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("nearest", 0.0) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[84.0, 85.0], [82.0, 83.0], [80.0, 81.0]], + [[94.0, 95.0], [92.0, 93.0], [90.0, 91.0]], + [[104.0, 105.0], [102.0, 103.0], [100.0, 101.0]], + [[114.0, 115.0], [112.0, 113.0], [110.0, 111.0]], + [[124.0, 125.0], [122.0, 123.0], [120.0, 121.0]]], + [[[0.0, 1.0], [8.0, 9.0], [0.0, 0.0]], + [[10.0, 11.0], [18.0, 19.0], [0.0, 0.0]], + [[20.0, 21.0], [28.0, 29.0], [0.0, 0.0]], + [[30.0, 31.0], [38.0, 39.0], [0.0, 0.0]], + [[50.0, 51.0], [58.0, 59.0], [0.0, 0.0]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_crop_and_resize_uint16_bilinear(datatype=np.uint16): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + batch_size = 2 + image_height = 50 + image_width = 30 + channels = 3 + crop_size = (5, 3) + offset = 0 + total_values = batch_size * image_height * image_width * channels + input_data = np.arange(0 + offset, total_values + offset).reshape( + (batch_size, image_height, image_width, channels)) + input_boxes = np.array( + [[0.23, 0.5, 0.75, 0.0], [0, 0.1, 0.75, 1.75]]).astype(np.float32) + input_box_index = np.array([1, 0]).astype(np.int32) + input_data_tensor = Tensor(input_data.astype(datatype)) + input_boxes_tensor = Tensor(input_boxes) + input_box_index_tensor = Tensor(input_box_index) + net = NetCropAndResize("bilinear", 0.0) + output = net(input_data_tensor, input_boxes_tensor, + input_box_index_tensor, crop_size) + output_ms = output.asnumpy() + expected_output = np.array([[[[5557.7998046875, 5558.7998046875, 5559.7998046875], + [5536.0498046875, 5537.0498046875, 5538.0498046875], + [5514.2998046875, 5515.2998046875, 5516.2998046875]], + [[6131.10009765625, 6132.10009765625, 6133.10009765625], + [6109.35009765625, 6110.35009765625, 6111.35009765625], + [6087.60009765625, 6088.60009765625, 6089.60009765625]], + [[6704.39990234375, 6705.39990234375, 6706.39990234375], + [6682.64990234375, 6683.64990234375, 6684.64990234375], + [6660.89990234375, 6661.89990234375, 6662.89990234375]], + [[7277.7001953125, 7278.7001953125, 7279.7001953125], + [7255.9501953125, 7256.9501953125, 7257.9501953125], + [7234.2001953125, 7235.2001953125, 7236.2001953125]], + [[7851.0, 7852.0, 7853.0], [7829.25, 7830.25, 7831.25], + [7807.5, 7808.5, 7809.5]]], + [[[8.700000762939453, 9.700000762939453, 10.700000762939453], + [80.4749984741211, 81.4749984741211, 82.4749984741211], + [0.0, 0.0, 0.0]], + [[835.5750122070312, 836.5750122070312, 837.5750122070312], + [907.3499755859375, 908.3499755859375, 909.3499755859375], [0.0, 0.0, 0.0]], + [[1662.449951171875, 1663.449951171875, 1664.449951171875], + [1734.2249755859375, 1735.2249755859375, 1736.2249755859375], + [0.0, 0.0, 0.0]], + [[2489.324951171875, 2490.324951171875, 2491.324951171875], + [2561.10009765625, 2562.10009765625, 2563.10009765625], [0.0, 0.0, 0.0]], + [[3316.199951171875, 3317.199951171875, 
3318.199951171875], + [3387.97509765625, 3388.97509765625, 3389.97509765625], + [0.0, 0.0, 0.0]]]]).astype(np.float32) + error = np.ones(shape=[2, *crop_size, channels]) * 1.0e-6 + diff = output_ms - expected_output + assert np.all(abs(diff) < error) diff --git a/tests/st/ops/cpu/test_nms_with_mask_op.py b/tests/st/ops/cpu/test_nms_with_mask_op.py new file mode 100644 index 00000000000..845d657c40b --- /dev/null +++ b/tests/st/ops/cpu/test_nms_with_mask_op.py @@ -0,0 +1,109 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore +from mindspore import Tensor +from mindspore.ops import operations as P + + +def runMSRun(op, bbox): + inputs = Tensor(bbox, mindspore.float32) + box, _, mask = op(inputs) + box = box.asnumpy() + mask = mask.asnumpy() + sel_idx = np.where(mask) + sel_rows = box[sel_idx][:, 0:4] + sel_score = box[sel_idx][:, -1] + return sel_rows, sel_score + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_nms_with_mask_check_order(): + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + nms_op = P.NMSWithMask(0.5) + for _ in range(10): + count = 4000 + box = np.random.randint(1, 100, size=(count, 4)) + box[:, 2] = box[:, 0] + box[:, 2] + box[:, 3] = box[:, 1] + box[:, 3] + unsorted_scores = np.random.rand(count, 1) + bbox = np.hstack((box, unsorted_scores)) + bbox = Tensor(bbox, dtype=mindspore.float32) + prop, _, _ = nms_op(bbox) + ms_sorted_scores = (prop.asnumpy()[:, -1]) # select just scores + np_sorted_scores = (np.sort(unsorted_scores, axis=0)[::-1][:, 0]) # sort manually + np.testing.assert_array_almost_equal( + ms_sorted_scores, np_sorted_scores) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_nms_with_mask_edge_case_1(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + # CASE 1 - FULL OVERLAP BOXES - Every box is duplicated and has a different score + nms_op1 = P.NMSWithMask(0.3) + bbox1 = [[12, 4, 33, 17, 0.6], [20, 11, 38, 23, 0.1], [20, 10, 45, 26, 0.9], [15, 17, 35, 38, 0.5], + [10, 20, 30, 40, 0.4], [35, 35, 89, 90, 0.8], [12, 4, 33, 17, 0.3], [20, 11, 38, 23, 0.2], + [20, 10, 45, 26, 0.1], [15, 17, 35, 38, 0.8], [10, 20, 30, 40, 0.41], [35, 35, 89, 90, 0.82]] + expected_bbox = np.array([[20., 10., 45., 26.], + [35., 35., 89., 90.], + [15., 17., 35., 38.], + [12., 4., 33., 17.]]) + expected_score = np.array([0.9, 0.82, 0.8, 0.6]) + + sel_rows, sel_score = runMSRun(nms_op1, bbox1) + np.testing.assert_almost_equal(sel_rows, expected_bbox) + np.testing.assert_almost_equal(sel_score, expected_score) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_nms_with_mask_edge_case_2(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + # CASE 2 - 0 value boxes - with valid scores + 
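+ # (With zero-area boxes the pairwise IOU evaluates to 0, or is treated as 0, which is below the 0.5 threshold, so NMS is expected to keep both boxes and only order them by score.)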
nms_op2 = P.NMSWithMask(0.5) + bbox2 = [[0, 0, 0, 0, 0.6], [0, 0, 0, 0, 0.1]] + expected_bbox = np.array([[0., 0., 0., 0.], + [0., 0., 0., 0.]]) + expected_score = np.array([0.6, 0.1]) + + sel_rows, sel_score = runMSRun(nms_op2, bbox2) + np.testing.assert_almost_equal(sel_rows, expected_bbox) + np.testing.assert_almost_equal(sel_score, expected_score) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_nms_with_mask_edge_case_3(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + # CASE 3 - x2/x1 and y2/y1 sequence out of place + nms_op3 = P.NMSWithMask(0.7) + bbox3 = [[70, 70, 45, 75, 0.6], [30, 33, 43, 29, 0.1]] + expected_bbox = np.array([[70., 70., 45., 75.], + [30., 33., 43., 29.]]) + expected_score = np.array([0.6, 0.1]) + + sel_rows, sel_score = runMSRun(nms_op3, bbox3) + np.testing.assert_almost_equal(sel_rows, expected_bbox) + np.testing.assert_almost_equal(sel_score, expected_score) diff --git a/tests/st/ops/cpu/test_random_choice_with_mask_op.py b/tests/st/ops/cpu/test_random_choice_with_mask_op.py new file mode 100644 index 00000000000..47a4ac200a9 --- /dev/null +++ b/tests/st/ops/cpu/test_random_choice_with_mask_op.py @@ -0,0 +1,121 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class RCWM_count_in(nn.Cell): + def __init__(self): + super(RCWM_count_in, self).__init__() + self.RCWM_count_in = P.RandomChoiceWithMask(count=4, seed=1) + + def construct(self, x): + return self.RCWM_count_in(x) + + +class RCWM_count_out(nn.Cell): + def __init__(self): + super(RCWM_count_out, self).__init__() + self.RCWM_count_out = P.RandomChoiceWithMask(count=10, seed=1) + + def construct(self, x): + return self.RCWM_count_out(x) + + +class RCWM_3D(nn.Cell): + def __init__(self): + super(RCWM_3D, self).__init__() + self.RCWM_3D = P.RandomChoiceWithMask(count=10, seed=1) + + def construct(self, x): + return self.RCWM_3D(x) + + +class RCWM_1D(nn.Cell): + def __init__(self): + super(RCWM_1D, self).__init__() + self.RCWM_1D = P.RandomChoiceWithMask(count=10, seed=9) + + def construct(self, x): + return self.RCWM_1D(x) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_RCWM_3D(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + input_tensor = Tensor(np.ones([3, 4, 5]).astype(np.bool)) + expect1 = (10, 3) + expect2 = (10,) + rcwm = RCWM_3D() + output1, output2 = rcwm(input_tensor) + assert output1.shape == expect1 + assert output2.shape == expect2 + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_RCWM_count_out(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + input_tensor = Tensor(np.array([[1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1], + [0, 0, 0, 1]]).astype(np.bool)) + expect1 = (10, 2) + expect2 = (10,) + rcwm = RCWM_count_out() + output1, output2 = rcwm(input_tensor) + assert output1.shape == expect1 + assert output2.shape == expect2 + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_RCWM_count_in(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + input_tensor = Tensor(np.array([[1, 0, 1, 0], [0, 0, 0, 1], [1, 1, 1, 1], + [0, 0, 0, 1]]).astype(np.bool)) + expect1 = (4, 2) + expect2 = (4,) + rcwm = RCWM_count_in() + output1, output2 = rcwm(input_tensor) + assert output1.shape == expect1 + assert output2.shape == expect2 + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_RCWM_1D(): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + input_tensor = Tensor( + np.array([1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1]).astype(np.bool)) + expect_index = np.array([[0], [7], [9], [8], [8], [0], + [2], [7], [0], [0]]).astype(np.int32) + expect_mask = np.array( + [True, True, True, True, True, True, True, True, False, False]) + rcwm = RCWM_1D() + output1, output2 = rcwm(input_tensor) + print(output1.asnumpy()) + print(output2) + assert np.array_equal(output1.asnumpy(), expect_index) + assert np.array_equal(output2.asnumpy(), expect_mask) diff --git a/tests/st/ops/cpu/test_roi_align_grad_op.py b/tests/st/ops/cpu/test_roi_align_grad_op.py new file mode 100644 index 00000000000..cf1d5107274 --- /dev/null +++ b/tests/st/ops/cpu/test_roi_align_grad_op.py @@ -0,0 +1,75 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops.operations import _grad_ops as G + +context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + +class NetROIAlignGrad(nn.Cell): + def __init__(self, xdiff_shape, pooled_height, pooled_width, spatial_scale, sample_num): + super(NetROIAlignGrad, self).__init__() + self.roiAlignGrad = G.ROIAlignGrad( + xdiff_shape, + pooled_height, + pooled_width, + spatial_scale, + sample_num) + + def construct(self, dy, rois): + return self.roiAlignGrad(dy, rois) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_roi_align_grad(): + def roi_align_grad_case(data_type): + rois = Tensor(np.array([[0, -2.0, -2.0, 21.0, 21.0]], data_type)) + + dy = Tensor(np.array([[[ + [.1, .2, .3], + [.1, .2, .3], + [.1, .2, .3] + ]]], data_type)) + + xdiff_shape = (1, 1, 6, 6) + pooled_height, pooled_width, spatial_scale, sample_num = 3, 3, 0.25, 2 + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + roi_align_grad = NetROIAlignGrad( + xdiff_shape, + pooled_height, + pooled_width, + spatial_scale, + sample_num) + output = roi_align_grad(dy, rois) + #print(output) + expect = ([[[[0.025, 0.025, 0.05, 0.05, 0.075, 0.075], + [0.025, 0.025, 0.05, 0.05, 0.075, 0.075], + [0.025, 0.025, 0.05, 0.05, 0.075, 0.075], + [0.025, 0.025, 0.05, 0.05, 0.075, 0.075], + [0.025, 0.025, 0.05, 0.05, 0.075, 0.075], + [0.025, 0.025, 0.05, 0.05, 0.075, 0.075]]]]) + np.testing.assert_almost_equal(output.asnumpy(), expect, decimal=4) + + roi_align_grad_case(np.float32) + roi_align_grad_case(np.float16) diff --git a/tests/st/ops/cpu/test_roi_align_op.py b/tests/st/ops/cpu/test_roi_align_op.py new file mode 100644 index 00000000000..6882baf295e --- /dev/null +++ b/tests/st/ops/cpu/test_roi_align_op.py @@ -0,0 +1,75 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest + +import mindspore.context as context +from mindspore import Tensor +from mindspore.ops import operations as P + + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_roi_align(): + def roi_align_case(data_type): + context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU") + x = Tensor(np.array([[ + [[1, 2, 3, 4, 5, 6], + [7, 8, 9, 10, 11, 12], + [13, 14, 15, 16, 17, 18], + [19, 20, 21, 22, 23, 24], + [25, 26, 27, 28, 29, 30], + [31, 32, 33, 34, 35, 36]] + ]], data_type)) + + # test case 1 + rois = Tensor(np.array([[0, -2.0, -2.0, 21.0, 21.0]], data_type)) + pooled_height, pooled_width, spatial_scale, sample_num = 3, 3, 0.25, 2 + roi_align = P.ROIAlign(pooled_height, pooled_width, + spatial_scale, sample_num, 1) + output = roi_align(x, rois) + #print(output) + expect = [[[[4.5, 6.5, 8.5], + [16.5, 18.5, 20.5], + [28.5, 30.5, 32.5]]]] + assert (output.asnumpy() == expect).all() + + # test case 2 + rois = Tensor(np.array([[0, -2.0, -2.0, 22.0, 22.0]], data_type)) + pooled_height, pooled_width, spatial_scale, sample_num = 3, 3, 0.25, 2 + roi_align = P.ROIAlign(pooled_height, pooled_width, + spatial_scale, sample_num, 0) + output = roi_align(x, rois) + #print(output) + expect = [[[[4.5, 6.5, 8.5], + [16.5, 18.5, 20.5], + [28.5, 30.5, 32.5]]]] + assert (output.asnumpy() == expect).all() + + # test case 3 + pooled_height, pooled_width, spatial_scale, sample_num = 2, 2, 1.0, -1 + rois = Tensor(np.array([[0, -2.0, -2.0, 22.0, 22.0]], data_type)) + roi_align = P.ROIAlign(pooled_height, pooled_width, + spatial_scale, sample_num, 0) + output = roi_align(x, rois) + #print(output) + expect = [[[[6.295, 0.], + [0., 0.]]]] + np.testing.assert_almost_equal(output.asnumpy(), expect, decimal=2) + + roi_align_case(np.float32) + roi_align_case(np.float16) diff --git a/tests/st/ops/cpu/test_scatter_nd_op.py b/tests/st/ops/cpu/test_scatter_nd_op.py new file mode 100644 index 00000000000..70015a347a4 --- /dev/null +++ b/tests/st/ops/cpu/test_scatter_nd_op.py @@ -0,0 +1,142 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +import numpy as np +import pytest +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P + + +class Net(nn.Cell): + def __init__(self, _shape): + super(Net, self).__init__() + self.shape = _shape + self.scatternd = P.ScatterNd() + + def construct(self, indices, update): + return self.scatternd(indices, update, self.shape) + + +def scatternd_net(indices, update, _shape, expect): + scatternd = Net(_shape) + output = scatternd(Tensor(indices), Tensor(update)) + error = np.ones(shape=output.asnumpy().shape) * 1.0e-6 + diff = output.asnumpy() - expect + assert np.all(diff < error) + assert np.all(-diff < error) + +def scatternd_positive(nptype): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(np.int32) + arr_update = np.array([3.2, 1.1, 5.3, -2.2, -1.0]).astype(nptype) + shape = (2, 2) + expect = np.array([[0., 5.3], + [0., 1.1]]).astype(nptype) + scatternd_net(arr_indices, arr_update, shape, expect) + + arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(np.int64) + arr_update = np.array([3.2, 1.1, 5.3, -2.2, -1.0]).astype(nptype) + shape = (2, 2) + expect = np.array([[0., 5.3], + [0., 1.1]]).astype(nptype) + scatternd_net(arr_indices, arr_update, shape, expect) + +def scatternd_negative(nptype): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + arr_indices = np.array([[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]]).astype(np.int32) + arr_update = np.array([-13.4, -3.1, 5.1, -12.1, -1.0]).astype(nptype) + shape = (2, 2) + expect = np.array([[0., 0.], + [-21.4, -3.1]]).astype(nptype) + scatternd_net(arr_indices, arr_update, shape, expect) + + arr_indices = np.array([[1, 0], [1, 1], [1, 0], [1, 0], [1, 0]]).astype(np.int64) + arr_update = np.array([-13.4, -3.1, 5.1, -12.1, -1.0]).astype(nptype) + shape = (2, 2) + expect = np.array([[0., 0.], + [-21.4, -3.1]]).astype(nptype) + scatternd_net(arr_indices, arr_update, shape, expect) + +def scatternd_positive_uint(nptype): + context.set_context(mode=context.GRAPH_MODE, device_target="CPU") + + arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(np.int32) + arr_update = np.array([3.2, 1.1, 5.3, 3.8, 1.2]).astype(nptype) + shape = (2, 2) + expect = np.array([[0., 12.], + [0., 1.]]).astype(nptype) + scatternd_net(arr_indices, arr_update, shape, expect) + + arr_indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]]).astype(np.int64) + arr_update = np.array([3.2, 1.1, 5.3, 3.8, 1.2]).astype(nptype) + shape = (2, 2) + expect = np.array([[0., 12.], + [0., 1.]]).astype(nptype) + scatternd_net(arr_indices, arr_update, shape, expect) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_float64(): + scatternd_positive(np.float64) + scatternd_negative(np.float64) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_float32(): + scatternd_positive(np.float32) + scatternd_negative(np.float32) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_int64(): + scatternd_positive(np.int64) + scatternd_negative(np.int64) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_int16(): + scatternd_positive(np.int16) + scatternd_negative(np.int16) + +@pytest.mark.level0 
+@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_uint64(): + scatternd_positive_uint(np.uint64) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_uint32(): + scatternd_positive_uint(np.uint32) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_uint16(): + scatternd_positive_uint(np.uint16) + +@pytest.mark.level0 +@pytest.mark.platform_x86_cpu +@pytest.mark.env_onecard +def test_scatternd_uint8(): + scatternd_positive_uint(np.uint8)
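For reference, the accumulation behaviour that the ScatterNd expectations above rely on (updates written to the same index are summed into a zero-initialized output) can be reproduced with a small NumPy sketch; the helper name np_scatter_nd below is illustrative only and not a MindSpore API:

import numpy as np

def np_scatter_nd(indices, updates, shape):
    # Scatter `updates` into a zero tensor of `shape`, summing duplicate indices.
    out = np.zeros(shape, dtype=updates.dtype)
    np.add.at(out, tuple(indices.T), updates)  # np.add.at accumulates repeated indices
    return out

# Reproduces the float32 case from scatternd_positive:
indices = np.array([[0, 1], [1, 1], [0, 1], [0, 1], [0, 1]], np.int32)
updates = np.array([3.2, 1.1, 5.3, -2.2, -1.0], np.float32)
print(np_scatter_nd(indices, updates, (2, 2)))  # approximately [[0., 5.3], [0., 1.1]] up to float32 rounding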