!41060 [assistant][ops] New operator implementation, including CombinedNonMaxSuppression
Merge pull request !41060 from Wangsong95/combinednonmaxsuppression
This commit is contained in:
commit b1a32c6c89
@ -0,0 +1,458 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "plugin/device/cpu/kernel/combined_non_max_suppression_cpu_kernel.h"
#include "plugin/device/cpu/hal/device/cpu_device_address.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr char kKernelName[] = "CombinedNonMaxSuppression";
constexpr size_t kCombinedNonMaxSuppressionInputsNum = 6;
constexpr size_t kCombinedNonMaxSuppressionOutputsNum = 4;
constexpr size_t KIndex0 = 0;
constexpr size_t KIndex1 = 1;
constexpr size_t KIndex2 = 2;
constexpr size_t KIndex3 = 3;
constexpr size_t KIndex4 = 4;
constexpr size_t KIndex5 = 5;
constexpr size_t KIndex6 = 6;
constexpr size_t KIndex7 = 7;
constexpr size_t KIndex8 = 8;
constexpr size_t KIndex9 = 9;
constexpr size_t KIndex10 = 10;
constexpr int64_t DimSize4 = 4;
constexpr float k_5 = 0.5;
constexpr int multiplier = 4;
}  // namespace

void CombinedNonMaxSuppressionCpuKernelMod::regular_input2buffer(std::vector<std::vector<float>> *boxes_buffer,
                                                                 float *box_src, const int class_idx) {
  /**
   * Shape of box_src: box_src[num_boxes_ * q_ * 4].
   * box_src[i][class_idx][k] is stored in one dimension and is visited as
   * box_src[i][class_idx][k] = box_src[i * q_ * 4 + class_idx * 4 + k].
   */
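  // Illustrative reading of the formula above (the values here are hypothetical, chosen only for the example):
  // with q_ = 2 and class_idx = 1, the four coordinates of box i start at flat offset i * 8 + 4,
  // so box 3 occupies box_src[28..31].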
  int sub_box_len1 = q_ * multiplier;
  int box_len2 = (class_idx << KIndex2);
  for (size_t i = 0; i < IntToSize(num_boxes_); i++) {
    size_t box_len1 = IntToSize(i * sub_box_len1 + box_len2);
    if (box_src[box_len1] > box_src[box_len1 + KIndex2]) {
      (*boxes_buffer)[i][0] = box_src[box_len1 + KIndex2];
      (*boxes_buffer)[i][KIndex2] = box_src[box_len1 + 0];
    } else {
      (*boxes_buffer)[i][0] = box_src[box_len1 + 0];
      (*boxes_buffer)[i][KIndex2] = box_src[box_len1 + KIndex2];
    }
    if (box_src[box_len1 + KIndex1] > box_src[box_len1 + KIndex3]) {
      (*boxes_buffer)[i][KIndex1] = box_src[box_len1 + KIndex3];
      (*boxes_buffer)[i][KIndex3] = box_src[box_len1 + KIndex1];
    } else {
      (*boxes_buffer)[i][KIndex1] = box_src[box_len1 + KIndex1];
      (*boxes_buffer)[i][KIndex3] = box_src[box_len1 + KIndex3];
    }
  }
}

// Calculate the intersection over union (IoU) of two boxes.
float CombinedNonMaxSuppressionCpuKernelMod::IOU(std::vector<std::vector<float>> *boxes_buffer, int i, int j) {
  std::vector<float> box_a = (*boxes_buffer)[i];
  std::vector<float> box_b = (*boxes_buffer)[j];
  float lx, ly, rx, ry;
  float w, h;
  float area;
  float area_a = (box_a[KIndex2] - box_a[0]) * (box_a[KIndex3] - box_a[KIndex1]);
  float area_b = (box_b[KIndex2] - box_b[0]) * (box_b[KIndex3] - box_b[KIndex1]);
  if (area_a <= 0 || area_b <= 0) {
    return 0.0;
  }
  lx = box_a[0] > box_b[0] ? box_a[0] : box_b[0];
  ly = box_a[KIndex1] > box_b[KIndex1] ? box_a[KIndex1] : box_b[KIndex1];
  rx = box_a[KIndex2] < box_b[KIndex2] ? box_a[KIndex2] : box_b[KIndex2];
  ry = box_a[KIndex3] < box_b[KIndex3] ? box_a[KIndex3] : box_b[KIndex3];
  w = rx > lx ? (rx - lx) : 0;
  h = ry > ly ? (ry - ly) : 0;
  area = w * h;
  return area / (area_a + area_b - area);
}

/**
 * If soft_nms_sigma_ > 0.0, soft-NMS is used: the score is updated by score = score * exp(scale * iou^2).
 * If soft_nms_sigma_ <= 0.0, hard NMS is used: a box is discarded when iou > iou_threshold_.
 * Runs non-max suppression per batch per class.
 */
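// Illustrative decay step for the soft-NMS branch (the sigma value is chosen for this example only):
// with soft_nms_sigma_ = 0.5, scale = -0.5 / 0.5 = -1, so a candidate overlapping an already selected
// box with iou = 0.6 <= iou_threshold_ keeps exp(-0.36) ~= 0.70 of its score, while an iou above
// iou_threshold_ zeroes the score.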
void CombinedNonMaxSuppressionCpuKernelMod::non_max_suppression(std::vector<std::vector<float>> *boxes_buffer,
                                                                std::vector<float> *scores_buffer,
                                                                std::vector<int> &selected) {
  std::priority_queue<non_max_suppression_local::score_index> pq;
  for (size_t i = 0; i < IntToSize(num_boxes_); i++) {
    if ((*scores_buffer)[i] > score_threshold_) {
      pq.push(non_max_suppression_local::score_index(static_cast<int>(i), (*scores_buffer)[i], 0));
    }
  }

  float scale = static_cast<float>(0.0);
  bool is_soft_nms = soft_nms_sigma_ > static_cast<float>(0.0);
  if (is_soft_nms) {
    scale = static_cast<float>(-k_5) / soft_nms_sigma_;
  }

  float similarity;
  non_max_suppression_local::score_index next_si;
  while (static_cast<int>(selected.size()) < size_per_class_ && !pq.empty()) {
    next_si = pq.top();
    float original_score = next_si.score;
    pq.pop();
    bool should_hard_suppress = false;
    for (int j = selected.size() - 1; j >= next_si.suppress_begin_index; j--) {
      similarity = IOU(boxes_buffer, next_si.box_index, selected[IntToSize(j)]);
      if (is_soft_nms) {
        next_si.score *=
          similarity <= iou_threshold_ ? std::exp(scale * similarity * similarity) : static_cast<float>(0.0);
      }
      if (!is_soft_nms && similarity > iou_threshold_) {
        should_hard_suppress = true;
        break;
      }
      if (next_si.score <= score_threshold_) break;
    }

    next_si.suppress_begin_index = static_cast<int>(selected.size());
    if (!should_hard_suppress) {
      if (next_si.score == original_score) {
        selected.push_back(next_si.box_index);
        continue;
      }
      if (next_si.score > score_threshold_) {
        pq.push(next_si);
      }
    }
  }
}

void CombinedNonMaxSuppressionCpuKernelMod::nms_perclass(
  float *boxes, float *scores, std::vector<non_max_suppression_local::result_para> &sub_result_vec, int &result_size) {
  size_t k = 0;
  int box_idx;
  size_t boxe_len1;
  int sub_box_len1 = q_ * multiplier;
  int box_len2 = 0;
  std::vector<std::vector<float>> boxes_buffer(num_boxes_, std::vector<float>(KIndex4));
  std::vector<float> scores_buffer(num_boxes_);
  /**
   * Shapes of scores and boxes:
   * scores[num_boxes_ * num_class_]
   * boxes[num_boxes_ * q_ * 4]
   */
  if (q_ == 1) {
    regular_input2buffer(&boxes_buffer, boxes, 0);
  }
  for (int j = 0; j < num_class_; j++) {
    for (int i = 0; i < num_boxes_; i++) {
      scores_buffer[IntToSize(i)] = scores[IntToSize(i * num_class_ + j)];
    }
    if (q_ > 1) {
      regular_input2buffer(&boxes_buffer, boxes, j);
      box_len2 = j * multiplier;
    }
    std::vector<int> selected;
    non_max_suppression(&boxes_buffer, &scores_buffer, selected);
    for (size_t i = 0; i < selected.size(); i++) {
      box_idx = selected[i];
      boxe_len1 = IntToSize(box_idx * sub_box_len1 + box_len2);
      sub_result_vec[k++] = {
        box_idx,
        scores_buffer[IntToSize(box_idx)],
        j,
        {boxes[boxe_len1 + 0], boxes[boxe_len1 + 1], boxes[boxe_len1 + KIndex2], boxes[boxe_len1 + KIndex3]}};
    }
    result_size += selected.size();
  }
}

size_t CombinedNonMaxSuppressionCpuKernelMod::nms_perbath(float *boxes, float *scores, float *nmsed_boxes,
                                                          float *nmsed_scores, float *nmsed_class,
                                                          int *valid_detection) {
  int box_size = num_bath_ * num_detection_ * sizeof(float) * multiplier;
  int score_size = num_bath_ * num_detection_ * sizeof(float);
  void(memset_s(nmsed_boxes, box_size, 0, box_size));
  void(memset_s(nmsed_scores, score_size, 0, score_size));
  void(memset_s(nmsed_class, score_size, 0, score_size));
  void(memset_s(valid_detection, sizeof(int) * num_bath_, 0, sizeof(int) * num_bath_));
  const float box_min = 0.0;
  const float box_max = 1.0;
  /**
   * Shapes of scores and boxes:
   * scores[num_bath_ * num_boxes_ * num_class_]
   * boxes[num_bath_ * num_boxes_ * q_ * 4]
   */
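  /**
   * Output layout note (reading aid, mirroring the memset sizes above):
   * nmsed_boxes[num_bath_ * num_detection_ * 4], nmsed_scores / nmsed_class[num_bath_ * num_detection_];
   * coordinate c of detection k in batch i is written at nmsed_boxes[(i * num_detection_ + k) * 4 + c].
   */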
  int score_len2 = num_boxes_ * num_class_;
  int boxes_len2 = num_boxes_ * q_ * multiplier;
  auto shard_nms = [this, &boxes, &scores, score_len2, boxes_len2, &nmsed_boxes, &nmsed_scores, &nmsed_class,
                    &valid_detection, box_max, box_min](size_t start, size_t end) {
    for (size_t i = start; i < end; i++) {
      int tmp_i = static_cast<int>(i);
      int per_detections = 0;
      size_t scores_KIndex = 0;
      int result_size = 0;
      std::vector<non_max_suppression_local::result_para> result_vec(size_per_class_ * num_class_,
                                                                     {0, 0.0, 0, {0.0, 0.0, 0.0, 0.0}});
      nms_perclass(boxes + tmp_i * boxes_len2, scores + tmp_i * score_len2, result_vec, result_size);
      if (!pad_per_class_) {
        per_detections = std::min(result_size, max_total_size_);
      } else {
        per_detections = std::min(result_size, num_detection_);
      }
      std::sort(result_vec.begin(), result_vec.begin() + result_size, non_max_suppression_local::result_cmp);
      scores_KIndex = IntToSize(tmp_i * num_detection_);
      for (size_t k = 0; k < IntToSize(per_detections); k++) {
        if (clip_boxes_) {
          nmsed_boxes[(scores_KIndex << KIndex2) + 0] =
            std::max(std::min(result_vec[k].box_coord[0], box_max), box_min);
          nmsed_boxes[(scores_KIndex << KIndex2) + KIndex1] =
            std::max(std::min(result_vec[k].box_coord[KIndex1], box_max), box_min);
          nmsed_boxes[(scores_KIndex << KIndex2) + KIndex2] =
            std::max(std::min(result_vec[k].box_coord[KIndex2], box_max), box_min);
          nmsed_boxes[(scores_KIndex << KIndex2) + KIndex3] =
            std::max(std::min(result_vec[k].box_coord[KIndex3], box_max), box_min);
          nmsed_scores[scores_KIndex] = result_vec[k].score;
          nmsed_class[scores_KIndex] = static_cast<float>(result_vec[k].class_idx);
        } else {
          nmsed_boxes[(scores_KIndex << KIndex2) + 0] = result_vec[k].box_coord[0];
          nmsed_boxes[(scores_KIndex << KIndex2) + KIndex1] = result_vec[k].box_coord[KIndex1];
          nmsed_boxes[(scores_KIndex << KIndex2) + KIndex2] = result_vec[k].box_coord[KIndex2];
          nmsed_boxes[(scores_KIndex << KIndex2) + KIndex3] = result_vec[k].box_coord[KIndex3];
          nmsed_scores[scores_KIndex] = result_vec[k].score;
          nmsed_class[scores_KIndex] = static_cast<float>(result_vec[k].class_idx);
        }
        scores_KIndex++;
      }
      valid_detection[i] = per_detections;
    }
  };
  ParallelLaunchAutoSearch(shard_nms, num_bath_, this, &parallel_search_info_);
  return true;
}

void CombinedNonMaxSuppressionCpuKernelMod::CheckInput() {
  constexpr int kInputDimension0 = 4;
  if (input0_shape_.size() != kInputDimension0) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the boxes's dims must be 4, but got " << input0_shape_.size()
                      << ".";
  }
  constexpr int kInputDimension1 = 3;
  if (input1_shape_.size() != kInputDimension1) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the scores's dims must be 3, but got " << input1_shape_.size()
                      << ".";
  }
  if (input2_shape_.size() != 0) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the max_output_size_per_class's dims must be 0, but got "
                      << input2_shape_.size() << ".";
  }
  if (input3_shape_.size() != 0) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the max_total_size's dims must be 0, but got "
                      << input3_shape_.size() << ".";
  }
  if (input4_shape_.size() != 0) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the iou_threshold's dims must be 0, but got "
                      << input4_shape_.size() << ".";
  }
  if (input5_shape_.size() != 0) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the score_threshold's dims must be 0, but got "
                      << input5_shape_.size() << ".";
  }
  if (input0_shape_[0] != input1_shape_[0]) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the boxes's 1st dim must be the same as the scores's 1st dim,"
                      << " but got " << input0_shape_[0] << " and " << input1_shape_[0] << ".";
  }
  if (input0_shape_[KIndex1] != input1_shape_[KIndex1]) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the boxes's 2nd dim must be the same as the scores's 2nd dim,"
                      << " but got " << input0_shape_[KIndex1] << " and " << input1_shape_[KIndex1] << ".";
  }
  if (input0_shape_[KIndex2] != input1_shape_[KIndex2] && input0_shape_[KIndex2] != 1) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the boxes's 3rd dim must be the same as the scores's 3rd dim or 1"
                      << ", but got " << input0_shape_[KIndex2] << ".";
  }
  if (input0_shape_[KIndex3] != DimSize4) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the boxes's 4th dim must be equal to 4, but got "
                      << input0_shape_[KIndex3] << ".";
  }
}

void CombinedNonMaxSuppressionCpuKernelMod::CheckOutput() {
  constexpr size_t kOutputDimension0 = 3;
  constexpr size_t kOutputDimension1 = 2;
  constexpr size_t kOutputDimension2 = 2;
  constexpr size_t kOutputDimension3 = 1;
  if (output0_shape_.size() != kOutputDimension0) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_boxes's dims must be 3, but got "
                      << output0_shape_.size() << ".";
  }
  if (output1_shape_.size() != kOutputDimension1) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_scores's dims must be 2, but got "
                      << output1_shape_.size() << ".";
  }
  if (output2_shape_.size() != kOutputDimension2) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_classes's dims must be 2, but got "
                      << output2_shape_.size() << ".";
  }
  if (output3_shape_.size() != kOutputDimension3) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the valid_detection's dims must be 1, but got "
                      << output3_shape_.size() << ".";
  }
  if ((output0_shape_[0] != output1_shape_[0] || output0_shape_[0] != output2_shape_[0]) ||
      output0_shape_[0] != output3_shape_[0]) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_boxes's 1st dim, nmsed_scores's 1st dim,"
                      << " nmsed_classes's 1st dim and valid_detection's 1st dim must be the same, but got"
                      << " the four as follows: " << output0_shape_[0] << " and " << output1_shape_[0] << " and "
                      << output2_shape_[0] << " and " << output3_shape_[0] << ".";
  }
  if (output0_shape_[1] != output1_shape_[1] || output0_shape_[1] != output2_shape_[1]) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_boxes's 2nd dim, nmsed_scores's 2nd dim and"
                      << " nmsed_classes's 2nd dim must be the same, but got the three as follows: "
                      << output0_shape_[1] << " and " << output1_shape_[1] << " and " << output2_shape_[1] << ".";
  }
  if (static_cast<int>(output0_shape_[0]) != num_bath_) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_boxes's 1st dim must be the same as the boxes's 1st"
                      << " dim, but got " << output0_shape_[0] << ".";
  }
  if (static_cast<int>(output1_shape_[0]) != num_bath_) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_scores's 1st dim must be the same as the boxes's 1st"
                      << " dim, but got " << output1_shape_[0] << ".";
  }
  if (static_cast<int>(output2_shape_[0]) != num_bath_) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the nmsed_classes's 1st dim must be the same as the boxes's 1st"
                      << " dim, but got " << output2_shape_[0] << ".";
  }
  if (static_cast<int>(output3_shape_[0]) != num_bath_) {
    MS_LOG(EXCEPTION) << "For " << kKernelName << ", the valid_detection's 1st dim must be the same as the boxes's 1st"
                      << " dim, but got " << output3_shape_[0] << ".";
  }
}

void CombinedNonMaxSuppressionCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
  size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
  size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
  node_wpt_ = kernel_node;
  input0_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
  input1_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, KIndex1);
  input2_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, KIndex2);
  input3_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, KIndex3);
  input4_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, KIndex4);
  input5_shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, KIndex5);
  soft_nms_sigma_ = 0.0;
  num_bath_ = static_cast<int>(input0_shape_[0]);
  num_boxes_ = static_cast<int>(input0_shape_[KIndex1]);
  q_ = static_cast<int>(input0_shape_[KIndex2]);
  num_class_ = static_cast<int>(input1_shape_[KIndex2]);
  kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);

  pad_per_class_ = false;
  clip_boxes_ = true;
  auto prim = common::AnfAlgo::GetCNodePrimitive(kernel_node);
  auto pad_per_class = prim->GetAttr("pad_per_class");
  auto clip_boxes = prim->GetAttr("clip_boxes");
  if (pad_per_class != nullptr) {
    pad_per_class_ = GetValue<bool>(pad_per_class);
  }
  if (clip_boxes != nullptr) {
    clip_boxes_ = GetValue<bool>(clip_boxes);
  }
  CHECK_KERNEL_INPUTS_NUM(input_num, kCombinedNonMaxSuppressionInputsNum, kernel_name_);
  CHECK_KERNEL_OUTPUTS_NUM(output_num, kCombinedNonMaxSuppressionOutputsNum, kernel_name_);
}

bool CombinedNonMaxSuppressionCpuKernelMod::Launch(const std::vector<kernel::AddressPtr> &inputs,
                                                   const std::vector<kernel::AddressPtr> &,
                                                   const std::vector<kernel::AddressPtr> &outputs) {
  float *boxes = reinterpret_cast<float *>(inputs[0]->addr);
  float *scores = reinterpret_cast<float *>(inputs[KIndex1]->addr);
  max_output_size_per_class_ = *(reinterpret_cast<int *>(inputs[KIndex2]->addr));
  max_total_size_ = *(reinterpret_cast<int *>(inputs[KIndex3]->addr));
  iou_threshold_ = *(reinterpret_cast<float *>(inputs[KIndex4]->addr));
  score_threshold_ = *(reinterpret_cast<float *>(inputs[KIndex5]->addr));
  float *nmsed_boxes = reinterpret_cast<float *>(outputs[KIndex0]->addr);
  float *nmsed_scores = reinterpret_cast<float *>(outputs[KIndex1]->addr);
  float *nmsed_class = reinterpret_cast<float *>(outputs[KIndex2]->addr);
  int *valid_detection = reinterpret_cast<int *>(outputs[KIndex3]->addr);
  if (pad_per_class_) {
    num_detection_ = std::min(max_total_size_, max_output_size_per_class_ * num_class_);
  } else {
    num_detection_ = max_total_size_;
  }
  auto node_ = node_wpt_.lock();
  if (!node_) {
    MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', node_wpt_(kernel_node) is expired.";
  }
  ShapeVector shape0 = {input0_shape_[0], static_cast<int64_t>(num_detection_), DimSize4};
  ShapeVector shape1 = {input0_shape_[0], static_cast<int64_t>(num_detection_)};
  ShapeVector shape2 = {input0_shape_[0], static_cast<int64_t>(num_detection_)};
  ShapeVector shape3 = {input0_shape_[0]};
  common::AnfAlgo::SetOutputInferTypeAndShape(
    {kNumberTypeFloat32, kNumberTypeFloat32, kNumberTypeFloat32, kNumberTypeInt32}, {shape0, shape1, shape2, shape3},
    node_.get());
  output0_shape_ = AnfAlgo::GetOutputDeviceShape(node_, KIndex0);
  output1_shape_ = AnfAlgo::GetOutputDeviceShape(node_, KIndex1);
  output2_shape_ = AnfAlgo::GetOutputDeviceShape(node_, KIndex2);
  output3_shape_ = AnfAlgo::GetOutputDeviceShape(node_, KIndex3);
  size_per_class_ = max_output_size_per_class_ < num_boxes_ ? max_output_size_per_class_ : num_boxes_;
  CheckInput();
  CheckOutput();
  if (max_total_size_ <= 0) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", max_total_size must be > 0, but got " << max_total_size_ << ".";
  }
  if (max_output_size_per_class_ <= 0) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", max_output_size_per_class must be > 0, but got "
                      << max_output_size_per_class_ << ".";
  }
  if (iou_threshold_ < 0 || iou_threshold_ > 1) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", iou_threshold must be in [0, 1], but got " << iou_threshold_
                      << ".";
  }
  if (static_cast<int>(output0_shape_[KIndex1]) != num_detection_) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", the nmsed_boxes's 2nd dim must be " << num_detection_
                      << ", but got " << output0_shape_[KIndex1] << ".";
  }
  if (static_cast<int>(output1_shape_[KIndex1]) != num_detection_) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", the nmsed_scores's 2nd dim must be " << num_detection_
                      << ", but got " << output1_shape_[KIndex1] << ".";
  }
  if (static_cast<int>(output2_shape_[KIndex1]) != num_detection_) {
    MS_LOG(EXCEPTION) << "For " << kernel_name_ << ", the nmsed_classes's 2nd dim must be " << num_detection_
                      << ", but got " << output2_shape_[KIndex1] << ".";
  }
  nms_perbath(boxes, scores, nmsed_boxes, nmsed_scores, nmsed_class, valid_detection);
  return true;
}

std::vector<KernelAttr> CombinedNonMaxSuppressionCpuKernelMod::GetOpSupport() {
  static std::vector<KernelAttr> kernel_attr_list = {
    KernelAttr()
      .AddInputAttr(kNumberTypeFloat32)
      .AddInputAttr(kNumberTypeFloat32)
      .AddInputAttr(kNumberTypeInt32)
      .AddInputAttr(kNumberTypeInt32)
      .AddInputAttr(kNumberTypeFloat32)
      .AddInputAttr(kNumberTypeFloat32)
      .AddOutputAttr(kNumberTypeFloat32)
      .AddOutputAttr(kNumberTypeFloat32)
      .AddOutputAttr(kNumberTypeFloat32)
      .AddOutputAttr(kNumberTypeInt32),
  };

  return kernel_attr_list;
}
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, CombinedNonMaxSuppression, CombinedNonMaxSuppressionCpuKernelMod);
}  // namespace kernel
}  // namespace mindspore
@ -0,0 +1,104 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_COMBINED_NON_MAX_SUPPRESSION_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_COMBINED_NON_MAX_SUPPRESSION_CPU_KERNEL_H_
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <queue>
#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"

namespace non_max_suppression_local {
struct score_index {
  int box_index;
  float score;
  int suppress_begin_index;
  score_index() {}
  score_index(int bi, float s, int sbi) : box_index(bi), score(s), suppress_begin_index(sbi) {}
  bool operator<(const score_index &b) const {
    return (score < b.score) || ((score == b.score) && (box_index > b.box_index));
  }
};
struct result_para {
  int box_index;
  float score;
  int class_idx;
  float box_coord[4];
};

inline bool result_cmp(const result_para &a, const result_para &b) { return a.score > b.score; }
}  // namespace non_max_suppression_local

namespace mindspore {
namespace kernel {
class CombinedNonMaxSuppressionCpuKernelMod : public DeprecatedNativeCpuKernelMod {
 public:
  CombinedNonMaxSuppressionCpuKernelMod() = default;
  ~CombinedNonMaxSuppressionCpuKernelMod() override = default;
  void InitKernel(const CNodePtr &kernel_node) override;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:
  size_t nms_perbath(float *, float *, float *, float *, float *, int *);
  void regular_input2buffer(std::vector<std::vector<float>> *, float *, const int);
  float IOU(std::vector<std::vector<float>> *, int, int);
  void non_max_suppression(std::vector<std::vector<float>> *, std::vector<float> *, std::vector<int> &);
  void nms_perclass(float *, float *, std::vector<non_max_suppression_local::result_para> &, int &);
  void CheckInput();
  void CheckOutput();
  int num_bath_ = 0;
  int num_boxes_ = 0;
  int q_ = 0;
  int num_class_ = 0;
  // Number of detections kept per batch.
  int num_detection_ = 0;
  int max_total_size_ = 0;
  // User-specified maximum number of boxes selected per class.
  int max_output_size_per_class_ = 0;
  // Per-class selection size actually used (the smaller of max_output_size_per_class_ and num_boxes_).
  int size_per_class_ = 0;
  // Boxes whose score is lower than score_threshold_ are dropped.
  float score_threshold_ = 0.0;
  // Above this IoU, soft_nms_sigma_ decides whether a box is deleted or its score decayed.
  float iou_threshold_ = 0.0;
  float soft_nms_sigma_ = 0.0;
  bool pad_per_class_ = false;
  bool clip_boxes_ = true;
  CNodeWeakPtr node_wpt_;
  std::vector<int64_t> input0_shape_;
  std::vector<int64_t> input1_shape_;
  std::vector<int64_t> input2_shape_;
  std::vector<int64_t> input3_shape_;
  std::vector<int64_t> input4_shape_;
  std::vector<int64_t> input5_shape_;
  std::vector<int64_t> output0_shape_;
  std::vector<int64_t> output1_shape_;
  std::vector<int64_t> output2_shape_;
  std::vector<int64_t> output3_shape_;
};
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_COMBINED_NON_MAX_SUPPRESSION_CPU_KERNEL_H_
@ -0,0 +1,178 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "ops/combined_non_max_suppression.h"
#define IsValue(value_ptr) (!value_ptr->isa<AnyValue>() && !value_ptr->isa<None>())
#include <algorithm>
#include <set>
#include "ops/op_utils.h"
#include "utils/check_convert_utils.h"
#include "abstract/ops/primitive_infer_map.h"
#include "mindapi/src/helper.h"

namespace mindspore {
namespace ops {
namespace {
const int64_t kInputDimension0 = 4;
const int64_t kInputDimension1 = 3;
const int64_t kDimsize = 4;
const int64_t kInputs = 6;
const size_t ksecond = 2;
tensor::TensorPtr Get_Value(const std::vector<AbstractBasePtr> &input_args, size_t index) {
  auto input = input_args[index]->cast<abstract::AbstractTensorPtr>();
  MS_EXCEPTION_IF_NULL(input);
  auto input_shape_value_ptr = input->BuildValue();
  MS_EXCEPTION_IF_NULL(input_shape_value_ptr);
  return input_shape_value_ptr->cast<tensor::TensorPtr>();
}
abstract::TupleShapePtr CombinedNonMaxSuppressionInferShape(const PrimitivePtr &primitive,
                                                            const std::vector<AbstractBasePtr> &input_args) {
  auto prim_name = primitive->name();
  auto input0_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kInputIndex0]->BuildShape())[kShape];
  auto input1_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kInputIndex1]->BuildShape())[kShape];
  auto input2_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kInputIndex2]->BuildShape())[kShape];
  auto input3_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kInputIndex3]->BuildShape())[kShape];
  auto input4_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kInputIndex4]->BuildShape())[kShape];
  auto input5_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[kInputIndex5]->BuildShape())[kShape];
  (void)CheckAndConvertUtils::CheckInteger("boxes dim", input0_shape.size(), kEqual, kInputDimension0, prim_name);
  (void)CheckAndConvertUtils::CheckInteger("scores dim", input1_shape.size(), kEqual, kInputDimension1, prim_name);
  (void)CheckAndConvertUtils::CheckInteger("max_output_size_per_class dim", input2_shape.size(), kEqual, 0, prim_name);
  (void)CheckAndConvertUtils::CheckInteger("max_total_size dim", input3_shape.size(), kEqual, 0, prim_name);
  (void)CheckAndConvertUtils::CheckInteger("iou_threshold dim", input4_shape.size(), kEqual, 0, prim_name);
  (void)CheckAndConvertUtils::CheckInteger("score_threshold dim", input5_shape.size(), kEqual, 0, prim_name);
  if (input0_shape[0] != input1_shape[0]) {
    MS_EXCEPTION(ValueError) << "For " << prim_name << ", the boxes's 1st dim must be the same as the scores's"
                             << " 1st dim, but got " << input0_shape[0] << " and " << input1_shape[0] << ".";
  }
  if (input0_shape[1] != input1_shape[1]) {
    MS_EXCEPTION(ValueError) << "For " << prim_name << ", the boxes's 2nd dim must be the same as the scores's"
                             << " 2nd dim, but got " << input0_shape[1] << " and " << input1_shape[1] << ".";
  }
  if (input0_shape[kInputIndex2] != input1_shape[kInputIndex2] && input0_shape[kInputIndex2] != 1) {
    MS_EXCEPTION(ValueError) << "For " << prim_name
                             << ", the boxes's 3rd dim must be the same as the scores's 3rd dim or 1, but got "
                             << input0_shape[kInputIndex2] << ".";
  }
  if (input0_shape[kInputIndex3] != kDimsize) {
    MS_EXCEPTION(ValueError) << "For " << prim_name << ", the boxes's 4th dim must be equal to 4, but got "
                             << input0_shape[kInputIndex3] << ".";
  }
  for (int i = 0; i < kInputs; i++) {
    if (!input_args[i]->isa<abstract::AbstractTensor>()) {
      MS_EXCEPTION(TypeError) << "For " << prim_name << ", input" << i << " only supports tensor!";
    }
  }
  auto pad_per_class_ptr = primitive->GetAttr("pad_per_class");
  MS_EXCEPTION_IF_NULL(pad_per_class_ptr);
  bool pad_per_class = GetValue<bool>(pad_per_class_ptr);
  auto input2_tensor = Get_Value(input_args, kInputIndex2);
  auto input3_tensor = Get_Value(input_args, kInputIndex3);
  auto input4_tensor = Get_Value(input_args, kInputIndex4);
  auto input5_tensor = Get_Value(input_args, kInputIndex5);
  if (IsValue(input_args[kInputIndex2]->BuildValue()) && IsValue(input_args[kInputIndex3]->BuildValue())) {
    if (IsValue(input_args[kInputIndex4]->BuildValue()) && IsValue(input_args[kInputIndex5]->BuildValue())) {
      auto iou_threshold = *(reinterpret_cast<float *>(input4_tensor->data_c()));
      auto score_threshold = *(reinterpret_cast<float *>(input5_tensor->data_c()));
      if (iou_threshold < 0 || iou_threshold > 1) {
        MS_EXCEPTION(ValueError) << "For " << prim_name << ", iou_threshold must be in [0,1], but got " << iou_threshold
                                 << ".";
      }
      if (score_threshold < 0 && input0_shape[kInputIndex2] == input1_shape[kInputIndex2]) {
        MS_EXCEPTION(ValueError) << "For " << prim_name << ", it is temporarily unsupported when the boxes's 3rd dim "
                                 << "is not 1 and score_threshold is less than 0.";
      }
    }
    auto max_output_size_per_class = *(reinterpret_cast<int32_t *>(input2_tensor->data_c()));
    auto max_total_size = *(reinterpret_cast<int32_t *>(input3_tensor->data_c()));
    if (max_total_size <= 0) {
      MS_EXCEPTION(ValueError) << "For " << prim_name << ", max_total_size must be > 0, but got " << max_total_size
                               << ".";
    }
    if (max_output_size_per_class <= 0) {
      MS_EXCEPTION(ValueError) << "For " << prim_name << ", max_output_size_per_class must be > 0, but got "
                               << max_output_size_per_class << ".";
    }
    auto num_detection = max_total_size;
    if (pad_per_class) {
      num_detection = std::min(max_total_size, max_output_size_per_class * static_cast<int32_t>(input1_shape[ksecond]));
    }
    int64_t bs = input0_shape[0];
    ShapeVector shape1 = {bs, num_detection, 4};
    ShapeVector shape2 = {bs, num_detection};
    ShapeVector shape3 = {bs, num_detection};
    ShapeVector shape4 = {bs};
    auto out1 = std::make_shared<abstract::Shape>(shape1);
    auto out2 = std::make_shared<abstract::Shape>(shape2);
    auto out3 = std::make_shared<abstract::Shape>(shape3);
    auto out4 = std::make_shared<abstract::Shape>(shape4);
    return std::make_shared<abstract::TupleShape>(std::vector<abstract::BaseShapePtr>{out1, out2, out3, out4});
  } else {
    ShapeVector nmsed_boxes_shape = {-2, -2, -2};
    ShapeVector max_nmsed_boxes_shape = {1, 1, 1};
    ShapeVector min_nmsed_boxes_shape = {1, 1, 1};
    auto shape1 = std::make_shared<abstract::Shape>(nmsed_boxes_shape, min_nmsed_boxes_shape, max_nmsed_boxes_shape);
    ShapeVector nmsed_scores_shape = {-2, -2};
    ShapeVector max_nmsed_scores_shape = {1, 1};
    ShapeVector min_nmsed_scores_shape = {1, 1};
    auto shape2 = std::make_shared<abstract::Shape>(nmsed_scores_shape, min_nmsed_scores_shape, max_nmsed_scores_shape);
    ShapeVector nmsed_class_shape = {-2, -2};
    ShapeVector max_nmsed_class_shape = {1, 1};
    ShapeVector min_nmsed_class_shape = {1, 1};
    auto shape3 = std::make_shared<abstract::Shape>(nmsed_class_shape, min_nmsed_class_shape, max_nmsed_class_shape);
    ShapeVector valid_detection = {-2};
    auto shape4 = std::make_shared<abstract::Shape>(valid_detection);
    return std::make_shared<abstract::TupleShape>(std::vector<abstract::BaseShapePtr>{shape1, shape2, shape3, shape4});
  }
}

TuplePtr CombinedNonMaxSuppressionInferType(const PrimitivePtr &primitive,
                                            const std::vector<AbstractBasePtr> &input_args) {
  auto prim_name = primitive->name();
  auto input0_type = input_args[kInputIndex0]->BuildType();
  auto input1_type = input_args[kInputIndex1]->BuildType();
  auto input2_type = input_args[kInputIndex2]->BuildType();
  auto input3_type = input_args[kInputIndex3]->BuildType();
  auto input4_type = input_args[kInputIndex4]->BuildType();
  auto input5_type = input_args[kInputIndex5]->BuildType();
  const std::set valid_type_float32 = {kFloat32};
  const std::set valid_type_int = {kInt32};
  (void)CheckAndConvertUtils::CheckTensorTypeValid("boxes", input0_type, valid_type_float32, prim_name);
  (void)CheckAndConvertUtils::CheckTensorTypeValid("scores", input1_type, valid_type_float32, prim_name);
  (void)CheckAndConvertUtils::CheckTensorTypeValid("max_output_size_per_class", input2_type, valid_type_int, prim_name);
  (void)CheckAndConvertUtils::CheckTensorTypeValid("max_total_size", input3_type, valid_type_int, prim_name);
  (void)CheckAndConvertUtils::CheckTensorTypeValid("iou_threshold", input4_type, valid_type_float32, prim_name);
  (void)CheckAndConvertUtils::CheckTensorTypeValid("score_threshold", input5_type, valid_type_float32, prim_name);
  return std::make_shared<Tuple>(
    std::vector<TypePtr>{std::make_shared<TensorType>(kFloat32), std::make_shared<TensorType>(kFloat32),
                         std::make_shared<TensorType>(kFloat32), std::make_shared<TensorType>(kInt32)});
}
}  // namespace
MIND_API_OPERATOR_IMPL(CombinedNonMaxSuppression, BaseOperator);
AbstractBasePtr CombinedNonMaxSuppressionInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                               const std::vector<AbstractBasePtr> &input_args) {
  auto prim_name = primitive->name();
  const int64_t kInputNum = 6;
  (void)CheckAndConvertUtils::CheckInputArgs(input_args, kGreaterEqual, kInputNum, prim_name);
  auto infer_shape = CombinedNonMaxSuppressionInferShape(primitive, input_args);
  auto infer_type = CombinedNonMaxSuppressionInferType(primitive, input_args);
  return abstract::MakeAbstract(infer_shape, infer_type);
}

REGISTER_PRIMITIVE_EVAL_IMPL(CombinedNonMaxSuppression, prim::kPrimCombinedNonMaxSuppression,
                             CombinedNonMaxSuppressionInfer, nullptr, true);
REGISTER_HOST_DEPENDS(kNameCombinedNonMaxSuppression, {2, 3, 4, 5});
}  // namespace ops
}  // namespace mindspore
@ -0,0 +1,48 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CORE_OPS_COMBINED_NON_MAX_SUPPRESSION_H_
#define MINDSPORE_CORE_OPS_COMBINED_NON_MAX_SUPPRESSION_H_
#include <map>
#include <vector>
#include <string>
#include <memory>
#include "ops/base_operator.h"
#include "mindapi/base/types.h"

namespace mindspore {
namespace ops {
constexpr auto kNameCombinedNonMaxSuppression = "CombinedNonMaxSuppression";
/// \brief Greedily selects a subset of bounding boxes in descending order of score.
/// Refer to Python API @ref mindspore.ops.CombinedNonMaxSuppression for more details.
class MIND_API CombinedNonMaxSuppression : public BaseOperator {
 public:
  MIND_API_BASE_MEMBER(CombinedNonMaxSuppression);
  /// \brief Constructor.
  CombinedNonMaxSuppression() : BaseOperator(kNameCombinedNonMaxSuppression) {
    InitIOName({"boxes", "scores", "max_output_size_per_class", "max_total_size", "iou_threshold", "score_threshold"},
               {"nmsed_box", "nmsed_scores", "nmsed_classes", "valid_detections"});
  }
};
abstract::AbstractBasePtr CombinedNonMaxSuppressionInfer(const abstract::AnalysisEnginePtr &,
                                                         const PrimitivePtr &primitive,
                                                         const std::vector<abstract::AbstractBasePtr> &input_args);

using kPrimCombinedNonMaxSuppressionPtr = std::shared_ptr<CombinedNonMaxSuppression>;
}  // namespace ops
}  // namespace mindspore

#endif  // MINDSPORE_CORE_OPS_COMBINED_NON_MAX_SUPPRESSION_H_
@ -1244,6 +1244,7 @@ GVAR_DEF(PrimitivePtr, kPrimAdjustSaturation, std::make_shared<Primitive>(kAdjus
GVAR_DEF(PrimitivePtr, kPrimCompareAndBitpack, std::make_shared<Primitive>(kCompareAndBitpack));
GVAR_DEF(PrimitivePtr, kPrimScaleAndTranslate, std::make_shared<Primitive>("ScaleAndTranslate"));
GVAR_DEF(PrimitivePtr, kPrimScaleAndTranslateGrad, std::make_shared<Primitive>("ScaleAndTranslateGrad"));
GVAR_DEF(PrimitivePtr, kPrimCombinedNonMaxSuppression, std::make_shared<Primitive>("CombinedNonMaxSuppression"));

// Statements
GVAR_DEF(PrimitivePtr, kPrimReturn, std::make_shared<Primitive>(kReturn));
@ -168,3 +168,9 @@ from .reservoir_replay_buffer import _rrb_create_op_cpu
from .reservoir_replay_buffer import _rrb_push_op_cpu
from .reservoir_replay_buffer import _rrb_sample_op_cpu
from .reservoir_replay_buffer import _rrb_destroy_op_cpu
from .sparse_reshape import _sparse_reshape_aicpu
from .unsorted_segment_sum import _unsorted_segment_sum_aicpu
from .unsorted_segment_prod import _unsorted_segment_prod_aicpu
from .hard_sigmoid import _hard_sigmoid_aicpu
from .hard_sigmoid_grad import _hard_sigmoid_grad_aicpu
from .sparse_reorder import _sparse_reorder_aicpu
@ -0,0 +1,42 @@
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

"""CombinedNonMaxSuppression op"""
from mindspore.ops.op_info_register import op_info_register, AiCPURegOp, DataType

combined_non_max_suppression_op_info = AiCPURegOp("CombinedNonMaxSuppression")\
    .fusion_type("OPAQUE")\
    .attr("pad_per_class", "bool")\
    .attr("clip_boxes", "bool")\
    .input(0, "boxes", "required")\
    .input(1, "scores", "required")\
    .input(2, "max_output_size_per_class", "required")\
    .input(3, "max_total_size", "required")\
    .input(4, "iou_threshold", "required")\
    .input(5, "score_threshold", "required")\
    .output(0, "nmsed_box", "required")\
    .output(1, "nmsed_scores", "required")\
    .output(2, "nmsed_classes", "required")\
    .output(3, "valid_detections", "required")\
    .dtype_format(DataType.F32_Default, DataType.F32_Default, DataType.I32_Default, DataType.I32_Default,
                  DataType.F32_Default, DataType.F32_Default, DataType.F32_Default, DataType.F32_Default,
                  DataType.F32_Default, DataType.I32_Default)\
    .get_op_info()


@op_info_register(combined_non_max_suppression_op_info)
def _combined_non_max_suppression_aicpu():
    """CombinedNonMaxSuppression AiCPU register"""
    return
@ -1045,3 +1045,88 @@ class ScaleAndTranslate(Primitive):
        validator.check_string(kernel_type, ["lanczos1", "lanczos3", "lanczos5", "gaussian", "box", "triangle",
                                             "keyscubic", "mitchellcubic"], "kernel_type", self.name)
        validator.check_value_type("antialias", antialias, [bool], self.name)


class CombinedNonMaxSuppression(Primitive):
    r"""
    Greedily selects a subset of bounding boxes in descending order of score.

    Args:
        clip_boxes (bool): If True, assume the box coordinates are between [0, 1] and clip the output boxes
            if they fall beyond [0, 1]. If False, do not do clipping and output the box coordinates as they are.
            Defaults to True.
        pad_per_class (bool): If False, the output nmsed boxes, scores and classes are padded/clipped to
            max_total_size. If True, the output nmsed boxes, scores and classes are padded to be of length
            max_output_size_per_class * num_classes, unless it exceeds max_total_size, in which case it is
            clipped to max_total_size. Defaults to False.

    Inputs:
        - **boxes** (Tensor) - A Tensor of type float32 and shape (batch_size, num_boxes, q, 4).
          If q is 1, the same boxes are used for all classes; otherwise,
          if q is equal to the number of classes, class-specific boxes are used.
        - **scores** (Tensor) - A Tensor of type float32 and shape (batch_size, num_boxes, num_classes)
          representing a single score corresponding to each box (each row of boxes).
        - **max_output_size_per_class** - A 0D Tensor of type int32, representing the max number of boxes to be
          selected by non max suppression per class.
        - **max_total_size** - A 0D Tensor of type int32, representing the maximum number of boxes retained over
          all classes.
        - **iou_threshold** - A 0D float32 tensor representing the threshold for deciding whether boxes overlap
          too much with respect to IOU; iou_threshold must be in the range [0, 1].
        - **score_threshold** - A 0D float32 tensor representing the threshold for deciding when to remove
          boxes based on score.

    Outputs:
        - **nmsed_boxes** - A Tensor of float32 with shape (batch_size, num_detection, 4), which contains
          the non-max suppressed boxes (num_detection is described in the note below).
        - **nmsed_scores** - A Tensor of float32 with shape (batch_size, num_detection), which contains the scores
          of the boxes.
        - **nmsed_classes** - A Tensor of float32 with shape (batch_size, num_detection), which contains the classes
          of the boxes.
        - **valid_detections** - A Tensor of int32 with shape (batch_size,), which indicates the number of valid
          detections in each batch.
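
    Note:
        num_detection in the output shapes is max_total_size when pad_per_class is False, and
        min(max_output_size_per_class * num_classes, max_total_size) when pad_per_class is True.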

    Raises:
        TypeError: If the dtype of `boxes`, `scores`, `iou_threshold`, `score_threshold` is not float32.
        TypeError: If the dtype of `max_output_size_per_class` and `max_total_size` is not int32.
        ValueError: If `boxes` is not 4D.
        ValueError: If `max_output_size_per_class`, `max_total_size`, `iou_threshold` and `score_threshold` are not 0D.
        ValueError: If shape[0] of `boxes` is not the same as shape[0] of `scores`.
        ValueError: If `scores` is not 3D.
        ValueError: If shape[1] of `boxes` is not the same as shape[1] of `scores`.
        ValueError: If shape[2] of `boxes` is not the same as shape[2] of `scores` or 1.
        ValueError: If `max_total_size` is less than or equal to 0.
        ValueError: If `max_output_size_per_class` is less than or equal to 0.
        ValueError: If `iou_threshold` is not in [0, 1].

    Supported Platforms:
        ``Ascend`` ``CPU``

    Examples:
        >>> boxes = Tensor(np.array([[[[200, 100, 150, 100]],
        ...                           [[220, 120, 150, 100]],
        ...                           [[190, 110, 150, 100]],
        ...                           [[210, 112, 150, 100]]]])).astype('float32')
        >>> scores = Tensor(np.array([[[0.2000, 0.7000, 0.1000], [0.1000, 0.8000, 0.1000], [0.3000, 0.6000, 0.1000],
        ...                            [0.0500, 0.9000, 0.0500]]])).astype('float32')
        >>> max_output_size_per_class = Tensor(4, mstype.int32)
        >>> max_total_size = Tensor(1, mstype.int32)
        >>> iou_threshold = Tensor(0, mstype.float32)
        >>> score_threshold = Tensor(0, mstype.float32)
        >>> net = P.CombinedNonMaxSuppression()
        >>> out = net(boxes, scores, max_output_size_per_class, max_total_size, iou_threshold, score_threshold)
        >>> print(out)
        (Tensor(shape=[1, 1, 4], dtype=Float32, value= [[[1.00000000e+00, 1.00000000e+00, 1.00000000e+00,
        1.00000000e+00]]]),
        Tensor(shape=[1, 1], dtype=Float32, value= [[ 8.99999976e-01]]),
        Tensor(shape=[1, 1], dtype=Float32, value= [[ 1.00000000e+00]]),
        Tensor(shape=[1], dtype=Int32, value= [1]))
    """

    @prim_attr_register
    def __init__(self, pad_per_class=False, clip_boxes=True):
        """Initialize CombinedNonMaxSuppression"""
        self.pad_per_class = validator.check_value_type("pad_per_class", pad_per_class, [bool], self.name)
        self.add_prim_attr('pad_per_class', self.pad_per_class)
        self.clip_boxes = validator.check_value_type("clip_boxes", clip_boxes, [bool], self.name)
        self.add_prim_attr('clip_boxes', self.clip_boxes)
@ -29,7 +29,7 @@ from mindspore.ops import operations as P
from mindspore.ops.function.math_func import matrix_exp
from mindspore.ops.function.math_func import sinc
from mindspore.ops.operations.image_ops import CropAndResizeGradBoxes, AdjustHue, AdjustContrastv2, \
-    AdjustSaturation
+    AdjustSaturation, CombinedNonMaxSuppression
from mindspore.ops.operations.image_ops import ExtractGlimpse
from mindspore.ops.operations import _grad_ops as G
from mindspore.ops.operations import _inner_ops as inner
@ -4030,6 +4030,17 @@ test_case_image_ops = [
                          Tensor([3, 4], mstype.int32), Tensor([5, 6], mstype.float32),
                          Tensor([0.1, 0.8], mstype.float32)],
        'desc_bprop': [Tensor(np.random.rand(2, 2, 2, 4), mstype.float32)]}),
    ('CombinedNonMaxSuppression', {
        'block': CombinedNonMaxSuppression(),
        'desc_inputs': [Tensor(np.array([[[[200, 100, 150, 100]], [[220, 120, 150, 100]], [[190, 110, 150, 100]],
                                          [[210, 112, 150, 100]]]]).astype(np.float32)),
                        Tensor(np.array([[[0.2000, 0.7000, 0.1000], [0.1000, 0.8000, 0.1000],
                                          [0.3000, 0.6000, 0.1000], [0.0500, 0.9000, 0.0500]]]).astype(np.float32)),
                        Tensor(4, mstype.int32),
                        Tensor(1, mstype.int32),
                        Tensor(0, mstype.float32),
                        Tensor(0, mstype.float32)],
        'skip': ['backward']}),
]

test_case_other_ops = [