optimize cpu unique

This commit is contained in:
kswang 2020-10-29 21:02:39 +08:00
parent 7d250f2218
commit 618c05d454
7 changed files with 440 additions and 92 deletions

View File

@ -19,45 +19,67 @@
namespace mindspore {
namespace kernel {
const size_t kUseBucketUniqueSize = 100000;
const size_t kUniqueThreadNum = 23;
// Caches the flat input length and the element dtype from the kernel node.
void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
// NOTE(review): both n_ and input_size_ receive the same value here; n_
// appears to be a member of the removed revision of this class (this span
// looks like a mis-merged diff) -- confirm which assignment should remain.
n_ = input_shape[0];
input_size_ = input_shape[0];
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}
// Registers the three scratch buffers used by LaunchKernel: the sort
// permutation (input_idx_), the bucket value area (workspace_) and the bucket
// origin-index area (workspace_idx_). Each is sized for the widest element
// type (int64_t) so one allocation covers every supported dtype.
void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) {
  CPUKernel::InitInputOutputSize(kernel_node);
  const size_t scratch_bytes = input_size_ * sizeof(int64_t);
  for (int buffer = 0; buffer < 3; ++buffer) {
    workspace_size_list_.emplace_back(scratch_bytes);
  }
}
// Dispatches to the typed LaunchKernel implementation based on input dtype.
// NOTE(review): this span interleaves two revisions of the function -- the
// removed two-argument LaunchKernel<T>(inputs, outputs) calls and the added
// three-argument LaunchKernel<T, int>(inputs, workspace, outputs) calls, plus
// a duplicated kNumberTypeFloat32 branch. Reconcile against the intended
// final revision before relying on this text.
bool UniqueCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float>(inputs, outputs);
LaunchKernel<int, int>(inputs, workspace, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
LaunchKernel<int64_t, int>(inputs, workspace, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
LaunchKernel<float, int>(inputs, workspace, outputs);
}
return true;
}
// NOTE(review): this span interleaves the REMOVED single-threaded
// LaunchKernel<T>(inputs, outputs) with the ADDED
// LaunchKernel<DataType, IndexType>(inputs, workspace, outputs); fragments of
// the old body (e.g. the stray loop over `uniq` below) sit inside the new
// one. The comments mark which revision each region belongs to.
template <typename T>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
// Removed implementation: hash-map based unique over the flat input.
auto x_addr = reinterpret_cast<T *>(inputs[0]->addr);
auto y_addr = reinterpret_cast<T *>(outputs[0]->addr);
auto idx_addr = reinterpret_cast<int64_t *>(outputs[1]->addr);
std::unordered_map<T, int64_t> uniq;
int n = SizeToInt(n_);
uniq.reserve(n * 2);
for (int i = 0, j = 0; i < n; ++i) {
auto it = uniq.emplace(x_addr[i], j);
idx_addr[i] = it.first->second;
if (it.second) {
++j;
}
// Added implementation: validates the buffer lists, packs the raw pointers
// into a UniqueParam and selects the single-threaded or bucketed algorithm.
template <typename DataType, typename IndexType>
void UniqueCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
// Nothing to do for an empty input tensor.
if (input_size_ == 0) {
return;
}
// NOTE(review): the next two lines are a leftover fragment of the removed
// implementation's output-writing loop.
for (const auto &it : uniq) {
y_addr[it.second] = it.first;
if (inputs.size() < 1) {
MS_LOG(EXCEPTION) << "Input size should be large than 0";
}
if (workspace.size() < 3) {
MS_LOG(EXCEPTION) << "workspace size should be large than 2";
}
if (outputs.size() < 2) {
MS_LOG(EXCEPTION) << "Output size should be large than 1";
}
// All pointers stored below are non-owning views into kernel-managed
// input/output/workspace buffers.
auto params = std::make_shared<UniqueParam<DataType, IndexType>>();
params->input_ = reinterpret_cast<DataType *>(inputs[0]->addr);
params->input_idx_ = reinterpret_cast<IndexType *>(workspace[0]->addr);
params->workspace_ = reinterpret_cast<DataType *>(workspace[1]->addr);
params->workspace_idx_ = reinterpret_cast<IndexType *>(workspace[2]->addr);
params->output_ = reinterpret_cast<DataType *>(outputs[0]->addr);
params->inverse_idx_ = reinterpret_cast<IndexType *>(outputs[1]->addr);
params->input_size_ = input_size_;
params->output_size_ = 0;
params->need_sort_ = true;
params->thread_num_ = kUniqueThreadNum;
// Small inputs stay single-threaded; large inputs (>= kUseBucketUniqueSize)
// take the multi-threaded bucketed path.
if (input_size_ < kUseBucketUniqueSize) {
Unique(params);
} else {
BucketUnique(params);
}
// Publish the unique count for consumers such as UniqueWithPad's padding.
output_size_ = params->output_size_;
}
void UniqueCPUKernel::CheckParam(const CNodePtr &kernel_node) {

View File

@ -16,31 +16,339 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_CPU_KERNEL_H_
#include <vector>
#include <algorithm>
#include <memory>
#include <thread>
#include <unordered_map>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
namespace mindspore {
namespace kernel {
// Parameter pack shared by the single-threaded and bucketed unique
// implementations. Every pointer is a non-owning view into kernel-provided
// input/output/workspace buffers; BucketUnique also re-points these fields at
// sub-ranges to describe per-segment and per-bucket slices.
template <typename DataType, typename IndexType>
struct UniqueParam {
DataType *input_{nullptr};      // values to deduplicate
IndexType *input_idx_{nullptr};  // scratch: sort permutation (Unique) / per-bucket inverse ids (BucketUnique)
DataType *output_{nullptr};      // unique values
IndexType *inverse_idx_{nullptr};  // for each input element, its index into output_
DataType *workspace_{nullptr};     // scratch: per-bucket unique values before merging
IndexType *workspace_idx_{nullptr};  // scratch: original global index of each bucketed element
IndexType input_size_{0};   // number of valid elements in input_
IndexType output_size_{0};  // number of unique values produced (set by the algorithm)
size_t thread_num_{0};      // worker thread count; doubles as the bucket count
bool need_sort_{true};      // true: sort-based dedup (ascending output); false: hash-based
};
class UniqueCPUKernel : public CPUKernel {
public:
UniqueCPUKernel() = default;
~UniqueCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
void InitInputOutputSize(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
template <typename DataType, typename IndexType>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);
private:
void CheckParam(const CNodePtr &kernel_node);
size_t n_{0};
protected:
virtual void CheckParam(const CNodePtr &kernel_node);
size_t input_size_{0};
TypeId dtype_{kTypeUnknown};
size_t output_size_{0};
// Maps a value to one of `bucket_num` buckets.
// Any deterministic mapping works for correctness -- buckets only partition
// the work -- so the exact bucket chosen is an implementation detail.
// Returns 0 when bucket_num is 0 instead of dividing by zero.
template <typename DataType>
static size_t BucketId(DataType data, size_t bucket_num) {
  if (bucket_num == 0) {
    return 0;
  }
  // Cast through a signed 64-bit integer first: casting a negative
  // floating-point value directly to size_t is undefined behavior, and
  // float32 is one of the registered dtypes for this kernel.
  return static_cast<size_t>(static_cast<int64_t>(data)) % bucket_num;
}
// Histograms one input segment: counts how many of its elements fall into
// each bucket, accumulating into *each_bucket_size (whose length defines the
// bucket count). Run concurrently, one call per segment.
template <typename DataType, typename IndexType>
static void CalculateEachBucketSize(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
                                    std::vector<IndexType> *each_bucket_size) {
  MS_EXCEPTION_IF_NULL(params);
  MS_EXCEPTION_IF_NULL(params->input_);
  MS_EXCEPTION_IF_NULL(each_bucket_size);
  const size_t bucket_count = each_bucket_size->size();
  const DataType *values = params->input_;
  const IndexType total = params->input_size_;
  for (IndexType pos = 0; pos < total; ++pos) {
    const auto target = BucketId(values[pos], bucket_count);
    each_bucket_size->at(target)++;
  }
}
// Splits the input into thread_num_ contiguous segments and histograms each
// segment's bucket occupancy on its own thread. Outputs: *segments_ptr gets
// one UniqueParam view per segment; *segment_bucket_sizes_ptr gets each
// segment's per-bucket element counts.
template <typename DataType, typename IndexType>
static void SplitAndCalculateBucketSize(
const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr,
std::vector<std::shared_ptr<std::vector<IndexType>>> *segment_bucket_sizes_ptr) {
MS_EXCEPTION_IF_NULL(params);
MS_EXCEPTION_IF_NULL(params->input_);
MS_EXCEPTION_IF_NULL(segments_ptr);
MS_EXCEPTION_IF_NULL(segment_bucket_sizes_ptr);
auto &segments = *segments_ptr;
auto &segment_bucket_sizes = *segment_bucket_sizes_ptr;
IndexType input_size = params->input_size_;
size_t thread_num = params->thread_num_;
if (thread_num < 1) {
MS_LOG(EXCEPTION) << "Thread num must > 0 !";
}
// Distribute input_size over thread_num segments; the first
// (input_size % thread_num) segments get one extra element.
IndexType thread_data_size = input_size / thread_num;
size_t left_data_size = input_size % thread_num;
std::vector<std::thread> threads;
threads.reserve(thread_num);
segments.reserve(thread_num);
segment_bucket_sizes.reserve(thread_num);
IndexType current_offset = 0;
for (size_t i = 0; i < thread_num; ++i) {
// One histogram per segment; bucket count equals thread_num.
segment_bucket_sizes.emplace_back(std::make_shared<std::vector<IndexType>>(thread_num, 0));
IndexType data_size = thread_data_size;
if (i < left_data_size) {
data_size += 1;
}
// The segment is a non-owning window [current_offset, current_offset+data_size)
// into the original input buffer.
segments.emplace_back(std::make_shared<UniqueParam<DataType, IndexType>>());
segments[i]->input_ = params->input_ + current_offset;
segments[i]->input_size_ = data_size;
segments[i]->thread_num_ = thread_num;
threads.emplace_back(
std::thread(CalculateEachBucketSize<DataType, IndexType>, segments[i], segment_bucket_sizes[i].get()));
current_offset += data_size;
}
// Wait for all histogram workers before the caller reads the counts.
for (size_t i = 0; i < params->thread_num_; ++i) {
threads[i].join();
}
}
// Scatters one input segment into its per-thread bucket slices chosen by
// BucketId(). For every element it records both the value and the element's
// global index (segment_offset + i) so the merged inverse indices can be
// reconstructed after the per-bucket unique pass. bucket_data_num tracks the
// next free slot inside each bucket slice.
template <typename DataType, typename IndexType>
static void SegmentToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &segment,
                             IndexType segment_offset,
                             const std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> &buckets) {
  MS_LOG(DEBUG) << "Start";
  MS_EXCEPTION_IF_NULL(segment);
  MS_EXCEPTION_IF_NULL(segment->input_);
  std::vector<IndexType> bucket_data_num(segment->thread_num_, 0);
  auto bucket_size = buckets.size();
  for (IndexType i = 0; i < segment->input_size_; ++i) {
    DataType data = segment->input_[i];
    auto bucket_id = BucketId(data, segment->thread_num_);
    // Validate the bucket id BEFORE using it as an index: the original code
    // indexed bucket_data_num[bucket_id] first and range-checked afterwards.
    if (bucket_id >= bucket_size || bucket_id >= bucket_data_num.size()) {
      MS_LOG(ERROR) << "Error bucket id!";
      continue;
    }
    auto bucket_index = bucket_data_num[bucket_id];
    auto &bucket = buckets[bucket_id];
    MS_EXCEPTION_IF_NULL(bucket);
    if (bucket_index >= bucket->input_size_) {
      MS_LOG(ERROR) << "Error bucket index!";
      continue;
    }
    bucket->input_[bucket_index] = data;
    bucket->workspace_idx_[bucket_index] = segment_offset + i;
    bucket_data_num[bucket_id]++;
  }
  MS_LOG(DEBUG) << "End";
}
// Carves per-bucket views out of the kernel's flat buffers using the
// per-segment histograms, then scatters every segment into its buckets in
// parallel. After this call each bucket in *buckets_ptr holds its elements
// (in bucket->input_) and their original global indices (workspace_idx_),
// ready for the per-bucket Unique pass.
template <typename DataType, typename IndexType>
static void GatherSegmentsToBuckets(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *segments_ptr,
std::vector<std::shared_ptr<std::vector<IndexType>>> *segment_bucket_sizes_ptr,
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> *buckets_ptr) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(params);
MS_EXCEPTION_IF_NULL(params->workspace_);
MS_EXCEPTION_IF_NULL(params->inverse_idx_);
MS_EXCEPTION_IF_NULL(params->workspace_idx_);
MS_EXCEPTION_IF_NULL(params->output_);
MS_EXCEPTION_IF_NULL(params->input_idx_);
MS_EXCEPTION_IF_NULL(segments_ptr);
MS_EXCEPTION_IF_NULL(segment_bucket_sizes_ptr);
MS_EXCEPTION_IF_NULL(buckets_ptr);
auto &segments = *segments_ptr;
auto &segment_bucket_sizes = *segment_bucket_sizes_ptr;
auto &buckets = *buckets_ptr;
auto thread_num = segments.size();
buckets.reserve(thread_num);
// Total size of each bucket = sum of that bucket's count over all segments.
std::vector<IndexType> bucket_data_size(thread_num, 0);
for (size_t i = 0; i < thread_num; ++i) {
for (size_t j = 0; j < thread_num; ++j) {
bucket_data_size[j] += segment_bucket_sizes[i]->at(j);
}
}
// Lay the buckets out back-to-back inside the flat buffers. Note the role
// swap: a bucket's input_ lives in params->output_ and its output_ in
// params->workspace_, so the per-bucket Unique writes into workspace_ and
// MergeBuckets later compacts results back into params->output_.
IndexType current_offset = 0;
for (size_t i = 0; i < thread_num; ++i) {
auto bucket = std::make_shared<UniqueParam<DataType, IndexType>>();
bucket->input_ = params->output_ + current_offset;
bucket->input_idx_ = params->inverse_idx_ + current_offset;
bucket->workspace_idx_ = params->workspace_idx_ + current_offset;
bucket->output_ = params->workspace_ + current_offset;
bucket->inverse_idx_ = params->input_idx_ + current_offset;
bucket->input_size_ = bucket_data_size[i];
current_offset += bucket_data_size[i];
buckets.emplace_back(bucket);
}
// Give every segment-thread its own disjoint sub-window of each bucket
// (offset by the counts of earlier segments) so the parallel scatter below
// needs no locking.
std::vector<IndexType> tmp_bucket_data_size(thread_num, 0);
std::vector<std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>>> thread_buckets;
for (size_t i = 0; i < thread_num; ++i) {
std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> local_buckets;
for (size_t j = 0; j < thread_num; ++j) {
auto bucket = std::make_shared<UniqueParam<DataType, IndexType>>();
bucket->input_ = buckets[j]->input_ + tmp_bucket_data_size[j];
bucket->input_size_ = buckets[j]->input_size_ - tmp_bucket_data_size[j];
bucket->workspace_idx_ = buckets[j]->workspace_idx_ + tmp_bucket_data_size[j];
local_buckets.emplace_back(bucket);
tmp_bucket_data_size[j] += segment_bucket_sizes[i]->at(j);
}
thread_buckets.emplace_back(local_buckets);
}
// Scatter all segments concurrently; segment_offset keeps global indices.
std::vector<std::thread> threads;
threads.reserve(thread_num);
current_offset = 0;
for (size_t i = 0; i < thread_num; ++i) {
MS_EXCEPTION_IF_NULL(segments[i]);
threads.emplace_back(
std::thread(SegmentToBuckets<DataType, IndexType>, segments[i], current_offset, thread_buckets[i]));
current_offset += segments[i]->input_size_;
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
MS_LOG(DEBUG) << "End";
}
// Deduplicates params->input_ into params->output_ and writes, for every
// input element, the index of its unique value into params->inverse_idx_.
// Two strategies:
//  * need_sort_ == true: argsort via input_idx_, then walk the sorted order
//    so the unique values come out ascending.
//  * need_sort_ == false: hash-map based, unique values in insertion order.
// On return params->output_size_ holds the number of unique values.
template <typename DataType, typename IndexType>
static void Unique(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params) {
  MS_LOG(DEBUG) << "Start";
  MS_EXCEPTION_IF_NULL(params);
  DataType *input = params->input_;
  IndexType *input_idx = params->input_idx_;
  DataType *output = params->output_;
  IndexType *inverse_idx = params->inverse_idx_;
  MS_EXCEPTION_IF_NULL(input);
  MS_EXCEPTION_IF_NULL(input_idx);
  MS_EXCEPTION_IF_NULL(output);
  MS_EXCEPTION_IF_NULL(inverse_idx);
  // BucketUnique can hand this function an empty bucket (skewed data leaves
  // some buckets with no elements). The original code unconditionally read
  // input[0] -- out of bounds for an empty segment -- and reported
  // output_size_ == 1 on the sorted path even with no data, which injected
  // garbage values into the merged output.
  if (params->input_size_ <= 0) {
    params->output_size_ = 0;
    MS_LOG(DEBUG) << "End";
    return;
  }
  IndexType j = 0;
  if (params->need_sort_) {
    // Build an index permutation that sorts the input without moving it.
    for (IndexType i = 0; i < params->input_size_; ++i) {
      input_idx[i] = i;
    }
    std::sort(input_idx, input_idx + params->input_size_,
              [&](IndexType left, IndexType right) { return input[left] < input[right]; });
    // Walk in sorted order; bump j at each value change.
    DataType last = input[0];
    for (IndexType i = 0; i < params->input_size_; ++i) {
      auto curr = input[input_idx[i]];
      if (i == 0 || curr != last) {
        if (i != 0) {
          j++;
        }
        output[j] = curr;
        inverse_idx[input_idx[i]] = j;
        last = curr;
      } else {
        inverse_idx[input_idx[i]] = j;
      }
    }
    params->output_size_ = j + 1;
  } else {
    std::unordered_map<DataType, IndexType> uniq;
    uniq.reserve(params->input_size_);
    for (IndexType i = 0; i < params->input_size_; ++i) {
      // emplace returns {iterator, inserted}; repeats reuse the stored id.
      auto it = uniq.emplace(input[i], j);
      inverse_idx[i] = it.first->second;
      if (it.second) {
        ++j;
      }
    }
    for (const auto &it : uniq) {
      output[it.second] = it.first;
    }
    params->output_size_ = j;
  }
  MS_LOG(DEBUG) << "End";
}
template <typename DataType, typename IndexType>
static void UniqueEachBucket(const std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> &buckets) {
MS_LOG(DEBUG) << "Start";
size_t thread_num = buckets.size();
std::vector<std::thread> threads;
threads.reserve(thread_num);
for (size_t i = 0; i < thread_num; ++i) {
threads.emplace_back(std::thread(Unique<DataType, IndexType>, buckets[i]));
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
MS_LOG(DEBUG) << "End";
}
// Rebase one bucket's local inverse indices into the merged result: for each
// bucketed element, write (local unique id + offset) back to the element's
// original position in result->inverse_idx_, using the global positions saved
// in bucket->workspace_idx_. Out-of-range saved positions are skipped.
template <typename DataType, typename IndexType>
static void TransformBucketReverseIndices(const std::shared_ptr<UniqueParam<DataType, IndexType>> &bucket,
                                          const std::shared_ptr<UniqueParam<DataType, IndexType>> &result,
                                          IndexType offset) {
  MS_EXCEPTION_IF_NULL(bucket);
  MS_EXCEPTION_IF_NULL(bucket->inverse_idx_);
  MS_EXCEPTION_IF_NULL(bucket->workspace_idx_);
  MS_EXCEPTION_IF_NULL(result);
  MS_EXCEPTION_IF_NULL(result->inverse_idx_);
  const IndexType element_count = bucket->input_size_;
  for (IndexType pos = 0; pos < element_count; ++pos) {
    const auto global_idx = bucket->workspace_idx_[pos];
    const bool in_range = (global_idx >= 0) && (global_idx < result->input_size_);
    if (in_range) {
      result->inverse_idx_[global_idx] = bucket->inverse_idx_[pos] + offset;
    }
  }
}
// Concatenates every bucket's unique values into result->output_ and rebases
// each bucket's local inverse indices by that bucket's offset in the merged
// output. Sets result->output_size_ to the total unique count.
template <typename DataType, typename IndexType>
static void MergeBuckets(const std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> &buckets,
const std::shared_ptr<UniqueParam<DataType, IndexType>> &result) {
MS_LOG(DEBUG) << "Start";
MS_EXCEPTION_IF_NULL(result);
MS_EXCEPTION_IF_NULL(result->output_);
size_t thread_num = buckets.size();
std::vector<IndexType> bucket_offsets(thread_num);
IndexType current_size = 0;
// Sequentially copy each bucket's unique values, remembering where each
// bucket starts so its inverse indices can be shifted below.
for (size_t i = 0; i < thread_num; ++i) {
auto bucket = buckets[i];
MS_EXCEPTION_IF_NULL(bucket);
MS_EXCEPTION_IF_NULL(bucket->output_);
bucket_offsets[i] = current_size;
// memcpy_s destination bound: the merged unique count can never exceed
// result->input_size_, so the remaining room caps the copy.
auto ret_code = memcpy_s(result->output_ + current_size, (result->input_size_ - current_size) * sizeof(DataType),
bucket->output_, bucket->output_size_ * sizeof(DataType));
if (ret_code != EOK) {
MS_LOG(EXCEPTION) << "Failed to copy data!";
}
current_size += bucket->output_size_;
}
result->output_size_ = current_size;
// Rebase inverse indices in parallel, one thread per bucket.
std::vector<std::thread> threads;
threads.reserve(thread_num);
for (size_t i = 0; i < thread_num; ++i) {
threads.emplace_back(
std::thread(TransformBucketReverseIndices<DataType, IndexType>, buckets[i], result, bucket_offsets[i]));
}
for (size_t i = 0; i < thread_num; ++i) {
threads[i].join();
}
MS_LOG(DEBUG) << "End";
}
// Multi-threaded unique pipeline: split the input into per-thread segments,
// histogram bucket occupancy, scatter the segments into buckets, deduplicate
// every bucket in parallel, then merge the per-bucket results into the final
// output and inverse indices.
template <typename DataType, typename IndexType>
static void BucketUnique(const std::shared_ptr<UniqueParam<DataType, IndexType>> &params) {
  MS_EXCEPTION_IF_NULL(params);
  std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> segments;
  std::vector<std::shared_ptr<std::vector<IndexType>>> segment_bucket_sizes;
  SplitAndCalculateBucketSize(params, &segments, &segment_bucket_sizes);
  std::vector<std::shared_ptr<UniqueParam<DataType, IndexType>>> buckets;
  GatherSegmentsToBuckets(params, &segments, &segment_bucket_sizes, &buckets);
  UniqueEachBucket(buckets);
  MergeBuckets(buckets, params);
}
};
MS_REG_CPU_KERNEL(

View File

@ -19,49 +19,33 @@
namespace mindspore {
namespace kernel {
void UniqueWithPadCPUKernel::InitKernel(const CNodePtr &kernel_node) {
CheckParam(kernel_node);
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
n_ = SizeToLong(input_shape[0]);
dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0);
}
bool UniqueWithPadCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> & /*workspace*/,
const std::vector<kernel::AddressPtr> &workspace,
const std::vector<kernel::AddressPtr> &outputs) {
UniqueCPUKernel::Launch(inputs, workspace, outputs);
if (dtype_ == kNumberTypeInt32) {
LaunchKernel<int>(inputs, outputs);
PadOutput<int>(inputs, outputs);
} else if (dtype_ == kNumberTypeInt64) {
LaunchKernel<int64_t>(inputs, outputs);
} else {
MS_LOG(EXCEPTION) << "Only unsupported int32 or int64 dtype";
PadOutput<int64_t>(inputs, outputs);
} else if (dtype_ == kNumberTypeFloat32) {
PadOutput<float>(inputs, outputs);
}
return true;
}
template <typename T>
void UniqueWithPadCPUKernel::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &outputs) {
T *a = reinterpret_cast<T *>(inputs[0]->addr);
void UniqueWithPadCPUKernel::PadOutput(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs) {
if (inputs.size() < 2) {
MS_LOG(EXCEPTION) << "Input size should be large than 1";
}
if (outputs.size() < 1) {
MS_LOG(EXCEPTION) << "Output size should be large than 0";
}
T pad_num = *reinterpret_cast<T *>(inputs[1]->addr);
T *out = reinterpret_cast<T *>(outputs[0]->addr);
T *idx_vec = reinterpret_cast<T *>(outputs[1]->addr);
for (int64_t i = 0; i < n_; ++i) {
for (size_t i = output_size_; i < input_size_; ++i) {
out[i] = pad_num;
}
std::unordered_map<T, int> uniq;
uniq.reserve(n_);
for (int64_t i = 0, j = 0; i < n_; ++i) {
auto it = uniq.emplace(a[i], j);
idx_vec[i] = it.first->second;
if (it.second) {
++j;
}
}
for (const auto &it : uniq) {
out[it.second] = it.first;
}
}
void UniqueWithPadCPUKernel::CheckParam(const CNodePtr &kernel_node) {

View File

@ -16,31 +16,26 @@
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_WITH_PAD_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_UNIQUE_WITH_PAD_CPU_KERNEL_H_
#include <vector>
#include <memory>
#include <unordered_map>
#include <vector>
#include "backend/kernel_compiler/cpu/cpu_kernel.h"
#include "backend/kernel_compiler/cpu/cpu_kernel_factory.h"
#include "backend/kernel_compiler/cpu/unique_cpu_kernel.h"
namespace mindspore {
namespace kernel {
class UniqueWithPadCPUKernel : public CPUKernel {
class UniqueWithPadCPUKernel : public UniqueCPUKernel {
public:
UniqueWithPadCPUKernel() = default;
~UniqueWithPadCPUKernel() override = default;
void InitKernel(const CNodePtr &kernel_node) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override;
template <typename T>
void LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
void PadOutput(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
private:
void CheckParam(const CNodePtr &kernel_node);
int64_t n_{0};
TypeId dtype_{kTypeUnknown};
protected:
void CheckParam(const CNodePtr &kernel_node) override;
};
MS_REG_CPU_KERNEL(UniqueWithPad,
@ -56,7 +51,15 @@ MS_REG_CPU_KERNEL(UniqueWithPad,
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64)
.AddOutputAttr(kNumberTypeInt64),
.AddOutputAttr(kNumberTypeInt32),
UniqueWithPadCPUKernel);
MS_REG_CPU_KERNEL(UniqueWithPad,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeInt32),
UniqueWithPadCPUKernel);
} // namespace kernel
} // namespace mindspore

View File

@ -33,7 +33,7 @@ class Net(nn.Cell):
return self.uniq(x)
def test_net():
def test_net_fp32():
x = Tensor(np.array([1, 2, 5, 2]), mstype.float32)
uniq = Net()
output = uniq(x)
@ -45,3 +45,31 @@ def test_net():
assert (output[0].asnumpy() == expect_y_result).all()
assert (output[1].asnumpy() == expect_idx_result).all()
def test_net_int32():
    """Unique over an int32 tensor: checks unique values and inverse indices."""
    x = Tensor(np.array([1, 2, 5, 2]), mstype.int32)
    net = Net()
    out = net(x)
    print("x:\n", x)
    print("y:\n", out[0])
    print("idx:\n", out[1])
    expected_y = [1, 2, 5]
    expected_idx = [0, 1, 2, 1]
    assert (out[0].asnumpy() == expected_y).all()
    assert (out[1].asnumpy() == expected_idx).all()
def test_net_int64():
    """Unique over an int64 tensor: checks unique values and inverse indices."""
    x = Tensor(np.array([1, 2, 5, 2]), mstype.int64)
    net = Net()
    out = net(x)
    print("x:\n", x)
    print("y:\n", out[0])
    print("idx:\n", out[1])
    expected_y = [1, 2, 5]
    expected_idx = [0, 1, 2, 1]
    assert (out[0].asnumpy() == expected_y).all()
    assert (out[1].asnumpy() == expected_idx).all()

View File

@ -29,7 +29,7 @@ class UniqueCpuKernelTest : public UT::Common {
UniqueCpuKernelTest() : unique_(std::make_shared<UniqueCPUKernel>()) {}
void SetUp() override {
unique_->n_ = 9;
unique_->input_size_ = 9;
unique_->dtype_ = kNumberTypeFloat32;
inputs_.clear();
workspace_.clear();
@ -42,16 +42,19 @@ class UniqueCpuKernelTest : public UT::Common {
return kernel_addr;
}
void CreateInputAddress() { inputs_.push_back(CreateKernelAddress(x_.data())); }
void CreateOutputAddress() {
void CreateAddress() {
inputs_.push_back(CreateKernelAddress(x_.data()));
outputs_.push_back(CreateKernelAddress(y_.data()));
outputs_.push_back(CreateKernelAddress(idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
}
std::vector<float> x_;
std::vector<float> y_;
std::vector<int64_t> idx_;
std::vector<int> idx_;
std::vector<int64_t> workspace_idx_;
std::vector<AddressPtr> inputs_;
std::vector<AddressPtr> workspace_;
std::vector<AddressPtr> outputs_;
@ -62,13 +65,13 @@ TEST_F(UniqueCpuKernelTest, compute_test) {
x_ = {1, 1, 2, 4, 4, 4, 7, 8, 8};
y_ = {1, 1, 1, 1, 1};
idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateInputAddress();
CreateOutputAddress();
workspace_idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateAddress();
unique_->Launch(inputs_, workspace_, outputs_);
// check compute result
std::vector<float> expect_y{1, 2, 4, 7, 8};
std::vector<int64_t> expect_idx{0, 0, 1, 2, 2, 2, 3, 4, 4};
std::vector<int> expect_idx{0, 0, 1, 2, 2, 2, 3, 4, 4};
EXPECT_TRUE(y_ == expect_y);
EXPECT_TRUE(idx_ == expect_idx);
}

View File

@ -29,7 +29,7 @@ class UniqueWithPadCpuKernelTest : public UT::Common {
UniqueWithPadCpuKernelTest() : unique_with_pad_(std::make_shared<UniqueWithPadCPUKernel>()) {}
void SetUp() override {
unique_with_pad_->n_ = 10;
unique_with_pad_->input_size_ = 10;
unique_with_pad_->dtype_ = kNumberTypeInt64;
inputs_.clear();
workspace_.clear();
@ -42,21 +42,21 @@ class UniqueWithPadCpuKernelTest : public UT::Common {
return kernel_addr;
}
void CreateInputAddress() {
void CreateAddress() {
inputs_.push_back(CreateKernelAddress(x_.data()));
inputs_.push_back(CreateKernelAddress(&pad_dim_));
;
}
void CreateOutputAddress() {
outputs_.push_back(CreateKernelAddress(out_.data()));
outputs_.push_back(CreateKernelAddress(idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
workspace_.push_back(CreateKernelAddress(workspace_idx_.data()));
}
std::vector<int64_t> x_;
int64_t pad_dim_;
std::vector<int64_t> out_;
std::vector<int64_t> idx_;
std::vector<int> idx_;
std::vector<int64_t> workspace_idx_;
std::vector<AddressPtr> inputs_;
std::vector<AddressPtr> workspace_;
std::vector<AddressPtr> outputs_;
@ -68,13 +68,13 @@ TEST_F(UniqueWithPadCpuKernelTest, compute_test) {
pad_dim_ = 8;
out_ = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateInputAddress();
CreateOutputAddress();
workspace_idx_ = {1, 1, 1, 1, 1, 1, 1, 1, 1};
CreateAddress();
unique_with_pad_->Launch(inputs_, workspace_, outputs_);
// check compute result
std::vector<int64_t> expect_out{1, 5, 4, 3, 2, 8, 8, 8, 8, 8};
std::vector<int64_t> expect_idx{0, 0, 1, 1, 2, 2, 3, 3, 4, 4};
std::vector<int64_t> expect_out{1, 2, 3, 4, 5, 8, 8, 8, 8, 8};
std::vector<int> expect_idx{0, 0, 4, 4, 3, 3, 2, 2, 1, 1};
EXPECT_TRUE(out_ == expect_out);
EXPECT_TRUE(idx_ == expect_idx);
}