!31137 Test SkipFirstEpochSampler

Merge pull request !31137 from zetongzhao/test_skip_first_epoch_sampler
This commit is contained in:
i-robot 2022-03-15 18:26:28 +00:00 committed by Gitee
commit 272d25b291
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
8 changed files with 290 additions and 4 deletions

View File

@ -29,7 +29,7 @@ SkipFirstEpochSamplerObj::~SkipFirstEpochSamplerObj() = default;
Status SkipFirstEpochSamplerObj::to_json(nlohmann::json *const out_json) {
nlohmann::json args;
RETURN_IF_NOT_OK(SamplerObj::to_json(&args));
args["sampler_name"] = "SkipFirstEpochSamplerObj";
args["sampler_name"] = "SkipFirstEpochSampler";
args["start_index"] = start_index_;
*out_json = args;
return Status::OK();
@ -37,7 +37,7 @@ Status SkipFirstEpochSamplerObj::to_json(nlohmann::json *const out_json) {
#ifndef ENABLE_ANDROID
Status SkipFirstEpochSamplerObj::from_json(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler) {
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "start_index", "SkipFirstEpochSamplerObj"));
RETURN_IF_NOT_OK(ValidateParamInJson(json_obj, "start_index", "SkipFirstEpochSampler"));
int64_t start_index = json_obj["start_index"];
*sampler = std::make_shared<SkipFirstEpochSamplerObj>(start_index);
// Run common code in super class to add children samplers

View File

@ -218,6 +218,10 @@ Status Serdes::CreateDatasetOperationNode(const std::shared_ptr<DatasetNode> &ds
}
Status Serdes::ConstructSampler(nlohmann::json json_obj, std::shared_ptr<SamplerObj> *sampler) {
if (json_obj["sampler_name"] == "SkipFirstEpochSampler") {
RETURN_IF_NOT_OK(SkipFirstEpochSamplerObj::from_json(json_obj, sampler));
return Status::OK();
}
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("num_samples") != json_obj.end(), "Failed to find num_samples");
CHECK_FAIL_RETURN_UNEXPECTED(json_obj.find("sampler_name") != json_obj.end(), "Failed to find sampler_name");
int64_t num_samples = json_obj["num_samples"];

View File

@ -65,6 +65,7 @@
#include "minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/skip_first_epoch_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h"

View File

@ -163,6 +163,7 @@ SET(DE_UT_SRCS
rgba_to_bgr_op_test.cc
rgba_to_rgb_op_test.cc
schema_test.cc
skip_first_epoch_sampler_test.cc
skip_pushdown_optimization_pass_test.cc
slice_op_test.cc
sliding_window_op_test.cc

View File

@ -257,7 +257,6 @@ TEST_F(MindDataTestDeserialize, TestDeserializeCSV) {
std::vector<std::string> columns = {"col1", "col4", "col2"};
std::vector<std::shared_ptr<CsvBase>> column_defaults = {};
std::shared_ptr<DatasetCache> cache = nullptr;
std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
std::shared_ptr<DatasetNode> ds = std::make_shared<CSVNode>(dataset_files, field_delim, column_defaults, column_names,
3, ShuffleMode::kGlobal, 1, 0, cache);
ds = std::make_shared<ProjectNode>(ds, columns);
@ -301,7 +300,10 @@ TEST_F(MindDataTestDeserialize, TestDeserializeImageFolder) {
TEST_F(MindDataTestDeserialize, TestDeserializeManifest) {
MS_LOG(INFO) << "Doing MindDataTestDeserialize-Manifest.";
std::string data_file = "./data/dataset/testManifestData/cpp.json";
// Add SkipFirstEpochSampler to pipeline - for recovery test coverage, since users cannot add this sampler explicitly
std::shared_ptr<SamplerObj> sampler = std::make_shared<SequentialSamplerObj>(0, 10);
std::shared_ptr<SamplerObj> child_sampler = std::make_shared<SkipFirstEpochSamplerObj>(0);
sampler->AddChildSampler(child_sampler);
std::map<std::string, int32_t> class_indexing = {};
std::shared_ptr<DatasetCache> cache = nullptr;
std::shared_ptr<DatasetNode> ds =

View File

@ -14,17 +14,19 @@
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/distributed_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/pk_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/prebuilt_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/random_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/samplers_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/sequential_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/skip_first_epoch_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_random_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/subset_sampler_ir.h"
#include "minddata/dataset/engine/ir/datasetops/source/samplers/weighted_random_sampler_ir.h"
#include "minddata/dataset/core/tensor.h"
using namespace mindspore::dataset;
using mindspore::dataset::Tensor;
@ -68,6 +70,11 @@ TEST_F(MindDataTestIrSampler, TestCalculateNumSamples) {
sampl->SamplerBuild(&sampler_rt);
EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), 11);
sampl = std::make_shared<SkipFirstEpochSamplerObj>(0);
EXPECT_NE(sampl, nullptr);
sampl->SamplerBuild(&sampler_rt);
EXPECT_EQ(sampler_rt->CalculateNumSamples(num_rows), -1);
// Testing chains
// Parent and child have num_samples
std::shared_ptr<SamplerObj> sampl1 = std::make_shared<WeightedRandomSamplerObj>(weights, 12);
@ -107,6 +114,13 @@ TEST_F(MindDataTestIrSampler, TestCalculateNumSamples) {
sampl6->SamplerBuild(&sampler_rt6);
sampler_rt6->AddChild(sampler_rt5);
EXPECT_EQ(sampler_rt6->CalculateNumSamples(num_rows), -1);
std::shared_ptr<SamplerObj> sampl7 = std::make_shared<SkipFirstEpochSamplerObj>(0);
EXPECT_NE(sampl7, nullptr);
std::shared_ptr<SamplerRT> sampler_rt7;
sampl7->SamplerBuild(&sampler_rt7);
sampler_rt7->AddChild(sampler_rt5);
EXPECT_EQ(sampler_rt7->CalculateNumSamples(num_rows), -1);
}
TEST_F(MindDataTestIrSampler, TestSamplersMoveParameters) {
@ -122,3 +136,94 @@ TEST_F(MindDataTestIrSampler, TestSamplersMoveParameters) {
sampl2->SamplerBuild(&sampler_rt2);
EXPECT_NE(sampler_rt, nullptr);
}
/// Feature: MindData IR Sampler Support
/// Description: Compile an ImageFolder pipeline that uses SkipFirstEpochSampler and run it for more than one epoch
/// Expectation: Every fetch succeeds; the first epoch yields 43 rows, every later epoch yields 44 rows.
TEST_F(MindDataTestIrSampler, TestSkipFirstEpochSampler) {
  MS_LOG(INFO) << "Doing MindDataTestIrSampler-TestSkipFirstEpochSampler.";
  std::string dataset_dir = "./data/dataset/testPK/data";
  std::set<std::string> extensions = {};
  std::shared_ptr<DatasetCache> cache = nullptr;
  std::map<std::string, int32_t> class_indexing = {};
  // The sampler is built with argument 1; the expectations below are one row fewer in the first epoch.
  std::shared_ptr<SamplerObj> sampler = std::make_shared<SkipFirstEpochSamplerObj>(1);
  std::shared_ptr<DatasetNode> ds =
    std::make_shared<ImageFolderNode>(dataset_dir, false, sampler, false, extensions, class_indexing, cache);
  auto ir_tree = std::make_shared<TreeAdapter>();

  // Compile with more than one epoch. Nothing below is meaningful if this fails, so stop right away.
  int32_t num_epoch = 3;
  ASSERT_OK(ir_tree->Compile(ds, num_epoch, 0));

  for (int32_t epoch = 0; epoch < num_epoch; ++epoch) {
    TensorRow row;
    // Abort on a failed fetch: a failure may leave `row` non-empty, which would keep the loop below spinning.
    ASSERT_OK(ir_tree->GetNext(&row));
    int count = 0;
    // An empty row marks the end of the current epoch.
    while (row.size() != 0) {
      ++count;
      ASSERT_OK(ir_tree->GetNext(&row));
    }
    const int expected_rows = (epoch == 0) ? 43 : 44;
    EXPECT_EQ(count, expected_rows);
  }
}
/// Feature: MindData IR Sampler Support
/// Description: Compare SequentialSampler and SkipFirstEpochSampler over more than one epoch
/// Expectation: Row by row, both pipelines produce the same tensors. From the second epoch on, the leading rows
///     of the SkipFirstEpochSampler pipeline are discarded first, because it only skips in the first epoch.
TEST_F(MindDataTestIrSampler, CompareSequentialSamplerAndSkipFirstEpochSampler) {
  MS_LOG(INFO) << "Doing MindDataTestIrSampler-CompareSequentialSamplerAndSkipFirstEpochSampler.";
  std::string dataset_dir = "./data/dataset/testPK/data";
  std::set<std::string> extensions = {};
  std::shared_ptr<DatasetCache> cache = nullptr;
  std::map<std::string, int32_t> class_indexing = {};
  int32_t skip_num = 2;
  std::shared_ptr<SamplerObj> sampler1 = std::make_shared<SequentialSamplerObj>(skip_num, 0);
  std::shared_ptr<SamplerObj> sampler2 = std::make_shared<SkipFirstEpochSamplerObj>(skip_num);
  std::shared_ptr<DatasetNode> ds1 =
    std::make_shared<ImageFolderNode>(dataset_dir, false, sampler1, false, extensions, class_indexing, cache);
  std::shared_ptr<DatasetNode> ds2 =
    std::make_shared<ImageFolderNode>(dataset_dir, false, sampler2, false, extensions, class_indexing, cache);
  auto ir_tree1 = std::make_shared<TreeAdapter>();
  auto ir_tree2 = std::make_shared<TreeAdapter>();

  // Compile with more than one epoch
  int32_t num_epoch = 3;
  EXPECT_OK(ir_tree1->Compile(ds1, num_epoch, 0));
  EXPECT_OK(ir_tree2->Compile(ds2, num_epoch, 0));

  for (int32_t epoch = 0; epoch < num_epoch; ++epoch) {
    TensorRow row1;
    TensorRow row2;
    // Only the first epoch has the same output out of the box.
    if (epoch != 0) {
      // SkipFirstEpochSampler doesn't skip after the first epoch, so drop the leading rows by hand.
      for (int32_t skipped = 0; skipped < skip_num; ++skipped) {
        EXPECT_OK(ir_tree2->GetNext(&row2));
      }
    }
    EXPECT_OK(ir_tree1->GetNext(&row1));
    EXPECT_OK(ir_tree2->GetNext(&row2));
    EXPECT_EQ(row1.size(), row2.size());

    while (row1.size() != 0 && row2.size() != 0) {
      const auto &r1 = row1.getRow();
      const auto &r2 = row2.getRow();
      ASSERT_EQ(r1.size(), r2.size());
      // Unsigned column index with its own name: it no longer shadows the epoch counter
      // and matches the type returned by size().
      for (size_t col = 0; col < r1.size(); ++col) {
        // Tensors are compared through their serialized JSON text.
        nlohmann::json out_json1;
        EXPECT_OK(r1[col]->to_json(&out_json1));
        std::stringstream json_ss1;
        json_ss1 << out_json1;

        nlohmann::json out_json2;
        EXPECT_OK(r2[col]->to_json(&out_json2));
        std::stringstream json_ss2;
        json_ss2 << out_json2;

        EXPECT_EQ(json_ss1.str(), json_ss2.str());
      }
      EXPECT_OK(ir_tree1->GetNext(&row1));
      EXPECT_OK(ir_tree2->GetNext(&row2));
    }
    // The loop stops as soon as either pipeline returns an empty row; both must have reached
    // the end of the epoch on the same fetch, otherwise one pipeline produced extra rows.
    EXPECT_EQ(row1.size(), row2.size());
  }
}

View File

@ -0,0 +1,129 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "common/common.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/skip_first_epoch_sampler.h"
#include "utils/log_adapter.h"
using namespace mindspore::dataset;
// Fixture shared by the SkipFirstEpochSamplerRT unit tests in this file.
class MindDataTestSkipFirstEpochSampler : public UT::Common {
 public:
  // Stand-in RandomAccessOp whose only job is to report a fixed number of rows.
  class DummyRandomAccessOp : public RandomAccessOp {
   public:
    // The row count is stored in the protected member inherited from the base class.
    // No GetNumRowsInDataset override is provided; the base class default is relied upon.
    explicit DummyRandomAccessOp(uint64_t num_rows) { this->num_rows_ = num_rows; }
  };
};
/// Feature: MindData SkipFirstEpochSampler Support
/// Description: Test MindData SkipFirstEpochSampler Reset: draw one epoch, reset the sampler, draw a second epoch
/// Expectation: Each epoch returns one row holding all sample ids, followed by an end-of-epoch row.
TEST_F(MindDataTestSkipFirstEpochSampler, TestResetReplacement) {
  MS_LOG(INFO) << "Doing MindDataTestSkipFirstEpochSampler-TestResetReplacement.";
  uint64_t total_samples = 1000000;

  // SkipFirstEpochSamplerRT takes no replacement option here; it is built with the arguments (0, 0).
  SkipFirstEpochSamplerRT m_sampler(0, 0);
  DummyRandomAccessOp dummyRandomAccessOp(total_samples);
  m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);

  TensorRow row;
  std::vector<uint64_t> out;
  // Appends every id stored in the tensors of `row` to `out`.
  auto collect_ids = [&row, &out]() {
    for (const auto &t : row) {
      for (auto it = t->begin<uint64_t>(); it != t->end<uint64_t>(); ++it) {
        out.push_back(*it);
      }
    }
  };

  // First epoch
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  collect_ids();
  // Same sampler setup as TestResetNoReplacement, which expects every id in the first fetch.
  ASSERT_EQ(out.size(), total_samples);
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  ASSERT_EQ(row.eoe(), true);

  // Second epoch, after reset
  m_sampler.ResetSampler();
  out.clear();
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  collect_ids();
  ASSERT_EQ(out.size(), total_samples);
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  ASSERT_EQ(row.eoe(), true);
}
/// Feature: MindData SkipFirstEpochSampler Support
/// Description: Test MindData SkipFirstEpochSampler Reset and count how often each sample id is produced
/// Expectation: Each epoch returns num_samples ids and, after the reset, no id is drawn more than once.
TEST_F(MindDataTestSkipFirstEpochSampler, TestResetNoReplacement) {
  MS_LOG(INFO) << "Doing MindDataTestSkipFirstEpochSampler-TestResetNoReplacement.";
  // num samples to draw.
  uint64_t num_samples = 1000000;
  uint64_t total_samples = 1000000;
  // freq[id] counts how many times `id` was produced in the current epoch.
  std::vector<uint64_t> freq(total_samples, 0);

  // SkipFirstEpochSamplerRT takes no replacement option here; it is built with the arguments (0, 0).
  SkipFirstEpochSamplerRT m_sampler(0, 0);
  DummyRandomAccessOp dummyRandomAccessOp(total_samples);
  m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);

  TensorRow row;
  std::vector<uint64_t> out;
  // Appends every id stored in the tensors of `row` to `out` and bumps its counter.
  auto collect_ids = [&row, &out, &freq]() {
    for (const auto &t : row) {
      for (auto it = t->begin<uint64_t>(); it != t->end<uint64_t>(); ++it) {
        out.push_back(*it);
        freq[*it]++;
      }
    }
  };

  // First epoch
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  collect_ids();
  ASSERT_EQ(num_samples, out.size());
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  ASSERT_EQ(row.eoe(), true);

  // Second epoch, after reset: start again from empty bookkeeping.
  m_sampler.ResetSampler();
  out.clear();
  freq.assign(total_samples, 0);
  MS_LOG(INFO) << "Resetting sampler";
  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  collect_ids();
  ASSERT_EQ(num_samples, out.size());

  // Each sample that was drawn must have been drawn exactly once.
  // freq holds exactly total_samples counters, so this visits the same ids as an index loop,
  // without comparing a signed index against an unsigned bound.
  for (const uint64_t times_drawn : freq) {
    if (times_drawn != 0) {
      ASSERT_EQ(times_drawn, uint64_t{1});
    }
  }

  ASSERT_EQ(m_sampler.GetNextSample(&row), Status::OK());
  ASSERT_EQ(row.eoe(), true);
}

View File

@ -21,6 +21,7 @@
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/skip_first_epoch_sampler.h"
#include "minddata/dataset/util/status.h"
#include "gtest/gtest.h"
#include "utils/log_adapter.h"
@ -101,3 +102,46 @@ TEST_F(MindDataTestStandAloneSampler, TestStandAoneSequentialSampler) {
tensor = sample_row[0];
EXPECT_TRUE((*tensor) == (*label2));
}
/// Feature: MindData RT SkipFirstEpochSampler Support
/// Description: Test MindData RT SkipFirstEpochSampler by checking the ids returned before and after a reset,
///     and with a non-zero start index
/// Expectation: Every fetch succeeds and returns the expected id tensor.
TEST_F(MindDataTestStandAloneSampler, TestStandAloneSkipFirstEpochSampler) {
  MS_LOG(INFO) << "Doing MindDataTestStandAloneSampler-TestStandAloneSkipFirstEpochSampler.";
  MockStorageOp mock(5);
  uint64_t res[5] = {0, 1, 2, 3, 4};
  // label: all five ids, label1: the first three ids, label2: the last two ids.
  std::shared_ptr<Tensor> label, label1, label2;
  CreateINT64Tensor(&label, 5, reinterpret_cast<unsigned char *>(res));
  CreateINT64Tensor(&label1, 3, reinterpret_cast<unsigned char *>(res));
  CreateINT64Tensor(&label2, 2, reinterpret_cast<unsigned char *>(res + 3));

  int64_t num_samples = 0;
  int64_t start_index = 0;
  // NOTE(review): the third constructor argument (3) presumably caps the ids per returned tensor,
  // which matches the 3 + 2 split expected below - confirm against the sampler header.
  std::shared_ptr<SamplerRT> sampler = std::make_shared<SkipFirstEpochSamplerRT>(start_index, num_samples, 3);
  std::shared_ptr<Tensor> tensor;
  TensorRow sample_row;
  sampler->HandshakeRandomAccessOp(&mock);

  // The fetch status is asserted before sample_row[0] is read, so a failed fetch
  // cannot lead to indexing into a row that was never filled.
  ASSERT_EQ(sampler->GetNextSample(&sample_row), Status::OK());
  tensor = sample_row[0];
  EXPECT_TRUE((*tensor) == (*label1));
  ASSERT_EQ(sampler->GetNextSample(&sample_row), Status::OK());
  tensor = sample_row[0];
  EXPECT_TRUE((*tensor) == (*label2));

  // Test output after Reset
  sampler->ResetSampler();
  ASSERT_EQ(sampler->GetNextSample(&sample_row), Status::OK());
  tensor = sample_row[0];
  EXPECT_TRUE((*tensor) == (*label));

  // Test different start index: the first fetch is expected to hold ids 2, 3 and 4.
  start_index = 2;
  CreateINT64Tensor(&label, 3, reinterpret_cast<unsigned char *>(res + 2));
  sampler = std::make_shared<SkipFirstEpochSamplerRT>(start_index, num_samples, 3);
  sampler->HandshakeRandomAccessOp(&mock);
  ASSERT_EQ(sampler->GetNextSample(&sample_row), Status::OK());
  tensor = sample_row[0];
  EXPECT_TRUE((*tensor) == (*label));
}