Refactor ImageFolderNode and BatchNode into their own separate files
Fix CI, address review comments, and remove duplicated code in datasets.cc
This commit is contained in:
parent
915fa0eb5f
commit
52eec881ec
|
@ -78,6 +78,8 @@ add_dependencies(callback core)
|
||||||
add_dependencies(text core)
|
add_dependencies(text core)
|
||||||
add_dependencies(text-kernels core)
|
add_dependencies(text-kernels core)
|
||||||
add_dependencies(cpp-API core)
|
add_dependencies(cpp-API core)
|
||||||
|
add_dependencies(engine-ir-datasetops core)
|
||||||
|
add_dependencies(engine-ir-datasetops-source core)
|
||||||
if (ENABLE_PYTHON)
|
if (ENABLE_PYTHON)
|
||||||
add_dependencies(APItoPython core)
|
add_dependencies(APItoPython core)
|
||||||
endif()
|
endif()
|
||||||
|
@ -99,6 +101,8 @@ set(submodules
|
||||||
$<TARGET_OBJECTS:lite-cv>
|
$<TARGET_OBJECTS:lite-cv>
|
||||||
$<TARGET_OBJECTS:kernels-data>
|
$<TARGET_OBJECTS:kernels-data>
|
||||||
$<TARGET_OBJECTS:cpp-API>
|
$<TARGET_OBJECTS:cpp-API>
|
||||||
|
$<TARGET_OBJECTS:engine-ir-datasetops>
|
||||||
|
$<TARGET_OBJECTS:engine-ir-datasetops-source>
|
||||||
$<TARGET_OBJECTS:kernels-soft-dvpp-image>
|
$<TARGET_OBJECTS:kernels-soft-dvpp-image>
|
||||||
$<TARGET_OBJECTS:soft-dvpp-utils>
|
$<TARGET_OBJECTS:soft-dvpp-utils>
|
||||||
$<TARGET_OBJECTS:engine-datasetops-source>
|
$<TARGET_OBJECTS:engine-datasetops-source>
|
||||||
|
|
|
@ -61,6 +61,10 @@
|
||||||
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
|
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
|
||||||
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
|
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
|
||||||
|
|
||||||
|
// IR nodes
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
|
||||||
#include "minddata/dataset/core/config_manager.h"
|
#include "minddata/dataset/core/config_manager.h"
|
||||||
#include "minddata/dataset/util/path.h"
|
#include "minddata/dataset/util/path.h"
|
||||||
#include "minddata/dataset/util/random.h"
|
#include "minddata/dataset/util/random.h"
|
||||||
|
@ -69,15 +73,6 @@ namespace mindspore {
|
||||||
namespace dataset {
|
namespace dataset {
|
||||||
namespace api {
|
namespace api {
|
||||||
|
|
||||||
#define RETURN_EMPTY_IF_ERROR(_s) \
|
|
||||||
do { \
|
|
||||||
Status __rc = (_s); \
|
|
||||||
if (__rc.IsError()) { \
|
|
||||||
MS_LOG(ERROR) << __rc; \
|
|
||||||
return {}; \
|
|
||||||
} \
|
|
||||||
} while (false)
|
|
||||||
|
|
||||||
// Function to create the iterator, which will build and launch the execution tree.
|
// Function to create the iterator, which will build and launch the execution tree.
|
||||||
std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> columns) {
|
std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> columns) {
|
||||||
std::shared_ptr<Iterator> iter;
|
std::shared_ptr<Iterator> iter;
|
||||||
|
@ -1283,43 +1278,6 @@ std::vector<std::shared_ptr<DatasetOp>> CSVNode::Build() {
|
||||||
node_ops.push_back(csv_op);
|
node_ops.push_back(csv_op);
|
||||||
return node_ops;
|
return node_ops;
|
||||||
}
|
}
|
||||||
|
|
||||||
ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
|
|
||||||
bool recursive, std::set<std::string> extensions,
|
|
||||||
std::map<std::string, int32_t> class_indexing)
|
|
||||||
: dataset_dir_(dataset_dir),
|
|
||||||
decode_(decode),
|
|
||||||
sampler_(sampler),
|
|
||||||
recursive_(recursive),
|
|
||||||
class_indexing_(class_indexing),
|
|
||||||
exts_(extensions) {}
|
|
||||||
|
|
||||||
Status ImageFolderNode::ValidateParams() {
|
|
||||||
RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderNode", dataset_dir_));
|
|
||||||
|
|
||||||
RETURN_IF_NOT_OK(ValidateDatasetSampler("ImageFolderNode", sampler_));
|
|
||||||
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::shared_ptr<DatasetOp>> ImageFolderNode::Build() {
|
|
||||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
|
||||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
|
||||||
|
|
||||||
// Do internal Schema generation.
|
|
||||||
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
|
|
||||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
|
||||||
TensorShape scalar = TensorShape::CreateScalar();
|
|
||||||
RETURN_EMPTY_IF_ERROR(
|
|
||||||
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
|
||||||
RETURN_EMPTY_IF_ERROR(
|
|
||||||
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
|
|
||||||
node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
|
||||||
recursive_, decode_, exts_, class_indexing_, std::move(schema),
|
|
||||||
std::move(sampler_->Build())));
|
|
||||||
return node_ops;
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef ENABLE_ANDROID
|
#ifndef ENABLE_ANDROID
|
||||||
ManifestNode::ManifestNode(const std::string &dataset_file, const std::string &usage,
|
ManifestNode::ManifestNode(const std::string &dataset_file, const std::string &usage,
|
||||||
const std::shared_ptr<SamplerObj> &sampler,
|
const std::shared_ptr<SamplerObj> &sampler,
|
||||||
|
@ -1800,54 +1758,6 @@ std::vector<std::shared_ptr<DatasetOp>> VOCNode::Build() {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// DERIVED DATASET CLASSES LEAF-NODE DATASETS
|
|
||||||
// (In alphabetical order)
|
|
||||||
|
|
||||||
BatchNode::BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad,
|
|
||||||
std::vector<std::string> cols_to_map,
|
|
||||||
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map)
|
|
||||||
: batch_size_(batch_size),
|
|
||||||
drop_remainder_(drop_remainder),
|
|
||||||
pad_(pad),
|
|
||||||
cols_to_map_(cols_to_map),
|
|
||||||
pad_map_(pad_map) {
|
|
||||||
this->children.push_back(child);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::shared_ptr<DatasetOp>> BatchNode::Build() {
|
|
||||||
// A vector containing shared pointer to the Dataset Ops that this object will create
|
|
||||||
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
|
||||||
|
|
||||||
#ifdef ENABLE_PYTHON
|
|
||||||
py::function noop;
|
|
||||||
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
|
|
||||||
cols_to_map_, cols_to_map_, noop, noop, pad_map_));
|
|
||||||
#else
|
|
||||||
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
|
|
||||||
cols_to_map_, pad_map_));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Until py::function is implemented for C++ API, there is no need for a project op to be inserted after batch
|
|
||||||
// because project is only needed when batch op performs per_batch_map. This per_batch_map is a pyfunc
|
|
||||||
return node_ops;
|
|
||||||
}
|
|
||||||
|
|
||||||
Status BatchNode::ValidateParams() {
|
|
||||||
if (batch_size_ <= 0) {
|
|
||||||
std::string err_msg = "BatchNode: batch_size should be positive integer, but got: " + std::to_string(batch_size_);
|
|
||||||
MS_LOG(ERROR) << err_msg;
|
|
||||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!cols_to_map_.empty()) {
|
|
||||||
std::string err_msg = "BatchNode: cols_to_map functionality is not implemented in C++; this should be left empty.";
|
|
||||||
MS_LOG(ERROR) << err_msg;
|
|
||||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
return Status::OK();
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifndef ENABLE_ANDROID
|
#ifndef ENABLE_ANDROID
|
||||||
BucketBatchByLengthNode::BucketBatchByLengthNode(
|
BucketBatchByLengthNode::BucketBatchByLengthNode(
|
||||||
std::shared_ptr<Dataset> child, const std::vector<std::string> &column_names,
|
std::shared_ptr<Dataset> child, const std::vector<std::string> &column_names,
|
||||||
|
@ -1884,7 +1794,7 @@ std::vector<std::shared_ptr<DatasetOp>> BucketBatchByLengthNode::Build() {
|
||||||
Status BucketBatchByLengthNode::ValidateParams() {
|
Status BucketBatchByLengthNode::ValidateParams() {
|
||||||
if (element_length_function_ == nullptr && column_names_.size() != 1) {
|
if (element_length_function_ == nullptr && column_names_.size() != 1) {
|
||||||
std::string err_msg = "BucketBatchByLengthNode: element_length_function not specified, but not one column name: " +
|
std::string err_msg = "BucketBatchByLengthNode: element_length_function not specified, but not one column name: " +
|
||||||
column_names_.size();
|
std::to_string(column_names_.size());
|
||||||
MS_LOG(ERROR) << err_msg;
|
MS_LOG(ERROR) << err_msg;
|
||||||
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,10 @@
|
||||||
add_subdirectory(datasetops)
|
add_subdirectory(datasetops)
|
||||||
add_subdirectory(opt)
|
add_subdirectory(opt)
|
||||||
add_subdirectory(gnn)
|
add_subdirectory(gnn)
|
||||||
|
add_subdirectory(ir)
|
||||||
add_subdirectory(perf)
|
add_subdirectory(perf)
|
||||||
add_subdirectory(cache)
|
add_subdirectory(cache)
|
||||||
|
|
||||||
if (ENABLE_TDTQUE)
|
if (ENABLE_TDTQUE)
|
||||||
add_subdirectory(tdt)
|
add_subdirectory(tdt)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||||
|
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||||
|
add_subdirectory(datasetops)
|
|
@ -0,0 +1,5 @@
|
||||||
|
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||||
|
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||||
|
add_subdirectory(source)
|
||||||
|
add_library(engine-ir-datasetops OBJECT
|
||||||
|
batch_node.cc)
|
|
@ -0,0 +1,76 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/datasetops/batch_op.h"
|
||||||
|
#include "minddata/dataset/util/status.h"
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
BatchNode::BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad,
|
||||||
|
std::vector<std::string> cols_to_map,
|
||||||
|
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map)
|
||||||
|
: batch_size_(batch_size),
|
||||||
|
drop_remainder_(drop_remainder),
|
||||||
|
pad_(pad),
|
||||||
|
cols_to_map_(cols_to_map),
|
||||||
|
pad_map_(pad_map) {
|
||||||
|
this->children.push_back(child);
|
||||||
|
}
|
||||||
|
|
||||||
|
Status BatchNode::ValidateParams() {
|
||||||
|
if (batch_size_ <= 0) {
|
||||||
|
std::string err_msg = "Batch: batch_size should be positive integer, but got: " + std::to_string(batch_size_);
|
||||||
|
MS_LOG(ERROR) << err_msg;
|
||||||
|
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||||
|
}
|
||||||
|
if (!cols_to_map_.empty()) {
|
||||||
|
std::string err_msg = "cols_to_map functionality is not implemented in C++; this should be left empty.";
|
||||||
|
MS_LOG(ERROR) << err_msg;
|
||||||
|
RETURN_STATUS_SYNTAX_ERROR(err_msg);
|
||||||
|
}
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<DatasetOp>> BatchNode::Build() {
|
||||||
|
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||||
|
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||||
|
|
||||||
|
#ifdef ENABLE_PYTHON
|
||||||
|
py::function noop;
|
||||||
|
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
|
||||||
|
cols_to_map_, cols_to_map_, noop, noop, pad_map_));
|
||||||
|
#else
|
||||||
|
node_ops.push_back(std::make_shared<BatchOp>(batch_size_, drop_remainder_, pad_, connector_que_size_, num_workers_,
|
||||||
|
cols_to_map_, pad_map_));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Until py::function is implemented for C++ API, there is no need for a project op to be inserted after batch
|
||||||
|
// because project is only needed when batch op performs per_batch_map. This per_batch_map is a pyfunc
|
||||||
|
return node_ops;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace api
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,61 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_
|
||||||
|
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/include/datasets.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
class BatchNode : public Dataset {
|
||||||
|
public:
|
||||||
|
/// \brief Constructor
|
||||||
|
BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad,
|
||||||
|
std::vector<std::string> cols_to_map,
|
||||||
|
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map);
|
||||||
|
|
||||||
|
/// \brief Destructor
|
||||||
|
~BatchNode() = default;
|
||||||
|
|
||||||
|
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||||
|
/// \return The list of shared pointers to the newly created DatasetOps
|
||||||
|
std::vector<std::shared_ptr<DatasetOp>> Build() override;
|
||||||
|
|
||||||
|
/// \brief Parameters validation
|
||||||
|
/// \return Status Status::OK() if all the parameters are valid
|
||||||
|
Status ValidateParams() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
int32_t batch_size_;
|
||||||
|
bool drop_remainder_;
|
||||||
|
bool pad_;
|
||||||
|
std::vector<std::string> cols_to_map_;
|
||||||
|
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace api
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_BATCH_NODE_H_
|
|
@ -0,0 +1,4 @@
|
||||||
|
file(GLOB_RECURSE _CURRENT_SRC_FILES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.cc")
|
||||||
|
set_property(SOURCE ${_CURRENT_SRC_FILES} PROPERTY COMPILE_DEFINITIONS SUBMODULE_ID=mindspore::SubModuleId::SM_MD)
|
||||||
|
add_library(engine-ir-datasetops-source OBJECT
|
||||||
|
image_folder_node.cc)
|
|
@ -0,0 +1,70 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/datasetops/source/image_folder_op.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/util/status.h"
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
|
||||||
|
bool recursive, std::set<std::string> extensions,
|
||||||
|
std::map<std::string, int32_t> class_indexing)
|
||||||
|
: dataset_dir_(dataset_dir),
|
||||||
|
decode_(decode),
|
||||||
|
sampler_(sampler),
|
||||||
|
recursive_(recursive),
|
||||||
|
class_indexing_(class_indexing),
|
||||||
|
exts_(extensions) {}
|
||||||
|
|
||||||
|
Status ImageFolderNode::ValidateParams() {
|
||||||
|
RETURN_IF_NOT_OK(ValidateDatasetDirParam("ImageFolderNode", dataset_dir_));
|
||||||
|
|
||||||
|
RETURN_IF_NOT_OK(ValidateDatasetSampler("ImageFolderNode", sampler_));
|
||||||
|
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::shared_ptr<DatasetOp>> ImageFolderNode::Build() {
|
||||||
|
// A vector containing shared pointer to the Dataset Ops that this object will create
|
||||||
|
std::vector<std::shared_ptr<DatasetOp>> node_ops;
|
||||||
|
|
||||||
|
// Do internal Schema generation.
|
||||||
|
// This arg is exist in ImageFolderOp, but not externalized (in Python API).
|
||||||
|
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||||
|
TensorShape scalar = TensorShape::CreateScalar();
|
||||||
|
RETURN_EMPTY_IF_ERROR(
|
||||||
|
schema->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||||
|
RETURN_EMPTY_IF_ERROR(
|
||||||
|
schema->AddColumn(ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
|
||||||
|
node_ops.push_back(std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
||||||
|
recursive_, decode_, exts_, class_indexing_, std::move(schema),
|
||||||
|
std::move(sampler_->Build())));
|
||||||
|
return node_ops;
|
||||||
|
}
|
||||||
|
} // namespace api
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
|
@ -0,0 +1,63 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_IMAGE_FOLDER_NODE_H_
|
||||||
|
#define MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_IMAGE_FOLDER_NODE_H_
|
||||||
|
|
||||||
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
|
#include <set>
|
||||||
|
#include <string>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "minddata/dataset/include/datasets.h"
|
||||||
|
|
||||||
|
namespace mindspore {
|
||||||
|
namespace dataset {
|
||||||
|
|
||||||
|
namespace api {
|
||||||
|
|
||||||
|
/// \class ImageFolderNode
|
||||||
|
/// \brief A Dataset derived class to represent ImageFolder dataset
|
||||||
|
class ImageFolderNode : public Dataset {
|
||||||
|
public:
|
||||||
|
/// \brief Constructor
|
||||||
|
ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
|
||||||
|
std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing);
|
||||||
|
|
||||||
|
/// \brief Destructor
|
||||||
|
~ImageFolderNode() = default;
|
||||||
|
|
||||||
|
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
||||||
|
/// \return The list of shared pointers to the newly created DatasetOps
|
||||||
|
std::vector<std::shared_ptr<DatasetOp>> Build() override;
|
||||||
|
|
||||||
|
/// \brief Parameters validation
|
||||||
|
/// \return Status Status::OK() if all the parameters are valid
|
||||||
|
Status ValidateParams() override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::string dataset_dir_;
|
||||||
|
bool decode_;
|
||||||
|
bool recursive_;
|
||||||
|
std::shared_ptr<SamplerObj> sampler_;
|
||||||
|
std::map<std::string, int32_t> class_indexing_;
|
||||||
|
std::set<std::string> exts_;
|
||||||
|
};
|
||||||
|
} // namespace api
|
||||||
|
} // namespace dataset
|
||||||
|
} // namespace mindspore
|
||||||
|
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_ENGINE_IR_DATASETOPS_SOURCE_IMAGE_FOLDER_NODE_H_
|
|
@ -22,6 +22,7 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <unordered_set>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "minddata/dataset/core/constants.h"
|
#include "minddata/dataset/core/constants.h"
|
||||||
|
@ -65,6 +66,7 @@ class CocoNode;
|
||||||
class CSVNode;
|
class CSVNode;
|
||||||
class CsvBase;
|
class CsvBase;
|
||||||
class ImageFolderNode;
|
class ImageFolderNode;
|
||||||
|
class BatchNode;
|
||||||
#ifndef ENABLE_ANDROID
|
#ifndef ENABLE_ANDROID
|
||||||
class ManifestNode;
|
class ManifestNode;
|
||||||
class MindDataNode;
|
class MindDataNode;
|
||||||
|
@ -77,7 +79,6 @@ class TFRecordNode;
|
||||||
class VOCNode;
|
class VOCNode;
|
||||||
#endif
|
#endif
|
||||||
// Dataset Op classes (in alphabetical order)
|
// Dataset Op classes (in alphabetical order)
|
||||||
class BatchNode;
|
|
||||||
#ifndef ENABLE_ANDROID
|
#ifndef ENABLE_ANDROID
|
||||||
class BucketBatchByLengthNode;
|
class BucketBatchByLengthNode;
|
||||||
class BuildVocabNode;
|
class BuildVocabNode;
|
||||||
|
@ -92,6 +93,30 @@ class SkipNode;
|
||||||
class TakeNode;
|
class TakeNode;
|
||||||
class ZipNode;
|
class ZipNode;
|
||||||
|
|
||||||
|
#define RETURN_EMPTY_IF_ERROR(_s) \
|
||||||
|
do { \
|
||||||
|
Status __rc = (_s); \
|
||||||
|
if (__rc.IsError()) { \
|
||||||
|
MS_LOG(ERROR) << __rc; \
|
||||||
|
return {}; \
|
||||||
|
} \
|
||||||
|
} while (false)
|
||||||
|
|
||||||
|
// Helper function to validate dataset num_shards and shard_id parameters
|
||||||
|
Status ValidateDatasetShardParams(const std::string &dataset_name, int32_t num_shards, int32_t shard_id);
|
||||||
|
|
||||||
|
// Helper function to validate dataset sampler parameter
|
||||||
|
Status ValidateDatasetSampler(const std::string &dataset_name, const std::shared_ptr<SamplerObj> &sampler);
|
||||||
|
|
||||||
|
Status ValidateStringValue(const std::string &str, const std::unordered_set<std::string> &valid_strings);
|
||||||
|
|
||||||
|
// Helper function to validate dataset input/output column parameter
|
||||||
|
Status ValidateDatasetColumnParam(const std::string &dataset_name, const std::string &column_param,
|
||||||
|
const std::vector<std::string> &columns);
|
||||||
|
|
||||||
|
// Helper function to validate dataset directory parameter
|
||||||
|
Status ValidateDatasetDirParam(const std::string &dataset_name, std::string dataset_dir);
|
||||||
|
|
||||||
/// \brief Function to create a SchemaObj
|
/// \brief Function to create a SchemaObj
|
||||||
/// \param[in] schema_file Path of schema file
|
/// \param[in] schema_file Path of schema file
|
||||||
/// \return Shared pointer to the current schema
|
/// \return Shared pointer to the current schema
|
||||||
|
@ -915,34 +940,6 @@ class CSVNode : public Dataset {
|
||||||
int32_t shard_id_;
|
int32_t shard_id_;
|
||||||
};
|
};
|
||||||
|
|
||||||
/// \class ImageFolderNode
|
|
||||||
/// \brief A Dataset derived class to represent ImageFolder dataset
|
|
||||||
class ImageFolderNode : public Dataset {
|
|
||||||
public:
|
|
||||||
/// \brief Constructor
|
|
||||||
ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
|
|
||||||
std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing);
|
|
||||||
|
|
||||||
/// \brief Destructor
|
|
||||||
~ImageFolderNode() = default;
|
|
||||||
|
|
||||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
|
||||||
/// \return The list of shared pointers to the newly created DatasetOps
|
|
||||||
std::vector<std::shared_ptr<DatasetOp>> Build() override;
|
|
||||||
|
|
||||||
/// \brief Parameters validation
|
|
||||||
/// \return Status Status::OK() if all the parameters are valid
|
|
||||||
Status ValidateParams() override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::string dataset_dir_;
|
|
||||||
bool decode_;
|
|
||||||
bool recursive_;
|
|
||||||
std::shared_ptr<SamplerObj> sampler_;
|
|
||||||
std::map<std::string, int32_t> class_indexing_;
|
|
||||||
std::set<std::string> exts_;
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifndef ENABLE_ANDROID
|
#ifndef ENABLE_ANDROID
|
||||||
class ManifestNode : public Dataset {
|
class ManifestNode : public Dataset {
|
||||||
public:
|
public:
|
||||||
|
@ -1202,32 +1199,6 @@ class VOCNode : public Dataset {
|
||||||
// DERIVED DATASET CLASSES FOR DATASET OPS
|
// DERIVED DATASET CLASSES FOR DATASET OPS
|
||||||
// (In alphabetical order)
|
// (In alphabetical order)
|
||||||
|
|
||||||
class BatchNode : public Dataset {
|
|
||||||
public:
|
|
||||||
/// \brief Constructor
|
|
||||||
BatchNode(std::shared_ptr<Dataset> child, int32_t batch_size, bool drop_remainder, bool pad,
|
|
||||||
std::vector<std::string> cols_to_map,
|
|
||||||
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map);
|
|
||||||
|
|
||||||
/// \brief Destructor
|
|
||||||
~BatchNode() = default;
|
|
||||||
|
|
||||||
/// \brief a base class override function to create the required runtime dataset op objects for this class
|
|
||||||
/// \return The list of shared pointers to the newly created DatasetOps
|
|
||||||
std::vector<std::shared_ptr<DatasetOp>> Build() override;
|
|
||||||
|
|
||||||
/// \brief Parameters validation
|
|
||||||
/// \return Status Status::OK() if all the parameters are valid
|
|
||||||
Status ValidateParams() override;
|
|
||||||
|
|
||||||
private:
|
|
||||||
int32_t batch_size_;
|
|
||||||
bool drop_remainder_;
|
|
||||||
bool pad_;
|
|
||||||
std::vector<std::string> cols_to_map_;
|
|
||||||
std::map<std::string, std::pair<TensorShape, std::shared_ptr<Tensor>>> pad_map_;
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifndef ENABLE_ANDROID
|
#ifndef ENABLE_ANDROID
|
||||||
class BucketBatchByLengthNode : public Dataset {
|
class BucketBatchByLengthNode : public Dataset {
|
||||||
public:
|
public:
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
#include "common/common.h"
|
#include "common/common.h"
|
||||||
#include "minddata/dataset/include/datasets.h"
|
#include "minddata/dataset/include/datasets.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
|
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
using mindspore::dataset::TensorShape;
|
using mindspore::dataset::TensorShape;
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#include "minddata/dataset/include/datasets.h"
|
#include "minddata/dataset/include/datasets.h"
|
||||||
#include "minddata/dataset/include/vision.h"
|
#include "minddata/dataset/include/vision.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
|
|
||||||
|
|
|
@ -19,12 +19,14 @@
|
||||||
#include "minddata/dataset/core/config_manager.h"
|
#include "minddata/dataset/core/config_manager.h"
|
||||||
#include "minddata/dataset/core/global_context.h"
|
#include "minddata/dataset/core/global_context.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
|
|
||||||
using namespace mindspore::dataset;
|
using namespace mindspore::dataset;
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::Tensor;
|
|
||||||
using mindspore::dataset::ShuffleMode;
|
|
||||||
using mindspore::dataset::TensorShape;
|
|
||||||
using mindspore::dataset::DataType;
|
using mindspore::dataset::DataType;
|
||||||
|
using mindspore::dataset::ShuffleMode;
|
||||||
|
using mindspore::dataset::Tensor;
|
||||||
|
using mindspore::dataset::TensorShape;
|
||||||
|
|
||||||
class MindDataTestPipeline : public UT::DatasetOpTesting {
|
class MindDataTestPipeline : public UT::DatasetOpTesting {
|
||||||
protected:
|
protected:
|
||||||
|
@ -355,11 +357,9 @@ TEST_F(MindDataTestPipeline, TestTFRecordDatasetShard) {
|
||||||
|
|
||||||
// Create a TFRecord Dataset
|
// Create a TFRecord Dataset
|
||||||
// Each file has two columns("image", "label") and 3 rows
|
// Each file has two columns("image", "label") and 3 rows
|
||||||
std::vector<std::string> files = {
|
std::vector<std::string> files = {datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data",
|
||||||
datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0001.data",
|
datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0002.data",
|
||||||
datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0002.data",
|
datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0003.data"};
|
||||||
datasets_root_path_ + "/test_tf_file_3_images2/train-0000-of-0003.data"
|
|
||||||
};
|
|
||||||
std::shared_ptr<Dataset> ds1 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, true);
|
std::shared_ptr<Dataset> ds1 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, true);
|
||||||
EXPECT_NE(ds1, nullptr);
|
EXPECT_NE(ds1, nullptr);
|
||||||
std::shared_ptr<Dataset> ds2 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, false);
|
std::shared_ptr<Dataset> ds2 = TFRecord({files}, "", {}, 0, ShuffleMode::kFalse, 2, 1, false);
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
#include "common/common.h"
|
#include "common/common.h"
|
||||||
#include "minddata/dataset/include/datasets.h"
|
#include "minddata/dataset/include/datasets.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
using mindspore::dataset::TensorShape;
|
using mindspore::dataset::TensorShape;
|
||||||
|
@ -183,19 +185,19 @@ TEST_F(MindDataTestPipeline, TestImageFolderFailWithWrongSampler) {
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) {
|
TEST_F(MindDataTestPipeline, TestMnistFailWithWrongDatasetDir) {
|
||||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir.";
|
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithWrongDatasetDir.";
|
||||||
|
|
||||||
// Create a Mnist Dataset
|
// Create a Mnist Dataset
|
||||||
std::shared_ptr<Dataset> ds = Mnist("", "all", RandomSampler(false, 10));
|
std::shared_ptr<Dataset> ds = Mnist("", "all", RandomSampler(false, 10));
|
||||||
EXPECT_EQ(ds, nullptr);
|
EXPECT_EQ(ds, nullptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) {
|
TEST_F(MindDataTestPipeline, TestMnistFailWithNullSampler) {
|
||||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler.";
|
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestMnistFailWithNullSampler.";
|
||||||
|
|
||||||
// Create a Mnist Dataset
|
// Create a Mnist Dataset
|
||||||
std::string folder_path = datasets_root_path_ + "/testMnistData/";
|
std::string folder_path = datasets_root_path_ + "/testMnistData/";
|
||||||
std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", nullptr);
|
std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", nullptr);
|
||||||
// Expect failure: sampler can not be nullptr
|
// Expect failure: sampler can not be nullptr
|
||||||
EXPECT_EQ(ds, nullptr);
|
EXPECT_EQ(ds, nullptr);
|
||||||
}
|
}
|
|
@ -16,6 +16,9 @@
|
||||||
#include "common/common.h"
|
#include "common/common.h"
|
||||||
#include "minddata/dataset/include/datasets.h"
|
#include "minddata/dataset/include/datasets.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
|
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
|
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#include "minddata/dataset/include/transforms.h"
|
#include "minddata/dataset/include/transforms.h"
|
||||||
#include "minddata/dataset/include/vision.h"
|
#include "minddata/dataset/include/vision.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
|
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::BorderType;
|
using mindspore::dataset::BorderType;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#include "minddata/dataset/include/transforms.h"
|
#include "minddata/dataset/include/transforms.h"
|
||||||
#include "minddata/dataset/include/vision.h"
|
#include "minddata/dataset/include/vision.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
using namespace mindspore::dataset::api;
|
using namespace mindspore::dataset::api;
|
||||||
using mindspore::dataset::BorderType;
|
using mindspore::dataset::BorderType;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
|
|
|
@ -20,6 +20,8 @@
|
||||||
#include "minddata/dataset/include/datasets.h"
|
#include "minddata/dataset/include/datasets.h"
|
||||||
#include "minddata/dataset/include/transforms.h"
|
#include "minddata/dataset/include/transforms.h"
|
||||||
|
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
|
||||||
|
#include "minddata/dataset/engine/ir/datasetops/batch_node.h"
|
||||||
using namespace mindspore::dataset;
|
using namespace mindspore::dataset;
|
||||||
using mindspore::dataset::Tensor;
|
using mindspore::dataset::Tensor;
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue