!34529 Support specific data set decryption

Merge pull request !34529 from 刘勇琪/master-decrypt-column
This commit is contained in:
i-robot 2022-05-31 02:47:19 +00:00 committed by Gitee
commit fedccd0d63
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
26 changed files with 538 additions and 38 deletions

View File

@ -20,6 +20,7 @@ mindspore.dataset.CelebADataset
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ 。默认值None不使用缓存。
- **decrypt** (callable, 可选) - 图像解密函数接受加密的图片路径并返回bytes类型的解密数据。默认值None不进行解密。
**异常:**

View File

@ -19,6 +19,7 @@
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ 。默认值None不使用缓存。
- **extra_metadata** (bool, 可选) - 用于指定是否额外输出一个数据列用于表示图片元信息。如果为True则将额外输出一个名为 `[_meta-filename, dtype=string]` 的数据列默认值False。
- **decrypt** (callable, 可选) - 图像解密函数接受加密的图片路径并返回bytes类型的解密数据。默认值None不进行解密。
[表1] 根据不同 `task` 参数设置,生成数据集具有不同的输出列:

View File

@ -20,6 +20,7 @@ mindspore.dataset.ImageFolderDataset
- **num_shards** (int, 可选) - 指定分布式训练时将数据集进行划分的分片数默认值None。指定此参数后 `num_samples` 表示每个分片的最大样本数。
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ 。默认值None不使用缓存。
- **decrypt** (callable, 可选) - 图像解密函数接受加密的图片路径并返回bytes类型的解密数据。默认值None不进行解密。
**异常:**

View File

@ -24,6 +24,7 @@ mindspore.dataset.VOCDataset
- **shard_id** (int, 可选) - 指定分布式训练时使用的分片ID号默认值None。只有当指定了 `num_shards` 时才能指定此参数。
- **cache** (DatasetCache, 可选) - 单节点数据缓存服务,用于加快数据集处理,详情请阅读 `单节点数据缓存 <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ 。默认值None不使用缓存。
- **extra_metadata** (bool, 可选) - 用于指定是否额外输出一个数据列用于表示图片元信息。如果为True则将额外输出一个名为 `[_meta-filename, dtype=string]` 的数据列默认值False。
- **decrypt** (callable, 可选) - 图像解密函数接受加密的图片路径并返回bytes类型的解密数据。默认值None不进行解密。
根据给定的 `task` 配置,生成数据集具有不同的输出列:

View File

@ -129,13 +129,14 @@ PYBIND_REGISTER(Caltech256Node, 2, ([](const py::module *m) {
PYBIND_REGISTER(CelebANode, 2, ([](const py::module *m) {
(void)py::class_<CelebANode, DatasetNode, std::shared_ptr<CelebANode>>(*m, "CelebANode",
"to create a CelebANode")
.def(py::init([](const std::string &dataset_dir, const std::string &usage,
const py::handle &sampler, bool decode, const py::list &extensions) {
auto celebA = std::make_shared<CelebANode>(dataset_dir, usage, toSamplerObj(sampler), decode,
toStringSet(extensions), nullptr);
THROW_IF_ERROR(celebA->ValidateParams());
return celebA;
}));
.def(
py::init([](const std::string &dataset_dir, const std::string &usage, const py::handle &sampler,
bool decode, const py::list &extensions, const py::object &decrypt) {
auto celebA = std::make_shared<CelebANode>(dataset_dir, usage, toSamplerObj(sampler), decode,
toStringSet(extensions), nullptr, decrypt);
THROW_IF_ERROR(celebA->ValidateParams());
return celebA;
}));
}));
PYBIND_REGISTER(Cifar10Node, 2, ([](const py::module *m) {
@ -199,18 +200,17 @@ PYBIND_REGISTER(CMUArcticNode, 2, ([](const py::module *m) {
}));
}));
PYBIND_REGISTER(CocoNode, 2, ([](const py::module *m) {
(void)py::class_<CocoNode, DatasetNode, std::shared_ptr<CocoNode>>(*m, "CocoNode",
"to create a CocoNode")
.def(py::init([](const std::string &dataset_dir, const std::string &annotation_file,
const std::string &task, bool decode, const py::handle &sampler,
bool extra_metadata) {
std::shared_ptr<CocoNode> coco = std::make_shared<CocoNode>(
dataset_dir, annotation_file, task, decode, toSamplerObj(sampler), nullptr, extra_metadata);
THROW_IF_ERROR(coco->ValidateParams());
return coco;
}));
}));
PYBIND_REGISTER(
CocoNode, 2, ([](const py::module *m) {
(void)py::class_<CocoNode, DatasetNode, std::shared_ptr<CocoNode>>(*m, "CocoNode", "to create a CocoNode")
.def(py::init([](const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
bool decode, const py::handle &sampler, bool extra_metadata, const py::object &decrypt) {
std::shared_ptr<CocoNode> coco = std::make_shared<CocoNode>(
dataset_dir, annotation_file, task, decode, toSamplerObj(sampler), nullptr, extra_metadata, decrypt);
THROW_IF_ERROR(coco->ValidateParams());
return coco;
}));
}));
PYBIND_REGISTER(CoNLL2000Node, 2, ([](const py::module *m) {
(void)py::class_<CoNLL2000Node, DatasetNode, std::shared_ptr<CoNLL2000Node>>(
@ -359,12 +359,13 @@ PYBIND_REGISTER(ImageFolderNode, 2, ([](const py::module *m) {
(void)py::class_<ImageFolderNode, DatasetNode, std::shared_ptr<ImageFolderNode>>(
*m, "ImageFolderNode", "to create an ImageFolderNode")
.def(py::init([](const std::string &dataset_dir, bool decode, const py::handle &sampler,
const py::list &extensions, const py::dict &class_indexing) {
const py::list &extensions, const py::dict &class_indexing,
const py::object &decrypt) {
// Don't update recursive to true
bool recursive = false; // Will be removed in future PR
auto imagefolder = std::make_shared<ImageFolderNode>(dataset_dir, decode, toSamplerObj(sampler),
recursive, toStringSet(extensions),
toStringMap(class_indexing), nullptr);
auto imagefolder = std::make_shared<ImageFolderNode>(
dataset_dir, decode, toSamplerObj(sampler), recursive, toStringSet(extensions),
toStringMap(class_indexing), nullptr, decrypt);
THROW_IF_ERROR(imagefolder->ValidateParams());
return imagefolder;
}));
@ -764,10 +765,10 @@ PYBIND_REGISTER(VOCNode, 2, ([](const py::module *m) {
(void)py::class_<VOCNode, DatasetNode, std::shared_ptr<VOCNode>>(*m, "VOCNode", "to create a VOCNode")
.def(py::init([](const std::string &dataset_dir, const std::string &task, const std::string &usage,
const py::dict &class_indexing, bool decode, const py::handle &sampler,
bool extra_metadata) {
bool extra_metadata, const py::object &decrypt) {
std::shared_ptr<VOCNode> voc =
std::make_shared<VOCNode>(dataset_dir, task, usage, toStringMap(class_indexing), decode,
toSamplerObj(sampler), nullptr, extra_metadata);
toSamplerObj(sampler), nullptr, extra_metadata, decrypt);
THROW_IF_ERROR(voc->ValidateParams());
return voc;
}));

View File

@ -18,12 +18,12 @@
#include <algorithm>
#include <fstream>
#include <iomanip>
#include "utils/file_utils.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/util/path.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "minddata/dataset/engine/data_schema.h"
#include "minddata/dataset/engine/execution_tree.h"
#include "minddata/dataset/util/path.h"
#include "utils/file_utils.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/kernels/image/image_utils.h"
#else
@ -32,6 +32,22 @@
namespace mindspore {
namespace dataset {
#ifdef ENABLE_PYTHON
// Constructor of CelebAOp (ENABLE_PYTHON build): stores the dataset configuration and the
// optional Python `decrypt` callable used to read encrypted image files.
// Note: members are initialized in declaration order regardless of this list's order.
CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode,
                   const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
                   std::shared_ptr<SamplerRT> sampler, py::function decrypt)
    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
      folder_path_(dir),
      decode_(decode),
      extensions_(exts),
      data_schema_(std::move(schema)),
      num_rows_in_attr_file_(0),
      attr_file_(""),
      usage_(usage),
      decrypt_(std::move(decrypt)) {
  // Queue that hands parsed attribute-file records from the scanning thread to workers.
  attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size);
}
#else
CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode,
const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
std::shared_ptr<SamplerRT> sampler)
@ -45,6 +61,7 @@ CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_si
usage_(usage) {
attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size);
}
#endif
Status CelebAOp::RegisterAndLaunchThreads() {
ParallelOp::RegisterAndLaunchThreads();
@ -237,7 +254,12 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
Path path(folder_path_);
Path image_path = path / image_label.first;
#ifdef ENABLE_PYTHON
RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(image_path.ToString(), &image, decrypt_));
#else
RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_path.ToString(), &image));
#endif
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {

View File

@ -44,6 +44,22 @@ namespace mindspore {
namespace dataset {
class CelebAOp : public MappableLeafOp {
public:
#ifdef ENABLE_PYTHON
// Constructor
// @param int32_t - num_workers - Num of workers reading images in parallel
// @param std::string - dir directory of celeba dataset
// @param int32_t queueSize - connector queue size
// @param bool decode - decode the images after reading
// @param std::string usage - specify the train, valid, test part or all parts of dataset
// @param std::set<std::string> exts - list of file extensions to be included in the dataset
// @param std::unique_ptr<DataSchema> schema - path to the JSON schema file or schema object
// @param std::unique_ptr<Sampler> sampler - sampler tells CelebAOp what to read
// @param py::function decrypt - Image decryption function, which accepts the path of the encrypted image file
// and returns the decrypted bytes data. Default: None, no decryption.
CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage,
const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler,
py::function decrypt = py::none());
#else
// Constructor
// @param int32_t - num_workers - Num of workers reading images in parallel
// @param std::string - dir directory of celeba dataset
@ -55,6 +71,7 @@ class CelebAOp : public MappableLeafOp {
// @param std::unique_ptr<Sampler> sampler - sampler tells CelebAOp what to read
CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage,
const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler);
#endif
~CelebAOp() override = default;
@ -112,6 +129,9 @@ class CelebAOp : public MappableLeafOp {
std::string usage_;
std::ifstream partition_file_;
std::string attr_file_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -17,11 +17,11 @@
#include <algorithm>
#include <fstream>
#include "utils/file_utils.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
#include "utils/file_utils.h"
#include "utils/ms_utils.h"
namespace mindspore {
namespace dataset {
@ -46,6 +46,19 @@ const char kJsonCategoriesName[] = "name";
const float kDefaultPadValue = -1.0;
const unsigned int kPadValueZero = 0;
#ifdef ENABLE_PYTHON
// Constructor of CocoOp (ENABLE_PYTHON build): stores the dataset configuration and the
// optional Python `decrypt` callable used to read encrypted image files.
// Note: members are initialized in declaration order regardless of this list's order.
CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
               int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
               std::shared_ptr<SamplerRT> sampler, bool extra_metadata, py::function decrypt)
    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
      decode_(decode),
      task_type_(task_type),
      image_folder_path_(image_folder_path),
      annotation_path_(annotation_path),
      data_schema_(std::move(data_schema)),
      extra_metadata_(extra_metadata),
      decrypt_(std::move(decrypt)) {}
#else
CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
@ -56,6 +69,7 @@ CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path,
annotation_path_(annotation_path),
data_schema_(std::move(data_schema)),
extra_metadata_(extra_metadata) {}
#endif
void CocoOp::Print(std::ostream &out, bool show_all) const {
if (!show_all) {
@ -556,7 +570,11 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {
Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col,
std::shared_ptr<Tensor> *tensor) const {
#ifdef ENABLE_PYTHON
RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(path, tensor, decrypt_));
#else
RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
#endif
if (decode_) {
Status rc = Decode(*tensor, tensor);

View File

@ -146,6 +146,23 @@ class CocoOp : public MappableLeafOp {
std::unique_ptr<DataSchema> builder_schema_;
};
#ifdef ENABLE_PYTHON
/// \brief Constructor.
/// \param[in] task_type Task type of Coco.
/// \param[in] image_folder_path Image folder path of Coco.
/// \param[in] annotation_path Annotation json path of Coco.
/// \param[in] num_workers Number of workers reading images in parallel.
/// \param[in] queue_size Connector queue size.
/// \param[in] decode Whether to decode images.
/// \param[in] data_schema The schema of the Coco dataset.
/// \param[in] sampler Sampler tells CocoOp what to read.
/// \param[in] extra_metadata Flag to add extra meta-data to row.
/// \param[in] decrypt - Image decryption function, which accepts the path of the encrypted image file
/// and returns the decrypted bytes data. Default: None, no decryption.
CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, bool extra_metadata, py::function decrypt = py::none());
#else
/// \brief Constructor.
/// \param[in] task_type Task type of Coco.
/// \param[in] image_folder_path Image folder path of Coco.
@ -159,6 +176,7 @@ class CocoOp : public MappableLeafOp {
CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, bool extra_metadata);
#endif
/// \brief Destructor.
~CocoOp() = default;
@ -298,6 +316,9 @@ class CocoOp : public MappableLeafOp {
std::map<std::string, std::vector<uint32_t>> simple_item_map_;
std::map<std::string, std::vector<std::string>> captions_map_;
std::set<uint32_t> category_set_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -27,6 +27,25 @@
namespace mindspore {
namespace dataset {
#ifdef ENABLE_PYTHON
// Constructor of ImageFolderOp (ENABLE_PYTHON build): stores the dataset configuration and the
// optional Python `decrypt` callable used to read encrypted image files.
// Note: members are initialized in declaration order regardless of this list's order.
ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
                             const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
                             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler,
                             py::function decrypt)
    : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)),
      folder_path_(std::move(file_dir)),
      recursive_(recursive),
      decode_(do_decode),
      extensions_(exts),
      class_index_(map),
      data_schema_(std::move(data_schema)),
      sampler_ind_(0),
      dirname_offset_(0),
      decrypt_(std::move(decrypt)) {
  // Queues connecting the folder-scanning thread to the image-name prescan workers.
  folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size);
  image_name_queue_ = std::make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size);
}
#else
ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
@ -42,6 +61,7 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t que
folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size);
image_name_queue_ = std::make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size);
}
#endif
// Master thread that pulls the prescan worker's results.
// Keep collecting results until all prescan workers quit
@ -88,7 +108,11 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
ImageLabelPair pair_ptr = image_label_pairs_[row_id];
std::shared_ptr<Tensor> image, label;
RETURN_IF_NOT_OK(Tensor::CreateScalar(pair_ptr->second, &label));
#ifdef ENABLE_PYTHON
RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(folder_path_ + (pair_ptr->first), &image, decrypt_));
#else
RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pair_ptr->first), &image));
#endif
if (decode_ == true) {
Status rc = Decode(image, &image);

View File

@ -53,6 +53,23 @@ using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<Image
class ImageFolderOp : public MappableLeafOp {
public:
#ifdef ENABLE_PYTHON
// Constructor
// @param int32_t num_wkrs - Num of workers reading images in parallel
// @param std::string - dir directory of ImageNetFolder
// @param int32_t queue_size - connector queue size
// @param bool recursive - read recursively
// @param bool do_decode - decode the images after reading
// @param std::set<std::string> &exts - set of file extensions to read, if empty, read everything under the dir
// @param std::map<std::string, int32_t> &map - map of folder name and class id
// @param std::unique_ptr<dataschema> data_schema - schema of data
// @param std::shared_ptr<SamplerRT> sampler - sampler tells ImageFolderOp what to read
// @param py::function decrypt - Image decryption function, which accepts the path of the encrypted image file
// and returns the decrypted bytes data. Default: None, no decryption.
ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler,
py::function decrypt = py::none());
#else
// Constructor
// @param int32_t num_wkrs - Num of workers reading images in parallel
// @param std::string - dir directory of ImageNetFolder
@ -65,6 +82,7 @@ class ImageFolderOp : public MappableLeafOp {
ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
#endif
/// Destructor.
~ImageFolderOp() = default;
@ -144,6 +162,9 @@ class ImageFolderOp : public MappableLeafOp {
std::vector<ImageLabelPair> image_label_pairs_;
std::unique_ptr<Queue<std::string>> folder_name_queue_;
std::unique_ptr<Queue<FolderImagesPair>> image_name_queue_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -24,6 +24,36 @@ namespace dataset {
// Constructor: forwards the worker count, connector queue size and sampler to ParallelOp.
MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler)
    : ParallelOp(num_wkrs, queue_size, std::move(sampler)) {}
#ifdef ENABLE_PYTHON
// Read an image file into a 1-D uint8 tensor, optionally through a user-supplied Python
// decryption callable.
// \param[in] path Path of the (possibly encrypted) image file.
// \param[out] tensor Returned tensor holding the raw (decrypted) bytes.
// \param[in] decrypt Python callable taking the file path and returning bytes; py::none()
//     means "no decryption" and the file is read directly.
// \return Status code.
Status MappableLeafOp::ImageDecrypt(const std::string &path, std::shared_ptr<Tensor> *tensor,
                                    const py::function &decrypt) {
  RETURN_UNEXPECTED_IF_NULL(tensor);
  if (py::isinstance<py::none>(decrypt)) {
    // No decryption function provided: read the raw file bytes directly.
    RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
  } else {
    // Acquire the Python GIL before invoking the user-provided callable.
    py::gil_scoped_acquire gil_acquire;
    if (Py_IsInitialized() == 0) {
      RETURN_STATUS_ERROR(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized.");
    }
    try {
      py::bytes ret_py_obj = decrypt(path);
      int64_t num_bytes = len(ret_py_obj);
      CHECK_FAIL_RETURN_UNEXPECTED(num_bytes < kDeMaxDim,
                                   "The length of decrypted bytes returned by the decryption function exceeds the "
                                   "maximum value of int64, check path: " +
                                     path);
      // Copy the bytes out of the Python object while still holding the GIL.
      std::string ret_str = ret_py_obj;
      RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape{num_bytes}, DataType(DataType::DE_UINT8),
                                                reinterpret_cast<const uchar *>(ret_str.c_str()), num_bytes, tensor));
    } catch (const py::error_already_set &e) {
      // Python-side exception raised by the user's decrypt function.
      RETURN_STATUS_ERROR(StatusCode::kMDPyFuncException, e.what());
    } catch (const std::runtime_error &e) {
      // e.g. py::cast_error when the decrypt function does not return bytes; without this
      // handler the exception would escape instead of being reported as a Status.
      RETURN_STATUS_ERROR(StatusCode::kMDPyFuncException, e.what());
    }
  }
  return Status::OK();
}
#endif
// Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work
Status MappableLeafOp::operator()() {
// Registering and launching worker threads have to be before in sync with caller (i.e., before FindMe()::Post())

View File

@ -68,6 +68,16 @@ class MappableLeafOp : public ParallelOp<std::unique_ptr<IOBlock>, TensorRow>, p
/// @return Name of the current Op
std::string Name() const override { return "MappableLeafOp"; }  // fix typo: was "MappableLeafPp"
#ifdef ENABLE_PYTHON
/// \brief Decrypt the encrypted image data as a public function.
/// \param[in] path - The path of the image that needs to be decrypted.
/// \param[out] tensor - Returned tensor.
/// \param[in] decrypt - Image decryption function. Default: None, no decryption.
/// \return Status code.
static Status ImageDecrypt(const std::string &path, std::shared_ptr<Tensor> *tensor,
const py::function &decrypt = py::none());
#endif
protected:
/// Initialize Sampler, calls sampler->Init() within
/// @return Status The status code returned

View File

@ -43,6 +43,22 @@ const char kSegmentationExtension[] = ".png";
const char kAnnotationExtension[] = ".xml";
const char kImageSetsExtension[] = ".txt";
#ifdef ENABLE_PYTHON
// Constructor of VOCOp (ENABLE_PYTHON build): stores the task configuration and the
// optional Python `decrypt` callable used to read encrypted image files.
// Note: members are initialized in declaration order regardless of this list's order.
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
             const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata,
             py::function decrypt)
    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
      decode_(decode),
      row_cnt_(0),
      task_type_(task_type),
      usage_(task_mode),
      folder_path_(folder_path),
      class_index_(class_index),
      data_schema_(std::move(data_schema)),
      extra_metadata_(extra_metadata),
      decrypt_(std::move(decrypt)) {}
#else
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
@ -55,6 +71,7 @@ VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std:
class_index_(class_index),
data_schema_(std::move(data_schema)),
extra_metadata_(extra_metadata) {}
#endif
void VOCOp::Print(std::ostream &out, bool show_all) const {
if (!show_all) {
@ -253,7 +270,11 @@ Status VOCOp::PrepareData() {
return Status::OK();
}
Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
#ifdef ENABLE_PYTHON
RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(path, tensor, decrypt_));
#else
RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
#endif
if (decode_ == true) {
Status rc = Decode(*tensor, tensor);
if (rc.IsError()) {

View File

@ -50,6 +50,25 @@ class VOCOp : public MappableLeafOp {
public:
enum class TaskType { Segmentation = 0, Detection = 1 };
#ifdef ENABLE_PYTHON
// Constructor
// @param TaskType task_type - task type of VOC
// @param std::string task_mode - task mode of VOC
// @param std::string folder_path - dir directory of VOC
// @param std::map<std::string, int32_t> class_index - input class-to-index of annotation
// @param int32_t num_workers - number of workers reading images in parallel
// @param int32_t queue_size - connector queue size
// @param bool decode - whether to decode images
// @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset
// @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read
// @param extra_metadata - flag to add extra meta-data to row
// @param py::function decrypt - Image decryption function, which accepts the path of the encrypted image file
// and returns the decrypted bytes data. Default: None, no decryption.
VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata,
py::function decrypt = py::none());
#else
// Constructor
// @param TaskType task_type - task type of VOC
// @param std::string task_mode - task mode of VOC
@ -64,6 +83,7 @@ class VOCOp : public MappableLeafOp {
VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata);
#endif
// Destructor
~VOCOp() = default;
@ -150,6 +170,9 @@ class VOCOp : public MappableLeafOp {
std::map<std::string, int32_t> class_index_;
std::map<std::string, int32_t> label_index_;
std::map<std::string, Annotation> annotation_map_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -32,6 +32,20 @@
namespace mindspore {
namespace dataset {
#ifdef ENABLE_PYTHON
// Constructor for CelebANode (ENABLE_PYTHON build): stores the dataset configuration plus
// the optional Python `decrypt` callable forwarded to CelebAOp at build time.
CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage,
                       const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
                       const std::set<std::string> &extensions, const std::shared_ptr<DatasetCache> &cache,
                       py::function decrypt)
    : MappableSourceNode(std::move(cache)),
      dataset_dir_(dataset_dir),
      usage_(usage),
      sampler_(sampler),
      decode_(decode),
      extensions_(extensions),
      // Move instead of copy: a py::function copy is a Python refcount round-trip, and the
      // sibling *Op constructors already store their decrypt members with std::move.
      decrypt_(std::move(decrypt)) {}
#else
// Constructor for CelebANode
CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage,
const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
@ -42,10 +56,15 @@ CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage,
sampler_(sampler),
decode_(decode),
extensions_(extensions) {}
#endif
std::shared_ptr<DatasetNode> CelebANode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
#ifdef ENABLE_PYTHON
auto node = std::make_shared<CelebANode>(dataset_dir_, usage_, sampler, decode_, extensions_, cache_, decrypt_);
#else
auto node = std::make_shared<CelebANode>(dataset_dir_, usage_, sampler, decode_, extensions_, cache_);
#endif
node->SetNumWorkers(num_workers_);
node->SetConnectorQueueSize(connector_que_size_);
return node;
@ -75,8 +94,13 @@ Status CelebANode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
#ifdef ENABLE_PYTHON
auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_,
extensions_, std::move(schema), std::move(sampler_rt), decrypt_);
#else
auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_,
extensions_, std::move(schema), std::move(sampler_rt));
#endif
celeba_op->SetTotalRepeats(GetTotalRepeats());
celeba_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
node_ops->push_back(celeba_op);

View File

@ -29,9 +29,16 @@ namespace mindspore {
namespace dataset {
class CelebANode : public MappableSourceNode {
public:
#ifdef ENABLE_PYTHON
/// \brief Constructor
CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler,
const bool &decode, const std::set<std::string> &extensions, const std::shared_ptr<DatasetCache> &cache,
py::function decrypt = py::none());
#else
/// \brief Constructor
CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler,
const bool &decode, const std::set<std::string> &extensions, const std::shared_ptr<DatasetCache> &cache);
#endif
/// \brief Destructor
~CelebANode() override = default;
@ -102,6 +109,9 @@ class CelebANode : public MappableSourceNode {
bool decode_;
std::set<std::string> extensions_;
std::shared_ptr<SamplerObj> sampler_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -30,6 +30,20 @@
namespace mindspore {
namespace dataset {
#ifdef ENABLE_PYTHON
// Constructor for CocoNode (ENABLE_PYTHON build): stores the dataset configuration plus
// the optional Python `decrypt` callable forwarded to CocoOp at build time.
CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
                   const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
                   const bool &extra_metadata, py::function decrypt)
    : MappableSourceNode(std::move(cache)),
      dataset_dir_(dataset_dir),
      annotation_file_(annotation_file),
      task_(task),
      decode_(decode),
      sampler_(sampler),
      extra_metadata_(extra_metadata),
      // Move instead of copy: a py::function copy is a Python refcount round-trip, and the
      // sibling *Op constructors already store their decrypt members with std::move.
      decrypt_(std::move(decrypt)) {}
#else
// Constructor for CocoNode
CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
@ -41,11 +55,17 @@ CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation
decode_(decode),
sampler_(sampler),
extra_metadata_(extra_metadata) {}
#endif
std::shared_ptr<DatasetNode> CocoNode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
#ifdef ENABLE_PYTHON
auto node = std::make_shared<CocoNode>(dataset_dir_, annotation_file_, task_, decode_, sampler, cache_,
extra_metadata_, decrypt_);
#else
auto node =
std::make_shared<CocoNode>(dataset_dir_, annotation_file_, task_, decode_, sampler, cache_, extra_metadata_);
#endif
node->SetNumWorkers(num_workers_);
node->SetConnectorQueueSize(connector_que_size_);
return node;
@ -135,9 +155,15 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
#ifdef ENABLE_PYTHON
std::shared_ptr<CocoOp> op =
std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_,
std::move(schema), std::move(sampler_rt), extra_metadata_, decrypt_);
#else
std::shared_ptr<CocoOp> op =
std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_,
std::move(schema), std::move(sampler_rt), extra_metadata_);
#endif
op->SetTotalRepeats(GetTotalRepeats());
op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -27,10 +27,17 @@ namespace mindspore {
namespace dataset {
class CocoNode : public MappableSourceNode {
public:
#ifdef ENABLE_PYTHON
/// \brief Constructor.
CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
const bool &extra_metadata, py::function decrypt = py::none());
#else
/// \brief Constructor.
CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
const bool &extra_metadata);
#endif
/// \brief Destructor.
~CocoNode() override = default;
@ -103,6 +110,9 @@ class CocoNode : public MappableSourceNode {
bool decode_;
std::shared_ptr<SamplerObj> sampler_;
bool extra_metadata_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -32,6 +32,20 @@
namespace mindspore {
namespace dataset {
#ifdef ENABLE_PYTHON
// Constructor for ImageFolderNode (ENABLE_PYTHON build): stores the dataset configuration
// plus the optional Python `decrypt` callable forwarded to ImageFolderOp at build time.
// NOTE(review): removed the in-definition default argument `cache = nullptr` — default
// arguments belong in the declaration, and a defaulted parameter followed by the
// non-defaulted `decrypt` in the same declarator is ill-formed unless a prior declaration
// supplies the remaining defaults. The visible call sites (pybind binding, Copy) pass every
// argument explicitly, so no caller relied on it.
ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
                                 bool recursive, std::set<std::string> extensions,
                                 std::map<std::string, int32_t> class_indexing,
                                 std::shared_ptr<DatasetCache> cache, py::function decrypt)
    : MappableSourceNode(std::move(cache)),
      // By-value parameters are moved into the members to avoid redundant copies.
      dataset_dir_(std::move(dataset_dir)),
      decode_(decode),
      sampler_(std::move(sampler)),
      recursive_(recursive),
      class_indexing_(std::move(class_indexing)),
      exts_(std::move(extensions)),
      decrypt_(std::move(decrypt)) {}
#else
ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
bool recursive, std::set<std::string> extensions,
std::map<std::string, int32_t> class_indexing,
@ -43,11 +57,17 @@ ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shar
recursive_(recursive),
class_indexing_(class_indexing),
exts_(extensions) {}
#endif
std::shared_ptr<DatasetNode> ImageFolderNode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
#ifdef ENABLE_PYTHON
auto node = std::make_shared<ImageFolderNode>(dataset_dir_, decode_, sampler, recursive_, exts_, class_indexing_,
cache_, decrypt_);
#else
auto node =
std::make_shared<ImageFolderNode>(dataset_dir_, decode_, sampler, recursive_, exts_, class_indexing_, cache_);
#endif
node->SetNumWorkers(num_workers_);
node->SetConnectorQueueSize(connector_que_size_);
return node;
@ -77,11 +97,17 @@ Status ImageFolderNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const nod
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
#ifdef ENABLE_PYTHON
auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_,
class_indexing_, std::move(schema), std::move(sampler_rt), decrypt_);
#else
auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_,
class_indexing_, std::move(schema), std::move(sampler_rt));
#endif
op->SetTotalRepeats(GetTotalRepeats());
op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);
return Status::OK();
}

View File

@ -32,11 +32,17 @@ namespace dataset {
/// \brief A Dataset derived class to represent ImageFolder dataset
class ImageFolderNode : public MappableSourceNode {
public:
#ifdef ENABLE_PYTHON
/// \brief Constructor
ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing,
std::shared_ptr<DatasetCache> cache, py::function decrypt = py::none());
#else
/// \brief Constructor
ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing,
std::shared_ptr<DatasetCache> cache);
#endif
/// \brief Destructor
~ImageFolderNode() override = default;
@ -108,6 +114,9 @@ class ImageFolderNode : public MappableSourceNode {
std::shared_ptr<SamplerObj> sampler_;
std::map<std::string, int32_t> class_indexing_;
std::set<std::string> exts_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -31,6 +31,21 @@
namespace mindspore {
namespace dataset {
#ifdef ENABLE_PYTHON
// Constructor for VOCNode
// Python-enabled overload: in addition to the usual VOC parameters it stores
// `decrypt`, a Python callable that accepts the path of an encrypted image
// file and returns the decrypted bytes (py::none() means no decryption).
// All parameters are copied into the corresponding members; the cache is
// moved into the MappableSourceNode base.
VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache, bool extra_metadata, py::function decrypt)
: MappableSourceNode(std::move(cache)),
dataset_dir_(dataset_dir),
task_(task),
usage_(usage),
class_index_(class_indexing),
decode_(decode),
sampler_(sampler),
extra_metadata_(extra_metadata),
decrypt_(decrypt) {}
#else
// Constructor for VOCNode
VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
@ -43,11 +58,17 @@ VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const
decode_(decode),
sampler_(sampler),
extra_metadata_(extra_metadata) {}
#endif
std::shared_ptr<DatasetNode> VOCNode::Copy() {
std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
#ifdef ENABLE_PYTHON
auto node = std::make_shared<VOCNode>(dataset_dir_, task_, usage_, class_index_, decode_, sampler, cache_,
extra_metadata_, decrypt_);
#else
auto node =
std::make_shared<VOCNode>(dataset_dir_, task_, usage_, class_index_, decode_, sampler, cache_, extra_metadata_);
#endif
node->SetNumWorkers(num_workers_);
node->SetConnectorQueueSize(connector_que_size_);
return node;
@ -125,11 +146,18 @@ Status VOCNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
std::shared_ptr<VOCOp> voc_op;
#ifdef ENABLE_PYTHON
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
decode_, std::move(schema), std::move(sampler_rt), extra_metadata_, decrypt_);
#else
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
decode_, std::move(schema), std::move(sampler_rt), extra_metadata_);
#endif
voc_op->SetTotalRepeats(GetTotalRepeats());
voc_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
node_ops->push_back(voc_op);
return Status::OK();
}

View File

@ -28,10 +28,17 @@ namespace mindspore {
namespace dataset {
class VOCNode : public MappableSourceNode {
public:
#ifdef ENABLE_PYTHON
/// \brief Constructor
VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache, bool extra_metadata = false, py::function decrypt = py::none());
#else
/// \brief Constructor
VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
std::shared_ptr<DatasetCache> cache, bool extra_metadata = false);
#endif
/// \brief Destructor
~VOCNode() override = default;
@ -112,6 +119,9 @@ class VOCNode : public MappableSourceNode {
bool decode_;
std::shared_ptr<SamplerObj> sampler_;
bool extra_metadata_;
#ifdef ENABLE_PYTHON
py::function decrypt_;
#endif
};
} // namespace dataset
} // namespace mindspore

View File

@ -429,6 +429,8 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
argument can only be specified when `num_shards` is also specified.
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
(default=None, which means no cache is used).
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
and returns the decrypted bytes data. Default: None, no decryption.
Raises:
RuntimeError: If `dataset_dir` does not contain data files.
@ -548,13 +550,14 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
@check_celebadataset
def __init__(self, dataset_dir, num_parallel_workers=None, shuffle=None, usage='all', sampler=None, decode=False,
extensions=None, num_samples=None, num_shards=None, shard_id=None, cache=None):
extensions=None, num_samples=None, num_shards=None, shard_id=None, cache=None, decrypt=None):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
self.dataset_dir = dataset_dir
self.decode = replace_none(decode, False)
self.extensions = replace_none(extensions, [])
self.usage = replace_none(usage, "all")
self.decrypt = decrypt
def parse(self, children=None):
if self.usage != "all":
@ -562,7 +565,8 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
partition_file = os.path.join(dataset_dir, "list_eval_partition.txt")
if os.path.exists(partition_file) is False:
raise RuntimeError("Partition file can not be found when usage is not 'all'.")
return cde.CelebANode(self.dataset_dir, self.usage, self.sampler, self.decode, self.extensions)
return cde.CelebANode(self.dataset_dir, self.usage, self.sampler, self.decode,
self.extensions, self.decrypt)
@ -1023,6 +1027,8 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
(default=None, which means no cache is used).
extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
output at the end :py:obj:`[_meta-filename, dtype=string]` (default=False).
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
and returns the decrypted bytes data. Default: None, no decryption.
The generated dataset with different task setting has different output columns:
@ -1195,7 +1201,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
@check_cocodataset
def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None,
shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None, cache=None,
extra_metadata=False):
extra_metadata=False, decrypt=None):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
self.dataset_dir = dataset_dir
@ -1203,10 +1209,11 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
self.task = replace_none(task, "Detection")
self.decode = replace_none(decode, False)
self.extra_metadata = extra_metadata
self.decrypt = decrypt
def parse(self, children=None):
return cde.CocoNode(self.dataset_dir, self.annotation_file, self.task, self.decode, self.sampler,
self.extra_metadata)
self.extra_metadata, self.decrypt)
def get_class_indexing(self):
"""
@ -2203,6 +2210,8 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
argument can only be specified when `num_shards` is also specified.
cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
(default=None, which means no cache is used).
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
and returns the decrypted bytes data. Default: None, no decryption.
Raises:
RuntimeError: If `dataset_dir` does not contain data files.
@ -2287,7 +2296,8 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
@check_imagefolderdataset
def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None,
extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None):
extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None,
decrypt=None):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
@ -2295,9 +2305,11 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
self.extensions = replace_none(extensions, [])
self.class_indexing = replace_none(class_indexing, {})
self.decode = replace_none(decode, False)
self.decrypt = decrypt
def parse(self, children=None):
return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing)
return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing,
self.decrypt)
class KITTIDataset(MappableDataset):
@ -4534,6 +4546,8 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
(default=None, which means no cache is used).
extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
:py:obj:`[_meta-filename, dtype=string]` will be output at the end (default=False).
decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
and returns the decrypted bytes data. Default: None, no decryption.
Raises:
RuntimeError: If `dataset_dir` does not contain data files.
@ -4658,7 +4672,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
@check_vocdataset
def __init__(self, dataset_dir, task="Segmentation", usage="train", class_indexing=None, num_samples=None,
num_parallel_workers=None, shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None,
cache=None, extra_metadata=False):
cache=None, extra_metadata=False, decrypt=None):
super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
self.dataset_dir = dataset_dir
@ -4667,10 +4681,11 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
self.class_indexing = replace_none(class_indexing, {})
self.decode = replace_none(decode, False)
self.extra_metadata = extra_metadata
self.decrypt = decrypt
def parse(self, children=None):
return cde.VOCNode(self.dataset_dir, self.task, self.usage, self.class_indexing, self.decode, self.sampler,
self.extra_metadata)
self.extra_metadata, self.decrypt)
def get_class_indexing(self):
"""

View File

@ -111,6 +111,10 @@ def check_imagefolderdataset(method):
dataset_dir = param_dict.get('dataset_dir')
check_dir(dataset_dir)
decrypt = param_dict.get('decrypt')
if decrypt is not None and not callable(decrypt):
raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
validate_dataset_param_value(nreq_param_int, param_dict, int)
validate_dataset_param_value(nreq_param_bool, param_dict, bool)
validate_dataset_param_value(nreq_param_list, param_dict, list)
@ -689,6 +693,10 @@ def check_caltech256_dataset(method):
dataset_dir = param_dict.get('dataset_dir')
check_dir(dataset_dir)
decrypt = param_dict.get('decrypt')
if decrypt is not None and not callable(decrypt):
raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
validate_dataset_param_value(nreq_param_int, param_dict, int)
validate_dataset_param_value(nreq_param_bool, param_dict, bool)
check_sampler_shuffle_shard_options(param_dict)
@ -731,6 +739,10 @@ def check_vocdataset(method):
else:
raise ValueError("Invalid task : " + task + ".")
decrypt = param_dict.get('decrypt')
if decrypt is not None and not callable(decrypt):
raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
check_file(imagesets_file)
validate_dataset_param_value(nreq_param_int, param_dict, int)
@ -768,6 +780,10 @@ def check_cocodataset(method):
if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'}:
raise ValueError("Invalid task type: " + task + ".")
decrypt = param_dict.get('decrypt')
if decrypt is not None and not callable(decrypt):
raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
validate_dataset_param_value(nreq_param_int, param_dict, int)
validate_dataset_param_value(nreq_param_bool, param_dict, bool)

View File

@ -12,12 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import os
import shutil
import numpy as np
import pytest
import mindspore.dataset as ds
import mindspore.dataset.transforms as data_trans
import mindspore.dataset.vision as vision
from mindspore import log as logger
DATA_DIR = "../data/dataset/testPK/data"
DATA_DIR_2 = "../data/dataset/testImageNetData2/train"
DATA_DIR_3 = "../data/dataset/testImageNetData2/encrypt_train"
def test_imagefolder_basic():
@ -903,6 +909,108 @@ def test_imagefolder_exception():
assert "should be file, but got directory" in str(e)
def encrypt_func():
    """
    Feature: Encrypt function
    Description: Encrypt and save the image. Walks every file under
        DATA_DIR_2 and writes an "encrypted" copy (a fixed 10-byte
        b'helloworld' marker prepended) into the mirrored tree DATA_DIR_3.
    Expectation: Success
    """
    plain_dir = os.path.realpath(DATA_DIR_2)
    cipher_dir = os.path.realpath(DATA_DIR_3)
    for root, _, files in os.walk(plain_dir):
        for file_name in files:
            # Original code reused `f` for both the filename and the open
            # file handles; distinct names avoid that shadowing.
            src_file = os.path.join(root, file_name)
            enc_file = os.path.join(cipher_dir, os.path.relpath(src_file, plain_dir))
            os.makedirs(os.path.dirname(enc_file), exist_ok=True)
            with open(src_file, 'rb') as plain_file:
                content = plain_file.read()
            # Toy "encryption" for the test: prepend a 10-byte marker that
            # decrypt_func strips off again.
            with open(enc_file, 'wb') as cipher_file:
                cipher_file.write(b'helloworld' + content)
def decrypt_func(cipher_file):
    """
    Feature: Decrypt function
    Description: Decrypt encrypted image data
    Expectation: Decryption is successful, return bytes type data
    """
    with open(cipher_file, 'rb') as encrypted:
        payload = encrypted.read()
    # Strip the 10-byte b'helloworld' marker added by the encrypt step.
    return payload[10:]
def test_imagefolder_decrypt():
    """
    Feature: Test imagefolder decrypt
    Description: Support decrypting encrypted image data
    Expectation: Success
    """
    logger.info("Test imagefolder decrypt")
    encrypt_func()
    try:
        resize_height = 224
        resize_width = 224

        # Create dataset and define map operations
        ds1 = ds.ImageFolderDataset(DATA_DIR_3, decrypt=decrypt_func)
        num_classes = 3
        decode_op = vision.Decode()
        resize_op = vision.Resize((resize_height, resize_width), vision.Inter.LINEAR)
        one_hot_encode = data_trans.OneHot(num_classes)  # num_classes is input argument

        ds1 = ds1.map(operations=decode_op, input_columns=["image"])
        ds1 = ds1.map(operations=resize_op, input_columns=["image"])
        ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])

        # apply batch operations
        batch_size = 3
        ds1 = ds1.batch(batch_size, drop_remainder=True)
        ds2 = ds1
        alpha = 0.2
        transforms = [vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)]
        ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])

        num_iter = 0
        batch1_image1 = 0
        for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True),
                                ds2.create_dict_iterator(num_epochs=1, output_numpy=True)):
            image1 = data1["image"]
            label1 = data1["label"]
            logger.info("label: {}".format(label1))
            image2 = data2["image"]
            label2 = data2["label"]
            logger.info("label2: {}".format(label2))
            if num_iter == 0:
                batch1_image1 = image1
            if num_iter == 1:
                lam = np.abs(label2 - label1)
                logger.info("lam value in multi: {}".format(lam))
                for index in range(batch_size):
                    if np.square(lam[index]).mean() != 0:
                        lam_value = 1 - np.sum(lam[index]) / 2
                        img_golden = lam_value * image2[index] + (1 - lam_value) * batch1_image1[index]
                        # Element-wise comparison with tolerance for rounding.
                        # The original `image1[index].all() == img_golden.all()`
                        # compared two booleans and could not catch a mismatch.
                        np.testing.assert_allclose(image1[index], img_golden, rtol=1e-3, atol=1.0)
                logger.info("====test several batch mixup ok====")
                break
            num_iter += 1
    finally:
        # Always remove the generated encrypted fixtures, even if the
        # assertions above fail, so reruns start from a clean state.
        if os.path.exists(DATA_DIR_3):
            shutil.rmtree(DATA_DIR_3)
if __name__ == '__main__':
test_imagefolder_basic()
logger.info('test_imagefolder_basic Ended.\n')
@ -987,3 +1095,6 @@ if __name__ == '__main__':
test_imagefolder_exception()
logger.info('test_imagefolder_exception Ended.\n')
test_imagefolder_decrypt()
logger.info('test_imagefolder_decrypt Ended.\n')