!34529 Support specific data set decryption
Merge pull request !34529 from 刘勇琪/master-decrypt-column
This commit is contained in commit fedccd0d63.
@@ -20,6 +20,7 @@ mindspore.dataset.CelebADataset
 - **num_shards** (int, optional) - Number of shards the dataset will be divided into for distributed training. Default: None. When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
 - **shard_id** (int, optional) - Shard ID used for distributed training. Default: None. This argument can only be specified when `num_shards` is also specified.
 - **cache** (DatasetCache, optional) - Single-node data caching service, used to speed up dataset processing. For details, see `Single-node data cache <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ . Default: None, no cache is used.
+- **decrypt** (callable, optional) - Image decryption function, which accepts the path of the encrypted image file and returns the decrypted bytes data. Default: None, no decryption.

 **Raises:**

@@ -19,6 +19,7 @@
 - **shard_id** (int, optional) - Shard ID used for distributed training. Default: None. This argument can only be specified when `num_shards` is also specified.
 - **cache** (DatasetCache, optional) - Single-node data caching service, used to speed up dataset processing. For details, see `Single-node data cache <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ . Default: None, no cache is used.
 - **extra_metadata** (bool, optional) - Whether to output an additional data column holding the image metadata. If True, an extra column named `[_meta-filename, dtype=string]` is output. Default: False.
+- **decrypt** (callable, optional) - Image decryption function, which accepts the path of the encrypted image file and returns the decrypted bytes data. Default: None, no decryption.

 [Table 1] Depending on the `task` setting, the generated dataset has different output columns:

@@ -20,6 +20,7 @@ mindspore.dataset.ImageFolderDataset
 - **num_shards** (int, optional) - Number of shards the dataset will be divided into for distributed training. Default: None. When this argument is specified, `num_samples` reflects the maximum number of samples per shard.
 - **shard_id** (int, optional) - Shard ID used for distributed training. Default: None. This argument can only be specified when `num_shards` is also specified.
 - **cache** (DatasetCache, optional) - Single-node data caching service, used to speed up dataset processing. For details, see `Single-node data cache <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ . Default: None, no cache is used.
+- **decrypt** (callable, optional) - Image decryption function, which accepts the path of the encrypted image file and returns the decrypted bytes data. Default: None, no decryption.

 **Raises:**

@@ -24,6 +24,7 @@ mindspore.dataset.VOCDataset
 - **shard_id** (int, optional) - Shard ID used for distributed training. Default: None. This argument can only be specified when `num_shards` is also specified.
 - **cache** (DatasetCache, optional) - Single-node data caching service, used to speed up dataset processing. For details, see `Single-node data cache <https://www.mindspore.cn/tutorials/experts/zh-CN/master/dataset/cache.html>`_ . Default: None, no cache is used.
 - **extra_metadata** (bool, optional) - Whether to output an additional data column holding the image metadata. If True, an extra column named `[_meta-filename, dtype=string]` is output. Default: False.
+- **decrypt** (callable, optional) - Image decryption function, which accepts the path of the encrypted image file and returns the decrypted bytes data. Default: None, no decryption.

 Depending on the given `task` configuration, the generated dataset has different output columns:

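To make the new argument concrete, a minimal usage sketch (the directory path and the 10-byte-header scheme are illustrative — they mirror the test helper added below; `decrypt` may be any callable mapping an encrypted file path to decrypted bytes):

    import mindspore.dataset as ds

    def my_decrypt(cipher_file):
        # Illustrative scheme: assume the plaintext image follows a 10-byte header.
        with open(cipher_file, 'rb') as f:
            return f.read()[10:]

    # The callable is forwarded through the new `decrypt` keyword.
    dataset = ds.ImageFolderDataset("/path/to/encrypted_images", decrypt=my_decrypt)
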
@@ -129,13 +129,14 @@ PYBIND_REGISTER(Caltech256Node, 2, ([](const py::module *m) {
 PYBIND_REGISTER(CelebANode, 2, ([](const py::module *m) {
                   (void)py::class_<CelebANode, DatasetNode, std::shared_ptr<CelebANode>>(*m, "CelebANode",
                                                                                          "to create a CelebANode")
-                    .def(py::init([](const std::string &dataset_dir, const std::string &usage,
-                                     const py::handle &sampler, bool decode, const py::list &extensions) {
-                      auto celebA = std::make_shared<CelebANode>(dataset_dir, usage, toSamplerObj(sampler), decode,
-                                                                 toStringSet(extensions), nullptr);
-                      THROW_IF_ERROR(celebA->ValidateParams());
-                      return celebA;
-                    }));
+                    .def(
+                      py::init([](const std::string &dataset_dir, const std::string &usage, const py::handle &sampler,
+                                  bool decode, const py::list &extensions, const py::object &decrypt) {
+                        auto celebA = std::make_shared<CelebANode>(dataset_dir, usage, toSamplerObj(sampler), decode,
+                                                                   toStringSet(extensions), nullptr, decrypt);
+                        THROW_IF_ERROR(celebA->ValidateParams());
+                        return celebA;
+                      }));
 }));

 PYBIND_REGISTER(Cifar10Node, 2, ([](const py::module *m) {
@@ -199,18 +200,17 @@ PYBIND_REGISTER(CMUArcticNode, 2, ([](const py::module *m) {
                   }));
 }));

-PYBIND_REGISTER(CocoNode, 2, ([](const py::module *m) {
-                  (void)py::class_<CocoNode, DatasetNode, std::shared_ptr<CocoNode>>(*m, "CocoNode",
-                                                                                     "to create a CocoNode")
-                    .def(py::init([](const std::string &dataset_dir, const std::string &annotation_file,
-                                     const std::string &task, bool decode, const py::handle &sampler,
-                                     bool extra_metadata) {
-                      std::shared_ptr<CocoNode> coco = std::make_shared<CocoNode>(
-                        dataset_dir, annotation_file, task, decode, toSamplerObj(sampler), nullptr, extra_metadata);
-                      THROW_IF_ERROR(coco->ValidateParams());
-                      return coco;
-                    }));
-                }));
+PYBIND_REGISTER(
+  CocoNode, 2, ([](const py::module *m) {
+    (void)py::class_<CocoNode, DatasetNode, std::shared_ptr<CocoNode>>(*m, "CocoNode", "to create a CocoNode")
+      .def(py::init([](const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
+                       bool decode, const py::handle &sampler, bool extra_metadata, const py::object &decrypt) {
+        std::shared_ptr<CocoNode> coco = std::make_shared<CocoNode>(
+          dataset_dir, annotation_file, task, decode, toSamplerObj(sampler), nullptr, extra_metadata, decrypt);
+        THROW_IF_ERROR(coco->ValidateParams());
+        return coco;
+      }));
+  }));

 PYBIND_REGISTER(CoNLL2000Node, 2, ([](const py::module *m) {
                   (void)py::class_<CoNLL2000Node, DatasetNode, std::shared_ptr<CoNLL2000Node>>(
@@ -359,12 +359,13 @@ PYBIND_REGISTER(ImageFolderNode, 2, ([](const py::module *m) {
                   (void)py::class_<ImageFolderNode, DatasetNode, std::shared_ptr<ImageFolderNode>>(
                     *m, "ImageFolderNode", "to create an ImageFolderNode")
                     .def(py::init([](const std::string &dataset_dir, bool decode, const py::handle &sampler,
-                                     const py::list &extensions, const py::dict &class_indexing) {
+                                     const py::list &extensions, const py::dict &class_indexing,
+                                     const py::object &decrypt) {
                       // Don't update recursive to true
                       bool recursive = false;  // Will be removed in future PR
-                      auto imagefolder = std::make_shared<ImageFolderNode>(dataset_dir, decode, toSamplerObj(sampler),
-                                                                           recursive, toStringSet(extensions),
-                                                                           toStringMap(class_indexing), nullptr);
+                      auto imagefolder = std::make_shared<ImageFolderNode>(
+                        dataset_dir, decode, toSamplerObj(sampler), recursive, toStringSet(extensions),
+                        toStringMap(class_indexing), nullptr, decrypt);
                       THROW_IF_ERROR(imagefolder->ValidateParams());
                       return imagefolder;
                     }));
@@ -764,10 +765,10 @@ PYBIND_REGISTER(VOCNode, 2, ([](const py::module *m) {
                   (void)py::class_<VOCNode, DatasetNode, std::shared_ptr<VOCNode>>(*m, "VOCNode", "to create a VOCNode")
                     .def(py::init([](const std::string &dataset_dir, const std::string &task, const std::string &usage,
                                      const py::dict &class_indexing, bool decode, const py::handle &sampler,
-                                     bool extra_metadata) {
+                                     bool extra_metadata, const py::object &decrypt) {
                       std::shared_ptr<VOCNode> voc =
                         std::make_shared<VOCNode>(dataset_dir, task, usage, toStringMap(class_indexing), decode,
-                                                  toSamplerObj(sampler), nullptr, extra_metadata);
+                                                  toSamplerObj(sampler), nullptr, extra_metadata, decrypt);
                       THROW_IF_ERROR(voc->ValidateParams());
                       return voc;
                     }));

@@ -18,12 +18,12 @@
 #include <algorithm>
 #include <fstream>
 #include <iomanip>
-#include "utils/file_utils.h"
 #include "minddata/dataset/core/config_manager.h"
-#include "minddata/dataset/util/path.h"
 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
 #include "minddata/dataset/engine/data_schema.h"
 #include "minddata/dataset/engine/execution_tree.h"
+#include "minddata/dataset/util/path.h"
+#include "utils/file_utils.h"
 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/kernels/image/image_utils.h"
 #else
@@ -32,6 +32,22 @@

 namespace mindspore {
 namespace dataset {
+#ifdef ENABLE_PYTHON
+CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode,
+                   const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
+                   std::shared_ptr<SamplerRT> sampler, py::function decrypt)
+    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
+      folder_path_(dir),
+      decode_(decode),
+      extensions_(exts),
+      data_schema_(std::move(schema)),
+      num_rows_in_attr_file_(0),
+      attr_file_(""),
+      usage_(usage),
+      decrypt_(std::move(decrypt)) {
+  attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size);
+}
+#else
 CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode,
                    const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
                    std::shared_ptr<SamplerRT> sampler)
@@ -45,6 +61,7 @@ CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_si
       usage_(usage) {
   attr_info_queue_ = std::make_unique<Queue<std::vector<std::string>>>(queue_size);
 }
+#endif

 Status CelebAOp::RegisterAndLaunchThreads() {
   ParallelOp::RegisterAndLaunchThreads();
@@ -237,7 +254,12 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {

   Path path(folder_path_);
   Path image_path = path / image_label.first;
+
+#ifdef ENABLE_PYTHON
+  RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(image_path.ToString(), &image, decrypt_));
+#else
   RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_path.ToString(), &image));
+#endif
   if (decode_ == true) {
     Status rc = Decode(image, &image);
     if (rc.IsError()) {

@@ -44,6 +44,22 @@ namespace mindspore {
 namespace dataset {
 class CelebAOp : public MappableLeafOp {
  public:
+#ifdef ENABLE_PYTHON
+  // Constructor
+  // @param int32_t - num_workers - Num of workers reading images in parallel
+  // @param std::string - dir directory of celeba dataset
+  // @param int32_t queueSize - connector queue size
+  // @param bool decode - decode the images after reading
+  // @param std::string usage - specify the train, valid, test part or all parts of dataset
+  // @param std::set<std::string> exts - list of file extensions to be included in the dataset
+  // @param std::unique_ptr<DataSchema> schema - path to the JSON schema file or schema object
+  // @param std::unique_ptr<Sampler> sampler - sampler tells CelebAOp what to read
+  // @param py::function decrypt - Image decryption function, which accepts the path of the encrypted image file
+  //     and returns the decrypted bytes data. Default: None, no decryption.
+  CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage,
+           const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler,
+           py::function decrypt = py::none());
+#else
   // Constructor
   // @param int32_t - num_workers - Num of workers reading images in parallel
   // @param std::string - dir directory of celeba dataset
@@ -55,6 +71,7 @@ class CelebAOp : public MappableLeafOp {
   // @param std::unique_ptr<Sampler> sampler - sampler tells CelebAOp what to read
   CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage,
            const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler);
+#endif

   ~CelebAOp() override = default;
@@ -112,6 +129,9 @@ class CelebAOp : public MappableLeafOp {
   std::string usage_;
   std::ifstream partition_file_;
   std::string attr_file_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -17,11 +17,11 @@

 #include <algorithm>
 #include <fstream>
-#include "utils/file_utils.h"
-#include "utils/ms_utils.h"
 #include "minddata/dataset/core/config_manager.h"
 #include "minddata/dataset/core/tensor_shape.h"
 #include "minddata/dataset/engine/datasetops/source/sampler/sequential_sampler.h"
+#include "utils/file_utils.h"
+#include "utils/ms_utils.h"

 namespace mindspore {
 namespace dataset {
@@ -46,6 +46,19 @@ const char kJsonCategoriesName[] = "name";
 const float kDefaultPadValue = -1.0;
 const unsigned int kPadValueZero = 0;

+#ifdef ENABLE_PYTHON
+CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
+               int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
+               std::shared_ptr<SamplerRT> sampler, bool extra_metadata, py::function decrypt)
+    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
+      decode_(decode),
+      task_type_(task_type),
+      image_folder_path_(image_folder_path),
+      annotation_path_(annotation_path),
+      data_schema_(std::move(data_schema)),
+      extra_metadata_(extra_metadata),
+      decrypt_(std::move(decrypt)) {}
+#else
 CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
                int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
                std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
@@ -56,6 +69,7 @@ CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path,
       annotation_path_(annotation_path),
       data_schema_(std::move(data_schema)),
       extra_metadata_(extra_metadata) {}
+#endif

 void CocoOp::Print(std::ostream &out, bool show_all) const {
   if (!show_all) {
@@ -556,7 +570,11 @@ Status CocoOp::CategoriesColumnLoad(const nlohmann::json &categories_tree) {

 Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col,
                                  std::shared_ptr<Tensor> *tensor) const {
+#ifdef ENABLE_PYTHON
+  RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(path, tensor, decrypt_));
+#else
   RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
+#endif

   if (decode_) {
     Status rc = Decode(*tensor, tensor);

@@ -146,6 +146,23 @@ class CocoOp : public MappableLeafOp {
     std::unique_ptr<DataSchema> builder_schema_;
   };

+#ifdef ENABLE_PYTHON
+  /// \brief Constructor.
+  /// \param[in] task_type Task type of Coco.
+  /// \param[in] image_folder_path Image folder path of Coco.
+  /// \param[in] annotation_path Annotation json path of Coco.
+  /// \param[in] num_workers Number of workers reading images in parallel.
+  /// \param[in] queue_size Connector queue size.
+  /// \param[in] num_samples Number of samples to read.
+  /// \param[in] decode Whether to decode images.
+  /// \param[in] data_schema The schema of the Coco dataset.
+  /// \param[in] sampler Sampler tells CocoOp what to read.
+  /// \param[in] decrypt - Image decryption function, which accepts the path of the encrypted image file
+  ///     and returns the decrypted bytes data. Default: None, no decryption.
+  CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
+         int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
+         std::shared_ptr<SamplerRT> sampler, bool extra_metadata, py::function decrypt = py::none());
+#else
   /// \brief Constructor.
   /// \param[in] task_type Task type of Coco.
   /// \param[in] image_folder_path Image folder path of Coco.
@@ -159,6 +176,7 @@ class CocoOp : public MappableLeafOp {
   CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
          int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
          std::shared_ptr<SamplerRT> sampler, bool extra_metadata);
+#endif

   /// \brief Destructor.
   ~CocoOp() = default;
@@ -298,6 +316,9 @@ class CocoOp : public MappableLeafOp {
   std::map<std::string, std::vector<uint32_t>> simple_item_map_;
   std::map<std::string, std::vector<std::string>> captions_map_;
   std::set<uint32_t> category_set_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -27,6 +27,25 @@

 namespace mindspore {
 namespace dataset {
+#ifdef ENABLE_PYTHON
+ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
+                             const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
+                             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler,
+                             py::function decrypt)
+    : MappableLeafOp(num_wkrs, queue_size, std::move(sampler)),
+      folder_path_(std::move(file_dir)),
+      recursive_(recursive),
+      decode_(do_decode),
+      extensions_(exts),
+      class_index_(map),
+      data_schema_(std::move(data_schema)),
+      sampler_ind_(0),
+      dirname_offset_(0),
+      decrypt_(std::move(decrypt)) {
+  folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size);
+  image_name_queue_ = std::make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size);
+}
+#else
 ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
                              const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
                              std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
@@ -42,6 +61,7 @@ ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t que
   folder_name_queue_ = std::make_unique<Queue<std::string>>(num_wkrs * queue_size);
   image_name_queue_ = std::make_unique<Queue<FolderImagesPair>>(num_wkrs * queue_size);
 }
+#endif

 // Master thread that pulls the prescan worker's results.
 // Keep collecting results until all prescan workers quit
@@ -88,7 +108,11 @@ Status ImageFolderOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
   ImageLabelPair pair_ptr = image_label_pairs_[row_id];
   std::shared_ptr<Tensor> image, label;
   RETURN_IF_NOT_OK(Tensor::CreateScalar(pair_ptr->second, &label));
+#ifdef ENABLE_PYTHON
+  RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(folder_path_ + (pair_ptr->first), &image, decrypt_));
+#else
   RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pair_ptr->first), &image));
+#endif

   if (decode_ == true) {
     Status rc = Decode(image, &image);

@@ -53,6 +53,23 @@ using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<Image

 class ImageFolderOp : public MappableLeafOp {
  public:
+#ifdef ENABLE_PYTHON
+  // Constructor
+  // @param int32_t num_wkrs - Num of workers reading images in parallel
+  // @param std::string - dir directory of ImageNetFolder
+  // @param int32_t queue_size - connector queue size
+  // @param bool recursive - read recursively
+  // @param bool do_decode - decode the images after reading
+  // @param std::set<std::string> &exts - set of file extensions to read, if empty, read everything under the dir
+  // @param std::map<std::string, int32_t> &map- map of folder name and class id
+  // @param std::unique_ptr<dataschema> data_schema - schema of data
+  // @param py::function decrypt - Image decryption function, which accepts the path of the encrypted image file
+  //     and returns the decrypted bytes data. Default: None, no decryption.
+  ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
+                const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
+                std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler,
+                py::function decrypt = py::none());
+#else
   // Constructor
   // @param int32_t num_wkrs - Num of workers reading images in parallel
   // @param std::string - dir directory of ImageNetFolder
@@ -65,6 +82,7 @@ class ImageFolderOp : public MappableLeafOp {
   ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
                 const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
                 std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
+#endif

   /// Destructor.
   ~ImageFolderOp() = default;
@@ -144,6 +162,9 @@ class ImageFolderOp : public MappableLeafOp {
   std::vector<ImageLabelPair> image_label_pairs_;
   std::unique_ptr<Queue<std::string>> folder_name_queue_;
   std::unique_ptr<Queue<FolderImagesPair>> image_name_queue_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -24,6 +24,36 @@ namespace dataset {
 MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler)
     : ParallelOp(num_wkrs, queue_size, std::move(sampler)) {}

+#ifdef ENABLE_PYTHON
+Status MappableLeafOp::ImageDecrypt(const std::string &path, std::shared_ptr<Tensor> *tensor,
+                                    const py::function &decrypt) {
+  RETURN_UNEXPECTED_IF_NULL(tensor);
+  if (py::isinstance<py::none>(decrypt)) {
+    RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
+  } else {
+    // Acquire Python GIL
+    py::gil_scoped_acquire gil_acquire;
+    if (Py_IsInitialized() == 0) {
+      RETURN_STATUS_ERROR(StatusCode::kMDPythonInterpreterFailure, "[Internal ERROR] Python Interpreter is finalized.");
+    }
+    try {
+      py::bytes ret_py_obj = decrypt(path);
+      int64_t num_bytes = len(ret_py_obj);
+      CHECK_FAIL_RETURN_UNEXPECTED(num_bytes < kDeMaxDim,
+                                   "The length of decrypted bytes returned by the decryption function exceeds the "
+                                   "maximum value of int64, check path: " +
+                                     path);
+      std::string ret_str = ret_py_obj;
+      RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape{num_bytes}, DataType(DataType::DE_UINT8),
+                                                reinterpret_cast<const uchar *>(ret_str.c_str()), num_bytes, tensor));
+    } catch (const py::error_already_set &e) {
+      RETURN_STATUS_ERROR(StatusCode::kMDPyFuncException, e.what());
+    }
+  }
+  return Status::OK();
+}
+#endif
+
 // Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work
 Status MappableLeafOp::operator()() {
   // Registering and launching worker threads have to be before in sync with caller (i.e., before FindMe()::Post())

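In Python terms, the helper above behaves roughly like this sketch (an illustration of the control flow only, not MindSpore API; `np.frombuffer` stands in for `Tensor::CreateFromMemory`):

    import numpy as np

    def image_decrypt(path, decrypt=None):
        if decrypt is None:
            # No decrypt callable supplied: fall back to plain file reading,
            # as the py::none branch does.
            with open(path, 'rb') as f:
                data = f.read()
        else:
            # The callable must return bytes; exceptions raised inside it are
            # surfaced as a PyFuncException status on the C++ side.
            data = decrypt(path)
        # The C++ code wraps the raw bytes in a 1-D uint8 tensor.
        return np.frombuffer(data, dtype=np.uint8)
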
@@ -68,6 +68,16 @@ class MappableLeafOp : public ParallelOp<std::unique_ptr<IOBlock>, TensorRow>, p
   /// @return Name of the current Op
   std::string Name() const override { return "MappableLeafPp"; }

+#ifdef ENABLE_PYTHON
+  /// \brief Decrypt the encrypted image data as a public function.
+  /// \param[in] path - The path of the image that needs to be decrypted.
+  /// \param[in] decrypt - Image decryption function. Default: None, no decryption.
+  /// \param[out] tensor - Returned tensor.
+  /// \return Status code.
+  static Status ImageDecrypt(const std::string &path, std::shared_ptr<Tensor> *tensor,
+                             const py::function &decrypt = py::none());
+#endif
+
  protected:
   /// Initialize Sampler, calls sampler->Init() within
   /// @return Status The status code returned

@@ -43,6 +43,22 @@ const char kSegmentationExtension[] = ".png";
 const char kAnnotationExtension[] = ".xml";
 const char kImageSetsExtension[] = ".txt";

+#ifdef ENABLE_PYTHON
+VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
+             const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
+             std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata,
+             py::function decrypt)
+    : MappableLeafOp(num_workers, queue_size, std::move(sampler)),
+      decode_(decode),
+      row_cnt_(0),
+      task_type_(task_type),
+      usage_(task_mode),
+      folder_path_(folder_path),
+      class_index_(class_index),
+      data_schema_(std::move(data_schema)),
+      extra_metadata_(extra_metadata),
+      decrypt_(std::move(decrypt)) {}
+#else
 VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
              const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
              std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata)
@@ -55,6 +71,7 @@ VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std:
       class_index_(class_index),
       data_schema_(std::move(data_schema)),
       extra_metadata_(extra_metadata) {}
+#endif

 void VOCOp::Print(std::ostream &out, bool show_all) const {
   if (!show_all) {
@@ -253,7 +270,11 @@ Status VOCOp::PrepareData() {
   return Status::OK();
 }
 Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
+#ifdef ENABLE_PYTHON
+  RETURN_IF_NOT_OK(MappableLeafOp::ImageDecrypt(path, tensor, decrypt_));
+#else
   RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
+#endif
   if (decode_ == true) {
     Status rc = Decode(*tensor, tensor);
     if (rc.IsError()) {

@@ -50,6 +50,25 @@ class VOCOp : public MappableLeafOp {
  public:
   enum class TaskType { Segmentation = 0, Detection = 1 };

+#ifdef ENABLE_PYTHON
+  // Constructor
+  // @param TaskType task_type - task type of VOC
+  // @param std::string task_mode - task mode of VOC
+  // @param std::string folder_path - dir directory of VOC
+  // @param std::map<std::string, int32_t> class_index - input class-to-index of annotation
+  // @param int32_t num_workers - number of workers reading images in parallel
+  // @param int32_t queue_size - connector queue size
+  // @param bool decode - whether to decode images
+  // @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset
+  // @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read
+  // @param extra_metadata - flag to add extra meta-data to row
+  // @param py::function decrypt - Image decryption function, which accepts the path of the encrypted image file
+  //     and returns the decrypted bytes data. Default: None, no decryption.
+  VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
+        const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
+        std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata,
+        py::function decrypt = py::none());
+#else
   // Constructor
   // @param TaskType task_type - task type of VOC
   // @param std::string task_mode - task mode of VOC
@@ -64,6 +83,7 @@ class VOCOp : public MappableLeafOp {
   VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
         const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
         std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler, bool extra_metadata);
+#endif

   // Destructor
   ~VOCOp() = default;
@@ -150,6 +170,9 @@ class VOCOp : public MappableLeafOp {
   std::map<std::string, int32_t> class_index_;
   std::map<std::string, int32_t> label_index_;
   std::map<std::string, Annotation> annotation_map_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -32,6 +32,20 @@
 namespace mindspore {
 namespace dataset {

+#ifdef ENABLE_PYTHON
+// Constructor for CelebANode
+CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage,
+                       const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
+                       const std::set<std::string> &extensions, const std::shared_ptr<DatasetCache> &cache,
+                       py::function decrypt)
+    : MappableSourceNode(std::move(cache)),
+      dataset_dir_(dataset_dir),
+      usage_(usage),
+      sampler_(sampler),
+      decode_(decode),
+      extensions_(extensions),
+      decrypt_(decrypt) {}
+#else
 // Constructor for CelebANode
 CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage,
                        const std::shared_ptr<SamplerObj> &sampler, const bool &decode,
@@ -42,10 +56,15 @@ CelebANode::CelebANode(const std::string &dataset_dir, const std::string &usage,
       sampler_(sampler),
       decode_(decode),
       extensions_(extensions) {}
+#endif

 std::shared_ptr<DatasetNode> CelebANode::Copy() {
   std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
+#ifdef ENABLE_PYTHON
+  auto node = std::make_shared<CelebANode>(dataset_dir_, usage_, sampler, decode_, extensions_, cache_, decrypt_);
+#else
   auto node = std::make_shared<CelebANode>(dataset_dir_, usage_, sampler, decode_, extensions_, cache_);
+#endif
   node->SetNumWorkers(num_workers_);
   node->SetConnectorQueueSize(connector_que_size_);
   return node;
@@ -75,8 +94,13 @@ Status CelebANode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops
   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));

+#ifdef ENABLE_PYTHON
+  auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_,
+                                              extensions_, std::move(schema), std::move(sampler_rt), decrypt_);
+#else
   auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_,
                                               extensions_, std::move(schema), std::move(sampler_rt));
+#endif
   celeba_op->SetTotalRepeats(GetTotalRepeats());
   celeba_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
   node_ops->push_back(celeba_op);

@@ -29,9 +29,16 @@ namespace mindspore {
 namespace dataset {
 class CelebANode : public MappableSourceNode {
  public:
+#ifdef ENABLE_PYTHON
+  /// \brief Constructor
+  CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler,
+             const bool &decode, const std::set<std::string> &extensions, const std::shared_ptr<DatasetCache> &cache,
+             py::function decrypt = py::none());
+#else
   /// \brief Constructor
   CelebANode(const std::string &dataset_dir, const std::string &usage, const std::shared_ptr<SamplerObj> &sampler,
              const bool &decode, const std::set<std::string> &extensions, const std::shared_ptr<DatasetCache> &cache);
+#endif

   /// \brief Destructor
   ~CelebANode() override = default;
@@ -102,6 +109,9 @@ class CelebANode : public MappableSourceNode {
   bool decode_;
   std::set<std::string> extensions_;
   std::shared_ptr<SamplerObj> sampler_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -30,6 +30,20 @@
 namespace mindspore {
 namespace dataset {

+#ifdef ENABLE_PYTHON
+// Constructor for CocoNode
+CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
+                   const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
+                   const bool &extra_metadata, py::function decrypt)
+    : MappableSourceNode(std::move(cache)),
+      dataset_dir_(dataset_dir),
+      annotation_file_(annotation_file),
+      task_(task),
+      decode_(decode),
+      sampler_(sampler),
+      extra_metadata_(extra_metadata),
+      decrypt_(decrypt) {}
+#else
 // Constructor for CocoNode
 CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
                    const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
@@ -41,11 +55,17 @@ CocoNode::CocoNode(const std::string &dataset_dir, const std::string &annotation
       decode_(decode),
       sampler_(sampler),
       extra_metadata_(extra_metadata) {}
+#endif

 std::shared_ptr<DatasetNode> CocoNode::Copy() {
   std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
+#ifdef ENABLE_PYTHON
+  auto node = std::make_shared<CocoNode>(dataset_dir_, annotation_file_, task_, decode_, sampler, cache_,
+                                         extra_metadata_, decrypt_);
+#else
   auto node =
     std::make_shared<CocoNode>(dataset_dir_, annotation_file_, task_, decode_, sampler, cache_, extra_metadata_);
+#endif
   node->SetNumWorkers(num_workers_);
   node->SetConnectorQueueSize(connector_que_size_);
   return node;
@@ -135,9 +155,15 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));

+#ifdef ENABLE_PYTHON
+  std::shared_ptr<CocoOp> op =
+    std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_,
+                             std::move(schema), std::move(sampler_rt), extra_metadata_, decrypt_);
+#else
   std::shared_ptr<CocoOp> op =
     std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_,
                              std::move(schema), std::move(sampler_rt), extra_metadata_);
+#endif
   op->SetTotalRepeats(GetTotalRepeats());
   op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
   node_ops->push_back(op);

@@ -27,10 +27,17 @@ namespace mindspore {
 namespace dataset {
 class CocoNode : public MappableSourceNode {
  public:
+#ifdef ENABLE_PYTHON
+  /// \brief Constructor.
+  CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
+           const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
+           const bool &extra_metadata, py::function decrypt = py::none());
+#else
   /// \brief Constructor.
   CocoNode(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task,
            const bool &decode, const std::shared_ptr<SamplerObj> &sampler, std::shared_ptr<DatasetCache> cache,
            const bool &extra_metadata);
+#endif

   /// \brief Destructor.
   ~CocoNode() override = default;
@@ -103,6 +110,9 @@ class CocoNode : public MappableSourceNode {
   bool decode_;
   std::shared_ptr<SamplerObj> sampler_;
   bool extra_metadata_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -32,6 +32,20 @@
 namespace mindspore {
 namespace dataset {

+#ifdef ENABLE_PYTHON
+ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
+                                 bool recursive, std::set<std::string> extensions,
+                                 std::map<std::string, int32_t> class_indexing,
+                                 std::shared_ptr<DatasetCache> cache = nullptr, py::function decrypt)
+    : MappableSourceNode(std::move(cache)),
+      dataset_dir_(dataset_dir),
+      decode_(decode),
+      sampler_(sampler),
+      recursive_(recursive),
+      class_indexing_(class_indexing),
+      exts_(extensions),
+      decrypt_(decrypt) {}
+#else
 ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler,
                                  bool recursive, std::set<std::string> extensions,
                                  std::map<std::string, int32_t> class_indexing,
@@ -43,11 +57,17 @@ ImageFolderNode::ImageFolderNode(std::string dataset_dir, bool decode, std::shar
       recursive_(recursive),
       class_indexing_(class_indexing),
       exts_(extensions) {}
+#endif

 std::shared_ptr<DatasetNode> ImageFolderNode::Copy() {
   std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
+#ifdef ENABLE_PYTHON
+  auto node = std::make_shared<ImageFolderNode>(dataset_dir_, decode_, sampler, recursive_, exts_, class_indexing_,
+                                                cache_, decrypt_);
+#else
   auto node =
     std::make_shared<ImageFolderNode>(dataset_dir_, decode_, sampler, recursive_, exts_, class_indexing_, cache_);
+#endif
   node->SetNumWorkers(num_workers_);
   node->SetConnectorQueueSize(connector_que_size_);
   return node;
@@ -77,11 +97,17 @@ Status ImageFolderNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const nod
   std::shared_ptr<SamplerRT> sampler_rt = nullptr;
   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));

+#ifdef ENABLE_PYTHON
+  auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_,
+                                            class_indexing_, std::move(schema), std::move(sampler_rt), decrypt_);
+#else
   auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_,
                                             class_indexing_, std::move(schema), std::move(sampler_rt));
+#endif
   op->SetTotalRepeats(GetTotalRepeats());
   op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
   node_ops->push_back(op);

   return Status::OK();
 }

@@ -32,11 +32,17 @@ namespace dataset {
 /// \brief A Dataset derived class to represent ImageFolder dataset
 class ImageFolderNode : public MappableSourceNode {
  public:
+#ifdef ENABLE_PYTHON
+  /// \brief Constructor
+  ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
+                  std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing,
+                  std::shared_ptr<DatasetCache> cache, py::function decrypt = py::none());
+#else
   /// \brief Constructor
   ImageFolderNode(std::string dataset_dir, bool decode, std::shared_ptr<SamplerObj> sampler, bool recursive,
                   std::set<std::string> extensions, std::map<std::string, int32_t> class_indexing,
                   std::shared_ptr<DatasetCache> cache);

+#endif
   /// \brief Destructor
   ~ImageFolderNode() override = default;
@@ -108,6 +114,9 @@ class ImageFolderNode : public MappableSourceNode {
   std::shared_ptr<SamplerObj> sampler_;
   std::map<std::string, int32_t> class_indexing_;
   std::set<std::string> exts_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

@@ -31,6 +31,21 @@
 namespace mindspore {
 namespace dataset {

+#ifdef ENABLE_PYTHON
+// Constructor for VOCNode
+VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
+                 const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
+                 std::shared_ptr<DatasetCache> cache, bool extra_metadata, py::function decrypt)
+    : MappableSourceNode(std::move(cache)),
+      dataset_dir_(dataset_dir),
+      task_(task),
+      usage_(usage),
+      class_index_(class_indexing),
+      decode_(decode),
+      sampler_(sampler),
+      extra_metadata_(extra_metadata),
+      decrypt_(decrypt) {}
+#else
 // Constructor for VOCNode
 VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
                  const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
@@ -43,11 +58,17 @@ VOCNode::VOCNode(const std::string &dataset_dir, const std::string &task, const
       decode_(decode),
       sampler_(sampler),
       extra_metadata_(extra_metadata) {}
+#endif

 std::shared_ptr<DatasetNode> VOCNode::Copy() {
   std::shared_ptr<SamplerObj> sampler = (sampler_ == nullptr) ? nullptr : sampler_->SamplerCopy();
+#ifdef ENABLE_PYTHON
+  auto node = std::make_shared<VOCNode>(dataset_dir_, task_, usage_, class_index_, decode_, sampler, cache_,
+                                        extra_metadata_, decrypt_);
+#else
   auto node =
     std::make_shared<VOCNode>(dataset_dir_, task_, usage_, class_index_, decode_, sampler, cache_, extra_metadata_);
+#endif
   node->SetNumWorkers(num_workers_);
   node->SetConnectorQueueSize(connector_que_size_);
   return node;
@@ -125,11 +146,18 @@ Status VOCNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
   RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));

   std::shared_ptr<VOCOp> voc_op;
+#ifdef ENABLE_PYTHON
+  voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
+                                   decode_, std::move(schema), std::move(sampler_rt), extra_metadata_, decrypt_);
+
+#else
   voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
                                    decode_, std::move(schema), std::move(sampler_rt), extra_metadata_);
+#endif
   voc_op->SetTotalRepeats(GetTotalRepeats());
   voc_op->SetNumRepeatsPerEpoch(GetNumRepeatsPerEpoch());
   node_ops->push_back(voc_op);

   return Status::OK();
 }

@@ -28,10 +28,17 @@ namespace mindspore {
 namespace dataset {
 class VOCNode : public MappableSourceNode {
  public:
+#ifdef ENABLE_PYTHON
+  /// \brief Constructor
+  VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
+          const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
+          std::shared_ptr<DatasetCache> cache, bool extra_metadata = false, py::function decrypt = py::none());
+#else
   /// \brief Constructor
   VOCNode(const std::string &dataset_dir, const std::string &task, const std::string &usage,
           const std::map<std::string, int32_t> &class_indexing, bool decode, std::shared_ptr<SamplerObj> sampler,
           std::shared_ptr<DatasetCache> cache, bool extra_metadata = false);
+#endif

   /// \brief Destructor
   ~VOCNode() override = default;
@@ -112,6 +119,9 @@ class VOCNode : public MappableSourceNode {
   bool decode_;
   std::shared_ptr<SamplerObj> sampler_;
   bool extra_metadata_;
+#ifdef ENABLE_PYTHON
+  py::function decrypt_;
+#endif
 };
 }  // namespace dataset
 }  // namespace mindspore

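Taken together, the C++ changes thread the callable through every layer. A runnable recap of the call path (the directory path and 10-byte-header scheme are placeholders; the layer names in the comments come from the diff itself):

    import mindspore.dataset as ds

    def my_decrypt(path):
        # Illustrative scheme: plaintext image follows a 10-byte header.
        with open(path, 'rb') as f:
            return f.read()[10:]

    # The callable travels: VOCDataset.__init__ stores it; parse() forwards it
    # to cde.VOCNode; VOCNode::Build() hands it to VOCOp; and
    # VOCOp::ReadImageToTensor() finally calls MappableLeafOp::ImageDecrypt
    # with it for every image read.
    dataset = ds.VOCDataset("/path/to/encrypted_voc", task="Segmentation",
                            usage="train", decrypt=my_decrypt)
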
@@ -429,6 +429,8 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
             (default=None, which means no cache is used).
+        decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
+            and returns the decrypted bytes data. Default: None, no decryption.

     Raises:
         RuntimeError: If `dataset_dir` does not contain data files.
@@ -548,13 +550,14 @@ class CelebADataset(MappableDataset, VisionBaseDataset):

     @check_celebadataset
     def __init__(self, dataset_dir, num_parallel_workers=None, shuffle=None, usage='all', sampler=None, decode=False,
-                 extensions=None, num_samples=None, num_shards=None, shard_id=None, cache=None):
+                 extensions=None, num_samples=None, num_shards=None, shard_id=None, cache=None, decrypt=None):
         super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                          shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
         self.dataset_dir = dataset_dir
         self.decode = replace_none(decode, False)
         self.extensions = replace_none(extensions, [])
         self.usage = replace_none(usage, "all")
+        self.decrypt = decrypt

     def parse(self, children=None):
         if self.usage != "all":
@@ -562,7 +565,8 @@ class CelebADataset(MappableDataset, VisionBaseDataset):
             partition_file = os.path.join(dataset_dir, "list_eval_partition.txt")
             if os.path.exists(partition_file) is False:
                 raise RuntimeError("Partition file can not be found when usage is not 'all'.")
-        return cde.CelebANode(self.dataset_dir, self.usage, self.sampler, self.decode, self.extensions)
+        return cde.CelebANode(self.dataset_dir, self.usage, self.sampler, self.decode,
+                              self.extensions, self.decrypt)
@@ -1023,6 +1027,8 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
             (default=None, which means no cache is used).
         extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column will be
             output at the end :py:obj:`[_meta-filename, dtype=string]` (default=False).
+        decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
+            and returns the decrypted bytes data. Default: None, no decryption.

     The generated dataset with different task setting has different output columns:

@@ -1195,7 +1201,7 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
     @check_cocodataset
     def __init__(self, dataset_dir, annotation_file, task="Detection", num_samples=None, num_parallel_workers=None,
                  shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None, cache=None,
-                 extra_metadata=False):
+                 extra_metadata=False, decrypt=None):
         super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                          shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
         self.dataset_dir = dataset_dir
@@ -1203,10 +1209,11 @@ class CocoDataset(MappableDataset, VisionBaseDataset):
         self.task = replace_none(task, "Detection")
         self.decode = replace_none(decode, False)
         self.extra_metadata = extra_metadata
+        self.decrypt = decrypt

     def parse(self, children=None):
         return cde.CocoNode(self.dataset_dir, self.annotation_file, self.task, self.decode, self.sampler,
-                            self.extra_metadata)
+                            self.extra_metadata, self.decrypt)

     def get_class_indexing(self):
         """
@@ -2203,6 +2210,8 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
             argument can only be specified when `num_shards` is also specified.
         cache (DatasetCache, optional): Use tensor caching service to speed up dataset processing.
             (default=None, which means no cache is used).
+        decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
+            and returns the decrypted bytes data. Default: None, no decryption.

     Raises:
         RuntimeError: If `dataset_dir` does not contain data files.
@@ -2287,7 +2296,8 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):

     @check_imagefolderdataset
     def __init__(self, dataset_dir, num_samples=None, num_parallel_workers=None, shuffle=None, sampler=None,
-                 extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None):
+                 extensions=None, class_indexing=None, decode=False, num_shards=None, shard_id=None, cache=None,
+                 decrypt=None):
         super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                          shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)

@@ -2295,9 +2305,11 @@ class ImageFolderDataset(MappableDataset, VisionBaseDataset):
         self.extensions = replace_none(extensions, [])
         self.class_indexing = replace_none(class_indexing, {})
         self.decode = replace_none(decode, False)
+        self.decrypt = decrypt

     def parse(self, children=None):
-        return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing)
+        return cde.ImageFolderNode(self.dataset_dir, self.decode, self.sampler, self.extensions, self.class_indexing,
+                                   self.decrypt)


 class KITTIDataset(MappableDataset):
@@ -4534,6 +4546,8 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
             (default=None, which means no cache is used).
         extra_metadata(bool, optional): Flag to add extra meta-data to row. If True, an additional column named
             :py:obj:`[_meta-filename, dtype=string]` will be output at the end (default=False).
+        decrypt (callable, optional): Image decryption function, which accepts the path of the encrypted image file
+            and returns the decrypted bytes data. Default: None, no decryption.

     Raises:
         RuntimeError: If `dataset_dir` does not contain data files.
@@ -4658,7 +4672,7 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
     @check_vocdataset
     def __init__(self, dataset_dir, task="Segmentation", usage="train", class_indexing=None, num_samples=None,
                  num_parallel_workers=None, shuffle=None, decode=False, sampler=None, num_shards=None, shard_id=None,
-                 cache=None, extra_metadata=False):
+                 cache=None, extra_metadata=False, decrypt=None):
         super().__init__(num_parallel_workers=num_parallel_workers, sampler=sampler, num_samples=num_samples,
                          shuffle=shuffle, num_shards=num_shards, shard_id=shard_id, cache=cache)
         self.dataset_dir = dataset_dir
@@ -4667,10 +4681,11 @@ class VOCDataset(MappableDataset, VisionBaseDataset):
         self.class_indexing = replace_none(class_indexing, {})
         self.decode = replace_none(decode, False)
         self.extra_metadata = extra_metadata
+        self.decrypt = decrypt

     def parse(self, children=None):
         return cde.VOCNode(self.dataset_dir, self.task, self.usage, self.class_indexing, self.decode, self.sampler,
-                           self.extra_metadata)
+                           self.extra_metadata, self.decrypt)

     def get_class_indexing(self):
         """

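With the Python API in place, a more realistic decrypt callable can wrap an actual cipher. A hedged sketch (assumes the third-party `cryptography` package and images pre-encrypted with the same Fernet key; the key and paths are placeholders):

    from cryptography.fernet import Fernet

    import mindspore.dataset as ds

    key = b"..."  # placeholder: the key the images were encrypted with
    fernet = Fernet(key)

    def fernet_decrypt(cipher_file):
        # Read the Fernet token from disk and return plaintext image bytes.
        with open(cipher_file, 'rb') as f:
            return fernet.decrypt(f.read())

    dataset = ds.CocoDataset("/path/to/encrypted_images",
                             annotation_file="/path/to/annotation.json",
                             task="Detection", decrypt=fernet_decrypt)
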
@@ -111,6 +111,10 @@ def check_imagefolderdataset(method):
         dataset_dir = param_dict.get('dataset_dir')
         check_dir(dataset_dir)

+        decrypt = param_dict.get('decrypt')
+        if decrypt is not None and not callable(decrypt):
+            raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
+
         validate_dataset_param_value(nreq_param_int, param_dict, int)
         validate_dataset_param_value(nreq_param_bool, param_dict, bool)
         validate_dataset_param_value(nreq_param_list, param_dict, list)
@@ -689,6 +693,10 @@ def check_caltech256_dataset(method):
         dataset_dir = param_dict.get('dataset_dir')
         check_dir(dataset_dir)

+        decrypt = param_dict.get('decrypt')
+        if decrypt is not None and not callable(decrypt):
+            raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
+
         validate_dataset_param_value(nreq_param_int, param_dict, int)
         validate_dataset_param_value(nreq_param_bool, param_dict, bool)
         check_sampler_shuffle_shard_options(param_dict)
@@ -731,6 +739,10 @@ def check_vocdataset(method):
         else:
             raise ValueError("Invalid task : " + task + ".")

+        decrypt = param_dict.get('decrypt')
+        if decrypt is not None and not callable(decrypt):
+            raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
+
         check_file(imagesets_file)

         validate_dataset_param_value(nreq_param_int, param_dict, int)
@@ -768,6 +780,10 @@ def check_cocodataset(method):
         if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint', 'Captioning'}:
             raise ValueError("Invalid task type: " + task + ".")

+        decrypt = param_dict.get('decrypt')
+        if decrypt is not None and not callable(decrypt):
+            raise TypeError("Argument decrypt is not a callable object, but got " + str(type(decrypt)))
+
         validate_dataset_param_value(nreq_param_int, param_dict, int)

         validate_dataset_param_value(nreq_param_bool, param_dict, bool)

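The validator change means a bad `decrypt` argument fails fast at construction time. A quick illustration (the directory path is a placeholder):

    import mindspore.dataset as ds

    try:
        ds.ImageFolderDataset("/path/to/data", decrypt="not-a-function")
    except TypeError as e:
        # Argument decrypt is not a callable object, but got <class 'str'>
        print(e)
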
@@ -12,12 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
+import os
+import shutil
+
+import numpy as np
 import pytest

 import mindspore.dataset as ds
+import mindspore.dataset.transforms as data_trans
 import mindspore.dataset.vision as vision
 from mindspore import log as logger

 DATA_DIR = "../data/dataset/testPK/data"
+DATA_DIR_2 = "../data/dataset/testImageNetData2/train"
+DATA_DIR_3 = "../data/dataset/testImageNetData2/encrypt_train"


 def test_imagefolder_basic():
@@ -903,6 +909,108 @@ def test_imagefolder_exception():
     assert "should be file, but got directory" in str(e)


+def encrypt_func():
+    """
+    Feature: Encrypt function
+    Description: Encrypt and save the image
+    Expectation: Success
+    """
+    plain_dir = os.path.realpath(DATA_DIR_2)
+    cipher_dir = os.path.realpath(DATA_DIR_3)
+
+    for root, _, files in os.walk(plain_dir):
+        for f in files:
+            fn = os.path.join(root, f)
+
+            enc_file = os.path.join(cipher_dir, os.path.relpath(fn, plain_dir))
+            os.makedirs(os.path.dirname(enc_file), exist_ok=True)
+
+            with open(fn, 'rb') as f:
+                content = f.read()
+
+            new_content = b'helloworld' + content
+
+            with open(enc_file, 'wb') as f:
+                f.write(new_content)
+
+
+def decrypt_func(cipher_file):
+    """
+    Feature: Decrypt function
+    Description: Decrypt encrypted image data
+    Expectation: Decryption is successful, return bytes type data
+    """
+    with open(cipher_file, 'rb') as f:
+        content = f.read()
+    new_content = content[10:]
+    return new_content
+
+
+def test_imagefolder_decrypt():
+    """
+    Feature: Test imagefolder decrypt
+    Description: Support decrypting encrypted image data
+    Expectation: Success
+    """
+    logger.info("Test imagefolder decrypt")
+
+    encrypt_func()
+
+    resize_height = 224
+    resize_width = 224
+
+    # Create dataset and define map operations
+    ds1 = ds.ImageFolderDataset(DATA_DIR_3, decrypt=decrypt_func)
+
+    num_classes = 3
+    decode_op = vision.Decode()
+    resize_op = vision.Resize((resize_height, resize_width), vision.Inter.LINEAR)
+    one_hot_encode = data_trans.OneHot(num_classes)  # num_classes is input argument
+
+    ds1 = ds1.map(operations=decode_op, input_columns=["image"])
+    ds1 = ds1.map(operations=resize_op, input_columns=["image"])
+    ds1 = ds1.map(operations=one_hot_encode, input_columns=["label"])
+
+    # apply batch operations
+    batch_size = 3
+    ds1 = ds1.batch(batch_size, drop_remainder=True)
+
+    ds2 = ds1
+    alpha = 0.2
+    transforms = [vision.MixUp(batch_size=batch_size, alpha=alpha, is_single=False)]
+    ds1 = ds1.map(operations=transforms, input_columns=["image", "label"])
+    num_iter = 0
+    batch1_image1 = 0
+    for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True),
+                            ds2.create_dict_iterator(num_epochs=1, output_numpy=True)):
+        image1 = data1["image"]
+        label1 = data1["label"]
+        logger.info("label: {}".format(label1))
+
+        image2 = data2["image"]
+        label2 = data2["label"]
+        logger.info("label2: {}".format(label2))
+
+        if num_iter == 0:
+            batch1_image1 = image1
+
+        if num_iter == 1:
+            lam = np.abs(label2 - label1)
+            logger.info("lam value in multi: {}".format(lam))
+            for index in range(batch_size):
+                if np.square(lam[index]).mean() != 0:
+                    lam_value = 1 - np.sum(lam[index]) / 2
+                    img_golden = lam_value * image2[index] + (1 - lam_value) * batch1_image1[index]
+                    assert image1[index].all() == img_golden.all()
+            logger.info("====test several batch mixup ok====")
+            break
+        num_iter += 1
+
+    if os.path.exists(DATA_DIR_3):
+        shutil.rmtree(DATA_DIR_3)
+
+
 if __name__ == '__main__':
     test_imagefolder_basic()
     logger.info('test_imagefolder_basic Ended.\n')
@@ -987,3 +1095,6 @@ if __name__ == '__main__':

     test_imagefolder_exception()
     logger.info('test_imagefolder_exception Ended.\n')
+
+    test_imagefolder_decrypt()
+    logger.info('test_imagefolder_decrypt Ended.\n')