all the updated input order

Fix FillOperation to_json function

Fix FillOperation to_json function

serialize tensor without testing

fix unity input order bug, next step is to test the tensor serialization

serialize tensor, testing code in tensor_test.cc

change for tensor serialization

changes after review

changes after review

clang-format the files

rename
This commit is contained in:
zetongzhao 2021-05-13 10:16:04 -04:00
parent f0078fc1ce
commit 4cb53823fd
97 changed files with 5580 additions and 194 deletions

View File

@ -612,13 +612,9 @@ std::shared_ptr<TensorOperation> RandomResizedCropWithBBox::Parse() {
// RandomRotation Transform Operation.
struct RandomRotation::Data {
Data(const std::vector<float> &degrees, InterpolationMode interpolation_mode, bool expand,
const std::vector<float> &center, const std::vector<uint8_t> &fill_value)
: degrees_(degrees),
interpolation_mode_(interpolation_mode),
expand_(expand),
center_(center),
fill_value_(fill_value) {}
Data(const std::vector<float> &degrees, InterpolationMode resample, bool expand, const std::vector<float> &center,
const std::vector<uint8_t> &fill_value)
: degrees_(degrees), interpolation_mode_(resample), expand_(expand), center_(center), fill_value_(fill_value) {}
std::vector<float> degrees_;
InterpolationMode interpolation_mode_;
std::vector<float> center_;
@ -626,9 +622,9 @@ struct RandomRotation::Data {
std::vector<uint8_t> fill_value_;
};
RandomRotation::RandomRotation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand,
RandomRotation::RandomRotation(std::vector<float> degrees, InterpolationMode resample, bool expand,
std::vector<float> center, std::vector<uint8_t> fill_value)
: data_(std::make_shared<Data>(degrees, interpolation_mode, expand, center, fill_value)) {}
: data_(std::make_shared<Data>(degrees, resample, expand, center, fill_value)) {}
std::shared_ptr<TensorOperation> RandomRotation::Parse() {
return std::make_shared<RandomRotationOperation>(data_->degrees_, data_->interpolation_mode_, data_->expand_,

View File

@ -448,6 +448,13 @@ void Tensor::Print(std::ostream &out) const {
out << "[Data area is null]";
}
}
// Writes the tensor's data to `out` without any surrounding description.
// Nothing is written when the tensor has no data buffer.
void Tensor::Print_data(std::ostream &out) const {
  if (!data_) {
    return;
  }
  // Delegate to PrintRecursive, starting from 0 with an empty index vector.
  PrintRecursive(out, 0, std::vector<dsize_t>{});
}
Status Tensor::AllocateBuffer(const dsize_t &length) {
RETURN_UNEXPECTED_IF_NULL(data_allocator_);
if (data_ == nullptr) {
@ -620,6 +627,17 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
}
#endif
// Serializes this tensor into a JSON object with three string fields:
//   "shape" - shape_.ToString()
//   "type"  - type_.ToString()
//   "data"  - the text produced by Print_data()
// \param[out] out_json receives the resulting object; any previous content is replaced
// \return Status::OK()
Status Tensor::to_json(nlohmann::json *out_json) {
  // Render the data through the existing printer so the JSON carries the same text form.
  std::stringstream data_stream;
  this->Print_data(data_stream);

  nlohmann::json tensor_json;
  tensor_json["shape"] = shape_.ToString();
  tensor_json["type"] = type_.ToString();
  tensor_json["data"] = data_stream.str();

  *out_json = tensor_json;
  return Status::OK();
}
template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
if (data_ == nullptr) {

View File

@ -217,6 +217,8 @@ class Tensor {
bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }
Status to_json(nlohmann::json *out_json);
/// Get item located at `index`, caller needs to provide the type.
/// \tparam T
/// \param[in] index vector<dsize_t>
@ -661,6 +663,10 @@ class Tensor {
/// \param[out] out output stream
void Print(std::ostream &out) const;
/// A function that prints only the data of the tensor; nothing is written when the data area is null
/// \param[out] out output stream
void Print_data(std::ostream &out) const;
/// A function that print the value as specified by its index
/// \param[in] index vector representing the index
/// \param[out] out

View File

@ -35,7 +35,7 @@ ConcatOp::Builder::Builder() {
// The builder "build" method creates the final object.
Status ConcatOp::Builder::Build(std::shared_ptr<ConcatOp> *ptr) {
if (builder_sampler_ == nullptr) {
builder_sampler_ = std::make_shared<DistributedSamplerRT>(0, 1, 0, false);
builder_sampler_ = std::make_shared<DistributedSamplerRT>(1, 0, false, 0);
}
*ptr = std::make_shared<ConcatOp>(builder_sampler_, children_flag_and_nums_, children_start_end_index_);
return Status::OK();

View File

@ -23,13 +23,13 @@
namespace mindspore {
namespace dataset {
DistributedSamplerRT::DistributedSamplerRT(int64_t num_samples, int64_t num_dev, int64_t dev_id, bool shuffle,
DistributedSamplerRT::DistributedSamplerRT(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
uint32_t seed, int64_t offset, bool even_dist)
: SamplerRT(num_samples, std::numeric_limits<int64_t>::max()),
cnt_(0),
seed_(seed == std::numeric_limits<uint32_t>::max() ? GetSeed() : seed),
device_id_(dev_id),
num_devices_(num_dev),
device_id_(shard_id),
num_devices_(num_shards),
shuffle_(shuffle),
even_dist_(even_dist),
offset_(offset),

View File

@ -28,10 +28,10 @@ namespace dataset {
class DistributedSamplerRT : public SamplerRT {
public:
/// \brief Constructor
/// \param[in] num_samples The total number of rows in the dataset
/// \param[in] num_dev Total number of shards for the distributed sampler
/// \param[in] dev_id Device id of the shard
/// \param[in] num_shards Total number of shards for the distributed sampler
/// \param[in] shard_id Device id of the shard
/// \param[in] shuffle Option to shuffle
/// \param[in] num_samples The total number of rows in the dataset
/// \param[in] seed Seed parameter to shuffle, default to max unsigned int (different seed in sampler will
/// result in different samples being picked)
/// \param[in] offset The starting device id where the elements in the dataset are send to, which should be no more
@ -40,7 +40,7 @@ class DistributedSamplerRT : public SamplerRT {
/// This option is not exposed in the python API. Current behavior is that the remainder will always
/// be handled by the first n shards, n being the corresponding device id. Please notice that when offset is set,
/// even_dist will be forcibly converted to false for sending rest datasets in concatdataset scenario.
DistributedSamplerRT(int64_t num_samples, int64_t num_dev, int64_t dev_id, bool shuffle,
DistributedSamplerRT(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples,
uint32_t seed = std::numeric_limits<uint32_t>::max(), int64_t offset = -1,
bool even_dist = true);

View File

@ -20,12 +20,12 @@
namespace mindspore {
namespace dataset {
PKSamplerRT::PKSamplerRT(int64_t num_samples, int64_t val, bool shuffle, int64_t samples_per_tensor)
PKSamplerRT::PKSamplerRT(int64_t num_val, bool shuffle, int64_t num_samples, int64_t samples_per_tensor)
: SamplerRT(num_samples, samples_per_tensor),
shuffle_(shuffle),
seed_(GetSeed()),
next_id_(0),
samples_per_class_(val) {}
samples_per_class_(num_val) {}
Status PKSamplerRT::InitSampler() {
if (is_initialized) {

View File

@ -28,11 +28,11 @@ namespace mindspore {
namespace dataset {
class PKSamplerRT : public SamplerRT { // NOT YET FINISHED
public:
// @param num_samples - the number of samples to draw. value of 0 means to take the full amount
// @param int64_t val
// @param int64_t num_val - the number of samples per class
// @param bool shuffle - shuffle all classIds or not, if true, classes may be 5,1,4,3,2
// @param num_samples - the number of samples to draw. value of 0 means to take the full amount
// @param int64_t samples_per_tensor - Num of Sampler Ids to fetch via 1 GetNextSample call
PKSamplerRT(int64_t num_samples, int64_t val, bool shuffle,
PKSamplerRT(int64_t num_val, bool shuffle, int64_t num_samples,
int64_t samples_per_tensor = std::numeric_limits<int64_t>::max());
// default destructor

View File

@ -22,7 +22,7 @@
namespace mindspore {
namespace dataset {
RandomSamplerRT::RandomSamplerRT(int64_t num_samples, bool replacement, bool reshuffle_each_epoch,
RandomSamplerRT::RandomSamplerRT(bool replacement, int64_t num_samples, bool reshuffle_each_epoch,
int64_t samples_per_tensor)
: SamplerRT(num_samples, samples_per_tensor),
seed_(GetSeed()),

View File

@ -27,11 +27,11 @@ namespace dataset {
class RandomSamplerRT : public SamplerRT {
public:
// Constructor
// @param int64_t num_samples - number samples to draw
// @param bool replacement - put the id back or not after it has been sampled
// @param int64_t num_samples - number samples to draw
// @param reshuffle_each_epoch - T/F to reshuffle after epoch
// @param int64_t samples_per_tensor - Num of Sampler Ids to fetch via 1 GetNextSample call
RandomSamplerRT(int64_t num_samples, bool replacement, bool reshuffle_each_epoch,
RandomSamplerRT(bool replacement, int64_t num_samples, bool reshuffle_each_epoch,
int64_t samples_per_tensor = std::numeric_limits<int64_t>::max());
// Destructor.

View File

@ -21,7 +21,7 @@
namespace mindspore {
namespace dataset {
SequentialSamplerRT::SequentialSamplerRT(int64_t num_samples, int64_t start_index, int64_t samples_per_tensor)
SequentialSamplerRT::SequentialSamplerRT(int64_t start_index, int64_t num_samples, int64_t samples_per_tensor)
: SamplerRT(num_samples, samples_per_tensor), current_id_(start_index), start_index_(start_index), id_count_(0) {}
Status SequentialSamplerRT::GetNextSample(TensorRow *out) {

View File

@ -26,11 +26,11 @@ namespace dataset {
class SequentialSamplerRT : public SamplerRT {
public:
// Constructor
// @param start_index - The starting index value
// @param num_samples - The number of samples to draw. A value of 0 indicates the sampler should produce the
// full amount of ids from the dataset
// @param start_index - The starting index value
// @param int64_t samples_per_tensor - Num of Sampler Ids to fetch via 1 GetNextSample call
SequentialSamplerRT(int64_t num_samples, int64_t start_index,
SequentialSamplerRT(int64_t start_index, int64_t num_samples,
int64_t samples_per_tensor = std::numeric_limits<int64_t>::max());
// Destructor.

View File

@ -25,9 +25,9 @@
namespace mindspore {
namespace dataset {
// Constructor.
SubsetRandomSamplerRT::SubsetRandomSamplerRT(int64_t num_samples, const std::vector<int64_t> &indices,
SubsetRandomSamplerRT::SubsetRandomSamplerRT(const std::vector<int64_t> &indices, int64_t num_samples,
int64_t samples_per_tensor)
: SubsetSamplerRT(num_samples, indices, samples_per_tensor) {}
: SubsetSamplerRT(indices, num_samples, samples_per_tensor) {}
// Initialized this Sampler.
Status SubsetRandomSamplerRT::InitSampler() {

View File

@ -29,11 +29,11 @@ namespace dataset {
class SubsetRandomSamplerRT : public SubsetSamplerRT {
public:
/// Constructor.
/// \param num_samples The number of samples to draw. 0 for the full amount.
/// \param indices List of indices from where we will randomly draw samples.
/// \param num_samples The number of samples to draw. 0 for the full amount.
/// \param samples_per_tensor The number of ids we draw on each call to GetNextSample().
/// When samples_per_tensor=0, GetNextSample() will draw all the sample ids and return them at once.
SubsetRandomSamplerRT(int64_t num_samples, const std::vector<int64_t> &indices,
SubsetRandomSamplerRT(const std::vector<int64_t> &indices, int64_t num_samples,
std::int64_t samples_per_tensor = std::numeric_limits<int64_t>::max());
/// Destructor.

View File

@ -22,7 +22,7 @@
namespace mindspore {
namespace dataset {
// Constructor.
SubsetSamplerRT::SubsetSamplerRT(int64_t num_samples, const std::vector<int64_t> &indices, int64_t samples_per_tensor)
SubsetSamplerRT::SubsetSamplerRT(const std::vector<int64_t> &indices, int64_t num_samples, int64_t samples_per_tensor)
: SamplerRT(num_samples, samples_per_tensor), indices_(indices), sample_id_(0) {}
// Initialized this Sampler.

View File

@ -28,11 +28,11 @@ namespace dataset {
class SubsetSamplerRT : public SamplerRT {
public:
/// Constructor.
/// \param num_samples The number of elements to sample. 0 for the full amount.
/// \param indices List of indices.
/// \param num_samples The number of elements to sample. 0 for the full amount.
/// \param samples_per_tensor The number of ids we draw on each call to GetNextSample().
/// When samples_per_tensor=0, GetNextSample() will draw all the sample ids and return them at once.
SubsetSamplerRT(int64_t num_samples, const std::vector<int64_t> &indices,
SubsetSamplerRT(const std::vector<int64_t> &indices, int64_t num_samples,
std::int64_t samples_per_tensor = std::numeric_limits<int64_t>::max());
/// Destructor.

View File

@ -27,7 +27,7 @@
namespace mindspore {
namespace dataset {
// Constructor.
WeightedRandomSamplerRT::WeightedRandomSamplerRT(int64_t num_samples, const std::vector<double> &weights,
WeightedRandomSamplerRT::WeightedRandomSamplerRT(const std::vector<double> &weights, int64_t num_samples,
bool replacement, int64_t samples_per_tensor)
: SamplerRT(num_samples, samples_per_tensor), weights_(weights), replacement_(replacement), sample_id_(0) {}

View File

@ -29,12 +29,12 @@ namespace dataset {
class WeightedRandomSamplerRT : public SamplerRT {
public:
// Constructor.
// @param num_samples Number of samples to be drawn.
// @param weights A list of sample weights.
// @param num_samples Number of samples to be drawn.
// @param replacement Determine if samples are drawn with/without replacement.
// @param samples_per_tensor The number of ids we draw on each call to GetNextSample().
// When samples_per_tensor=0, GetNextSample() will draw all the sample ids and return them at once.
WeightedRandomSamplerRT(int64_t num_samples, const std::vector<double> &weights, bool replacement,
WeightedRandomSamplerRT(const std::vector<double> &weights, int64_t num_samples, bool replacement,
int64_t samples_per_tensor = std::numeric_limits<int64_t>::max());
// Destructor.

View File

@ -77,7 +77,7 @@ Status DistributedSamplerObj::ValidateParams() {
Status DistributedSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
// runtime sampler object
*sampler = std::make_shared<dataset::DistributedSamplerRT>(num_samples_, num_shards_, shard_id_, shuffle_, seed_,
*sampler = std::make_shared<dataset::DistributedSamplerRT>(num_shards_, shard_id_, shuffle_, num_samples_, seed_,
offset_, even_dist_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;

View File

@ -71,7 +71,7 @@ Status PKSamplerObj::to_json(nlohmann::json *const out_json) {
Status PKSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
// runtime sampler object
*sampler = std::make_shared<dataset::PKSamplerRT>(num_samples_, num_val_, shuffle_);
*sampler = std::make_shared<dataset::PKSamplerRT>(num_val_, shuffle_, num_samples_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;
return s;

View File

@ -67,7 +67,7 @@ Status RandomSamplerObj::to_json(nlohmann::json *const out_json) {
Status RandomSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
// runtime sampler object
*sampler = std::make_shared<dataset::RandomSamplerRT>(num_samples_, replacement_, reshuffle_each_epoch_);
*sampler = std::make_shared<dataset::RandomSamplerRT>(replacement_, num_samples_, reshuffle_each_epoch_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;
return s;

View File

@ -72,7 +72,7 @@ Status SequentialSamplerObj::to_json(nlohmann::json *const out_json) {
Status SequentialSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
// runtime sampler object
*sampler = std::make_shared<dataset::SequentialSamplerRT>(num_samples_, start_index_);
*sampler = std::make_shared<dataset::SequentialSamplerRT>(start_index_, num_samples_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;
return s;

View File

@ -40,7 +40,7 @@ SubsetRandomSamplerObj::~SubsetRandomSamplerObj() = default;
Status SubsetRandomSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
// runtime sampler object
*sampler = std::make_shared<dataset::SubsetRandomSamplerRT>(num_samples_, indices_);
*sampler = std::make_shared<dataset::SubsetRandomSamplerRT>(indices_, num_samples_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;
return s;

View File

@ -49,7 +49,7 @@ Status SubsetSamplerObj::ValidateParams() {
Status SubsetSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
// runtime sampler object
*sampler = std::make_shared<dataset::SubsetSamplerRT>(num_samples_, indices_);
*sampler = std::make_shared<dataset::SubsetSamplerRT>(indices_, num_samples_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;
return s;

View File

@ -73,7 +73,7 @@ Status WeightedRandomSamplerObj::to_json(nlohmann::json *const out_json) {
}
Status WeightedRandomSamplerObj::SamplerBuild(std::shared_ptr<SamplerRT> *sampler) {
*sampler = std::make_shared<dataset::WeightedRandomSamplerRT>(num_samples_, weights_, replacement_);
*sampler = std::make_shared<dataset::WeightedRandomSamplerRT>(weights_, num_samples_, replacement_);
Status s = BuildChildren(sampler);
sampler = s.IsOk() ? sampler : nullptr;
return s;

View File

@ -0,0 +1,27 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
namespace mindspore {
namespace dataset {
namespace audio {} // namespace audio
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_

View File

@ -0,0 +1,86 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONFIG_H
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONFIG_H
#include <cstdint>
#include <string>
#include <vector>
#include "include/api/dual_abi_helper.h"
namespace mindspore {
namespace dataset {
// Config operations for setting and getting the configuration.
namespace config {
/// \brief Function to set the seed to be used in any random generator. This is used to produce deterministic results.
/// \param[in] seed the default seed to use.
bool set_seed(int32_t seed);
/// \brief Function to get the seed.
/// \return the seed set in the configuration.
uint32_t get_seed();
/// \brief Function to set the number of rows to be prefetched.
/// \param[in] prefetch_size total number of rows to be prefetched.
bool set_prefetch_size(int32_t prefetch_size);
/// \brief Function to get the prefetch size in number of rows.
/// \return total number of rows to be prefetched.
int32_t get_prefetch_size();
/// \brief Function to set the default number of parallel workers.
/// \param[in] num_parallel_workers number of parallel workers to be used as a default for each operation.
bool set_num_parallel_workers(int32_t num_parallel_workers);
/// \brief Function to get the default number of parallel workers.
/// \return number of parallel workers to be used as a default for each operation.
int32_t get_num_parallel_workers();
/// \brief Function to set the default interval (in milliseconds) for monitor sampling.
/// \param[in] interval interval (in milliseconds) to be used for performance monitor sampling.
bool set_monitor_sampling_interval(int32_t interval);
/// \brief Function to get the default interval of performance monitor sampling.
/// \return interval (in milliseconds) for performance monitor sampling.
int32_t get_monitor_sampling_interval();
/// \brief Function to set the default timeout (in seconds) for DSWaitedCallback. In case of a deadlock, the wait
/// function will exit after the timeout period.
/// \param[in] timeout timeout (in seconds) to be used to end the wait in DSWaitedCallback in case of a deadlock.
bool set_callback_timeout(int32_t timeout);
/// \brief Function to get the default timeout for DSWaitedCallback. In case of a deadlock, the wait function will exit
/// after the timeout period.
/// \return the duration in seconds.
int32_t get_callback_timeout();
/// \brief Function to load configuration from a file.
/// \param[in] file path of the configuration file to be loaded.
/// \note This API exists because std::string is constrained by the ABI compile option, while char is not.
bool load(const std::vector<char> &file);
/// \brief Function to load configuration from a file.
/// \param[in] file path of the configuration file to be loaded.
/// \return the result of the std::vector<char> overload of load().
/// \note The path is only read and forwarded to StringToChar, so it is taken by const reference to avoid copying
///     the string on every call.
inline bool load(const std::string &file) { return load(StringToChar(file)); }
} // namespace config
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONFIG_H

View File

@ -0,0 +1,123 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONSTANTS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONSTANTS_H_
#include <cstdint>
#include <limits>
#include <random>
namespace mindspore {
namespace dataset {
// Various type defines for convenience
using uchar = unsigned char;
using dsize_t = int64_t;
/// \brief Target devices to perform map operation
enum class MapTargetDevice { kCpu, kGpu, kAscend310 };
/// \brief Possible dataset types for holding the data and client type
enum class DatasetType { kUnknown, kArrow, kTf };
/// \brief Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
/// \brief Possible values for shuffle
enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2, kInfile = 3 };
/// \brief Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
/// \brief Possible values for Image format types in a batch
enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 };
/// \brief Possible values for Image format types
enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 };
/// \brief Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3, kCubicPil = 4 };
/// \brief Possible JiebaMode modes
enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 };
/// \brief Possible values for SPieceTokenizerOutType
enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
/// \brief Possible values for SPieceTokenizerLoadType
enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };
/// \brief Possible values for SentencePieceModel
enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
/// \brief Possible values for NormalizeForm
enum class NormalizeForm {
kNone = 0,
kNfc,
kNfkc,
kNfd,
kNfkd,
};
/// \brief Possible values for Mask
enum class RelationalOp {
kEqual = 0, // ==
kNotEqual, // !=
kLess, // <
kLessEqual, // <=
kGreater, // >
kGreaterEqual, // >=
};
/// \brief Possible values for SamplingStrategy
enum class SamplingStrategy { kRandom = 0, kEdgeWeight = 1 };
// convenience functions for 32bit int bitmask
/// \brief Check whether every bit set in bitMask is also set in bits.
/// \param[in] bits the value to inspect
/// \param[in] bitMask the bits that must all be present
/// \return true when (bits & bitMask) equals bitMask
inline bool BitTest(uint32_t bits, uint32_t bitMask) {
  const uint32_t selected = bits & bitMask;
  return selected == bitMask;
}
/// \brief Turn on, in place, every bit of *bits that is set in bitMask.
/// \param[in,out] bits pointer to the value to modify
/// \param[in] bitMask the bits to switch on
inline void BitSet(uint32_t *bits, uint32_t bitMask) {
  const uint32_t updated = *bits | bitMask;
  *bits = updated;
}
/// \brief Turn off, in place, every bit of *bits that is set in bitMask.
/// \param[in,out] bits pointer to the value to modify
/// \param[in] bitMask the bits to switch off
inline void BitClear(uint32_t *bits, uint32_t bitMask) {
  const uint32_t keep = ~bitMask;
  *bits = *bits & keep;
}
constexpr int64_t kDeMaxDim = std::numeric_limits<int64_t>::max();
constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max();
constexpr int64_t kDeMaxFreq = std::numeric_limits<int64_t>::max(); // 9223372036854775807 or 2^63 - 1
constexpr int64_t kDeMaxTopk = std::numeric_limits<int64_t>::max();
constexpr uint32_t kCfgRowsPerBuffer = 1;
constexpr uint32_t kCfgParallelWorkers = 8;
constexpr uint32_t kCfgWorkerConnectorSize = 16;
constexpr uint32_t kCfgOpConnectorSize = 16;
constexpr int32_t kCfgDefaultRankId = -1;
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
constexpr int32_t kCfgDefaultCachePort = 50052;
constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
constexpr int32_t kDftPrefetchSize = 20;
constexpr int32_t kDftNumConnections = 12;
constexpr int32_t kDftAutoNumWorkers = false;
constexpr char kDftMetaColumnPrefix[] = "_meta-";
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;
using connection_id_type = uint64_t;
using session_id_type = uint32_t;
using row_id_type = int64_t;
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONSTANTS_H_

View File

@ -0,0 +1,448 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
#include <sys/stat.h>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
namespace mindspore {
namespace dataset {
/// \brief Simple class to do data manipulation, contains helper function to update json files in dataset
class DataHelper {
public:
/// \brief constructor
DataHelper() {}
/// \brief Destructor
~DataHelper() = default;
/// \brief Create an Album dataset while taking in a path to a image folder
/// Creates the output directory if doesn't exist
/// \param[in] in_dir Image folder directory that takes in images
/// \param[in] out_dir Directory containing output json files
Status CreateAlbum(const std::string &in_dir, const std::string &out_dir) {
return CreateAlbumIF(StringToChar(in_dir), StringToChar(out_dir));
}
/// \brief Update a json file field with a vector of string values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional input for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), VectorStringToChar(value), StringToChar(out_file));
}
/// \brief Update a json file field with a vector of bool values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<bool> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int8 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int8_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint8 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint8_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int16 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int16_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint16 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint16_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int32 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int32_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint32 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint32_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Store a vector of int64 values under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Array of values to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int64_t> &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateArrayIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a vector of uint64 values under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Array of values to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint64_t> &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateArrayIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a vector of float values under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Array of values to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<float> &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateArrayIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a vector of double values under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Array of values to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<double> &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateArrayIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a string value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const std::string &value,
                   const std::string &out_file = "") {
  // Every string, including the value itself, is handed over as a char vector (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto value_chars = StringToChar(value);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value_chars, out_file_chars);
}
/// \brief Store a bool value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const bool &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store an int8 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int8_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a uint8 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint8_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store an int16 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int16_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a uint16 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint16_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store an int32 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int32_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a uint32 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint32_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store an int64 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int64_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a uint64 value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint64_t &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a float value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const float &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Store a double value under a field of a json file
/// \param in_file Name of the json file to read in
/// \param key Key of the field to write to
/// \param value Value to write to the file
/// \param out_file Optional output file path; the input file is written to if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const double &value,
                   const std::string &out_file = "") {
  // The *IF overloads take char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return UpdateValueIF(in_file_chars, key_chars, value, out_file_chars);
}
/// \brief Template function to write an array of values (e.g. tensor data) to a binary file
/// \param[in] in_file File to write to
/// \param[in] data Array of type T values; its raw bytes are written in element order
/// \return Status The status code returned; an error is returned when the file cannot be opened
///     or when writing the data fails
template <typename T>
Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) {
  try {
    std::ofstream o(in_file, std::ios::binary | std::ios::out);
    if (!o.is_open()) {
      return Status(kMDUnexpectedError, "Error opening Bin file to write");
    }
    // data() is well defined for an empty vector, whereas &data[0] is undefined behaviour there.
    const auto num_bytes = static_cast<std::streamsize>(data.size() * sizeof(T));
    o.write(reinterpret_cast<const char *>(data.data()), num_bytes);
    o.close();
    // Streams do not throw by default, so a failed write or flush is only visible in the stream state.
    if (o.fail()) {
      return Status(kMDUnexpectedError, "Write bin file failed ");
    }
  } catch (const std::exception &) {
    // Convert any exception into a Status return code
    return Status(kMDUnexpectedError, "Write bin file failed ");
  }
  return Status::OK();
}
/// \brief Write the data behind a pointer to a binary file; a pointer is used to avoid a memcpy
/// \param[in] in_file File name to write to
/// \param[in] data Pointer to the first of the values to write
/// \param[in] length Number of values of type T to write from the pointer (not a byte count)
/// \return Status The status code returned; an error is returned when data is null while length is
///     non-zero, when the file cannot be opened, or when writing the data fails
template <typename T>
Status WriteBinFile(const std::string &in_file, T *data, size_t length) {
  // Reading length values through a null pointer would be undefined behaviour; report it instead.
  if (data == nullptr && length > 0) {
    return Status(kMDUnexpectedError, "Write bin file failed ");
  }
  try {
    std::ofstream o(in_file, std::ios::binary | std::ios::out);
    if (!o.is_open()) {
      return Status(kMDUnexpectedError, "Error opening Bin file to write");
    }
    const auto num_bytes = static_cast<std::streamsize>(length * sizeof(T));
    o.write(reinterpret_cast<const char *>(data), num_bytes);
    o.close();
    // Streams do not throw by default, so a failed write or flush is only visible in the stream state.
    if (o.fail()) {
      return Status(kMDUnexpectedError, "Write bin file failed ");
    }
  } catch (const std::exception &) {
    // Convert any exception into a Status return code
    return Status(kMDUnexpectedError, "Write bin file failed ");
  }
  return Status::OK();
}
/// \brief Helper function to copy the content of a tensor to a buffer
/// \note This function iterates over the tensor in bytes.
/// \param[in] tensor_addr The memory held by a tensor
/// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes()
/// \param[out] addr The address to copy tensor data to
/// \param[in] buffer_size The buffer size of addr
/// \return The size of the tensor (bytes copied)
size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size);
/// \brief Helper function to delete a key from a json file
/// \note This function will return okay even if the key is not found
/// \param[in] in_file Json file to remove the key from
/// \param[in] key The key to remove
/// \param[in] out_file Optional output file path (defaults to an empty string)
/// \return Status The status code returned
Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "") {
  // RemoveKeyIF takes char vectors rather than std::string (dual ABI support).
  const auto in_file_chars = StringToChar(in_file);
  const auto key_chars = StringToChar(key);
  const auto out_file_chars = StringToChar(out_file);
  return RemoveKeyIF(in_file_chars, key_chars, out_file_chars);
}
/// \brief A print method typically used for debugging
/// \param out The output stream to write output to
/// \note Declared const, so it does not modify the helper; it is also what the << operator of this class calls.
void Print(std::ostream &out) const;
/// \brief << Stream output operator overload
/// \note This allows you to write the debug print info using stream operators
/// \param out Reference to the output stream being overloaded
/// \param dh Reference to the DataHelper to display
/// \return The output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const DataHelper &dh) {
// Delegate the formatting to Print() and hand the stream back so that calls can be chained.
dh.Print(out);
return out;
}
private:
// Helper functions for dual ABI support.
// Each *IF function is the char-vector counterpart of a public std::string based method of this class:
// file names, keys and string values are passed as std::vector<char> instead of std::string.

// Char interface of the album creation entry point (input and output directory).
Status CreateAlbumIF(const std::vector<char> &in_dir, const std::vector<char> &out_dir);

// Char interfaces of UpdateArray, one overload per supported element type.
// The first overload takes a vector of char vectors, i.e. an array of strings.
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<std::vector<char>> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<bool> &value,
const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<int8_t> &value,
const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint8_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<int16_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint16_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<int32_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint32_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<int64_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint64_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<float> &value,
const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<double> &value,
const std::vector<char> &out_file);

// Char interfaces of UpdateValue, one overload per supported value type.
// The first overload takes a char vector, i.e. a string value.
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const bool &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int8_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint8_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int16_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint16_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int32_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint32_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int64_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint64_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const float &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const double &value,
const std::vector<char> &out_file);

// Char interface of RemoveKey.
Status RemoveKeyIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &out_file);
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,128 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_EXECUTE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_EXECUTE_H_
#include <string>
#include <vector>
#include <map>
#include <memory>
#include "include/api/context.h"
#include "include/api/types.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
class DeviceResource;
// Class to run tensor operations in eager mode.
// An Execute object is built from one or several operations and is then invoked like a function on
// a tensor (or a list of tensors) through operator().
// NOTE(review): std::reference_wrapper is declared in <functional>, which this header does not include
// directly; presumably it arrives through one of the project headers - confirm or add the include.
class Execute {
public:
/// \brief Constructor.
/// \param[in] op TensorOperation to be applied in Eager mode, it accepts op in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::shared_ptr<TensorOperation> op, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::shared_ptr<TensorTransform> op, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of reference.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::reference_wrapper<TensorTransform> op, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of raw pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(TensorTransform *op, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorOperations to be applied in Eager mode, it accepts op in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::vector<std::shared_ptr<TensorOperation>> ops,
MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts op in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::vector<std::shared_ptr<TensorTransform>> ops,
MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts op in type of
///     reference wrapper.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(const std::vector<std::reference_wrapper<TensorTransform>> ops,
MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts op in type of raw pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310
///     (default=MapTargetDevice::kCpu).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(const std::vector<TensorTransform *> &ops, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Destructor.
~Execute();
/// \brief Callable function to execute the TensorTransform in eager mode.
/// \param[in] input Tensor to be transformed.
/// \param[out] output Transformed tensor.
/// \return Status error code, returns OK if no error encountered.
Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output);
/// \brief Callable function to execute the TensorTransform in eager mode.
/// \param[in] input_tensor_list List of Tensor to be transformed.
/// \param[out] out Result tensor after transform.
/// \return Status error code, returns OK if no error encountered.
Status operator()(const std::vector<mindspore::MSTensor> &input_tensor_list, std::vector<mindspore::MSTensor> *out);
/// \brief Release device memory.
/// \note NOTE(review): presumably frees what device_resource_ holds - the definition is not in this header, confirm.
/// \return Status error code.
Status DeviceMemoryRelease();
/// \brief Generate an AIPP configuration.
/// \note NOTE(review): whether the returned string is the configuration text or a file path cannot be told
///     from this header - confirm against the definition.
/// \return The generated string.
std::string AippCfgGenerator();
private:
// Internal helper; judging by its name it converts transforms_ into ops_ - TODO confirm.
Status ParseTransforms_();
// Internal helper; judging by its name it checks the requested device settings - TODO confirm.
Status validate_device_();
// Operations held as TensorTransform objects.
std::vector<std::shared_ptr<TensorTransform>> transforms_;
// Operations held as TensorOperation objects.
std::vector<std::shared_ptr<TensorOperation>> ops_;
// Target device type of this Execute object.
MapTargetDevice device_type_;
// Handle to the device resource; DeviceResource is only forward declared in this header.
std::shared_ptr<DeviceResource> device_resource_;
// Additional state kept behind an opaque struct that is defined outside this header.
struct ExtraInfo;
std::shared_ptr<ExtraInfo> info_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_EXECUTE_H_

View File

@ -0,0 +1,153 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_ITERATOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_ITERATOR_H_
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/api/types.h"
namespace mindspore {
namespace dataset {
// Forward declare
class ExecutionTree;
class DatasetOp;
class Tensor;
class NativeRuntimeContext;
class IteratorConsumer;
class PullBasedIteratorConsumer;
class Dataset;
using MSTensorMap = std::unordered_map<std::string, mindspore::MSTensor>;
using MSTensorMapChar = std::map<std::vector<char>, mindspore::MSTensor>;
using MSTensorVec = std::vector<mindspore::MSTensor>;
// Abstract class for iterating over the dataset.
class Iterator {
public:
/// \brief Constructor
Iterator();
/// \brief Destructor
~Iterator();
/// \brief Method for building and launching the pipeline.
/// \param[in] ops - a vector of DatasetOp in the data pipeline.
/// \param[in] num_epochs Number of epochs passed down to EpochCtrlNode, default -1, infinite epochs
/// \return Status error code, returns OK if no error encountered.
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds, int32_t num_epochs);
/// \brief Function to get the next row from the data pipeline.
/// \note Type of return data is a map(with column name).
/// \param[out] row - the output tensor row.
/// \return Status error code, returns OK if no error encountered.
Status GetNextRow(MSTensorMap *row) {
MSTensorMapChar row_;
row_.clear();
row->clear();
Status s = GetNextRowCharIF(&row_);
TensorMapCharToString(&row_, row);
return s;
}
// Char interface(CharIF) of GetNextRow
// This This API exists because std::string will constrained by ABI compile option while char don't.
Status GetNextRowCharIF(MSTensorMapChar *row);
/// \brief Function to get the next row from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \param[out] row - the output tensor row.
/// \return Status error code, returns OK if no error encountered.
virtual Status GetNextRow(MSTensorVec *row);
/// \brief Function to shut down the data pipeline.
void Stop();
class _Iterator {
public:
explicit _Iterator(Iterator *lt);
// Destructor
~_Iterator() {
if (cur_row_) {
delete cur_row_;
}
}
_Iterator &operator++(); // prefix ++ overload
MSTensorMap &operator*() { return *cur_row_; } // dereference operator
MSTensorMap *operator->() { return cur_row_; }
bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; }
private:
int ind_; // the cur node our Iterator points to
Iterator *lt_;
MSTensorMap *cur_row_;
};
_Iterator begin() { return _Iterator(this); }
_Iterator end() { return _Iterator(nullptr); }
private:
std::unique_ptr<NativeRuntimeContext> runtime_context_;
IteratorConsumer *consumer_;
};
// Iterator variant that fetches rows through a PullBasedIteratorConsumer.
// NOTE(review): the destructor is defaulted in the class body while pull_consumer_ is a unique_ptr to a type
// that is only forward declared in this header, so code destroying a PullIterator presumably needs the
// complete type of PullBasedIteratorConsumer - confirm.
class PullIterator : public Iterator {
public:
/// \brief Constructor
PullIterator();
/// \brief Destructor
~PullIterator() = default;
/// \brief Function to get next row from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \param[out] row The output tensor row.
/// \return Status error code, returns OK if no error encountered.
Status GetNextRow(MSTensorVec *const row) override;
/// \brief Function to get specified rows from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \note This behavior is subject to change
/// \param[in] num_rows The number of rows to fetch.
/// \param[out] row The output tensor rows, one MSTensorVec per fetched row.
/// \return Status error code, returns OK if no error encountered.
Status GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row);
/// \brief Method for building and launching the pipeline.
/// \note Consider making this function protected.
/// \note This one-argument overload hides Iterator::BuildAndLaunchTree(ds, num_epochs) in this class.
/// \param[in] ds - The root node that calls the function
/// \return Status error code, returns OK if no error encountered.
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);
private:
// Consumer used by this iterator; owned exclusively through the unique_ptr.
std::unique_ptr<PullBasedIteratorConsumer> pull_consumer_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_ITERATOR_H_

View File

@ -0,0 +1,74 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
#define INCLUDE_NLOHMANN_JSON_FWD_HPP_
#include <cstdint> // int64_t, uint64_t
#include <map> // map
#include <memory> // allocator
#include <string> // string
#include <vector> // vector
/*!
@brief namespace for Niels Lohmann
@see https://github.com/nlohmann
@since version 1.0.0
*/
namespace nlohmann {
/*!
@brief default JSONSerializer template argument
This serializer ignores the template arguments and uses ADL
([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
for serialization.
*/
template <typename T = void, typename SFINAE = void>
struct adl_serializer;
/*!
@brief forward declaration of the generic JSON class template
The defaults declared here select std::map for objects, std::vector for
arrays, std::string for strings, bool for booleans, std::int64_t and
std::uint64_t for integers, double for floating-point numbers,
std::allocator for allocation and @ref adl_serializer for serialization.
*/
template <template <typename U, typename V, typename... Args> class ObjectType = std::map,
template <typename U, typename... Args> class ArrayType = std::vector, class StringType = std::string,
class BooleanType = bool, class NumberIntegerType = std::int64_t, class NumberUnsignedType = std::uint64_t,
class NumberFloatType = double, template <typename U> class AllocatorType = std::allocator,
template <typename T, typename SFINAE = void> class JSONSerializer = adl_serializer>
class basic_json;
/*!
@brief JSON Pointer
A JSON pointer defines a string syntax for identifying a specific value
within a JSON document. It can be used with functions `at` and
`operator[]`. Furthermore, JSON pointers are the base for JSON patches.
@sa [RFC 6901](https://tools.ietf.org/html/rfc6901)
@since version 2.0.0
*/
template <typename BasicJsonType>
class json_pointer;
/*!
@brief default JSON class
This type is the default specialization of the @ref basic_json class which
uses the standard template types.
@since version 1.0.0
*/
using json = basic_json<>;
} // namespace nlohmann
#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_

View File

@ -0,0 +1,251 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_SAMPLERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_SAMPLERS_H_
#include <memory>
#include <vector>
namespace mindspore {
namespace dataset {
// Forward declare
class SamplerObj;

/// \class Sampler samplers.h
/// \brief An abstract base class to represent a sampler in the data pipeline.
/// \note The inheritance from std::enable_shared_from_this must be public: with a private (inaccessible)
///     base, std::shared_ptr cannot register the object and shared_from_this() would never work.
class Sampler : public std::enable_shared_from_this<Sampler> {
  friend class AlbumDataset;
  friend class CelebADataset;
  friend class Cifar10Dataset;
  friend class Cifar100Dataset;
  friend class CLUEDataset;
  friend class CocoDataset;
  friend class CSVDataset;
  friend class ImageFolderDataset;
  friend class ManifestDataset;
  friend class MindDataDataset;
  friend class MnistDataset;
  friend class RandomDataDataset;
  friend class TextFileDataset;
  friend class TFRecordDataset;
  friend class VOCDataset;
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  Sampler() = default;

  /// \brief Destructor
  /// \note Virtual because this is a polymorphic base class; destroying a derived sampler through a
  ///     Sampler pointer must run the derived destructor.
  virtual ~Sampler() = default;

  /// \brief A virtual function to add a child sampler.
  /// \param[in] child The child sampler to be added as a child of this sampler.
  virtual void AddChild(std::shared_ptr<Sampler> child) { children_.push_back(child); }

 protected:
  /// \brief Pure virtual function to convert a Sampler class into an IR Sampler object.
  /// \return shared pointer to the newly created SamplerObj.
  virtual std::shared_ptr<SamplerObj> Parse() const = 0;

  // Child samplers registered through AddChild, in insertion order.
  std::vector<std::shared_ptr<Sampler>> children_;
};
/// \brief A class to represent a Distributed Sampler in the data pipeline.
/// \note A Sampler that accesses a shard of the dataset.
class DistributedSampler final : public Sampler {
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] num_shards Number of shards to divide the dataset into.
/// \param[in] shard_id Shard ID of the current shard within num_shards.
/// \param[in] shuffle If true, the indices are shuffled (default=true).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \param[in] seed The seed in use when shuffle is true (default=1).
/// \param[in] offset The starting position where access to elements in the dataset begins (default=-1).
/// \param[in] even_dist If true, each shard would return the same number of rows (default=true).
/// If false the total rows returned by all the shards would not have overlap.
explicit DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, int64_t num_samples = 0,
uint32_t seed = 1, int64_t offset = -1, bool even_dist = true);
/// \brief Destructor.
~DistributedSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
private:
// One member per constructor parameter; presumably each stores the argument of the same name
// (the constructor is defined outside this header - TODO confirm).
int64_t num_shards_;
int64_t shard_id_;
bool shuffle_;
int64_t num_samples_;
uint32_t seed_;
int64_t offset_;
bool even_dist_;
};
/// \brief PK Sampler for the data pipeline.
/// \note Draws K elements for each of the P classes in the dataset.
///     All classes are sampled.
class PKSampler final : public Sampler {
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  /// \param[in] num_val How many elements to sample per class.
  /// \param[in] shuffle Whether the class IDs are shuffled (default=false).
  /// \param[in] num_samples How many samples to draw (default=0, return all samples).
  explicit PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);

  /// \brief Destructor.
  ~PKSampler() = default;

 protected:
  /// \brief Convert this Sampler into an IR SamplerObj.
  /// \return Shared pointer to the newly created SamplerObj.
  std::shared_ptr<SamplerObj> Parse() const override;

 private:
  int64_t num_val_;
  bool shuffle_;
  int64_t num_samples_;
};
/// \brief Random Sampler for the data pipeline.
/// \note Elements are sampled randomly.
class RandomSampler final : public Sampler {
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  /// \param[in] replacement Whether a drawn sample ID is put back for the next draw (default=false).
  /// \param[in] num_samples How many samples to draw (default=0, return all samples).
  explicit RandomSampler(bool replacement = false, int64_t num_samples = 0);

  /// \brief Destructor.
  ~RandomSampler() = default;

 protected:
  /// \brief Convert this Sampler into an IR SamplerObj.
  /// \return Shared pointer to the newly created SamplerObj.
  std::shared_ptr<SamplerObj> Parse() const override;

 private:
  bool replacement_;
  int64_t num_samples_;
};
/// \brief Sequential Sampler for the data pipeline.
/// \note Dataset elements are sampled in sequence, the same as having no sampler.
class SequentialSampler final : public Sampler {
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  /// \param[in] start_index Index at which sampling starts (default=0, start at first id).
  /// \param[in] num_samples How many samples to draw (default=0, return all samples).
  explicit SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);

  /// \brief Destructor.
  ~SequentialSampler() = default;

 protected:
  /// \brief Convert this Sampler into an IR SamplerObj.
  /// \return Shared pointer to the newly created SamplerObj.
  std::shared_ptr<SamplerObj> Parse() const override;

 private:
  int64_t start_index_;
  int64_t num_samples_;
};
/// \brief Subset Sampler for the data pipeline.
/// \note Elements are sampled from a given sequence of indices.
class SubsetSampler : public Sampler {
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  /// \param[in] indices The sequence of indices, as a vector.
  /// \param[in] num_samples How many samples to draw (default=0, return all samples).
  explicit SubsetSampler(std::vector<int64_t> indices, int64_t num_samples = 0);

  /// \brief Destructor.
  ~SubsetSampler() = default;

 protected:
  /// \brief Convert this Sampler into an IR SamplerObj.
  /// \return Shared pointer to the newly created SamplerObj.
  std::shared_ptr<SamplerObj> Parse() const override;

  // Kept protected (not private): this class is not final and SubsetRandomSampler derives from it.
  std::vector<int64_t> indices_;
  int64_t num_samples_;
};
/// \brief Subset Random Sampler for the data pipeline.
/// \note Elements are sampled randomly from a given sequence of indices.
class SubsetRandomSampler final : public SubsetSampler {
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  /// \param[in] indices The sequence of indices, as a vector.
  /// \param[in] num_samples How many samples to draw (default=0, return all samples).
  explicit SubsetRandomSampler(std::vector<int64_t> indices, int64_t num_samples = 0);

  /// \brief Destructor.
  ~SubsetRandomSampler() = default;

 protected:
  /// \brief Convert this Sampler into an IR SamplerObj.
  /// \return Shared pointer to the newly created SamplerObj.
  std::shared_ptr<SamplerObj> Parse() const override;
};
/// \brief Weighted Random Sampler for the data pipeline.
/// \note Elements in [0, len(weights) - 1] are sampled randomly according to the given
///     weights (probabilities).
class WeightedRandomSampler final : public Sampler {
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  /// \param[in] weights The sequence of weights, as a vector; they need not sum up to 1.
  /// \param[in] num_samples How many samples to draw (default=0, return all samples).
  /// \param[in] replacement Whether a drawn sample ID is put back for the next draw (default=true).
  explicit WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0, bool replacement = true);

  /// \brief Destructor.
  ~WeightedRandomSampler() = default;

 protected:
  /// \brief Convert this Sampler into an IR SamplerObj.
  /// \return Shared pointer to the newly created SamplerObj.
  std::shared_ptr<SamplerObj> Parse() const override;

 private:
  std::vector<double> weights_;
  int64_t num_samples_;
  bool replacement_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_SAMPLERS_H_

View File

@ -0,0 +1,545 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TEXT_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TEXT_H_
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
class Vocab;
class SentencePieceVocab;
class TensorOperation;
// Transform operations for text
namespace text {
#ifndef _WIN32
/// \brief Tokenize a scalar tensor of UTF-8 string by specific rules.
/// \note BasicTokenizer is not supported on Windows platform yet.
class BasicTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] lower_case If true, fold the input text to lower case and strip accent characters by applying
  ///     the CaseFold, NormalizeUTF8 (NFD mode) and RegexReplace operations. If false, only the NormalizeUTF8
  ///     operation (in 'normalize_form' mode) is applied to the input text (default=false).
  /// \param[in] keep_whitespace If true, whitespace is kept in the output tokens (default=false).
  /// \param[in] normalize_form Normalization mode to use; only effective when 'lower_case' is false.
  ///     See NormalizeUTF8 for details (default=NormalizeForm::kNone).
  /// \param[in] preserve_unused_token If true, special tokens such as '[CLS]', '[SEP]', '[UNK]', '[PAD]' and
  ///     '[MASK]' are not split (default=true).
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit BasicTokenizer(bool lower_case = false, bool keep_whitespace = false,
                          const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true,
                          bool with_offsets = false);

  /// \brief Destructor
  ~BasicTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Tokenizer used for Bert text process.
/// \note BertTokenizer is not supported on Windows platform yet.
class BertTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A Vocab object.
  /// \param[in] suffix_indicator Marker showing that the subword is the last part of a word (default='##').
  /// \param[in] max_bytes_per_token Tokens longer than this are not split any further (default=100).
  /// \param[in] unknown_token Returned when a token cannot be found; if 'unknown_token' is an empty string the
  ///     token itself is returned instead (default='[UNK]').
  /// \param[in] lower_case If true, fold the input text to lower case and strip accent characters by applying
  ///     the CaseFold, NormalizeUTF8 (NFD mode) and RegexReplace operations. If false, only the NormalizeUTF8
  ///     operation (in 'normalize_form' mode) is applied to the input text (default=false).
  /// \param[in] keep_whitespace If true, whitespace is kept in the output tokens (default=false).
  /// \param[in] normalize_form Normalization mode to use; only effective when 'lower_case' is false.
  ///     See NormalizeUTF8 for details (default=NormalizeForm::kNone).
  /// \param[in] preserve_unused_token If true, special tokens such as '[CLS]', '[SEP]', '[UNK]', '[PAD]' and
  ///     '[MASK]' are not split (default=true).
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit BertTokenizer(const std::shared_ptr<Vocab> &vocab, const std::string &suffix_indicator = "##",
                         int32_t max_bytes_per_token = 100, const std::string &unknown_token = "[UNK]",
                         bool lower_case = false, bool keep_whitespace = false,
                         const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true,
                         bool with_offsets = false)
      : BertTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unknown_token),
                      lower_case, keep_whitespace, normalize_form, preserve_unused_token, with_offsets) {}

  /// \brief Constructor taking the string arguments as char vectors; the std::string overload forwards here.
  explicit BertTokenizer(const std::shared_ptr<Vocab> &vocab, const std::vector<char> &suffix_indicator,
                         int32_t max_bytes_per_token, const std::vector<char> &unknown_token, bool lower_case,
                         bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token,
                         bool with_offsets);

  /// \brief Destructor
  ~BertTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Apply case fold operation on UTF-8 string tensor.
class CaseFold final : public TensorTransform {
 public:
  /// \brief Constructor.
  CaseFold();

  /// \brief Destructor
  ~CaseFold() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;
};
#endif
/// \brief Tokenize Chinese string into words based on dictionary.
/// \note The integrity of the HMMSegment algorithm and MPSegment algorithm files must be confirmed.
class JiebaTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] hmm_path Dictionary file used by the HMMSegment algorithm. The dictionary can be obtained on
  ///     the official website of cppjieba.
  /// \param[in] mp_path Dictionary file used by the MPSegment algorithm. The dictionary can be obtained on
  ///     the official website of cppjieba.
  /// \param[in] mode Tokenization mode, one of MP, HMM or MIX (default=JiebaMode::kMix).
  ///     - MP, tokenize with MPSegment algorithm.
  ///     - HMM, tokenize with Hidden Markov Model Segment algorithm.
  ///     - MIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit JiebaTokenizer(const std::string &hmm_path, const std::string &mp_path,
                          const JiebaMode &mode = JiebaMode::kMix, bool with_offsets = false)
      : JiebaTokenizer(StringToChar(hmm_path), StringToChar(mp_path), mode, with_offsets) {}

  /// \brief Constructor taking the paths as char vectors; the std::string overload forwards here.
  explicit JiebaTokenizer(const std::vector<char> &hmm_path, const std::vector<char> &mp_path, const JiebaMode &mode,
                          bool with_offsets);

  /// \brief Destructor
  ~JiebaTokenizer() = default;

  /// \brief Add user defined word to JiebaTokenizer's dictionary.
  /// \param[in] word The word to be added to the JiebaTokenizer instance.
  ///     The added word will not be written into the built-in dictionary on disk.
  /// \param[in] freq The frequency of the word to be added. The higher the frequency,
  ///     the better chance the word will be tokenized (default=0, use default frequency).
  /// \return Status error code, returns OK if no error encountered.
  Status AddWord(const std::string &word, int64_t freq = 0) {
    const auto word_chars = StringToChar(word);
    return AddWordChar(word_chars, freq);
  }

  /// \brief Add user defined dictionary of word-freq pairs to JiebaTokenizer's dictionary.
  /// \param[in] user_dict Vector of word-freq pairs to be added to JiebaTokenizer's dictionary.
  /// \return Status error code, returns OK if no error encountered.
  Status AddDict(const std::vector<std::pair<std::string, int64_t>> &user_dict) {
    const auto converted_dict = PairStringInt64ToPairCharInt64(user_dict);
    return AddDictChar(converted_dict);
  }

  /// \brief Add user defined dictionary of word-freq pairs to JiebaTokenizer's dictionary from a file.
  ///     Only valid word-freq pairs in the user provided file are added into the dictionary.
  ///     Rows containing invalid input are ignored; no error or warning Status is returned for them.
  /// \param[in] file_path Path to the dictionary which includes user defined word-freq pairs.
  /// \return Status error code, returns OK if no error encountered.
  Status AddDict(const std::string &file_path) {
    const auto path_chars = StringToChar(file_path);
    return AddDictChar(path_chars);
  }

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  /// \brief Parse user defined words from a file.
  /// \param[in] file_path Path to the user defined file.
  /// \param[in] user_dict Vector of word-freq pairs extracted from the user provided file.
  Status ParserFile(const std::string &file_path, std::vector<std::pair<std::string, int64_t>> *const user_dict);

  /// \brief Char-vector counterpart of AddWord; AddWord forwards here.
  Status AddWordChar(const std::vector<char> &word, int64_t freq = 0);

  /// \brief Char-vector counterpart of AddDict (word-freq pairs); AddDict forwards here.
  Status AddDictChar(const std::vector<std::pair<std::vector<char>, int64_t>> &user_dict);

  /// \brief Char-vector counterpart of AddDict (file path); AddDict forwards here.
  Status AddDictChar(const std::vector<char> &file_path);

  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Look up a word into an id according to the input vocabulary table.
class Lookup final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A Vocab object.
  /// \param[in] unknown_token Word used for the lookup when the word being looked up is out of vocabulary
  ///     (oov). If unknown_token itself is oov, a runtime error will be thrown. If unknown_token is {}, no
  ///     unknown_token is specified for out-of-vocabulary words (default={}).
  /// \param[in] data_type mindspore::DataType of the tensor after lookup; must be numeric, including bool
  ///     (default=mindspore::DataType::kNumberTypeInt32).
  explicit Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token = {},
                  mindspore::DataType data_type = mindspore::DataType::kNumberTypeInt32)
      : Lookup(vocab, OptionalStringToChar(unknown_token), data_type) {}

  /// \brief Constructor taking unknown_token as an optional char vector; the std::string overload forwards here.
  explicit Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::vector<char>> &unknown_token,
                  mindspore::DataType data_type = mindspore::DataType::kNumberTypeInt32);

  /// \brief Destructor
  ~Lookup() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief TensorOp to generate n-gram from a 1-D string Tensor.
class Ngram final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] ngrams A vector of positive integers. For example, if ngrams={4, 3}, the result is a 4-gram
  ///     followed by a 3-gram in the same tensor. If there are not enough words to make up an n-gram, an
  ///     empty string will be returned.
  /// \param[in] left_pad {"pad_token", pad_width}. Padding performed on the left side of the sequence;
  ///     pad_width will be capped at n-1. left_pad={"_", 2} would pad the left side of the sequence with "__"
  ///     (default={"", 0}).
  /// \param[in] right_pad {"pad_token", pad_width}. Padding performed on the right side of the sequence;
  ///     pad_width will be capped at n-1. right_pad={"-", 2} would pad the right side of the sequence with "--"
  ///     (default={"", 0}).
  /// \param[in] separator Symbol used to join strings together (default=" ").
  explicit Ngram(const std::vector<int32_t> &ngrams, const std::pair<std::string, int32_t> &left_pad = {"", 0},
                 const std::pair<std::string, int32_t> &right_pad = {"", 0}, const std::string &separator = " ")
      : Ngram(ngrams, PairStringToChar(left_pad), PairStringToChar(right_pad), StringToChar(separator)) {}

  /// \brief Constructor taking the string arguments as char vectors; the std::string overload forwards here.
  explicit Ngram(const std::vector<int32_t> &ngrams, const std::pair<std::vector<char>, int32_t> &left_pad,
                 const std::pair<std::vector<char>, int32_t> &right_pad, const std::vector<char> &separator);

  /// \brief Destructor
  ~Ngram() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
#ifndef _WIN32
/// \brief Apply normalize operation on UTF-8 string tensor.
class NormalizeUTF8 final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] normalize_form One of NormalizeForm::kNone, NormalizeForm::kNfc, NormalizeForm::kNfkc,
  ///     NormalizeForm::kNfd or NormalizeForm::kNfkd (default=NormalizeForm::kNfkc).
  ///     See http://unicode.org/reports/tr15/ for details.
  ///     - NormalizeForm::kNone, do nothing for input string tensor.
  ///     - NormalizeForm::kNfc, normalize with Normalization Form C.
  ///     - NormalizeForm::kNfkc, normalize with Normalization Form KC.
  ///     - NormalizeForm::kNfd, normalize with Normalization Form D.
  ///     - NormalizeForm::kNfkd, normalize with Normalization Form KD.
  explicit NormalizeUTF8(NormalizeForm normalize_form = NormalizeForm::kNfkc);

  /// \brief Destructor
  ~NormalizeUTF8() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Replace UTF-8 string tensor with 'replace' according to regular expression 'pattern'.
class RegexReplace final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] pattern The regex expression patterns.
  /// \param[in] replace The string to replace matched element.
  /// \param[in] replace_all Confirm whether to replace all. If false, only replace first matched element;
  ///     if true, replace all matched elements (default=true).
  /// \note The strings are only read here (converted with StringToChar), so they are taken by const
  ///     reference rather than by value, which avoids copying them on every construction.
  explicit RegexReplace(const std::string &pattern, const std::string &replace, bool replace_all = true)
      : RegexReplace(StringToChar(pattern), StringToChar(replace), replace_all) {}

  /// \brief Constructor taking the string arguments as char vectors; the std::string overload forwards here.
  explicit RegexReplace(const std::vector<char> &pattern, const std::vector<char> &replace, bool replace_all);

  /// \brief Destructor
  ~RegexReplace() = default;

 protected:
  /// \brief Function to convert TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Tokenize a scalar tensor of UTF-8 string by regex expression pattern.
class RegexTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] delim_pattern The pattern of regex delimiters.
  /// \param[in] keep_delim_pattern The string matched by 'delim_pattern' can be kept as a token if it can be
  ///     matched by 'keep_delim_pattern'. The default value is an empty string (""),
  ///     which means that delimiters will not be kept as an output token (default="").
  /// \param[in] with_offsets Whether or not output offsets of tokens (default=false).
  /// \note The strings are only read here (converted with StringToChar), so they are taken by const
  ///     reference rather than by value, which avoids copying them on every construction.
  explicit RegexTokenizer(const std::string &delim_pattern, const std::string &keep_delim_pattern = "",
                          bool with_offsets = false)
      : RegexTokenizer(StringToChar(delim_pattern), StringToChar(keep_delim_pattern), with_offsets) {}

  /// \brief Constructor taking the string arguments as char vectors; the std::string overload forwards here.
  explicit RegexTokenizer(const std::vector<char> &delim_pattern, const std::vector<char> &keep_delim_pattern,
                          bool with_offsets);

  /// \brief Destructor
  ~RegexTokenizer() = default;

 protected:
  /// \brief Function to convert TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
#endif
/// \brief Tokenize scalar token or 1-D tokens to tokens by sentencepiece.
class SentencePieceTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A SentencePieceVocab object.
  /// \param[in] out_type The type of output.
  SentencePieceTokenizer(const std::shared_ptr<SentencePieceVocab> &vocab,
                         mindspore::dataset::SPieceTokenizerOutType out_type);

  /// \brief Constructor.
  /// \param[in] vocab_path Path of the vocab model file.
  /// \param[in] out_type The type of output.
  SentencePieceTokenizer(const std::string &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type)
      : SentencePieceTokenizer(StringToChar(vocab_path), out_type) {}

  /// \brief Constructor taking the vocab path as a char vector; the std::string overload forwards here.
  SentencePieceTokenizer(const std::vector<char> &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type);

  /// \brief Destructor
  ~SentencePieceTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension
///     axis is a slice of data starting at the corresponding position, with a specified width.
class SlidingWindow final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] width Width of the window. It must be an integer and greater than zero.
  /// \param[in] axis Axis along which the sliding window is computed (default=0); only 0 or -1 is supported
  ///     for now.
  explicit SlidingWindow(const int32_t width, const int32_t axis = 0);

  /// \brief Destructor
  ~SlidingWindow() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Tensor operation to convert every element of a string tensor to a number.
///     Strings are cast according to the rules specified in the following links:
///     https://en.cppreference.com/w/cpp/string/basic_string/stof,
///     https://en.cppreference.com/w/cpp/string/basic_string/stoul,
///     except that any strings which represent negative numbers cannot be cast to an unsigned integer type.
class ToNumber final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] data_type mindspore::DataType the tensor is cast to. Must be a numeric type, excluding bool.
  explicit ToNumber(mindspore::DataType data_type);

  /// \brief Destructor
  ~ToNumber() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Truncate a pair of rank-1 tensors such that the total length is less than max_length.
class TruncateSequencePair final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] max_length The maximum length required.
  explicit TruncateSequencePair(int32_t max_length);

  /// \brief Destructor
  ~TruncateSequencePair() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Tokenize a scalar tensor of UTF-8 string to Unicode characters.
class UnicodeCharTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit UnicodeCharTokenizer(bool with_offsets = false);

  /// \brief Destructor
  ~UnicodeCharTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Tokenize scalar token or 1-D tokens to 1-D subword tokens.
class WordpieceTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A Vocab object.
  /// \param[in] suffix_indicator Marker showing that the subword is the last part of a word (default='##').
  /// \param[in] max_bytes_per_token Tokens longer than this are not split any further (default=100).
  /// \param[in] unknown_token Returned when a token cannot be found; if 'unknown_token' is an empty string the
  ///     token itself is returned instead (default='[UNK]').
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit WordpieceTokenizer(const std::shared_ptr<Vocab> &vocab, const std::string &suffix_indicator = "##",
                              int32_t max_bytes_per_token = 100, const std::string &unknown_token = "[UNK]",
                              bool with_offsets = false)
      : WordpieceTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unknown_token),
                           with_offsets) {}

  /// \brief Constructor taking the string arguments as char vectors; the std::string overload forwards here.
  explicit WordpieceTokenizer(const std::shared_ptr<Vocab> &vocab, const std::vector<char> &suffix_indicator,
                              int32_t max_bytes_per_token, const std::vector<char> &unknown_token, bool with_offsets);

  /// \brief Destructor
  ~WordpieceTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
#ifndef _WIN32
/// \brief Tokenize a scalar tensor of UTF-8 string on Unicode script boundaries.
class UnicodeScriptTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] keep_whitespace Whether to emit whitespace tokens (default=false).
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit UnicodeScriptTokenizer(bool keep_whitespace = false, bool with_offsets = false);

  /// \brief Destructor
  ~UnicodeScriptTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
/// \brief Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces.
class WhitespaceTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] with_offsets Whether to output the offsets of tokens (default=false).
  explicit WhitespaceTokenizer(bool with_offsets = false);

  /// \brief Destructor
  ~WhitespaceTokenizer() = default;

 protected:
  /// \brief Convert this TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  struct Data;  // Declared here only; the definition lives out of line.
  std::shared_ptr<Data> data_;
};
#endif
} // namespace text
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TEXT_H_

View File

@ -0,0 +1,413 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TRANSFORMS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TRANSFORMS_H_
#include <map>
#include <memory>
#include <string>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/api/types.h"
#include "include/dataset/constants.h"
namespace mindspore {
namespace dataset {
class TensorOperation;
// We need the following two groups of forward declaration to friend the class in class TensorTransform.
namespace transforms {
class Compose;
class RandomApply;
class RandomChoice;
} // namespace transforms
namespace vision {
class BoundingBoxAugment;
class RandomSelectSubpolicy;
class UniformAugment;
} // namespace vision
/// \class TensorTransform transforms.h
/// \brief A base class to represent a tensor transform operation in the data pipeline.
class TensorTransform : public std::enable_shared_from_this<TensorTransform> {
  friend class Dataset;
  friend class Execute;
  friend class transforms::Compose;
  friend class transforms::RandomApply;
  friend class transforms::RandomChoice;
  friend class vision::BoundingBoxAugment;
  friend class vision::RandomSelectSubpolicy;
  friend class vision::UniformAugment;

 public:
  /// \brief Constructor
  TensorTransform() {}

  /// \brief Destructor.
  /// \note Virtual so that a derived transform destroyed through a TensorTransform pointer runs its own
  ///     destructor; this class is a polymorphic base (it declares virtual Parse functions).
  virtual ~TensorTransform() = default;

 protected:
  /// \brief Pure virtual function to convert a TensorTransform class into a IR TensorOperation object.
  /// \return Shared pointer to the newly created TensorOperation.
  virtual std::shared_ptr<TensorOperation> Parse() = 0;

  /// \brief Virtual function to convert a TensorTransform class into a IR TensorOperation object.
  /// \param[in] env A MapTargetDevice value used to determine the running environment.
  /// \return Shared pointer to the newly created TensorOperation; this default implementation ignores env
  ///     and returns nullptr.
  virtual std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) { return nullptr; }
};
/// \brief Slice object used in SliceOption.
class Slice {
public:
/// \brief Constructor, with start, stop and step default to 0.
Slice() : start_(0), stop_(0), step_(0) {}
/// \brief Constructor.
/// \param[in] start Starting integer specifying where to start the slicing.
/// \param[in] stop Ending integer specifying where to stop the slicing.
/// \param[in] step An integer specifying the step of the slicing.
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
/// \brief Constructor, with step=1
/// \param[in] start Starting integer specifying where to start the slicing.
/// \param[in] stop Ending integer specifying where to stop the slicing.
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
/// \brief Constructor, with start=0 and step=1
/// \param[in] stop Ending integer specifying where to stop the slicing.
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
Slice(Slice const &slice) = default;
~Slice() = default;
bool valid() const { return step_ != 0; }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
/// \brief SliceOption used in Slice Op.
/// \note Each constructor fills exactly one of the members below; the other members keep their defaults.
class SliceOption {
 public:
  /// \brief Constructor.
  /// \param[in] all Slice the whole dimension
  explicit SliceOption(bool all) : all_(all) {}

  /// \brief Constructor.
  /// \param[in] indices Slice these indices along the dimension. Negative indices are supported.
  // The vector is taken by value, so move it into the member instead of copying it a second time.
  explicit SliceOption(std::vector<dsize_t> indices) : indices_(std::move(indices)) {}

  /// \brief Constructor.
  /// \param[in] slice Slice the generated indices from the slice object along the dimension.
  explicit SliceOption(Slice slice) : slice_(slice) {}

  /// \brief Copy constructor.
  SliceOption(SliceOption const &slice) = default;

  /// \brief Destructor.
  ~SliceOption() = default;

  // only one of the following will be valid
  // given indices to slice the Tensor.
  std::vector<dsize_t> indices_ = {};
  // Slice object. All start, stop and step are 0 if invalid.
  Slice slice_;
  // True when the whole dimension is to be sliced.
  bool all_ = false;
};
// Transform operations for performing data transformation.
namespace transforms {
/// \brief Compose Op.
/// \note Compose a list of transforms into a single transform.
class Compose final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] transforms A vector of raw pointers to TensorTransform objects to be applied.
explicit Compose(const std::vector<TensorTransform *> &transforms);
/// \brief Constructor.
/// \param[in] transforms A vector of shared pointers to TensorTransform objects to be applied.
explicit Compose(const std::vector<std::shared_ptr<TensorTransform>> &transforms);
/// \brief Constructor.
/// \param[in] transforms A vector of references (std::reference_wrapper) to TensorTransform objects to be applied.
explicit Compose(const std::vector<std::reference_wrapper<TensorTransform>> &transforms);
/// \brief Destructor.
~Compose() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Concatenate Op.
/// \note Tensor operation that concatenates all columns into a single tensor.
class Concatenate final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] axis Concatenate the tensors along the given axis; only 0 or -1 is supported so far (default=0).
/// \param[in] prepend MSTensor to be prepended to the already concatenated tensors (default={}).
/// \param[in] append MSTensor to be appended to the already concatenated tensors (default={}).
explicit Concatenate(int8_t axis = 0, MSTensor prepend = {}, MSTensor append = {});
/// \brief Destructor.
~Concatenate() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Duplicate Op.
/// \note Duplicate the input tensor to a new output tensor.
/// The input tensor is carried over to the output list.
class Duplicate final : public TensorTransform {
public:
/// \brief Constructor. This transform takes no parameters.
Duplicate();
/// \brief Destructor.
~Duplicate() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Fill Op.
/// \note Tensor operation to fill all elements in the tensor with the specified value.
/// The output tensor will have the same shape and type as the input tensor.
class Fill final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] fill_value Scalar value to fill the tensor with.
/// Can only be an MSTensor of one of the following types from mindspore::DataType:
/// String, Bool, Int8/16/32/64, UInt8/16/32/64, Float16/32/64.
explicit Fill(MSTensor fill_value);
/// \brief Destructor.
~Fill() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Mask Op.
/// \note Mask content of the input tensor with the given predicate.
/// Any element of the tensor that matches the predicate will be evaluated to True, otherwise False.
class Mask final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] op One of the relational operators EQ, NE, LT, GT, LE or GE.
/// \param[in] constant Constant to be compared to.
/// Can only be MSTensor of str, int, float, bool.
/// \param[in] ms_type Type of the generated mask. Can only be numeric or boolean datatype.
/// (default=mindspore::DataType::kNumberTypeBool)
explicit Mask(RelationalOp op, MSTensor constant,
mindspore::DataType ms_type = mindspore::DataType(mindspore::DataType::kNumberTypeBool));
/// \brief Destructor.
~Mask() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief OneHot Op.
/// \note Convert the labels into OneHot format.
class OneHot final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] num_classes Number of classes.
explicit OneHot(int32_t num_classes);
/// \brief Destructor.
~OneHot() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief PadEnd Op.
/// \note Pad the input tensor according to pad_shape; pad_shape needs to have the same rank as the input.
class PadEnd final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] pad_shape List of integers representing the shape needed.
/// Dimensions that are set to `None` will not be padded (i.e., original dim will be used).
/// Shorter dimensions will truncate the values.
/// \param[in] pad_value Value used to pad (default={}).
// NOTE(review): `None` cannot be stored in a std::vector<dsize_t>; the wording presumably comes from the
// Python API. Confirm which value marks a dimension as "not padded" in the C++ API.
explicit PadEnd(const std::vector<dsize_t> &pad_shape, MSTensor pad_value = {});
/// \brief Destructor.
~PadEnd() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomApply Op.
/// \note Randomly perform a series of transforms with a given probability.
class RandomApply final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] transforms A vector of raw pointers to TensorTransform objects to be applied.
/// \param[in] prob The probability to apply the transformation list (default=0.5).
explicit RandomApply(const std::vector<TensorTransform *> &transforms, double prob = 0.5);
/// \brief Constructor.
/// \param[in] transforms A vector of shared pointers to TensorTransform objects to be applied.
/// \param[in] prob The probability to apply the transformation list (default=0.5).
explicit RandomApply(const std::vector<std::shared_ptr<TensorTransform>> &transforms, double prob = 0.5);
/// \brief Constructor.
/// \param[in] transforms A vector of references (std::reference_wrapper) to TensorTransform objects to be applied.
/// \param[in] prob The probability to apply the transformation list (default=0.5).
explicit RandomApply(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, double prob = 0.5);
/// \brief Destructor.
~RandomApply() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomChoice Op.
/// \note Randomly select one transform from a list of transforms to perform the operation.
class RandomChoice final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] transforms A vector of raw pointers to TensorTransform objects to be applied.
explicit RandomChoice(const std::vector<TensorTransform *> &transforms);
/// \brief Constructor.
/// \param[in] transforms A vector of shared pointers to TensorTransform objects to be applied.
explicit RandomChoice(const std::vector<std::shared_ptr<TensorTransform>> &transforms);
/// \brief Constructor.
/// \param[in] transforms A vector of references (std::reference_wrapper) to TensorTransform objects to be applied.
explicit RandomChoice(const std::vector<std::reference_wrapper<TensorTransform>> &transforms);
/// \brief Destructor.
~RandomChoice() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Slice Op.
/// \note Slice operation to extract a tensor out using the given n slices.
/// The functionality of Slice is similar to NumPy's indexing feature.
/// (Currently only rank-1 tensors are supported).
class Slice final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] slice_input Vector of SliceOption objects.
explicit Slice(const std::vector<SliceOption> &slice_input);
/// \brief Destructor.
~Slice() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief TypeCast Op.
/// \note Tensor operation to cast to a given MindSpore data type.
class TypeCast final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] data_type The mindspore::DataType to cast to.
explicit TypeCast(mindspore::DataType data_type);
/// \brief Destructor.
~TypeCast() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Unique Op.
/// \note Return an output tensor containing all the unique elements of the input tensor in
/// the same order that they occur in the input tensor.
class Unique final : public TensorTransform {
public:
/// \brief Constructor. This transform takes no parameters.
Unique();
/// \brief Destructor.
~Unique() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
} // namespace transforms
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TRANSFORMS_H_

View File

@ -0,0 +1,955 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_lite.h"
namespace mindspore {
namespace dataset {
class TensorOperation;
// Transform operations for performing computer vision.
namespace vision {
/// \brief AutoContrast TensorTransform.
/// \note Apply automatic contrast on input image.
class AutoContrast final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] cutoff Percent of pixels to cut off from the histogram, the valid range of cutoff value is 0 to 100
/// (default=0.0).
/// \param[in] ignore Pixel values to ignore (default={}).
explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});
/// \brief Destructor.
~AutoContrast() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief BoundingBoxAugment TensorTransform.
/// \note Apply a given image transform on a random selection of bounding box regions of a given image.
class BoundingBoxAugment final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] transform Raw pointer to a TensorTransform operation.
/// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);
/// \brief Constructor.
/// \param[in] transform Smart pointer to a TensorTransform operation.
/// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);
/// \brief Constructor.
/// \param[in] transform Reference (std::reference_wrapper) to a TensorTransform operation.
/// \param[in] ratio Ratio of bounding boxes to apply augmentation on. Range: [0, 1] (default=0.3).
explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3);
/// \brief Destructor.
~BoundingBoxAugment() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief CutMixBatch TensorTransform.
/// \note Apply CutMix on a batch of images: mask a random section of each image with the corresponding
/// part of another randomly selected image in that batch.
class CutMixBatch final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] image_batch_format The format of the batch.
/// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
/// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0).
explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
/// \brief Destructor.
~CutMixBatch() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief CutOut TensorTransform.
/// \note Randomly cut (mask) out a given number of square patches from the input image.
class CutOut final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] length Integer representing the side length of each square patch.
/// \param[in] num_patches Integer representing the number of patches to be cut out of an image (default=1).
explicit CutOut(int32_t length, int32_t num_patches = 1);
/// \brief Destructor.
~CutOut() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Equalize TensorTransform.
/// \note Apply histogram equalization on input image.
class Equalize final : public TensorTransform {
public:
/// \brief Constructor. This transform takes no parameters.
Equalize();
/// \brief Destructor.
~Equalize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief HWC2CHW TensorTransform.
/// \note Transpose the input image from shape (H, W, C) to shape (C, H, W).
class HWC2CHW final : public TensorTransform {
public:
/// \brief Constructor. This transform takes no parameters.
HWC2CHW();
/// \brief Destructor.
~HWC2CHW() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Invert TensorTransform.
/// \note Apply invert on input image in RGB mode.
class Invert final : public TensorTransform {
public:
/// \brief Constructor. This transform takes no parameters.
Invert();
/// \brief Destructor.
~Invert() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief MixUpBatch TensorTransform.
/// \note Apply MixUp transformation on an input batch of images and labels. The labels must be in
/// one-hot format and Batch must be called before calling this function.
class MixUpBatch final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
explicit MixUpBatch(float alpha = 1);
/// \brief Destructor.
~MixUpBatch() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief NormalizePad TensorTransform.
/// \note Normalize the input image with respect to mean and standard deviation and pad an extra
/// channel with value zero.
class NormalizePad final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
/// The mean values must be in range [0.0, 255.0].
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
/// The standard deviation values must be in range (0.0, 255.0].
/// \param[in] dtype The output datatype of Tensor.
/// The datatype must be "float32" or "float16" (default="float32").
/// \note Forwards to the std::vector<char> overload below after converting dtype with StringToChar.
explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std,
const std::string &dtype = "float32")
: NormalizePad(mean, std, StringToChar(dtype)) {}
/// \brief Constructor taking the output datatype as a vector of characters.
/// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
/// \param[in] dtype The output datatype of Tensor, given as a character vector.
explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype);
/// \brief Destructor.
~NormalizePad() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Pad TensorTransform.
/// \note Pads the image according to padding parameters.
class Pad final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] padding A vector representing the number of pixels to pad the image.
/// If vector has one value, it pads all sides of the image with that value.
/// If vector has two values, it pads left and top with the first and
/// right and bottom with the second value.
/// If vector has four values, it pads left, top, right, and bottom with
/// those values respectively.
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
/// BorderType::kConstant (default={0}). If 1 value is provided, it is used for all RGB channels.
/// If 3 values are provided, it is used to fill R, G, B channels respectively.
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant).
/// Can be any of
/// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric]
/// - BorderType::kConstant, means it fills the border with constant values
/// - BorderType::kEdge, means it pads with the last value on the edge
/// - BorderType::kReflect, means it reflects the values on the edge omitting the last value of edge
/// - BorderType::kSymmetric, means it reflects the values on the edge repeating the last value of edge
explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor.
~Pad() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomColor TensorTransform.
/// \note Blends an image with its grayscale version with random weights
/// t and 1 - t generated from a given range. If the range is trivial
/// then the weights are determinate and t equals the bound of the interval.
class RandomColor final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] t_lb Lower bound on the range of random weights.
/// \param[in] t_ub Upper bound on the range of random weights.
explicit RandomColor(float t_lb, float t_ub);
/// \brief Destructor.
~RandomColor() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomColorAdjust TensorTransform.
/// \note Randomly adjust the brightness, contrast, saturation, and hue of the input image.
class RandomColorAdjust final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it needs to be in the form of [min, max] (Default={1, 1}).
/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it needs to be in the form of [min, max] (Default={1, 1}).
/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it needs to be in the form of [min, max] (Default={1, 1}).
/// \param[in] hue Hue adjustment factor. Must be a vector of one or two values
/// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
/// (Default={0, 0}).
explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
/// \brief Destructor.
~RandomColorAdjust() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomCrop TensorTransform.
/// \note Crop the input image at a random location.
class RandomCrop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
/// If vector has one value, it pads all sides of the image with that value.
/// If vector has two values, it pads left and top with the first and
/// right and bottom with the second value.
/// If vector has four values, it pads left, top, right, and bottom with
/// those values respectively.
/// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
/// the given output size (default=false).
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
/// BorderType::kConstant (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
/// If 3 values are provided, it is used to fill R, G, B channels respectively.
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant).
explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor.
~RandomCrop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomCropDecodeResize TensorTransform.
/// \note Equivalent to RandomResizedCrop, but crops before decodes.
class RandomCropDecodeResize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the
/// original size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be
/// cropped (default=(3. / 4., 4. / 3.)).
/// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
/// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10).
/// If exceeded, fall back to use center_crop instead.
explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4, 4. / 3},
InterpolationMode interpolation = InterpolationMode::kLinear,
int32_t max_attempts = 10);
/// \brief Destructor.
~RandomCropDecodeResize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomCropWithBBox TensorTransform.
/// \note Crop the input image at a random location and adjust bounding boxes accordingly.
/// If cropped area is out of bbox, the return bbox will be empty.
class RandomCropWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
/// If vector has one value, it pads all sides of the image with that value.
/// If vector has two values, it pads left and top with the first and
/// right and bottom with the second value.
/// If vector has four values, it pads left, top, right, and bottom with
/// those values respectively.
/// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
/// the given output size (default=false).
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
/// BorderType::kConstant (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
/// If 3 values are provided, it is used to fill R, G, B channels respectively.
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
/// [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor.
~RandomCropWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomHorizontalFlip TensorTransform.
/// \note Tensor operation to perform random horizontal flip.
class RandomHorizontalFlip final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomHorizontalFlip(float prob = 0.5);
/// \brief Destructor.
~RandomHorizontalFlip() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomHorizontalFlipWithBBox TensorTransform.
/// \note Flip the input image horizontally, randomly with a given probability and adjust bounding boxes accordingly.
class RandomHorizontalFlipWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomHorizontalFlipWithBBox(float prob = 0.5);
/// \brief Destructor.
~RandomHorizontalFlipWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomPosterize TensorTransform.
/// \note Tensor operation to perform random posterize.
class RandomPosterize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] bit_range A uint8_t vector representing the minimum and maximum bit in range (default={4, 8}).
explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
/// \brief Destructor.
~RandomPosterize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomResize TensorTransform.
/// \note Resize the input image using a randomly selected interpolation mode.
class RandomResize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
/// If size is a single value, the smaller edge of the image will be resized to this value with
/// the same image aspect ratio. If size has 2 values, it should be (height, width).
explicit RandomResize(std::vector<int32_t> size);
/// \brief Destructor.
~RandomResize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state of the transform; Data is only forward-declared in this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomResizeWithBBox TensorTransform.
/// \note Resize the input image using a randomly selected interpolation mode and adjust
///     bounding boxes accordingly.
class RandomResizeWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, the smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
explicit RandomResizeWithBBox(std::vector<int32_t> size);
/// \brief Destructor.
~RandomResizeWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomResizedCrop TensorTransform.
/// \note Crop the input image to a random size and aspect ratio.
class RandomResizedCrop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the original
///     size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be cropped
///     (default=(3. / 4., 4. / 3.)).
/// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
/// \param[in] max_attempts The maximum number of attempts to propose a valid
///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4., 4. / 3.},
InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
/// \brief Destructor.
~RandomResizedCrop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomResizedCropWithBBox TensorTransform.
/// \note Crop the input image to a random size and aspect ratio.
///     If cropped area is out of bbox, the return bbox will be empty.
class RandomResizedCropWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the original
///     size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be cropped
///     (default=(3. / 4., 4. / 3.)).
/// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
/// \param[in] max_attempts The maximum number of attempts to propose a valid
///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
// NOTE(review): this constructor is callable with a single argument but is not marked explicit,
// unlike RandomResizedCrop; confirm that implicit conversion from std::vector<int32_t> is intended.
RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4., 4. / 3.},
InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
/// \brief Destructor.
~RandomResizedCropWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomRotation TensorTransform.
/// \note Rotates the image according to parameters.
class RandomRotation final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees A float vector of size 2, representing the starting and ending degree.
/// \param[in] resample An enum for the mode of interpolation (default=InterpolationMode::kNearestNeighbour).
/// \param[in] expand A boolean representing whether the image is expanded after rotation (default=false).
/// \param[in] center A float vector of size 2, representing the x and y center of rotation (default={-1, -1}).
/// \param[in] fill_value A vector representing the value to fill the area outside the transform
///     in the output image (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, it is used to fill R, G, B channels respectively.
// NOTE(review): callable with a single argument but not marked explicit; confirm that implicit
// conversion from std::vector<float> is intended.
RandomRotation(std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour,
bool expand = false, std::vector<float> center = {-1, -1},
std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Destructor.
~RandomRotation() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomSelectSubpolicy TensorTransform.
/// \note Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
///     (op, prob), where op is a TensorTransform operation and prob is the probability that this op will be applied.
///     Once a sub-policy is selected, each op within the sub-policy will be applied in sequence according to its
///     probability.
class RandomSelectSubpolicy final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers.
explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);
/// \brief Constructor.
/// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers.
explicit RandomSelectSubpolicy(
const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);
/// \brief Constructor.
/// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are held
///     through std::reference_wrapper (object references).
explicit RandomSelectSubpolicy(
const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);
/// \brief Destructor.
~RandomSelectSubpolicy() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomSharpness TensorTransform.
/// \note Tensor operation to perform random sharpness.
class RandomSharpness final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly
///     sample from, to select a degree to adjust sharpness (default={0.1, 1.9}).
explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
/// \brief Destructor.
~RandomSharpness() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomSolarize TensorTransform.
/// \note Invert pixels randomly within the specified range. If min=max, it is a single fixed magnitude operation
///     that inverts all pixels above that threshold.
class RandomSolarize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] threshold A vector with two elements specifying the pixel range to invert (default={0, 255}).
explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
/// \brief Destructor.
~RandomSolarize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomVerticalFlip TensorTransform.
/// \note Tensor operation to perform random vertical flip.
class RandomVerticalFlip final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomVerticalFlip(float prob = 0.5);
/// \brief Destructor.
~RandomVerticalFlip() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomVerticalFlipWithBBox TensorTransform.
/// \note Flip the input image vertically, randomly with a given probability, and adjust bounding boxes accordingly.
class RandomVerticalFlipWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomVerticalFlipWithBBox(float prob = 0.5);
/// \brief Destructor.
~RandomVerticalFlipWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief Rescale TensorTransform.
/// \note Tensor operation to rescale the input image.
class Rescale final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] rescale Rescale factor.
/// \param[in] shift Shift factor.
Rescale(float rescale, float shift);
/// \brief Destructor.
~Rescale() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief ResizeWithBBox TensorTransform.
/// \note Resize the input image to the given size and adjust bounding boxes accordingly.
class ResizeWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size The output size of the resized image.
///     If size is a single value, the smaller edge of the image will be resized to this value with the same
///     image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Destructor.
~ResizeWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RGBA2BGR TensorTransform.
/// \note Changes the input 4 channel RGBA tensor to 3 channel BGR.
class RGBA2BGR final : public TensorTransform {
public:
/// \brief Constructor. Takes no parameters.
RGBA2BGR();
/// \brief Destructor.
~RGBA2BGR() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief RGBA2RGB TensorTransform.
/// \note Changes the input 4 channel RGBA tensor to 3 channel RGB.
class RGBA2RGB final : public TensorTransform {
public:
/// \brief Constructor. Takes no parameters.
RGBA2RGB();
/// \brief Destructor.
~RGBA2RGB() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief SoftDvppDecodeRandomCropResizeJpeg TensorTransform.
/// \note Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of
///     Ascend series chip DVPP module. The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
///     The input image size should be in range [32*32, 8192*8192].
///     The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
///     Only images with an even resolution can be output. The output of odd resolution is not supported.
class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the original
///     size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be cropped
///     (default=(3. / 4., 4. / 3.)).
/// \param[in] max_attempts The maximum number of attempts to propose a valid
///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
// NOTE(review): callable with a single argument but not marked explicit; confirm that implicit
// conversion from std::vector<int32_t> is intended.
SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4., 4. / 3.}, int32_t max_attempts = 10);
/// \brief Destructor.
~SoftDvppDecodeRandomCropResizeJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief SoftDvppDecodeResizeJpeg TensorTransform.
/// \note Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series
///     chip DVPP module. It is recommended to use this algorithm in the following scenarios:
///     When training, the DVPP of the Ascend chip is not used,
///     and the DVPP of the Ascend chip is used during inference,
///     and the accuracy of inference is lower than the accuracy of training;
///     and the input image size should be in range [32*32, 8192*8192].
///     The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
///     Only images with an even resolution can be output. The output of odd resolution is not supported.
class SoftDvppDecodeResizeJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);
/// \brief Destructor.
~SoftDvppDecodeResizeJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief SwapRedBlue TensorTransform.
/// \note Swaps the red and blue channels in image.
class SwapRedBlue final : public TensorTransform {
public:
/// \brief Constructor. Takes no parameters.
SwapRedBlue();
/// \brief Destructor.
~SwapRedBlue() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief UniformAugment TensorTransform.
/// \note Tensor operation to perform randomly selected augmentation.
class UniformAugment final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] transforms Vector of raw pointers to TensorTransform operations.
/// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);
/// \brief Constructor.
/// \param[in] transforms Vector of shared pointers to TensorTransform operations.
/// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);
/// \brief Constructor.
/// \param[in] transforms Vector of std::reference_wrapper objects referring to TensorTransform operations.
/// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);
/// \brief Destructor.
~UniformAugment() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_

View File

@ -0,0 +1,100 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
// Transform operations for performing computer vision.
namespace vision {
/* ##################################### API class ###########################################*/
/// \brief DvppDecodeResizeJpeg TensorTransform.
/// \note NOTE(review): presumably decodes and resizes a JPEG image on the Ascend DVPP module, judging by the
///     class name and this header's name; confirm against the implementation.
class DvppDecodeResizeJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] resize A vector of int value for each dimension, w.r.t H,W order.
explicit DvppDecodeResizeJpeg(std::vector<uint32_t> resize);
/// \brief Destructor.
~DvppDecodeResizeJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief DvppDecodeResizeCropJpeg TensorTransform.
/// \note NOTE(review): presumably decodes, resizes and then crops a JPEG image on the Ascend DVPP module,
///     judging by the class name and the parameter descriptions; confirm against the implementation.
class DvppDecodeResizeCropJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] crop A vector of int value for each dimension after final crop, w.r.t H,W order.
/// \param[in] resize A vector of int value for each dimension after resize, w.r.t H,W order.
explicit DvppDecodeResizeCropJpeg(std::vector<uint32_t> crop, std::vector<uint32_t> resize);
/// \brief Destructor.
~DvppDecodeResizeCropJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief DvppDecodePng TensorTransform.
/// \note NOTE(review): presumably decodes a PNG image on the Ascend DVPP module, judging by the class name
///     and this header's name; confirm against the implementation.
class DvppDecodePng final : public TensorTransform {
public:
/// \brief Constructor. Takes no parameters.
DvppDecodePng();
/// \brief Destructor.
~DvppDecodePng() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_

View File

@ -0,0 +1,292 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
// Transform operations for performing computer vision.
namespace vision {
// Forward Declarations
class RotateOperation;
/// \brief Affine TensorTransform.
/// \note Apply affine transform on input image.
class Affine final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees The degrees to rotate the image by.
/// \param[in] translation The value representing vertical and horizontal translation (default = {0.0, 0.0}).
///     The first value represents the x axis translation while the second represents the y axis translation.
/// \param[in] scale The scaling factor for the image (default = 0.0).
/// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}).
/// \param[in] interpolation An enum for the mode of interpolation
///     (default = InterpolationMode::kNearestNeighbour).
/// \param[in] fill_value A vector representing the value to fill the area outside the transform
///     in the output image (default = {0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, it is used to fill R, G, B channels respectively.
explicit Affine(float_t degrees, const std::vector<float> &translation = {0.0, 0.0}, float scale = 0.0,
const std::vector<float> &shear = {0.0, 0.0},
InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
const std::vector<uint8_t> &fill_value = {0, 0, 0});
/// \brief Destructor.
~Affine() = default;
// NOTE(review): Parse() is declared in the public section here, while other transforms in this header
// (e.g. CenterCrop) declare it protected; confirm that the public access is intended.
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief CenterCrop TensorTransform.
/// \note Crops the input image at the center to the given size.
class CenterCrop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
explicit CenterCrop(std::vector<int32_t> size);
/// \brief Destructor.
~CenterCrop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RGB2GRAY TensorTransform.
/// \note Convert RGB image or color image to grayscale image.
class RGB2GRAY final : public TensorTransform {
public:
/// \brief Constructor (defaulted; takes no parameters).
RGB2GRAY() = default;
/// \brief Destructor.
~RGB2GRAY() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Crop TensorTransform.
/// \note Crop an image based on location and crop size.
class Crop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of
///     {x_coor, y_coor}.
/// \param[in] size Size of the cropped area.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size);
/// \brief Destructor.
~Crop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief Decode TensorTransform.
/// \note Decode the input image in RGB mode.
class Decode final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] rgb A boolean of whether to decode in RGB mode or not (default=true).
explicit Decode(bool rgb = true);
/// \brief Destructor.
~Decode() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief Normalize TensorTransform.
/// \note Normalize the input image with respect to mean and standard deviation.
class Normalize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] mean A vector of mean values for each channel, w.r.t. channel order.
///     The mean values must be in range [0.0, 255.0].
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
///     The standard deviation values must be in range (0.0, 255.0].
Normalize(std::vector<float> mean, std::vector<float> std);
/// \brief Destructor.
~Normalize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief RandomAffine TensorTransform.
/// \note Applies a Random Affine transformation on input image in RGB or Greyscale mode.
class RandomAffine final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees A float vector of size 2, representing the starting and ending degree.
/// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y
///     axes (default = {0.0, 0.0, 0.0, 0.0}).
///     If size is 2, (min_dx, max_dx, 0, 0),
///     if size is 4, (min_dx, max_dx, min_dy, max_dy),
///     all values are in range [-1, 1].
/// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range
///     (default = {1.0, 1.0}).
/// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees
///     vertically and horizontally (default = {0.0, 0.0, 0.0, 0.0}).
///     If size is 2, (min_shear_x, max_shear_x, 0, 0),
///     if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y).
/// \param[in] interpolation An enum for the mode of interpolation
///     (default = InterpolationMode::kNearestNeighbour).
/// \param[in] fill_value A vector representing the value to fill the area outside the transform
///     in the output image (default = {0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, it is used to fill R, G, B channels respectively.
explicit RandomAffine(const std::vector<float_t> &degrees,
const std::vector<float_t> &translate_range = {0.0, 0.0, 0.0, 0.0},
const std::vector<float_t> &scale_range = {1.0, 1.0},
const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0},
InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
const std::vector<uint8_t> &fill_value = {0, 0, 0});
/// \brief Destructor.
~RandomAffine() = default;
// NOTE(review): Parse() is declared in the public section here, while other transforms in this header
// (e.g. CenterCrop) declare it protected; confirm that the public access is intended.
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief Resize TensorTransform.
/// \note Resize the input image to the given size.
class Resize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
explicit Resize(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Destructor.
~Resize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env The MapTargetDevice value supplied by the caller.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief ResizePreserveAR TensorTransform.
/// \note Keep the original picture ratio and fill the rest.
class ResizePreserveAR final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] height The height of image output value after resizing.
/// \param[in] width The width of image output value after resizing.
/// \param[in] img_orientation Angle method of image rotation (default=0).
ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation = 0);
/// \brief Destructor.
~ResizePreserveAR() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Implementation data struct; only forward-declared in this class.
struct Data;
// Shared pointer to the implementation data.
std::shared_ptr<Data> data_;
};
/// \brief Rotate TensorTransform.
/// \note Rotate the input image using a specified angle id.
class Rotate final : public TensorTransform {
public:
/// \brief Constructor. Takes no parameters.
Rotate();
/// \brief Destructor.
~Rotate() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Shared pointer to a RotateOperation. Unlike most other transforms in this header, this class stores an
// operation pointer rather than a Data struct; how it is populated is not visible in this header.
std::shared_ptr<RotateOperation> op_;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_

View File

@ -28,7 +28,7 @@ namespace mindspore {
namespace dataset {
class FillOp : public TensorOp {
public:
explicit FillOp(std::shared_ptr<Tensor> value) : fill_value_(value) {}
explicit FillOp(std::shared_ptr<Tensor> fill_value) : fill_value_(fill_value) {}
~FillOp() override = default;

View File

@ -59,7 +59,7 @@ Status RandomApplyOp::Compute(const TensorRow &input, TensorRow *output) {
}
return Status::OK();
}
RandomApplyOp::RandomApplyOp(double prob, const std::vector<std::shared_ptr<TensorOp>> &ops)
RandomApplyOp::RandomApplyOp(const std::vector<std::shared_ptr<TensorOp>> &ops, double prob)
: prob_(prob), gen_(GetSeed()), rand_double_(0, 1) {
compose_ = std::make_unique<ComposeOp>(ops);
is_deterministic_ = false;

View File

@ -33,9 +33,9 @@ namespace dataset {
class RandomApplyOp : public TensorOp {
public:
/// constructor
/// \param[in] prob probability whether the list of TensorOps will be applied
/// \param[in] ops the list of TensorOps to apply with prob likelihood
explicit RandomApplyOp(double prob, const std::vector<std::shared_ptr<TensorOp>> &ops);
/// \param[in] prob probability whether the list of TensorOps will be applied
explicit RandomApplyOp(const std::vector<std::shared_ptr<TensorOp>> &ops, double prob);
/// default destructor
~RandomApplyOp() = default;

View File

@ -31,7 +31,7 @@ namespace dataset {
class SliceOp : public TensorOp {
public:
explicit SliceOp(std::vector<SliceOption> slice_options) : slice_options_(slice_options) {}
explicit SliceOp(std::vector<SliceOption> slice_input) : slice_options_(slice_input) {}
explicit SliceOp(SliceOption slice_option) { slice_options_.push_back(slice_option); }
// shorthand notation for slicing along the first dimension
explicit SliceOp(Slice slice) { slice_options_.push_back(SliceOption(slice)); }

View File

@ -26,7 +26,7 @@ namespace mindspore {
namespace dataset {
const bool DecodeOp::kDefRgbFormat = true;
DecodeOp::DecodeOp(bool is_rgb_format) : is_rgb_format_(is_rgb_format) {
DecodeOp::DecodeOp(bool rgb) : is_rgb_format_(rgb) {
if (is_rgb_format_) { // RGB colour mode
MS_LOG(DEBUG) << "Decode colour mode is RGB.";
} else {

View File

@ -31,7 +31,7 @@ class DecodeOp : public TensorOp {
// Default values, also used by python_bindings.cc
static const bool kDefRgbFormat;
explicit DecodeOp(bool is_rgb_format = true);
explicit DecodeOp(bool rgb = true);
~DecodeOp() = default;

View File

@ -26,13 +26,13 @@ const uint8_t PadOp::kDefFillR = 0;
const uint8_t PadOp::kDefFillG = 0;
const uint8_t PadOp::kDefFillB = 0;
PadOp::PadOp(int32_t pad_top, int32_t pad_bottom, int32_t pad_left, int32_t pad_right, BorderType border_types,
PadOp::PadOp(int32_t pad_top, int32_t pad_bottom, int32_t pad_left, int32_t pad_right, BorderType padding_mode,
uint8_t fill_r, uint8_t fill_g, uint8_t fill_b)
: pad_top_(pad_top),
pad_bottom_(pad_bottom),
pad_left_(pad_left),
pad_right_(pad_right),
boarder_type_(border_types),
boarder_type_(padding_mode),
fill_r_(fill_r),
fill_g_(fill_g),
fill_b_(fill_b) {}

View File

@ -40,11 +40,11 @@ class PadOp : public TensorOp {
// @param pad_bottom number of pixels to pad the bottom of the image with.
// @param pad_left number of pixels to pad the left of the image with.
// @param pad_right number of pixels to pad the right of the image with.
// @param border_types BorderType enum, the type of boarders that we are using.
// @param padding_mode BorderType enum, the type of borders that we are using.
// @param fill_r R value for the color to pad with.
// @param fill_g G value for the color to pad with.
// @param fill_b B value for the color to pad with.
PadOp(int32_t pad_top, int32_t pad_bottom, int32_t pad_left, int32_t pad_right, BorderType border_types,
PadOp(int32_t pad_top, int32_t pad_bottom, int32_t pad_left, int32_t pad_right, BorderType padding_mode,
uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, uint8_t fill_b = kDefFillB);
~PadOp() override = default;

View File

@ -31,7 +31,7 @@ const int32_t RandomCropAndResizeOp::kDefMaxIter = 10;
RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t target_width, float scale_lb,
float scale_ub, float aspect_lb, float aspect_ub,
InterpolationMode interpolation, int32_t max_iter)
InterpolationMode interpolation, int32_t max_attempts)
: target_height_(target_height),
target_width_(target_width),
rnd_scale_(scale_lb, scale_ub),
@ -39,7 +39,7 @@ RandomCropAndResizeOp::RandomCropAndResizeOp(int32_t target_height, int32_t targ
interpolation_(interpolation),
aspect_lb_(aspect_lb),
aspect_ub_(aspect_ub),
max_iter_(max_iter) {
max_iter_(max_attempts) {
rnd_.seed(GetSeed());
is_deterministic_ = false;
}

View File

@ -40,7 +40,7 @@ class RandomCropAndResizeOp : public TensorOp {
RandomCropAndResizeOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb,
float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, float aspect_ub = kDefAspectUb,
InterpolationMode interpolation = kDefInterpolation, int32_t max_iter = kDefMaxIter);
InterpolationMode interpolation = kDefInterpolation, int32_t max_attempts = kDefMaxIter);
RandomCropAndResizeOp() = default;

View File

@ -28,9 +28,9 @@ class RandomCropAndResizeWithBBoxOp : public RandomCropAndResizeOp {
RandomCropAndResizeWithBBoxOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb,
float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb,
float aspect_ub = kDefAspectUb, InterpolationMode interpolation = kDefInterpolation,
int32_t max_iter = kDefMaxIter)
int32_t max_attempts = kDefMaxIter)
: RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation,
max_iter) {}
max_attempts) {}
~RandomCropAndResizeWithBBoxOp() override = default;

View File

@ -23,9 +23,9 @@ namespace mindspore {
namespace dataset {
RandomCropDecodeResizeOp::RandomCropDecodeResizeOp(int32_t target_height, int32_t target_width, float scale_lb,
float scale_ub, float aspect_lb, float aspect_ub,
InterpolationMode interpolation, int32_t max_iter)
InterpolationMode interpolation, int32_t max_attempts)
: RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub, interpolation,
max_iter) {}
max_attempts) {}
Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
if (input == nullptr) {

View File

@ -33,7 +33,7 @@ class RandomCropDecodeResizeOp : public RandomCropAndResizeOp {
public:
RandomCropDecodeResizeOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb,
float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb, float aspect_ub = kDefAspectUb,
InterpolationMode interpolation = kDefInterpolation, int32_t max_iter = kDefMaxIter);
InterpolationMode interpolation = kDefInterpolation, int32_t max_attempts = kDefMaxIter);
explicit RandomCropDecodeResizeOp(const RandomCropAndResizeOp &rhs) : RandomCropAndResizeOp(rhs) {}

View File

@ -33,7 +33,7 @@ const uint8_t RandomCropOp::kDefFillG = 0;
const uint8_t RandomCropOp::kDefFillB = 0;
RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_top, int32_t pad_bottom,
int32_t pad_left, int32_t pad_right, BorderType border_types, bool pad_if_needed,
int32_t pad_left, int32_t pad_right, bool pad_if_needed, BorderType padding_mode,
uint8_t fill_r, uint8_t fill_g, uint8_t fill_b)
: crop_height_(crop_height),
crop_width_(crop_width),
@ -42,7 +42,7 @@ RandomCropOp::RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_
pad_left_(pad_left),
pad_right_(pad_right),
pad_if_needed_(pad_if_needed),
border_type_(border_types),
border_type_(padding_mode),
fill_r_(fill_r),
fill_g_(fill_g),
fill_b_(fill_b) {

View File

@ -43,7 +43,7 @@ class RandomCropOp : public TensorOp {
RandomCropOp(int32_t crop_height, int32_t crop_width, int32_t pad_top = kDefPadTop,
int32_t pad_bottom = kDefPadBottom, int32_t pad_left = kDefPadLeft, int32_t pad_right = kDefPadRight,
BorderType border_types = kDefBorderType, bool pad_if_needed = kDefPadIfNeeded,
bool pad_if_needed = kDefPadIfNeeded, BorderType padding_mode = kDefBorderType,
uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, uint8_t fill_b = kDefFillB);
RandomCropOp(const RandomCropOp &rhs) = default;

View File

@ -29,10 +29,10 @@ class RandomCropWithBBoxOp : public RandomCropOp {
// Constructor for RandomCropWithBBoxOp, with default value and passing to base class constructor
RandomCropWithBBoxOp(int32_t crop_height, int32_t crop_width, int32_t pad_top = kDefPadTop,
int32_t pad_bottom = kDefPadBottom, int32_t pad_left = kDefPadLeft,
int32_t pad_right = kDefPadRight, BorderType border_types = kDefBorderType,
bool pad_if_needed = kDefPadIfNeeded, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG,
int32_t pad_right = kDefPadRight, bool pad_if_needed = kDefPadIfNeeded,
BorderType padding_mode = kDefBorderType, uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG,
uint8_t fill_b = kDefFillB)
: RandomCropOp(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, border_types, pad_if_needed,
: RandomCropOp(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right, pad_if_needed, padding_mode,
fill_r, fill_g, fill_b) {}
~RandomCropWithBBoxOp() override = default;

View File

@ -32,7 +32,7 @@ class RandomHorizontalFlipOp : public TensorOp {
// Default values, also used by python_bindings.cc
static const float kDefProbability;
explicit RandomHorizontalFlipOp(float probability = kDefProbability) : distribution_(probability) {
explicit RandomHorizontalFlipOp(float prob = kDefProbability) : distribution_(prob) {
is_deterministic_ = false;
rnd_.seed(GetSeed());
}

View File

@ -33,14 +33,13 @@ const uint8_t RandomRotationOp::kDefFillG = 0;
const uint8_t RandomRotationOp::kDefFillB = 0;
// constructor
RandomRotationOp::RandomRotationOp(float start_degree, float end_degree, float center_x, float center_y,
InterpolationMode interpolation, bool expand, uint8_t fill_r, uint8_t fill_g,
uint8_t fill_b)
RandomRotationOp::RandomRotationOp(float start_degree, float end_degree, InterpolationMode resample, bool expand,
float center_x, float center_y, uint8_t fill_r, uint8_t fill_g, uint8_t fill_b)
: degree_start_(start_degree),
degree_end_(end_degree),
center_x_(center_x),
center_y_(center_y),
interpolation_(interpolation),
interpolation_(resample),
expand_(expand),
fill_r_(fill_r),
fill_g_(fill_g),

View File

@ -42,18 +42,18 @@ class RandomRotationOp : public TensorOp {
// Constructor for RandomRotationOp
// @param start_degree starting range for random degree
// @param end_degree ending range for random degree
// @param centerX x coordinate for center of image rotation
// @param centerY y coordinate for center of image rotation
// @param resample DE interpolation mode for rotation
// @param expand option for the output image shape to change
// @param center_x coordinate for center of image rotation
// @param center_y coordinate for center of image rotation
// @param fill_r R value for the color to pad with
// @param fill_g G value for the color to pad with
// @param fill_b B value for the color to pad with
// @details the randomly chosen degree is uniformly distributed
// @details the output shape, if changed, will contain the entire rotated image
// @note maybe using unsigned long int isn't the best here according to our coding rules
RandomRotationOp(float start_degree, float end_degree, float center_x = kDefCenterX, float center_y = kDefCenterY,
InterpolationMode interpolation = kDefInterpolation, bool expand = kDefExpand,
RandomRotationOp(float start_degree, float end_degree, InterpolationMode resample = kDefInterpolation,
bool expand = kDefExpand, float center_x = kDefCenterX, float center_y = kDefCenterY,
uint8_t fill_r = kDefFillR, uint8_t fill_g = kDefFillG, uint8_t fill_b = kDefFillB);
~RandomRotationOp() override = default;

View File

@ -32,7 +32,7 @@ class RandomVerticalFlipOp : public TensorOp {
// Default values, also used by python_bindings.cc
static const float kDefProbability;
explicit RandomVerticalFlipOp(float probability = kDefProbability) : distribution_(probability) {
explicit RandomVerticalFlipOp(float prob = kDefProbability) : distribution_(prob) {
rnd_.seed(GetSeed());
is_deterministic_ = false;
}

View File

@ -28,7 +28,7 @@ namespace mindspore {
namespace dataset {
class RescaleOp : public TensorOp {
public:
RescaleOp(float rescale_ratio, float shift_ratio) : rescale_(rescale_ratio), shift_(shift_ratio) {}
RescaleOp(float rescale, float shift) : rescale_(rescale), shift_(shift) {}
~RescaleOp() override = default;

View File

@ -45,8 +45,8 @@ class ResizeOp : public TensorOp {
// @param size2: the second size of output. If this is also provided, the output size
// will be (size1, size2)
// @param interpolation: the interpolation mode being used.
explicit ResizeOp(int32_t size1, int32_t size2 = kDefWidth, InterpolationMode mInterpolation = kDefInterpolation)
: size1_(size1), size2_(size2), interpolation_(mInterpolation) {}
explicit ResizeOp(int32_t size1, int32_t size2 = kDefWidth, InterpolationMode interpolation = kDefInterpolation)
: size1_(size1), size2_(size2), interpolation_(interpolation) {}
ResizeOp(const ResizeOp &rhs) = default;

View File

@ -27,9 +27,9 @@ namespace dataset {
SoftDvppDecodeRandomCropResizeJpegOp::SoftDvppDecodeRandomCropResizeJpegOp(int32_t target_height, int32_t target_width,
float scale_lb, float scale_ub,
float aspect_lb, float aspect_ub,
int32_t max_iter)
int32_t max_attempts)
: RandomCropAndResizeOp(target_height, target_width, scale_lb, scale_ub, aspect_lb, aspect_ub,
InterpolationMode::kLinear, max_iter) {}
InterpolationMode::kLinear, max_attempts) {}
Status SoftDvppDecodeRandomCropResizeJpegOp::GetCropInfo(const std::shared_ptr<Tensor> &input,
SoftDpCropInfo *crop_info) {

View File

@ -32,7 +32,7 @@ class SoftDvppDecodeRandomCropResizeJpegOp : public RandomCropAndResizeOp {
public:
SoftDvppDecodeRandomCropResizeJpegOp(int32_t target_height, int32_t target_width, float scale_lb = kDefScaleLb,
float scale_ub = kDefScaleUb, float aspect_lb = kDefAspectLb,
float aspect_ub = kDefAspectUb, int32_t max_iter = kDefMaxIter);
float aspect_ub = kDefAspectUb, int32_t max_attempts = kDefMaxIter);
/// \brief Destructor
~SoftDvppDecodeRandomCropResizeJpegOp() = default;

View File

@ -131,9 +131,7 @@ Status FillOperation::ValidateParams() {
std::shared_ptr<TensorOp> FillOperation::Build() { return std::make_shared<FillOp>(fill_value_); }
Status FillOperation::to_json(nlohmann::json *out_json) {
nlohmann::json args;
args["fill_value"] = fill_value_->ToString();
*out_json = args;
RETURN_IF_NOT_OK(fill_value_->to_json(out_json));
return Status::OK();
}
@ -218,7 +216,7 @@ std::shared_ptr<TensorOp> RandomApplyOperation::Build() {
std::vector<std::shared_ptr<TensorOp>> tensor_ops;
(void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
[](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
return std::make_shared<RandomApplyOp>(prob_, tensor_ops);
return std::make_shared<RandomApplyOp>(tensor_ops, prob_);
}
// RandomChoiceOperation

View File

@ -97,7 +97,7 @@ std::shared_ptr<TensorOp> RandomCropOperation::Build() {
}
auto tensor_op = std::make_shared<RandomCropOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right,
padding_mode_, pad_if_needed_, fill_r, fill_g, fill_b);
pad_if_needed_, padding_mode_, fill_r, fill_g, fill_b);
return tensor_op;
}

View File

@ -98,7 +98,7 @@ std::shared_ptr<TensorOp> RandomCropWithBBoxOperation::Build() {
auto tensor_op =
std::make_shared<RandomCropWithBBoxOp>(crop_height, crop_width, pad_top, pad_bottom, pad_left, pad_right,
padding_mode_, pad_if_needed_, fill_r, fill_g, fill_b);
pad_if_needed_, padding_mode_, fill_r, fill_g, fill_b);
return tensor_op;
}

View File

@ -31,8 +31,7 @@ namespace vision {
#ifndef ENABLE_ANDROID
// RandomHorizontalFlipOperation
RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float probability)
: TensorOperation(true), probability_(probability) {}
RandomHorizontalFlipOperation::RandomHorizontalFlipOperation(float prob) : TensorOperation(true), probability_(prob) {}
RandomHorizontalFlipOperation::~RandomHorizontalFlipOperation() = default;

View File

@ -37,7 +37,7 @@ constexpr char kRandomHorizontalFlipOperation[] = "RandomHorizontalFlip";
class RandomHorizontalFlipOperation : public TensorOperation {
public:
explicit RandomHorizontalFlipOperation(float probability);
explicit RandomHorizontalFlipOperation(float prob);
~RandomHorizontalFlipOperation();

View File

@ -30,12 +30,11 @@ namespace vision {
#ifndef ENABLE_ANDROID
// Function to create RandomRotationOperation.
RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode,
bool expand, std::vector<float> center,
std::vector<uint8_t> fill_value)
RandomRotationOperation::RandomRotationOperation(std::vector<float> degrees, InterpolationMode resample, bool expand,
std::vector<float> center, std::vector<uint8_t> fill_value)
: TensorOperation(true),
degrees_(degrees),
interpolation_mode_(interpolation_mode),
interpolation_mode_(resample),
expand_(expand),
center_(center),
fill_value_(fill_value) {}
@ -98,7 +97,7 @@ std::shared_ptr<TensorOp> RandomRotationOperation::Build() {
}
std::shared_ptr<RandomRotationOp> tensor_op = std::make_shared<RandomRotationOp>(
start_degree, end_degree, center_[0], center_[1], interpolation_mode_, expand_, fill_r, fill_g, fill_b);
start_degree, end_degree, interpolation_mode_, expand_, center_[0], center_[1], fill_r, fill_g, fill_b);
return tensor_op;
}

View File

@ -37,7 +37,7 @@ constexpr char kRandomRotationOperation[] = "RandomRotation";
class RandomRotationOperation : public TensorOperation {
public:
RandomRotationOperation(std::vector<float> degrees, InterpolationMode interpolation_mode, bool expand,
RandomRotationOperation(std::vector<float> degrees, InterpolationMode resample, bool expand,
std::vector<float> center, std::vector<uint8_t> fill_value);
~RandomRotationOperation();

View File

@ -31,8 +31,7 @@ namespace vision {
#ifndef ENABLE_ANDROID
// RandomVerticalFlipOperation
RandomVerticalFlipOperation::RandomVerticalFlipOperation(float probability)
: TensorOperation(true), probability_(probability) {}
RandomVerticalFlipOperation::RandomVerticalFlipOperation(float prob) : TensorOperation(true), probability_(prob) {}
RandomVerticalFlipOperation::~RandomVerticalFlipOperation() = default;

View File

@ -37,7 +37,7 @@ constexpr char kRandomVerticalFlipOperation[] = "RandomVerticalFlip";
class RandomVerticalFlipOperation : public TensorOperation {
public:
explicit RandomVerticalFlipOperation(float probability);
explicit RandomVerticalFlipOperation(float prob);
~RandomVerticalFlipOperation();

View File

@ -31,8 +31,8 @@ namespace vision {
#ifndef ENABLE_ANDROID
// RandomVerticalFlipWithBBoxOperation
RandomVerticalFlipWithBBoxOperation::RandomVerticalFlipWithBBoxOperation(float probability)
: TensorOperation(true), probability_(probability) {}
RandomVerticalFlipWithBBoxOperation::RandomVerticalFlipWithBBoxOperation(float prob)
: TensorOperation(true), probability_(prob) {}
RandomVerticalFlipWithBBoxOperation::~RandomVerticalFlipWithBBoxOperation() = default;

View File

@ -37,7 +37,7 @@ constexpr char kRandomVerticalFlipWithBBoxOperation[] = "RandomVerticalFlipWithB
class RandomVerticalFlipWithBBoxOperation : public TensorOperation {
public:
explicit RandomVerticalFlipWithBBoxOperation(float probability);
explicit RandomVerticalFlipWithBBoxOperation(float prob);
~RandomVerticalFlipWithBBoxOperation();

View File

@ -37,7 +37,7 @@ constexpr char kResizeOperation[] = "Resize";
class ResizeOperation : public TensorOperation {
public:
explicit ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation_mode);
explicit ResizeOperation(std::vector<int32_t> size, InterpolationMode interpolation);
~ResizeOperation();

View File

@ -257,7 +257,7 @@ std::shared_ptr<TensorOp> NgramOperation::Build() {
int32_t r_len = right_pad_.second;
std::string l_pad = left_pad_.first;
std::string r_pad = right_pad_.first;
std::shared_ptr<NgramOp> tensor_op = std::make_shared<NgramOp>(ngrams_, l_len, r_len, l_pad, r_pad, separator_);
std::shared_ptr<NgramOp> tensor_op = std::make_shared<NgramOp>(ngrams_, l_len, l_pad, r_len, r_pad, separator_);
return tensor_op;
}

View File

@ -24,7 +24,7 @@
namespace mindspore {
namespace dataset {
NgramOp::NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, int32_t r_len, const std::string &l_pad,
NgramOp::NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, const std::string &l_pad, int32_t r_len,
const std::string &r_pad, const std::string &separator)
: ngrams_(ngrams),
l_len_(l_len),

View File

@ -32,12 +32,12 @@ class NgramOp : public TensorOp {
public:
// Constructor of Ngram model
// @param const std::vector<int32_t> &ngrams
// @param int32_tl_len - padding length on the left
// @param int32_t r_len - padding length on the right
// @param int32_t l_len - padding length on the left
// @param const std::string &l_pad - padding token on the left
// @param int32_t r_len - padding length on the right
// @param const std::string &r_pad - padding token on the right
// @param const std::string &separator - use to join strings
NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, int32_t r_len, const std::string &l_pad,
NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, const std::string &l_pad, int32_t r_len,
const std::string &r_pad, const std::string &separator);
// perform ngram model on each tensor

View File

@ -32,9 +32,9 @@
namespace mindspore {
namespace dataset {
ToNumberOp::ToNumberOp(const DataType &cast_to_type) : cast_to_type_(cast_to_type) {}
ToNumberOp::ToNumberOp(const DataType &data_type) : cast_to_type_(data_type) {}
ToNumberOp::ToNumberOp(const std::string &cast_to_type) : cast_to_type_(DataType(cast_to_type)) {}
ToNumberOp::ToNumberOp(const std::string &data_type) : cast_to_type_(DataType(data_type)) {}
Status ToNumberOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "ToNumber: input should be string datatype.");

View File

@ -32,12 +32,12 @@ namespace dataset {
class ToNumberOp : public TensorOp {
public:
// Constructor of ToNumberOp
// @param const DataType &cast_to_type - the type to convert string inputs to.
explicit ToNumberOp(const DataType &cast_to_type);
// @param const DataType &data_type - the type to convert string inputs to.
explicit ToNumberOp(const DataType &data_type);
// Constructor of ToNumberOp
// @param const std::string &cast_to_type - the type in string form to convert string inputs to.
explicit ToNumberOp(const std::string &cast_to_type);
// @param const std::string &data_type - the type in string form to convert string inputs to.
explicit ToNumberOp(const std::string &data_type);
~ToNumberOp() override = default;

View File

@ -32,7 +32,7 @@ namespace dataset {
class TruncateSequencePairOp : public TensorOp {
public:
explicit TruncateSequencePairOp(dsize_t length) : max_length_(length) {}
explicit TruncateSequencePairOp(dsize_t max_length) : max_length_(max_length) {}
~TruncateSequencePairOp() override = default;

View File

@ -273,7 +273,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) {
int64_t num_samples = 0;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
rc = CacheOp::Builder()
.SetNumWorkers(5)
.SetClient(myClient)
@ -391,18 +391,14 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) {
// CacheOp
int64_t num_samples = 0;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
CacheClient::Builder builder;
builder.SetSessionId(env_session).SetCacheMemSz(4).SetSpill(true);
std::shared_ptr<CacheClient> myClient;
rc = builder.Build(&myClient);
ASSERT_TRUE(rc.IsOk());
std::shared_ptr<CacheOp> myCacheOp;
rc = CacheOp::Builder()
.SetNumWorkers(4)
.SetClient(myClient)
.SetSampler(std::move(seq_sampler))
.Build(&myCacheOp);
rc = CacheOp::Builder().SetNumWorkers(4).SetClient(myClient).SetSampler(std::move(seq_sampler)).Build(&myCacheOp);
ASSERT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myCacheOp);
ASSERT_TRUE(rc.IsOk());
@ -466,7 +462,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) {
rc = GetSessionFromEnv(&env_session);
ASSERT_TRUE(rc.IsOk());
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
CacheClient::Builder ccbuilder;
ccbuilder.SetSessionId(env_session).SetCacheMemSz(0).SetSpill(true);

View File

@ -138,7 +138,7 @@ TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) {
TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) {
std::vector<int64_t> indices({1});
int64_t num_samples = 0;
std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(num_samples, indices);
std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
uint32_t expect_labels[1][40] = {{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}};
std::string dir = datasets_root_path_ + "/testCelebAData/";

View File

@ -35,9 +35,9 @@
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
@ -60,11 +60,11 @@ class MindDataTestCifarOp : public UT::DatasetOpTesting {
};
TEST_F(MindDataTestCifarOp, TestSequentialSamplerCifar10) {
//Note: CIFAR and Mnist datasets are not included
//as part of the build tree.
//Download datasets and rebuild if data doesn't
//appear in this dataset
//Example: python tests/dataset/data/prep_data.py
// Note: CIFAR and Mnist datasets are not included
// as part of the build tree.
// Download datasets and rebuild if data doesn't
// appear in this dataset
// Example: python tests/dataset/data/prep_data.py
std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
auto tree = Build({Cifarop(16, 2, 32, folder_path, nullptr)});
tree->Prepare();
@ -93,7 +93,7 @@ TEST_F(MindDataTestCifarOp, TestSequentialSamplerCifar10) {
TEST_F(MindDataTestCifarOp, TestRandomSamplerCifar10) {
uint32_t original_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(0);
std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(12, true, true);
std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(true, 12, true);
std::string folder_path = datasets_root_path_ + "/testCifar10Data/";
auto tree = Build({Cifarop(16, 2, 32, folder_path, std::move(sampler))});
tree->Prepare();
@ -139,8 +139,8 @@ TEST_F(MindDataTestCifarOp, TestSequentialSamplerCifar100) {
while (tensor_map.size() != 0 && i < 100) {
tensor_map["coarse_label"]->GetItemAt<uint32_t>(&coarse, {});
tensor_map["fine_label"]->GetItemAt<uint32_t>(&fine, {});
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << " coarse:"
<< coarse << " fine:" << fine << "\n";
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << " coarse:" << coarse << " fine:" << fine
<< "\n";
i++;
ASSERT_OK(di.GetNextAsMap(&tensor_map));
}

View File

@ -27,9 +27,9 @@
#include <unordered_set>
using namespace mindspore::dataset;
using mindspore::MsLogLevel::INFO;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
class MindDataTestDistributedSampler : public UT::Common {
public:
@ -48,7 +48,7 @@ TEST_F(MindDataTestDistributedSampler, TestTwoShardsOne) {
uint64_t num_samples = 7;
// create sampler with replacement = true
DistributedSamplerRT m_sampler(num_samples, 2, 0, false, 0, -1, false);
DistributedSamplerRT m_sampler(2, 0, false, num_samples, 0, -1, false);
DummyRandomAccessOp dummyRandomAccessOp(num_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -72,7 +72,7 @@ TEST_F(MindDataTestDistributedSampler, TestTwoShardsTwo) {
uint64_t num_samples = 7;
// create sampler with replacement = true
DistributedSamplerRT m_sampler(num_samples, 2, 1, false, 0, -1, false);
DistributedSamplerRT m_sampler(2, 1, false, num_samples, 0, -1, false);
DummyRandomAccessOp dummyRandomAccessOp(num_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -97,7 +97,7 @@ TEST_F(MindDataTestDistributedSampler, TestThreeShards) {
uint64_t num_samples = 2;
// create sampler with replacement = true
DistributedSamplerRT m_sampler(num_samples, 3, 2, false, 0, -1, false);
DistributedSamplerRT m_sampler(3, 2, false, num_samples, 0, -1, false);
DummyRandomAccessOp dummyRandomAccessOp(num_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);

View File

@ -138,7 +138,7 @@ TEST_F(MindDataTestImageFolderSampler, TestRandomSamplerImageFolder) {
int32_t original_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(0);
int64_t num_samples = 12;
std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(num_samples, true, true);
std::shared_ptr<SamplerRT> sampler = std::make_unique<RandomSamplerRT>(true, num_samples, true);
int32_t res[] = {2, 2, 2, 3, 2, 3, 2, 3, 1, 2, 2, 1}; // ground truth label
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
@ -206,7 +206,7 @@ TEST_F(MindDataTestImageFolderSampler, TestSubsetRandomSamplerImageFolder) {
// id range 0 - 10 is label 0, and id range 11 - 21 is label 1
std::vector<int64_t> indices({0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 11});
int64_t num_samples = 0;
std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(num_samples, indices);
std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
std::string folder_path = datasets_root_path_ + "/testPK/data";
// Expect 6 samples for label 0 and 1
int res[2] = {6, 6};
@ -244,7 +244,7 @@ TEST_F(MindDataTestImageFolderSampler, TestWeightedRandomSamplerImageFolder) {
// create sampler with replacement = replacement
std::shared_ptr<SamplerRT> sampler =
std::make_shared<WeightedRandomSamplerRT>(num_samples, weights, true, samples_per_tensor);
std::make_shared<WeightedRandomSamplerRT>(weights, num_samples, true, samples_per_tensor);
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
@ -302,7 +302,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderClassIndex) {
TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) {
int64_t num_samples = 0;
std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(num_samples, 11, 10, false);
std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(11, 10, false, num_samples);
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto op1 = ImageFolder(16, 2, 32, folder_path, false, std::move(sampler));
auto op2 = Repeat(4);
@ -334,7 +334,7 @@ TEST_F(MindDataTestImageFolderSampler, TestDistributedSampler) {
TEST_F(MindDataTestImageFolderSampler, TestPKSamplerImageFolder) {
int64_t num_samples = 0;
std::shared_ptr<SamplerRT> sampler = std::make_shared<PKSamplerRT>(num_samples, 3, false, 4);
std::shared_ptr<SamplerRT> sampler = std::make_shared<PKSamplerRT>(3, false, num_samples, 4);
int32_t res[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; // ground truth label
std::string folder_path = datasets_root_path_ + "/testPK/data";
auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler))});
@ -369,7 +369,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderDecode) {
map["wrong folder name"] = 1234; // this is skipped
int64_t num_samples = 20;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(seq_sampler), map, true)});
int64_t res[2] = {111, 333};
tree->Prepare();
@ -399,7 +399,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderDecode) {
TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) {
int64_t num_samples = 5;
std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(num_samples, 4, 0, false);
std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 0, false, num_samples);
std::string folder_path = datasets_root_path_ + "/testPK/data";
// numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode
auto tree = Build({ImageFolder(16, 2, 32, folder_path, false, std::move(sampler), {})});
@ -429,7 +429,7 @@ TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding1) {
TEST_F(MindDataTestImageFolderSampler, TestImageFolderSharding2) {
int64_t num_samples = 12;
std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(num_samples, 4, 3, false);
std::shared_ptr<SamplerRT> sampler = std::make_shared<DistributedSamplerRT>(4, 3, false, num_samples);
std::string folder_path = datasets_root_path_ + "/testPK/data";
// numWrks, rows, conns, path, shuffle, sampler, map, numSamples, decode
auto tree = Build({ImageFolder(16, 16, 32, folder_path, false, std::move(sampler), {})});

View File

@ -33,9 +33,9 @@
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<RepeatOp> Repeat(int repeatCnt);
@ -95,7 +95,7 @@ TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) {
TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) {
std::vector<int64_t> indices({1});
int64_t num_samples = 0;
std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(num_samples, indices);
std::shared_ptr<SamplerRT> sampler = std::make_shared<SubsetRandomSamplerRT>(indices, num_samples);
std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
// Expect 6 samples for label 0 and 1
auto tree = Build({Manifest(16, 2, 32, file, "train", std::move(sampler))});
@ -156,7 +156,7 @@ TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) {
std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
int64_t num_samples = 1;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
auto op1 = Manifest(16, 2, 32, file, "train", std::move(seq_sampler), {});
auto op2 = Repeat(4);
op1->set_total_repeats(4);
@ -189,7 +189,7 @@ TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
int64_t num_samples = 1;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})});
tree->Prepare();
Status rc = tree->Launch();

View File

@ -38,9 +38,9 @@
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
@ -77,7 +77,7 @@ TEST_F(MindDataTestMnistSampler, TestSequentialMnistWithRepeat) {
std::string folder_path = datasets_root_path_ + "/testMnistData/";
int64_t num_samples = 10;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
auto op1 = CreateMnist(16, 2, 32, folder_path, false, std::move(seq_sampler));
auto op2 = Repeat(2);
op1->set_total_repeats(2);
@ -111,7 +111,7 @@ TEST_F(MindDataTestMnistSampler, TestSequentialImageFolderWithRepeatBatch) {
std::string folder_path = datasets_root_path_ + "/testMnistData/";
int64_t num_samples = 10;
int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index);
auto seq_sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples);
auto op1 = CreateMnist(16, 2, 32, folder_path, false, std::move(seq_sampler));
auto op2 = Repeat(2);
auto op3 = Batch(5);
@ -119,10 +119,7 @@ TEST_F(MindDataTestMnistSampler, TestSequentialImageFolderWithRepeatBatch) {
op1->set_num_repeats_per_epoch(2);
auto tree = Build({op1, op2, op3});
tree->Prepare();
uint32_t res[4][5] = { {0, 0, 0, 0, 0 },
{0, 0, 0, 0, 0 },
{0, 0, 0, 0, 0 },
{0, 0, 0, 0, 0 } };
uint32_t res[4][5] = {{0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}};
Status rc = tree->Launch();
if (rc.IsError()) {
MS_LOG(ERROR) << "Return code error detected during tree launch: " << rc.ToString() << ".";

View File

@ -19,9 +19,9 @@
#include "utils/log_adapter.h"
using namespace mindspore::dataset;
using mindspore::MsLogLevel::INFO;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
class MindDataTestRandomCropOp : public UT::CVOP::CVOpCommon {
protected:
@ -35,7 +35,7 @@ TEST_F(MindDataTestRandomCropOp, TestOp1) {
// Crop params
unsigned int crop_height = 128;
unsigned int crop_width = 128;
std::unique_ptr<RandomCropOp> op(new RandomCropOp(crop_height, crop_width, 0, 0, 0, 0, BorderType::kConstant, false));
std::unique_ptr<RandomCropOp> op(new RandomCropOp(crop_height, crop_width, 0, 0, 0, 0, false, BorderType::kConstant));
EXPECT_TRUE(op->OneToOne());
Status s = op->Compute(input_tensor_, &output_tensor_);
size_t actual = 0;
@ -52,7 +52,7 @@ TEST_F(MindDataTestRandomCropOp, TestOp2) {
unsigned int crop_height = 1280;
unsigned int crop_width = 1280;
std::unique_ptr<RandomCropOp> op(
new RandomCropOp(crop_height, crop_width, 513, 513, 513, 513, BorderType::kConstant, false));
new RandomCropOp(crop_height, crop_width, 513, 513, 513, 513, false, BorderType::kConstant));
EXPECT_TRUE(op->OneToOne());
Status s = op->Compute(input_tensor_, &output_tensor_);
EXPECT_EQ(true, s.IsOk());

View File

@ -44,7 +44,7 @@ TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp1) {
uint32_t current_seed = GlobalContext::config_manager()->seed();
GlobalContext::config_manager()->set_seed(327362);
std::unique_ptr<RandomCropWithBBoxOp> op(
new RandomCropWithBBoxOp(crop_height, crop_width, 0, 0, 0, 0, BorderType::kConstant, false));
new RandomCropWithBBoxOp(crop_height, crop_width, 0, 0, 0, 0, false, BorderType::kConstant));
for (auto tensor_row_ : images_and_annotations_) {
Status s = op->Compute(tensor_row_, &output_tensor_row_);
size_t actual = 0;
@ -79,7 +79,7 @@ TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp2) {
GlobalContext::config_manager()->set_seed(327362);
std::unique_ptr<RandomCropWithBBoxOp> op(
new RandomCropWithBBoxOp(crop_height, crop_width, 513, 513, 513, 513, BorderType::kConstant, false));
new RandomCropWithBBoxOp(crop_height, crop_width, 513, 513, 513, 513, false, BorderType::kConstant));
for (auto tensor_row_ : images_and_annotations_) {
Status s = op->Compute(tensor_row_, &output_tensor_row_);
@ -107,7 +107,7 @@ TEST_F(MindDataTestRandomCropWithBBoxOp, TestOp3) {
std::unique_ptr<RandomCropWithBBoxOp> op(new RandomCropWithBBoxOp(crop_height, crop_width, crop_height * 3 + 1,
crop_height * 3 + 1, crop_width * 3 + 1,
crop_width * 3 + 1, BorderType::kConstant, false));
crop_width * 3 + 1, false, BorderType::kConstant));
for (auto tensor_row_ : images_and_annotations_) {
Status s = op->Compute(tensor_row_, &output_tensor_row_);

View File

@ -20,9 +20,9 @@
#include "utils/log_adapter.h"
using namespace mindspore::dataset;
using mindspore::MsLogLevel::INFO;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
class MindDataTestRandomRotationOp : public UT::CVOP::CVOpCommon {
public:
@ -38,8 +38,8 @@ TEST_F(MindDataTestRandomRotationOp, TestOp) {
float xCenter = -1;
float yCenter = -1;
bool expand = false;
std::unique_ptr<RandomRotationOp> op(new RandomRotationOp(
sDegree, eDegree, xCenter, yCenter, InterpolationMode::kLinear, expand));
std::unique_ptr<RandomRotationOp> op(
new RandomRotationOp(sDegree, eDegree, InterpolationMode::kLinear, expand, xCenter, yCenter));
EXPECT_TRUE(op->OneToOne());
Status s = op->Compute(input_tensor_, &output_tensor);
EXPECT_TRUE(s.IsOk());

View File

@ -62,7 +62,7 @@ TEST_F(MindDataTestStandAloneSampler, TestDistributedSampler) {
TensorRow sample_row;
for (int i = 0; i < 6; i++) {
std::shared_ptr<SamplerRT> sampler =
std::make_shared<DistributedSamplerRT>(num_samples, 3, i % 3, (i < 3 ? false : true));
std::make_shared<DistributedSamplerRT>(3, i % 3, (i < 3 ? false : true), num_samples);
sampler->HandshakeRandomAccessOp(&mock);
sampler->GetNextSample(&sample_row);
tensor = sample_row[0];
@ -82,7 +82,7 @@ TEST_F(MindDataTestStandAloneSampler, TestStandAoneSequentialSampler) {
CreateINT64Tensor(&label2, 2, reinterpret_cast<unsigned char *>(res + 3));
int64_t num_samples = 0;
int64_t start_index = 0;
std::shared_ptr<SamplerRT> sampler = std::make_shared<SequentialSamplerRT>(num_samples, start_index, 3);
std::shared_ptr<SamplerRT> sampler = std::make_shared<SequentialSamplerRT>(start_index, num_samples, 3);
std::shared_ptr<Tensor> tensor;
TensorRow sample_row;

View File

@ -41,7 +41,7 @@ TEST_F(MindDataTestSubsetRandomSampler, TestAllAtOnce) {
std::vector<int64_t> in({0, 1, 2, 3, 4});
std::unordered_set<int64_t> in_set(in.begin(), in.end());
int64_t num_samples = 0;
SubsetRandomSamplerRT sampler(num_samples, in);
SubsetRandomSamplerRT sampler(in, num_samples);
DummyRandomAccessOp dummyRandomAccessOp(5);
sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -69,7 +69,7 @@ TEST_F(MindDataTestSubsetRandomSampler, TestGetNextSample) {
int64_t samples_per_tensor = 10;
int64_t num_samples = 0;
std::vector<int64_t> input(total_samples, 1);
SubsetRandomSamplerRT sampler(num_samples, input, samples_per_tensor);
SubsetRandomSamplerRT sampler(input, num_samples, samples_per_tensor);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -98,7 +98,7 @@ TEST_F(MindDataTestSubsetRandomSampler, TestReset) {
std::vector<int64_t> in({0, 1, 2, 3, 4});
std::unordered_set<int64_t> in_set(in.begin(), in.end());
int64_t num_samples = 0;
SubsetRandomSamplerRT sampler(num_samples, in);
SubsetRandomSamplerRT sampler(in, num_samples);
DummyRandomAccessOp dummyRandomAccessOp(5);
sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);

View File

@ -41,7 +41,7 @@ TEST_F(MindDataTestSubsetSampler, TestAllAtOnce) {
std::vector<int64_t> in({3, 1, 4, 0, 1});
std::unordered_set<int64_t> in_set(in.begin(), in.end());
int64_t num_samples = 0;
SubsetSamplerRT sampler(num_samples, in);
SubsetSamplerRT sampler(in, num_samples);
DummyRandomAccessOp dummyRandomAccessOp(5);
sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -69,7 +69,7 @@ TEST_F(MindDataTestSubsetSampler, TestGetNextSample) {
int64_t samples_per_tensor = 10;
int64_t num_samples = 0;
std::vector<int64_t> input(total_samples, 1);
SubsetSamplerRT sampler(num_samples, input, samples_per_tensor);
SubsetSamplerRT sampler(input, num_samples, samples_per_tensor);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -99,7 +99,7 @@ TEST_F(MindDataTestSubsetSampler, TestReset) {
std::vector<int64_t> in({0, 1, 2, 3, 4});
std::unordered_set<int64_t> in_set(in.begin(), in.end());
int64_t num_samples = 0;
SubsetSamplerRT sampler(num_samples, in);
SubsetSamplerRT sampler(in, num_samples);
DummyRandomAccessOp dummyRandomAccessOp(5);
sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);

View File

@ -536,3 +536,44 @@ TEST_F(MindDataTestTensorDE, TensorEmpty) {
t2->Invalidate();
ASSERT_TRUE(!t2->HasData());
}
// Serializes a small uint64 tensor with Tensor::to_json and checks that the "shape", "type" and "data"
// entries of the produced JSON, when streamed to text, equal the double-quoted string forms of the
// tensor's shape, its type, and the bracketed comma-separated element list.
TEST_F(MindDataTestTensorDE, TestTensor_json) {
  MS_LOG(INFO) << "Doing TestTensor.";
  std::vector<uint64_t> labels = {1, 1, 2};
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);
  // Fail the test cleanly instead of dereferencing a null tensor if creation did not produce one.
  ASSERT_TRUE(input != nullptr);

  nlohmann::json out_json;
  input->to_json(&out_json);

  // Streams one top-level entry of the produced JSON into its textual form.
  const auto field_text = [&out_json](const char *key) {
    std::stringstream ss;
    ss << out_json[key];
    return ss.str();
  };
  const std::string shape = field_text("shape");
  const std::string type = field_text("type");
  const std::string data = field_text("data");

  // ASSERT_EQ (rather than ASSERT_TRUE on ==) reports both operands when the comparison fails.
  ASSERT_EQ('"' + input->shape().ToString() + '"', shape);
  ASSERT_EQ('"' + input->type().ToString() + '"', type);

  // Build the expected text: the elements as "[a,b,c]" wrapped in double quotes.
  std::string expected_data = "\"[";
  for (size_t i = 0; i < labels.size(); ++i) {
    // Emit the separator before every element except the first; avoids the unsigned `size() - 1` arithmetic.
    if (i != 0) {
      expected_data.push_back(',');
    }
    expected_data += std::to_string(labels[i]);
  }
  expected_data += "]\"";

  MS_LOG(INFO) << "Expected data: " << expected_data;
  MS_LOG(INFO) << "Actual data: " << data;
  ASSERT_EQ(expected_data, data);
}

View File

@ -21,9 +21,9 @@
#include "utils/log_adapter.h"
using namespace mindspore::dataset;
using mindspore::MsLogLevel::INFO;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
class MindDataTestToFloat16Op : public UT::CVOP::CVOpCommon {
public:
@ -39,8 +39,8 @@ TEST_F(MindDataTestToFloat16Op, TestOp) {
float x_center = -1;
float y_center = -1;
bool expand = false;
std::unique_ptr<RandomRotationOp> op(new RandomRotationOp(
s_degree, e_degree, x_center, y_center, InterpolationMode::kLinear, expand));
std::unique_ptr<RandomRotationOp> op(
new RandomRotationOp(s_degree, e_degree, InterpolationMode::kLinear, expand, x_center, y_center));
EXPECT_TRUE(op->OneToOne());
Status s = op->Compute(input_tensor_, &output_tensor);
EXPECT_TRUE(s.IsOk());

View File

@ -51,7 +51,7 @@ TEST_F(MindDataTestWeightedRandomSampler, TestOneshotReplacement) {
std::vector<uint64_t> freq(total_samples, 0);
// create sampler with replacement = true
WeightedRandomSamplerRT m_sampler(num_samples, weights, true);
WeightedRandomSamplerRT m_sampler(weights, num_samples, true);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -80,7 +80,7 @@ TEST_F(MindDataTestWeightedRandomSampler, TestOneshotNoReplacement) {
std::vector<uint64_t> freq(total_samples, 0);
// create sampler with replacement = false
WeightedRandomSamplerRT m_sampler(num_samples, weights, false);
WeightedRandomSamplerRT m_sampler(weights, num_samples, false);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -115,7 +115,7 @@ TEST_F(MindDataTestWeightedRandomSampler, TestGetNextSampleReplacement) {
std::vector<double> weights(total_samples, std::rand() % 100);
// create sampler with replacement = true
WeightedRandomSamplerRT m_sampler(num_samples, weights, true, samples_per_tensor);
WeightedRandomSamplerRT m_sampler(weights, num_samples, true, samples_per_tensor);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -150,7 +150,7 @@ TEST_F(MindDataTestWeightedRandomSampler, TestGetNextSampleNoReplacement) {
std::vector<uint64_t> freq(total_samples, 0);
// create sampler with replacement = false
WeightedRandomSamplerRT m_sampler(num_samples, weights, false, samples_per_tensor);
WeightedRandomSamplerRT m_sampler(weights, num_samples, false, samples_per_tensor);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -190,7 +190,7 @@ TEST_F(MindDataTestWeightedRandomSampler, TestResetReplacement) {
std::vector<uint64_t> freq(total_samples, 0);
// create sampler with replacement = true
WeightedRandomSamplerRT m_sampler(num_samples, weights, true);
WeightedRandomSamplerRT m_sampler(weights, num_samples, true);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);
@ -234,7 +234,7 @@ TEST_F(MindDataTestWeightedRandomSampler, TestResetNoReplacement) {
std::vector<uint64_t> freq(total_samples, 0);
// create sampler with replacement = false
WeightedRandomSamplerRT m_sampler(num_samples, weights, false);
WeightedRandomSamplerRT m_sampler(weights, num_samples, false);
DummyRandomAccessOp dummyRandomAccessOp(total_samples);
m_sampler.HandshakeRandomAccessOp(&dummyRandomAccessOp);