!17061 Delete extra files, and change camel format

From: @zetongzhao
Reviewed-by: @pandoublefeng,@robingrosman
Signed-off-by: @robingrosman
This commit is contained in:
mindspore-ci-bot 2021-05-29 09:43:34 +08:00 committed by Gitee
commit 497505631c
17 changed files with 4 additions and 5342 deletions

View File

@ -449,7 +449,7 @@ void Tensor::Print(std::ostream &out) const {
}
}
void Tensor::Print_data(std::ostream &out) const {
void Tensor::PrintData(std::ostream &out) const {
if (data_) {
PrintRecursive(out, 0, std::vector<dsize_t>{});
}
@ -632,7 +632,7 @@ Status Tensor::to_json(nlohmann::json *out_json) {
args["shape"] = shape_.ToString();
args["type"] = type_.ToString();
std::stringstream ss;
this->Print_data(ss);
this->PrintData(ss);
args["data"] = ss.str();
*out_json = args;
return Status::OK();

View File

@ -665,7 +665,7 @@ class Tensor {
/// A function that prints info about the tensor
/// \param[out] out output stream
void Print_data(std::ostream &out) const;
void PrintData(std::ostream &out) const;
/// A function that print the value as specified by its index
/// \param[in] index vector representing the index

View File

@ -1,27 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_
namespace mindspore {
namespace dataset {
namespace audio {} // namespace audio
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_AUDIO_H_

View File

@ -1,86 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONFIG_H
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONFIG_H
#include <cstdint>
#include <string>
#include <vector>
#include "include/api/dual_abi_helper.h"
namespace mindspore {
namespace dataset {
// Config operations for setting and getting the configuration.
// NOTE(review): the meaning of the bool returned by the setters and by load() is not visible in this header;
// presumably true on success - confirm against the implementation.
namespace config {
/// \brief Function to set the seed to be used in any random generator. This is used to produce deterministic results.
/// \param[in] seed the default seed to use.
/// \note The setter takes an int32_t while get_seed() returns a uint32_t.
bool set_seed(int32_t seed);
/// \brief Function to get the seed.
/// \return the seed set in the configuration.
uint32_t get_seed();
/// \brief Function to set the number of rows to be prefetched.
/// \param[in] prefetch_size total number of rows to be prefetched.
bool set_prefetch_size(int32_t prefetch_size);
/// \brief Function to get the prefetch size in number of rows.
/// \return total number of rows to be prefetched.
int32_t get_prefetch_size();
/// \brief Function to set the default number of parallel workers.
/// \param[in] num_parallel_workers number of parallel workers to be used as a default for each operation.
bool set_num_parallel_workers(int32_t num_parallel_workers);
/// \brief Function to get the default number of parallel workers.
/// \return number of parallel workers to be used as a default for each operation.
int32_t get_num_parallel_workers();
/// \brief Function to set the default interval (in milliseconds) for monitor sampling.
/// \param[in] interval interval (in milliseconds) to be used for performance monitor sampling.
bool set_monitor_sampling_interval(int32_t interval);
/// \brief Function to get the default interval of performance monitor sampling.
/// \return interval (in milliseconds) for performance monitor sampling.
int32_t get_monitor_sampling_interval();
/// \brief Function to set the default timeout (in seconds) for DSWaitedCallback. In case of a deadlock, the wait
/// function will exit after the timeout period.
/// \param[in] timeout timeout (in seconds) to be used to end the wait in DSWaitedCallback in case of a deadlock.
bool set_callback_timeout(int32_t timeout);
/// \brief Function to get the default timeout for DSWaitedCallback. In case of a deadlock, the wait function will exit
/// after the timeout period.
/// \return the duration in seconds.
int32_t get_callback_timeout();
/// \brief Function to load configuration from a file.
/// \param[in] file path of the configuration file to be loaded.
/// \note This API exists because std::string is constrained by the ABI compile option while char is not.
bool load(const std::vector<char> &file);
/// \brief Function to load configuration from a file.
/// \param[in] file path of the configuration file to be loaded.
/// \note Convenience overload: converts the path with StringToChar and forwards to load(const std::vector<char> &).
inline bool load(std::string file) { return load(StringToChar(file)); }
} // namespace config
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONFIG_H

View File

@ -1,123 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONSTANTS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONSTANTS_H_
#include <cstdint>
#include <limits>
#include <random>
namespace mindspore {
namespace dataset {
// Various type defines for convenience
using uchar = unsigned char;
using dsize_t = int64_t;
/// \brief Target devices to perform map operation
enum class MapTargetDevice { kCpu, kGpu, kAscend310 };
/// \brief Possible dataset types for holding the data and client type
enum class DatasetType { kUnknown, kArrow, kTf };
/// \brief Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };
/// \brief Possible values for shuffle
enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2, kInfile = 3 };
/// \brief Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };
/// \brief Possible values for Image format types in a batch
enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 };
/// \brief Possible values for Image format types
enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 };
/// \brief Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3, kCubicPil = 4 };
/// \brief Possible JiebaMode modes
enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 };
/// \brief Possible values for SPieceTokenizerOutType
enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };
/// \brief Possible values for SPieceTokenizerLoadType
enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };
/// \brief Possible values for SentencePieceModel
enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };
/// \brief Possible values for NormalizeForm
enum class NormalizeForm {
kNone = 0,
kNfc,
kNfkc,
kNfd,
kNfkd,
};
/// \brief Possible values for Mask
enum class RelationalOp {
kEqual = 0, // ==
kNotEqual, // !=
kLess, // <
kLessEqual, // <=
kGreater, // >
kGreaterEqual, // >=
};
/// \brief Possible values for SamplingStrategy
enum class SamplingStrategy { kRandom = 0, kEdgeWeight = 1 };
// convenience functions for 32bit int bitmask
/// \brief Check whether every bit of bitMask is set in bits.
/// \param[in] bits value to inspect
/// \param[in] bitMask bits that must all be present in bits
/// \return true when no bit of bitMask is missing from bits (an all-zero mask therefore always tests true)
inline bool BitTest(uint32_t bits, uint32_t bitMask) {
  const uint32_t missing = ~bits & bitMask;
  return missing == 0;
}
/// \brief Turn on, in place, every bit of bitMask in the value pointed to by bits.
/// \param[in,out] bits pointer to the value to modify; it is dereferenced unconditionally
/// \param[in] bitMask bits to switch on
inline void BitSet(uint32_t *bits, uint32_t bitMask) {
  const uint32_t current = *bits;
  *bits = current | bitMask;
}
/// \brief Turn off, in place, every bit of bitMask in the value pointed to by bits.
/// \param[in,out] bits pointer to the value to modify; it is dereferenced unconditionally
/// \param[in] bitMask bits to switch off
inline void BitClear(uint32_t *bits, uint32_t bitMask) {
  const uint32_t keep = ~bitMask;
  *bits = *bits & keep;
}
// Upper limits, all set to the maximum value of their integer type.
constexpr int64_t kDeMaxDim = std::numeric_limits<int64_t>::max();
constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max();
constexpr int64_t kDeMaxFreq = std::numeric_limits<int64_t>::max(); // 9223372036854775807, i.e. 2^63 - 1
constexpr int64_t kDeMaxTopk = std::numeric_limits<int64_t>::max();
constexpr uint32_t kCfgRowsPerBuffer = 1;
constexpr uint32_t kCfgParallelWorkers = 8;
constexpr uint32_t kCfgWorkerConnectorSize = 16;
constexpr uint32_t kCfgOpConnectorSize = 16;
constexpr int32_t kCfgDefaultRankId = -1;
// Same value as the default seed of std::mt19937 (5489).
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
constexpr int32_t kCfgDefaultCachePort = 50052;
constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
constexpr int32_t kDftPrefetchSize = 20;
constexpr int32_t kDftNumConnections = 12;
// NOTE(review): declared as int32_t but initialized from the bool literal false (value 0) - confirm the intended type.
constexpr int32_t kDftAutoNumWorkers = false;
constexpr char kDftMetaColumnPrefix[] = "_meta-";
// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;
// Integer aliases used for connection, session and row identifiers.
using connection_id_type = uint64_t;
using session_id_type = uint32_t;
using row_id_type = int64_t;
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_CONSTANTS_H_

View File

@ -1,448 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_
#include <sys/stat.h>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>
#include <unordered_map>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
namespace mindspore {
namespace dataset {
/// \brief Simple class to do data manipulation, contains helper function to update json files in dataset
class DataHelper {
public:
/// \brief constructor
DataHelper() {}
/// \brief Destructor
~DataHelper() = default;
/// \brief Create an Album dataset while taking in a path to a image folder
/// Creates the output directory if doesn't exist
/// \param[in] in_dir Image folder directory that takes in images
/// \param[in] out_dir Directory containing output json files
Status CreateAlbum(const std::string &in_dir, const std::string &out_dir) {
return CreateAlbumIF(StringToChar(in_dir), StringToChar(out_dir));
}
/// \brief Update a json file field with a vector of string values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional input for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<std::string> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), VectorStringToChar(value), StringToChar(out_file));
}
/// \brief Update a json file field with a vector of bool values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<bool> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int8 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int8_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint8 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint8_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int16 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int16_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint16 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint16_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int32 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int32_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint32 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint32_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of int64 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<int64_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of uint64 values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<uint64_t> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of float values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<float> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a vector of double values
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value array to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateArray(const std::string &in_file, const std::string &key, const std::vector<double> &value,
const std::string &out_file = "") {
return UpdateArrayIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a string value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const std::string &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), StringToChar(value), StringToChar(out_file));
}
/// \brief Update a json file field with a bool value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const bool &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an int8 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int8_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an uint8 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint8_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an int16 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int16_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an uint16 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint16_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an int32 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int32_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an uint32 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint32_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an int64 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const int64_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with an uint64 value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const uint64_t &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a float value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const float &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Update a json file field with a double value
/// \param in_file The input file name to read in
/// \param key Key of field to write to
/// \param value Value to write to file
/// \param out_file Optional parameter for output file path, will write to input file if not specified
/// \return Status The status code returned
Status UpdateValue(const std::string &in_file, const std::string &key, const double &value,
const std::string &out_file = "") {
return UpdateValueIF(StringToChar(in_file), StringToChar(key), value, StringToChar(out_file));
}
/// \brief Template function to write the contents of a vector to a binary file
/// \note The elements are written as raw bytes (data.size() * sizeof(T)); the file is opened in binary output mode.
/// \param[in] in_file File to write to
/// \param[in] data Array of type T values
/// \return Status The status code returned: kMDUnexpectedError when the file cannot be opened or the write fails,
///     OK otherwise
template <typename T>
Status WriteBinFile(const std::string &in_file, const std::vector<T> &data) {
  try {
    std::ofstream o(in_file, std::ios::binary | std::ios::out);
    if (!o.is_open()) {
      return Status(kMDUnexpectedError, "Error opening Bin file to write");
    }
    // data() is valid for an empty vector, whereas &data[0] would be undefined behaviour.
    const auto num_bytes = static_cast<std::streamsize>(data.size() * sizeof(T));
    o.write(reinterpret_cast<const char *>(data.data()), num_bytes);
    o.close();
    // std::ofstream does not throw by default, so a failed write or flush is only visible through the stream state.
    if (o.fail()) {
      return Status(kMDUnexpectedError, "Write bin file failed ");
    }
  }
  // Catch any exception and convert to Status return code
  catch (const std::exception &) {
    return Status(kMDUnexpectedError, "Write bin file failed ");
  }
  return Status::OK();
}
/// \brief Write a raw buffer to a binary file; takes a pointer to avoid a memcpy
/// \param[in] in_file File name to write to
/// \param[in] data Pointer to data
/// \param[in] length Number of T values to write from the pointer
/// \return Status The status code returned: kMDUnexpectedError when data is null while length is non-zero, when the
///     file cannot be opened, or when the write fails; OK otherwise
template <typename T>
Status WriteBinFile(const std::string &in_file, T *data, size_t length) {
  // Reject a null buffer before the file is opened, so an invalid call neither reads through it nor truncates the file.
  if (data == nullptr && length > 0) {
    return Status(kMDUnexpectedError, "Write bin file failed ");
  }
  try {
    std::ofstream o(in_file, std::ios::binary | std::ios::out);
    if (!o.is_open()) {
      return Status(kMDUnexpectedError, "Error opening Bin file to write");
    }
    const auto num_bytes = static_cast<std::streamsize>(length * sizeof(T));
    o.write(reinterpret_cast<const char *>(data), num_bytes);
    o.close();
    // std::ofstream does not throw by default, so a failed write or flush is only visible through the stream state.
    if (o.fail()) {
      return Status(kMDUnexpectedError, "Write bin file failed ");
    }
  }
  // Catch any exception and convert to Status return code
  catch (const std::exception &) {
    return Status(kMDUnexpectedError, "Write bin file failed ");
  }
  return Status::OK();
}
/// \brief Helper function to copy content of a tensor to buffer
/// \note This function iterates over the tensor in bytes
/// \param[in] tensor_addr The memory held by a tensor
/// \param[in] tensor_size The amount of data in bytes in tensor_addr, e.g. tensor->SizeInBytes()
/// \param[out] addr The address to copy tensor data to
/// \param[in] buffer_size The buffer size of addr
/// \return The size of the tensor (bytes copied)
size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size);
/// \brief Helper function to delete key in json file
/// \note This function will return okay even if the key is not found
/// \param[in] in_file Json file to remove key from
/// \param[in] key The key to remove
/// \return Status The status code returned
Status RemoveKey(const std::string &in_file, const std::string &key, const std::string &out_file = "") {
return RemoveKeyIF(StringToChar(in_file), StringToChar(key), StringToChar(out_file));
}
/// \brief A print method typically used for debugging
/// \param out - The output stream to write output to
void Print(std::ostream &out) const;
/// \brief << Stream output operator overload
/// \note This allows you to write the debug print info using stream operators
/// \param out Reference to the output stream being overloaded
/// \param dh Reference to the DataHelper to display
/// \return The output stream must be returned
friend std::ostream &operator<<(std::ostream &out, const DataHelper &dh) {
dh.Print(out);
return out;
}
private:
// Helper function for dual ABI support
Status CreateAlbumIF(const std::vector<char> &in_dir, const std::vector<char> &out_dir);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<std::vector<char>> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<bool> &value,
const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<int8_t> &value,
const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint8_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<int16_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint16_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<int32_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint32_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<int64_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key,
const std::vector<uint64_t> &value, const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<float> &value,
const std::vector<char> &out_file);
Status UpdateArrayIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<double> &value,
const std::vector<char> &out_file);
// Char-interface overloads of UpdateValueIF for a single `value`: one overload per supported value type
// (a character vector, bool, the fixed-width signed/unsigned integers from 8 to 64 bits, float and double).
// File names and the key are passed as std::vector<char>, not std::string.
// NOTE(review): the scalar overloads take `value` by const reference even for built-in types; this is kept
// unchanged because the definitions live outside this header.
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const bool &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int8_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint8_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int16_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint16_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int32_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint32_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const int64_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const uint64_t &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const float &value,
const std::vector<char> &out_file);
Status UpdateValueIF(const std::vector<char> &in_file, const std::vector<char> &key, const double &value,
const std::vector<char> &out_file);
// Char-interface declaration of RemoveKeyIF; file names and the key are passed as std::vector<char>.
// NOTE(review): presumably removes `key` from the file named by `in_file` and writes the result to `out_file` -
// confirm against the implementation, which is not visible in this header.
Status RemoveKeyIF(const std::vector<char> &in_file, const std::vector<char> &key, const std::vector<char> &out_file);
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_DATA_HELPER_H_

View File

@ -1,128 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_EXECUTE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_EXECUTE_H_
#include <string>
#include <vector>
#include <map>
#include <memory>
#include "include/api/context.h"
#include "include/api/types.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
class DeviceResource;
/// \brief Class to run tensor operations in eager mode.
/// \note There is one constructor overload per way of handing over operations: shared pointer, reference wrapper
///     or raw pointer, each either as a single operation or as a vector of operations.
/// NOTE(review): std::reference_wrapper is declared in <functional>, which this header does not include directly.
class Execute {
public:
/// \brief Constructor.
/// \param[in] op TensorOperation to be applied in Eager mode, it accepts op in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::shared_ptr<TensorOperation> op, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::shared_ptr<TensorTransform> op, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of reference wrapper.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::reference_wrapper<TensorTransform> op, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of raw pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(TensorTransform *op, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorOperations to be applied in Eager mode, it accepts ops in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::vector<std::shared_ptr<TensorOperation>> ops,
MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts ops in type of shared pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(std::vector<std::shared_ptr<TensorTransform>> ops,
MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts ops in type of reference
///     wrapper. The vector itself is taken by value; the top-level const only affects the constructor body.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(const std::vector<std::reference_wrapper<TensorTransform>> ops,
MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0);
/// \brief Constructor.
/// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts ops in type of raw pointer.
/// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU).
/// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0).
explicit Execute(const std::vector<TensorTransform *> &ops, MapTargetDevice deviceType = MapTargetDevice::kCpu,
uint32_t device_id = 0);
/// \brief Destructor.
~Execute();
/// \brief Callable function to execute the TensorTransform in eager mode.
/// \param[in] input Tensor to be transformed.
/// \param[out] output Transformed tensor.
/// \return Status error code, returns OK if no error encountered.
Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output);
/// \brief Callable function to execute the TensorTransform in eager mode.
/// \param[in] input_tensor_list List of Tensor to be transformed.
/// \param[out] out Result tensor after transform.
/// \return Status error code, returns OK if no error encountered.
Status operator()(const std::vector<mindspore::MSTensor> &input_tensor_list, std::vector<mindspore::MSTensor> *out);
/// NOTE(review): undocumented in the original. The name suggests it releases device-side memory held for the
/// configured target device - confirm against the implementation.
/// \return Status error code.
Status DeviceMemoryRelease();
/// NOTE(review): undocumented in the original. The name suggests it generates an AIPP configuration; whether the
/// returned string is the configuration text or a file path cannot be told from this header - confirm.
std::string AippCfgGenerator();
private:
// Internal helpers; NOTE(review): presumably convert transforms_ into ops_ and check the device settings -
// confirm against the implementation.
Status ParseTransforms_();
Status validate_device_();
// Operations in their TensorTransform form.
std::vector<std::shared_ptr<TensorTransform>> transforms_;
// Operations in their TensorOperation form.
std::vector<std::shared_ptr<TensorOperation>> ops_;
MapTargetDevice device_type_;
// DeviceResource is only forward-declared in this header.
std::shared_ptr<DeviceResource> device_resource_;
// Opaque extra state; ExtraInfo is declared here and defined outside this header.
struct ExtraInfo;
std::shared_ptr<ExtraInfo> info_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_EXECUTE_H_

View File

@ -1,153 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_ITERATOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_ITERATOR_H_
#include <map>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/api/types.h"
namespace mindspore {
namespace dataset {
// Forward declarations; the full definitions live outside this header.
class ExecutionTree;
class DatasetOp;
class Tensor;
class NativeRuntimeContext;
class IteratorConsumer;
class PullBasedIteratorConsumer;
class Dataset;
/// \brief A row as a map from std::string keys to tensors (used by Iterator::GetNextRow(MSTensorMap *)).
using MSTensorMap = std::unordered_map<std::string, mindspore::MSTensor>;
/// \brief The same row with keys stored as std::vector<char>; used by the char interface (GetNextRowCharIF).
/// \note This is an ordered std::map, whereas MSTensorMap is an std::unordered_map.
using MSTensorMapChar = std::map<std::vector<char>, mindspore::MSTensor>;
/// \brief A row as a plain vector of tensors.
using MSTensorVec = std::vector<mindspore::MSTensor>;
// Abstract class for iterating over the dataset.
class Iterator {
public:
/// \brief Constructor
Iterator();
/// \brief Destructor
~Iterator();
/// \brief Method for building and launching the pipeline.
/// \param[in] ops - a vector of DatasetOp in the data pipeline.
/// \param[in] num_epochs Number of epochs passed down to EpochCtrlNode, default -1, infinite epochs
/// \return Status error code, returns OK if no error encountered.
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds, int32_t num_epochs);
/// \brief Function to get the next row from the data pipeline.
/// \note Type of return data is a map(with column name).
/// \param[out] row - the output tensor row.
/// \return Status error code, returns OK if no error encountered.
Status GetNextRow(MSTensorMap *row) {
MSTensorMapChar row_;
row_.clear();
row->clear();
Status s = GetNextRowCharIF(&row_);
TensorMapCharToString(&row_, row);
return s;
}
// Char interface(CharIF) of GetNextRow
// This This API exists because std::string will constrained by ABI compile option while char don't.
Status GetNextRowCharIF(MSTensorMapChar *row);
/// \brief Function to get the next row from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \param[out] row - the output tensor row.
/// \return Status error code, returns OK if no error encountered.
virtual Status GetNextRow(MSTensorVec *row);
/// \brief Function to shut down the data pipeline.
void Stop();
class _Iterator {
public:
explicit _Iterator(Iterator *lt);
// Destructor
~_Iterator() {
if (cur_row_) {
delete cur_row_;
}
}
_Iterator &operator++(); // prefix ++ overload
MSTensorMap &operator*() { return *cur_row_; } // dereference operator
MSTensorMap *operator->() { return cur_row_; }
bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; }
private:
int ind_; // the cur node our Iterator points to
Iterator *lt_;
MSTensorMap *cur_row_;
};
_Iterator begin() { return _Iterator(this); }
_Iterator end() { return _Iterator(nullptr); }
private:
std::unique_ptr<NativeRuntimeContext> runtime_context_;
IteratorConsumer *consumer_;
};
/// \brief Iterator variant that owns a PullBasedIteratorConsumer and overrides the vector form of GetNextRow.
class PullIterator : public Iterator {
public:
/// \brief Constructor
PullIterator();
/// \brief Destructor
~PullIterator() = default;
/// \brief Function to get next row from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \param[out] row The output tensor row.
/// \return Status error code, returns OK if no error encountered.
Status GetNextRow(MSTensorVec *const row) override;
/// \brief Function to get specified rows from the data pipeline.
/// \note Type of return data is a vector(without column name).
/// \note This behavior is subject to change
/// \param[in] num_rows The number of rows to fetch.
/// \param[out] row The output tensor rows, one MSTensorVec per fetched row.
/// \return Status error code, returns OK if no error encountered.
Status GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row);
/// \brief Method for building and launching the pipeline.
/// \note Consider making this function protected.
/// \note This one-argument function hides Iterator::BuildAndLaunchTree(ds, num_epochs) for calls made through
///     a PullIterator object.
/// \param[in] ds - The root node that calls the function
/// \return Status error code, returns OK if no error encountered.
Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);
private:
// PullBasedIteratorConsumer is only forward-declared in this header.
std::unique_ptr<PullBasedIteratorConsumer> pull_consumer_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_ITERATOR_H_

View File

@ -1,74 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_
#define INCLUDE_NLOHMANN_JSON_FWD_HPP_
#include <cstdint> // int64_t, uint64_t
#include <map> // map
#include <memory> // allocator
#include <string> // string
#include <vector> // vector
/*!
@brief namespace for Niels Lohmann
@see https://github.com/nlohmann
@since version 1.0.0
*/
namespace nlohmann {
/*!
@brief default JSONSerializer template argument
This serializer ignores the template arguments and uses ADL
([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl))
for serialization.
*/
template <typename T = void, typename SFINAE = void>
struct adl_serializer;
/*!
@brief forward declaration of the JSON class template
Each template parameter selects the type used for one kind of JSON value
(object, array, string, boolean, signed integer, unsigned integer,
floating-point number), followed by the allocator and the serializer.
The defaults are the standard-library types named in the parameter list.
*/
template <template <typename U, typename V, typename... Args> class ObjectType = std::map,
template <typename U, typename... Args> class ArrayType = std::vector, class StringType = std::string,
class BooleanType = bool, class NumberIntegerType = std::int64_t, class NumberUnsignedType = std::uint64_t,
class NumberFloatType = double, template <typename U> class AllocatorType = std::allocator,
template <typename T, typename SFINAE = void> class JSONSerializer = adl_serializer>
class basic_json;
/*!
@brief JSON Pointer
A JSON pointer defines a string syntax for identifying a specific value
within a JSON document. It can be used with functions `at` and
`operator[]`. Furthermore, JSON pointers are the base for JSON patches.
@sa [RFC 6901](https://tools.ietf.org/html/rfc6901)
@since version 2.0.0
*/
template <typename BasicJsonType>
class json_pointer;
/*!
@brief default JSON class
This type is the default specialization of the @ref basic_json class which
uses the standard template types.
@since version 1.0.0
*/
using json = basic_json<>;
} // namespace nlohmann
#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_

View File

@ -1,251 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_SAMPLERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_SAMPLERS_H_
#include <memory>
#include <vector>
namespace mindspore {
namespace dataset {
// Forward declare
class SamplerObj;

/// \class Sampler samplers.h
/// \brief An abstract base class to represent a sampler in the data pipeline.
/// \note std::enable_shared_from_this is inherited publicly: std::shared_ptr only enables shared_from_this() when
///     that base class is accessible, so the default (private) inheritance of a `class` would leave it unusable.
class Sampler : public std::enable_shared_from_this<Sampler> {
  // Dataset classes and SelectSampler need access to the protected Parse().
  friend class AlbumDataset;
  friend class CelebADataset;
  friend class Cifar10Dataset;
  friend class Cifar100Dataset;
  friend class CLUEDataset;
  friend class CocoDataset;
  friend class CSVDataset;
  friend class ImageFolderDataset;
  friend class ManifestDataset;
  friend class MindDataDataset;
  friend class MnistDataset;
  friend class RandomDataDataset;
  friend class TextFileDataset;
  friend class TFRecordDataset;
  friend class VOCDataset;
  friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);

 public:
  /// \brief Constructor
  Sampler() {}

  /// \brief Destructor
  /// \note Virtual because concrete samplers are used and stored through Sampler pointers (see children_).
  virtual ~Sampler() = default;

  /// \brief A virtual function to add a child sampler.
  /// \param[in] child The child sampler to be added as a child of this sampler.
  virtual void AddChild(std::shared_ptr<Sampler> child) { children_.push_back(child); }

 protected:
  /// \brief Pure virtual function to convert a Sampler class into an IR Sampler object.
  /// \return shared pointer to the newly created SamplerObj.
  virtual std::shared_ptr<SamplerObj> Parse() const = 0;

  /// \brief Child samplers registered through AddChild, in insertion order.
  std::vector<std::shared_ptr<Sampler>> children_;
};
/// \brief A class to represent a Distributed Sampler in the data pipeline.
/// \note A Sampler that accesses a shard of the dataset.
class DistributedSampler final : public Sampler {
// Grants SelectSampler access to the protected Parse().
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] num_shards Number of shards to divide the dataset into.
/// \param[in] shard_id Shard ID of the current shard within num_shards.
/// \param[in] shuffle If true, the indices are shuffled (default=true).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \param[in] seed The seed in use when shuffle is true (default=1).
/// \param[in] offset The starting position where access to elements in the dataset begins (default=-1).
/// \param[in] even_dist If true, each shard would return the same number of rows (default=true).
/// If false the total rows returned by all the shards would not have overlap.
explicit DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, int64_t num_samples = 0,
uint32_t seed = 1, int64_t offset = -1, bool even_dist = true);
/// \brief Destructor.
~DistributedSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
private:
// One member per constructor parameter of the same name.
// NOTE(review): presumably stored by the constructor and consumed by Parse() - confirm in the implementation.
int64_t num_shards_;
int64_t shard_id_;
bool shuffle_;
int64_t num_samples_;
uint32_t seed_;
int64_t offset_;
bool even_dist_;
};
/// \brief A class to represent a PK Sampler in the data pipeline.
/// \note Samples K elements for each P class in the dataset.
/// This will sample all classes.
class PKSampler final : public Sampler {
// Grants SelectSampler access to the protected Parse().
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] num_val Number of elements to sample for each class.
/// \param[in] shuffle If true, the class IDs are shuffled (default=false).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
explicit PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);
/// \brief Destructor.
~PKSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
private:
// One member per constructor parameter of the same name.
// NOTE(review): presumably stored by the constructor and consumed by Parse() - confirm in the implementation.
int64_t num_val_;
bool shuffle_;
int64_t num_samples_;
};
/// \brief A class to represent a Random Sampler in the data pipeline.
/// \note Samples the elements randomly.
class RandomSampler final : public Sampler {
// Grants SelectSampler access to the protected Parse().
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] replacement If true, put the sample ID back for the next draw (default=false).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
explicit RandomSampler(bool replacement = false, int64_t num_samples = 0);
/// \brief Destructor.
~RandomSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
private:
// One member per constructor parameter of the same name.
// NOTE(review): presumably stored by the constructor and consumed by Parse() - confirm in the implementation.
bool replacement_;
int64_t num_samples_;
};
/// \brief A class to represent a Sequential Sampler in the data pipeline.
/// \note Samples the dataset elements sequentially, same as not having a sampler.
class SequentialSampler final : public Sampler {
// Grants SelectSampler access to the protected Parse().
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] start_index Index to start sampling at (default=0, start at first id).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
explicit SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);
/// \brief Destructor.
~SequentialSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
private:
// One member per constructor parameter of the same name.
// NOTE(review): presumably stored by the constructor and consumed by Parse() - confirm in the implementation.
int64_t start_index_;
int64_t num_samples_;
};
/// \brief A class to represent a Subset Sampler in the data pipeline.
/// \note Samples the elements from a sequence of indices.
/// \note Unlike the other concrete samplers in this header this class is not final; SubsetRandomSampler derives
///     from it.
class SubsetSampler : public Sampler {
// Grants SelectSampler access to the protected members.
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] indices A vector sequence of indices.
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
explicit SubsetSampler(std::vector<int64_t> indices, int64_t num_samples = 0);
/// \brief Destructor.
~SubsetSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
// Protected rather than private, so derived classes can read them.
std::vector<int64_t> indices_;
int64_t num_samples_;
};
/// \brief A class to represent a Subset Random Sampler in the data pipeline.
/// \note Samples the elements randomly from a sequence of indices.
/// \note Adds no data members of its own; indices_ and num_samples_ are inherited from SubsetSampler.
class SubsetRandomSampler final : public SubsetSampler {
// Grants SelectSampler access to the protected Parse().
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] indices A vector sequence of indices.
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
explicit SubsetRandomSampler(std::vector<int64_t> indices, int64_t num_samples = 0);
/// \brief Destructor.
~SubsetRandomSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
};
/// \brief A class to represent a Weighted Random Sampler in the data pipeline.
/// \note Samples the elements from [0, len(weights) - 1] randomly with the given
/// weights (probabilities).
class WeightedRandomSampler final : public Sampler {
// Grants SelectSampler access to the protected Parse().
friend std::shared_ptr<SamplerObj> SelectSampler(int64_t, bool, int32_t, int32_t);
public:
/// \brief Constructor
/// \param[in] weights A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \param[in] replacement If true, put the sample ID back for the next draw (default=true).
explicit WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0, bool replacement = true);
/// \brief Destructor.
~WeightedRandomSampler() = default;
protected:
/// \brief Function to convert a Sampler into an IR SamplerObj.
/// \return shared pointer to the newly created SamplerObj.
std::shared_ptr<SamplerObj> Parse() const override;
private:
// One member per constructor parameter of the same name.
// NOTE(review): presumably stored by the constructor and consumed by Parse() - confirm in the implementation.
std::vector<double> weights_;
int64_t num_samples_;
bool replacement_;
};
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_SAMPLERS_H_

View File

@ -1,545 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TEXT_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TEXT_H_
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
class Vocab;
class SentencePieceVocab;
class TensorOperation;
// Transform operations for text
namespace text {
#ifndef _WIN32
/// \brief Tokenize a scalar tensor of UTF-8 string by specific rules.
/// \note BasicTokenizer is not supported on Windows platform yet (the declaration is compiled out when _WIN32 is
///     defined).
class BasicTokenizer final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] lower_case If true, apply CaseFold, NormalizeUTF8 (NFD mode), RegexReplace operation on input text to
/// fold the text to lower case and strip accents characters. If false, only apply
/// NormalizeUTF8('normalize_form' mode) operation on input text (default=false).
/// \param[in] keep_whitespace If true, the whitespace will be kept in the output tokens (default=false).
/// \param[in] normalize_form Used to specify a specific normalize mode. This is only effective when 'lower_case' is
/// false. See NormalizeUTF8 for details (default=NormalizeForm::kNone).
/// \param[in] preserve_unused_token If true, do not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]',
/// '[MASK]' (default=true).
/// \param[in] with_offsets Whether or not output offsets of tokens (default=false).
explicit BasicTokenizer(bool lower_case = false, bool keep_whitespace = false,
const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true,
bool with_offsets = false);
/// \brief Destructor
~BasicTokenizer() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state; Data is declared here and defined outside this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Tokenizer used for Bert text process.
/// \note BertTokenizer is not supported on Windows platform yet (the declaration is compiled out when _WIN32 is
///     defined).
class BertTokenizer final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] vocab A Vocab object.
/// \param[in] suffix_indicator Used to show that the subword is the last part of a word (default='##').
/// \param[in] max_bytes_per_token Tokens exceeding this length will not be further split (default=100).
/// \param[in] unknown_token When a token cannot be found, return the token directly if 'unknown_token' is an empty
/// string, else return the string specified(default='[UNK]').
/// \param[in] lower_case If true, apply CaseFold, NormalizeUTF8 (NFD mode), RegexReplace operation on input text to
/// fold the text to lower case and strip accents characters. If false, only apply
/// NormalizeUTF8('normalize_form' mode) operation on input text (default=false).
/// \param[in] keep_whitespace If true, the whitespace will be kept in the output tokens (default=false).
/// \param[in] normalize_form Used to specify a specific normalize mode. This is only effective when 'lower_case' is
/// false. See NormalizeUTF8 for details (default=NormalizeForm::kNone).
/// \param[in] preserve_unused_token If true, do not split special tokens like '[CLS]', '[SEP]', '[UNK]', '[PAD]',
/// '[MASK]' (default=true).
/// \param[in] with_offsets Whether or not output offsets of tokens (default=false).
/// \note Inline wrapper: converts the two string arguments with StringToChar and delegates to the
///     std::vector<char> overload below.
explicit BertTokenizer(const std::shared_ptr<Vocab> &vocab, const std::string &suffix_indicator = "##",
int32_t max_bytes_per_token = 100, const std::string &unknown_token = "[UNK]",
bool lower_case = false, bool keep_whitespace = false,
const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true,
bool with_offsets = false)
: BertTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unknown_token),
lower_case, keep_whitespace, normalize_form, preserve_unused_token, with_offsets) {}
/// \brief Constructor taking the string arguments as std::vector<char>; it has no default arguments.
/// \note The parameters have the same meaning as in the std::string constructor above.
explicit BertTokenizer(const std::shared_ptr<Vocab> &vocab, const std::vector<char> &suffix_indicator,
int32_t max_bytes_per_token, const std::vector<char> &unknown_token, bool lower_case,
bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token,
bool with_offsets);
/// \brief Destructor
~BertTokenizer() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Opaque state; Data is declared here and defined outside this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Apply case fold operation on UTF-8 string tensor.
/// \note The declaration is compiled out when _WIN32 is defined.
class CaseFold final : public TensorTransform {
public:
/// \brief Constructor.
CaseFold();
/// \brief Destructor
~CaseFold() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
#endif
/// \brief Tokenize Chinese string into words based on dictionary.
/// \note The integrity of the HMMSegment algorithm and MPSegment algorithm files must be confirmed.
class JiebaTokenizer final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] hmm_path Dictionary file is used by HMMSegment algorithm. The dictionary can be obtained on the
/// official website of cppjieba.
/// \param[in] mp_path Dictionary file is used by MPSegment algorithm. The dictionary can be obtained on the
/// official website of cppjieba.
/// \param[in] mode Valid values can be any of [JiebaMode.MP, JiebaMode.HMM, JiebaMode.MIX]
/// (default=JiebaMode::kMix).
/// - JiebaMode.kMP, tokenize with MPSegment algorithm.
/// - JiebaMode.kHMM, tokenize with Hidden Markov Model Segment algorithm.
/// - JiebaMode.kMIX, tokenize with a mix of MPSegment and HMMSegment algorithm.
/// NOTE(review): the enumerator spellings in this list differ from the one used in the signature
/// (JiebaMode::kMix) - confirm the exact names against the JiebaMode definition.
/// \param[in] with_offsets Whether or not output offsets of tokens (default=false).
/// \note Inline wrapper: converts both paths with StringToChar and delegates to the std::vector<char> overload.
explicit JiebaTokenizer(const std::string &hmm_path, const std::string &mp_path,
const JiebaMode &mode = JiebaMode::kMix, bool with_offsets = false)
: JiebaTokenizer(StringToChar(hmm_path), StringToChar(mp_path), mode, with_offsets) {}
/// \brief Constructor taking the paths as std::vector<char>; it has no default arguments.
explicit JiebaTokenizer(const std::vector<char> &hmm_path, const std::vector<char> &mp_path, const JiebaMode &mode,
bool with_offsets);
/// \brief Destructor
~JiebaTokenizer() = default;
/// \brief Add user defined word to JiebaTokenizer's dictionary.
/// \param[in] word The word to be added to the JiebaTokenizer instance.
/// The added word will not be written into the built-in dictionary on disk.
/// \param[in] freq The frequency of the word to be added. The higher the frequency,
/// the better chance the word will be tokenized (default=0).
/// NOTE(review): presumably 0 selects the tokenizer's default frequency - confirm.
/// \return Status error code, returns OK if no error encountered.
Status AddWord(const std::string &word, int64_t freq = 0) { return AddWordChar(StringToChar(word), freq); }
/// \brief Add user defined dictionary of word-freq pairs to JiebaTokenizer's dictionary.
/// \param[in] user_dict Vector of word-freq pairs to be added to JiebaTokenizer's dictionary.
/// \return Status error code, returns OK if no error encountered.
Status AddDict(const std::vector<std::pair<std::string, int64_t>> &user_dict) {
return AddDictChar(PairStringInt64ToPairCharInt64(user_dict));
}
/// \brief Add user defined dictionary of word-freq pairs to JiebaTokenizer's dictionary from a file.
/// Only valid word-freq pairs in user provided file will be added into the dictionary.
/// Rows containing invalid input will be ignored, no error nor warning Status is returned.
/// \param[in] file_path Path to the dictionary which includes user defined word-freq pairs.
/// \return Status error code, returns OK if no error encountered.
Status AddDict(const std::string &file_path) { return AddDictChar(StringToChar(file_path)); }
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// \brief Parse user defined words from a file.
/// \param[in] file_path Path to the user defined file.
/// \param[out] user_dict Vector of word-freq pairs extracted from the user provided file.
/// \return Status error code.
Status ParserFile(const std::string &file_path, std::vector<std::pair<std::string, int64_t>> *const user_dict);
/// \brief Char-vector counterpart of AddWord; the public std::string overload forwards here.
Status AddWordChar(const std::vector<char> &word, int64_t freq = 0);
/// \brief Char-vector counterpart of AddDict(user_dict); the public std::string overload forwards here.
Status AddDictChar(const std::vector<std::pair<std::vector<char>, int64_t>> &user_dict);
/// \brief Char-vector counterpart of AddDict(file_path); the public std::string overload forwards here.
Status AddDictChar(const std::vector<char> &file_path);
// Opaque state; Data is declared here and defined outside this header.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Translate each word into its id by consulting the supplied vocabulary table.
class Lookup final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A Vocab object that serves as the lookup table.
  /// \param[in] unknown_token Word looked up instead whenever the requested word is out of vocabulary (oov).
  ///     A runtime error is thrown if unknown_token itself is oov. Passing {} means that no unknown_token is
  ///     specified for out-of-vocabulary words (default={}).
  /// \param[in] data_type mindspore::DataType of the tensor produced by the lookup; must be numeric, including
  ///     bool (default=mindspore::DataType::kNumberTypeInt32).
  explicit Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::string> &unknown_token = {},
                  mindspore::DataType data_type = mindspore::DataType::kNumberTypeInt32)
      : Lookup(vocab, OptionalStringToChar(unknown_token), data_type) {}

  /// \brief Constructor receiving unknown_token as an optional vector of char.
  ///     The std::string overload above converts its argument and delegates to this one.
  explicit Lookup(const std::shared_ptr<Vocab> &vocab, const std::optional<std::vector<char>> &unknown_token,
                  mindspore::DataType data_type = mindspore::DataType::kNumberTypeInt32);

  /// \brief Destructor
  ~Lookup() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Produce n-grams from a 1-D string Tensor.
class Ngram final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] ngrams Vector of positive integers. For example, ngrams={4, 3} yields a 4-gram followed by a
  ///     3-gram in the same tensor. When there are not enough words to form an n-gram, an empty string is
  ///     returned.
  /// \param[in] left_pad {"pad_token", pad_width}. Padding applied to the left side of the sequence; pad_width
  ///     is capped at n-1. left_pad={"_", 2} pads the left side of the sequence with "__" (default={"", 0}).
  /// \param[in] right_pad {"pad_token", pad_width}. Padding applied to the right side of the sequence; pad_width
  ///     is capped at n-1. right_pad={"-", 2} pads the right side of the sequence with "--" (default={"", 0}).
  /// \param[in] separator Symbol used to join strings together (default=" ").
  explicit Ngram(const std::vector<int32_t> &ngrams, const std::pair<std::string, int32_t> &left_pad = {"", 0},
                 const std::pair<std::string, int32_t> &right_pad = {"", 0}, const std::string &separator = " ")
      : Ngram(ngrams, PairStringToChar(left_pad), PairStringToChar(right_pad), StringToChar(separator)) {}

  /// \brief Constructor receiving the pad tokens and the separator as vectors of char.
  ///     The std::string overload above converts its arguments and delegates to this one.
  explicit Ngram(const std::vector<int32_t> &ngrams, const std::pair<std::vector<char>, int32_t> &left_pad,
                 const std::pair<std::vector<char>, int32_t> &right_pad, const std::vector<char> &separator);

  /// \brief Destructor
  ~Ngram() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
#ifndef _WIN32
/// \brief Normalize a UTF-8 string tensor.
class NormalizeUTF8 final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] normalize_form One of NormalizeForm::kNone, NormalizeForm::kNfc, NormalizeForm::kNfkc,
  ///     NormalizeForm::kNfd or NormalizeForm::kNfkd (default=NormalizeForm::kNfkc).
  ///     See http://unicode.org/reports/tr15/ for details.
  ///     - NormalizeForm::kNone, leave the input string tensor unchanged.
  ///     - NormalizeForm::kNfc, normalize with Normalization Form C.
  ///     - NormalizeForm::kNfkc, normalize with Normalization Form KC.
  ///     - NormalizeForm::kNfd, normalize with Normalization Form D.
  ///     - NormalizeForm::kNfkd, normalize with Normalization Form KD.
  explicit NormalizeUTF8(NormalizeForm normalize_form = NormalizeForm::kNfkc);

  /// \brief Destructor
  ~NormalizeUTF8() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Replace UTF-8 string tensor with 'replace' according to regular expression 'pattern'.
class RegexReplace final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] pattern The regex expression patterns.
  /// \param[in] replace The string to replace matched element.
  /// \param[in] replace_all Confirm whether to replace all. If false, only replace first matched element;
  ///     if true, replace all matched elements (default=true).
  /// \note Both strings are taken by const reference: they are only read to build the char vectors that are
  ///     forwarded to the overload below, so a by-value parameter would cost an extra copy per call.
  explicit RegexReplace(const std::string &pattern, const std::string &replace, bool replace_all = true)
      : RegexReplace(StringToChar(pattern), StringToChar(replace), replace_all) {}

  /// \brief Constructor receiving pattern and replace as vectors of char.
  ///     The std::string overload above converts its arguments and delegates to this one.
  /// \param[in] pattern The regex expression patterns.
  /// \param[in] replace The string to replace matched element.
  /// \param[in] replace_all Whether to replace all matched elements or only the first one.
  explicit RegexReplace(const std::vector<char> &pattern, const std::vector<char> &replace, bool replace_all);

  /// \brief Destructor
  ~RegexReplace() = default;

 protected:
  /// \brief Function to convert TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Tokenize a scalar tensor of UTF-8 string by regex expression pattern.
class RegexTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] delim_pattern The pattern of regex delimiters.
  /// \param[in] keep_delim_pattern The string matched by 'delim_pattern' can be kept as a token if it can be
  ///     matched by 'keep_delim_pattern'. The default value is an empty string (""),
  ///     which means that delimiters will not be kept as an output token (default="").
  /// \param[in] with_offsets Whether or not output offsets of tokens (default=false).
  /// \note Both patterns are taken by const reference: they are only read to build the char vectors that are
  ///     forwarded to the overload below, so a by-value parameter would cost an extra copy per call.
  explicit RegexTokenizer(const std::string &delim_pattern, const std::string &keep_delim_pattern = "",
                          bool with_offsets = false)
      : RegexTokenizer(StringToChar(delim_pattern), StringToChar(keep_delim_pattern), with_offsets) {}

  /// \brief Constructor receiving both patterns as vectors of char.
  ///     The std::string overload above converts its arguments and delegates to this one.
  /// \param[in] delim_pattern The pattern of regex delimiters.
  /// \param[in] keep_delim_pattern Pattern selecting which matched delimiters are kept as tokens.
  /// \param[in] with_offsets Whether or not output offsets of tokens.
  explicit RegexTokenizer(const std::vector<char> &delim_pattern, const std::vector<char> &keep_delim_pattern,
                          bool with_offsets);

  /// \brief Destructor
  ~RegexTokenizer() = default;

 protected:
  /// \brief Function to convert TensorTransform object into a TensorOperation object.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
#endif
/// \brief Use sentencepiece to split a scalar token or 1-D tokens into tokens.
class SentencePieceTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A SentencePieceVocab object.
  /// \param[in] out_type The type of output.
  SentencePieceTokenizer(const std::shared_ptr<SentencePieceVocab> &vocab,
                         mindspore::dataset::SPieceTokenizerOutType out_type);

  /// \brief Constructor.
  /// \param[in] vocab_path Path to the vocab model file.
  /// \param[in] out_type The type of output.
  SentencePieceTokenizer(const std::string &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type)
      : SentencePieceTokenizer(StringToChar(vocab_path), out_type) {}

  /// \brief Constructor receiving vocab_path as a vector of char.
  ///     The std::string overload above converts its argument and delegates to this one.
  SentencePieceTokenizer(const std::vector<char> &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type);

  /// \brief Destructor
  ~SentencePieceTokenizer() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Build a tensor from data (only 1-D for now) in which every element along the dimension axis is a
///     slice of the data that starts at the corresponding position and has the specified width.
class SlidingWindow final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] width Width of the window; must be an integer greater than zero.
  /// \param[in] axis Axis along which the sliding window is computed (default=0). Only 0 or -1 is supported
  ///     for now.
  explicit SlidingWindow(const int32_t width, const int32_t axis = 0);

  /// \brief Destructor
  ~SlidingWindow() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Convert every element of a string tensor to a number.
///     Strings are cast following the rules described at
///     https://en.cppreference.com/w/cpp/string/basic_string/stof and
///     https://en.cppreference.com/w/cpp/string/basic_string/stoul,
///     with the exception that strings representing negative numbers cannot be cast to an unsigned integer type.
class ToNumber final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] data_type mindspore::DataType the tensor is cast to. Must be a numeric type, excluding bool.
  explicit ToNumber(mindspore::DataType data_type);

  /// \brief Destructor
  ~ToNumber() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Shorten a pair of rank-1 tensors so that their total length is less than max_length.
class TruncateSequencePair final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] max_length Maximum length required.
  explicit TruncateSequencePair(int32_t max_length);

  /// \brief Destructor
  ~TruncateSequencePair() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Split a scalar tensor of UTF-8 string into Unicode characters.
class UnicodeCharTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] with_offsets Whether the offsets of the tokens are output as well (default=false).
  explicit UnicodeCharTokenizer(bool with_offsets = false);

  /// \brief Destructor
  ~UnicodeCharTokenizer() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Split a scalar token or 1-D tokens into 1-D subword tokens.
class WordpieceTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] vocab A Vocab object.
  /// \param[in] suffix_indicator Marker showing that the subword is the last part of a word (default='##').
  /// \param[in] max_bytes_per_token Tokens longer than this are not split any further (default=100).
  /// \param[in] unknown_token What to return for a token that cannot be found: the token itself when
  ///     'unknown_token' is an empty string, otherwise the string given here (default='[UNK]').
  /// \param[in] with_offsets Whether the offsets of the tokens are output as well (default=false).
  explicit WordpieceTokenizer(const std::shared_ptr<Vocab> &vocab, const std::string &suffix_indicator = "##",
                              int32_t max_bytes_per_token = 100, const std::string &unknown_token = "[UNK]",
                              bool with_offsets = false)
      : WordpieceTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unknown_token),
                           with_offsets) {}

  /// \brief Constructor receiving suffix_indicator and unknown_token as vectors of char.
  ///     The std::string overload above converts its arguments and delegates to this one.
  explicit WordpieceTokenizer(const std::shared_ptr<Vocab> &vocab, const std::vector<char> &suffix_indicator,
                              int32_t max_bytes_per_token, const std::vector<char> &unknown_token, bool with_offsets);

  /// \brief Destructor
  ~WordpieceTokenizer() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
#ifndef _WIN32
/// \brief Split a scalar tensor of UTF-8 string at Unicode script boundaries.
class UnicodeScriptTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] keep_whitespace Whether whitespace tokens are emitted (default=false).
  /// \param[in] with_offsets Whether the offsets of the tokens are output as well (default=false).
  explicit UnicodeScriptTokenizer(bool keep_whitespace = false, bool with_offsets = false);

  /// \brief Destructor
  ~UnicodeScriptTokenizer() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Split a scalar tensor of UTF-8 string at the whitespaces defined by ICU4C.
class WhitespaceTokenizer final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] with_offsets Whether the offsets of the tokens are output as well (default=false).
  explicit WhitespaceTokenizer(bool with_offsets = false);

  /// \brief Destructor
  ~WhitespaceTokenizer() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
#endif
} // namespace text
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TEXT_H_

View File

@ -1,413 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TRANSFORMS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TRANSFORMS_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/api/types.h"
#include "include/dataset/constants.h"
namespace mindspore {
namespace dataset {
// Forward declaration only: this header refers to TensorOperation solely through std::shared_ptr.
class TensorOperation;
// The two groups of forward declarations below exist so that class TensorTransform can name these
// classes in its friend declarations.
namespace transforms {
class Compose;
class RandomApply;
class RandomChoice;
} // namespace transforms
namespace vision {
class BoundingBoxAugment;
class RandomSelectSubpolicy;
class UniformAugment;
} // namespace vision
// Abstract class to represent a tensor transform operation in the data pipeline.
/// \class TensorTransform transforms.h
/// \brief A base class to represent a tensor transform operation in the data pipeline.
class TensorTransform : public std::enable_shared_from_this<TensorTransform> {
  // The classes below are granted access to the protected Parse() overloads.
  friend class Dataset;
  friend class Execute;
  friend class transforms::Compose;
  friend class transforms::RandomApply;
  friend class transforms::RandomChoice;
  friend class vision::BoundingBoxAugment;
  friend class vision::RandomSelectSubpolicy;
  friend class vision::UniformAugment;

 public:
  /// \brief Constructor
  TensorTransform() {}

  /// \brief Destructor.
  ///     Declared virtual because this is a polymorphic base whose derived objects are handled through
  ///     TensorTransform pointers (e.g. the raw-pointer constructors of transforms::Compose); destroying a
  ///     derived object through a base pointer is undefined behaviour unless the base destructor is virtual.
  virtual ~TensorTransform() = default;

 protected:
  /// \brief Pure virtual function to convert a TensorTransform class into a IR TensorOperation object.
  /// \return shared pointer to the newly created TensorOperation.
  virtual std::shared_ptr<TensorOperation> Parse() = 0;

  /// \brief Virtual function to convert a TensorTransform class into a IR TensorOperation object.
  ///     The default implementation ignores its argument and returns nullptr.
  /// \param[in] env MapTargetDevice value that determines the running environment.
  /// \return shared pointer to the newly created TensorOperation (nullptr unless overridden).
  virtual std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) { return nullptr; }
};
/// \brief Slice object used in SliceOption.
class Slice {
public:
/// \brief Constructor, with start, stop and step default to 0.
Slice() : start_(0), stop_(0), step_(0) {}
/// \brief Constructor.
/// \param[in] start Starting integer specifying where to start the slicing.
/// \param[in] stop Ending integer specifying where to stop the slicing.
/// \param[in] step An integer specifying the step of the slicing.
Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
/// \brief Constructor, with step=1
/// \param[in] start Starting integer specifying where to start the slicing.
/// \param[in] stop Ending integer specifying where to stop the slicing.
Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
/// \brief Constructor, with start=0 and step=1
/// \param[in] stop Ending integer specifying where to stop the slicing.
explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
Slice(Slice const &slice) = default;
~Slice() = default;
bool valid() const { return step_ != 0; }
dsize_t start_;
dsize_t stop_;
dsize_t step_;
};
/// \brief SliceOption used in Slice Op.
///     Describes how one dimension is sliced: the whole dimension, an explicit list of indices, or a Slice.
class SliceOption {
 public:
  /// \param[in] all Slice the whole dimension
  explicit SliceOption(bool all) : all_(all) {}

  /// \param[in] indices Slice these indices along the dimension. Negative indices are supported.
  /// \note The vector is taken by value as a sink parameter and moved into the member, so callers passing
  ///     a temporary pay no copy and callers passing an lvalue pay exactly one.
  explicit SliceOption(std::vector<dsize_t> indices) : indices_(std::move(indices)) {}

  /// \param[in] slice Slice the generated indices from the slice object along the dimension.
  explicit SliceOption(Slice slice) : slice_(slice) {}

  SliceOption(SliceOption const &slice) = default;

  ~SliceOption() = default;

  // only one of the following will be valid
  // given indices to slice the Tensor.
  std::vector<dsize_t> indices_ = {};
  // Slice object. All start, stop and step are 0 if invalid.
  Slice slice_;
  // true when the whole dimension is to be sliced.
  bool all_ = false;
};
// Transform operations for performing data transformation.
namespace transforms {
/// \brief Compose Op.
/// \note Combines a list of transforms into one single transform.
class Compose final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] transforms Vector of raw pointers to the TensorTransform objects to apply.
  explicit Compose(const std::vector<TensorTransform *> &transforms);

  /// \brief Constructor.
  /// \param[in] transforms Vector of shared pointers to the TensorTransform objects to apply.
  explicit Compose(const std::vector<std::shared_ptr<TensorTransform>> &transforms);

  /// \brief Constructor.
  /// \param[in] transforms Vector of references (std::reference_wrapper) to the TensorTransform objects to apply.
  explicit Compose(const std::vector<std::reference_wrapper<TensorTransform>> &transforms);

  /// \brief Destructor
  ~Compose() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Concatenate Op.
/// \note Tensor operation that joins all columns into one single tensor.
class Concatenate final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] axis Axis along which the tensors are concatenated; only 0 or -1 is supported so far (default=0).
  /// \param[in] prepend MSTensor placed in front of the already concatenated tensors (default={}).
  /// \param[in] append MSTensor placed after the already concatenated tensors (default={}).
  explicit Concatenate(int8_t axis = 0, MSTensor prepend = {}, MSTensor append = {});

  /// \brief Destructor
  ~Concatenate() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Duplicate Op.
/// \note Copies the input tensor into a new output tensor.
///     The input tensor itself is carried over to the output list as well.
class Duplicate final : public TensorTransform {
 public:
  /// \brief Constructor.
  Duplicate();

  /// \brief Destructor
  ~Duplicate() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Fill Op.
/// \note Tensor operation that sets every element of the tensor to the specified value.
///     The output tensor keeps the shape and type of the input tensor.
class Fill final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] fill_value Scalar value the tensor is filled with.
  ///     Must be an MSTensor of one of these mindspore::DataType types:
  ///     String, Bool, Int8/16/32/64, UInt8/16/32/64, Float16/32/64.
  explicit Fill(MSTensor fill_value);

  /// \brief Destructor
  ~Fill() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Mask Op.
/// \note Masks the content of the input tensor using the given predicate.
///     Every tensor element matching the predicate evaluates to True, every other element to False.
class Mask final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] op One of the relational operators EQ, NE, LT, GT, LE or GE.
  /// \param[in] constant Constant the elements are compared to.
  ///     Must be an MSTensor of str, int, float or bool.
  /// \param[in] ms_type Type of the generated mask; must be a numeric or boolean datatype
  ///     (default=mindspore::DataType::kNumberTypeBool).
  explicit Mask(RelationalOp op, MSTensor constant,
                mindspore::DataType ms_type = mindspore::DataType(mindspore::DataType::kNumberTypeBool));

  /// \brief Destructor
  ~Mask() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief OneHot Op.
/// \note Turns the labels into OneHot format.
class OneHot final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] num_classes Number of classes.
  explicit OneHot(int32_t num_classes);

  /// \brief Destructor
  ~OneHot() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief PadEnd Op.
/// \note Pads the input tensor according to pad_shape; both need to have the same rank.
class PadEnd final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] pad_shape List of integers describing the required shape.
  ///     Dimensions set to `None` are not padded (i.e., the original dim is used).
  ///     Dimensions shorter than the input truncate the values.
  /// \param[in] pad_value Value used for padding (default={}).
  explicit PadEnd(const std::vector<dsize_t> &pad_shape, MSTensor pad_value = {});

  /// \brief Destructor
  ~PadEnd() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief RandomApply Op.
/// \note Performs a series of transforms at random, with a given probability.
class RandomApply final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] transforms Vector of raw pointers to the TensorTransform objects to apply.
  /// \param[in] prob Probability with which the transformation list is applied (default=0.5).
  explicit RandomApply(const std::vector<TensorTransform *> &transforms, double prob = 0.5);

  /// \brief Constructor.
  /// \param[in] transforms Vector of shared pointers to the TensorTransform objects to apply.
  /// \param[in] prob Probability with which the transformation list is applied (default=0.5).
  explicit RandomApply(const std::vector<std::shared_ptr<TensorTransform>> &transforms, double prob = 0.5);

  /// \brief Constructor.
  /// \param[in] transforms Vector of references (std::reference_wrapper) to the TensorTransform objects to apply.
  /// \param[in] prob Probability with which the transformation list is applied (default=0.5).
  explicit RandomApply(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, double prob = 0.5);

  /// \brief Destructor
  ~RandomApply() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief RandomChoice Op.
/// \note Picks one transform at random from a list of transforms and performs it.
class RandomChoice final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] transforms Vector of raw pointers to the TensorTransform objects to choose from.
  explicit RandomChoice(const std::vector<TensorTransform *> &transforms);

  /// \brief Constructor.
  /// \param[in] transforms Vector of shared pointers to the TensorTransform objects to choose from.
  explicit RandomChoice(const std::vector<std::shared_ptr<TensorTransform>> &transforms);

  /// \brief Constructor.
  /// \param[in] transforms Vector of references (std::reference_wrapper) to the TensorTransform objects to
  ///     choose from.
  explicit RandomChoice(const std::vector<std::reference_wrapper<TensorTransform>> &transforms);

  /// \brief Destructor
  ~RandomChoice() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Slice Op.
/// \note Slice operation that extracts a tensor using the given n slices.
///     Its functionality resembles NumPy's indexing feature.
///     (Only rank-1 tensors are supported currently.)
class Slice final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] slice_input Vector of SliceOption.
  explicit Slice(const std::vector<SliceOption> &slice_input);

  /// \brief Destructor
  ~Slice() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief TypeCast Op.
/// \note Tensor operation that casts to a given MindSpore data type.
class TypeCast final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] data_type mindspore::DataType to cast to.
  explicit TypeCast(mindspore::DataType data_type);

  /// \brief Destructor
  ~TypeCast() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief Unique Op.
/// \note Returns an output tensor holding all unique elements of the input tensor, in the same order
///     in which they occur in the input tensor.
class Unique final : public TensorTransform {
 public:
  /// \brief Constructor.
  Unique();

  /// \brief Destructor
  ~Unique() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;
};
} // namespace transforms
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_TRANSFORMS_H_

View File

@ -1,955 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_lite.h"
namespace mindspore {
namespace dataset {
class TensorOperation;
// Transform operations for performing computer vision.
namespace vision {
/// \brief AutoContrast TensorTransform.
/// \note Applies automatic contrast to the input image.
class AutoContrast final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] cutoff Percent of pixels cut off from the histogram; valid values range from 0 to 100.
  /// \param[in] ignore Pixel values to ignore.
  explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});

  /// \brief Destructor.
  ~AutoContrast() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief BoundingBoxAugment TensorTransform.
/// \note Applies a given image transform to a random selection of bounding box regions of a given image.
class BoundingBoxAugment final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] transform Raw pointer to a TensorTransform operation.
  /// \param[in] ratio Ratio of bounding boxes the augmentation is applied on. Range: [0, 1] (default=0.3).
  explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);

  /// \brief Constructor.
  /// \param[in] transform Smart pointer to a TensorTransform operation.
  /// \param[in] ratio Ratio of bounding boxes the augmentation is applied on. Range: [0, 1] (default=0.3).
  explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);

  /// \brief Constructor.
  /// \param[in] transform Reference (std::reference_wrapper) to a TensorTransform operation.
  /// \param[in] ratio Ratio of bounding boxes the augmentation is applied on. Range: [0, 1] (default=0.3).
  explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3);

  /// \brief Destructor.
  ~BoundingBoxAugment() = default;

 protected:
  /// \brief Build the TensorOperation object that corresponds to this TensorTransform.
  /// \return Shared pointer to TensorOperation object.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Only forward-declared here; the definition is not part of this header.
  struct Data;
  std::shared_ptr<Data> data_;
};
/// \brief CutMixBatch TensorTransform.
/// \note Apply CutMix on a batch of images: masks a random section of each image with the corresponding
///     part of another randomly selected image in that batch.
class CutMixBatch final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] image_batch_format The format of the batch (an ImageBatchFormat value; no default).
/// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
/// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0).
explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);
/// \brief Destructor.
~CutMixBatch() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief CutOut TensorTransform.
/// \note Randomly cut (mask) out a given number of square patches from the input image.
class CutOut final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] length Integer representing the side length of each square patch.
/// \param[in] num_patches Integer representing the number of patches to be cut out of an image (default=1).
explicit CutOut(int32_t length, int32_t num_patches = 1);
/// \brief Destructor.
~CutOut() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief Equalize TensorTransform.
/// \note Apply histogram equalization on input image.
///     This transform takes no parameters and declares no data members.
class Equalize final : public TensorTransform {
public:
/// \brief Constructor (takes no arguments).
Equalize();
/// \brief Destructor.
~Equalize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief HWC2CHW TensorTransform.
/// \note Transpose the input image; shape (H, W, C) to shape (C, H, W).
///     This transform takes no parameters and declares no data members.
class HWC2CHW final : public TensorTransform {
public:
/// \brief Constructor (takes no arguments).
HWC2CHW();
/// \brief Destructor.
~HWC2CHW() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Invert TensorTransform.
/// \note Apply invert on input image in RGB mode.
///     This transform takes no parameters and declares no data members.
class Invert final : public TensorTransform {
public:
/// \brief Constructor (takes no arguments).
Invert();
/// \brief Destructor.
~Invert() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief MixUpBatch TensorTransform.
/// \note Apply MixUp transformation on an input batch of images and labels. The labels must be in
///     one-hot format and Batch must be called before calling this function.
class MixUpBatch final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] alpha The hyperparameter of beta distribution (default = 1.0).
explicit MixUpBatch(float alpha = 1);
/// \brief Destructor.
~MixUpBatch() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief NormalizePad TensorTransform.
/// \note Normalize the input image with respect to mean and standard deviation and pad an extra
///     channel with value zero.
class NormalizePad final : public TensorTransform {
public:
/// \brief Constructor.
/// \note This overload only converts dtype with StringToChar and delegates to the
///     std::vector<char> overload declared below.
/// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
///     The mean values must be in range [0.0, 255.0].
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
///     The standard deviation values must be in range (0.0, 255.0].
/// \param[in] dtype The output datatype of Tensor.
///     The value must be "float32" or "float16" (default = "float32").
explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std,
const std::string &dtype = "float32")
: NormalizePad(mean, std, StringToChar(dtype)) {}
/// \brief Constructor taking the datatype name as a character vector.
/// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
/// \param[in] dtype The output datatype of Tensor, as a vector of characters (no default for this overload).
explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype);
/// \brief Destructor.
~NormalizePad() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief Pad TensorTransform.
/// \note Pads the image according to padding parameters.
class Pad final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] padding A vector representing the number of pixels to pad the image.
///     If vector has one value, it pads all sides of the image with that value.
///     If vector has two values, it pads left and top with the first and
///     right and bottom with the second value.
///     If vector has four values, it pads left, top, right, and bottom with
///     those values respectively.
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
///     BorderType::kConstant (default={0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, they are used to fill R, G, B channels respectively.
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant).
///     Can be any of
///     [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric]
///     - BorderType::kConstant, means it fills the border with constant values
///     - BorderType::kEdge, means it pads with the last value on the edge
///     - BorderType::kReflect, means it reflects the values on the edge omitting the last value of edge
///     - BorderType::kSymmetric, means it reflects the values on the edge repeating the last value of edge
explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor.
~Pad() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomColor TensorTransform.
/// \note Blends an image with its grayscale version with random weights
///     t and 1 - t generated from a given range. If the range is trivial
///     then the weights are determinate and t equals the bound of the interval.
class RandomColor final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] t_lb Lower bound on the range of random weights.
/// \param[in] t_ub Upper bound on the range of random weights.
explicit RandomColor(float t_lb, float t_ub);
/// \brief Destructor.
~RandomColor() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomColorAdjust TensorTransform.
/// \note Randomly adjust the brightness, contrast, saturation, and hue of the input image.
class RandomColorAdjust final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values;
///     if it is a vector of two values it needs to be in the form of [min, max] (default={1, 1}).
/// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values;
///     if it is a vector of two values it needs to be in the form of [min, max] (default={1, 1}).
/// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values;
///     if it is a vector of two values it needs to be in the form of [min, max] (default={1, 1}).
/// \param[in] hue Hue adjustment factor. Must be a vector of one or two values;
///     if it is a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5
///     (default={0, 0}).
explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});
/// \brief Destructor.
~RandomColorAdjust() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomCrop TensorTransform.
/// \note Crop the input image at a random location.
class RandomCrop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
///     If vector has one value, it pads all sides of the image with that value.
///     If vector has two values, it pads left and top with the first and
///     right and bottom with the second value.
///     If vector has four values, it pads left, top, right, and bottom with
///     those values respectively.
/// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
///     the given output size (default=false).
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
///     BorderType::kConstant (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, they are used to fill R, G, B channels respectively.
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant).
explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor.
~RandomCrop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomCropDecodeResize TensorTransform.
/// \note Equivalent to RandomResizedCrop, but crops before decoding.
class RandomCropDecodeResize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the
///     original size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be
///     cropped (default=(3. / 4., 4. / 3.)).
/// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
/// \param[in] max_attempts The maximum number of attempts to propose a valid crop_area (default=10).
///     If exceeded, fall back to use center_crop instead.
explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4, 4. / 3},
InterpolationMode interpolation = InterpolationMode::kLinear,
int32_t max_attempts = 10);
/// \brief Destructor.
~RandomCropDecodeResize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomCropWithBBox TensorTransform.
/// \note Crop the input image at a random location and adjust bounding boxes accordingly.
///     If cropped area is out of bbox, the return bbox will be empty.
class RandomCropWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] padding A vector representing the number of pixels to pad the image (default={0, 0, 0, 0}).
///     If vector has one value, it pads all sides of the image with that value.
///     If vector has two values, it pads left and top with the first and
///     right and bottom with the second value.
///     If vector has four values, it pads left, top, right, and bottom with
///     those values respectively.
/// \param[in] pad_if_needed A boolean whether to pad the image if either side is smaller than
///     the given output size (default=false).
/// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is
///     BorderType::kConstant (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, they are used to fill R, G, B channels respectively.
/// \param[in] padding_mode The method of padding (default=BorderType::kConstant). It can be any of
///     [BorderType::kConstant, BorderType::kEdge, BorderType::kReflect, BorderType::kSymmetric].
explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
BorderType padding_mode = BorderType::kConstant);
/// \brief Destructor.
~RandomCropWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomHorizontalFlip TensorTransform.
/// \note Tensor operation to perform random horizontal flip.
class RandomHorizontalFlip final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomHorizontalFlip(float prob = 0.5);
/// \brief Destructor.
~RandomHorizontalFlip() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomHorizontalFlipWithBBox TensorTransform.
/// \note Flip the input image horizontally, randomly with a given probability, and adjust bounding boxes
///     accordingly.
class RandomHorizontalFlipWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomHorizontalFlipWithBBox(float prob = 0.5);
/// \brief Destructor.
~RandomHorizontalFlipWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomPosterize TensorTransform.
/// \note Tensor operation to perform random posterize.
class RandomPosterize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] bit_range A uint8_t vector representing the minimum and maximum bit in range (default={4, 8}).
explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});
/// \brief Destructor.
~RandomPosterize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomResize TensorTransform.
/// \note Resize the input image using a randomly selected interpolation mode.
class RandomResize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, the smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
explicit RandomResize(std::vector<int32_t> size);
/// \brief Destructor.
~RandomResize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomResizeWithBBox TensorTransform.
/// \note Resize the input image using a randomly selected interpolation mode and adjust
///     bounding boxes accordingly.
class RandomResizeWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, the smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
explicit RandomResizeWithBBox(std::vector<int32_t> size);
/// \brief Destructor.
~RandomResizeWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomResizedCrop TensorTransform.
/// \note Crop the input image to a random size and aspect ratio.
class RandomResizedCrop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the original
///     size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be cropped
///     (default=(3. / 4., 4. / 3.)).
/// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
/// \param[in] max_attempts The maximum number of attempts to propose a valid
///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4., 4. / 3.},
InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
/// \brief Destructor.
~RandomResizedCrop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomResizedCropWithBBox TensorTransform.
/// \note Crop the input image to a random size and aspect ratio.
///     If cropped area is out of bbox, the return bbox will be empty.
class RandomResizedCropWithBBox final : public TensorTransform {
public:
// NOTE(review): this constructor is callable with a single argument but is not marked explicit, unlike
// RandomResizedCrop; confirm whether implicit conversion from std::vector<int32_t> is intended.
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
///     If size is a single value, a square crop of size (size, size) is returned.
///     If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the original
///     size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be cropped
///     (default=(3. / 4., 4. / 3.)).
/// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear).
/// \param[in] max_attempts The maximum number of attempts to propose a valid
///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4., 4. / 3.},
InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);
/// \brief Destructor.
~RandomResizedCropWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomRotation TensorTransform.
/// \note Rotates the image according to parameters.
class RandomRotation final : public TensorTransform {
public:
// NOTE(review): this constructor is callable with a single argument but is not marked explicit, unlike most
// other constructors in this header; confirm whether implicit conversion from std::vector<float> is intended.
/// \brief Constructor.
/// \param[in] degrees A float vector of size 2, representing the starting and ending degree.
/// \param[in] resample An enum for the mode of interpolation (default=InterpolationMode::kNearestNeighbour).
/// \param[in] expand A boolean representing whether the image is expanded after rotation (default=false).
/// \param[in] center A float vector of size 2, representing the x and y center of rotation (default={-1, -1}).
/// \param[in] fill_value A vector representing the value to fill the area outside the transform
///     in the output image (default={0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
///     If 3 values are provided, they are used to fill R, G, B channels respectively.
RandomRotation(std::vector<float> degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour,
bool expand = false, std::vector<float> center = {-1, -1},
std::vector<uint8_t> fill_value = {0, 0, 0});
/// \brief Destructor.
~RandomRotation() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomSelectSubpolicy TensorTransform.
/// \note Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples
///     (op, prob), where op is a TensorTransform operation and prob is the probability that this op will be applied.
///     Once a sub-policy is selected, each op within the sub-policy will be applied in sequence according to its
///     probability.
class RandomSelectSubpolicy final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers.
explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);
/// \brief Constructor.
/// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers.
explicit RandomSelectSubpolicy(
const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);
/// \brief Constructor.
/// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are passed as
///     reference wrappers (std::reference_wrapper).
explicit RandomSelectSubpolicy(
const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);
/// \brief Destructor.
~RandomSelectSubpolicy() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomSharpness TensorTransform.
/// \note Tensor operation to perform random sharpness.
class RandomSharpness final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly
///     sample from, to select a degree to adjust sharpness (default={0.1, 1.9}).
explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9});
/// \brief Destructor.
~RandomSharpness() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomSolarize TensorTransform.
/// \note Invert pixels randomly within specified range. If min=max, it is a single fixed magnitude operation
///     to invert all pixels above that threshold.
class RandomSolarize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] threshold A vector with two elements specifying the pixel range to invert (default={0, 255}).
explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255});
/// \brief Destructor.
~RandomSolarize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomVerticalFlip TensorTransform.
/// \note Tensor operation to perform random vertical flip.
class RandomVerticalFlip final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomVerticalFlip(float prob = 0.5);
/// \brief Destructor.
~RandomVerticalFlip() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RandomVerticalFlipWithBBox TensorTransform.
/// \note Flip the input image vertically, randomly with a given probability, and adjust bounding boxes
///     accordingly.
class RandomVerticalFlipWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] prob A float representing the probability of flip (default=0.5).
explicit RandomVerticalFlipWithBBox(float prob = 0.5);
/// \brief Destructor.
~RandomVerticalFlipWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief Rescale TensorTransform.
/// \note Tensor operation to rescale the input image.
class Rescale final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] rescale Rescale factor (no default).
/// \param[in] shift Shift factor (no default).
Rescale(float rescale, float shift);
/// \brief Destructor.
~Rescale() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief ResizeWithBBox TensorTransform.
/// \note Resize the input image to the given size and adjust bounding boxes accordingly.
class ResizeWithBBox final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, the smaller edge of the image will be resized to this value with the same
///     image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] interpolation An enum for the mode of interpolation (default=InterpolationMode::kLinear).
explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Destructor.
~ResizeWithBBox() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief RGBA2BGR TensorTransform.
/// \note Changes the input 4 channel RGBA tensor to 3 channel BGR.
///     This transform takes no parameters and declares no data members.
class RGBA2BGR final : public TensorTransform {
public:
/// \brief Constructor (takes no arguments).
RGBA2BGR();
/// \brief Destructor.
~RGBA2BGR() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief RGBA2RGB TensorTransform.
/// \note Changes the input 4 channel RGBA tensor to 3 channel RGB.
///     This transform takes no parameters and declares no data members.
class RGBA2RGB final : public TensorTransform {
public:
/// \brief Constructor (takes no arguments).
RGBA2RGB();
/// \brief Destructor.
~RGBA2RGB() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief SoftDvppDecodeRandomCropResizeJpeg TensorTransform.
/// \note Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of
///     Ascend series chip DVPP module. The usage scenario is consistent with SoftDvppDecodeResizeJpeg.
///     The input image size should be in range [32*32, 8192*8192].
///     The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
///     Only images with an even resolution can be output. The output of odd resolution is not supported.
class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform {
public:
// NOTE(review): this constructor is callable with a single argument but is not marked explicit, unlike
// SoftDvppDecodeResizeJpeg; confirm whether implicit conversion from std::vector<int32_t> is intended.
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] scale Range [min, max) of respective size of the original
///     size to be cropped (default=(0.08, 1.0)).
/// \param[in] ratio Range [min, max) of aspect ratio to be cropped
///     (default=(3. / 4., 4. / 3.)).
/// \param[in] max_attempts The maximum number of attempts to propose a valid
///     crop_area (default=10). If exceeded, fall back to use center_crop instead.
SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
std::vector<float> ratio = {3. / 4., 4. / 3.}, int32_t max_attempts = 10);
/// \brief Destructor.
~SoftDvppDecodeRandomCropResizeJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief SoftDvppDecodeResizeJpeg TensorTransform.
/// \note Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series
///     chip DVPP module. It is recommended to use this algorithm in the following scenario:
///     the DVPP of the Ascend chip is not used during training but is used during inference,
///     and the accuracy of inference is lower than the accuracy of training.
///     The input image size should be in range [32*32, 8192*8192].
///     The zoom-out and zoom-in multiples of the image length and width should be in the range [1/32, 16].
///     Only images with an even resolution can be output. The output of odd resolution is not supported.
class SoftDvppDecodeResizeJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
///     If size is a single value, smaller edge of the image will be resized to this value with
///     the same image aspect ratio. If size has 2 values, it should be (height, width).
explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);
/// \brief Destructor.
~SoftDvppDecodeResizeJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
// Forward-declared state struct; its definition is not part of this declaration.
struct Data;
// Shared pointer to the Data instance; presumably stores the constructor arguments (confirm in the implementation).
std::shared_ptr<Data> data_;
};
/// \brief SwapRedBlue TensorTransform.
/// \note Swaps the red and blue channels in image.
class SwapRedBlue final : public TensorTransform {
public:
/// \brief Constructor. Takes no arguments.
SwapRedBlue();
/// \brief Destructor (compiler-generated).
~SwapRedBlue() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief UniformAugment TensorTransform.
/// \note Tensor operation to perform randomly selected augmentation.
/// The three constructors differ only in how the candidate transforms are handed over
/// (raw pointers, shared pointers, or reference wrappers).
class UniformAugment final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] transforms A vector of raw pointers to TensorTransform operations.
/// NOTE(review): ownership/lifetime expectations for the pointees are not visible in this header - confirm.
/// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);
/// \brief Constructor.
/// \param[in] transforms A vector of shared pointers to TensorTransform operations.
/// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);
/// \brief Constructor.
/// \param[in] transforms A vector of reference wrappers to TensorTransform objects.
/// \param[in] num_ops An integer representing the number of OPs to be selected and applied (default=2).
explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);
/// \brief Destructor (compiler-generated).
~UniformAugment() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the transforms and num_ops - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_

View File

@ -1,100 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
// Transform operations for performing computer vision.
namespace vision {
/* ##################################### API class ###########################################*/
/// \brief DvppDecodeResizeJpeg TensorTransform.
/// \note NOTE(review): judging by the name, this presumably decodes and resizes a JPEG image on the
/// Ascend DVPP module - the behavior is not visible in this header, confirm against the implementation.
class DvppDecodeResizeJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] resize A vector of unsigned integer values, one per dimension, in (H, W) order.
explicit DvppDecodeResizeJpeg(std::vector<uint32_t> resize);
/// \brief Destructor (compiler-generated).
~DvppDecodeResizeJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the resize vector - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief DvppDecodeResizeCropJpeg TensorTransform.
/// \note NOTE(review): judging by the name and parameters, this presumably decodes a JPEG image,
/// resizes it and then crops it on the Ascend DVPP module - confirm against the implementation.
class DvppDecodeResizeCropJpeg final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] crop A vector of unsigned integer values giving each dimension after the final crop, in (H, W) order.
/// \param[in] resize A vector of unsigned integer values giving each dimension after resize, in (H, W) order.
explicit DvppDecodeResizeCropJpeg(std::vector<uint32_t> crop, std::vector<uint32_t> resize);
/// \brief Destructor (compiler-generated).
~DvppDecodeResizeCropJpeg() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the crop and resize vectors - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief DvppDecodePng TensorTransform.
/// \note NOTE(review): judging by the name, this presumably decodes a PNG image on the Ascend DVPP
/// module - the behavior is not visible in this header, confirm against the implementation.
/// Unlike the other Dvpp transforms declared alongside it, this class declares no data members.
class DvppDecodePng final : public TensorTransform {
public:
/// \brief Constructor. Takes no arguments.
DvppDecodePng();
/// \brief Destructor (compiler-generated).
~DvppDecodePng() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_ASCEND_H_

View File

@ -1,292 +0,0 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
namespace mindspore {
namespace dataset {
// Transform operations for performing computer vision.
namespace vision {
// Forward declarations.
// RotateOperation is only used in this header as the pointee of a std::shared_ptr member (Rotate::op_),
// so an incomplete type is sufficient and its full definition does not need to be included here.
class RotateOperation;
/// \brief Affine TensorTransform.
/// \note Apply affine transform on input image.
class Affine final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees The degrees to rotate the image by.
/// \param[in] translation The value representing vertical and horizontal translation (default = {0.0, 0.0}).
/// The first value represents the x axis translation while the second represents the y axis translation.
/// \param[in] scale The scaling factor for the image (default = 0.0).
/// NOTE(review): a default scaling factor of 0.0 looks unusual - confirm it is intended.
/// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}).
/// \param[in] interpolation An enum for the mode of interpolation
/// (default = InterpolationMode::kNearestNeighbour).
/// \param[in] fill_value A vector representing the value to fill the area outside the transform
/// in the output image (default = {0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
/// If 3 values are provided, it is used to fill R, G, B channels respectively.
explicit Affine(float_t degrees, const std::vector<float> &translation = {0.0, 0.0}, float scale = 0.0,
const std::vector<float> &shear = {0.0, 0.0},
InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
const std::vector<uint8_t> &fill_value = {0, 0, 0});
/// \brief Destructor (compiler-generated).
~Affine() = default;
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// NOTE(review): Parse() is declared in the public section here, whereas most sibling transforms in this
/// header declare it protected - confirm this is intentional.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the constructor arguments - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief CenterCrop TensorTransform.
/// \note Crops the input image at the center to the given size.
class CenterCrop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
explicit CenterCrop(std::vector<int32_t> size);
/// \brief Destructor (compiler-generated).
~CenterCrop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the size vector - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RGB2GRAY TensorTransform.
/// \note Convert RGB image or color image to grayscale image.
/// This class declares no data members and takes no constructor arguments.
class RGB2GRAY final : public TensorTransform {
public:
/// \brief Constructor (compiler-generated).
RGB2GRAY() = default;
/// \brief Destructor (compiler-generated).
~RGB2GRAY() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
};
/// \brief Crop TensorTransform.
/// \note Crop an image based on location and crop size.
class Crop final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor}.
/// \param[in] size Size of the cropped area.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// Note that coordinates are given x first, while a 2-value size is given height first.
Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size);
/// \brief Destructor (compiler-generated).
~Crop() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores coordinates and size - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Decode TensorTransform.
/// \note Decode the input image in RGB mode.
class Decode final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] rgb A boolean of whether to decode in RGB mode or not (default = true).
explicit Decode(bool rgb = true);
/// \brief Destructor (compiler-generated).
~Decode() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the rgb flag - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Normalize TensorTransform.
/// \note Normalize the input image with respect to mean and standard deviation.
class Normalize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] mean A vector of mean values for each channel, w.r.t. channel order.
/// The mean values must be in range [0.0, 255.0].
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
/// The standard deviation values must be in range (0.0, 255.0].
Normalize(std::vector<float> mean, std::vector<float> std);
/// \brief Destructor (compiler-generated).
~Normalize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores mean and std - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief RandomAffine TensorTransform.
/// \note Applies a Random Affine transformation on input image in RGB or Greyscale mode.
class RandomAffine final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] degrees A float vector of size 2, representing the starting and ending degree.
/// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes
/// (default = {0.0, 0.0, 0.0, 0.0}).
/// If size is 2, (min_dx, max_dx, 0, 0),
/// if size is 4, (min_dx, max_dx, min_dy, max_dy),
/// all values are in range [-1, 1].
/// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range
/// (default = {1.0, 1.0}).
/// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees
/// vertically and horizontally (default = {0.0, 0.0, 0.0, 0.0}).
/// If size is 2, (min_shear_x, max_shear_x, 0, 0),
/// if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y).
/// \param[in] interpolation An enum for the mode of interpolation
/// (default = InterpolationMode::kNearestNeighbour).
/// \param[in] fill_value A vector representing the value to fill the area outside the transform
/// in the output image (default = {0, 0, 0}). If 1 value is provided, it is used for all RGB channels.
/// If 3 values are provided, it is used to fill R, G, B channels respectively.
explicit RandomAffine(const std::vector<float_t> &degrees,
const std::vector<float_t> &translate_range = {0.0, 0.0, 0.0, 0.0},
const std::vector<float_t> &scale_range = {1.0, 1.0},
const std::vector<float_t> &shear_ranges = {0.0, 0.0, 0.0, 0.0},
InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
const std::vector<uint8_t> &fill_value = {0, 0, 0});
/// \brief Destructor (compiler-generated).
~RandomAffine() = default;
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// NOTE(review): Parse() is declared in the public section here, whereas most sibling transforms in this
/// header declare it protected - confirm this is intentional.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores the constructor arguments - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Resize TensorTransform.
/// \note Resize the input image to the given size.
class Resize final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] size A vector representing the output size of the resized image.
/// If size is a single value, the image will be resized to this value with
/// the same image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] interpolation An enum for the mode of interpolation (default = InterpolationMode::kLinear).
explicit Resize(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Destructor (compiler-generated).
~Resize() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
/// \brief Overload of Parse() that additionally receives a MapTargetDevice value.
/// \param[in] env A MapTargetDevice value (presumably the device the operation is mapped to - confirm).
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores size and interpolation - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief ResizePreserveAR TensorTransform.
/// \note Keep the original picture ratio and fill the rest.
class ResizePreserveAR final : public TensorTransform {
public:
/// \brief Constructor.
/// \param[in] height The height of image output value after resizing.
/// \param[in] width The width of image output value after resizing.
/// \param[in] img_orientation Angle method of image rotation (default = 0).
/// NOTE(review): the set of valid img_orientation values and their meaning is not visible in this
/// header - confirm against the implementation.
ResizePreserveAR(int32_t height, int32_t width, int32_t img_orientation = 0);
/// \brief Destructor (compiler-generated).
~ResizePreserveAR() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// Data is only forward-declared in this header; its definition is not visible here.
/// NOTE(review): presumably it stores height, width and img_orientation - confirm against the implementation file.
struct Data;
std::shared_ptr<Data> data_;
};
/// \brief Rotate TensorTransform.
/// \note Rotate the input image using a specified angle id.
/// NOTE(review): the constructor takes no arguments, so how the angle id is supplied is not visible
/// in this header - confirm against the implementation.
class Rotate final : public TensorTransform {
public:
/// \brief Constructor. Takes no arguments.
Rotate();
/// \brief Destructor (compiler-generated).
~Rotate() = default;
protected:
/// \brief Function to convert TensorTransform object into a TensorOperation object.
/// \return Shared pointer to TensorOperation object.
std::shared_ptr<TensorOperation> Parse() override;
private:
/// Shared pointer to a RotateOperation. RotateOperation is only forward-declared in this header.
/// Unlike sibling transforms, this class holds an operation pointer rather than a Data struct.
std::shared_ptr<RotateOperation> op_;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_LITE_H_

View File

@ -537,7 +537,7 @@ TEST_F(MindDataTestTensorDE, TensorEmpty) {
ASSERT_TRUE(!t2->HasData());
}
TEST_F(MindDataTestTensorDE, TestTensor_json) {
TEST_F(MindDataTestTensorDE, TestTensorJson) {
MS_LOG(INFO) << "Doing TestTensor.";
std::vector<uint64_t> labels = {1, 1, 2};
std::shared_ptr<Tensor> input;