forked from mindspore-Ecosystem/mindspore

add code for MD lite C++ include and api

This commit is contained in: parent b2fb825d89, commit ee09b38f00
@@ -16,7 +16,33 @@ set(MIND_DATA_LIB_DIR ${RUNTIME_PKG_NAME}/minddata/lib)
set(LIB_DIR_RUN_X86 ${RUNTIME_PKG_NAME}/lib)

if(BUILD_MINDDATA STREQUAL "full" OR BUILD_MINDDATA STREQUAL "wrapper")
    if(BUILD_MINDDATA STREQUAL "full")
        install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/liteapi/include/ DESTINATION
                ${MIND_DATA_INC_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE)
        install(FILES ${TOP_DIR}/include/api/status.h DESTINATION ${MIND_DATA_INC_DIR}
                RENAME ms_status.h COMPONENT ${RUNTIME_COMPONENT_NAME})

        if(PLATFORM_ARM64)
            file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
            install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so
                    DESTINATION ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
            install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
        elseif(PLATFORM_ARM32)
            file(GLOB JPEGTURBO_LIB_LIST ${jpeg_turbo_LIBPATH}/*.so)
            install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
                    ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
            install(FILES ${JPEGTURBO_LIB_LIST} DESTINATION ${TURBO_DIR}/lib COMPONENT ${RUNTIME_COMPONENT_NAME})
        else()
            install(FILES ${TOP_DIR}/mindspore/lite/build/minddata/libminddata-lite.so DESTINATION
                    ${MIND_DATA_LIB_DIR} COMPONENT ${RUNTIME_COMPONENT_NAME})
            install(FILES ${jpeg_turbo_LIBPATH}/libjpeg.so.62.3.0 DESTINATION ${TURBO_DIR}/lib
                    RENAME libjpeg.so.62 COMPONENT ${RUNTIME_COMPONENT_NAME})
            install(FILES ${jpeg_turbo_LIBPATH}/libturbojpeg.so.0.2.0 DESTINATION ${TURBO_DIR}/lib
                    RENAME libturbojpeg.so.0 COMPONENT ${RUNTIME_COMPONENT_NAME})
        endif()
    endif()

    if(BUILD_MINDDATA STREQUAL "wrapper")
        install(DIRECTORY ${TOP_DIR}/mindspore/ccsrc/minddata/dataset/include/ DESTINATION ${MIND_DATA_INC_DIR}
                COMPONENT ${RUNTIME_COMPONENT_NAME} FILES_MATCHING PATTERN "*.h" PATTERN "vision.h" EXCLUDE)
        if(PLATFORM_ARM64)
@@ -79,6 +79,7 @@
// IR leaf nodes
#include "minddata/dataset/engine/ir/datasetops/source/album_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"

// IR leaf nodes disabled for android
#ifndef ENABLE_ANDROID

@@ -89,7 +90,6 @@
#include "minddata/dataset/engine/ir/datasetops/source/coco_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/csv_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/image_folder_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/mnist_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/random_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/text_file_node.h"
#include "minddata/dataset/engine/ir/datasetops/source/manifest_node.h"

@@ -390,7 +390,7 @@ std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &datase
  return ds;
}

#endif
// Function to create a MnistDataset.
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage,
                                    const std::shared_ptr<SamplerObj> &sampler,

@@ -399,6 +399,8 @@ std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::s
  return ds;
}

#ifndef ENABLE_ANDROID
// Function to overload "+" operator to concat two datasets
std::shared_ptr<ConcatDataset> operator+(const std::shared_ptr<Dataset> &datasets1,
                                         const std::shared_ptr<Dataset> &datasets2) {

@@ -906,12 +908,15 @@ MindDataDataset::MindDataDataset(const std::vector<std::string> &dataset_files,
  auto ds = std::make_shared<MindDataNode>(dataset_files, columns_list, sampler, padded_sample, num_padded);
  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}
#endif

MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage,
                           const std::shared_ptr<SamplerObj> &sampler, const std::shared_ptr<DatasetCache> &cache) {
  auto ds = std::make_shared<MnistNode>(dataset_dir, usage, sampler, cache);
  ir_node_ = std::static_pointer_cast<DatasetNode>(ds);
}

#ifndef ENABLE_ANDROID
TextFileDataset::TextFileDataset(const std::vector<std::string> &dataset_files, int64_t num_samples,
                                 ShuffleMode shuffle, int32_t num_shards, int32_t shard_id,
                                 const std::shared_ptr<DatasetCache> &cache) {
@@ -15,6 +15,7 @@
 */

#include "minddata/dataset/include/samplers.h"
#include "minddata/dataset/core/config_manager.h"
#include "minddata/dataset/engine/datasetops/source/sampler/sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/distributed_sampler.h"
#include "minddata/dataset/engine/datasetops/source/sampler/random_sampler.h"

@@ -32,8 +33,6 @@
#include "minddata/mindrecord/include/shard_sequential_sample.h"
#include "minddata/mindrecord/include/shard_shuffle.h"
#include "minddata/dataset/util/random.h"
#else
#include "minddata/dataset/core/config_manager.h"
#endif

namespace mindspore {

@@ -25,9 +25,9 @@
#include "minddata/dataset/engine/datasetops/device_queue_op.h"
#include "minddata/dataset/engine/opt/pre/getter_pass.h"
#include "minddata/dataset/engine/tree_adapter.h"
#include "minddata/mindrecord/include/shard_index_generator.h"

#ifndef ENABLE_ANDROID
#include "minddata/mindrecord/include/shard_index_generator.h"
#include "minddata/mindrecord/include/shard_header.h"
#include "minddata/mindrecord/include/shard_writer.h"
#endif

@@ -324,10 +324,9 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map<std::string,
  return Status::OK();
}

Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
                                          const std::unordered_map<std::string, int32_t> &column_name_id_map,
                                          nlohmann::json *row_raw_data,
                                          std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
static Status ValidateInputParams(nlohmann::json *row_raw_data,
                                  std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data,
                                  const std::unordered_map<std::string, int32_t> &column_name_id_map) {
  if (row_raw_data == nullptr) {
    RETURN_STATUS_UNEXPECTED("Error: row raw data is NULL.");
  }
@@ -337,76 +336,104 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
  if (column_name_id_map.empty()) {
    RETURN_STATUS_UNEXPECTED("Error: column not found");
  }
  return Status::OK();
}

Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
                                  std::unique_ptr<std::vector<uint8_t>> *data_ptr) {
  auto column_type = tensor->type();
  Status s;
  if (column_type == DataType::DE_FLOAT32) {
    std::unique_ptr<float> data, dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_FLOAT64) {
    std::unique_ptr<double> data, dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, data_ptr, &dummy);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  }
  return Status::OK();
}

Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
                                 std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
  auto column_type = tensor->type();
  Status s;
  std::unique_ptr<std::vector<uint8_t>> data_ptr;
  if (column_type == DataType::DE_INT8) {
    std::unique_ptr<int32_t> data;
    std::unique_ptr<int8_t> dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_INT16) {
    std::unique_ptr<int32_t> data;
    std::unique_ptr<int16_t> dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_UINT16) {
    std::unique_ptr<int32_t> data;
    std::unique_ptr<uint16_t> dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_UINT8) {
    std::unique_ptr<uint8_t> data, dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_INT32) {
    std::unique_ptr<int32_t> data, dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_UINT32) {
    std::unique_ptr<int64_t> data;
    std::unique_ptr<uint32_t> dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_INT64) {
    std::unique_ptr<int64_t> data, dummy;
    s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
    RETURN_IF_NOT_OK(s);
    if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
  } else if (column_type == DataType::DE_FLOAT32 || column_type == DataType::DE_FLOAT64) {
    s = FetchFloatData(tensor, column_name, row_raw_data, &data_ptr);
    RETURN_IF_NOT_OK(s);
  } else if (column_type == DataType::DE_STRING) {
    std::string_view sv;
    RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0}));  // assume scalar string tensor
    std::string ss(sv);
    (*row_raw_data)[column_name] = std::move(ss);
    return Status::OK();
  } else {
    RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
  }
  if (data_ptr != nullptr) {
    (*row_bin_data)[column_name] = std::move(data_ptr);
  }
  return Status::OK();
}

Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
                                          const std::unordered_map<std::string, int32_t> &column_name_id_map,
                                          nlohmann::json *row_raw_data,
                                          std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
  Status s;
  s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map);
  if (s.IsError()) {
    return s;
  }
  for (auto &col : column_name_id_map) {
    auto idx = col.second;
    auto column_name = col.first;
    auto &tensor = row[idx];
    auto column_type = tensor->type();

    std::unique_ptr<std::vector<uint8_t>> data_ptr;
    if (column_type == DataType::DE_INT8) {
      std::unique_ptr<int32_t> data;
      std::unique_ptr<int8_t> dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_INT16) {
      std::unique_ptr<int32_t> data;
      std::unique_ptr<int16_t> dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_UINT16) {
      std::unique_ptr<int32_t> data;
      std::unique_ptr<uint16_t> dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_UINT8) {
      std::unique_ptr<uint8_t> data, dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_INT32) {
      std::unique_ptr<int32_t> data, dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_UINT32) {
      std::unique_ptr<int64_t> data;
      std::unique_ptr<uint32_t> dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy, true);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_INT64) {
      std::unique_ptr<int64_t> data, dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_FLOAT32) {
      std::unique_ptr<float> data, dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_FLOAT64) {
      std::unique_ptr<double> data, dummy;
      s = TransformTensor(tensor->GetBuffer(), tensor->shape(), tensor->Size(), &data, &data_ptr, &dummy);
      RETURN_IF_NOT_OK(s);
      if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
    } else if (column_type == DataType::DE_STRING) {
      std::string_view sv;
      RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0}));  // assume scalar string tensor
      std::string ss(sv);
      (*row_raw_data)[column_name] = std::move(ss);
      continue;
    } else {
      RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
    }
    s = FetchItemData(tensor, column_name, row_raw_data, row_bin_data);
    RETURN_IF_NOT_OK(s);
    if (data_ptr != nullptr) {
      (*row_bin_data)[column_name] = std::move(data_ptr);
    }
  }
  return Status::OK();
}
@@ -130,6 +130,12 @@ class SaveToDisk : public TreeConsumer {
                                nlohmann::json *row_raw_data,
                                std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data);

  Status FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
                        std::unique_ptr<std::vector<uint8_t>> *data_ptr);

  Status FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
                       std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data);

  std::string dataset_path_;
  int32_t num_files_;
  std::string dataset_type_;

@@ -17,6 +17,7 @@
#include <memory>
#include <vector>
#include <utility>
#include <set>
#include "minddata/dataset/engine/datasetops/map_op/cpu_map_job.h"

namespace mindspore {

@@ -86,8 +86,9 @@
#include "minddata/dataset/engine/datasetops/source/csv_op.h"
#include "minddata/dataset/engine/datasetops/source/text_file_op.h"
#endif
#include "minddata/dataset/engine/datasetops/source/voc_op.h"

#ifdef ENABLE_PYTHON
#include "minddata/dataset/engine/datasetops/source/voc_op.h"
#include "minddata/dataset/engine/datasetops/filter_op.h"
#include "minddata/dataset/engine/datasetops/source/generator_op.h"
#endif

@@ -80,9 +80,10 @@ Status ProfilingManager::Initialize() {
  std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_);
  RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling));

#ifndef ENABLE_ANDROID
  std::shared_ptr<Sampling> cpu_sampling = std::make_shared<CpuSampling>(tree_);
  RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling));

#endif
  return Status::OK();
}

@@ -18,14 +18,16 @@
#include "minddata/dataset/core/client.h"
#include "minddata/dataset/engine/ir/datasetops/root_node.h"
#ifndef ENABLE_ANDROID
#include "minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.h"
#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h"
#include "minddata/dataset/engine/opt/post/repeat_pass.h"
#endif
#include "minddata/dataset/engine/opt/pass.h"
#include "minddata/dataset/engine/opt/post/auto_worker_pass.h"
#include "minddata/dataset/engine/opt/post/repeat_pass.h"
#ifdef ENABLE_PYTHON
#include "minddata/dataset/engine/opt/post/generator_node_pass.h"
#endif
#include "minddata/dataset/engine/opt/pre/cache_transform_pass.h"
#include "minddata/dataset/engine/opt/pre/cache_validation_pass.h"
#include "minddata/dataset/engine/opt/pre/deep_copy_pass.h"
#include "minddata/dataset/engine/opt/pre/epoch_ctrl_pass.h"

@@ -55,7 +57,9 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
  actions.emplace_back(std::make_unique<NodeRemovalPass>());
  actions.emplace_back(std::make_unique<EpochCtrlPass>());
  if (usage_ == kDeGetter) actions.emplace_back(std::make_unique<GetterPass>());
#ifndef ENABLE_ANDROID
  actions.emplace_back(std::make_unique<CacheTransformPass>());
#endif
  // Vector of flags for each action
  std::vector<bool> modified(actions.size(), false);
  // Apply pre-pass actions

@@ -72,7 +76,9 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
  // Vector of optimizations
  std::vector<std::unique_ptr<IRNodePass>> optimizations;
  MS_LOG(INFO) << "Running optimization pass loops";
#ifndef ENABLE_ANDROID
  optimizations.emplace_back(std::make_unique<TensorOpFusionPass>());
#endif
  // Apply optimization pass actions
  for (auto i = 0; i < optimizations.size(); i++) {
    bool modified = false;

@@ -95,8 +101,9 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
#ifdef ENABLE_PYTHON
  actions.emplace_back(std::make_unique<GeneratorNodePass>());
#endif
#ifndef ENABLE_ANDROID
  actions.emplace_back(std::make_unique<RepeatPass>());

#endif
  // We will gradually move RepeatPass from ExecutionTree::PrepareTreePostAction to here.

  // Vector of flags for each action

@@ -830,6 +830,7 @@ std::shared_ptr<MindDataDataset> MindData(const std::vector<std::string> &datase
                                          const std::vector<std::string> &columns_list = {},
                                          const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
                                          nlohmann::json padded_sample = nullptr, int64_t num_padded = 0);
#endif

class MnistDataset : public Dataset {
 public:
@@ -850,7 +851,7 @@ class MnistDataset : public Dataset {
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all",
                                    const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
                                    const std::shared_ptr<DatasetCache> &cache = nullptr);

#ifndef ENABLE_ANDROID
/// \brief Function to create a ConcatDataset
/// \notes Overload "+" operator to concat two datasets
/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated
@@ -20,6 +20,7 @@
#include <memory>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

#include "minddata/dataset/include/status.h"
#ifndef ENABLE_ANDROID
@@ -0,0 +1,190 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_

#include <cstdlib>
#include <functional>
#include <memory>
#include <type_traits>
#include <utility>
#include "include/memory_pool.h"

namespace mindspore {
namespace dataset {
// The following conforms to the requirements of
// std::allocator. Do not rename/change any needed
// requirements, e.g. function names, typedef etc.
template <typename T>
class Allocator {
 public:
  template <typename U>
  friend class Allocator;

  using value_type = T;
  using pointer = T *;
  using const_pointer = const T *;
  using reference = T &;
  using const_reference = const T &;
  using size_type = uint64_t;
  using difference_type = std::ptrdiff_t;

  template <typename U>
  struct rebind {
    using other = Allocator<U>;
  };

  using propagate_on_container_copy_assignment = std::true_type;
  using propagate_on_container_move_assignment = std::true_type;
  using propagate_on_container_swap = std::true_type;

  explicit Allocator(const std::shared_ptr<MemoryPool> &b) : pool_(b) {}

  ~Allocator() = default;

  template <typename U>
  explicit Allocator(Allocator<U> const &rhs) : pool_(rhs.pool_) {}

  template <typename U>
  bool operator==(Allocator<U> const &rhs) const {
    return pool_ == rhs.pool_;
  }

  template <typename U>
  bool operator!=(Allocator<U> const &rhs) const {
    return pool_ != rhs.pool_;
  }

  pointer allocate(std::size_t n) {
    void *p = nullptr;
    Status rc = pool_->Allocate(n * sizeof(T), &p);
    if (rc.IsOk()) {
      return reinterpret_cast<pointer>(p);
    } else if (rc == StatusCode::kMDOutOfMemory) {
      throw std::bad_alloc();
    } else {
      throw std::exception();
    }
  }

  void deallocate(pointer p, std::size_t n = 0) noexcept { pool_->Deallocate(p); }

  size_type max_size() { return pool_->get_max_size(); }

 private:
  std::shared_ptr<MemoryPool> pool_;
};
/// \brief It is a wrapper of unique_ptr with a custom Allocator class defined above
template <typename T, typename C = std::allocator<T>, typename... Args>
Status MakeUnique(std::unique_ptr<T[], std::function<void(T *)>> *out, C alloc, size_t n, Args &&... args) {
  RETURN_UNEXPECTED_IF_NULL(out);
  CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "size must be positive");
  try {
    T *data = alloc.allocate(n);
    // Some of our allocator implementations (e.g. NumaAllocator) don't throw std::bad_alloc,
    // so we have to check for a null pointer here.
    if (data == nullptr) {
      return Status(StatusCode::kMDOutOfMemory);
    }
    if (!std::is_arithmetic<T>::value) {
      for (auto i = 0; i < n; i++) {
        std::allocator_traits<C>::construct(alloc, &(data[i]), std::forward<Args>(args)...);
      }
    }
    auto deleter = [](T *p, C f_alloc, size_t f_n) {
      if (!std::is_arithmetic<T>::value && std::is_destructible<T>::value) {
        for (auto i = 0; i < f_n; ++i) {
          std::allocator_traits<C>::destroy(f_alloc, &p[i]);
        }
      }
      f_alloc.deallocate(p, f_n);
    };
    *out = std::unique_ptr<T[], std::function<void(T *)>>(data, std::bind(deleter, std::placeholders::_1, alloc, n));
  } catch (const std::bad_alloc &e) {
    return Status(StatusCode::kMDOutOfMemory);
  } catch (const std::exception &e) {
    RETURN_STATUS_UNEXPECTED(e.what());
  }
  return Status::OK();
}

/// \brief It is a wrapper of the above custom unique_ptr with some additional methods
/// \tparam T The type of object to be allocated
/// \tparam C Allocator. Default to std::allocator
template <typename T, typename C = std::allocator<T>>
class MemGuard {
 public:
  using allocator = C;
  MemGuard() : n_(0) {}
  explicit MemGuard(allocator a) : n_(0), alloc_(a) {}
  // There is no copy constructor nor assignment operator because the memory is solely owned by this object.
  MemGuard(const MemGuard &) = delete;
  MemGuard &operator=(const MemGuard &) = delete;
  // On the other hand, we can support move constructor
  MemGuard(MemGuard &&lhs) noexcept : n_(lhs.n_), alloc_(std::move(lhs.alloc_)), ptr_(std::move(lhs.ptr_)) {}
  MemGuard &operator=(MemGuard &&lhs) noexcept {
    if (this != &lhs) {
      this->deallocate();
      n_ = lhs.n_;
      alloc_ = std::move(lhs.alloc_);
      ptr_ = std::move(lhs.ptr_);
    }
    return *this;
  }
  /// \brief Explicitly deallocate the memory if allocated
  void deallocate() {
    if (ptr_) {
      ptr_.reset();
    }
  }
  /// \brief Allocate memory (with emplace feature). Previous one will be released. If size is 0, no new memory is
  /// allocated.
  /// \param n Number of objects of type T to be allocated
  /// \tparam Args Extra arguments pass to the constructor of T
  template <typename... Args>
  Status allocate(size_t n, Args &&... args) noexcept {
    deallocate();
    n_ = n;
    return MakeUnique(&ptr_, alloc_, n, std::forward<Args>(args)...);
  }
  ~MemGuard() noexcept { deallocate(); }
  /// \brief Getter function
  /// \return The pointer to the memory allocated
  T *GetPointer() const { return ptr_.get(); }
  /// \brief Getter function
  /// \return The pointer to the memory allocated
  T *GetMutablePointer() { return ptr_.get(); }
  /// \brief Overload [] operator to access a particular element
  /// \param x index to the element. Must be less than number of element allocated.
  /// \return pointer to the x-th element
  T *operator[](size_t x) { return GetMutablePointer() + x; }
  /// \brief Overload [] operator to access a particular element
  /// \param x index to the element. Must be less than number of element allocated.
  /// \return pointer to the x-th element
  T *operator[](size_t x) const { return GetPointer() + x; }
  /// \brief Return how many bytes are allocated in total
  /// \return Number of bytes allocated in total
  size_t GetSizeInBytes() const { return n_ * sizeof(T); }

 private:
  size_t n_;
  allocator alloc_;
  std::unique_ptr<T[], std::function<void(T *)>> ptr_;
};
} // namespace dataset
} // namespace mindspore

#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_ALLOCATOR_H_
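Usage sketch (not part of the commit): how the MemGuard/Allocator pair above is typically driven. FillScratch is a hypothetical helper and assumes the Status macros from the surrounding MindData code are available.

// Hypothetical illustration only, not in the commit.
#include "minddata/dataset/util/allocator.h"

namespace mindspore {
namespace dataset {
Status FillScratch(size_t n) {
  MemGuard<float> scratch;                // defaults to std::allocator<float>
  RETURN_IF_NOT_OK(scratch.allocate(n));  // releases any previous buffer, then allocates n floats
  for (size_t i = 0; i < n; ++i) {
    *scratch[i] = 0.0f;                   // operator[] returns a pointer to the i-th element
  }
  return Status::OK();                    // memory is freed when scratch goes out of scope
}
}  // namespace dataset
}  // namespace mindspore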
@@ -0,0 +1,109 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_

#include <cstdint>
#include <limits>
#include <random>

namespace mindspore {
namespace dataset {
// Various type defines for convenience
using uchar = unsigned char;
using dsize_t = int64_t;

// Target devices to perform map operation
enum class MapTargetDevice { kCpu, kGpu, kDvpp };

// Possible dataset types for holding the data and client type
enum class DatasetType { kUnknown, kArrow, kTf };

// Possible flavours of Tensor implementations
enum class TensorImpl { kNone, kFlexible, kCv, kNP };

// Possible values for shuffle
enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2 };

// Possible values for Border types
enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 };

// Possible values for Image format types in a batch
enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 };

// Possible values for Image format types
enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 };

// Possible interpolation modes
enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3 };

// Possible JiebaMode modes
enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 };

// Possible values for SPieceTokenizerOutType
enum class SPieceTokenizerOutType { kString = 0, kInt = 1 };

// Possible values for SPieceTokenizerLoadType
enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 };

// Possible values for SentencePieceModel
enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 };

// Possible values for NormalizeForm
enum class NormalizeForm {
  kNone = 0,
  kNfc,
  kNfkc,
  kNfd,
  kNfkd,
};

// convenience functions for 32bit int bitmask
inline bool BitTest(uint32_t bits, uint32_t bitMask) { return (bits & bitMask) == bitMask; }

inline void BitSet(uint32_t *bits, uint32_t bitMask) { *bits |= bitMask; }

inline void BitClear(uint32_t *bits, uint32_t bitMask) { *bits &= (~bitMask); }

constexpr int32_t kDeMaxDim = std::numeric_limits<int32_t>::max();  // 2147483647 or 2^31 - 1
constexpr int32_t kDeMaxRank = std::numeric_limits<int32_t>::max();
constexpr int64_t kDeMaxFreq = std::numeric_limits<int64_t>::max();  // 9223372036854775807 or 2^63 - 1
constexpr int64_t kDeMaxTopk = std::numeric_limits<int64_t>::max();

constexpr uint32_t kCfgRowsPerBuffer = 1;
constexpr uint32_t kCfgParallelWorkers = 4;
constexpr uint32_t kCfgWorkerConnectorSize = 16;
constexpr uint32_t kCfgOpConnectorSize = 16;
constexpr int32_t kCfgDefaultRankId = -1;
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
constexpr uint32_t kCfgCallbackTimeout = 60;  // timeout value for callback in seconds
constexpr int32_t kCfgDefaultCachePort = 50052;
constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
constexpr int32_t kDftPrefetchSize = 20;
constexpr int32_t kDftNumConnections = 12;
constexpr int32_t kDftAutoNumWorkers = false;

// Invalid OpenCV type should not be from 0 to 7 (opencv4/opencv2/core/hal/interface.h)
constexpr uint8_t kCVInvalidType = 255;

using connection_id_type = uint64_t;
using session_id_type = uint32_t;
using row_id_type = int64_t;
} // namespace dataset
} // namespace mindspore

#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_CONSTANTS_H_
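A small sketch (not part of the commit) of the bitmask helpers declared above; the flag constants are made up for illustration, and the include path follows the one data_type.h uses inside this commit.

#include "include/constants.h"  // path as used elsewhere in this commit; may differ per package layout

namespace mindspore {
namespace dataset {
// Hypothetical flag bits, for illustration only.
constexpr uint32_t kFlagDecode = 1u << 0;
constexpr uint32_t kFlagShuffle = 1u << 1;

inline uint32_t BuildFlags() {
  uint32_t flags = 0;
  BitSet(&flags, kFlagDecode);    // flags == 0b01
  BitSet(&flags, kFlagShuffle);   // flags == 0b11
  BitClear(&flags, kFlagDecode);  // flags == 0b10
  return BitTest(flags, kFlagShuffle) ? flags : 0u;  // kFlagShuffle is still set, so flags is returned
}
}  // namespace dataset
}  // namespace mindspore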
@@ -0,0 +1,291 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_

#include <string>

#include "include/constants.h"
namespace mindspore {
namespace dataset {

// Class that represents basic data types in DataEngine.
class DataType {
 public:
  enum Type : uint8_t {
    DE_UNKNOWN = 0,
    DE_BOOL,
    DE_INT8,
    DE_UINT8,
    DE_INT16,
    DE_UINT16,
    DE_INT32,
    DE_UINT32,
    DE_INT64,
    DE_UINT64,
    DE_FLOAT16,
    DE_FLOAT32,
    DE_FLOAT64,
    DE_STRING,
    NUM_OF_TYPES
  };

  struct TypeInfo {
    const char *name_;                          // name to be represent the type while printing
    const uint8_t sizeInBytes_;                 // number of bytes needed for this type
    const char *pybindType_;                    // Python matching type, used in get_output_types
    const std::string pybindFormatDescriptor_;  // pybind format used for numpy types
    const uint8_t cvType_;                      // OpenCv matching type
  };

  // android and no python
  static inline const TypeInfo kTypeInfo[] = {
    // name, sizeInBytes, formatDescriptor
    {"unknown", 0, "object", "", kCVInvalidType},  // DE_UNKNOWN
    {"bool", 1, "bool", ""},                       // DE_BOOL
    {"int8", 1, "int8", ""},                       // DE_INT8
    {"uint8", 1, "uint8", ""},                     // DE_UINT8
    {"int16", 2, "int16", ""},                     // DE_INT16
    {"uint16", 2, "uint16", ""},                   // DE_UINT16
    {"int32", 4, "int32", ""},                     // DE_INT32
    {"uint32", 4, "uint32", "", kCVInvalidType},   // DE_UINT32
    {"int64", 8, "int64", "", kCVInvalidType},     // DE_INT64
    {"uint64", 8, "uint64", "", kCVInvalidType},   // DE_UINT64
    {"float16", 2, "float16", ""},                 // DE_FLOAT16
    {"float32", 4, "float32", ""},                 // DE_FLOAT32
    {"float64", 8, "double", ""},                  // DE_FLOAT64
    {"string", 0, "bytes", "", kCVInvalidType}     // DE_STRING
  };

  // No arg constructor to create an unknown shape
  DataType() : type_(DE_UNKNOWN) {}

  // Create a type from a given string
  /// \param type_str
  explicit DataType(const std::string &type_str);

  // Default destructor
  ~DataType() = default;

  // Create a type from a given enum
  /// \param d
  constexpr explicit DataType(Type d) : type_(d) {}

  constexpr bool operator==(const DataType a) const { return type_ == a.type_; }

  constexpr bool operator==(const Type a) const { return type_ == a; }

  constexpr bool operator!=(const DataType a) const { return type_ != a.type_; }

  constexpr bool operator!=(const Type a) const { return type_ != a; }

  // Disable this usage `if(d)` where d is of type DataType
  /// \return
  operator bool() = delete;

  // To be used in Switch/case
  /// \return
  operator Type() const { return type_; }

  // The number of bytes needed to store one value of this type
  /// \return
  uint8_t SizeInBytes() const;

  // Returns a string representation of the type
  /// \return
  std::string ToString() const;

  // returns true if the template type is the same as the Tensor type_
  /// \tparam T
  /// \return true or false
  template <typename T>
  bool IsCompatible() const {
    return type_ == FromCType<T>();
  }

  // returns true if the template type is the same as the Tensor type_
  /// \tparam T
  /// \return true or false
  template <typename T>
  bool IsLooselyCompatible() const;

  // << Stream output operator overload
  /// \notes This allows you to print the info using stream operators
  /// \param out - reference to the output stream being overloaded
  /// \param rO - reference to the DataType to display
  /// \return - the output stream must be returned
  friend std::ostream &operator<<(std::ostream &out, const DataType &so) {
    out << so.ToString();
    return out;
  }

  template <typename T>
  static DataType FromCType();

  // Get the buffer string format of the current type. Used in pybind buffer protocol.
  /// \return
  std::string GetPybindFormat() const;

  bool IsSignedInt() const {
    return type_ == DataType::DE_INT8 || type_ == DataType::DE_INT16 || type_ == DataType::DE_INT32 ||
           type_ == DataType::DE_INT64;
  }

  bool IsUnsignedInt() const {
    return type_ == DataType::DE_UINT8 || type_ == DataType::DE_UINT16 || type_ == DataType::DE_UINT32 ||
           type_ == DataType::DE_UINT64;
  }

  bool IsInt() const { return IsSignedInt() || IsUnsignedInt(); }

  bool IsFloat() const {
    return type_ == DataType::DE_FLOAT16 || type_ == DataType::DE_FLOAT32 || type_ == DataType::DE_FLOAT64;
  }

  bool IsBool() const { return type_ == DataType::DE_BOOL; }

  bool IsNumeric() const { return type_ != DataType::DE_STRING; }

  Type value() const { return type_; }

 private:
  Type type_;
};

template <>
inline DataType DataType::FromCType<bool>() {
  return DataType(DataType::DE_BOOL);
}

template <>
inline DataType DataType::FromCType<double>() {
  return DataType(DataType::DE_FLOAT64);
}

template <>
inline DataType DataType::FromCType<float>() {
  return DataType(DataType::DE_FLOAT32);
}

template <>
inline DataType DataType::FromCType<int64_t>() {
  return DataType(DataType::DE_INT64);
}

template <>
inline DataType DataType::FromCType<uint64_t>() {
  return DataType(DataType::DE_UINT64);
}

template <>
inline DataType DataType::FromCType<int32_t>() {
  return DataType(DataType::DE_INT32);
}

template <>
inline DataType DataType::FromCType<uint32_t>() {
  return DataType(DataType::DE_UINT32);
}

template <>
inline DataType DataType::FromCType<int16_t>() {
  return DataType(DataType::DE_INT16);
}

template <>
inline DataType DataType::FromCType<uint16_t>() {
  return DataType(DataType::DE_UINT16);
}

template <>
inline DataType DataType::FromCType<int8_t>() {
  return DataType(DataType::DE_INT8);
}

template <>
inline DataType DataType::FromCType<uint8_t>() {
  return DataType(DataType::DE_UINT8);
}

template <>
inline DataType DataType::FromCType<std::string_view>() {
  return DataType(DataType::DE_STRING);
}

template <>
inline DataType DataType::FromCType<std::string>() {
  return DataType(DataType::DE_STRING);
}

template <>
inline bool DataType::IsLooselyCompatible<bool>() const {
  return type_ == DataType::DE_BOOL;
}

template <>
inline bool DataType::IsLooselyCompatible<double>() const {
  return type_ == DataType::DE_FLOAT64 || type_ == DataType::DE_FLOAT32;
}

template <>
inline bool DataType::IsLooselyCompatible<float>() const {
  return type_ == DataType::DE_FLOAT32;
}

template <>
inline bool DataType::IsLooselyCompatible<int64_t>() const {
  return type_ == DataType::DE_INT64 || type_ == DataType::DE_INT32 || type_ == DataType::DE_INT16 ||
         type_ == DataType::DE_INT8;
}

template <>
inline bool DataType::IsLooselyCompatible<uint64_t>() const {
  return type_ == DataType::DE_UINT64 || type_ == DataType::DE_UINT32 || type_ == DataType::DE_UINT16 ||
         type_ == DataType::DE_UINT8;
}

template <>
inline bool DataType::IsLooselyCompatible<int32_t>() const {
  return type_ == DataType::DE_INT32 || type_ == DataType::DE_INT16 || type_ == DataType::DE_INT8;
}

template <>
inline bool DataType::IsLooselyCompatible<uint32_t>() const {
  return type_ == DataType::DE_UINT32 || type_ == DataType::DE_UINT16 || type_ == DataType::DE_UINT8;
}

template <>
inline bool DataType::IsLooselyCompatible<int16_t>() const {
  return type_ == DataType::DE_INT16 || type_ == DataType::DE_INT8;
}

template <>
inline bool DataType::IsLooselyCompatible<uint16_t>() const {
  return type_ == DataType::DE_UINT16 || type_ == DataType::DE_UINT8;
}

template <>
inline bool DataType::IsLooselyCompatible<int8_t>() const {
  return type_ == DataType::DE_INT8;
}

template <>
inline bool DataType::IsLooselyCompatible<uint8_t>() const {
  return type_ == DataType::DE_UINT8;
}
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_DATA_TYPE_H_
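A short sketch (not part of the commit) of how the DataType helpers above are used; the include path is the in-tree one and may differ in the packaged lite headers, and IsWideEnoughFloat is a hypothetical helper.

#include "minddata/dataset/core/data_type.h"  // in-tree path; packaged path may differ

// Illustration only: map a C++ type to a DataType and query it.
bool IsWideEnoughFloat(const mindspore::dataset::DataType &t) {
  using mindspore::dataset::DataType;
  DataType f32 = DataType::FromCType<float>();  // DE_FLOAT32
  // Accept DE_FLOAT32 exactly, or any float type at least as wide as float32.
  return t == f32 || (t.IsFloat() && t.SizeInBytes() >= f32.SizeInBytes());
}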
@@ -0,0 +1,254 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_

#include <sys/stat.h>
#include <unistd.h>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "include/iterator.h"
#include "include/samplers.h"

namespace mindspore {
namespace dataset {

class Tensor;
class TensorShape;
class TreeGetters;

class DatasetCache;
class DatasetNode;
class Iterator;
class TensorOperation;
class SchemaObj;
class SamplerObj;

// Dataset classes (in alphabetical order)
class BatchDataset;
class MapDataset;
class ProjectDataset;
class ShuffleDataset;
class DSCallback;

/// \class Dataset datasets.h
/// \brief A base class to represent a dataset in the data pipeline.
class Dataset : public std::enable_shared_from_this<Dataset> {
 public:
  // need friend class so they can access the children_ field
  friend class Iterator;
  friend class TransferNode;

  /// \brief Constructor
  Dataset();

  /// \brief Destructor
  ~Dataset() = default;

  /// \brief Gets the dataset size
  /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting
  ///     dataset size at the expense of accuracy.
  /// \return dataset size. If failed, return -1
  int64_t GetDatasetSize(bool estimate = false);

  // /// \brief Gets the output type
  // /// \return a vector of DataType. If failed, return an empty vector
  // std::vector<DataType> GetOutputTypes();

  /// \brief Gets the output shape
  /// \return a vector of TensorShape. If failed, return an empty vector
  std::vector<TensorShape> GetOutputShapes();

  /// \brief Gets the batch size
  /// \return int64_t
  int64_t GetBatchSize();

  /// \brief Gets the repeat count
  /// \return int64_t
  int64_t GetRepeatCount();

  /// \brief Gets the number of classes
  /// \return number of classes. If failed, return -1
  int64_t GetNumClasses();

  /// \brief Gets the column names
  /// \return Names of the columns. If failed, return an empty vector
  std::vector<std::string> GetColumnNames();

  /// \brief Gets the class indexing
  /// \return a map of ClassIndexing. If failed, return an empty map
  std::vector<std::pair<std::string, std::vector<int32_t>>> GetClassIndexing();

  /// \brief Setter function for runtime number of workers
  /// \param[in] num_workers The number of threads in this operator
  /// \return Shared pointer to the original object
  std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers);

  /// \brief Function to create an Iterator over the Dataset pipeline
  /// \param[in] columns List of columns to be used to specify the order of columns
  /// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs.
  ///     An empty row is returned at the end of each epoch
  /// \return Shared pointer to the Iterator
  std::shared_ptr<Iterator> CreateIterator(std::vector<std::string> columns = {}, int32_t num_epochs = -1);

  /// \brief Function to create a BatchDataset
  /// \notes Combines batch_size number of consecutive rows into batches
  /// \param[in] batch_size The number of rows each batch is created with
  /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete
  ///     batch. If true, and if there are less than batch_size rows
  ///     available to make the last batch, then those rows will
  ///     be dropped and not propagated to the next node
  /// \return Shared pointer to the current BatchDataset
  std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);

  /// \brief Function to create a MapDataset
  /// \notes Applies each operation in operations to this dataset
  /// \param[in] operations Vector of operations to be applied on the dataset. Operations are
  ///     applied in the order they appear in this list
  /// \param[in] input_columns Vector of the names of the columns that will be passed to the first
  ///     operation as input. The size of this list must match the number of
  ///     input columns expected by the first operator. The default input_columns
  ///     is the first column
  /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation
  ///     This parameter is mandatory if len(input_columns) != len(output_columns)
  ///     The size of this list must match the number of output columns of the
  ///     last operation. The default output_columns will have the same
  ///     name as the input columns, i.e., the columns will be replaced
  /// \param[in] project_columns A list of column names to project
  /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
  /// \return Shared pointer to the current MapDataset
  std::shared_ptr<MapDataset> Map(std::vector<std::shared_ptr<TensorOperation>> operations,
                                  const std::vector<std::string> &input_columns = {},
                                  const std::vector<std::string> &output_columns = {},
                                  const std::vector<std::string> &project_columns = {},
                                  const std::shared_ptr<DatasetCache> &cache = nullptr,
                                  std::vector<std::shared_ptr<DSCallback>> callbacks = {}) {
    return std::make_shared<MapDataset>(shared_from_this(), operations, input_columns, output_columns, project_columns,
                                        cache, callbacks);
  }

  /// \brief Function to create a Project Dataset
  /// \notes Applies project to the dataset
  /// \param[in] columns The name of columns to project
  /// \return Shared pointer to the current Dataset
  std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns) {
    return std::make_shared<ProjectDataset>(shared_from_this(), columns);
  }

  /// \brief Function to create a Shuffle Dataset
  /// \notes Randomly shuffles the rows of this dataset
  /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling
  /// \return Shared pointer to the current ShuffleDataset
  std::shared_ptr<ShuffleDataset> Shuffle(int32_t buffer_size) {
    return std::make_shared<ShuffleDataset>(shared_from_this(), buffer_size);
  }

  std::shared_ptr<DatasetNode> IRNode() { return ir_node_; }

 protected:
  std::shared_ptr<TreeGetters> tree_getters_;
  std::shared_ptr<DatasetNode> ir_node_;
};

class BatchDataset : public Dataset {
 public:
  BatchDataset(std::shared_ptr<Dataset> input, int32_t batch_size, bool drop_remainder = false);
  ~BatchDataset() = default;
};

class MapDataset : public Dataset {
 public:
  MapDataset(std::shared_ptr<Dataset> input, std::vector<std::shared_ptr<TensorOperation>> operations,
             const std::vector<std::string> &input_columns, const std::vector<std::string> &output_columns,
             const std::vector<std::string> &project_columns, const std::shared_ptr<DatasetCache> &cache,
             std::vector<std::shared_ptr<DSCallback>> callbacks);
  ~MapDataset() = default;
};

class ProjectDataset : public Dataset {
 public:
  ProjectDataset(std::shared_ptr<Dataset> input, const std::vector<std::string> &columns);
  ~ProjectDataset() = default;
};

class ShuffleDataset : public Dataset {
 public:
  ShuffleDataset(std::shared_ptr<Dataset> input, int32_t buffer_size);
  ~ShuffleDataset() = default;
};

/// \brief Function to create a SchemaObj
/// \param[in] schema_file Path of schema file
/// \return Shared pointer to the current schema
std::shared_ptr<SchemaObj> Schema(const std::string &schema_file = "");

class AlbumDataset : public Dataset {
 public:
  AlbumDataset(const std::string &dataset_dir, const std::string &data_schema,
               const std::vector<std::string> &column_names = {}, bool decode = false,
               const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
               const std::shared_ptr<DatasetCache> &cache = nullptr);
  ~AlbumDataset() = default;
};

/// \brief Function to create an AlbumDataset
/// \notes The generated dataset is specified through setting a schema
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] data_schema Path to dataset schema file
/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns.
///     (default = {})
/// \param[in] decode the option to decode the images in dataset (default = false)
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current Dataset
std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
                                    const std::vector<std::string> &column_names = {}, bool decode = false,
                                    const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
                                    const std::shared_ptr<DatasetCache> &cache = nullptr);

class MnistDataset : public Dataset {
 public:
  explicit MnistDataset(const std::string &dataset_dir, const std::string &usage = "all",
                        const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
                        const std::shared_ptr<DatasetCache> &cache = nullptr);
  ~MnistDataset() = default;
};

/// \brief Function to create a MnistDataset
/// \notes The generated dataset has two columns ["image", "label"]
/// \param[in] dataset_dir Path to the root directory that contains the dataset
/// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all").
/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given,
///     a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler())
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current MnistDataset
std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all",
                                    const std::shared_ptr<SamplerObj> &sampler = RandomSampler(),
                                    const std::shared_ptr<DatasetCache> &cache = nullptr);

} // namespace dataset
} // namespace mindspore

#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASETS_H_
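A minimal end-to-end sketch of the lite dataset API declared above (not part of the commit). The MNIST directory is a placeholder and error handling is omitted.

#include "include/datasets.h"

void RunMnistPipeline() {
  using namespace mindspore::dataset;
  // "/data/mnist" is a placeholder directory.
  std::shared_ptr<Dataset> ds = Mnist("/data/mnist", "test");
  ds = ds->SetNumWorkers(2)->Batch(32, /*drop_remainder=*/true);

  std::shared_ptr<Iterator> it = ds->CreateIterator();
  TensorMap row;
  while (it->GetNextRow(&row) && !row.empty()) {
    // row["image"] and row["label"] hold std::shared_ptr<Tensor> for one batch.
  }
}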
@@ -0,0 +1,58 @@
/**
 * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_

#include <vector>
#include <memory>
#include "include/api/types.h"
#include "include/constants.h"
#include "dataset/include/transforms.h"

namespace mindspore {
namespace dataset {

// Class to run tensor operations in eager mode
class Execute {
 public:
  /// \brief Constructor
  explicit Execute(std::shared_ptr<TensorOperation> op);

  explicit Execute(std::vector<std::shared_ptr<TensorOperation>> ops);

  /// \brief Destructor
  ~Execute() = default;

  /// \brief Callable function to execute the TensorOperation in eager mode
  /// \param[in] input Tensor to be transformed
  /// \param[out] output Transformed tensor
  /// \return Status code
  Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output);

  /// \brief Callable function to execute the TensorOperations in eager mode
  /// \param[in] input_tensor_list List of Tensors to be transformed
  /// \param[out] out Result tensors after the transform
  /// \return Status code
  Status operator()(const std::vector<mindspore::MSTensor> &input_tensor_list, std::vector<mindspore::MSTensor> *out);

 private:
  std::vector<std::shared_ptr<TensorOperation>> ops_;
};

}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_EXECUTE_H_
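A minimal sketch of eager execution with Execute. The concrete TensorOperation is left as a parameter because the transform factories live in transforms.h (and the vision headers), not in this file; only members declared above are used.

// Sketch only: wrap any TensorOperation from transforms.h and run it eagerly on one MSTensor.
mindspore::Status ApplyEager(const std::shared_ptr<mindspore::dataset::TensorOperation> &op,
                             const mindspore::MSTensor &input, mindspore::MSTensor *output) {
  mindspore::dataset::Execute transform(op);  // a vector of ops can be passed instead to chain them
  return transform(input, output);            // returns OK on success, no pipeline is built
}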
@@ -0,0 +1,120 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_

#include <memory>
#include <string>
#include <unordered_map>
#include <vector>
#include "include/status.h"

namespace mindspore {
namespace dataset {

// Forward declarations
class ExecutionTree;
class DatasetIterator;
class DatasetOp;
class Tensor;

class NativeRuntimeContext;
class IteratorConsumer;

class Dataset;

using TensorMap = std::unordered_map<std::string, std::shared_ptr<Tensor>>;
using TensorVec = std::vector<std::shared_ptr<Tensor>>;

// Abstract class for iterating over the dataset.
class Iterator {
 public:
  /// \brief Constructor
  Iterator();

  /// \brief Destructor
  ~Iterator();

  /// \brief Method for building and launching the pipeline.
  /// \param[in] ds - The root node of the dataset pipeline to build and launch.
  /// \return - a Status error code, returns OK if no error encountered.
  Status BuildAndLaunchTree(std::shared_ptr<Dataset> ds);

  /// \brief Function to get the next row from the data pipeline.
  /// \note The returned data is a map keyed by column name.
  /// \param[out] row - the output tensor row.
  /// \return Returns true if no error encountered else false.
  bool GetNextRow(TensorMap *row);

  /// \brief Function to get the next row from the data pipeline.
  /// \note The returned data is a vector (without column names).
  /// \param[out] row - the output tensor row.
  /// \return Returns true if no error encountered else false.
  bool GetNextRow(TensorVec *row);

  /// \brief Function to shut down the data pipeline.
  void Stop();

  class _Iterator {
   public:
    explicit _Iterator(Iterator *lt) : ind_{0}, lt_{lt}, cur_row_{nullptr} {
      if (lt_) {
        cur_row_ = new TensorMap();
        lt_->GetNextRow(cur_row_);
      }
    }

    // Destructor
    ~_Iterator() {
      if (cur_row_) {
        delete cur_row_;
      }
    }

    _Iterator &operator++() {
      if (lt_) {
        ++ind_;
        lt_->GetNextRow(cur_row_);
      }
      if (cur_row_ && cur_row_->size() == 0) {
        delete cur_row_;
        cur_row_ = nullptr;
      }
      return *this;
    }  // prefix ++ overload
    TensorMap &operator*() { return *cur_row_; }  // dereference operator
    TensorMap *operator->() { return cur_row_; }

    bool operator!=(const _Iterator &rhs) { return cur_row_ != rhs.cur_row_; }

   private:
    int ind_;  // index of the current row this _Iterator points to
    Iterator *lt_;
    TensorMap *cur_row_;
  };

  _Iterator begin() { return _Iterator(this); }

  _Iterator end() { return _Iterator(nullptr); }

 private:
  std::unique_ptr<NativeRuntimeContext> runtime_context_;
  IteratorConsumer *consumer_;
};
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_
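A minimal sketch of consuming rows from an Iterator. How the Iterator is obtained from a Dataset (for example via a CreateIterator() helper) is outside this header and assumed; the loop relies only on members declared above, and an empty row is treated as end of data, exactly as the nested _Iterator does.

// Sketch only: drain all rows, then shut the pipeline down.
void DumpRows(mindspore::dataset::Iterator *iter) {
  mindspore::dataset::TensorMap row;
  while (iter->GetNextRow(&row) && !row.empty()) {
    // row maps column names ("image", "label", ...) to tensors for one sample
  }
  iter->Stop();
}
// The equivalent range-for form, backed by begin()/end() above, is:
//   for (auto &row : *iter) { /* row is a TensorMap */ }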
@@ -0,0 +1,59 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_

#include <cstddef>
#include <cstdint>
#include <memory>
#include "include/status.h"

namespace mindspore {
namespace dataset {
// Abstract class of a memory pool
class MemoryPool {
 public:
  // Allocate a block of size n
  virtual Status Allocate(size_t, void **) = 0;

  // Enlarge or shrink a block from old_sz to new_sz
  virtual Status Reallocate(void **, size_t old_sz, size_t new_sz) = 0;

  // Free a pointer
  virtual void Deallocate(void *) = 0;

  // What is the maximum size I can allocate?
  virtual uint64_t get_max_size() const = 0;

  virtual int PercentFree() const = 0;

  // Destructor
  virtual ~MemoryPool() {}
};

Status DeMalloc(std::size_t s, void **p, bool);
}  // namespace dataset
}  // namespace mindspore

void *operator new(std::size_t, mindspore::Status *, std::shared_ptr<mindspore::dataset::MemoryPool>);

void *operator new[](std::size_t, mindspore::Status *, std::shared_ptr<mindspore::dataset::MemoryPool>);

void operator delete(void *, std::shared_ptr<mindspore::dataset::MemoryPool>);

void operator delete[](void *, std::shared_ptr<mindspore::dataset::MemoryPool>);

#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_MEMORY_POOL_H_
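A minimal sketch of a MemoryPool implementation backed directly by malloc/realloc/free, written only to illustrate the contract of the pure virtual interface above; it is not one of the pool implementations shipped with MindData. The error paths use RETURN_STATUS_UNEXPECTED from the status header shown later in this commit, which memory_pool.h already includes.

// Sketch only: a toy pool with no bookkeeping of its own.
#include <cstdlib>
#include <limits>

namespace mindspore {
namespace dataset {

class MallocPool : public MemoryPool {
 public:
  Status Allocate(size_t n, void **p) override {
    *p = std::malloc(n);
    if (*p == nullptr) {
      RETURN_STATUS_UNEXPECTED("malloc failed");
    }
    return Status::OK();
  }

  Status Reallocate(void **p, size_t old_sz, size_t new_sz) override {
    (void)old_sz;  // realloc does not need the old size
    void *q = std::realloc(*p, new_sz);
    if (q == nullptr) {
      RETURN_STATUS_UNEXPECTED("realloc failed");
    }
    *p = q;
    return Status::OK();
  }

  void Deallocate(void *p) override { std::free(p); }

  uint64_t get_max_size() const override { return std::numeric_limits<uint64_t>::max(); }

  int PercentFree() const override { return 100; }  // this toy pool never tracks usage
};

}  // namespace dataset
}  // namespace mindspore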
@@ -0,0 +1,126 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_

#include <dirent.h>
#include <memory>
#include <string>

#include "include/status.h"

namespace mindspore {
namespace dataset {
class Path {
 public:
  class DirIterator {
   public:
    static std::shared_ptr<DirIterator> OpenDirectory(Path *f);

    ~DirIterator();

    bool hasNext();

    Path next();

   private:
    explicit DirIterator(Path *f);

    Path *dir_;
    DIR *dp_;
    struct dirent *entry_;
  };

  explicit Path(const std::string &);

  explicit Path(const char *);

  ~Path() = default;

  Path(const Path &);

  Path &operator=(const Path &);

  Path(Path &&) noexcept;

  Path &operator=(Path &&) noexcept;

  std::string toString() const { return path_; }

  Path operator+(const Path &);

  Path operator+(const std::string &);

  Path operator+(const char *);

  Path &operator+=(const Path &rhs);

  Path &operator+=(const std::string &);

  Path &operator+=(const char *);

  Path operator/(const Path &);

  Path operator/(const std::string &);

  Path operator/(const char *);

  bool operator==(const Path &rhs) const { return (path_ == rhs.path_); }

  bool operator!=(const Path &rhs) const { return (path_ != rhs.path_); }

  bool operator<(const Path &rhs) const { return (path_ < rhs.path_); }

  bool operator>(const Path &rhs) const { return (path_ > rhs.path_); }

  bool operator<=(const Path &rhs) const { return (path_ <= rhs.path_); }

  bool operator>=(const Path &rhs) const { return (path_ >= rhs.path_); }

  bool Exists();

  bool IsDirectory();

  Status CreateDirectory();

  Status CreateDirectories();

  std::string Extension() const;

  std::string ParentPath();

  Status Remove();

  Status CreateFile(int *fd);

  Status OpenFile(int *fd, bool create = false);

  Status CloseFile(int fd) const;

  Status TruncateFile(int fd) const;

  std::string Basename();

  friend std::ostream &operator<<(std::ostream &os, const Path &s);

 private:
  static char separator_;
  std::string path_;
};
}  // namespace dataset
}  // namespace mindspore

#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_PATH_H_
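A minimal sketch of composing paths and walking a directory with Path::DirIterator; the directory name is a placeholder and only members declared above are used.

// Sketch only: join paths, create directories, and list JPEG files.
void ListImages() {
  mindspore::dataset::Path root("/tmp/minddata");
  mindspore::dataset::Path images = root / "images";  // operator/ joins with the platform separator
  if (!images.Exists()) {
    (void)images.CreateDirectories();                 // also creates missing parent directories
  }
  auto it = mindspore::dataset::Path::DirIterator::OpenDirectory(&images);
  while (it != nullptr && it->hasNext()) {
    mindspore::dataset::Path entry = it->next();
    if (entry.Extension() == ".jpg") {
      // entry.toString() is the full path of one JPEG file
    }
  }
}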
@@ -0,0 +1,301 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_

#include <memory>
#include <string>
#include <vector>

#include "include/status.h"

namespace mindspore {
namespace dataset {

// Internal Sampler class forward declaration
class SamplerRT;

class SamplerObj : public std::enable_shared_from_this<SamplerObj> {
 public:
  /// \brief Constructor
  SamplerObj();

  /// \brief Destructor
  ~SamplerObj() = default;

  /// \brief Pure virtual function for derived classes to implement parameter validation
  /// \return The Status code of the function. It returns OK status if parameters are valid.
  virtual Status ValidateParams() = 0;

  /// \brief Pure virtual function to convert a SamplerObj class into a runtime sampler object
  /// \return Shared pointer to the newly created SamplerRT
  virtual std::shared_ptr<SamplerRT> Build() = 0;

  /// \brief Pure virtual function to copy a SamplerObj class
  /// \return Shared pointer to the newly copied SamplerObj
  virtual std::shared_ptr<SamplerObj> Copy() = 0;

  /// \brief Function for derived classes to get the shard id of the sampler
  /// \return The shard id of the derived sampler
  virtual int64_t ShardId() { return 0; }

  /// \brief Adds a child to the sampler
  /// \param[in] child The sampler to be added as a child
  /// \return The Status code returned
  Status AddChild(std::shared_ptr<SamplerObj> child);

 protected:
  /// \brief A function that calls Build on the children of this sampler
  /// \param[in] sampler The SamplerRT object built from this sampler
  void BuildChildren(std::shared_ptr<SamplerRT> sampler);

  std::vector<std::shared_ptr<SamplerObj>> children_;
};

class DistributedSamplerObj;
class PKSamplerObj;
class PreBuiltSamplerObj;
class RandomSamplerObj;
class SequentialSamplerObj;
class SubsetRandomSamplerObj;
class WeightedRandomSamplerObj;

/// Function to create a Distributed Sampler.
/// \notes A Sampler that accesses a shard of the dataset.
/// \param[in] num_shards - Number of shards to divide the dataset into.
/// \param[in] shard_id - Shard ID of the current shard within num_shards.
/// \param[in] shuffle - If true, the indices are shuffled.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] seed - The seed in use when shuffle is true.
/// \param[in] offset - The starting position where access to elements in the dataset begins.
/// \param[in] even_dist - If true, each shard would return the same number of rows (default to true).
///     If false, the total rows returned by all the shards do not overlap.
/// \return Shared pointer to the current Sampler.
std::shared_ptr<DistributedSamplerObj> DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true,
                                                          int64_t num_samples = 0, uint32_t seed = 1,
                                                          int64_t offset = -1, bool even_dist = true);

/// Function to create a PK Sampler.
/// \notes Samples K elements for each of the P classes in the dataset.
///     This will sample all classes.
/// \param[in] num_val - Number of elements to sample for each class.
/// \param[in] shuffle - If true, the class IDs are shuffled.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<PKSamplerObj> PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);

/// Function to create a Random Sampler.
/// \notes Samples the elements randomly.
/// \param[in] replacement - If true, put the sample ID back for the next draw.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<RandomSamplerObj> RandomSampler(bool replacement = false, int64_t num_samples = 0);

/// Function to create a Sequential Sampler.
/// \notes Samples the dataset elements sequentially, same as not having a sampler.
/// \param[in] start_index - Index to start sampling at (default to start at first id).
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<SequentialSamplerObj> SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);

/// Function to create a Subset Random Sampler.
/// \notes Samples the elements randomly from a sequence of indices.
/// \param[in] indices - A vector sequence of indices.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \return Shared pointer to the current Sampler.
std::shared_ptr<SubsetRandomSamplerObj> SubsetRandomSampler(std::vector<int64_t> indices, int64_t num_samples = 0);

/// Function to create a Weighted Random Sampler.
/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given
///     weights (probabilities).
/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples - The number of samples to draw (default to all elements).
/// \param[in] replacement - If true, put the sample ID back for the next draw.
/// \return Shared pointer to the current Sampler.
std::shared_ptr<WeightedRandomSamplerObj> WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0,
                                                                bool replacement = true);

/* ####################################### Derived Sampler classes ################################# */
class DistributedSamplerObj : public SamplerObj {
 public:
  DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed,
                        int64_t offset, bool even_dist);

  ~DistributedSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override {
    auto sampler = std::make_shared<DistributedSamplerObj>(num_shards_, shard_id_, shuffle_, num_samples_, seed_,
                                                           offset_, even_dist_);
    for (auto child : children_) {
      sampler->AddChild(child);
    }
    return sampler;
  }

  Status ValidateParams() override;

  /// \brief Function to get the shard id of the sampler
  /// \return The shard id of the sampler
  int64_t ShardId() override { return shard_id_; }

 private:
  int64_t num_shards_;
  int64_t shard_id_;
  bool shuffle_;
  int64_t num_samples_;
  uint32_t seed_;
  int64_t offset_;
  bool even_dist_;
};

class PKSamplerObj : public SamplerObj {
 public:
  PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples);

  ~PKSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override {
    auto sampler = std::make_shared<PKSamplerObj>(num_val_, shuffle_, num_samples_);
    for (auto child : children_) {
      sampler->AddChild(child);
    }
    return sampler;
  }

  Status ValidateParams() override;

 private:
  int64_t num_val_;
  bool shuffle_;
  int64_t num_samples_;
};

class PreBuiltSamplerObj : public SamplerObj {
 public:
  explicit PreBuiltSamplerObj(std::shared_ptr<SamplerRT> sampler);

  ~PreBuiltSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override;

  Status ValidateParams() override;

 private:
  std::shared_ptr<SamplerRT> sp_;
};

class RandomSamplerObj : public SamplerObj {
 public:
  RandomSamplerObj(bool replacement, int64_t num_samples);

  ~RandomSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override {
    auto sampler = std::make_shared<RandomSamplerObj>(replacement_, num_samples_);
    for (auto child : children_) {
      sampler->AddChild(child);
    }
    return sampler;
  }

  Status ValidateParams() override;

 private:
  bool replacement_;
  int64_t num_samples_;
};

class SequentialSamplerObj : public SamplerObj {
 public:
  SequentialSamplerObj(int64_t start_index, int64_t num_samples);

  ~SequentialSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override {
    auto sampler = std::make_shared<SequentialSamplerObj>(start_index_, num_samples_);
    for (auto child : children_) {
      sampler->AddChild(child);
    }
    return sampler;
  }

  Status ValidateParams() override;

 private:
  int64_t start_index_;
  int64_t num_samples_;
};

class SubsetRandomSamplerObj : public SamplerObj {
 public:
  SubsetRandomSamplerObj(std::vector<int64_t> indices, int64_t num_samples);

  ~SubsetRandomSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override {
    auto sampler = std::make_shared<SubsetRandomSamplerObj>(indices_, num_samples_);
    for (auto child : children_) {
      sampler->AddChild(child);
    }
    return sampler;
  }

  Status ValidateParams() override;

 private:
  const std::vector<int64_t> indices_;
  int64_t num_samples_;
};

class WeightedRandomSamplerObj : public SamplerObj {
 public:
  explicit WeightedRandomSamplerObj(std::vector<double> weights, int64_t num_samples = 0, bool replacement = true);

  ~WeightedRandomSamplerObj() = default;

  std::shared_ptr<SamplerRT> Build() override;

  std::shared_ptr<SamplerObj> Copy() override {
    auto sampler = std::make_shared<WeightedRandomSamplerObj>(weights_, num_samples_, replacement_);
    for (auto child : children_) {
      sampler->AddChild(child);
    }
    return sampler;
  }

  Status ValidateParams() override;

 private:
  const std::vector<double> weights_;
  int64_t num_samples_;
  bool replacement_;
};
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_SAMPLERS_H_
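A minimal sketch of building samplers and chaining them with AddChild(); in MindData the parent sampler draws from the indices produced by its child, so the combination below reads a sequential slice of each shard. The parameter values are arbitrary examples.

// Sketch only: sampler construction and chaining.
void BuildSamplers() {
  using namespace mindspore::dataset;

  // Shard 0 of 8, shuffled, at most 1024 samples from this shard.
  std::shared_ptr<DistributedSamplerObj> dist = DistributedSampler(8, 0, true, 1024);

  // Feed it from a sequential child that starts at index 100.
  (void)dist->AddChild(SequentialSampler(100));

  // Stand-alone samplers for single-process runs.
  auto random = RandomSampler(false, 512);            // without replacement, 512 samples
  auto weighted = WeightedRandomSampler({0.7, 0.3});  // two-element weighting, all samples
}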
@@ -0,0 +1,105 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_

#if defined(__GNUC__) || defined(__clang__)
#define DEPRECATED __attribute__((deprecated))
#elif defined(_MSC_VER)
#define DEPRECATED __declspec(deprecated)
#else
#pragma message("WARNING: You need to implement DEPRECATED for this compiler")
#define DEPRECATED
#endif

#include <iostream>
#include <string>
#include <utility>

#include "include/ms_status.h"

namespace mindspore {
namespace dataset {
#define RETURN_IF_NOT_OK(_s) \
  do {                       \
    Status __rc = (_s);      \
    if (__rc.IsError()) {    \
      return __rc;           \
    }                        \
  } while (false)

#define RETURN_STATUS_UNEXPECTED(_e)                                        \
  do {                                                                      \
    return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e);  \
  } while (false)

#define CHECK_FAIL_RETURN_UNEXPECTED(_condition, _e)                          \
  do {                                                                        \
    if (!(_condition)) {                                                      \
      return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, _e);  \
    }                                                                         \
  } while (false)

#define CHECK_FAIL_RETURN_SYNTAX_ERROR(_condition, _e)                    \
  do {                                                                    \
    if (!(_condition)) {                                                  \
      return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e);  \
    }                                                                     \
  } while (false)

#define RETURN_UNEXPECTED_IF_NULL(_ptr)                                          \
  do {                                                                           \
    if ((_ptr) == nullptr) {                                                     \
      std::string err_msg = "The pointer[" + std::string(#_ptr) + "] is null.";  \
      RETURN_STATUS_UNEXPECTED(err_msg);                                         \
    }                                                                            \
  } while (false)

#define RETURN_OK_IF_TRUE(_condition) \
  do {                                \
    if (_condition) {                 \
      return Status::OK();            \
    }                                 \
  } while (false)

#define RETURN_STATUS_SYNTAX_ERROR(_e)                                  \
  do {                                                                  \
    return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, _e);  \
  } while (false)

#define RETURN_SECOND_IF_ERROR(_s, _r) \
  do {                                 \
    Status __rc = (_s);                \
    if (__rc.IsError()) {              \
      MS_LOG(ERROR) << __rc;           \
      return _r;                       \
    }                                  \
  } while (false)

#if !defined(_WIN32) && !defined(_WIN64)
const float MAX_MEMORY_USAGE_THRESHOLD = 0.95;
float GetMemoryUsage();
#endif
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_
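A short sketch of how the macros above are typically used: guard arguments, then propagate any error Status from a callee with an early return. The helper functions are hypothetical and exist only to demonstrate the macros.

// Sketch only: argument checks and error propagation with the status macros.
#include <cstring>

namespace mindspore {
namespace dataset {

Status CopyBuffer(const unsigned char *src, unsigned char *dst, size_t n) {
  RETURN_UNEXPECTED_IF_NULL(src);  // null-pointer guard
  RETURN_UNEXPECTED_IF_NULL(dst);
  CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "Buffer length must be positive.");
  (void)std::memcpy(dst, src, n);
  return Status::OK();
}

Status CopyTwice(const unsigned char *src, unsigned char *dst1, unsigned char *dst2, size_t n) {
  RETURN_IF_NOT_OK(CopyBuffer(src, dst1, n));  // early return on the first failure
  RETURN_IF_NOT_OK(CopyBuffer(src, dst2, n));
  return Status::OK();
}

}  // namespace dataset
}  // namespace mindspore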
@@ -0,0 +1,632 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_

#include <deque>
#include <memory>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>

#if defined(_WIN32) || defined(_WIN64)
#undef HAVE_STDDEF_H
#undef HAVE_STDLIB_H
#endif

#include "include/constants.h"
#include "include/data_type.h"
#include "include/tensor_helpers.h"
#include "include/tensor_shape.h"
#include "include/status.h"

namespace mindspore {
namespace dataset {
class Tensor;
template <typename T>
class Allocator;

using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>;  // An allocator shared_ptr for Tensors
using offset_t = uint32_t;                                  // type of offset values used to store string locations
using TensorPtr = std::shared_ptr<Tensor>;

class Tensor {
 public:
  Tensor() = delete;
  Tensor(const Tensor &other) = delete;
  Tensor &operator=(const Tensor &other) = delete;

  /// Create a tensor using shape and type. This constructor should not be used directly; use CreateFromTensor instead.
  /// \note The shape and type information should be known and valid
  /// \note The constructor does not allocate data
  /// \param shape TensorShape
  /// \param type DataType
  Tensor(const TensorShape &shape, const DataType &type);

  /// Move constructor
  /// \param other Tensor to be moved
  Tensor(Tensor &&other) noexcept;

  /// Move assignment operator
  /// \param other Tensor to be moved
  Tensor &operator=(Tensor &&other) noexcept;

  /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
  /// \param[in] shape shape of the output tensor
  /// \param[in] type type of the output tensor
  /// \param[out] out Generated tensor
  /// \return Status code
  static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);

  /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
  /// Data will be copied into the newly created tensor.
  /// \param[in] shape shape of the output tensor
  /// \param[in] type type of the output tensor
  /// \param[in] src pointer to the source data
  /// \param[out] out Generated tensor
  /// \return Status code
  static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);

  /// Create a tensor from a pointer in memory and length. Data will be copied into the newly created tensor.
  /// \param[in] shape shape of the output tensor
  /// \param[in] type type of the output tensor
  /// \param[in] src pointer to the source data
  /// \param[in] length length of the src data
  /// \param[out] out Generated tensor
  /// \return Status code
  static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
                                 const dsize_t &length, TensorPtr *out);

  /// Create a copy of the input tensor
  /// \param[in] in original tensor to be copied
  /// \param[out] out output tensor to be generated
  /// \return Status
  static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
    return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
  }

  /// Create a Tensor from a given list of values.
  /// \tparam T type of the values to be inserted.
  /// \param[in] items elements of the tensor
  /// \param[in] shape shape of the output tensor
  /// \param[out] out output argument to hold the created Tensor
  /// \return Status Code
  template <typename T>
  static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
    CHECK_FAIL_RETURN_UNEXPECTED(
      items.size() == shape.NumOfElements(),
      "Number of elements in the vector does not match the number of elements of the shape required");
    // cppcheck-suppress shadowFunction
    DataType type = DataType::FromCType<T>();
    // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
    auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
    return CreateFromMemory(shape, type, items_ptr, out);
  }

  /// Create a 1D Tensor from a given list of values.
  /// \tparam T type of the values to be inserted.
  /// \param[in] items elements of the tensor
  /// \param[out] out output argument to hold the created Tensor
  /// \return Status Code
  template <typename T>
  static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
    return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
  }

  /// Create a boolean Tensor from a given list of boolean values.
  /// \param[in] items elements of the tensor
  /// \param[in] shape shape of the output tensor
  /// \param[out] out output argument to hold the created Tensor
  /// \return Status Code
  static Status CreateFromVector(const std::vector<bool> &items, const TensorShape &shape, TensorPtr *out) {
    std::vector<uint8_t> temp(items.begin(), items.end());
    RETURN_IF_NOT_OK(CreateFromVector(temp, shape, out));
    (*out)->type_ = DataType(DataType::DE_BOOL);
    return Status::OK();
  }

  /// Create a numeric scalar Tensor from the given value.
  /// \tparam T type of value
  /// \param[in] item value
  /// \param[out] out Created tensor
  /// \return Status code
  template <typename T>
  static Status CreateScalar(const T &item, TensorPtr *out) {
    // cppcheck-suppress shadowFunction
    DataType type = DataType::FromCType<T>();
    auto item_ptr = reinterpret_cast<const uchar *>(&item);
    return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
  }

  /// Create a tensor from a binary file on disk.
  /// \param[in] path file to be read
  /// \param[out] out Created Tensor
  /// \return Status code
  static Status CreateFromFile(const std::string &path, TensorPtr *out);

  /// Destruct the tensor and release the memory using the allocator
  virtual ~Tensor();

  /// Equality operator. Compares tensor shape, type and data
  /// \param[in] rhs Tensor to be compared with
  /// \return bool
  bool operator==(const Tensor &rhs) const;

  bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }

  /// Get item located at `index`; the caller needs to provide the type.
  /// \tparam T
  /// \param[in] index vector<dsize_t>
  /// \return return the item specified at index
  template <typename T>
  Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;

  /// Get string located at `index`.
  /// \param[in] index vector<dsize_t>
  /// \return return std::string_view specified at index
  Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;

  template <typename T>
  Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const;

  template <typename T>
  Status GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const;

  template <typename T>
  Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;

  /// Set item at the location specified by index
  /// \tparam `T`
  /// \param[in] index
  /// \param[in] value of type `T`
  template <typename T>
  Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
    T *ptr = nullptr;
    RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
    *ptr = value;
    return Status::OK();
  }

  Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value);

  /// Fill tensor with zeros. Does not support strings.
  Status Zero();

  /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
  /// \tparam T
  /// \param[in] value
  template <typename T>
  Status Fill(const T &value);

  /// Getter function for shape
  /// \return
  const TensorShape &shape() const { return shape_; }

  /// Check if tensor has data
  /// \return bool - true if tensor is not empty
  bool HasData() const { return data_ != nullptr; }

  /// Reshape the tensor. The given shape should have the same number of elements as the Tensor
  /// \param shape
  virtual Status Reshape(const TensorShape &shape);

  /// \return number of elements in this tensor
  dsize_t Size() const { return shape().NumOfElements(); }

  /// \return the number of bytes this tensor needs
  dsize_t SizeInBytes() const {
    if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
    return data_end_ - data_;
  }

  /// \return the rank of the tensor
  dsize_t Rank() const { return shape().Rank(); }

  /// Get the starting memory address as a constant for the data of the tensor. This potentially
  /// drives an allocation if the data is null.
  /// \return const unsigned char*
  const unsigned char *GetBuffer() const { return data_; }

  /// Getter of the type
  /// \return
  // cppcheck-suppress shadowFunction
  DataType type() const { return type_; }

  /// Provide stream operator for displaying it
  /// \param output stream
  /// \param so the Tensor object to be printed
  /// \return output stream
  friend std::ostream &operator<<(std::ostream &out, const Tensor &so) {
    so.Print(out);
    return out;
  }

  /// Invalidate this Tensor by setting the type and shape to unknown and the data to null.
  /// Calling this method will make the Tensor and its data inaccessible, use it with caution.
  void Invalidate();

  /// Copy input tensor into self at the location index.
  /// Index is a vector of axes which can be incomplete:
  /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
  /// \param index
  /// \param input
  /// \param partial_insert: boolean to determine if insertion along the full axis is enforced
  /// \return Status code
  Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input,
                      const bool partial_insert = false);

  /// Find the address of the given index. Used in InsertTensor.
  /// Example:
  ///     Tensor t = [[1,2],[3,4]], StartAddrOfIndex({0}) -> &1
  /// \param index incomplete index
  /// \param output: start_addr_of_index
  /// \param output: remaining
  /// \return Status code
  Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);

  /// Expand the shape of the Tensor with one extra dimension.
  /// For example, if the shape is <512,512,3>:
  ///     *- ExpandDim(0) gives: <1,512,512,3>
  ///     *- ExpandDim(1) gives: <512,1,512,3>
  ///     *- ExpandDim(3) gives: <512,512,3,1>
  /// \param axis location of the dim
  virtual Status ExpandDim(const dsize_t &axis);

  virtual void Squeeze();

  /// Calculates the strides of the Tensor
  /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
  /// The strides will be {4,2,1}.
  /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 bytes)
  /// The strides will be {16,8,4}.
  /// \return vector of integers
  std::vector<dsize_t> Strides() const;

  std::string ToString() {
    std::stringstream ss;
    this->Print(ss);
    return ss.str();
  }

  /// Handle negative indices.
  /// \param[in] index
  /// \param[in] length axis length used to modify index
  /// \return dsize_t modified index
  static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }

  /// Handle negative indices for a vector of indices.
  /// \param[in] index_vector vector of indices
  /// \param[in] length axis lengths used to modify the indices
  /// \return std::vector<dsize_t> modified vector of indices
  static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) {
    std::vector<dsize_t> indices(index_vector.size(), 0);
    for (int i = 0; i < index_vector.size(); i++) {
      indices[i] = HandleNeg(index_vector[i], length[i]);
    }
    return indices;
  }

  /// Slice tensor based on the given indices. Copy the sliced data into the out tensor.
  /// Based on the type of tensor, SliceNumeric or SliceString will be called.
  /// \param[out] out Tensor
  /// \param[in] slice_options vector of SliceOption objects
  /// \return Status error code
  // cppcheck-suppress passedByValue
  Status Slice(TensorPtr *out, const std::vector<mindspore::dataset::SliceOption> slice_options);

  /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
  /// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
  /// \tparam T type of values in the Tensor Iterator
  template <typename T, bool = true>
  class TensorIterator {
   public:
    using iterator_category = std::random_access_iterator_tag;
    using value_type = T;
    using difference_type = ptrdiff_t;
    using pointer = T *;
    using reference = T &;

    explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast<T *>(ptr); }

    TensorIterator(const TensorIterator<T> &raw_iterator) { ptr_ = raw_iterator.ptr_; }

    ~TensorIterator() = default;

    // cppcheck-suppress operatorEqVarError
    TensorIterator<T> &operator=(const TensorIterator<T> &rhs) {
      ptr_ = rhs.ptr_;
      return *this;
    }

    TensorIterator<T> &operator=(T *rhs) {
      ptr_ = rhs;
      return *this;
    }

    bool operator==(const TensorIterator<T> &rhs) { return ptr_ == rhs.ptr_; }

    bool operator!=(const TensorIterator<T> &rhs) { return !(*this == rhs); }

    operator bool() const { return ptr_ != nullptr; }

    T &operator*() { return *ptr_; }

    const T &operator*() const { return *ptr_; }

    T *operator->() { return ptr_; }

    TensorIterator<T> &operator+=(const ptrdiff_t &inc) {
      ptr_ += inc;
      return *this;
    }

    TensorIterator<T> &operator-=(const ptrdiff_t &inc) {
      ptr_ -= inc;
      return *this;
    }

    TensorIterator<T> &operator++() {
      ++ptr_;
      return *this;
    }

    TensorIterator<T> &operator--() {
      --ptr_;
      return *this;
    }

    TensorIterator<T> operator++(int) {
      auto temp(*this);
      ++ptr_;
      return temp;
    }

    TensorIterator<T> operator--(int) {
      auto temp(*this);
      --ptr_;
      return temp;
    }

    TensorIterator<T> operator+(const ptrdiff_t &inc) {
      auto oldPtr = ptr_;
      ptr_ += inc;
      auto temp(*this);
      ptr_ = oldPtr;
      return temp;
    }

    TensorIterator<T> operator-(const ptrdiff_t &inc) {
      auto oldPtr = ptr_;
      ptr_ -= inc;
      auto temp(*this);
      ptr_ = oldPtr;
      return temp;
    }

   protected:
    T *ptr_;
  };

  // Specialization of TensorIterator for strings. It returns std::string_view for every item.
  // \tparam DUMMY, used to be able to specialize the inner class
  template <bool DUMMY>
  class TensorIterator<std::string_view, DUMMY> {
   public:
    using iterator_category = std::random_access_iterator_tag;
    using value_type = std::string_view;
    using difference_type = ptrdiff_t;
    using pointer = std::string_view *;
    using reference = std::string_view &;

    explicit TensorIterator(uchar *data = nullptr, dsize_t index = 0) {
      data_ = reinterpret_cast<const char *>(data);
      // cppcheck-suppress useInitializationList
      index_ = index;
    }

    TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) {
      data_ = raw_iterator.data_;
      // cppcheck-suppress useInitializationList
      index_ = raw_iterator.index_;
    }

    ~TensorIterator() = default;

    bool operator==(const TensorIterator<std::string_view> &rhs) { return data_ == rhs.data_ && index_ == rhs.index_; }

    bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); }

    operator bool() const { return data_ != nullptr; }

    std::string_view operator*() const {
      auto offset_ = reinterpret_cast<const offset_t *>(data_);
      offset_t start = offset_[index_];
      return std::string_view{data_ + start};
    }

    TensorIterator<std::string_view> &operator+=(const dsize_t &inc) {
      index_ += inc;
      return *this;
    }

    TensorIterator<std::string_view> &operator-=(const dsize_t &inc) {
      index_ -= inc;
      return *this;
    }

    TensorIterator<std::string_view> &operator++() {
      ++index_;
      return *this;
    }

    TensorIterator<std::string_view> &operator--() {
      --index_;
      return *this;
    }

    TensorIterator<std::string_view> operator++(int) {
      auto temp(*this);
      ++index_;
      return temp;
    }

    TensorIterator<std::string_view> operator--(int) {
      auto temp(*this);
      --index_;
      return temp;
    }

    TensorIterator<std::string_view> operator+(const dsize_t &inc) {
      auto oldPtr = index_;
      index_ += inc;
      auto temp(*this);
      index_ = oldPtr;
      return temp;
    }

    TensorIterator<std::string_view> operator-(const dsize_t &inc) {
      auto oldPtr = index_;
      index_ -= inc;
      auto temp(*this);
      index_ = oldPtr;
      return temp;
    }

   protected:
    dsize_t index_;
    const char *data_;
  };

  /// Return a TensorIterator that points to the start of the Tensor.
  /// It is the user's responsibility to use the correct type that matches the Tensor type.
  /// \tparam T The type of values in the Tensor
  /// \return TensorIterator
  template <typename T>
  TensorIterator<T> begin() {
    return TensorIterator<T>(data_);
  }

  /// Return a linear iterator that points to the place after the last element of the Tensor.
  /// \tparam T The type of values in the Tensor
  /// \return TensorIterator
  template <typename T>
  TensorIterator<T> end() {
    return TensorIterator<T>(data_end_);
  }

  /// Copies the last dimension at `index` from Tensor `src` to this Tensor.
  /// \param[in] src Tensor
  /// \param[in] index vector to the start of the dimension. The last dim should be 0
  /// \return Status
  Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);

 protected:
  /// Allocate memory for the tensor using the data_allocator
  /// \param[in] length number of bytes to be allocated
  /// \return Error Status
  Status AllocateBuffer(const dsize_t &length);

  /// Get the starting memory address for the data of the tensor. This potentially
  /// drives an allocation if the data is null.
  /// \return unsigned char*
  unsigned char *GetMutableBuffer() { return data_; }

  /// A function that prints Tensor recursively, first called by Print
  /// \param[in] out
  /// \param[in] cur_dim
  /// \param[in] cur_index
  void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;

  /// A function that prints info about the tensor
  /// \param[out] out output stream
  void Print(std::ostream &out) const;

  /// A function that prints the value as specified by its index
  /// \param[in] index vector representing the index
  /// \param[out] out
  void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;

  /// Get pointer to item located at `index`; the caller needs to provide the type.
  /// \tparam T
  /// \param[in] index vector<dsize_t>
  /// \return return a pointer to the item specified at index of type `T`
  template <typename T>
  Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;

  /// Get pointer to string located at `index` and the length of the string
  /// \param[in] index vector<dsize_t>
  /// \return return a pointer to the string specified at index and the length of the string
  Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;

  /// Given a flat index of an item string, return the start and length of the item
  /// \param[in] index flat index of the item
  /// \param[out] start address of the string
  /// \param[out] length of the string
  Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;

  /// Skip the offsets and return the start of the buffer where the real strings are stored. The caller needs to check
  /// that the tensor's type is a string, otherwise an undefined address would be returned.
  /// \return address of the first string of the tensor.
  uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }

  /// all access to shape_ should be via shape
  TensorShape shape_;
  /// data type of tensor
  DataType type_;
  /// pointer to the start of the physical data
  unsigned char *data_;
  /// An allocator for data_
  CharAllocPtr data_allocator_;
  /// pointer to the end of the physical data
  unsigned char *data_end_ = nullptr;

 private:
  /// Slice numeric tensors.
  Status SliceNumeric(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);

  /// Slice string tensors
  Status SliceString(TensorPtr *out, const std::vector<std::vector<dsize_t>> &indices, const TensorShape &shape);

  /// Copy raw data of an array based on shape and strides to the destination pointer
  /// \param[out] dst Pointer to the destination array where the content is to be copied
  /// \param[in] src Pointer to the source of strided array to be copied
  /// \param[in] shape shape of the source array
  /// \param[in] strides strides of the source array
  /// \param[in] type_size number of bytes needed to store one array element's type
  /// \return Status Code
  static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                 std::vector<dsize_t> strides, uint8_t type_size);

  /// const of the size of the offset variable
  static constexpr uint8_t kOffsetSize = sizeof(offset_t);
};
template <>
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
  return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
}

/// Create a string scalar Tensor from the given value.
/// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
  return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
}
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
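A minimal sketch of creating tensors with the factory functions above and reading them back with GetItemAt and the TensorIterator; the values and shapes are arbitrary examples.

// Sketch only: tensor creation, element access, and linear iteration.
void TensorBasics() {
  using mindspore::dataset::DataType;
  using mindspore::dataset::Tensor;
  using mindspore::dataset::TensorShape;

  // 2 x 3 float tensor from a flat vector.
  std::shared_ptr<Tensor> t;
  (void)Tensor::CreateFromVector(std::vector<float>{1, 2, 3, 4, 5, 6}, TensorShape({2, 3}), &t);

  float v = 0.0;
  (void)t->GetItemAt<float>(&v, {1, 2});  // last element of the last row: v == 6

  // Linear iteration in row-major order.
  float sum = 0.0;
  for (auto it = t->begin<float>(); it != t->end<float>(); ++it) {
    sum += *it;
  }
  (void)sum;

  // Empty uint8 tensor with a known shape, then filled with a constant.
  std::shared_ptr<Tensor> u;
  (void)Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_UINT8), &u);
  (void)u->Fill<uint8_t>(255);
}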
@@ -0,0 +1,83 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_

#include <memory>
#include <vector>

#include "include/constants.h"

namespace mindspore {
namespace dataset {
class Slice {
 public:
  Slice() : start_(0), stop_(0), step_(0) {}
  Slice(dsize_t start, dsize_t stop, dsize_t step) : start_(start), stop_(stop), step_(step) {}
  Slice(dsize_t start, dsize_t stop) : start_(start), stop_(stop), step_(1) {}
  explicit Slice(dsize_t stop) : start_(0), stop_(stop), step_(1) {}
  Slice(Slice const &slice) = default;

  ~Slice() = default;

  bool valid() const { return step_ != 0; }
  dsize_t start_;
  dsize_t stop_;
  dsize_t step_;
};

class SliceOption {
 public:
  explicit SliceOption(bool all) : all_(all) {}
  explicit SliceOption(std::vector<dsize_t> indices) : indices_(indices) {}
  explicit SliceOption(Slice slice) : slice_(slice) {}
  SliceOption(SliceOption const &slice) = default;

  ~SliceOption() = default;

  // only one of the following will be valid
  // given indices to slice the Tensor.
  std::vector<dsize_t> indices_ = {};
  // Slice object. All start, stop and step are 0 if invalid.
  Slice slice_;
  bool all_ = false;
};

/// Recursive helper function to generate indices based on vector of SliceOptions. It recursively iterates through each
/// range represented by slice_options to generate a list of indices to be sliced.
/// \param[out] matrix Generated nested vector of indices
///     Example: For a 4 x 2 tensor, and with slice_list = {SliceOption({0})} (the first row), matrix will become
///     {{0}}. For slice_list = {SliceOption(all), SliceOption({0})} (the first column), matrix will become
///     {{0, 0}, {1, 0}, {2, 0}, {3, 0}}.
///     For slice_list = {SliceOption({0, 2})}, matrix will become {{0}, {2}}. The size of each nested array is always
///     equal to (slice_list).size().
/// \param[in] depth used to keep track of recursion level
/// \param[in] numbers vector used to represent current index
/// \param[in] matrix 2D vector to be populated with desired indices
/// \param[in] slice_options vector of SliceOption objects
void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers, const std::vector<SliceOption> &slice_list,
                          std::vector<std::vector<dsize_t>> *matrix);

/// Generate indices based on vector of SliceOptions
/// Calls the recursive helper function IndexGeneratorHelper
/// \param[in] slice_list vector of SliceOption objects. Note: If the user passes
///     {SliceOption(true), SliceOption(true)}, it will return a M x 2 vector, instead of reducing it to
///     {SliceOption(true)} first to only generate a M x 1 vector.
/// \return std::vector<std::vector<dsize_t>> 2D vector of generated indices, M x (slice_list).size()
std::vector<std::vector<dsize_t>> IndexGenerator(const std::vector<SliceOption> &slice_list);
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_HELPERS_H_
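A minimal sketch of describing a slice of a 4 x 2 tensor with SliceOption objects and expanding it into explicit indices with IndexGenerator, mirroring the example in the comments above.

// Sketch only: "all rows, column 0" expands to {{0,0}, {1,0}, {2,0}, {3,0}}.
void SliceFirstColumn() {
  using mindspore::dataset::dsize_t;
  using mindspore::dataset::Slice;
  using mindspore::dataset::SliceOption;

  std::vector<SliceOption> slice_list = {SliceOption(true), SliceOption(std::vector<dsize_t>{0})};
  std::vector<std::vector<dsize_t>> indices = mindspore::dataset::IndexGenerator(slice_list);
  (void)indices;

  // A Slice object can express a strided range instead of explicit indices:
  SliceOption every_other_row(Slice(0, 4, 2));  // rows 0 and 2
  (void)every_other_row;
}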
@@ -0,0 +1,176 @@
/**
 * Copyright 2019 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_

#include <cstdint>
#include <ostream>
#include <sstream>
#include <string>
#include <vector>

#include "include/constants.h"
#include "include/status.h"
#include "include/allocator.h"

namespace mindspore {
namespace dataset {

using IntAlloc = Allocator<dsize_t>;
// Class that represents a shape of a Tensor. A shape can be:
// -# Known shape (mKnown = true)
//      -# Scalar --> empty vector        --> <>
//      -# n-Dim  --> not empty vector    --> <d1, d2, d2, d3, ...> where di is >= 0\n
//         Example: <1,2>, <1>, <1,13,10,11,1>
// -# Unknown shape (mKnown = false)
//      -# Rank is unknown            --> empty vector      --> <>
//      -# one or more dim is unknown --> not empty vector  --> <d1, d2, d2, d3, ...> where di is unknown\n
//         Example: <3,?> (the 1st dim is unknown)\n
//                  <2,?,?,?> (all dims but the 0th dim are unknown)

/// \brief TensorShape supports any dim > 0 and < 2^31-1

class TensorShape {
 public:
  static constexpr dsize_t kDimUnknown = -1;  // constant for an unknown dimension

  // Force the compiler to not create a no-arg constructor
  TensorShape() = delete;

  /// \brief Create a Shape from an initialization list (e.g., TensorShape s = {2,2}).
  ///     If one of the dims is set to DIM_UNKNOWN, the shape will be flagged as unknown.
  /// \param[in] list
  explicit TensorShape(const std::initializer_list<dsize_t> &list);

  /// \brief Create a Shape from a vector (e.g., TensorShape s = std::vector<dsize_t>({2,2}) ).
  ///     If one of the dims is set to DIM_UNKNOWN, the shape will be flagged as unknown.
  /// \param[in] list
  explicit TensorShape(const std::vector<dsize_t> &list);

  /// \brief Copy constructor
  /// \param[in] shape
  TensorShape(const TensorShape &shape);

  ~TensorShape() = default;

  /// \brief Create a scalar Shape (i.e., empty shape with mKnown = true)
  /// \return TensorShape
  static TensorShape CreateScalar() { return TensorShape({}); }

  /// \brief Create a shape with an unknown rank.
  /// \return TensorShape
  static TensorShape CreateUnknownRankShape();

  /// \brief Create an unknown shape with a known rank.
  /// \return TensorShape
  static TensorShape CreateUnknownShapeWithRank(dsize_t rank);

  /// \brief Insert a new dim into a copy of the current shape.
  /// \param[in] dim to be added
  /// \param[in] axis the index where dim should be added
  /// \return New modified shape
  TensorShape InsertDim(dsize_t axis, dsize_t dim) const;

  /// \brief Insert a new dim at index 0. For example, <2,4> --> PrependDim(4) --> <4,2,4>
  /// \param[in] dim
  /// \return
  TensorShape PrependDim(dsize_t dim) const;

  /// \brief Insert a new dim at the end of the shape. For example, <2,4> --> AppendDim(4) --> <2,4,4>
  /// \param[in] dim
  /// \return
  TensorShape AppendDim(dsize_t dim) const;

  dsize_t Size() const { return raw_shape_.size(); }

  dsize_t Rank() const { return raw_shape_.size(); }

  bool known() const { return known_; }

  bool empty() const { return raw_shape_.empty(); }

  dsize_t NumOfElements() const;

  bool operator==(const TensorShape &rhs) const { return known_ == rhs.known_ && raw_shape_ == rhs.raw_shape_; }
|
||||
|
||||
bool operator!=(const TensorShape &rhs) const { return !(rhs == *this); }
|
||||
|
||||
dsize_t operator[](const dsize_t index) const {
|
||||
if (index < 0) return raw_shape_[raw_shape_.size() + index];
|
||||
return raw_shape_[index];
|
||||
}
|
||||
|
||||
/// \brief Return the Shape as a vector
|
||||
/// \return
|
||||
std::vector<dsize_t> AsVector() const;
|
||||
|
||||
/// \brief Returns the class info as a string
|
||||
/// \return
|
||||
std::string ToString() const {
|
||||
std::stringstream ss;
|
||||
ss << *this;
|
||||
return ss.str();
|
||||
}
|
||||
|
||||
/// \brief Actual print function used by operator<<
|
||||
/// \param out output string stream
|
||||
void Print(std::ostream &out) const;
|
||||
|
||||
/// \brief << Stream output operator overload
|
||||
/// This allows you to print the info using stream operators
|
||||
/// \param[in] out - reference to the output stream being overloaded
|
||||
/// \param[in] rO - reference to the TensorShape to display
|
||||
/// \return - the output stream must be returned
|
||||
friend std::ostream &operator<<(std::ostream &out, const TensorShape &so) {
|
||||
so.Print(out);
|
||||
return out;
|
||||
}
|
||||
|
||||
/// \brief Checks if the given index is a valid index for this tensor.
|
||||
/// For example: Tensor<3,4> Index<1,1> is valid. But Index<4,1> or <1> are not.
|
||||
/// \param[in] index
|
||||
/// \return bool
|
||||
bool IsValidIndex(const std::vector<dsize_t> &index) const;
|
||||
|
||||
TensorShape Squeeze() const;
|
||||
|
||||
std::vector<dsize_t> Strides() const;
|
||||
|
||||
/// \brief Returns the location of the item assuming row major memory layout.
|
||||
/// \param[in] index
|
||||
/// \param[out] flat_index
|
||||
/// \return
|
||||
Status ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const;
|
||||
|
||||
private:
|
||||
// True if known and valid shape, false otherwise
|
||||
bool known_;
|
||||
// Vector to keep the dims of the shape.
|
||||
std::vector<dsize_t, IntAlloc> raw_shape_;
|
||||
// Vector to keep the strides of the shape. The size is rank+1
|
||||
std::vector<dsize_t, IntAlloc> strides_;
|
||||
|
||||
/// \brief Internal utility function to iterate over a list,
|
||||
/// check if the dim is valid and then insert it into the shape.
|
||||
/// \param[in] list Iterable list
|
||||
/// \return true if the shape is valid and no overflow would be generated when counting the number of elements.
|
||||
/// False otherwise.
|
||||
template <typename T>
|
||||
void AddListToShape(const T &list);
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_SHAPE_H_
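
A short sketch of the TensorShape API above (editorial illustration, not part of this commit); the include path is inferred from the header guard, the 4 x 2 shape is arbitrary, and Status::IsOk() is assumed to be available from include/status.h.

// Editorial example only; include path inferred from the header guard above.
#include <iostream>
#include "minddata/dataset/core/tensor_shape.h"

int main() {
  using mindspore::dataset::TensorShape;
  using mindspore::dataset::dsize_t;

  TensorShape shape({4, 2});                   // known 4 x 2 shape
  TensorShape expanded = shape.PrependDim(3);  // becomes <3,4,2>
  std::cout << expanded.ToString() << " rank=" << expanded.Rank()
            << " elements=" << expanded.NumOfElements() << std::endl;

  // Row-major offset of index (1, 1) in the 4 x 2 shape: 1 * 2 + 1 = 3.
  dsize_t flat_index = 0;
  if (shape.ToFlatIndex({1, 1}, &flat_index).IsOk()) {
    std::cout << "flat index = " << flat_index << std::endl;
  }
  return 0;
}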

@@ -0,0 +1,252 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_

#include <memory>
#include <string>
#include <vector>
#include "include/constants.h"
#include "include/status.h"

namespace mindspore {
namespace dataset {

class TensorOp;

// Char arrays storing name of corresponding classes (in alphabetical order)
constexpr char kComposeOperation[] = "Compose";
constexpr char kDuplicateOperation[] = "Duplicate";
constexpr char kOneHotOperation[] = "OneHot";
constexpr char kPreBuiltOperation[] = "PreBuilt";
constexpr char kRandomApplyOperation[] = "RandomApply";
constexpr char kRandomChoiceOperation[] = "RandomChoice";
constexpr char kRandomSelectSubpolicyOperation[] = "RandomSelectSubpolicy";
constexpr char kTypeCastOperation[] = "TypeCast";
constexpr char kUniqueOperation[] = "Unique";

// Abstract class to represent a tensor operation in the data pipeline.
class TensorOperation : public std::enable_shared_from_this<TensorOperation> {
public:
/// \brief Constructor
TensorOperation() : random_op_(false) {}

/// \brief Constructor
explicit TensorOperation(bool random) : random_op_(random) {}

/// \brief Destructor
~TensorOperation() = default;

/// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object.
/// \return shared pointer to the newly created TensorOp.
virtual std::shared_ptr<TensorOp> Build() = 0;

virtual Status ValidateParams() = 0;

virtual std::string Name() const = 0;

/// \brief Check whether the operation is deterministic.
/// \return true if this op is a random op (returns non-deterministic result e.g. RandomCrop)
bool IsRandomOp() const { return random_op_; }

protected:
bool random_op_;
};

// Helper function to validate fill value
Status ValidateVectorFillvalue(const std::string &transform_name, const std::vector<uint8_t> &fill_value);

// Helper function to validate probability
Status ValidateProbability(const std::string &transform_name, const float &probability);

// Helper function to validate padding
Status ValidateVectorPadding(const std::string &transform_name, const std::vector<int32_t> &padding);

// Helper function to validate size
Status ValidateVectorPositive(const std::string &transform_name, const std::vector<int32_t> &size);

// Helper function to validate transforms
Status ValidateVectorTransforms(const std::string &transform_name,
const std::vector<std::shared_ptr<TensorOperation>> &transforms);

// Helper function to compare float value
bool CmpFloat(const float &a, const float &b, float epsilon = 0.0000000001f);

// Transform operations for performing data transformation.
namespace transforms {

// Transform Op classes (in alphabetical order)
class ComposeOperation;
class DuplicateOperation;
class OneHotOperation;
class PreBuiltOperation;
class RandomApplyOperation;
class RandomChoiceOperation;
class TypeCastOperation;

/// \brief Function to create a Compose TensorOperation.
/// \notes Compose a list of transforms into a single transform.
/// \param[in] transforms A vector of transformations to be applied.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<ComposeOperation> Compose(const std::vector<std::shared_ptr<TensorOperation>> &transforms);

/// \brief Function to create a Duplicate TensorOperation.
/// \notes Duplicate the input tensor to a new output tensor.
/// The input tensor is carried over to the output list.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<DuplicateOperation> Duplicate();

/// \brief Function to create a OneHot TensorOperation.
/// \notes Convert the labels into OneHot format.
/// \param[in] num_classes number of classes.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<OneHotOperation> OneHot(int32_t num_classes);

/// \brief Function to create a RandomApply TensorOperation.
/// \notes Randomly perform a series of transforms with a given probability.
/// \param[in] transforms A vector of transformations to be applied.
/// \param[in] prob The probability to apply the transformation list (default=0.5)
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomApplyOperation> RandomApply(const std::vector<std::shared_ptr<TensorOperation>> &transforms,
double prob = 0.5);

/// \brief Function to create a RandomChoice TensorOperation.
/// \notes Randomly selects one transform from a list of transforms to perform operation.
/// \param[in] transforms A vector of transformations to be chosen from to apply.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RandomChoiceOperation> RandomChoice(const std::vector<std::shared_ptr<TensorOperation>> &transforms);

/// \brief Function to create a TypeCast TensorOperation.
/// \notes Tensor operation to cast to a given MindSpore data type.
/// \param[in] data_type mindspore.dtype to be cast to.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<TypeCastOperation> TypeCast(std::string data_type);

/* ####################################### Derived TensorOperation classes ################################# */

class ComposeOperation : public TensorOperation {
public:
explicit ComposeOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);

~ComposeOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kComposeOperation; }

private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};

class DuplicateOperation : public TensorOperation {
public:
DuplicateOperation() = default;

~DuplicateOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kDuplicateOperation; }
};

class OneHotOperation : public TensorOperation {
public:
explicit OneHotOperation(int32_t num_classes_);

~OneHotOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kOneHotOperation; }

private:
float num_classes_;
};

class PreBuiltOperation : public TensorOperation {
public:
explicit PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op);

~PreBuiltOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kPreBuiltOperation; }

private:
std::shared_ptr<TensorOp> op_;
};

class RandomApplyOperation : public TensorOperation {
public:
explicit RandomApplyOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms, double prob);

~RandomApplyOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kRandomApplyOperation; }

private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
double prob_;
};

class RandomChoiceOperation : public TensorOperation {
public:
explicit RandomChoiceOperation(const std::vector<std::shared_ptr<TensorOperation>> &transforms);

~RandomChoiceOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kRandomChoiceOperation; }

private:
std::vector<std::shared_ptr<TensorOperation>> transforms_;
};
class TypeCastOperation : public TensorOperation {
public:
explicit TypeCastOperation(std::string data_type);

~TypeCastOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kTypeCastOperation; }

private:
std::string data_type_;
};
} // namespace transforms
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_TRANSFORMS_H_
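
A short sketch of how the factory functions above compose (editorial illustration, not part of this commit); the number of classes, the "float32" type string and the Dataset::Map() step mentioned in the comment are illustrative assumptions.

// Editorial example only.
#include <memory>
#include "include/transforms.h"

int main() {
  using mindspore::dataset::TensorOperation;
  namespace transforms = mindspore::dataset::transforms;

  // One-hot encode integer labels into 10 classes, then cast the result to float32,
  // and bundle both steps into a single Compose operation.
  std::shared_ptr<TensorOperation> one_hot = transforms::OneHot(10);
  std::shared_ptr<TensorOperation> cast = transforms::TypeCast("float32");
  std::shared_ptr<TensorOperation> label_pipeline = transforms::Compose({one_hot, cast});

  // A real pipeline would hand label_pipeline to Dataset::Map() on the label column.
  return label_pipeline != nullptr ? 0 : 1;
}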

@@ -0,0 +1,198 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_

#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "include/transforms.h"

namespace mindspore {
namespace dataset {

// Transform operations for performing computer vision.
namespace vision {

// Char arrays storing name of corresponding classes (in alphabetical order)
constexpr char kCenterCropOperation[] = "CenterCrop";
constexpr char kCropOperation[] = "Crop";
constexpr char kDecodeOperation[] = "Decode";
constexpr char kNormalizeOperation[] = "Normalize";
constexpr char kResizeOperation[] = "Resize";
constexpr char kRotateOperation[] = "Rotate";
// Transform Op classes (in alphabetical order)
class CenterCropOperation;
class CropOperation;
class DecodeOperation;
class NormalizeOperation;
class ResizeOperation;
class RotateOperation;

/// \brief Function to create a CenterCrop TensorOperation.
/// \notes Crops the input image at the center to the given size.
/// \param[in] size A vector representing the output size of the cropped image.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<CenterCropOperation> CenterCrop(std::vector<int32_t> size);

/// \brief Function to create a Crop TensorOp
/// \notes Crop an image based on location and crop size
/// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor}
/// \param[in] size Size of the cropped area.
/// If size is a single value, a square crop of size (size, size) is returned.
/// If size has 2 values, it should be (height, width).
/// \return Shared pointer to the current TensorOp
std::shared_ptr<CropOperation> Crop(std::vector<int32_t> coordinates, std::vector<int32_t> size);

/// \brief Function to create a Decode TensorOperation.
/// \notes Decode the input image in RGB mode.
/// \param[in] rgb A boolean of whether to decode in RGB mode or not.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<DecodeOperation> Decode(bool rgb = true);

/// \brief Function to create a Normalize TensorOperation.
/// \notes Normalize the input image with respect to mean and standard deviation.
/// \param[in] mean A vector of mean values for each channel, w.r.t channel order.
/// The mean values must be in range [0.0, 255.0].
/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order.
/// The standard deviation values must be in range (0.0, 255.0].
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<NormalizeOperation> Normalize(std::vector<float> mean, std::vector<float> std);

/// \brief Function to create a Resize TensorOperation.
/// \notes Resize the input image to the given size.
/// \param[in] size A vector representing the output size of the resized image.
/// If size is a single value, the image will be resized to this value with
/// the same image aspect ratio. If size has 2 values, it should be (height, width).
/// \param[in] interpolation An enum for the mode of interpolation
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<ResizeOperation> Resize(std::vector<int32_t> size,
InterpolationMode interpolation = InterpolationMode::kLinear);
/// \brief Applies a rotate transformation to an image.
/// \notes Rotate the input image using a specified angle id.
/// \return Shared pointer to the current TensorOperation.
std::shared_ptr<RotateOperation> Rotate();

class CenterCropOperation : public TensorOperation {
public:
explicit CenterCropOperation(std::vector<int32_t> size);

~CenterCropOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kCenterCropOperation; }

private:
std::vector<int32_t> size_;
};

class CropOperation : public TensorOperation {
public:
CropOperation(std::vector<int32_t> coordinates, std::vector<int32_t> size);

~CropOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kCropOperation; }

private:
std::vector<int32_t> coordinates_;
std::vector<int32_t> size_;
};
class DecodeOperation : public TensorOperation {
public:
explicit DecodeOperation(bool rgb = true);

~DecodeOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kDecodeOperation; }

private:
bool rgb_;
};

class NormalizeOperation : public TensorOperation {
public:
NormalizeOperation(std::vector<float> mean, std::vector<float> std);

~NormalizeOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kNormalizeOperation; }

private:
std::vector<float> mean_;
std::vector<float> std_;
};

class ResizeOperation : public TensorOperation {
public:
explicit ResizeOperation(std::vector<int32_t> size,
InterpolationMode interpolation_mode = InterpolationMode::kLinear);

~ResizeOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kResizeOperation; }

private:
std::vector<int32_t> size_;
InterpolationMode interpolation_;
};

class RotateOperation : public TensorOperation {
public:
RotateOperation();

~RotateOperation() = default;

std::shared_ptr<TensorOp> Build() override;

Status ValidateParams() override;

std::string Name() const override { return kRotateOperation; }

void setAngle(uint64_t angle_id);

private:
std::shared_ptr<TensorOp> rotate_op;
};
} // namespace vision
} // namespace dataset
} // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_VISION_LITE_H_
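
A short sketch chaining the lite vision operations declared above (editorial illustration, not part of this commit); the crop size and the mean/std values are arbitrary, and the downstream Dataset::Map() call is only mentioned in a comment.

// Editorial example only.
#include <memory>
#include <vector>
#include "include/vision_lite.h"

int main() {
  using mindspore::dataset::TensorOperation;
  namespace vision = mindspore::dataset::vision;

  // Decode to RGB, resize the short side to 256, center-crop to 224 x 224,
  // then normalize each channel (mean/std values here are illustrative only).
  std::shared_ptr<TensorOperation> decode = vision::Decode(true);
  std::shared_ptr<TensorOperation> resize = vision::Resize({256});
  std::shared_ptr<TensorOperation> crop = vision::CenterCrop({224, 224});
  std::shared_ptr<TensorOperation> normalize =
      vision::Normalize({121.0f, 115.0f, 100.0f}, {70.0f, 68.0f, 71.0f});

  // These would normally be passed to Dataset::Map({decode, resize, crop, normalize}) on the image column.
  std::vector<std::shared_ptr<TensorOperation>> ops = {decode, resize, crop, normalize};
  return ops.size() == 4 ? 0 : 1;
}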

@@ -14,6 +14,8 @@
* limitations under the License.
*/
#include "minddata/dataset/util/task.h"

#include <unistd.h>
#include "utils/ms_utils.h"
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/task_manager.h"

@@ -99,233 +99,148 @@ AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/util MINDDATA_UTIL_SRC_FILES)

AUX_SOURCE_DIRECTORY(${MINDDATA_DIR}/kernels/image/lite_cv MINDDATA_KERNELS_IMAGE_LITE_CV_FILES)

if(BUILD_MINDDATA STREQUAL "full")
set(BUILD_MINDDATA "wrapper")
endif()

if(BUILD_MINDDATA STREQUAL "full")
include_directories("${CMAKE_SOURCE_DIR}/../ccsrc/minddata/dataset/kernels/image")
list(REMOVE_ITEM MINDDATA_API_SRC_FILES
"${MINDDATA_DIR}/api/text.cc"
"${MINDDATA_DIR}/api/minddata_eager.cc"
)
include_directories("${MINDDATA_DIR}/kernels/image")
include_directories("${MINDDATA_DIR}/liteapi")
include_directories("${TOP_DIR}")

list(REMOVE_ITEM MINDDATA_CALLBACK_SRC_FILES
"${MINDDATA_DIR}/callback/py_ds_callback.cc"
)
set(MINDDATA_FULL_SRC
${TOP_DIR}/mindspore/lite/src/cxx_api/types.cc
${TOP_DIR}/mindspore/lite/src/cxx_api/tensor/tensor_impl.cc
${TOP_DIR}/mindspore/lite/src/tensor.cc
${CORE_DIR}/utils/status.cc
${MINDDATA_DIR}/api/datasets.cc
${MINDDATA_DIR}/kernels/data/data_utils.cc
${MINDDATA_DIR}/api/samplers.cc
${MINDDATA_DIR}/api/iterator.cc
${MINDDATA_DIR}/api/execute.cc
${MINDDATA_DIR}/core/de_tensor.cc
${MINDDATA_DIR}/core/tensor_shape.cc
${MINDDATA_DIR}/util/memory_pool.cc
${MINDDATA_DIR}/core/config_manager.cc
${MINDDATA_DIR}/core/data_type.cc
${MINDDATA_DIR}/core/tensor_helpers.cc
${MINDDATA_DIR}/core/tensor.cc
${MINDDATA_DIR}/core/global_context.cc
${MINDDATA_DIR}/core/client.cc
${MINDDATA_DIR}/engine/consumers/tree_consumer.cc
${MINDDATA_DIR}/engine/ir/datasetops/dataset_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/epoch_ctrl_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/batch_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/map_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/root_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/repeat_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/project_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/shuffle_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/source/album_node.cc
${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc
${MINDDATA_DIR}/engine/datasetops/dataset_op.cc
${MINDDATA_DIR}/engine/datasetops/repeat_op.cc
${MINDDATA_DIR}/engine/datasetops/epoch_ctrl_op.cc
${MINDDATA_DIR}/engine/datasetops/device_queue_op.cc
${MINDDATA_DIR}/engine/datasetops/project_op.cc
${MINDDATA_DIR}/engine/datasetops/shuffle_op.cc
${MINDDATA_DIR}/engine/datasetops/pipeline_op.cc
${MINDDATA_DIR}/engine/datasetops/batch_op.cc
${MINDDATA_DIR}/engine/datasetops/parallel_op.cc
${MINDDATA_DIR}/engine/datasetops/map_op/map_op.cc
${MINDDATA_DIR}/engine/datasetops/map_op/cpu_map_job.cc
${MINDDATA_DIR}/engine/datasetops/source/album_op.cc
${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc

list(REMOVE_ITEM MINDDATA_CORE_SRC_FILES
"${MINDDATA_DIR}/core/cv_tensor.cc"
)

list(REMOVE_ITEM MINDDATA_KERNELS_SRC_FILES "${MINDDATA_DIR}/kernels/py_func_op.cc")
list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SRC_FILES
"${MINDDATA_DIR}/engine/datasetops/build_sentence_piece_vocab_op.cc"
"${MINDDATA_DIR}/engine/datasetops/filter_op.cc"
"${MINDDATA_DIR}/engine/datasetops/barrier_op.cc"
"${MINDDATA_DIR}/engine/datasetops/bucket_batch_by_length_op.cc"
"${MINDDATA_DIR}/engine/datasetops/build_vocab_op.cc"
"${MINDDATA_DIR}/engine/datasetops/cache_merge_op.cc"
"${MINDDATA_DIR}/engine/datasetops/cache_base_op.cc"
"${MINDDATA_DIR}/engine/datasetops/cache_lookup_op.cc"
"${MINDDATA_DIR}/engine/datasetops/cache_op.cc"
"${MINDDATA_DIR}/engine/datasetops/concat_op.cc"
"${MINDDATA_DIR}/engine/datasetops/rename_op.cc"
"${MINDDATA_DIR}/engine/datasetops/skip_op.cc"
"${MINDDATA_DIR}/engine/datasetops/take_op.cc"
"${MINDDATA_DIR}/engine/datasetops/zip_op.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES
"${MINDDATA_DIR}/engine/datasetops/source/generator_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/manifest_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/mindrecord_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/tf_reader_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/celeba_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/cifar_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/clue_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/coco_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/csv_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/image_folder_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/mnist_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/random_data_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/text_file_op.cc"
"${MINDDATA_DIR}/engine/datasetops/source/voc_op.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES
"${MINDDATA_DIR}/engine/datasetops/source/sampler/python_sampler.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES
"${MINDDATA_DIR}/engine/opt/post/generator_node_pass.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_OPT_POST_SRC_FILES
"${MINDDATA_DIR}/engine/opt/post/repeat_pass.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_OPT_PRE_SRC_FILES
"${MINDDATA_DIR}/engine/opt/pre/cache_transform_pass.cc"
"${MINDDATA_DIR}/engine/opt/pre/cache_error_pass.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_IR_CACHE_SRC_FILES
"${MINDDATA_DIR}/engine/ir/cache/dataset_cache_impl.cc"
"${MINDDATA_DIR}/engine/ir/cache/pre_built_dataset_cache.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES
"${MINDDATA_DIR}/engine/ir/datasetops/source/generator_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/minddata_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/tf_record_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/voc_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/celeba_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/cifar10_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/cifar100_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/coco_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/csv_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/image_folder_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/manifest_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/mnist_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/random_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/text_file_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/source/clue_node.cc"
)
list(REMOVE_ITEM MINDDATA_KERNELS_IMAGE_SRC_FILES
"${MINDDATA_DIR}/kernels/image/affine_op.cc"
"${MINDDATA_DIR}/kernels/image/auto_contrast_op.cc"
"${MINDDATA_DIR}/kernels/image/bounding_box_op.cc"
"${MINDDATA_DIR}/kernels/image/bounding_box_augment_op.cc"
"${MINDDATA_DIR}/kernels/image/concatenate_op.cc"
"${MINDDATA_DIR}/kernels/image/cut_out_op.cc"
"${MINDDATA_DIR}/kernels/image/cutmix_batch_op.cc"
"${MINDDATA_DIR}/kernels/image/equalize_op.cc"
"${MINDDATA_DIR}/kernels/image/hwc_to_chw_op.cc"
"${MINDDATA_DIR}/kernels/image/image_utils.cc"
"${MINDDATA_DIR}/kernels/image/invert_op.cc"
"${MINDDATA_DIR}/kernels/image/math_utils.cc"
"${MINDDATA_DIR}/kernels/image/mixup_batch_op.cc"
"${MINDDATA_DIR}/kernels/image/pad_op.cc"
"${MINDDATA_DIR}/kernels/image/posterize_op.cc"
"${MINDDATA_DIR}/kernels/image/normalize_pad_op.cc"
"${MINDDATA_DIR}/kernels/image/random_affine_op.cc"
"${MINDDATA_DIR}/kernels/image/random_color_adjust_op.cc"
"${MINDDATA_DIR}/kernels/image/random_crop_and_resize_with_bbox_op.cc"
"${MINDDATA_DIR}/kernels/image/random_crop_decode_resize_op.cc"
"${MINDDATA_DIR}/kernels/image/random_crop_and_resize_op.cc"
"${MINDDATA_DIR}/kernels/image/random_crop_op.cc"
"${MINDDATA_DIR}/kernels/image/random_crop_with_bbox_op.cc"
"${MINDDATA_DIR}/kernels/image/random_horizontal_flip_op.cc"
"${MINDDATA_DIR}/kernels/image/random_horizontal_flip_with_bbox_op.cc"
"${MINDDATA_DIR}/kernels/image/random_posterize_op.cc"
"${MINDDATA_DIR}/kernels/image/random_resize_op.cc"
"${MINDDATA_DIR}/kernels/image/random_rotation_op.cc"
"${MINDDATA_DIR}/kernels/image/random_select_subpolicy_op.cc"
"${MINDDATA_DIR}/kernels/image/random_solarize_op.cc"
"${MINDDATA_DIR}/kernels/image/random_vertical_flip_op.cc"
"${MINDDATA_DIR}/kernels/image/random_vertical_flip_with_bbox_op.cc"
"${MINDDATA_DIR}/kernels/image/random_sharpness_op.cc"
"${MINDDATA_DIR}/kernels/image/rescale_op.cc"
"${MINDDATA_DIR}/kernels/image/rgba_to_bgr_op.cc"
"${MINDDATA_DIR}/kernels/image/rgba_to_rgb_op.cc"
"${MINDDATA_DIR}/kernels/image/sharpness_op.cc"
"${MINDDATA_DIR}/kernels/image/solarize_op.cc"
"${MINDDATA_DIR}/kernels/image/swap_red_blue_op.cc"
"${MINDDATA_DIR}/kernels/image/uniform_aug_op.cc"
"${MINDDATA_DIR}/kernels/image/resize_with_bbox_op.cc"
"${MINDDATA_DIR}/kernels/image/random_resize_with_bbox_op.cc"
"${MINDDATA_DIR}/kernels/image/random_color_op.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES
"${MINDDATA_DIR}/engine/ir/datasetops/bucket_batch_by_length_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/build_sentence_piece_vocab_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/build_vocab_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/filter_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/sync_wait_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/skip_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/take_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/transfer_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/zip_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/rename_node.cc"
"${MINDDATA_DIR}/engine/ir/datasetops/concat_node.cc"
)
list(REMOVE_ITEM MINDDATA_ENGINE_CONSUMERS_SRC_FILES
"${MINDDATA_DIR}/engine/consumers/python_tree_consumer.cc"
)

list(REMOVE_ITEM MINDDATA_ENGINE_SRC_FILES
"${MINDDATA_DIR}/engine/python_runtime_context.cc"
)

list(REMOVE_ITEM MINDDATA_KERNELS_DATA_SRC_FILES
"${MINDDATA_DIR}/kernels/data/unique_op.cc"
)

list(REMOVE_ITEM MINDDATA_UTIL_SRC_FILES
"${MINDDATA_DIR}/util/numa_interface.cc"
)
include_directories("${CMAKE_BINARY_DIR}/minddata/dataset/engine/cache")

if(BUILD_MINDDATA_EXAMPLE AND (PLATFORM_ARM32 OR PLATFORM_ARM64))
set(MINDDATA_TODAPI_SRC ${CMAKE_CURRENT_SOURCE_DIR}/wrapper/MDToDApi.cc)
endif()
${MINDDATA_DIR}/engine/datasetops/source/io_block.cc
${MINDDATA_DIR}/engine/opt/pre/getter_pass.cc
${MINDDATA_DIR}/engine/opt/pre/input_validation_pass.cc
${MINDDATA_DIR}/engine/opt/pre/cache_validation_pass.cc
${MINDDATA_DIR}/engine/opt/pre/node_removal_pass.cc
${MINDDATA_DIR}/engine/opt/pre/epoch_ctrl_pass.cc
${MINDDATA_DIR}/engine/opt/pre/deep_copy_pass.cc
${MINDDATA_DIR}/engine/opt/post/auto_worker_pass.cc
${MINDDATA_DIR}/engine/opt/pass.cc
${MINDDATA_DIR}/engine/perf/profiling.cc
${MINDDATA_DIR}/engine/perf/monitor.cc
${MINDDATA_DIR}/engine/perf/device_queue_tracing.cc
${MINDDATA_DIR}/engine/perf/connector_size.cc
${MINDDATA_DIR}/engine/perf/connector_throughput.cc
${MINDDATA_DIR}/engine/perf/dataset_iterator_tracing.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/distributed_sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/pk_sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/random_sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/sequential_sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/subset_random_sampler.cc
${MINDDATA_DIR}/engine/datasetops/source/sampler/weighted_random_sampler.cc
${MINDDATA_DIR}/engine/runtime_context.cc
${MINDDATA_DIR}/engine/tree_adapter.cc
${MINDDATA_DIR}/engine/data_buffer.cc
${MINDDATA_DIR}/engine/execution_tree.cc
${MINDDATA_DIR}/engine/dataset_iterator.cc
${MINDDATA_DIR}/core/tensor_row.cc
${MINDDATA_DIR}/api/vision.cc
${MINDDATA_DIR}/api/transforms.cc
${MINDDATA_DIR}/util/path.cc
${MINDDATA_DIR}/util/status.cc
${MINDDATA_DIR}/util/service.cc
${MINDDATA_DIR}/util/data_helper.cc
${MINDDATA_DIR}/util/cond_var.cc
${MINDDATA_DIR}/engine/data_schema.cc
${MINDDATA_DIR}/kernels/tensor_op.cc
${MINDDATA_DIR}/kernels/image/lite_image_utils.cc
${MINDDATA_DIR}/kernels/image/center_crop_op.cc
${MINDDATA_DIR}/kernels/image/crop_op.cc
${MINDDATA_DIR}/kernels/image/normalize_op.cc
${MINDDATA_DIR}/kernels/image/resize_op.cc
${MINDDATA_DIR}/kernels/image/rotate_op.cc
${MINDDATA_DIR}/kernels/data/compose_op.cc
${MINDDATA_DIR}/kernels/data/duplicate_op.cc
${MINDDATA_DIR}/kernels/data/one_hot_op.cc
${MINDDATA_DIR}/kernels/data/random_apply_op.cc
${MINDDATA_DIR}/kernels/data/random_choice_op.cc
${MINDDATA_DIR}/kernels/data/type_cast_op.cc
${MINDDATA_DIR}/kernels/image/exif_utils.cc
${MINDDATA_DIR}/callback/callback_manager.cc
${MINDDATA_DIR}/util/task_manager.cc
${MINDDATA_DIR}/util/services.cc
${MINDDATA_DIR}/util/wait_post.cc
${MINDDATA_DIR}/util/task.cc
${MINDDATA_DIR}/util/circular_pool.cc
${MINDDATA_DIR}/util/lock.cc
${MINDDATA_DIR}/util/wait_post.cc
${MINDDATA_DIR}/util/intrp_service.cc
${MINDDATA_DIR}/util/arena.cc
)

add_library(minddata-lite SHARED
${MINDDATA_API_SRC_FILES}
${MINDDATA_CALLBACK_SRC_FILES}
${MINDDATA_CORE_SRC_FILES}
${MINDDATA_ENGINE_SRC_FILES}
#${MINDDATA_ENGINE_CACHE_SRC_FILES}
${MINDDATA_ENGINE_CONSUMERS_SRC_FILES}
${MINDDATA_ENGINE_DATASETOPS_SRC_FILES}
${MINDDATA_ENGINE_DATASETOPS_MAPOP_SRC_FILES}
${MINDDATA_ENGINE_DATASETOPS_SOURCE_SRC_FILES}
${MINDDATA_ENGINE_DATASETOPS_SOURCE_SAMPLER_SRC_FILES}
${MINDDATA_ENGINE_IR_DATASETOPS_SRC_FILES}
${MINDDATA_ENGINE_IR_CACHE_SRC_FILES}
${MINDDATA_ENGINE_IR_DATASETOPS_SOURCE_SRC_FILES}
${MINDDATA_ENGINE_OPT_SRC_FILES}
${MINDDATA_ENGINE_OPT_OPTIONAL_SRC_FILES}
${MINDDATA_ENGINE_OPT_POST_SRC_FILES}
${MINDDATA_ENGINE_OPT_PRE_SRC_FILES}
${MINDDATA_ENGINE_OPT_UTIL_SRC_FILES}
${MINDDATA_ENGINE_PERF_SRC_FILES}
${MINDDATA_KERNELS_SRC_FILES}
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
${MINDDATA_KERNELS_IMAGE_SRC_FILES}
${MINDDATA_KERNELS_DATA_SRC_FILES}
${MINDDATA_UTIL_SRC_FILES}
${MINDDATA_EXAMPLE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
)
${MINDDATA_KERNELS_IMAGE_LITE_CV_FILES}
${CMAKE_CURRENT_SOURCE_DIR}/../src/common/log_adapter.cc
${CORE_DIR}/utils/ms_utils.cc
${MINDDATA_FULL_SRC}
)

find_package(Threads REQUIRED)
target_link_libraries(minddata-lite
securec
mindspore::jpeg_turbo
mindspore::turbojpeg
mindspore::json
Threads::Threads
)
securec
mindspore::jpeg_turbo
mindspore::turbojpeg
mindspore::json
Threads::Threads
)

# ref: https://github.com/android/ndk/issues/1202
if(PLATFORM_ARM32)
file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a)
if(LIBCLANG_RT_LIB STREQUAL "")
MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-android.a in $ENV{ANDROID_NDK}")
endif()
target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
file(GLOB_RECURSE LIBCLANG_RT_LIB $ENV{ANDROID_NDK}/libclang_rt.builtins-arm-android.a)
if(LIBCLANG_RT_LIB STREQUAL "")
MESSAGE(FATAL_ERROR "Cannot find libclang_rt.builtins-arm-android.a in $ENV{ANDROID_NDK}")
endif()
target_link_libraries(minddata-lite ${LIBCLANG_RT_LIB})
endif()

if(PLATFORM_ARM32 OR PLATFORM_ARM64)
target_link_libraries(minddata-lite log)
elseif(BUILD_MINDDATA_EXAMPLE)
endif()
target_link_libraries(minddata-lite log)
elseif()
endif()
elseif(BUILD_MINDDATA STREQUAL "wrapper")
include_directories("${MINDDATA_DIR}/kernels/image")
include_directories("${MINDDATA_DIR}/util")

@@ -0,0 +1,22 @@
cmake_minimum_required(VERSION 3.14.1)
project(testlenet)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I/usr/local/include -std=c++17 -Werror
-Wall -Wno-deprecated-declarations -fPIC")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-compare")

set(DepDIR "${CMAKE_CURRENT_SOURCE_DIR}/mindspore-lite-1.1.0-inference-linux-x64/minddata")

include_directories(${DepDIR})



add_executable(testlenet
${CMAKE_CURRENT_SOURCE_DIR}/testlenet.cpp
)

target_link_libraries(testlenet
${DepDIR}/lib/libminddata-lite.so
${DepDIR}/third_party/libjpeg-turbo/lib/libjpeg.so.62
${DepDIR}/third_party/libjpeg-turbo/lib/libturbojpeg.so.0
pthread)

@@ -0,0 +1,62 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/stat.h>
#include <unistd.h>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>

#include "include/datasets.h"
#include "include/iterator.h"
#include "include/vision_lite.h"
#include "include/transforms.h"
#include "include/tensor.h"

using mindspore::dataset::Dataset;
using mindspore::dataset::Iterator;
using mindspore::dataset::Mnist;
using mindspore::dataset::Tensor;
using mindspore::dataset::TensorOperation;

int main(int argc, char **argv) {
// Create an MNIST dataset from the local test data folder.
std::string folder_path = "./testMnistData/";
std::shared_ptr<Dataset> ds = Mnist(folder_path, "all");

// Resize every image to 32 x 32 through a Map operation.
std::shared_ptr<TensorOperation> resize = mindspore::dataset::vision::Resize({32, 32});
ds = ds->Map({resize});

// Shuffle() and Batch() return new dataset nodes, so reassign ds for them to take effect.
ds = ds->Shuffle(2);
ds = ds->Batch(2);

std::shared_ptr<Iterator> iter = ds->CreateIterator();

std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
iter->GetNextRow(&row);

// Walk the dataset until GetNextRow() returns an empty row (end of data).
uint64_t i = 0;
while (row.size() != 0) {
i++;
auto image = row["image"];  // "image" column of the current row
iter->GetNextRow(&row);
}

iter->Stop();
}